diff --git a/README.md b/README.md
index 4fd6029a8a9bab87eace2e6531b83fe94b3af2a6..c9ca3c537e64c32333c1d8cb8673cb3e40b67d7d 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
---
-title: HaMeR Test
-emoji: 📚
-colorFrom: pink
-colorTo: purple
+title: HaMeR
+emoji: 🔥
+colorFrom: yellow
+colorTo: yellow
sdk: gradio
sdk_version: 4.8.0
app_file: app.py
pinned: false
---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
\ No newline at end of file
diff --git a/_DATA/data/mano/MANO_RIGHT.pkl b/_DATA/data/mano/MANO_RIGHT.pkl
new file mode 100755
index 0000000000000000000000000000000000000000..8e7ac7faf64ad51096ec1da626ea13757ed7f665
--- /dev/null
+++ b/_DATA/data/mano/MANO_RIGHT.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45d60aa3b27ef9107a7afd4e00808f307fd91111e1cfa35afd5c4a62de264767
+size 3821356
diff --git a/_DATA/data/mano_mean_params.npz b/_DATA/data/mano_mean_params.npz
new file mode 100644
index 0000000000000000000000000000000000000000..dc294b01fb78a9cd6636c87a69b59cf82d28d15b
--- /dev/null
+++ b/_DATA/data/mano_mean_params.npz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efc0ec58e4a5cef78f3abfb4e8f91623b8950be9eff8b8e0dbb0d036ebc63988
+size 1178
diff --git a/_DATA/hamer_ckpts/checkpoints/hamer.ckpt b/_DATA/hamer_ckpts/checkpoints/hamer.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..c5d0dae12e9a553336d196e22dea6b4ed74df351
--- /dev/null
+++ b/_DATA/hamer_ckpts/checkpoints/hamer.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5cc06f294d88a92dee24e603480aab04de532b49f0e08200804ee7d90e16f53
+size 2689536166
diff --git a/_DATA/hamer_ckpts/dataset_config.yaml b/_DATA/hamer_ckpts/dataset_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77b67251770062f769fdddfb0c8ffa4cc7720a80
--- /dev/null
+++ b/_DATA/hamer_ckpts/dataset_config.yaml
@@ -0,0 +1,42 @@
+COCOW-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/cocow-train/{000000..000036}.tar
+ epoch_size: 78666
+DEX-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/dex-train/{000000..000406}.tar
+ epoch_size: 406888
+FREIHAND-MOCAP:
+ DATASET_FILE: hamer_training_data/freihand_mocap.npz
+FREIHAND-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/freihand-train/{000000..000130}.tar
+ epoch_size: 130240
+H2O3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/h2o3d-train/{000000..000060}.tar
+ epoch_size: 121996
+HALPE-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/halpe-train/{000000..000022}.tar
+ epoch_size: 34289
+HO3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/ho3d-train/{000000..000083}.tar
+ epoch_size: 83325
+INTERHAND26M-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/interhand26m-train/{000000..001056}.tar
+ epoch_size: 1424632
+MPIINZSL-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/mpiinzsl-train/{000000..000015}.tar
+ epoch_size: 15184
+MTC-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/mtc-train/{000000..000306}.tar
+ epoch_size: 363947
+RHD-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/rhd-train/{000000..000041}.tar
+ epoch_size: 61705
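Each URLS entry above is a bash-style brace pattern over numbered tar shards. A minimal sketch of how such a pattern expands, using the braceexpand package that hamer/datasets/image_dataset.py (added later in this diff) relies on:

import braceexpand

# Expand the FreiHAND shard pattern from the config above.
urls = "hamer_training_data/dataset_tars/freihand-train/{000000..000130}.tar"
shards = list(braceexpand.braceexpand(urls))
print(len(shards))    # 131 shards
print(shards[0])      # .../freihand-train/000000.tar
print(shards[-1])     # .../freihand-train/000130.tar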
diff --git a/_DATA/hamer_ckpts/model_config.yaml b/_DATA/hamer_ckpts/model_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6026e4e021f3dbd923038f84a6296d5812acaf66
--- /dev/null
+++ b/_DATA/hamer_ckpts/model_config.yaml
@@ -0,0 +1,111 @@
+task_name: train
+tags:
+- dev
+train: true
+test: false
+ckpt_path: null
+seed: null
+DATASETS:
+ TRAIN:
+ FREIHAND-TRAIN:
+ WEIGHT: 0.25
+ INTERHAND26M-TRAIN:
+ WEIGHT: 0.25
+ MTC-TRAIN:
+ WEIGHT: 0.1
+ RHD-TRAIN:
+ WEIGHT: 0.05
+ COCOW-TRAIN:
+ WEIGHT: 0.1
+ HALPE-TRAIN:
+ WEIGHT: 0.05
+ MPIINZSL-TRAIN:
+ WEIGHT: 0.05
+ HO3D-TRAIN:
+ WEIGHT: 0.05
+ H2O3D-TRAIN:
+ WEIGHT: 0.05
+ DEX-TRAIN:
+ WEIGHT: 0.05
+ VAL:
+ FREIHAND-TRAIN:
+ WEIGHT: 1.0
+ MOCAP: FREIHAND-MOCAP
+ BETAS_REG: true
+ CONFIG:
+ SCALE_FACTOR: 0.3
+ ROT_FACTOR: 30
+ TRANS_FACTOR: 0.02
+ COLOR_SCALE: 0.2
+ ROT_AUG_RATE: 0.6
+ TRANS_AUG_RATE: 0.5
+ DO_FLIP: false
+ FLIP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_LEVEL: 1
+extras:
+ ignore_warnings: false
+ enforce_tags: true
+ print_config: true
+exp_name: hamer
+MANO:
+ DATA_DIR: _DATA/data/
+ MODEL_PATH: _DATA/data/mano
+ GENDER: neutral
+ NUM_HAND_JOINTS: 15
+ MEAN_PARAMS: _DATA/data/mano_mean_params.npz
+ CREATE_BODY_POSE: false
+EXTRA:
+ FOCAL_LENGTH: 5000
+ NUM_LOG_IMAGES: 4
+ NUM_LOG_SAMPLES_PER_IMAGE: 8
+ PELVIS_IND: 0
+GENERAL:
+ TOTAL_STEPS: 1000000
+ LOG_STEPS: 1000
+ VAL_STEPS: 1000
+ CHECKPOINT_STEPS: 10000
+ CHECKPOINT_SAVE_TOP_K: 1
+ NUM_WORKERS: 8
+ PREFETCH_FACTOR: 2
+TRAIN:
+ LR: 1.0e-05
+ WEIGHT_DECAY: 0.0001
+ BATCH_SIZE: 32
+ LOSS_REDUCTION: mean
+ NUM_TRAIN_SAMPLES: 2
+ NUM_TEST_SAMPLES: 64
+ POSE_2D_NOISE_RATIO: 0.01
+ SMPL_PARAM_NOISE_RATIO: 0.005
+MODEL:
+ IMAGE_SIZE: 256
+ IMAGE_MEAN:
+ - 0.485
+ - 0.456
+ - 0.406
+ IMAGE_STD:
+ - 0.229
+ - 0.224
+ - 0.225
+ BACKBONE:
+ TYPE: vit
+ PRETRAINED_WEIGHTS: hamer_training_data/vitpose_backbone.pth
+ MANO_HEAD:
+ TYPE: transformer_decoder
+ IN_CHANNELS: 2048
+ TRANSFORMER_DECODER:
+ depth: 6
+ heads: 8
+ mlp_dim: 1024
+ dim_head: 64
+ dropout: 0.0
+ emb_dropout: 0.0
+ norm: layer
+ context_dim: 1280
+LOSS_WEIGHTS:
+ KEYPOINTS_3D: 0.05
+ KEYPOINTS_2D: 0.01
+ GLOBAL_ORIENT: 0.001
+ HAND_POSE: 0.001
+ BETAS: 0.0005
+ ADVERSARIAL: 0.0005
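This dumped training config is what the demo reads at startup. A minimal sketch of loading it through hamer.configs.get_config (defined later in this diff), which merges it over the library defaults:

from hamer.configs import get_config

cfg = get_config('_DATA/hamer_ckpts/model_config.yaml')
print(cfg.MODEL.IMAGE_SIZE)     # 256
print(cfg.EXTRA.FOCAL_LENGTH)   # 5000
print(cfg.MANO.MEAN_PARAMS)     # _DATA/data/mano_mean_params.npz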
diff --git a/_DATA/vitpose_ckpts/vitpose+_huge/wholebody.pth b/_DATA/vitpose_ckpts/vitpose+_huge/wholebody.pth
new file mode 100644
index 0000000000000000000000000000000000000000..51475b0972e87adb8151ba18c8c1320ba8587934
--- /dev/null
+++ b/_DATA/vitpose_ckpts/vitpose+_huge/wholebody.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0555e1e2392e6a2be2d9265368f344d70ccbfd656ad480aa5c1de2e604519c9
+size 3807742341
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d5623bd2c3d44dc890f1c8f4589100f64a9e939
--- /dev/null
+++ b/app.py
@@ -0,0 +1,234 @@
+import argparse
+import os
+from pathlib import Path
+import tempfile
+import sys
+import cv2
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+
+# print file path
+print(os.path.abspath(__file__))
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+os.environ["MESA_GL_VERSION_OVERRIDE"] = "4.1"
+os.system('pip install /home/user/app/pyrender')
+sys.path.append('/home/user/app/pyrender')
+
+from hamer.configs import get_config
+from hamer.datasets.vitdet_dataset import (DEFAULT_MEAN, DEFAULT_STD,
+ ViTDetDataset)
+from hamer.models import HAMER
+from hamer.utils import recursive_to
+from hamer.utils.renderer import Renderer, cam_crop_to_full
+
+try:
+    import detectron2
+except ImportError:
+    os.system('pip install --upgrade pip')
+    os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
+
+#try:
+# from vitpose_model import ViTPoseModel
+#except:
+# os.system('pip install -v -e /home/user/app/vendor/ViTPose')
+# from vitpose_model import ViTPoseModel
+from vitpose_model import ViTPoseModel
+
+OUT_FOLDER = 'demo_out'
+os.makedirs(OUT_FOLDER, exist_ok=True)
+
+# Setup HaMeR model
+LIGHT_BLUE=(0.65098039, 0.74117647, 0.85882353)
+DEFAULT_CHECKPOINT='_DATA/hamer_ckpts/checkpoints/hamer.ckpt'
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+model_cfg = str(Path(DEFAULT_CHECKPOINT).parent.parent / 'model_config.yaml')
+model_cfg = get_config(model_cfg)
+model = HAMER.load_from_checkpoint(DEFAULT_CHECKPOINT, strict=False, cfg=model_cfg).to(device)
+model.eval()
+
+
+# Load detector
+from detectron2.config import LazyConfig
+
+from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
+
+detectron2_cfg = LazyConfig.load(f"vendor/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py")
+detectron2_cfg.train.init_checkpoint = "https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h/f328730692/model_final_f05665.pkl"
+for i in range(3):
+ detectron2_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25
+detector = DefaultPredictor_Lazy(detectron2_cfg)
+
+# Setup the renderer
+renderer = Renderer(model_cfg, faces=model.mano.faces)
+
+# keypoint detector
+cpm = ViTPoseModel(device)
+
+
+def infer(in_pil_img, in_threshold=0.8, out_pil_img=None):
+
+ open_cv_image = np.array(in_pil_img)
+ # Convert RGB to BGR
+ open_cv_image = open_cv_image[:, :, ::-1].copy()
+ print("EEEEE", open_cv_image.shape)
+ det_out = detector(open_cv_image)
+ det_instances = det_out['instances']
+ valid_idx = (det_instances.pred_classes==0) & (det_instances.scores > in_threshold)
+ pred_bboxes=det_instances.pred_boxes.tensor[valid_idx].cpu().numpy()
+ pred_scores=det_instances.scores[valid_idx].cpu().numpy()
+
+
+ # Detect human keypoints for each person
+ vitposes_out = cpm.predict_pose(
+ open_cv_image,
+ [np.concatenate([pred_bboxes, pred_scores[:, None]], axis=1)],
+ )
+
+ bboxes = []
+ is_right = []
+
+ # Use hands based on hand keypoint detections
+ for vitposes in vitposes_out:
+ left_hand_keyp = vitposes['keypoints'][-42:-21]
+ right_hand_keyp = vitposes['keypoints'][-21:]
+
+ # Rejecting not confident detections (this could be improved)
+ keyp = left_hand_keyp
+ valid = keyp[:,2] > 0.5
+ if sum(valid) > 3:
+ bbox = [keyp[valid,0].min(), keyp[valid,1].min(), keyp[valid,0].max(), keyp[valid,1].max()]
+ bboxes.append(bbox)
+ is_right.append(0)
+ keyp = right_hand_keyp
+ valid = keyp[:,2] > 0.5
+ if sum(valid) > 3:
+ bbox = [keyp[valid,0].min(), keyp[valid,1].min(), keyp[valid,0].max(), keyp[valid,1].max()]
+ bboxes.append(bbox)
+ is_right.append(1)
+
+ if len(bboxes) == 0:
+ return None, []
+
+ boxes = np.stack(bboxes)
+ right = np.stack(is_right)
+
+
+ # Run HaMeR on all detected humans
+ dataset = ViTDetDataset(model_cfg, open_cv_image, boxes, right)
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False, num_workers=0)
+
+ all_verts = []
+ all_cam_t = []
+ all_right = []
+ all_mesh_paths = []
+
+ temp_name = next(tempfile._get_candidate_names())
+
+ for batch in dataloader:
+ batch = recursive_to(batch, device)
+ with torch.no_grad():
+ out = model(batch)
+
+ multiplier = (2*batch['right']-1)
+ pred_cam = out['pred_cam']
+ pred_cam[:,1] = multiplier*pred_cam[:,1]
+ box_center = batch["box_center"].float()
+ box_size = batch["box_size"].float()
+ img_size = batch["img_size"].float()
+ render_size = img_size
+ scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
+ pred_cam_t = cam_crop_to_full(pred_cam, box_center, box_size, render_size, scaled_focal_length).detach().cpu().numpy()
+
+ # Render the result
+ batch_size = batch['img'].shape[0]
+ for n in range(batch_size):
+ # Get filename from path img_path
+ # img_fn, _ = os.path.splitext(os.path.basename(img_path))
+ person_id = int(batch['personid'][n])
+ white_img = (torch.ones_like(batch['img'][n]).cpu() - DEFAULT_MEAN[:,None,None]/255) / (DEFAULT_STD[:,None,None]/255)
+ input_patch = batch['img'][n].cpu() * (DEFAULT_STD[:,None,None]/255) + (DEFAULT_MEAN[:,None,None]/255)
+ input_patch = input_patch.permute(1,2,0).numpy()
+
+
+ verts = out['pred_vertices'][n].detach().cpu().numpy()
+ is_right = batch['right'][n].cpu().numpy()
+ verts[:,0] = (2*is_right-1)*verts[:,0]
+ cam_t = pred_cam_t[n]
+
+ all_verts.append(verts)
+ all_cam_t.append(cam_t)
+ all_right.append(is_right)
+
+ # Save all meshes to disk
+ # if args.save_mesh:
+ if True:
+ camera_translation = cam_t.copy()
+ tmesh = renderer.vertices_to_trimesh(verts, camera_translation, LIGHT_BLUE, is_right=is_right)
+
+                temp_path = os.path.join(OUT_FOLDER, f'{temp_name}_{person_id}.obj')
+ tmesh.export(temp_path)
+ all_mesh_paths.append(temp_path)
+
+ # Render front view
+ if len(all_verts) > 0:
+ misc_args = dict(
+ mesh_base_color=LIGHT_BLUE,
+ scene_bg_color=(1, 1, 1),
+ focal_length=scaled_focal_length,
+ )
+ cam_view = renderer.render_rgba_multiple(all_verts, cam_t=all_cam_t, render_res=render_size[n], is_right=all_right, **misc_args)
+
+ # Overlay image
+ input_img = open_cv_image.astype(np.float32)[:,:,::-1]/255.0
+ input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
+ input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
+
+ # convert to PIL image
+ out_pil_img = Image.fromarray((input_img_overlay*255).astype(np.uint8))
+
+ return out_pil_img, all_mesh_paths
+ else:
+ return None, []
+
+
+with gr.Blocks(title="HaMeR", css=".gradio-container") as demo:
+
+ gr.HTML("""
HaMeR
""")
+
+ with gr.Row():
+ with gr.Column():
+ input_image = gr.Image(label="Input image", type="pil")
+ with gr.Column():
+ output_image = gr.Image(label="Reconstructions", type="pil")
+ output_meshes = gr.File(label="3D meshes")
+
+ gr.HTML(""" """)
+
+ with gr.Row():
+ threshold = gr.Slider(0, 1.0, value=0.6, label='Detection Threshold')
+ send_btn = gr.Button("Infer")
+ send_btn.click(fn=infer, inputs=[input_image, threshold], outputs=[output_image, output_meshes])
+
+ # with gr.Row():
+    example_images = gr.Examples([
+            ['/home/user/app/assets/test1.jpg'],
+            ['/home/user/app/assets/test2.jpg'],
+            ['/home/user/app/assets/test3.jpg'],
+            ['/home/user/app/assets/test4.jpg'],
+            ['/home/user/app/assets/test5.jpg'],
+        ],
+        inputs=[input_image])
+
+
+#demo.queue()
+demo.launch(debug=True)
+
+
+
+
+### EOF ###
\ No newline at end of file
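For readers scripting the demo instead of clicking through the UI, a hedged sketch of calling infer() directly. It assumes the setup portion of app.py (model, detector, renderer) has already executed, e.g. in a notebook cell, since importing app as-is would also call demo.launch(); 'my_hand.jpg' is a placeholder, not a file in this repo.

from PIL import Image

img = Image.open('my_hand.jpg').convert('RGB')   # placeholder input image
overlay, mesh_paths = infer(img, in_threshold=0.6)
if overlay is not None:
    overlay.save('overlay.png')                  # rendered hands over the input
print(mesh_paths)                                # .obj meshes written under demo_out/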
diff --git a/assets/list.txt b/assets/list.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assets/test1.jpg b/assets/test1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9a98d3719e5025bf0667cf9bc63271db9bd3f94
Binary files /dev/null and b/assets/test1.jpg differ
diff --git a/assets/test2.jpg b/assets/test2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..515cac11d1630c0b432e6b46d246bc30e8558510
Binary files /dev/null and b/assets/test2.jpg differ
diff --git a/assets/test3.jpg b/assets/test3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5494616ede570d70b7b8a3f199e2205700f3b7ce
Binary files /dev/null and b/assets/test3.jpg differ
diff --git a/assets/test4.jpg b/assets/test4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b4260b75c38157966a8a03cfd99469b43257590f
Binary files /dev/null and b/assets/test4.jpg differ
diff --git a/assets/test5.jpg b/assets/test5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9e5eee8f3dcca248b73d1f99dbff1ce1130c3297
Binary files /dev/null and b/assets/test5.jpg differ
diff --git a/hamer/__init__.py b/hamer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hamer/configs/__init__.py b/hamer/configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e70f8d4fb7621e4f4d2d037fb05852741c6f6ec7
--- /dev/null
+++ b/hamer/configs/__init__.py
@@ -0,0 +1,111 @@
+import os
+from typing import Dict
+from yacs.config import CfgNode as CN
+
+CACHE_DIR_HAMER = "./_DATA"
+
+def to_lower(x: Dict) -> Dict:
+ """
+ Convert all dictionary keys to lowercase
+ Args:
+ x (dict): Input dictionary
+ Returns:
+ dict: Output dictionary with all keys converted to lowercase
+ """
+ return {k.lower(): v for k, v in x.items()}
+
+_C = CN(new_allowed=True)
+
+_C.GENERAL = CN(new_allowed=True)
+_C.GENERAL.RESUME = True
+_C.GENERAL.TIME_TO_RUN = 3300
+_C.GENERAL.VAL_STEPS = 100
+_C.GENERAL.LOG_STEPS = 100
+_C.GENERAL.CHECKPOINT_STEPS = 20000
+_C.GENERAL.CHECKPOINT_DIR = "checkpoints"
+_C.GENERAL.SUMMARY_DIR = "tensorboard"
+_C.GENERAL.NUM_GPUS = 1
+_C.GENERAL.NUM_WORKERS = 4
+_C.GENERAL.MIXED_PRECISION = True
+_C.GENERAL.ALLOW_CUDA = True
+_C.GENERAL.PIN_MEMORY = False
+_C.GENERAL.DISTRIBUTED = False
+_C.GENERAL.LOCAL_RANK = 0
+_C.GENERAL.USE_SYNCBN = False
+_C.GENERAL.WORLD_SIZE = 1
+
+_C.TRAIN = CN(new_allowed=True)
+_C.TRAIN.NUM_EPOCHS = 100
+_C.TRAIN.BATCH_SIZE = 32
+_C.TRAIN.SHUFFLE = True
+_C.TRAIN.WARMUP = False
+_C.TRAIN.NORMALIZE_PER_IMAGE = False
+_C.TRAIN.CLIP_GRAD = False
+_C.TRAIN.CLIP_GRAD_VALUE = 1.0
+_C.LOSS_WEIGHTS = CN(new_allowed=True)
+
+_C.DATASETS = CN(new_allowed=True)
+
+_C.MODEL = CN(new_allowed=True)
+_C.MODEL.IMAGE_SIZE = 224
+
+_C.EXTRA = CN(new_allowed=True)
+_C.EXTRA.FOCAL_LENGTH = 5000
+
+_C.DATASETS.CONFIG = CN(new_allowed=True)
+_C.DATASETS.CONFIG.SCALE_FACTOR = 0.3
+_C.DATASETS.CONFIG.ROT_FACTOR = 30
+_C.DATASETS.CONFIG.TRANS_FACTOR = 0.02
+_C.DATASETS.CONFIG.COLOR_SCALE = 0.2
+_C.DATASETS.CONFIG.ROT_AUG_RATE = 0.6
+_C.DATASETS.CONFIG.TRANS_AUG_RATE = 0.5
+_C.DATASETS.CONFIG.DO_FLIP = False
+_C.DATASETS.CONFIG.FLIP_AUG_RATE = 0.5
+_C.DATASETS.CONFIG.EXTREME_CROP_AUG_RATE = 0.10
+
+def default_config() -> CN:
+ """
+ Get a yacs CfgNode object with the default config values.
+ """
+ # Return a clone so that the defaults will not be altered
+ # This is for the "local variable" use pattern
+ return _C.clone()
+
+def dataset_config() -> CN:
+ """
+ Get dataset config file
+ Returns:
+ CfgNode: Dataset config as a yacs CfgNode object.
+ """
+ cfg = CN(new_allowed=True)
+ config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'datasets_tar.yaml')
+ cfg.merge_from_file(config_file)
+ cfg.freeze()
+ return cfg
+
+def get_config(config_file: str, merge: bool = True, update_cachedir: bool = False) -> CN:
+ """
+ Read a config file and optionally merge it with the default config file.
+ Args:
+ config_file (str): Path to config file.
+ merge (bool): Whether to merge with the default config or not.
+ Returns:
+ CfgNode: Config as a yacs CfgNode object.
+ """
+ if merge:
+ cfg = default_config()
+ else:
+ cfg = CN(new_allowed=True)
+ cfg.merge_from_file(config_file)
+
+ if update_cachedir:
+ def update_path(path: str) -> str:
+ if os.path.isabs(path):
+ return path
+ return os.path.join(CACHE_DIR_HAMER, path)
+
+ cfg.MANO.MODEL_PATH = update_path(cfg.MANO.MODEL_PATH)
+ cfg.MANO.MEAN_PARAMS = update_path(cfg.MANO.MEAN_PARAMS)
+
+ cfg.freeze()
+ return cfg
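A short sketch of the two entry points this module exposes, assuming datasets_tar.yaml sits next to it (it is added later in this diff):

from hamer.configs import default_config, dataset_config

cfg = default_config()              # clone of the yacs defaults above
print(cfg.MODEL.IMAGE_SIZE)         # 224 until a model_config.yaml overrides it

dcfg = dataset_config()             # parses hamer/configs/datasets_tar.yaml
print(dcfg['FREIHAND-TRAIN'].URLS)  # brace pattern over the FreiHAND shards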
diff --git a/hamer/configs/cascade_mask_rcnn_vitdet_h_75ep.py b/hamer/configs/cascade_mask_rcnn_vitdet_h_75ep.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c6ae0eaf48c2c2d3b70529a0d2d915432e43db6
--- /dev/null
+++ b/hamer/configs/cascade_mask_rcnn_vitdet_h_75ep.py
@@ -0,0 +1,129 @@
+## coco_loader_lsj.py
+
+import detectron2.data.transforms as T
+from detectron2 import model_zoo
+from detectron2.config import LazyCall as L
+
+# Data using LSJ
+image_size = 1024
+dataloader = model_zoo.get_config("common/data/coco.py").dataloader
+dataloader.train.mapper.augmentations = [
+ L(T.RandomFlip)(horizontal=True), # flip first
+ L(T.ResizeScale)(
+ min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
+ ),
+ L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False),
+]
+dataloader.train.mapper.image_format = "RGB"
+dataloader.train.total_batch_size = 64
+# recompute boxes due to cropping
+dataloader.train.mapper.recompute_boxes = True
+
+dataloader.test.mapper.augmentations = [
+ L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size),
+]
+
+from functools import partial
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2 import model_zoo
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+from detectron2.modeling.backbone.vit import get_vit_lr_decay_rate
+
+# mask_rcnn_vitdet_b_100ep.py
+
+model = model_zoo.get_config("common/models/mask_rcnn_vitdet.py").model
+
+# Initialization and trainer settings
+train = model_zoo.get_config("common/train.py").train
+train.amp.enabled = True
+train.ddp.fp16_compression = True
+train.init_checkpoint = "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
+
+
+# Schedule
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
+train.max_iter = 184375
+
+lr_multiplier = L(WarmupParamScheduler)(
+ scheduler=L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01],
+ milestones=[163889, 177546],
+ num_updates=train.max_iter,
+ ),
+ warmup_length=250 / train.max_iter,
+ warmup_factor=0.001,
+)
+
+# Optimizer
+optimizer = model_zoo.get_config("common/optim.py").AdamW
+optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, num_layers=12, lr_decay_rate=0.7)
+optimizer.params.overrides = {"pos_embed": {"weight_decay": 0.0}}
+
+# cascade_mask_rcnn_vitdet_b_100ep.py
+
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.roi_heads import (
+ FastRCNNOutputLayers,
+ FastRCNNConvFCHead,
+ CascadeROIHeads,
+)
+
+# arguments that don't exist for Cascade R-CNN
+[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+model.roi_heads.update(
+ _target_=CascadeROIHeads,
+ box_heads=[
+ L(FastRCNNConvFCHead)(
+ input_shape=ShapeSpec(channels=256, height=7, width=7),
+ conv_dims=[256, 256, 256, 256],
+ fc_dims=[1024],
+ conv_norm="LN",
+ )
+ for _ in range(3)
+ ],
+ box_predictors=[
+ L(FastRCNNOutputLayers)(
+ input_shape=ShapeSpec(channels=1024),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+ cls_agnostic_bbox_reg=True,
+ num_classes="${...num_classes}",
+ )
+ for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+ ],
+ proposal_matchers=[
+ L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+ for th in [0.5, 0.6, 0.7]
+ ],
+)
+
+# cascade_mask_rcnn_vitdet_h_75ep.py
+
+from functools import partial
+
+train.init_checkpoint = "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth"
+
+model.backbone.net.embed_dim = 1280
+model.backbone.net.depth = 32
+model.backbone.net.num_heads = 16
+model.backbone.net.drop_path_rate = 0.5
+# 7, 15, 23, 31 for global attention
+model.backbone.net.window_block_indexes = (
+ list(range(0, 7)) + list(range(8, 15)) + list(range(16, 23)) + list(range(24, 31))
+)
+
+optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.9, num_layers=32)
+optimizer.params.overrides = {}
+optimizer.params.weight_decay_norm = None
+
+train.max_iter = train.max_iter * 3 // 4 # 100ep -> 75ep
+lr_multiplier.scheduler.milestones = [
+ milestone * 3 // 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
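This is a detectron2 lazy config. A hedged sketch of materializing it with detectron2's LazyConfig API, mirroring how app.py above builds its DefaultPredictor_Lazy from the vendored copy of the same config:

from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load('hamer/configs/cascade_mask_rcnn_vitdet_h_75ep.py')
detector = instantiate(cfg.model)   # Cascade Mask R-CNN with a ViT-H backbone
print(sum(p.numel() for p in detector.parameters()))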
diff --git a/hamer/configs/datasets_tar.yaml b/hamer/configs/datasets_tar.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ebad8a6404e5fe59db55f9e042af8301053eb66
--- /dev/null
+++ b/hamer/configs/datasets_tar.yaml
@@ -0,0 +1,42 @@
+FREIHAND-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/freihand-train/{000000..000130}.tar
+ epoch_size: 130_240
+INTERHAND26M-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/interhand26m-train/{000000..001056}.tar
+ epoch_size: 1_424_632
+HALPE-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/halpe-train/{000000..000022}.tar
+ epoch_size: 34_289
+COCOW-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/cocow-train/{000000..000036}.tar
+ epoch_size: 78_666
+MTC-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/mtc-train/{000000..000306}.tar
+ epoch_size: 363_947
+RHD-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/rhd-train/{000000..000041}.tar
+ epoch_size: 61_705
+MPIINZSL-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/mpiinzsl-train/{000000..000015}.tar
+ epoch_size: 15_184
+HO3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/ho3d-train/{000000..000083}.tar
+ epoch_size: 83_325
+H2O3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/h2o3d-train/{000000..000060}.tar
+ epoch_size: 121_996
+DEX-TRAIN:
+ TYPE: ImageDataset
+ URLS: hamer_training_data/dataset_tars/dex-train/{000000..000406}.tar
+ epoch_size: 406_888
+FREIHAND-MOCAP:
+ DATASET_FILE: hamer_training_data/freihand_mocap.npz
diff --git a/hamer/configs_hydra/data/mix_all.yaml b/hamer/configs_hydra/data/mix_all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..26e0d7102553772cbb9a4893e55863f56e3bc41d
--- /dev/null
+++ b/hamer/configs_hydra/data/mix_all.yaml
@@ -0,0 +1,31 @@
+# @package _global_
+defaults:
+ - /data_filtering: low1
+
+DATASETS:
+ TRAIN:
+ FREIHAND-TRAIN:
+ WEIGHT: 0.25
+ INTERHAND26M-TRAIN:
+ WEIGHT: 0.25
+ MTC-TRAIN:
+ WEIGHT: 0.1
+ RHD-TRAIN:
+ WEIGHT: 0.05
+ COCOW-TRAIN:
+ WEIGHT: 0.1
+ HALPE-TRAIN:
+ WEIGHT: 0.05
+ MPIINZSL-TRAIN:
+ WEIGHT: 0.05
+ HO3D-TRAIN:
+ WEIGHT: 0.05
+ H2O3D-TRAIN:
+ WEIGHT: 0.05
+ DEX-TRAIN:
+ WEIGHT: 0.05
+ VAL:
+ FREIHAND-TRAIN:
+ WEIGHT: 1.0
+
+ MOCAP: FREIHAND-MOCAP
diff --git a/hamer/configs_hydra/data_filtering/low1.yaml b/hamer/configs_hydra/data_filtering/low1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bea3b9df8c10100f1de32600546f254aa70a5081
--- /dev/null
+++ b/hamer/configs_hydra/data_filtering/low1.yaml
@@ -0,0 +1,13 @@
+# @package _global_
+
+DATASETS:
+ # Data filtering during training
+ SUPPRESS_KP_CONF_THRESH: 0.3
+ FILTER_NUM_KP: 4
+ FILTER_NUM_KP_THRESH: 0.0
+ FILTER_REPROJ_THRESH: 31000
+
+ SUPPRESS_BETAS_THRESH: 3.0
+ SUPPRESS_BAD_POSES: False
+ POSES_BETAS_SIMULTANEOUS: True
+ FILTER_NO_POSES: False # If True, filters images that don't have poses
diff --git a/hamer/configs_hydra/experiment/default.yaml b/hamer/configs_hydra/experiment/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a497f6309fa061638bd9c62a62408d1f558e379
--- /dev/null
+++ b/hamer/configs_hydra/experiment/default.yaml
@@ -0,0 +1,29 @@
+# @package _global_
+
+MANO:
+ DATA_DIR: ${oc.env:HOME}/.cache/4DHumans/data/
+ MODEL_PATH: ${MANO.DATA_DIR}/mano
+ GENDER: neutral
+ NUM_HAND_JOINTS: 15
+ MEAN_PARAMS: ${MANO.DATA_DIR}/mano_mean_params.npz
+ CREATE_BODY_POSE: FALSE
+
+EXTRA:
+ FOCAL_LENGTH: 5000
+ NUM_LOG_IMAGES: 4
+ NUM_LOG_SAMPLES_PER_IMAGE: 8
+ PELVIS_IND: 0
+
+DATASETS:
+ BETAS_REG: True
+ CONFIG:
+ SCALE_FACTOR: 0.3
+ ROT_FACTOR: 30
+ TRANS_FACTOR: 0.02
+ COLOR_SCALE: 0.2
+ ROT_AUG_RATE: 0.6
+ TRANS_AUG_RATE: 0.5
+ DO_FLIP: False
+ FLIP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_LEVEL: 1
diff --git a/hamer/configs_hydra/experiment/hamer_vit_transformer.yaml b/hamer/configs_hydra/experiment/hamer_vit_transformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0092a0488e22e685b548ab04b7830ec42ede8fdb
--- /dev/null
+++ b/hamer/configs_hydra/experiment/hamer_vit_transformer.yaml
@@ -0,0 +1,51 @@
+# @package _global_
+
+defaults:
+ - default.yaml
+
+GENERAL:
+ TOTAL_STEPS: 1_000_000
+ LOG_STEPS: 1000
+ VAL_STEPS: 1000
+ CHECKPOINT_STEPS: 1000
+ CHECKPOINT_SAVE_TOP_K: 1
+ NUM_WORKERS: 25
+ PREFETCH_FACTOR: 2
+
+TRAIN:
+ LR: 1e-5
+ WEIGHT_DECAY: 1e-4
+ BATCH_SIZE: 8
+ LOSS_REDUCTION: mean
+ NUM_TRAIN_SAMPLES: 2
+ NUM_TEST_SAMPLES: 64
+ POSE_2D_NOISE_RATIO: 0.01
+ SMPL_PARAM_NOISE_RATIO: 0.005
+
+MODEL:
+ IMAGE_SIZE: 256
+ IMAGE_MEAN: [0.485, 0.456, 0.406]
+ IMAGE_STD: [0.229, 0.224, 0.225]
+ BACKBONE:
+ TYPE: vit
+ PRETRAINED_WEIGHTS: hamer_training_data/vitpose_backbone.pth
+ MANO_HEAD:
+ TYPE: transformer_decoder
+ IN_CHANNELS: 2048
+ TRANSFORMER_DECODER:
+ depth: 6
+ heads: 8
+ mlp_dim: 1024
+ dim_head: 64
+ dropout: 0.0
+ emb_dropout: 0.0
+ norm: layer
+ context_dim: 1280 # from vitpose-H
+
+LOSS_WEIGHTS:
+ KEYPOINTS_3D: 0.05
+ KEYPOINTS_2D: 0.01
+ GLOBAL_ORIENT: 0.001
+ HAND_POSE: 0.001
+ BETAS: 0.0005
+ ADVERSARIAL: 0.0005
diff --git a/hamer/configs_hydra/extras/default.yaml b/hamer/configs_hydra/extras/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9c6b622283a647fbc513166fc14f016cc3ed8a0
--- /dev/null
+++ b/hamer/configs_hydra/extras/default.yaml
@@ -0,0 +1,8 @@
+# disable python warnings if they annoy you
+ignore_warnings: False
+
+# ask user for tags if none are provided in the config
+enforce_tags: True
+
+# pretty print config tree at the start of the run using Rich library
+print_config: True
diff --git a/hamer/configs_hydra/hydra/default.yaml b/hamer/configs_hydra/hydra/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c30c188f4e68b205ec0f1e5679345626fe187164
--- /dev/null
+++ b/hamer/configs_hydra/hydra/default.yaml
@@ -0,0 +1,26 @@
+# @package _global_
+# https://hydra.cc/docs/configure_hydra/intro/
+
+# enable color logging
+defaults:
+ - override /hydra/hydra_logging: colorlog
+ - override /hydra/job_logging: colorlog
+
+# exp_name: ovrd_${hydra:job.override_dirname}
+exp_name: ${now:%Y-%m-%d}_${now:%H-%M-%S}
+
+hydra:
+ run:
+ dir: ${paths.log_dir}/${task_name}/runs/${exp_name}
+ sweep:
+ dir: ${paths.log_dir}/${task_name}/multiruns/${exp_name}
+ subdir: ${hydra.job.num}
+ job:
+ config:
+ override_dirname:
+ exclude_keys:
+ - trainer
+ - trainer.devices
+ - trainer.num_nodes
+ - callbacks
+ - debug
diff --git a/hamer/configs_hydra/launcher/local.yaml b/hamer/configs_hydra/launcher/local.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da87047acd416fe6d03bc81a74ab62b449b4ac35
--- /dev/null
+++ b/hamer/configs_hydra/launcher/local.yaml
@@ -0,0 +1,13 @@
+# @package _global_
+
+defaults:
+ - override /hydra/launcher: submitit_local
+
+hydra:
+ launcher:
+ timeout_min: 10_080 # 7 days
+ nodes: 1
+ tasks_per_node: ${trainer.devices}
+ cpus_per_task: 6
+ gpus_per_node: ${trainer.devices}
+ name: hamer
diff --git a/hamer/configs_hydra/launcher/slurm.yaml b/hamer/configs_hydra/launcher/slurm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f30ccce9069210830270c665bd31294c9d1799b7
--- /dev/null
+++ b/hamer/configs_hydra/launcher/slurm.yaml
@@ -0,0 +1,22 @@
+# @package _global_
+
+defaults:
+ - override /hydra/launcher: submitit_slurm
+
+hydra:
+ launcher:
+ timeout_min: 10_080 # 7 days
+ max_num_timeout: 3
+ partition: g40
+ qos: idle
+ nodes: 1
+ tasks_per_node: ${trainer.devices}
+ gpus_per_task: null
+ cpus_per_task: 12
+ gpus_per_node: ${trainer.devices}
+ cpus_per_gpu: null
+ comment: laion
+ name: hamer
+ setup:
+ - module load cuda openmpi libfabric-aws
+ - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
diff --git a/hamer/configs_hydra/paths/default.yaml b/hamer/configs_hydra/paths/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2afd22a65d1b34d881943cb48ee4ce3ff37d165
--- /dev/null
+++ b/hamer/configs_hydra/paths/default.yaml
@@ -0,0 +1,18 @@
+# path to root directory
+# this requires PROJECT_ROOT environment variable to exist
+# PROJECT_ROOT is inferred and set by pyrootutils package in `train.py` and `eval.py`
+root_dir: ${oc.env:PROJECT_ROOT}
+
+# path to data directory
+data_dir: ${paths.root_dir}/data/
+
+# path to logging directory
+log_dir: logs/
+
+# path to output directory, created dynamically by hydra
+# path generation pattern is specified in `configs/hydra/default.yaml`
+# use it to store all files generated during the run, like ckpts and metrics
+output_dir: ${hydra:runtime.output_dir}
+
+# path to working directory
+work_dir: ${hydra:runtime.cwd}
diff --git a/hamer/configs_hydra/train.yaml b/hamer/configs_hydra/train.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5021b4c156fc5738aee3d7d2fbd9395a2b3bb987
--- /dev/null
+++ b/hamer/configs_hydra/train.yaml
@@ -0,0 +1,47 @@
+# @package _global_
+
+# specify here default configuration
+# order of defaults determines the order in which configs override each other
+defaults:
+ - _self_
+ - data: mix_all.yaml
+ - trainer: ddp.yaml
+ - paths: default.yaml
+ - extras: default.yaml
+ - hydra: default.yaml
+
+ # experiment configs allow for version control of specific hyperparameters
+ # e.g. best hyperparameters for given model and datamodule
+ - experiment: null
+ - texture_exp: null
+
+ # optional local config for machine/user specific settings
+ # it's optional since it doesn't need to exist and is excluded from version control
+ - optional launcher: local.yaml
+ # - optional launcher: slurm.yaml
+
+ # debugging config (enable through command line, e.g. `python train.py debug=default)
+ - debug: null
+
+# task name, determines output directory path
+task_name: "train"
+
+# tags to help you identify your experiments
+# you can overwrite this in experiment configs
+# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
+# appending lists from command line is currently not supported :(
+# https://github.com/facebookresearch/hydra/issues/1547
+tags: ["dev"]
+
+# set False to skip model training
+train: True
+
+# evaluate on test set, using best model weights achieved during training
+# lightning chooses best weights based on the metric specified in checkpoint callback
+test: False
+
+# simply provide checkpoint path to resume training
+ckpt_path: null
+
+# seed for random number generators in pytorch, numpy and python.random
+seed: null
diff --git a/hamer/configs_hydra/trainer/cpu.yaml b/hamer/configs_hydra/trainer/cpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2464b95ee0d6c03a3dfe202f8a99b0cf04f37031
--- /dev/null
+++ b/hamer/configs_hydra/trainer/cpu.yaml
@@ -0,0 +1,6 @@
+defaults:
+ - default.yaml
+ - default_hamer.yaml
+
+accelerator: cpu
+devices: 1
diff --git a/hamer/configs_hydra/trainer/ddp.yaml b/hamer/configs_hydra/trainer/ddp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b365ff6df35d3218970a82895f4f0e27b9647780
--- /dev/null
+++ b/hamer/configs_hydra/trainer/ddp.yaml
@@ -0,0 +1,14 @@
+defaults:
+ - default.yaml
+ - default_hamer.yaml
+
+# use "ddp_spawn" instead of "ddp",
+# it's slower but normal "ddp" currently doesn't work ideally with hydra
+# https://github.com/facebookresearch/hydra/issues/2070
+# https://pytorch-lightning.readthedocs.io/en/latest/accelerators/gpu_intermediate.html#distributed-data-parallel-spawn
+strategy: ddp
+
+accelerator: gpu
+devices: 8
+num_nodes: 1
+sync_batchnorm: True
diff --git a/hamer/configs_hydra/trainer/default.yaml b/hamer/configs_hydra/trainer/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d444f4671fc77d7cf3f11ec74e638f3f620098f
--- /dev/null
+++ b/hamer/configs_hydra/trainer/default.yaml
@@ -0,0 +1,10 @@
+_target_: pytorch_lightning.Trainer
+
+default_root_dir: ${paths.output_dir}
+
+accelerator: cpu
+devices: 1
+
+# set True to to ensure deterministic results
+# makes training slower but gives more reproducibility than just setting seeds
+deterministic: False
diff --git a/hamer/configs_hydra/trainer/default_hamer.yaml b/hamer/configs_hydra/trainer/default_hamer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..963b2393c9651ba53f8e0e69256193d635821174
--- /dev/null
+++ b/hamer/configs_hydra/trainer/default_hamer.yaml
@@ -0,0 +1,8 @@
+num_sanity_val_steps: 0
+log_every_n_steps: ${GENERAL.LOG_STEPS}
+val_check_interval: ${GENERAL.VAL_STEPS}
+precision: 16
+max_steps: ${GENERAL.TOTAL_STEPS}
+# move_metrics_to_cpu: True
+limit_val_batches: 1
+# track_grad_norm: -1
diff --git a/hamer/configs_hydra/trainer/gpu.yaml b/hamer/configs_hydra/trainer/gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b0c8b9171a83784a1f243d3e4515bfec0a10b1d
--- /dev/null
+++ b/hamer/configs_hydra/trainer/gpu.yaml
@@ -0,0 +1,6 @@
+defaults:
+ - default.yaml
+ - default_hamer.yaml
+
+accelerator: gpu
+devices: 1
diff --git a/hamer/configs_hydra/trainer/mps.yaml b/hamer/configs_hydra/trainer/mps.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25806bc3cd66c3130ee82c4e14e1700d28b471a0
--- /dev/null
+++ b/hamer/configs_hydra/trainer/mps.yaml
@@ -0,0 +1,6 @@
+defaults:
+ - default.yaml
+ - default_hamer.yaml
+
+accelerator: mps
+devices: 1
diff --git a/hamer/datasets/__init__.py b/hamer/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e41f51dd1c1be840f67a85ff01756d003236c23
--- /dev/null
+++ b/hamer/datasets/__init__.py
@@ -0,0 +1,56 @@
+from typing import Dict, Optional
+
+import torch
+import numpy as np
+import pytorch_lightning as pl
+from yacs.config import CfgNode
+
+from ..configs import to_lower
+from .dataset import Dataset
+from .mocap_dataset import MoCapDataset
+
+class HAMERDataModule(pl.LightningDataModule):
+
+ def __init__(self, cfg: CfgNode, dataset_cfg: CfgNode) -> None:
+ """
+ Initialize LightningDataModule for HAMER training
+ Args:
+ cfg (CfgNode): Config file as a yacs CfgNode containing necessary dataset info.
+ dataset_cfg (CfgNode): Dataset configuration file
+ """
+ super().__init__()
+ self.cfg = cfg
+ self.dataset_cfg = dataset_cfg
+ self.train_dataset = None
+ self.val_dataset = None
+ self.test_dataset = None
+ self.mocap_dataset = None
+
+ def setup(self, stage: Optional[str] = None) -> None:
+ """
+ Load datasets necessary for training
+ Args:
+ cfg (CfgNode): Config file as a yacs CfgNode containing necessary dataset info.
+ """
+        if self.train_dataset is None:
+ self.train_dataset = MixedWebDataset(self.cfg, self.dataset_cfg, train=True).with_epoch(100_000).shuffle(4000)
+ self.val_dataset = MixedWebDataset(self.cfg, self.dataset_cfg, train=False).shuffle(4000)
+ self.mocap_dataset = MoCapDataset(**to_lower(self.dataset_cfg[self.cfg.DATASETS.MOCAP]))
+
+ def train_dataloader(self) -> Dict:
+ """
+ Setup training data loader.
+ Returns:
+ Dict: Dictionary containing image and mocap data dataloaders
+ """
+ train_dataloader = torch.utils.data.DataLoader(self.train_dataset, self.cfg.TRAIN.BATCH_SIZE, drop_last=True, num_workers=self.cfg.GENERAL.NUM_WORKERS, prefetch_factor=self.cfg.GENERAL.PREFETCH_FACTOR)
+ mocap_dataloader = torch.utils.data.DataLoader(self.mocap_dataset, self.cfg.TRAIN.NUM_TRAIN_SAMPLES * self.cfg.TRAIN.BATCH_SIZE, shuffle=True, drop_last=True, num_workers=1)
+ return {'img': train_dataloader, 'mocap': mocap_dataloader}
+
+ def val_dataloader(self) -> torch.utils.data.DataLoader:
+ """
+ Setup val data loader.
+ Returns:
+ torch.utils.data.DataLoader: Validation dataloader
+ """
+ val_dataloader = torch.utils.data.DataLoader(self.val_dataset, self.cfg.TRAIN.BATCH_SIZE, drop_last=True, num_workers=self.cfg.GENERAL.NUM_WORKERS)
+ return val_dataloader
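A hedged sketch of how this LightningDataModule is typically wired into a pytorch_lightning.Trainer, assuming the configs added elsewhere in this diff:

from hamer.configs import get_config, dataset_config
from hamer.datasets import HAMERDataModule

cfg = get_config('_DATA/hamer_ckpts/model_config.yaml')
datamodule = HAMERDataModule(cfg, dataset_config())
# trainer.fit(model, datamodule=datamodule) would call setup() and then pull
# batches from the {'img': ..., 'mocap': ...} dict returned by train_dataloader().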
diff --git a/hamer/datasets/dataset.py b/hamer/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..22fc5bc5f4a7b75da672bd89859da14823e71aff
--- /dev/null
+++ b/hamer/datasets/dataset.py
@@ -0,0 +1,27 @@
+"""
+This file contains the definition of the base Dataset class.
+"""
+
+class DatasetRegistration(type):
+ """
+ Metaclass for registering different datasets
+ """
+ def __init__(cls, name, bases, nmspc):
+ super().__init__(name, bases, nmspc)
+ if not hasattr(cls, 'registry'):
+ cls.registry = dict()
+ cls.registry[name] = cls
+
+ # Metamethods, called on class objects:
+ def __iter__(cls):
+ return iter(cls.registry)
+
+ def __str__(cls):
+ return str(cls.registry)
+
+class Dataset(metaclass=DatasetRegistration):
+ """
+ Base Dataset class
+ """
+ def __init__(self, *args, **kwargs):
+ pass
\ No newline at end of file
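A small sketch of what the registration metaclass provides: every subclass of Dataset is recorded by class name, so dataset types can be resolved from config strings such as TYPE: ImageDataset. ToyDataset below is a hypothetical subclass for illustration only:

from hamer.datasets.dataset import Dataset

class ToyDataset(Dataset):              # hypothetical subclass, not part of the repo
    pass

print(list(Dataset))                    # class names recorded so far, e.g. ['Dataset', 'ToyDataset']
print(Dataset.registry['ToyDataset'])   # the ToyDataset class object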
diff --git a/hamer/datasets/image_dataset.py b/hamer/datasets/image_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a65236369db32ee4ed1582ae400ef04556dd82eb
--- /dev/null
+++ b/hamer/datasets/image_dataset.py
@@ -0,0 +1,275 @@
+import copy
+import os
+import numpy as np
+import torch
+from typing import List
+from yacs.config import CfgNode
+import braceexpand
+import cv2
+
+from .dataset import Dataset
+from .utils import get_example, expand_to_aspect_ratio
+
+def expand(s):
+ return os.path.expanduser(os.path.expandvars(s))
+def expand_urls(urls: str|List[str]):
+ if isinstance(urls, str):
+ urls = [urls]
+ urls = [u for url in urls for u in braceexpand.braceexpand(expand(url))]
+ return urls
+
+FLIP_KEYPOINT_PERMUTATION = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+
+DEFAULT_MEAN = 255. * np.array([0.485, 0.456, 0.406])
+DEFAULT_STD = 255. * np.array([0.229, 0.224, 0.225])
+DEFAULT_IMG_SIZE = 256
+
+class ImageDataset(Dataset):
+
+ @staticmethod
+ def load_tars_as_webdataset(cfg: CfgNode, urls: str|List[str], train: bool,
+ resampled=False,
+ epoch_size=None,
+ cache_dir=None,
+ **kwargs) -> Dataset:
+ """
+ Loads the dataset from a webdataset tar file.
+ """
+
+ IMG_SIZE = cfg.MODEL.IMAGE_SIZE
+ BBOX_SHAPE = cfg.MODEL.get('BBOX_SHAPE', None)
+ MEAN = 255. * np.array(cfg.MODEL.IMAGE_MEAN)
+ STD = 255. * np.array(cfg.MODEL.IMAGE_STD)
+
+ def split_data(source):
+ for item in source:
+ datas = item['data.pyd']
+ for data in datas:
+ if 'detection.npz' in item:
+ det_idx = data['extra_info']['detection_npz_idx']
+ mask = item['detection.npz']['masks'][det_idx]
+ else:
+ mask = np.ones_like(item['jpg'][:,:,0], dtype=bool)
+ yield {
+ '__key__': item['__key__'],
+ 'jpg': item['jpg'],
+ 'data.pyd': data,
+ 'mask': mask,
+ }
+
+ def suppress_bad_kps(item, thresh=0.0):
+ if thresh > 0:
+ kp2d = item['data.pyd']['keypoints_2d']
+ kp2d_conf = np.where(kp2d[:, 2] < thresh, 0.0, kp2d[:, 2])
+ item['data.pyd']['keypoints_2d'] = np.concatenate([kp2d[:,:2], kp2d_conf[:,None]], axis=1)
+ return item
+
+ def filter_numkp(item, numkp=4, thresh=0.0):
+ kp_conf = item['data.pyd']['keypoints_2d'][:, 2]
+ return (kp_conf > thresh).sum() > numkp
+
+ def filter_reproj_error(item, thresh=10**4.5):
+ losses = item['data.pyd'].get('extra_info', {}).get('fitting_loss', np.array({})).item()
+ reproj_loss = losses.get('reprojection_loss', None)
+ return reproj_loss is None or reproj_loss < thresh
+
+ def filter_bbox_size(item, thresh=1):
+ bbox_size_min = item['data.pyd']['scale'].min().item() * 200.
+ return bbox_size_min > thresh
+
+ def filter_no_poses(item):
+ return (item['data.pyd']['has_hand_pose'] > 0)
+
+ def supress_bad_betas(item, thresh=3):
+ has_betas = item['data.pyd']['has_betas']
+ if thresh > 0 and has_betas:
+ betas_abs = np.abs(item['data.pyd']['betas'])
+ if (betas_abs > thresh).any():
+ item['data.pyd']['has_betas'] = False
+ return item
+
+        def supress_bad_poses(item):
+            has_hand_pose = item['data.pyd']['has_hand_pose']
+            if has_hand_pose:
+                hand_pose = item['data.pyd']['hand_pose']
+                # NOTE: poses_check_probable and amass_poses_hist100_smooth come from
+                # .smplh_prob_filter (imported in json_dataset.py); this branch only runs
+                # when DATASETS.SUPPRESS_BAD_POSES is enabled.
+                pose_is_probable = poses_check_probable(torch.from_numpy(hand_pose)[None, 3:], amass_poses_hist100_smooth).item()
+                if not pose_is_probable:
+                    item['data.pyd']['has_hand_pose'] = False
+            return item
+
+ def poses_betas_simultaneous(item):
+ # We either have both hand_pose and betas, or neither
+ has_betas = item['data.pyd']['has_betas']
+ has_hand_pose = item['data.pyd']['has_hand_pose']
+ item['data.pyd']['has_betas'] = item['data.pyd']['has_hand_pose'] = np.array(float((has_hand_pose>0) and (has_betas>0)))
+ return item
+
+ def set_betas_for_reg(item):
+ # Always have betas set to true
+ has_betas = item['data.pyd']['has_betas']
+ betas = item['data.pyd']['betas']
+
+ if not (has_betas>0):
+ item['data.pyd']['has_betas'] = np.array(float((True)))
+ item['data.pyd']['betas'] = betas * 0
+ return item
+
+ # Load the dataset
+ if epoch_size is not None:
+ resampled = True
+ #corrupt_filter = lambda sample: (sample['__key__'] not in CORRUPT_KEYS)
+ import webdataset as wds
+ dataset = wds.WebDataset(expand_urls(urls),
+ nodesplitter=wds.split_by_node,
+ shardshuffle=True,
+ resampled=resampled,
+ cache_dir=cache_dir,
+ ) #.select(corrupt_filter)
+ if train:
+ dataset = dataset.shuffle(100)
+ dataset = dataset.decode('rgb8').rename(jpg='jpg;jpeg;png')
+
+ # Process the dataset
+ dataset = dataset.compose(split_data)
+
+ # Filter/clean the dataset
+ SUPPRESS_KP_CONF_THRESH = cfg.DATASETS.get('SUPPRESS_KP_CONF_THRESH', 0.0)
+ SUPPRESS_BETAS_THRESH = cfg.DATASETS.get('SUPPRESS_BETAS_THRESH', 0.0)
+ SUPPRESS_BAD_POSES = cfg.DATASETS.get('SUPPRESS_BAD_POSES', False)
+ POSES_BETAS_SIMULTANEOUS = cfg.DATASETS.get('POSES_BETAS_SIMULTANEOUS', False)
+ BETAS_REG = cfg.DATASETS.get('BETAS_REG', False)
+ FILTER_NO_POSES = cfg.DATASETS.get('FILTER_NO_POSES', False)
+ FILTER_NUM_KP = cfg.DATASETS.get('FILTER_NUM_KP', 4)
+ FILTER_NUM_KP_THRESH = cfg.DATASETS.get('FILTER_NUM_KP_THRESH', 0.0)
+ FILTER_REPROJ_THRESH = cfg.DATASETS.get('FILTER_REPROJ_THRESH', 0.0)
+ FILTER_MIN_BBOX_SIZE = cfg.DATASETS.get('FILTER_MIN_BBOX_SIZE', 0.0)
+ if SUPPRESS_KP_CONF_THRESH > 0:
+ dataset = dataset.map(lambda x: suppress_bad_kps(x, thresh=SUPPRESS_KP_CONF_THRESH))
+ if SUPPRESS_BETAS_THRESH > 0:
+ dataset = dataset.map(lambda x: supress_bad_betas(x, thresh=SUPPRESS_BETAS_THRESH))
+ if SUPPRESS_BAD_POSES:
+ dataset = dataset.map(lambda x: supress_bad_poses(x))
+ if POSES_BETAS_SIMULTANEOUS:
+ dataset = dataset.map(lambda x: poses_betas_simultaneous(x))
+ if FILTER_NO_POSES:
+ dataset = dataset.select(lambda x: filter_no_poses(x))
+ if FILTER_NUM_KP > 0:
+ dataset = dataset.select(lambda x: filter_numkp(x, numkp=FILTER_NUM_KP, thresh=FILTER_NUM_KP_THRESH))
+ if FILTER_REPROJ_THRESH > 0:
+ dataset = dataset.select(lambda x: filter_reproj_error(x, thresh=FILTER_REPROJ_THRESH))
+ if FILTER_MIN_BBOX_SIZE > 0:
+ dataset = dataset.select(lambda x: filter_bbox_size(x, thresh=FILTER_MIN_BBOX_SIZE))
+ if BETAS_REG:
+ dataset = dataset.map(lambda x: set_betas_for_reg(x)) # NOTE: Must be at the end
+
+ use_skimage_antialias = cfg.DATASETS.get('USE_SKIMAGE_ANTIALIAS', False)
+ border_mode = {
+ 'constant': cv2.BORDER_CONSTANT,
+ 'replicate': cv2.BORDER_REPLICATE,
+ }[cfg.DATASETS.get('BORDER_MODE', 'constant')]
+
+ # Process the dataset further
+ dataset = dataset.map(lambda x: ImageDataset.process_webdataset_tar_item(x, train,
+ augm_config=cfg.DATASETS.CONFIG,
+ MEAN=MEAN, STD=STD, IMG_SIZE=IMG_SIZE,
+ BBOX_SHAPE=BBOX_SHAPE,
+ use_skimage_antialias=use_skimage_antialias,
+ border_mode=border_mode,
+ ))
+ if epoch_size is not None:
+ dataset = dataset.with_epoch(epoch_size)
+
+ return dataset
+
+ @staticmethod
+ def process_webdataset_tar_item(item, train,
+ augm_config=None,
+ MEAN=DEFAULT_MEAN,
+ STD=DEFAULT_STD,
+ IMG_SIZE=DEFAULT_IMG_SIZE,
+ BBOX_SHAPE=None,
+ use_skimage_antialias=False,
+ border_mode=cv2.BORDER_CONSTANT,
+ ):
+ # Read data from item
+ key = item['__key__']
+ image = item['jpg']
+ data = item['data.pyd']
+ mask = item['mask']
+
+ keypoints_2d = data['keypoints_2d']
+ keypoints_3d = data['keypoints_3d']
+ center = data['center']
+ scale = data['scale']
+ hand_pose = data['hand_pose']
+ betas = data['betas']
+ right = data['right']
+ #right = True
+ has_hand_pose = data['has_hand_pose']
+ has_betas = data['has_betas']
+ # image_file = data['image_file']
+
+ # Process data
+ orig_keypoints_2d = keypoints_2d.copy()
+ center_x = center[0]
+ center_y = center[1]
+ bbox_size = expand_to_aspect_ratio(scale*200, target_aspect_ratio=BBOX_SHAPE).max()
+ if bbox_size < 1:
+ breakpoint()
+
+
+ mano_params = {'global_orient': hand_pose[:3],
+ 'hand_pose': hand_pose[3:],
+ 'betas': betas
+ }
+
+ has_mano_params = {'global_orient': has_hand_pose,
+ 'hand_pose': has_hand_pose,
+ 'betas': has_betas
+ }
+
+ mano_params_is_axis_angle = {'global_orient': True,
+ 'hand_pose': True,
+ 'betas': False
+ }
+
+ augm_config = copy.deepcopy(augm_config)
+ # Crop image and (possibly) perform data augmentation
+ img_rgba = np.concatenate([image, mask.astype(np.uint8)[:,:,None]*255], axis=2)
+ img_patch_rgba, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size, trans = get_example(img_rgba,
+ center_x, center_y,
+ bbox_size, bbox_size,
+ keypoints_2d, keypoints_3d,
+ mano_params, has_mano_params,
+ FLIP_KEYPOINT_PERMUTATION,
+ IMG_SIZE, IMG_SIZE,
+ MEAN, STD, train, right, augm_config,
+ is_bgr=False, return_trans=True,
+ use_skimage_antialias=use_skimage_antialias,
+ border_mode=border_mode,
+ )
+ img_patch = img_patch_rgba[:3,:,:]
+ mask_patch = (img_patch_rgba[3,:,:] / 255.0).clip(0,1)
+ if (mask_patch < 0.5).all():
+ mask_patch = np.ones_like(mask_patch)
+
+ item = {}
+
+ item['img'] = img_patch
+ item['mask'] = mask_patch
+ # item['img_og'] = image
+ # item['mask_og'] = mask
+ item['keypoints_2d'] = keypoints_2d.astype(np.float32)
+ item['keypoints_3d'] = keypoints_3d.astype(np.float32)
+ item['orig_keypoints_2d'] = orig_keypoints_2d
+ item['box_center'] = center.copy()
+ item['box_size'] = bbox_size
+ item['img_size'] = 1.0 * img_size[::-1].copy()
+ item['mano_params'] = mano_params
+ item['has_mano_params'] = has_mano_params
+ item['mano_params_is_axis_angle'] = mano_params_is_axis_angle
+ item['_scale'] = scale
+ item['_trans'] = trans
+ item['imgname'] = key
+ # item['idx'] = idx
+ return item
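A hedged sketch of building a training stream from the FreiHAND shards declared in datasets_tar.yaml, assuming the tar shards are available at the paths the URLS entries resolve to:

from hamer.configs import get_config, dataset_config
from hamer.datasets.image_dataset import ImageDataset

cfg = get_config('_DATA/hamer_ckpts/model_config.yaml')
freihand = dataset_config()['FREIHAND-TRAIN']
dataset = ImageDataset.load_tars_as_webdataset(
    cfg, freihand.URLS, train=True, epoch_size=freihand.epoch_size)
sample = next(iter(dataset))
print(sample['img'].shape, sample['box_size'], sample['imgname'])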
diff --git a/hamer/datasets/json_dataset.py b/hamer/datasets/json_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e258a3e8b84baa386d0edcb75ef45a4770c6301
--- /dev/null
+++ b/hamer/datasets/json_dataset.py
@@ -0,0 +1,213 @@
+import copy
+import os
+import json
+import glob
+import numpy as np
+import torch
+from typing import Any, Dict, List
+from yacs.config import CfgNode
+import braceexpand
+import cv2
+
+from .dataset import Dataset
+from .utils import get_example, expand_to_aspect_ratio
+from .smplh_prob_filter import poses_check_probable, load_amass_hist_smooth
+
+def expand(s):
+ return os.path.expanduser(os.path.expandvars(s))
+def expand_urls(urls: str|List[str]):
+ if isinstance(urls, str):
+ urls = [urls]
+ urls = [u for url in urls for u in braceexpand.braceexpand(expand(url))]
+ return urls
+
+AIC_TRAIN_CORRUPT_KEYS = {
+ '0a047f0124ae48f8eee15a9506ce1449ee1ba669',
+ '1a703aa174450c02fbc9cfbf578a5435ef403689',
+ '0394e6dc4df78042929b891dbc24f0fd7ffb6b6d',
+ '5c032b9626e410441544c7669123ecc4ae077058',
+ 'ca018a7b4c5f53494006ebeeff9b4c0917a55f07',
+ '4a77adb695bef75a5d34c04d589baf646fe2ba35',
+ 'a0689017b1065c664daef4ae2d14ea03d543217e',
+ '39596a45cbd21bed4a5f9c2342505532f8ec5cbb',
+ '3d33283b40610d87db660b62982f797d50a7366b',
+}
+CORRUPT_KEYS = {
+ *{f'aic-train/{k}' for k in AIC_TRAIN_CORRUPT_KEYS},
+ *{f'aic-train-vitpose/{k}' for k in AIC_TRAIN_CORRUPT_KEYS},
+}
+
+FLIP_KEYPOINT_PERMUTATION = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+
+DEFAULT_MEAN = 255. * np.array([0.485, 0.456, 0.406])
+DEFAULT_STD = 255. * np.array([0.229, 0.224, 0.225])
+DEFAULT_IMG_SIZE = 256
+
+class JsonDataset(Dataset):
+
+ def __init__(self,
+ cfg: CfgNode,
+ dataset_file: str,
+ img_dir: str,
+ right: bool,
+ train: bool = False,
+ prune: Dict[str, Any] = {},
+ **kwargs):
+ """
+ Dataset class used for loading images and corresponding annotations.
+ Args:
+ cfg (CfgNode): Model config file.
+ dataset_file (str): Path to npz file containing dataset info.
+ img_dir (str): Path to image folder.
+ train (bool): Whether it is for training or not (enables data augmentation).
+ """
+ super(JsonDataset, self).__init__()
+ self.train = train
+ self.cfg = cfg
+
+ self.img_size = cfg.MODEL.IMAGE_SIZE
+ self.mean = 255. * np.array(self.cfg.MODEL.IMAGE_MEAN)
+ self.std = 255. * np.array(self.cfg.MODEL.IMAGE_STD)
+
+ self.img_dir = img_dir
+ boxes = np.array(json.load(open(dataset_file, 'rb')))
+
+ self.imgname = glob.glob(os.path.join(self.img_dir,'*.jpg'))
+ self.imgname.sort()
+
+ self.flip_keypoint_permutation = copy.copy(FLIP_KEYPOINT_PERMUTATION)
+
+ num_pose = 3 * (self.cfg.MANO.NUM_HAND_JOINTS + 1)
+
+        # Bounding boxes are given as [x1, y1, x2, y2]; convert them to center/scale format
+ boxes = boxes.astype(np.float32)
+ self.center = (boxes[:, 2:4] + boxes[:, 0:2]) / 2.0
+ self.scale = 2 * (boxes[:, 2:4] - boxes[:, 0:2]) / 200.0
+ self.personid = np.arange(len(boxes), dtype=np.int32)
+ if right:
+ self.right = np.ones(len(self.imgname), dtype=np.float32)
+ else:
+ self.right = np.zeros(len(self.imgname), dtype=np.float32)
+ assert self.scale.shape == (len(self.center), 2)
+
+        # Get gt MANO parameters, if available (self.data is not populated by this
+        # JSON loader, so the zero defaults below are used)
+ try:
+ self.hand_pose = self.data['hand_pose'].astype(np.float32)
+ self.has_hand_pose = self.data['has_hand_pose'].astype(np.float32)
+ except:
+ self.hand_pose = np.zeros((len(self.imgname), num_pose), dtype=np.float32)
+ self.has_hand_pose = np.zeros(len(self.imgname), dtype=np.float32)
+ try:
+ self.betas = self.data['betas'].astype(np.float32)
+ self.has_betas = self.data['has_betas'].astype(np.float32)
+ except:
+ self.betas = np.zeros((len(self.imgname), 10), dtype=np.float32)
+ self.has_betas = np.zeros(len(self.imgname), dtype=np.float32)
+
+ # Try to get 2d keypoints, if available
+ try:
+ hand_keypoints_2d = self.data['hand_keypoints_2d']
+ except:
+ hand_keypoints_2d = np.zeros((len(self.center), 21, 3))
+ ## Try to get extra 2d keypoints, if available
+ #try:
+ # extra_keypoints_2d = self.data['extra_keypoints_2d']
+ #except KeyError:
+ # extra_keypoints_2d = np.zeros((len(self.center), 19, 3))
+
+ #self.keypoints_2d = np.concatenate((hand_keypoints_2d, extra_keypoints_2d), axis=1).astype(np.float32)
+ self.keypoints_2d = hand_keypoints_2d
+
+ # Try to get 3d keypoints, if available
+ try:
+ hand_keypoints_3d = self.data['hand_keypoints_3d'].astype(np.float32)
+ except:
+ hand_keypoints_3d = np.zeros((len(self.center), 21, 4), dtype=np.float32)
+ ## Try to get extra 3d keypoints, if available
+ #try:
+ # extra_keypoints_3d = self.data['extra_keypoints_3d'].astype(np.float32)
+ #except KeyError:
+ # extra_keypoints_3d = np.zeros((len(self.center), 19, 4), dtype=np.float32)
+
+ self.keypoints_3d = hand_keypoints_3d
+
+ #body_keypoints_3d[:, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], -1] = 0
+
+ #self.keypoints_3d = np.concatenate((body_keypoints_3d, extra_keypoints_3d), axis=1).astype(np.float32)
+
+ def __len__(self) -> int:
+ return len(self.scale)
+
+ def __getitem__(self, idx: int) -> Dict:
+ """
+ Returns an example from the dataset.
+ """
+ try:
+ image_file = self.imgname[idx].decode('utf-8')
+ except AttributeError:
+ image_file = self.imgname[idx]
+ keypoints_2d = self.keypoints_2d[idx].copy()
+ keypoints_3d = self.keypoints_3d[idx].copy()
+
+ center = self.center[idx].copy()
+ center_x = center[0]
+ center_y = center[1]
+ scale = self.scale[idx]
+ right = self.right[idx].copy()
+ BBOX_SHAPE = self.cfg.MODEL.get('BBOX_SHAPE', None)
+ #bbox_size = expand_to_aspect_ratio(scale*200, target_aspect_ratio=BBOX_SHAPE).max()
+ bbox_size = ((scale*200).max())
+ bbox_expand_factor = bbox_size / ((scale*200).max())
+ hand_pose = self.hand_pose[idx].copy().astype(np.float32)
+ betas = self.betas[idx].copy().astype(np.float32)
+
+ has_hand_pose = self.has_hand_pose[idx].copy()
+ has_betas = self.has_betas[idx].copy()
+
+ mano_params = {'global_orient': hand_pose[:3],
+ 'hand_pose': hand_pose[3:],
+ 'betas': betas
+ }
+
+ has_mano_params = {'global_orient': has_hand_pose,
+ 'hand_pose': has_hand_pose,
+ 'betas': has_betas
+ }
+
+ mano_params_is_axis_angle = {'global_orient': True,
+ 'hand_pose': True,
+ 'betas': False
+ }
+
+ augm_config = self.cfg.DATASETS.CONFIG
+ # Crop image and (possibly) perform data augmentation
+ img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size = get_example(image_file,
+ center_x, center_y,
+ bbox_size, bbox_size,
+ keypoints_2d, keypoints_3d,
+ mano_params, has_mano_params,
+ self.flip_keypoint_permutation,
+ self.img_size, self.img_size,
+ self.mean, self.std, self.train, right, augm_config)
+
+ item = {}
+ # These are the keypoints in the original image coordinates (before cropping)
+ orig_keypoints_2d = self.keypoints_2d[idx].copy()
+
+ item['img'] = img_patch
+ item['keypoints_2d'] = keypoints_2d.astype(np.float32)
+ item['keypoints_3d'] = keypoints_3d.astype(np.float32)
+ item['orig_keypoints_2d'] = orig_keypoints_2d
+ item['box_center'] = self.center[idx].copy()
+ item['box_size'] = bbox_size
+ item['bbox_expand_factor'] = bbox_expand_factor
+ item['img_size'] = 1.0 * img_size[::-1].copy()
+ item['mano_params'] = mano_params
+ item['has_mano_params'] = has_mano_params
+ item['mano_params_is_axis_angle'] = mano_params_is_axis_angle
+ item['imgname'] = image_file
+ item['personid'] = int(self.personid[idx])
+ item['idx'] = idx
+ item['_scale'] = scale
+ item['right'] = self.right[idx].copy()
+ return item
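A hedged sketch of constructing this dataset over a folder of .jpg frames plus a JSON list of [x1, y1, x2, y2] boxes; boxes.json and images/ are placeholder names, not files in this repo:

from hamer.configs import get_config
from hamer.datasets.json_dataset import JsonDataset

cfg = get_config('_DATA/hamer_ckpts/model_config.yaml')
dataset = JsonDataset(cfg, dataset_file='boxes.json', img_dir='images/', right=True)
item = dataset[0]
print(item['img'].shape, item['box_center'], item['right'])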
diff --git a/hamer/datasets/mocap_dataset.py b/hamer/datasets/mocap_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbf808f83c462646a19eed7e33dea4e50037b512
--- /dev/null
+++ b/hamer/datasets/mocap_dataset.py
@@ -0,0 +1,25 @@
+import numpy as np
+from typing import Dict
+
+class MoCapDataset:
+
+ def __init__(self, dataset_file: str):
+ """
+        Dataset class used for loading a dataset of unpaired MANO parameter annotations.
+        Args:
+ dataset_file (str): Path to npz file containing dataset info.
+ """
+ data = np.load(dataset_file)
+ self.pose = data['hand_pose'].astype(np.float32)[:, 3:]
+ self.betas = data['betas'].astype(np.float32)
+ self.length = len(self.pose)
+
+ def __getitem__(self, idx: int) -> Dict:
+ pose = self.pose[idx].copy()
+ betas = self.betas[idx].copy()
+ item = {'hand_pose': pose, 'betas': betas}
+ return item
+
+ def __len__(self) -> int:
+ return self.length
diff --git a/hamer/datasets/utils.py b/hamer/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..73ded82fcd02ebf95895e3edf6a680f045919d35
--- /dev/null
+++ b/hamer/datasets/utils.py
@@ -0,0 +1,993 @@
+"""
+Parts of the code are taken or adapted from
+https://github.com/mkocabas/EpipolarPose/blob/master/lib/utils/img_utils.py
+"""
+import torch
+import numpy as np
+from skimage.transform import rotate, resize
+from skimage.filters import gaussian
+import random
+import cv2
+from typing import List, Dict, Tuple
+from yacs.config import CfgNode
+
+def expand_to_aspect_ratio(input_shape, target_aspect_ratio=None):
+ """Increase the size of the bounding box to match the target shape."""
+ if target_aspect_ratio is None:
+ return input_shape
+
+ try:
+ w , h = input_shape
+ except (ValueError, TypeError):
+ return input_shape
+
+ w_t, h_t = target_aspect_ratio
+ if h / w < h_t / w_t:
+ h_new = max(w * h_t / w_t, h)
+ w_new = w
+ else:
+ h_new = h
+ w_new = max(h * w_t / h_t, w)
+ if h_new < h or w_new < w:
+ breakpoint()
+ return np.array([w_new, h_new])
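+# Illustrative example (not part of the original code): for input_shape=(200, 200) and
+# target_aspect_ratio=(192, 256), the height grows to 200*256/192 ~ 266.7 while the width stays 200,
+# so the function returns approximately array([200., 266.7]).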
+
+def do_augmentation(aug_config: CfgNode) -> Tuple:
+ """
+ Compute random augmentation parameters.
+ Args:
+ aug_config (CfgNode): Config containing augmentation parameters.
+ Returns:
+ scale (float): Box rescaling factor.
+ rot (float): Random image rotation.
+ do_flip (bool): Whether to flip image or not.
+        do_extreme_crop (bool): Whether to apply extreme cropping (as proposed in EFT).
+        extreme_crop_lvl (int): Extreme cropping level (from EXTREME_CROP_AUG_LEVEL, default 0).
+        color_scale (List): Color rescaling factor.
+ tx (float): Random translation along the x axis.
+ ty (float): Random translation along the y axis.
+ """
+
+ tx = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.TRANS_FACTOR
+ ty = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.TRANS_FACTOR
+ scale = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.SCALE_FACTOR + 1.0
+ rot = np.clip(np.random.randn(), -2.0,
+ 2.0) * aug_config.ROT_FACTOR if random.random() <= aug_config.ROT_AUG_RATE else 0
+ do_flip = aug_config.DO_FLIP and random.random() <= aug_config.FLIP_AUG_RATE
+ do_extreme_crop = random.random() <= aug_config.EXTREME_CROP_AUG_RATE
+ extreme_crop_lvl = aug_config.get('EXTREME_CROP_AUG_LEVEL', 0)
+ # extreme_crop_lvl = 0
+ c_up = 1.0 + aug_config.COLOR_SCALE
+ c_low = 1.0 - aug_config.COLOR_SCALE
+ color_scale = [random.uniform(c_low, c_up), random.uniform(c_low, c_up), random.uniform(c_low, c_up)]
+ return scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty
+
+def rotate_2d(pt_2d: np.array, rot_rad: float) -> np.array:
+ """
+ Rotate a 2D point on the x-y plane.
+ Args:
+ pt_2d (np.array): Input 2D point with shape (2,).
+        rot_rad (float): Rotation angle in radians.
+ Returns:
+ np.array: Rotated 2D point.
+ """
+ x = pt_2d[0]
+ y = pt_2d[1]
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+ xx = x * cs - y * sn
+ yy = x * sn + y * cs
+ return np.array([xx, yy], dtype=np.float32)
+
+
+def gen_trans_from_patch_cv(c_x: float, c_y: float,
+ src_width: float, src_height: float,
+ dst_width: float, dst_height: float,
+ scale: float, rot: float) -> np.array:
+ """
+ Create transformation matrix for the bounding box crop.
+ Args:
+ c_x (float): Bounding box center x coordinate in the original image.
+ c_y (float): Bounding box center y coordinate in the original image.
+ src_width (float): Bounding box width.
+ src_height (float): Bounding box height.
+ dst_width (float): Output box width.
+ dst_height (float): Output box height.
+ scale (float): Rescaling factor for the bounding box (augmentation).
+ rot (float): Random rotation applied to the box.
+ Returns:
+ trans (np.array): Target geometric transformation.
+ """
+ # augment size with scale
+ src_w = src_width * scale
+ src_h = src_height * scale
+ src_center = np.zeros(2)
+ src_center[0] = c_x
+ src_center[1] = c_y
+ # augment rotation
+ rot_rad = np.pi * rot / 180
+ src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
+ src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)
+
+ dst_w = dst_width
+ dst_h = dst_height
+ dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
+ dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32)
+ dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32)
+
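+    # The affine transform is determined by three point correspondences: the box center and the midpoints
+    # of its lower and right edges (rotated by rot_rad in the source image) map to the corresponding
+    # points of the output patch.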
+ src = np.zeros((3, 2), dtype=np.float32)
+ src[0, :] = src_center
+ src[1, :] = src_center + src_downdir
+ src[2, :] = src_center + src_rightdir
+
+ dst = np.zeros((3, 2), dtype=np.float32)
+ dst[0, :] = dst_center
+ dst[1, :] = dst_center + dst_downdir
+ dst[2, :] = dst_center + dst_rightdir
+
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ return trans
+
+
+def trans_point2d(pt_2d: np.array, trans: np.array):
+ """
+    Transform a 2D point using the affine transformation matrix trans.
+ Args:
+ pt_2d (np.array): Input 2D point with shape (2,).
+ trans (np.array): Transformation matrix.
+ Returns:
+ np.array: Transformed 2D point.
+ """
+ src_pt = np.array([pt_2d[0], pt_2d[1], 1.]).T
+ dst_pt = np.dot(trans, src_pt)
+ return dst_pt[0:2]
+
+def get_transform(center, scale, res, rot=0):
+ """Generate transformation matrix."""
+ """Taken from PARE: https://github.com/mkocabas/PARE/blob/6e0caca86c6ab49ff80014b661350958e5b72fd8/pare/utils/image_utils.py"""
+ h = 200 * scale
+ t = np.zeros((3, 3))
+ t[0, 0] = float(res[1]) / h
+ t[1, 1] = float(res[0]) / h
+ t[0, 2] = res[1] * (-float(center[0]) / h + .5)
+ t[1, 2] = res[0] * (-float(center[1]) / h + .5)
+ t[2, 2] = 1
+ if not rot == 0:
+ rot = -rot # To match direction of rotation from cropping
+ rot_mat = np.zeros((3, 3))
+ rot_rad = rot * np.pi / 180
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+ rot_mat[0, :2] = [cs, -sn]
+ rot_mat[1, :2] = [sn, cs]
+ rot_mat[2, 2] = 1
+ # Need to rotate around center
+ t_mat = np.eye(3)
+ t_mat[0, 2] = -res[1] / 2
+ t_mat[1, 2] = -res[0] / 2
+ t_inv = t_mat.copy()
+ t_inv[:2, 2] *= -1
+ t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
+ return t
+
+
+def transform(pt, center, scale, res, invert=0, rot=0, as_int=True):
+ """Transform pixel location to different reference."""
+ """Taken from PARE: https://github.com/mkocabas/PARE/blob/6e0caca86c6ab49ff80014b661350958e5b72fd8/pare/utils/image_utils.py"""
+ t = get_transform(center, scale, res, rot=rot)
+ if invert:
+ t = np.linalg.inv(t)
+ new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
+ new_pt = np.dot(t, new_pt)
+ if as_int:
+ new_pt = new_pt.astype(int)
+ return new_pt[:2] + 1
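+# Illustrative note (not from the original code): transform(pt, center, scale, res) maps a point from the
+# original image into the res-sized crop defined by (center, 200*scale); invert=1 maps crop coordinates back
+# to the original image. The +/-1 offsets follow the 1-indexed convention of the original PARE code.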
+
+def crop_img(img, ul, br, border_mode=cv2.BORDER_CONSTANT, border_value=0):
+ c_x = (ul[0] + br[0])/2
+ c_y = (ul[1] + br[1])/2
+ bb_width = patch_width = br[0] - ul[0]
+ bb_height = patch_height = br[1] - ul[1]
+ trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, 1.0, 0)
+ img_patch = cv2.warpAffine(img, trans, (int(patch_width), int(patch_height)),
+ flags=cv2.INTER_LINEAR,
+ borderMode=border_mode,
+ borderValue=border_value
+ )
+
+ # Force borderValue=cv2.BORDER_CONSTANT for alpha channel
+ if (img.shape[2] == 4) and (border_mode != cv2.BORDER_CONSTANT):
+ img_patch[:,:,3] = cv2.warpAffine(img[:,:,3], trans, (int(patch_width), int(patch_height)),
+ flags=cv2.INTER_LINEAR,
+ borderMode=cv2.BORDER_CONSTANT,
+ )
+
+ return img_patch
+
+def generate_image_patch_skimage(img: np.array, c_x: float, c_y: float,
+ bb_width: float, bb_height: float,
+ patch_width: float, patch_height: float,
+ do_flip: bool, scale: float, rot: float,
+ border_mode=cv2.BORDER_CONSTANT, border_value=0) -> Tuple[np.array, np.array]:
+ """
+ Crop image according to the supplied bounding box.
+ Args:
+ img (np.array): Input image of shape (H, W, 3)
+ c_x (float): Bounding box center x coordinate in the original image.
+ c_y (float): Bounding box center y coordinate in the original image.
+ bb_width (float): Bounding box width.
+ bb_height (float): Bounding box height.
+ patch_width (float): Output box width.
+ patch_height (float): Output box height.
+ do_flip (bool): Whether to flip image or not.
+ scale (float): Rescaling factor for the bounding box (augmentation).
+ rot (float): Random rotation applied to the box.
+ Returns:
+        img_patch (np.array): Cropped image patch of shape (patch_height, patch_width, 3)
+ trans (np.array): Transformation matrix.
+ """
+
+ img_height, img_width, img_channels = img.shape
+ if do_flip:
+ img = img[:, ::-1, :]
+ c_x = img_width - c_x - 1
+
+ trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot)
+
+ #img_patch = cv2.warpAffine(img, trans, (int(patch_width), int(patch_height)), flags=cv2.INTER_LINEAR)
+
+ # skimage
+ center = np.zeros(2)
+ center[0] = c_x
+ center[1] = c_y
+ res = np.zeros(2)
+ res[0] = patch_width
+ res[1] = patch_height
+ # assumes bb_width = bb_height
+ # assumes patch_width = patch_height
+ assert bb_width == bb_height, f'{bb_width=} != {bb_height=}'
+ assert patch_width == patch_height, f'{patch_width=} != {patch_height=}'
+ scale1 = scale*bb_width/200.
+
+ # Upper left point
+ ul = np.array(transform([1, 1], center, scale1, res, invert=1, as_int=False)) - 1
+ # Bottom right point
+ br = np.array(transform([res[0] + 1,
+ res[1] + 1], center, scale1, res, invert=1, as_int=False)) - 1
+
+ # Padding so that when rotated proper amount of context is included
+ try:
+ pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2) + 1
+ except:
+ breakpoint()
+ if not rot == 0:
+ ul -= pad
+ br += pad
+
+
+ if False:
+ # Old way of cropping image
+ ul_int = ul.astype(int)
+ br_int = br.astype(int)
+ new_shape = [br_int[1] - ul_int[1], br_int[0] - ul_int[0]]
+ if len(img.shape) > 2:
+ new_shape += [img.shape[2]]
+ new_img = np.zeros(new_shape)
+
+ # Range to fill new array
+ new_x = max(0, -ul_int[0]), min(br_int[0], len(img[0])) - ul_int[0]
+ new_y = max(0, -ul_int[1]), min(br_int[1], len(img)) - ul_int[1]
+ # Range to sample from original image
+ old_x = max(0, ul_int[0]), min(len(img[0]), br_int[0])
+ old_y = max(0, ul_int[1]), min(len(img), br_int[1])
+ new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
+ old_x[0]:old_x[1]]
+
+ # New way of cropping image
+ new_img = crop_img(img, ul, br, border_mode=border_mode, border_value=border_value).astype(np.float32)
+
+ # print(f'{new_img.shape=}')
+ # print(f'{new_img1.shape=}')
+ # print(f'{np.allclose(new_img, new_img1)=}')
+ # print(f'{img.dtype=}')
+
+
+ if not rot == 0:
+        # Rotate the patch, then remove the padding that was added above
+ new_img = rotate(new_img, rot) # scipy.misc.imrotate(new_img, rot)
+ new_img = new_img[pad:-pad, pad:-pad]
+
+ if new_img.shape[0] < 1 or new_img.shape[1] < 1:
+ print(f'{img.shape=}')
+ print(f'{new_img.shape=}')
+ print(f'{ul=}')
+ print(f'{br=}')
+ print(f'{pad=}')
+ print(f'{rot=}')
+
+ breakpoint()
+
+ # resize image
+ new_img = resize(new_img, res) # scipy.misc.imresize(new_img, res)
+
+ new_img = np.clip(new_img, 0, 255).astype(np.uint8)
+
+ return new_img, trans
+
+
+def generate_image_patch_cv2(img: np.array, c_x: float, c_y: float,
+ bb_width: float, bb_height: float,
+ patch_width: float, patch_height: float,
+ do_flip: bool, scale: float, rot: float,
+ border_mode=cv2.BORDER_CONSTANT, border_value=0) -> Tuple[np.array, np.array]:
+ """
+ Crop the input image and return the crop and the corresponding transformation matrix.
+ Args:
+ img (np.array): Input image of shape (H, W, 3)
+ c_x (float): Bounding box center x coordinate in the original image.
+ c_y (float): Bounding box center y coordinate in the original image.
+ bb_width (float): Bounding box width.
+ bb_height (float): Bounding box height.
+ patch_width (float): Output box width.
+ patch_height (float): Output box height.
+ do_flip (bool): Whether to flip image or not.
+ scale (float): Rescaling factor for the bounding box (augmentation).
+ rot (float): Random rotation applied to the box.
+ Returns:
+        img_patch (np.array): Cropped image patch of shape (patch_height, patch_width, 3)
+ trans (np.array): Transformation matrix.
+ """
+
+ img_height, img_width, img_channels = img.shape
+ if do_flip:
+ img = img[:, ::-1, :]
+ c_x = img_width - c_x - 1
+
+
+ trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot)
+
+ img_patch = cv2.warpAffine(img, trans, (int(patch_width), int(patch_height)),
+ flags=cv2.INTER_LINEAR,
+ borderMode=border_mode,
+ borderValue=border_value,
+ )
+ # Force borderValue=cv2.BORDER_CONSTANT for alpha channel
+ if (img.shape[2] == 4) and (border_mode != cv2.BORDER_CONSTANT):
+ img_patch[:,:,3] = cv2.warpAffine(img[:,:,3], trans, (int(patch_width), int(patch_height)),
+ flags=cv2.INTER_LINEAR,
+ borderMode=cv2.BORDER_CONSTANT,
+ )
+
+ return img_patch, trans
+
+
+def convert_cvimg_to_tensor(cvimg: np.array):
+ """
+ Convert image from HWC to CHW format.
+ Args:
+ cvimg (np.array): Image of shape (H, W, 3) as loaded by OpenCV.
+ Returns:
+ np.array: Output image of shape (3, H, W).
+ """
+ # from h,w,c(OpenCV) to c,h,w
+ img = cvimg.copy()
+ img = np.transpose(img, (2, 0, 1))
+ # from int to float
+ img = img.astype(np.float32)
+ return img
+
+def fliplr_params(mano_params: Dict, has_mano_params: Dict) -> Tuple[Dict, Dict]:
+ """
+ Flip MANO parameters when flipping the image.
+ Args:
+ mano_params (Dict): MANO parameter annotations.
+ has_mano_params (Dict): Whether MANO annotations are valid.
+ Returns:
+ Dict, Dict: Flipped MANO parameters and valid flags.
+ """
+ global_orient = mano_params['global_orient'].copy()
+ hand_pose = mano_params['hand_pose'].copy()
+ betas = mano_params['betas'].copy()
+ has_global_orient = has_mano_params['global_orient'].copy()
+ has_hand_pose = has_mano_params['hand_pose'].copy()
+ has_betas = has_mano_params['betas'].copy()
+
+ global_orient[1::3] *= -1
+ global_orient[2::3] *= -1
+ hand_pose[1::3] *= -1
+ hand_pose[2::3] *= -1
+
+ mano_params = {'global_orient': global_orient.astype(np.float32),
+ 'hand_pose': hand_pose.astype(np.float32),
+ 'betas': betas.astype(np.float32)
+ }
+
+ has_mano_params = {'global_orient': has_global_orient,
+ 'hand_pose': has_hand_pose,
+ 'betas': has_betas
+ }
+
+ return mano_params, has_mano_params
+
+
+def fliplr_keypoints(joints: np.array, width: float, flip_permutation: List[int]) -> np.array:
+ """
+ Flip 2D or 3D keypoints.
+ Args:
+        joints (np.array): Array of shape (N, 3) or (N, 4) containing 2D or 3D keypoint locations and confidence.
+        width (float): Image width used to mirror the x coordinates (1 for normalized coordinates).
+        flip_permutation (List): Permutation to apply after flipping.
+ Returns:
+ np.array: Flipped 2D or 3D keypoints with shape (N, 3) or (N, 4) respectively.
+ """
+ joints = joints.copy()
+ # Flip horizontal
+ joints[:, 0] = width - joints[:, 0] - 1
+ joints = joints[flip_permutation, :]
+
+ return joints
+
+def keypoint_3d_processing(keypoints_3d: np.array, flip_permutation: List[int], rot: float, do_flip: float) -> np.array:
+ """
+ Process 3D keypoints (rotation/flipping).
+ Args:
+ keypoints_3d (np.array): Input array of shape (N, 4) containing the 3D keypoints and confidence.
+ flip_permutation (List): Permutation to apply after flipping.
+ rot (float): Random rotation applied to the keypoints.
+ do_flip (bool): Whether to flip keypoints or not.
+ Returns:
+ np.array: Transformed 3D keypoints with shape (N, 4).
+ """
+ if do_flip:
+ keypoints_3d = fliplr_keypoints(keypoints_3d, 1, flip_permutation)
+ # in-plane rotation
+ rot_mat = np.eye(3)
+ if not rot == 0:
+ rot_rad = -rot * np.pi / 180
+ sn,cs = np.sin(rot_rad), np.cos(rot_rad)
+ rot_mat[0,:2] = [cs, -sn]
+ rot_mat[1,:2] = [sn, cs]
+ keypoints_3d[:, :-1] = np.einsum('ij,kj->ki', rot_mat, keypoints_3d[:, :-1])
+ # flip the x coordinates
+ keypoints_3d = keypoints_3d.astype('float32')
+ return keypoints_3d
+
+def rot_aa(aa: np.array, rot: float) -> np.array:
+ """
+ Rotate axis angle parameters.
+ Args:
+ aa (np.array): Axis-angle vector of shape (3,).
+        rot (float): Rotation angle in degrees.
+ Returns:
+ np.array: Rotated axis-angle vector.
+ """
+ # pose parameters
+ R = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
+ [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
+ [0, 0, 1]])
+ # find the rotation of the hand in camera frame
+ per_rdg, _ = cv2.Rodrigues(aa)
+ # apply the global rotation to the global orientation
+ resrot, _ = cv2.Rodrigues(np.dot(R,per_rdg))
+ aa = (resrot.T)[0]
+ return aa.astype(np.float32)
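+# Worked example (illustrative, not from the original code): rot_aa(np.zeros(3), 30.) composes the identity
+# orientation with an in-plane rotation of -30 degrees about the camera z-axis, returning approximately
+# [0., 0., -0.5236].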
+
+def mano_param_processing(mano_params: Dict, has_mano_params: Dict, rot: float, do_flip: bool) -> Tuple[Dict, Dict]:
+ """
+ Apply random augmentations to the MANO parameters.
+ Args:
+ mano_params (Dict): MANO parameter annotations.
+        has_mano_params (Dict): Whether MANO annotations are valid.
+ rot (float): Random rotation applied to the keypoints.
+ do_flip (bool): Whether to flip keypoints or not.
+ Returns:
+ Dict, Dict: Transformed MANO parameters and valid flags.
+ """
+ if do_flip:
+ mano_params, has_mano_params = fliplr_params(mano_params, has_mano_params)
+ mano_params['global_orient'] = rot_aa(mano_params['global_orient'], rot)
+ return mano_params, has_mano_params
+
+
+
+def get_example(img_path: str|np.ndarray, center_x: float, center_y: float,
+ width: float, height: float,
+ keypoints_2d: np.array, keypoints_3d: np.array,
+ mano_params: Dict, has_mano_params: Dict,
+ flip_kp_permutation: List[int],
+ patch_width: int, patch_height: int,
+ mean: np.array, std: np.array,
+ do_augment: bool, is_right: bool, augm_config: CfgNode,
+ is_bgr: bool = True,
+ use_skimage_antialias: bool = False,
+ border_mode: int = cv2.BORDER_CONSTANT,
+ return_trans: bool = False) -> Tuple:
+ """
+ Get an example from the dataset and (possibly) apply random augmentations.
+ Args:
+        img_path (str or np.ndarray): Image path, or the image itself as a numpy array.
+ center_x (float): Bounding box center x coordinate in the original image.
+ center_y (float): Bounding box center y coordinate in the original image.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array with shape (N,3) containing the 2D keypoints in the original image coordinates.
+ keypoints_3d (np.array): Array with shape (N,4) containing the 3D keypoints.
+ mano_params (Dict): MANO parameter annotations.
+ has_mano_params (Dict): Whether MANO annotations are valid.
+ flip_kp_permutation (List): Permutation to apply to the keypoints after flipping.
+ patch_width (float): Output box width.
+ patch_height (float): Output box height.
+ mean (np.array): Array of shape (3,) containing the mean for normalizing the input image.
+ std (np.array): Array of shape (3,) containing the std for normalizing the input image.
+ do_augment (bool): Whether to apply data augmentation or not.
+        is_right (bool): Whether the annotations correspond to a right hand (left hands are flipped).
+        augm_config (CfgNode): Config containing augmentation parameters.
+    Returns:
+        img_patch (np.array): Cropped image patch of shape (3, patch_height, patch_width)
+ keypoints_2d (np.array): Array with shape (N,3) containing the transformed 2D keypoints.
+ keypoints_3d (np.array): Array with shape (N,4) containing the transformed 3D keypoints.
+ mano_params (Dict): Transformed MANO parameters.
+ has_mano_params (Dict): Valid flag for transformed MANO parameters.
+ img_size (np.array): Image size of the original image.
+ """
+ if isinstance(img_path, str):
+ # 1. load image
+ cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
+ if not isinstance(cvimg, np.ndarray):
+ raise IOError("Fail to read %s" % img_path)
+ elif isinstance(img_path, np.ndarray):
+ cvimg = img_path
+ else:
+ raise TypeError('img_path must be either a string or a numpy array')
+ img_height, img_width, img_channels = cvimg.shape
+
+ img_size = np.array([img_height, img_width])
+
+ # 2. get augmentation params
+ if do_augment:
+ scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty = do_augmentation(augm_config)
+ else:
+ scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty = 1.0, 0, False, False, 0, [1.0, 1.0, 1.0], 0., 0.
+
+ # if it's a left hand, we flip
+ if not is_right:
+ do_flip = True
+
+ if width < 1 or height < 1:
+ breakpoint()
+
+ if do_extreme_crop:
+ if extreme_crop_lvl == 0:
+ center_x1, center_y1, width1, height1 = extreme_cropping(center_x, center_y, width, height, keypoints_2d)
+ elif extreme_crop_lvl == 1:
+ center_x1, center_y1, width1, height1 = extreme_cropping_aggressive(center_x, center_y, width, height, keypoints_2d)
+
+ THRESH = 4
+ if width1 < THRESH or height1 < THRESH:
+ # print(f'{do_extreme_crop=}')
+ # print(f'width: {width}, height: {height}')
+ # print(f'width1: {width1}, height1: {height1}')
+ # print(f'center_x: {center_x}, center_y: {center_y}')
+ # print(f'center_x1: {center_x1}, center_y1: {center_y1}')
+ # print(f'keypoints_2d: {keypoints_2d}')
+ # print(f'\n\n', flush=True)
+ # breakpoint()
+ pass
+ # print(f'skip ==> width1: {width1}, height1: {height1}, width: {width}, height: {height}')
+ else:
+ center_x, center_y, width, height = center_x1, center_y1, width1, height1
+
+ center_x += width * tx
+ center_y += height * ty
+
+ # Process 3D keypoints
+ keypoints_3d = keypoint_3d_processing(keypoints_3d, flip_kp_permutation, rot, do_flip)
+
+ # 3. generate image patch
+ if use_skimage_antialias:
+ # Blur image to avoid aliasing artifacts
+ downsampling_factor = (patch_width / (width*scale))
+ if downsampling_factor > 1.1:
+ cvimg = gaussian(cvimg, sigma=(downsampling_factor-1)/2, channel_axis=2, preserve_range=True, truncate=3.0)
+
+ img_patch_cv, trans = generate_image_patch_cv2(cvimg,
+ center_x, center_y,
+ width, height,
+ patch_width, patch_height,
+ do_flip, scale, rot,
+ border_mode=border_mode)
+ # img_patch_cv, trans = generate_image_patch_skimage(cvimg,
+ # center_x, center_y,
+ # width, height,
+ # patch_width, patch_height,
+ # do_flip, scale, rot,
+ # border_mode=border_mode)
+
+ image = img_patch_cv.copy()
+ if is_bgr:
+ image = image[:, :, ::-1]
+ img_patch_cv = image.copy()
+ img_patch = convert_cvimg_to_tensor(image)
+
+
+ mano_params, has_mano_params = mano_param_processing(mano_params, has_mano_params, rot, do_flip)
+
+ # apply normalization
+ for n_c in range(min(img_channels, 3)):
+ img_patch[n_c, :, :] = np.clip(img_patch[n_c, :, :] * color_scale[n_c], 0, 255)
+ if mean is not None and std is not None:
+ img_patch[n_c, :, :] = (img_patch[n_c, :, :] - mean[n_c]) / std[n_c]
+ if do_flip:
+ keypoints_2d = fliplr_keypoints(keypoints_2d, img_width, flip_kp_permutation)
+
+
+ for n_jt in range(len(keypoints_2d)):
+ keypoints_2d[n_jt, 0:2] = trans_point2d(keypoints_2d[n_jt, 0:2], trans)
+ keypoints_2d[:, :-1] = keypoints_2d[:, :-1] / patch_width - 0.5
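+    # At this point the 2D keypoints are in normalized patch coordinates in [-0.5, 0.5]
+    # (both axes are divided by patch_width, so a square patch is assumed).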
+
+ if not return_trans:
+ return img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size
+ else:
+ return img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size, trans
+
+def crop_to_hips(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
+ """
+ Extreme cropping: Crop the box up to the hip locations.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ lower_body_keypoints = [10, 11, 13, 14, 19, 20, 21, 22, 23, 24, 25+0, 25+1, 25+4, 25+5]
+ keypoints_2d[lower_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+
+def crop_to_shoulders(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box up to the shoulder locations.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ lower_body_keypoints = [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16]]
+ keypoints_2d[lower_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.2 * scale[0]
+ height = 1.2 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_to_head(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the head.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ lower_body_keypoints = [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16]]
+ keypoints_2d[lower_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.3 * scale[0]
+ height = 1.3 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_torso_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the torso.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nontorso_body_keypoints = [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 4, 5, 6, 7, 10, 11, 13, 17, 18]]
+ keypoints_2d[nontorso_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_rightarm_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the right arm.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonrightarm_body_keypoints = [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonrightarm_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_leftarm_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the left arm.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonleftarm_body_keypoints = [0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonleftarm_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_legs_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the legs.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonlegs_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18] + [25 + i for i in [6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18]]
+ keypoints_2d[nonlegs_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_rightleg_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the right leg.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonrightleg_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + [25 + i for i in [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonrightleg_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_leftleg_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+    Extreme cropping: Crop the box and keep only the left leg.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonleftleg_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17, 18, 22, 23, 24] + [25 + i for i in [0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonleftleg_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def full_body(keypoints_2d: np.array) -> bool:
+ """
+ Check if all main body joints are visible.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ bool: True if all main body joints are visible.
+ """
+
+ body_keypoints_openpose = [2, 3, 4, 5, 6, 7, 10, 11, 13, 14]
+ body_keypoints = [25 + i for i in [8, 7, 6, 9, 10, 11, 1, 0, 4, 5]]
+ return (np.maximum(keypoints_2d[body_keypoints, -1], keypoints_2d[body_keypoints_openpose, -1]) > 0).sum() == len(body_keypoints)
+
+def upper_body(keypoints_2d: np.array):
+ """
+ Check if all upper body joints are visible.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+        bool: True if no lower body joints are visible and at least two upper body joints are visible.
+ """
+ lower_body_keypoints_openpose = [10, 11, 13, 14]
+ lower_body_keypoints = [25 + i for i in [1, 0, 4, 5]]
+ upper_body_keypoints_openpose = [0, 1, 15, 16, 17, 18]
+ upper_body_keypoints = [25+8, 25+9, 25+12, 25+13, 25+17, 25+18]
+ return ((keypoints_2d[lower_body_keypoints + lower_body_keypoints_openpose, -1] > 0).sum() == 0)\
+ and ((keypoints_2d[upper_body_keypoints + upper_body_keypoints_openpose, -1] > 0).sum() >= 2)
+
+def get_bbox(keypoints_2d: np.array, rescale: float = 1.2) -> Tuple:
+ """
+ Get center and scale for bounding box from openpose detections.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ rescale (float): Scale factor to rescale bounding boxes computed from the keypoints.
+ Returns:
+ center (np.array): Array of shape (2,) containing the new bounding box center.
+        scale (np.array): Array of shape (2,) containing the bounding box width and height (rescaled).
+ """
+ valid = keypoints_2d[:,-1] > 0
+ valid_keypoints = keypoints_2d[valid][:,:-1]
+ center = 0.5 * (valid_keypoints.max(axis=0) + valid_keypoints.min(axis=0))
+ bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0))
+ # adjust bounding box tightness
+ scale = bbox_size
+ scale *= rescale
+ return center, scale
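+# Illustrative example (not from the original code): if the visible keypoints span x in [10, 110] and
+# y in [20, 220], get_bbox returns center (60, 120) and scale (120, 240) with the default rescale of 1.2.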
+
+def extreme_cropping(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
+ """
+ Perform extreme cropping
+ Args:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ """
+ p = torch.rand(1).item()
+ if full_body(keypoints_2d):
+ if p < 0.7:
+ center_x, center_y, width, height = crop_to_hips(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.9:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif upper_body(keypoints_2d):
+ if p < 0.9:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+
+ return center_x, center_y, max(width, height), max(width, height)
+
+def extreme_cropping_aggressive(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
+ """
+ Perform aggressive extreme cropping
+ Args:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ """
+ p = torch.rand(1).item()
+ if full_body(keypoints_2d):
+ if p < 0.2:
+ center_x, center_y, width, height = crop_to_hips(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.3:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.4:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.5:
+ center_x, center_y, width, height = crop_torso_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.6:
+ center_x, center_y, width, height = crop_rightarm_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.7:
+ center_x, center_y, width, height = crop_leftarm_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.8:
+ center_x, center_y, width, height = crop_legs_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.9:
+ center_x, center_y, width, height = crop_rightleg_only(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_leftleg_only(center_x, center_y, width, height, keypoints_2d)
+ elif upper_body(keypoints_2d):
+ if p < 0.2:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.4:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.6:
+ center_x, center_y, width, height = crop_torso_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.8:
+ center_x, center_y, width, height = crop_rightarm_only(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_leftarm_only(center_x, center_y, width, height, keypoints_2d)
+ return center_x, center_y, max(width, height), max(width, height)
diff --git a/hamer/datasets/vitdet_dataset.py b/hamer/datasets/vitdet_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e121e875cca9138cf0417b6434a3932d007dcbfd
--- /dev/null
+++ b/hamer/datasets/vitdet_dataset.py
@@ -0,0 +1,97 @@
+from typing import Dict
+
+import cv2
+import numpy as np
+from skimage.filters import gaussian
+from yacs.config import CfgNode
+import torch
+
+from .utils import (convert_cvimg_to_tensor,
+ expand_to_aspect_ratio,
+ generate_image_patch_cv2)
+
+DEFAULT_MEAN = 255. * np.array([0.485, 0.456, 0.406])
+DEFAULT_STD = 255. * np.array([0.229, 0.224, 0.225])
+
+class ViTDetDataset(torch.utils.data.Dataset):
+
+ def __init__(self,
+ cfg: CfgNode,
+ img_cv2: np.array,
+ boxes: np.array,
+ right: np.array,
+ rescale_factor=2.5,
+ train: bool = False,
+ **kwargs):
+ super().__init__()
+ self.cfg = cfg
+ self.img_cv2 = img_cv2
+ # self.boxes = boxes
+
+ assert train == False, "ViTDetDataset is only for inference"
+ self.train = train
+ self.img_size = cfg.MODEL.IMAGE_SIZE
+ self.mean = 255. * np.array(self.cfg.MODEL.IMAGE_MEAN)
+ self.std = 255. * np.array(self.cfg.MODEL.IMAGE_STD)
+
+ # Preprocess annotations
+ boxes = boxes.astype(np.float32)
+ self.center = (boxes[:, 2:4] + boxes[:, 0:2]) / 2.0
+ self.scale = rescale_factor * (boxes[:, 2:4] - boxes[:, 0:2]) / 200.0
+ #self.scale = (boxes[:, 2:4] - boxes[:, 0:2]) / 200.0
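+        # The box scale follows the usual "bbox size / 200" convention; rescale_factor (2.5 by default)
+        # enlarges the detected box, presumably to keep some context around the hand in the crop.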
+ self.personid = np.arange(len(boxes), dtype=np.int32)
+ self.right = right.astype(np.float32)
+
+ def __len__(self) -> int:
+ return len(self.personid)
+
+ def __getitem__(self, idx: int) -> Dict[str, np.array]:
+
+ center = self.center[idx].copy()
+ center_x = center[0]
+ center_y = center[1]
+
+ scale = self.scale[idx]
+ BBOX_SHAPE = self.cfg.MODEL.get('BBOX_SHAPE', None)
+ bbox_size = expand_to_aspect_ratio(scale*200, target_aspect_ratio=BBOX_SHAPE).max()
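+        # expand_to_aspect_ratio grows the 200*scale box to the MODEL.BBOX_SHAPE aspect ratio
+        # (e.g. [192, 256] for the ViT backbone, see load_hamer), and .max() takes the longer side
+        # as the square crop size.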
+ #bbox_size = scale.max()*200
+
+ patch_width = patch_height = self.img_size
+
+ right = self.right[idx].copy()
+ flip = right == 0
+
+ # 3. generate image patch
+ # if use_skimage_antialias:
+ cvimg = self.img_cv2.copy()
+ if True:
+ # Blur image to avoid aliasing artifacts
+ downsampling_factor = ((bbox_size*1.0) / patch_width)
+ print(f'{downsampling_factor=}')
+ downsampling_factor = downsampling_factor / 2.0
+ if downsampling_factor > 1.1:
+ cvimg = gaussian(cvimg, sigma=(downsampling_factor-1)/2, channel_axis=2, preserve_range=True)
+
+
+ img_patch_cv, trans = generate_image_patch_cv2(cvimg,
+ center_x, center_y,
+ bbox_size, bbox_size,
+ patch_width, patch_height,
+ flip, 1.0, 0,
+ border_mode=cv2.BORDER_CONSTANT)
+ img_patch_cv = img_patch_cv[:, :, ::-1]
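+        # The channel reversal above converts the patch from BGR (as typically loaded by OpenCV) to RGB
+        # before tensor conversion and normalization.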
+ img_patch = convert_cvimg_to_tensor(img_patch_cv)
+
+ # apply normalization
+ for n_c in range(min(self.img_cv2.shape[2], 3)):
+ img_patch[n_c, :, :] = (img_patch[n_c, :, :] - self.mean[n_c]) / self.std[n_c]
+
+ item = {
+ 'img': img_patch,
+ 'personid': int(self.personid[idx]),
+ }
+ item['box_center'] = self.center[idx].copy()
+ item['box_size'] = bbox_size
+ item['img_size'] = 1.0 * np.array([cvimg.shape[1], cvimg.shape[0]])
+ item['right'] = self.right[idx].copy()
+ return item
diff --git a/hamer/models/__init__.py b/hamer/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..00fd105938776562aef6bd633f137fc676e49227
--- /dev/null
+++ b/hamer/models/__init__.py
@@ -0,0 +1,46 @@
+from .mano_wrapper import MANO
+from .hamer import HAMER
+from .discriminator import Discriminator
+
+from ..utils.download import cache_url
+from ..configs import CACHE_DIR_HAMER
+
+
+def download_models(folder=CACHE_DIR_HAMER):
+ """Download checkpoints and files for running inference.
+ """
+ import os
+ os.makedirs(folder, exist_ok=True)
+ download_files = {
+ "hamer_data.tar.gz" : ["https://people.eecs.berkeley.edu/~jathushan/projects/4dhumans/hamer_data.tar.gz", folder],
+ }
+
+ for file_name, url in download_files.items():
+ output_path = os.path.join(url[1], file_name)
+ if not os.path.exists(output_path):
+ print("Downloading file: " + file_name)
+ # output = gdown.cached_download(url[0], output_path, fuzzy=True)
+ output = cache_url(url[0], output_path)
+ assert os.path.exists(output_path), f"{output} does not exist"
+
+        # if the file is a .tar.gz archive, extract it with tar -xvf
+ if file_name.endswith(".tar.gz"):
+ print("Extracting file: " + file_name)
+ os.system("tar -xvf " + output_path + " -C " + url[1])
+
+DEFAULT_CHECKPOINT=f'{CACHE_DIR_HAMER}/hamer_ckpts/checkpoints/hamer.ckpt'
+def load_hamer(checkpoint_path=DEFAULT_CHECKPOINT):
+ from pathlib import Path
+ from ..configs import get_config
+ model_cfg = str(Path(checkpoint_path).parent.parent / 'model_config.yaml')
+ model_cfg = get_config(model_cfg, update_cachedir=True)
+
+ # Override some config values, to crop bbox correctly
+ if (model_cfg.MODEL.BACKBONE.TYPE == 'vit') and ('BBOX_SHAPE' not in model_cfg.MODEL):
+ model_cfg.defrost()
+ assert model_cfg.MODEL.IMAGE_SIZE == 256, f"MODEL.IMAGE_SIZE ({model_cfg.MODEL.IMAGE_SIZE}) should be 256 for ViT backbone"
+ model_cfg.MODEL.BBOX_SHAPE = [192,256]
+ model_cfg.freeze()
+
+ model = HAMER.load_from_checkpoint(checkpoint_path, strict=False, cfg=model_cfg)
+ return model, model_cfg
diff --git a/hamer/models/backbones/__init__.py b/hamer/models/backbones/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2b217b0e624dc5612dcc405c450fa4b43039dff
--- /dev/null
+++ b/hamer/models/backbones/__init__.py
@@ -0,0 +1,7 @@
+from .vit import vit
+
+def create_backbone(cfg):
+ if cfg.MODEL.BACKBONE.TYPE == 'vit':
+ return vit(cfg)
+ else:
+ raise NotImplementedError('Backbone type is not implemented')
diff --git a/hamer/models/backbones/vit.py b/hamer/models/backbones/vit.py
new file mode 100644
index 0000000000000000000000000000000000000000..c56c71889cd441294f57ad687d0678d2443d1eed
--- /dev/null
+++ b/hamer/models/backbones/vit.py
@@ -0,0 +1,348 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+def vit(cfg):
+ return ViT(
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ )
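+# Note: the cfg argument is currently unused; this factory hard-codes ViT-H style hyper-parameters
+# (1280-dim embeddings, 32 blocks, 16 heads) for a 256x192 input crop.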
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
+ Args:
+ abs_pos (Tensor): absolute positional embeddings with (1, num_position, C).
+ has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token.
+        h, w (int): target height and width of the token grid.
+        ori_h, ori_w (int): original height and width of the token grid.
+
+ Returns:
+        Absolute positional embeddings after processing, with shape (1, h*w (+1 if a cls token is present), C)
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+ def forward(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
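+        # Note: with the default patch_size=16 and ratio=1 the padding below evaluates to
+        # 4 + 2*(1//2 - 1) = 2 pixels.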
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
+class ViT(nn.Module):
+
+ def __init__(self,
+ img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+ num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+ drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+ frozen_stages=-1, ratio=1, last_norm=True,
+ patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+ ):
+ super(ViT, self).__init__()
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+ self.num_classes = num_classes
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.frozen_stages = frozen_stages
+ self.use_checkpoint = use_checkpoint
+ self.patch_padding = patch_padding
+ self.freeze_attn = freeze_attn
+ self.freeze_ffn = freeze_ffn
+ self.depth = depth
+
+ if hybrid_backbone is not None:
+ self.patch_embed = HybridEmbed(
+ hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+ else:
+ self.patch_embed = PatchEmbed(
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+ # since the pretraining model has class token
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
+
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+ )
+ for i in range(depth)])
+
+ self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=.02)
+
+ self._freeze_stages()
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = self.blocks[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if self.freeze_attn:
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.attn.eval()
+ m.norm1.eval()
+ for param in m.attn.parameters():
+ param.requires_grad = False
+ for param in m.norm1.parameters():
+ param.requires_grad = False
+
+ if self.freeze_ffn:
+ self.pos_embed.requires_grad = False
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.mlp.eval()
+ m.norm2.eval()
+ for param in m.mlp.parameters():
+ param.requires_grad = False
+ for param in m.norm2.parameters():
+ param.requires_grad = False
+
+ def init_weights(self):
+ """Initialize the weights in backbone.
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ def _init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ self.apply(_init_weights)
+
+ def get_num_layers(self):
+ return len(self.blocks)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ def forward_features(self, x):
+ B, C, H, W = x.shape
+ x, (Hp, Wp) = self.patch_embed(x)
+
+ if self.pos_embed is not None:
+ # fit for multiple GPU training
+            # the cls-token slot of the positional embedding is broadcast-added to every token; for
+            # sin-cos embeddings this slot is zero, so it makes no difference
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ if self.use_checkpoint:
+ x = checkpoint.checkpoint(blk, x)
+ else:
+ x = blk(x)
+
+ x = self.last_norm(x)
+
+ xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+
+ return xp
+
+ def forward(self, x):
+ x = self.forward_features(x)
+ return x
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
diff --git a/hamer/models/components/__init__.py b/hamer/models/components/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hamer/models/components/pose_transformer.py b/hamer/models/components/pose_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac04971407cb59637490cc4842f048b9bc4758be
--- /dev/null
+++ b/hamer/models/components/pose_transformer.py
@@ -0,0 +1,358 @@
+from inspect import isfunction
+from typing import Callable, Optional
+
+import torch
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from torch import nn
+
+from .t_cond_mlp import (
+ AdaptiveLayerNorm1D,
+ FrequencyEmbedder,
+ normalization_layer,
+)
+# from .vit import Attention, FeedForward
+
+
+def exists(val):
+ return val is not None
+
+
+def default(val, d):
+ if exists(val):
+ return val
+ return d() if isfunction(d) else d
+
+
+class PreNorm(nn.Module):
+ def __init__(self, dim: int, fn: Callable, norm: str = "layer", norm_cond_dim: int = -1):
+ super().__init__()
+ self.norm = normalization_layer(norm, dim, norm_cond_dim)
+ self.fn = fn
+
+ def forward(self, x: torch.Tensor, *args, **kwargs):
+ if isinstance(self.norm, AdaptiveLayerNorm1D):
+ return self.fn(self.norm(x, *args), **kwargs)
+ else:
+ return self.fn(self.norm(x), **kwargs)
+
+
+class FeedForward(nn.Module):
+ def __init__(self, dim, hidden_dim, dropout=0.0):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(dim, hidden_dim),
+ nn.GELU(),
+ nn.Dropout(dropout),
+ nn.Linear(hidden_dim, dim),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+
+class Attention(nn.Module):
+ def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
+ super().__init__()
+ inner_dim = dim_head * heads
+ project_out = not (heads == 1 and dim_head == dim)
+
+ self.heads = heads
+ self.scale = dim_head**-0.5
+
+ self.attend = nn.Softmax(dim=-1)
+ self.dropout = nn.Dropout(dropout)
+
+ self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
+
+ self.to_out = (
+ nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
+ if project_out
+ else nn.Identity()
+ )
+
+ def forward(self, x):
+ qkv = self.to_qkv(x).chunk(3, dim=-1)
+ q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=self.heads), qkv)
+
+ dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
+
+ attn = self.attend(dots)
+ attn = self.dropout(attn)
+
+ out = torch.matmul(attn, v)
+ out = rearrange(out, "b h n d -> b n (h d)")
+ return self.to_out(out)
+
+
+class CrossAttention(nn.Module):
+ def __init__(self, dim, context_dim=None, heads=8, dim_head=64, dropout=0.0):
+ super().__init__()
+ inner_dim = dim_head * heads
+ project_out = not (heads == 1 and dim_head == dim)
+
+ self.heads = heads
+ self.scale = dim_head**-0.5
+
+ self.attend = nn.Softmax(dim=-1)
+ self.dropout = nn.Dropout(dropout)
+
+ context_dim = default(context_dim, dim)
+ self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias=False)
+ self.to_q = nn.Linear(dim, inner_dim, bias=False)
+
+ self.to_out = (
+ nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
+ if project_out
+ else nn.Identity()
+ )
+
+ def forward(self, x, context=None):
+ context = default(context, x)
+ k, v = self.to_kv(context).chunk(2, dim=-1)
+ q = self.to_q(x)
+ q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=self.heads), [q, k, v])
+
+ dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
+
+ attn = self.attend(dots)
+ attn = self.dropout(attn)
+
+ out = torch.matmul(attn, v)
+ out = rearrange(out, "b h n d -> b n (h d)")
+ return self.to_out(out)
+
+
+class Transformer(nn.Module):
+ def __init__(
+ self,
+ dim: int,
+ depth: int,
+ heads: int,
+ dim_head: int,
+ mlp_dim: int,
+ dropout: float = 0.0,
+ norm: str = "layer",
+ norm_cond_dim: int = -1,
+ ):
+ super().__init__()
+ self.layers = nn.ModuleList([])
+ for _ in range(depth):
+ sa = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)
+ ff = FeedForward(dim, mlp_dim, dropout=dropout)
+ self.layers.append(
+ nn.ModuleList(
+ [
+ PreNorm(dim, sa, norm=norm, norm_cond_dim=norm_cond_dim),
+ PreNorm(dim, ff, norm=norm, norm_cond_dim=norm_cond_dim),
+ ]
+ )
+ )
+
+ def forward(self, x: torch.Tensor, *args):
+ for attn, ff in self.layers:
+ x = attn(x, *args) + x
+ x = ff(x, *args) + x
+ return x
+
+
+class TransformerCrossAttn(nn.Module):
+ def __init__(
+ self,
+ dim: int,
+ depth: int,
+ heads: int,
+ dim_head: int,
+ mlp_dim: int,
+ dropout: float = 0.0,
+ norm: str = "layer",
+ norm_cond_dim: int = -1,
+ context_dim: Optional[int] = None,
+ ):
+ super().__init__()
+ self.layers = nn.ModuleList([])
+ for _ in range(depth):
+ sa = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)
+ ca = CrossAttention(
+ dim, context_dim=context_dim, heads=heads, dim_head=dim_head, dropout=dropout
+ )
+ ff = FeedForward(dim, mlp_dim, dropout=dropout)
+ self.layers.append(
+ nn.ModuleList(
+ [
+ PreNorm(dim, sa, norm=norm, norm_cond_dim=norm_cond_dim),
+ PreNorm(dim, ca, norm=norm, norm_cond_dim=norm_cond_dim),
+ PreNorm(dim, ff, norm=norm, norm_cond_dim=norm_cond_dim),
+ ]
+ )
+ )
+
+ def forward(self, x: torch.Tensor, *args, context=None, context_list=None):
+ if context_list is None:
+ context_list = [context] * len(self.layers)
+ if len(context_list) != len(self.layers):
+ raise ValueError(f"len(context_list) != len(self.layers) ({len(context_list)} != {len(self.layers)})")
+
+ for i, (self_attn, cross_attn, ff) in enumerate(self.layers):
+ x = self_attn(x, *args) + x
+ x = cross_attn(x, *args, context=context_list[i]) + x
+ x = ff(x, *args) + x
+ return x
+
+
+class DropTokenDropout(nn.Module):
+ def __init__(self, p: float = 0.1):
+ super().__init__()
+ if p < 0 or p > 1:
+ raise ValueError(
+ "dropout probability has to be between 0 and 1, " "but got {}".format(p)
+ )
+ self.p = p
+
+ def forward(self, x: torch.Tensor):
+ # x: (batch_size, seq_len, dim)
+ if self.training and self.p > 0:
+ zero_mask = torch.full_like(x[0, :, 0], self.p).bernoulli().bool()
+ # TODO: permutation idx for each batch using torch.argsort
+ if zero_mask.any():
+ x = x[:, ~zero_mask, :]
+ return x
+
+
+class ZeroTokenDropout(nn.Module):
+ def __init__(self, p: float = 0.1):
+ super().__init__()
+ if p < 0 or p > 1:
+ raise ValueError(
+ "dropout probability has to be between 0 and 1, " "but got {}".format(p)
+ )
+ self.p = p
+
+ def forward(self, x: torch.Tensor):
+ # x: (batch_size, seq_len, dim)
+ if self.training and self.p > 0:
+ zero_mask = torch.full_like(x[:, :, 0], self.p).bernoulli().bool()
+ # Zero-out the masked tokens
+ x[zero_mask, :] = 0
+ return x
+
+
+class TransformerEncoder(nn.Module):
+ def __init__(
+ self,
+ num_tokens: int,
+ token_dim: int,
+ dim: int,
+ depth: int,
+ heads: int,
+ mlp_dim: int,
+ dim_head: int = 64,
+ dropout: float = 0.0,
+ emb_dropout: float = 0.0,
+ emb_dropout_type: str = "drop",
+ emb_dropout_loc: str = "token",
+ norm: str = "layer",
+ norm_cond_dim: int = -1,
+ token_pe_numfreq: int = -1,
+ ):
+ super().__init__()
+ if token_pe_numfreq > 0:
+ token_dim_new = token_dim * (2 * token_pe_numfreq + 1)
+ self.to_token_embedding = nn.Sequential(
+ Rearrange("b n d -> (b n) d", n=num_tokens, d=token_dim),
+ FrequencyEmbedder(token_pe_numfreq, token_pe_numfreq - 1),
+ Rearrange("(b n) d -> b n d", n=num_tokens, d=token_dim_new),
+ nn.Linear(token_dim_new, dim),
+ )
+ else:
+ self.to_token_embedding = nn.Linear(token_dim, dim)
+ self.pos_embedding = nn.Parameter(torch.randn(1, num_tokens, dim))
+ if emb_dropout_type == "drop":
+ self.dropout = DropTokenDropout(emb_dropout)
+ elif emb_dropout_type == "zero":
+ self.dropout = ZeroTokenDropout(emb_dropout)
+ else:
+ raise ValueError(f"Unknown emb_dropout_type: {emb_dropout_type}")
+ self.emb_dropout_loc = emb_dropout_loc
+
+ self.transformer = Transformer(
+ dim, depth, heads, dim_head, mlp_dim, dropout, norm=norm, norm_cond_dim=norm_cond_dim
+ )
+
+ def forward(self, inp: torch.Tensor, *args, **kwargs):
+ x = inp
+
+ if self.emb_dropout_loc == "input":
+ x = self.dropout(x)
+ x = self.to_token_embedding(x)
+
+ if self.emb_dropout_loc == "token":
+ x = self.dropout(x)
+ b, n, _ = x.shape
+ x += self.pos_embedding[:, :n]
+
+ if self.emb_dropout_loc == "token_afterpos":
+ x = self.dropout(x)
+ x = self.transformer(x, *args)
+ return x
+
+
+class TransformerDecoder(nn.Module):
+ def __init__(
+ self,
+ num_tokens: int,
+ token_dim: int,
+ dim: int,
+ depth: int,
+ heads: int,
+ mlp_dim: int,
+ dim_head: int = 64,
+ dropout: float = 0.0,
+ emb_dropout: float = 0.0,
+ emb_dropout_type: str = 'drop',
+ norm: str = "layer",
+ norm_cond_dim: int = -1,
+ context_dim: Optional[int] = None,
+ skip_token_embedding: bool = False,
+ ):
+ super().__init__()
+ if not skip_token_embedding:
+ self.to_token_embedding = nn.Linear(token_dim, dim)
+ else:
+ self.to_token_embedding = nn.Identity()
+ if token_dim != dim:
+ raise ValueError(
+ f"token_dim ({token_dim}) != dim ({dim}) when skip_token_embedding is True"
+ )
+
+ self.pos_embedding = nn.Parameter(torch.randn(1, num_tokens, dim))
+        if emb_dropout_type == "drop":
+            self.dropout = DropTokenDropout(emb_dropout)
+        elif emb_dropout_type == "zero":
+            self.dropout = ZeroTokenDropout(emb_dropout)
+        elif emb_dropout_type == "normal":
+            self.dropout = nn.Dropout(emb_dropout)
+        else:
+            raise ValueError(f"Unknown emb_dropout_type: {emb_dropout_type}")
+
+ self.transformer = TransformerCrossAttn(
+ dim,
+ depth,
+ heads,
+ dim_head,
+ mlp_dim,
+ dropout,
+ norm=norm,
+ norm_cond_dim=norm_cond_dim,
+ context_dim=context_dim,
+ )
+
+ def forward(self, inp: torch.Tensor, *args, context=None, context_list=None):
+ x = self.to_token_embedding(inp)
+ b, n, _ = x.shape
+
+ x = self.dropout(x)
+ x += self.pos_embedding[:, :n]
+
+ x = self.transformer(x, *args, context=context, context_list=context_list)
+ return x
+
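+
+if __name__ == "__main__":
+    # Illustrative usage sketch added for documentation (not part of the HaMeR
+    # training pipeline). A single query token cross-attends to a sequence of
+    # image tokens, mirroring how the MANO head drives this decoder. All sizes
+    # below are example values, not HaMeR defaults.
+    decoder = TransformerDecoder(
+        num_tokens=1, token_dim=112, dim=256, depth=2, heads=4,
+        mlp_dim=512, context_dim=1280,
+    )
+    token = torch.randn(2, 1, 112)       # (batch, num_tokens, token_dim)
+    context = torch.randn(2, 192, 1280)  # (batch, seq_len, context_dim)
+    out = decoder(token, context=context)
+    print(out.shape)  # torch.Size([2, 1, 256])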
diff --git a/hamer/models/components/t_cond_mlp.py b/hamer/models/components/t_cond_mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..44d5a09bf54f67712a69953039b7b5af41c3f029
--- /dev/null
+++ b/hamer/models/components/t_cond_mlp.py
@@ -0,0 +1,199 @@
+import copy
+from typing import List, Optional
+
+import torch
+
+
+class AdaptiveLayerNorm1D(torch.nn.Module):
+ def __init__(self, data_dim: int, norm_cond_dim: int):
+ super().__init__()
+ if data_dim <= 0:
+ raise ValueError(f"data_dim must be positive, but got {data_dim}")
+ if norm_cond_dim <= 0:
+ raise ValueError(f"norm_cond_dim must be positive, but got {norm_cond_dim}")
+ self.norm = torch.nn.LayerNorm(
+ data_dim
+ ) # TODO: Check if elementwise_affine=True is correct
+ self.linear = torch.nn.Linear(norm_cond_dim, 2 * data_dim)
+ torch.nn.init.zeros_(self.linear.weight)
+ torch.nn.init.zeros_(self.linear.bias)
+
+ def forward(self, x: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
+ # x: (batch, ..., data_dim)
+ # t: (batch, norm_cond_dim)
+        # return: same shape as x
+ x = self.norm(x)
+ alpha, beta = self.linear(t).chunk(2, dim=-1)
+
+ # Add singleton dimensions to alpha and beta
+ if x.dim() > 2:
+ alpha = alpha.view(alpha.shape[0], *([1] * (x.dim() - 2)), alpha.shape[1])
+ beta = beta.view(beta.shape[0], *([1] * (x.dim() - 2)), beta.shape[1])
+
+ return x * (1 + alpha) + beta
+
+
+class SequentialCond(torch.nn.Sequential):
+ def forward(self, input, *args, **kwargs):
+ for module in self:
+ if isinstance(module, (AdaptiveLayerNorm1D, SequentialCond, ResidualMLPBlock)):
+ # print(f'Passing on args to {module}', [a.shape for a in args])
+ input = module(input, *args, **kwargs)
+ else:
+ # print(f'Skipping passing args to {module}', [a.shape for a in args])
+ input = module(input)
+ return input
+
+
+def normalization_layer(norm: Optional[str], dim: int, norm_cond_dim: int = -1):
+ if norm == "batch":
+ return torch.nn.BatchNorm1d(dim)
+ elif norm == "layer":
+ return torch.nn.LayerNorm(dim)
+ elif norm == "ada":
+ assert norm_cond_dim > 0, f"norm_cond_dim must be positive, got {norm_cond_dim}"
+ return AdaptiveLayerNorm1D(dim, norm_cond_dim)
+ elif norm is None:
+ return torch.nn.Identity()
+ else:
+ raise ValueError(f"Unknown norm: {norm}")
+
+
+def linear_norm_activ_dropout(
+ input_dim: int,
+ output_dim: int,
+ activation: torch.nn.Module = torch.nn.ReLU(),
+ bias: bool = True,
+ norm: Optional[str] = "layer", # Options: ada/batch/layer
+ dropout: float = 0.0,
+ norm_cond_dim: int = -1,
+) -> SequentialCond:
+ layers = []
+ layers.append(torch.nn.Linear(input_dim, output_dim, bias=bias))
+ if norm is not None:
+ layers.append(normalization_layer(norm, output_dim, norm_cond_dim))
+ layers.append(copy.deepcopy(activation))
+ if dropout > 0.0:
+ layers.append(torch.nn.Dropout(dropout))
+ return SequentialCond(*layers)
+
+
+def create_simple_mlp(
+ input_dim: int,
+ hidden_dims: List[int],
+ output_dim: int,
+ activation: torch.nn.Module = torch.nn.ReLU(),
+ bias: bool = True,
+ norm: Optional[str] = "layer", # Options: ada/batch/layer
+ dropout: float = 0.0,
+ norm_cond_dim: int = -1,
+) -> SequentialCond:
+ layers = []
+ prev_dim = input_dim
+ for hidden_dim in hidden_dims:
+ layers.extend(
+ linear_norm_activ_dropout(
+ prev_dim, hidden_dim, activation, bias, norm, dropout, norm_cond_dim
+ )
+ )
+ prev_dim = hidden_dim
+ layers.append(torch.nn.Linear(prev_dim, output_dim, bias=bias))
+ return SequentialCond(*layers)
+
+
+class ResidualMLPBlock(torch.nn.Module):
+ def __init__(
+ self,
+ input_dim: int,
+ hidden_dim: int,
+ num_hidden_layers: int,
+ output_dim: int,
+ activation: torch.nn.Module = torch.nn.ReLU(),
+ bias: bool = True,
+ norm: Optional[str] = "layer", # Options: ada/batch/layer
+ dropout: float = 0.0,
+ norm_cond_dim: int = -1,
+ ):
+ super().__init__()
+ if not (input_dim == output_dim == hidden_dim):
+            raise NotImplementedError(
+                f"ResidualMLPBlock requires input_dim == hidden_dim == output_dim, "
+                f"got {input_dim}, {hidden_dim}, {output_dim}"
+            )
+
+ layers = []
+ prev_dim = input_dim
+ for i in range(num_hidden_layers):
+ layers.append(
+ linear_norm_activ_dropout(
+ prev_dim, hidden_dim, activation, bias, norm, dropout, norm_cond_dim
+ )
+ )
+ prev_dim = hidden_dim
+ self.model = SequentialCond(*layers)
+ self.skip = torch.nn.Identity()
+
+ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+ return x + self.model(x, *args, **kwargs)
+
+
+class ResidualMLP(torch.nn.Module):
+ def __init__(
+ self,
+ input_dim: int,
+ hidden_dim: int,
+ num_hidden_layers: int,
+ output_dim: int,
+ activation: torch.nn.Module = torch.nn.ReLU(),
+ bias: bool = True,
+ norm: Optional[str] = "layer", # Options: ada/batch/layer
+ dropout: float = 0.0,
+ num_blocks: int = 1,
+ norm_cond_dim: int = -1,
+ ):
+ super().__init__()
+ self.input_dim = input_dim
+ self.model = SequentialCond(
+ linear_norm_activ_dropout(
+ input_dim, hidden_dim, activation, bias, norm, dropout, norm_cond_dim
+ ),
+ *[
+ ResidualMLPBlock(
+ hidden_dim,
+ hidden_dim,
+ num_hidden_layers,
+ hidden_dim,
+ activation,
+ bias,
+ norm,
+ dropout,
+ norm_cond_dim,
+ )
+ for _ in range(num_blocks)
+ ],
+ torch.nn.Linear(hidden_dim, output_dim, bias=bias),
+ )
+
+ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+ return self.model(x, *args, **kwargs)
+
+
+class FrequencyEmbedder(torch.nn.Module):
+ def __init__(self, num_frequencies, max_freq_log2):
+ super().__init__()
+ frequencies = 2 ** torch.linspace(0, max_freq_log2, steps=num_frequencies)
+ self.register_buffer("frequencies", frequencies)
+
+ def forward(self, x):
+ # x should be of size (N,) or (N, D)
+ N = x.size(0)
+ if x.dim() == 1: # (N,)
+ x = x.unsqueeze(1) # (N, D) where D=1
+ x_unsqueezed = x.unsqueeze(-1) # (N, D, 1)
+ scaled = self.frequencies.view(1, 1, -1) * x_unsqueezed # (N, D, num_frequencies)
+ s = torch.sin(scaled)
+ c = torch.cos(scaled)
+ embedded = torch.cat([s, c, x_unsqueezed], dim=-1).view(
+ N, -1
+ ) # (N, D * 2 * num_frequencies + D)
+ return embedded
+
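+
+if __name__ == "__main__":
+    # Illustrative shape checks added for documentation; the sizes are example
+    # values, not the ones used by HaMeR.
+    # FrequencyEmbedder maps (N, D) -> (N, D * (2 * num_frequencies + 1)):
+    # sin and cos at every frequency plus the raw input are concatenated.
+    emb = FrequencyEmbedder(num_frequencies=4, max_freq_log2=3)
+    x = torch.randn(5, 3)
+    print(emb(x).shape)  # torch.Size([5, 27]) = 3 * (2 * 4 + 1)
+
+    # AdaptiveLayerNorm1D modulates a LayerNorm with a scale/shift regressed
+    # from a conditioning vector t.
+    ada = AdaptiveLayerNorm1D(data_dim=8, norm_cond_dim=16)
+    feats = torch.randn(5, 10, 8)  # (batch, tokens, data_dim)
+    cond = torch.randn(5, 16)      # (batch, norm_cond_dim)
+    print(ada(feats, cond).shape)  # torch.Size([5, 10, 8])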
diff --git a/hamer/models/discriminator.py b/hamer/models/discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7ef22e00ed7ea457b8e4fcf35e0e71ddacc5185
--- /dev/null
+++ b/hamer/models/discriminator.py
@@ -0,0 +1,99 @@
+import torch
+import torch.nn as nn
+
+class Discriminator(nn.Module):
+
+ def __init__(self):
+ """
+ Pose + Shape discriminator proposed in HMR
+ """
+ super(Discriminator, self).__init__()
+
+ self.num_joints = 15
+ # poses_alone
+ self.D_conv1 = nn.Conv2d(9, 32, kernel_size=1)
+ nn.init.xavier_uniform_(self.D_conv1.weight)
+ nn.init.zeros_(self.D_conv1.bias)
+ self.relu = nn.ReLU(inplace=True)
+ self.D_conv2 = nn.Conv2d(32, 32, kernel_size=1)
+ nn.init.xavier_uniform_(self.D_conv2.weight)
+ nn.init.zeros_(self.D_conv2.bias)
+ pose_out = []
+ for i in range(self.num_joints):
+ pose_out_temp = nn.Linear(32, 1)
+ nn.init.xavier_uniform_(pose_out_temp.weight)
+ nn.init.zeros_(pose_out_temp.bias)
+ pose_out.append(pose_out_temp)
+ self.pose_out = nn.ModuleList(pose_out)
+
+ # betas
+ self.betas_fc1 = nn.Linear(10, 10)
+ nn.init.xavier_uniform_(self.betas_fc1.weight)
+ nn.init.zeros_(self.betas_fc1.bias)
+ self.betas_fc2 = nn.Linear(10, 5)
+ nn.init.xavier_uniform_(self.betas_fc2.weight)
+ nn.init.zeros_(self.betas_fc2.bias)
+ self.betas_out = nn.Linear(5, 1)
+ nn.init.xavier_uniform_(self.betas_out.weight)
+ nn.init.zeros_(self.betas_out.bias)
+
+ # poses_joint
+ self.D_alljoints_fc1 = nn.Linear(32*self.num_joints, 1024)
+ nn.init.xavier_uniform_(self.D_alljoints_fc1.weight)
+ nn.init.zeros_(self.D_alljoints_fc1.bias)
+ self.D_alljoints_fc2 = nn.Linear(1024, 1024)
+ nn.init.xavier_uniform_(self.D_alljoints_fc2.weight)
+ nn.init.zeros_(self.D_alljoints_fc2.bias)
+ self.D_alljoints_out = nn.Linear(1024, 1)
+ nn.init.xavier_uniform_(self.D_alljoints_out.weight)
+ nn.init.zeros_(self.D_alljoints_out.bias)
+
+
+ def forward(self, poses: torch.Tensor, betas: torch.Tensor) -> torch.Tensor:
+ """
+ Forward pass of the discriminator.
+ Args:
+            poses (torch.Tensor): Tensor of shape (B, 15, 3, 3) containing a batch of MANO hand poses (excluding the global orientation).
+            betas (torch.Tensor): Tensor of shape (B, 10) containing a batch of MANO beta coefficients.
+        Returns:
+            torch.Tensor: Discriminator output with shape (B, 17): 15 per-joint scores, 1 shape score and 1 all-joints score.
+ """
+ # poses B x num_joints x 1 x 9
+ poses = poses.reshape(-1, self.num_joints, 1, 9)
+ bn = poses.shape[0]
+ # poses B x 9 x num_joints x 1
+ poses = poses.permute(0, 3, 1, 2).contiguous()
+
+ # poses_alone
+ poses = self.D_conv1(poses)
+ poses = self.relu(poses)
+ poses = self.D_conv2(poses)
+ poses = self.relu(poses)
+
+ poses_out = []
+ for i in range(self.num_joints):
+ poses_out_ = self.pose_out[i](poses[:, :, i, 0])
+ poses_out.append(poses_out_)
+ poses_out = torch.cat(poses_out, dim=1)
+
+ # betas
+ betas = self.betas_fc1(betas)
+ betas = self.relu(betas)
+ betas = self.betas_fc2(betas)
+ betas = self.relu(betas)
+ betas_out = self.betas_out(betas)
+
+ # poses_joint
+ poses = poses.reshape(bn,-1)
+ poses_all = self.D_alljoints_fc1(poses)
+ poses_all = self.relu(poses_all)
+ poses_all = self.D_alljoints_fc2(poses_all)
+ poses_all = self.relu(poses_all)
+ poses_all_out = self.D_alljoints_out(poses_all)
+
+ disc_out = torch.cat((poses_out, betas_out, poses_all_out), 1)
+ return disc_out
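+
+
+if __name__ == "__main__":
+    # Illustrative shape check added for documentation: the output concatenates
+    # 15 per-joint scores, 1 shape (betas) score and 1 all-joints score -> (B, 17).
+    disc = Discriminator()
+    poses = torch.randn(4, 15, 3, 3)  # batch of per-joint rotation matrices
+    betas = torch.randn(4, 10)
+    print(disc(poses, betas).shape)   # torch.Size([4, 17])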
diff --git a/hamer/models/hamer.py b/hamer/models/hamer.py
new file mode 100644
index 0000000000000000000000000000000000000000..c095a315ef76b7dcf6504ee9e7799d1c4ca68f24
--- /dev/null
+++ b/hamer/models/hamer.py
@@ -0,0 +1,363 @@
+import torch
+import pytorch_lightning as pl
+from typing import Any, Dict, Mapping, Tuple
+
+from yacs.config import CfgNode
+
+from ..utils import SkeletonRenderer, MeshRenderer
+from ..utils.geometry import aa_to_rotmat, perspective_projection
+from ..utils.pylogger import get_pylogger
+from .backbones import create_backbone
+from .heads import build_mano_head
+from .discriminator import Discriminator
+from .losses import Keypoint3DLoss, Keypoint2DLoss, ParameterLoss
+from . import MANO
+
+log = get_pylogger(__name__)
+
+class HAMER(pl.LightningModule):
+
+ def __init__(self, cfg: CfgNode, init_renderer: bool = False):
+ """
+ Setup HAMER model
+ Args:
+ cfg (CfgNode): Config file as a yacs CfgNode
+ """
+ super().__init__()
+
+ # Save hyperparameters
+ self.save_hyperparameters(logger=False, ignore=['init_renderer'])
+
+ self.cfg = cfg
+ # Create backbone feature extractor
+ self.backbone = create_backbone(cfg)
+ #if cfg.MODEL.BACKBONE.get('PRETRAINED_WEIGHTS', None):
+ # log.info(f'Loading backbone weights from {cfg.MODEL.BACKBONE.PRETRAINED_WEIGHTS}')
+ # self.backbone.load_state_dict(torch.load(cfg.MODEL.BACKBONE.PRETRAINED_WEIGHTS, map_location='cpu')['state_dict'])
+
+ # Create MANO head
+ self.mano_head = build_mano_head(cfg)
+
+ # Create discriminator
+ if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
+ self.discriminator = Discriminator()
+
+ # Define loss functions
+ self.keypoint_3d_loss = Keypoint3DLoss(loss_type='l1')
+ self.keypoint_2d_loss = Keypoint2DLoss(loss_type='l1')
+ self.mano_parameter_loss = ParameterLoss()
+
+ # Instantiate MANO model
+ mano_cfg = {k.lower(): v for k,v in dict(cfg.MANO).items()}
+ self.mano = MANO(**mano_cfg)
+
+        # Buffer that shows whether we need to initialize ActNorm layers
+ self.register_buffer('initialized', torch.tensor(False))
+ # Setup renderer for visualization
+ if init_renderer:
+ self.renderer = SkeletonRenderer(self.cfg)
+ self.mesh_renderer = MeshRenderer(self.cfg, faces=self.mano.faces)
+ else:
+ self.renderer = None
+ self.mesh_renderer = None
+
+ # Disable automatic optimization since we use adversarial training
+ self.automatic_optimization = False
+
+ def on_after_backward(self):
+ for name, param in self.named_parameters():
+ if param.grad is None:
+ print(param.shape)
+ print(name)
+
+ def get_parameters(self):
+ all_params = list(self.mano_head.parameters())
+ all_params += list(self.backbone.parameters())
+ return all_params
+
+ def configure_optimizers(self) -> Tuple[torch.optim.Optimizer, torch.optim.Optimizer]:
+ """
+        Setup model and discriminator optimizers
+ Returns:
+ Tuple[torch.optim.Optimizer, torch.optim.Optimizer]: Model and discriminator optimizers
+ """
+ param_groups = [{'params': filter(lambda p: p.requires_grad, self.get_parameters()), 'lr': self.cfg.TRAIN.LR}]
+
+ optimizer = torch.optim.AdamW(params=param_groups,
+ # lr=self.cfg.TRAIN.LR,
+ weight_decay=self.cfg.TRAIN.WEIGHT_DECAY)
+ optimizer_disc = torch.optim.AdamW(params=self.discriminator.parameters(),
+ lr=self.cfg.TRAIN.LR,
+ weight_decay=self.cfg.TRAIN.WEIGHT_DECAY)
+
+ return optimizer, optimizer_disc
+
+ def forward_step(self, batch: Dict, train: bool = False) -> Dict:
+ """
+ Run a forward step of the network
+ Args:
+ batch (Dict): Dictionary containing batch data
+ train (bool): Flag indicating whether it is training or validation mode
+ Returns:
+ Dict: Dictionary containing the regression output
+ """
+
+ # Use RGB image as input
+ x = batch['img']
+ batch_size = x.shape[0]
+
+ # Compute conditioning features using the backbone
+ # if using ViT backbone, we need to use a different aspect ratio
+ conditioning_feats = self.backbone(x[:,:,:,32:-32])
+
+ pred_mano_params, pred_cam, _ = self.mano_head(conditioning_feats)
+
+ # Store useful regression outputs to the output dict
+ output = {}
+ output['pred_cam'] = pred_cam
+ output['pred_mano_params'] = {k: v.clone() for k,v in pred_mano_params.items()}
+
+ # Compute camera translation
+ device = pred_mano_params['hand_pose'].device
+ dtype = pred_mano_params['hand_pose'].dtype
+ focal_length = self.cfg.EXTRA.FOCAL_LENGTH * torch.ones(batch_size, 2, device=device, dtype=dtype)
+ pred_cam_t = torch.stack([pred_cam[:, 1],
+ pred_cam[:, 2],
+ 2*focal_length[:, 0]/(self.cfg.MODEL.IMAGE_SIZE * pred_cam[:, 0] +1e-9)],dim=-1)
+ output['pred_cam_t'] = pred_cam_t
+ output['focal_length'] = focal_length
+
+ # Compute model vertices, joints and the projected joints
+ pred_mano_params['global_orient'] = pred_mano_params['global_orient'].reshape(batch_size, -1, 3, 3)
+ pred_mano_params['hand_pose'] = pred_mano_params['hand_pose'].reshape(batch_size, -1, 3, 3)
+ pred_mano_params['betas'] = pred_mano_params['betas'].reshape(batch_size, -1)
+ mano_output = self.mano(**{k: v.float() for k,v in pred_mano_params.items()}, pose2rot=False)
+ pred_keypoints_3d = mano_output.joints
+ pred_vertices = mano_output.vertices
+ output['pred_keypoints_3d'] = pred_keypoints_3d.reshape(batch_size, -1, 3)
+ output['pred_vertices'] = pred_vertices.reshape(batch_size, -1, 3)
+ pred_cam_t = pred_cam_t.reshape(-1, 3)
+ focal_length = focal_length.reshape(-1, 2)
+ pred_keypoints_2d = perspective_projection(pred_keypoints_3d,
+ translation=pred_cam_t,
+ focal_length=focal_length / self.cfg.MODEL.IMAGE_SIZE)
+
+ output['pred_keypoints_2d'] = pred_keypoints_2d.reshape(batch_size, -1, 2)
+ return output
+
+ def compute_loss(self, batch: Dict, output: Dict, train: bool = True) -> torch.Tensor:
+ """
+ Compute losses given the input batch and the regression output
+ Args:
+ batch (Dict): Dictionary containing batch data
+ output (Dict): Dictionary containing the regression output
+ train (bool): Flag indicating whether it is training or validation mode
+ Returns:
+ torch.Tensor : Total loss for current batch
+ """
+
+ pred_mano_params = output['pred_mano_params']
+ pred_keypoints_2d = output['pred_keypoints_2d']
+ pred_keypoints_3d = output['pred_keypoints_3d']
+
+
+ batch_size = pred_mano_params['hand_pose'].shape[0]
+ device = pred_mano_params['hand_pose'].device
+ dtype = pred_mano_params['hand_pose'].dtype
+
+ # Get annotations
+ gt_keypoints_2d = batch['keypoints_2d']
+ gt_keypoints_3d = batch['keypoints_3d']
+ gt_mano_params = batch['mano_params']
+ has_mano_params = batch['has_mano_params']
+ is_axis_angle = batch['mano_params_is_axis_angle']
+
+ # Compute 3D keypoint loss
+ loss_keypoints_2d = self.keypoint_2d_loss(pred_keypoints_2d, gt_keypoints_2d)
+ loss_keypoints_3d = self.keypoint_3d_loss(pred_keypoints_3d, gt_keypoints_3d, pelvis_id=0)
+
+ # Compute loss on MANO parameters
+ loss_mano_params = {}
+ for k, pred in pred_mano_params.items():
+ gt = gt_mano_params[k].view(batch_size, -1)
+ if is_axis_angle[k].all():
+ gt = aa_to_rotmat(gt.reshape(-1, 3)).view(batch_size, -1, 3, 3)
+ has_gt = has_mano_params[k]
+ loss_mano_params[k] = self.mano_parameter_loss(pred.reshape(batch_size, -1), gt.reshape(batch_size, -1), has_gt)
+
+ loss = self.cfg.LOSS_WEIGHTS['KEYPOINTS_3D'] * loss_keypoints_3d+\
+ self.cfg.LOSS_WEIGHTS['KEYPOINTS_2D'] * loss_keypoints_2d+\
+ sum([loss_mano_params[k] * self.cfg.LOSS_WEIGHTS[k.upper()] for k in loss_mano_params])
+
+
+ losses = dict(loss=loss.detach(),
+ loss_keypoints_2d=loss_keypoints_2d.detach(),
+ loss_keypoints_3d=loss_keypoints_3d.detach())
+
+ for k, v in loss_mano_params.items():
+ losses['loss_' + k] = v.detach()
+
+ output['losses'] = losses
+
+ return loss
+
+    # Tensorboard logging should run from first rank only
+ @pl.utilities.rank_zero.rank_zero_only
+ def tensorboard_logging(self, batch: Dict, output: Dict, step_count: int, train: bool = True, write_to_summary_writer: bool = True) -> None:
+ """
+ Log results to Tensorboard
+ Args:
+ batch (Dict): Dictionary containing batch data
+ output (Dict): Dictionary containing the regression output
+ step_count (int): Global training step count
+ train (bool): Flag indicating whether it is training or validation mode
+ """
+
+ mode = 'train' if train else 'val'
+ batch_size = batch['keypoints_2d'].shape[0]
+ images = batch['img']
+ images = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1,3,1,1)
+ images = images + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1,3,1,1)
+ #images = 255*images.permute(0, 2, 3, 1).cpu().numpy()
+
+ pred_keypoints_3d = output['pred_keypoints_3d'].detach().reshape(batch_size, -1, 3)
+ pred_vertices = output['pred_vertices'].detach().reshape(batch_size, -1, 3)
+ focal_length = output['focal_length'].detach().reshape(batch_size, 2)
+ gt_keypoints_3d = batch['keypoints_3d']
+ gt_keypoints_2d = batch['keypoints_2d']
+ losses = output['losses']
+ pred_cam_t = output['pred_cam_t'].detach().reshape(batch_size, 3)
+ pred_keypoints_2d = output['pred_keypoints_2d'].detach().reshape(batch_size, -1, 2)
+
+ if write_to_summary_writer:
+ summary_writer = self.logger.experiment
+ for loss_name, val in losses.items():
+ summary_writer.add_scalar(mode +'/' + loss_name, val.detach().item(), step_count)
+ num_images = min(batch_size, self.cfg.EXTRA.NUM_LOG_IMAGES)
+
+ gt_keypoints_3d = batch['keypoints_3d']
+ pred_keypoints_3d = output['pred_keypoints_3d'].detach().reshape(batch_size, -1, 3)
+
+ # We render the skeletons instead of the full mesh because rendering a lot of meshes will make the training slow.
+ #predictions = self.renderer(pred_keypoints_3d[:num_images],
+ # gt_keypoints_3d[:num_images],
+ # 2 * gt_keypoints_2d[:num_images],
+ # images=images[:num_images],
+ # camera_translation=pred_cam_t[:num_images])
+ predictions = self.mesh_renderer.visualize_tensorboard(pred_vertices[:num_images].cpu().numpy(),
+ pred_cam_t[:num_images].cpu().numpy(),
+ images[:num_images].cpu().numpy(),
+ pred_keypoints_2d[:num_images].cpu().numpy(),
+ gt_keypoints_2d[:num_images].cpu().numpy(),
+ focal_length=focal_length[:num_images].cpu().numpy())
+ if write_to_summary_writer:
+ summary_writer.add_image('%s/predictions' % mode, predictions, step_count)
+
+ return predictions
+
+ def forward(self, batch: Dict) -> Dict:
+ """
+ Run a forward step of the network in val mode
+ Args:
+ batch (Dict): Dictionary containing batch data
+ Returns:
+ Dict: Dictionary containing the regression output
+ """
+ return self.forward_step(batch, train=False)
+
+ def training_step_discriminator(self, batch: Dict,
+ hand_pose: torch.Tensor,
+ betas: torch.Tensor,
+ optimizer: torch.optim.Optimizer) -> torch.Tensor:
+ """
+ Run a discriminator training step
+ Args:
+ batch (Dict): Dictionary containing mocap batch data
+ hand_pose (torch.Tensor): Regressed hand pose from current step
+ betas (torch.Tensor): Regressed betas from current step
+ optimizer (torch.optim.Optimizer): Discriminator optimizer
+ Returns:
+ torch.Tensor: Discriminator loss
+ """
+ batch_size = hand_pose.shape[0]
+ gt_hand_pose = batch['hand_pose']
+ gt_betas = batch['betas']
+ gt_rotmat = aa_to_rotmat(gt_hand_pose.view(-1,3)).view(batch_size, -1, 3, 3)
+ disc_fake_out = self.discriminator(hand_pose.detach(), betas.detach())
+ loss_fake = ((disc_fake_out - 0.0) ** 2).sum() / batch_size
+ disc_real_out = self.discriminator(gt_rotmat, gt_betas)
+ loss_real = ((disc_real_out - 1.0) ** 2).sum() / batch_size
+ loss_disc = loss_fake + loss_real
+ loss = self.cfg.LOSS_WEIGHTS.ADVERSARIAL * loss_disc
+ optimizer.zero_grad()
+ self.manual_backward(loss)
+ optimizer.step()
+ return loss_disc.detach()
+
+ def training_step(self, joint_batch: Dict, batch_idx: int) -> Dict:
+ """
+ Run a full training step
+ Args:
+ joint_batch (Dict): Dictionary containing image and mocap batch data
+            batch_idx (int): Unused.
+ Returns:
+ Dict: Dictionary containing regression output.
+ """
+ batch = joint_batch['img']
+ mocap_batch = joint_batch['mocap']
+ optimizer = self.optimizers(use_pl_optimizer=True)
+ if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
+ optimizer, optimizer_disc = optimizer
+
+ batch_size = batch['img'].shape[0]
+ output = self.forward_step(batch, train=True)
+ pred_mano_params = output['pred_mano_params']
+ if self.cfg.get('UPDATE_GT_SPIN', False):
+ self.update_batch_gt_spin(batch, output)
+ loss = self.compute_loss(batch, output, train=True)
+ if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
+ disc_out = self.discriminator(pred_mano_params['hand_pose'].reshape(batch_size, -1), pred_mano_params['betas'].reshape(batch_size, -1))
+ loss_adv = ((disc_out - 1.0) ** 2).sum() / batch_size
+ loss = loss + self.cfg.LOSS_WEIGHTS.ADVERSARIAL * loss_adv
+
+ # Error if Nan
+ if torch.isnan(loss):
+ raise ValueError('Loss is NaN')
+
+ optimizer.zero_grad()
+ self.manual_backward(loss)
+ # Clip gradient
+ if self.cfg.TRAIN.get('GRAD_CLIP_VAL', 0) > 0:
+ gn = torch.nn.utils.clip_grad_norm_(self.get_parameters(), self.cfg.TRAIN.GRAD_CLIP_VAL, error_if_nonfinite=True)
+ self.log('train/grad_norm', gn, on_step=True, on_epoch=True, prog_bar=True, logger=True)
+ optimizer.step()
+ if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
+ loss_disc = self.training_step_discriminator(mocap_batch, pred_mano_params['hand_pose'].reshape(batch_size, -1), pred_mano_params['betas'].reshape(batch_size, -1), optimizer_disc)
+ output['losses']['loss_gen'] = loss_adv
+ output['losses']['loss_disc'] = loss_disc
+
+ if self.global_step > 0 and self.global_step % self.cfg.GENERAL.LOG_STEPS == 0:
+ self.tensorboard_logging(batch, output, self.global_step, train=True)
+
+ self.log('train/loss', output['losses']['loss'], on_step=True, on_epoch=True, prog_bar=True, logger=False)
+
+ return output
+
+ def validation_step(self, batch: Dict, batch_idx: int, dataloader_idx=0) -> Dict:
+ """
+ Run a validation step and log to Tensorboard
+ Args:
+ batch (Dict): Dictionary containing batch data
+ batch_idx (int): Unused.
+ Returns:
+ Dict: Dictionary containing regression output.
+ """
+ # batch_size = batch['img'].shape[0]
+ output = self.forward_step(batch, train=False)
+ loss = self.compute_loss(batch, output, train=False)
+ output['loss'] = loss
+ self.tensorboard_logging(batch, output, self.global_step, train=False)
+
+ return output
diff --git a/hamer/models/heads/__init__.py b/hamer/models/heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..27e24ee70c20d9979a880a149efc9bc617f65e74
--- /dev/null
+++ b/hamer/models/heads/__init__.py
@@ -0,0 +1 @@
+from .mano_head import build_mano_head
diff --git a/hamer/models/heads/mano_head.py b/hamer/models/heads/mano_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..c58487305d4816597d958017415033337f9100f2
--- /dev/null
+++ b/hamer/models/heads/mano_head.py
@@ -0,0 +1,111 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import einops
+
+from ...utils.geometry import rot6d_to_rotmat, aa_to_rotmat
+from ..components.pose_transformer import TransformerDecoder
+
+def build_mano_head(cfg):
+ mano_head_type = cfg.MODEL.MANO_HEAD.get('TYPE', 'hamer')
+ if mano_head_type == 'transformer_decoder':
+ return MANOTransformerDecoderHead(cfg)
+ else:
+ raise ValueError('Unknown MANO head type: {}'.format(mano_head_type))
+
+class MANOTransformerDecoderHead(nn.Module):
+ """ Cross-attention based MANO Transformer decoder
+ """
+
+ def __init__(self, cfg):
+ super().__init__()
+ self.cfg = cfg
+ self.joint_rep_type = cfg.MODEL.MANO_HEAD.get('JOINT_REP', '6d')
+ self.joint_rep_dim = {'6d': 6, 'aa': 3}[self.joint_rep_type]
+ npose = self.joint_rep_dim * (cfg.MANO.NUM_HAND_JOINTS + 1)
+ self.npose = npose
+ self.input_is_mean_shape = cfg.MODEL.MANO_HEAD.get('TRANSFORMER_INPUT', 'zero') == 'mean_shape'
+ transformer_args = dict(
+ num_tokens=1,
+ token_dim=(npose + 10 + 3) if self.input_is_mean_shape else 1,
+ dim=1024,
+ )
+ transformer_args = (transformer_args | dict(cfg.MODEL.MANO_HEAD.TRANSFORMER_DECODER))
+ self.transformer = TransformerDecoder(
+ **transformer_args
+ )
+ dim=transformer_args['dim']
+ self.decpose = nn.Linear(dim, npose)
+ self.decshape = nn.Linear(dim, 10)
+ self.deccam = nn.Linear(dim, 3)
+
+ if cfg.MODEL.MANO_HEAD.get('INIT_DECODER_XAVIER', False):
+ # True by default in MLP. False by default in Transformer
+ nn.init.xavier_uniform_(self.decpose.weight, gain=0.01)
+ nn.init.xavier_uniform_(self.decshape.weight, gain=0.01)
+ nn.init.xavier_uniform_(self.deccam.weight, gain=0.01)
+
+ mean_params = np.load(cfg.MANO.MEAN_PARAMS)
+ init_hand_pose = torch.from_numpy(mean_params['pose'].astype(np.float32)).unsqueeze(0)
+ init_betas = torch.from_numpy(mean_params['shape'].astype('float32')).unsqueeze(0)
+ init_cam = torch.from_numpy(mean_params['cam'].astype(np.float32)).unsqueeze(0)
+ self.register_buffer('init_hand_pose', init_hand_pose)
+ self.register_buffer('init_betas', init_betas)
+ self.register_buffer('init_cam', init_cam)
+
+ def forward(self, x, **kwargs):
+
+ batch_size = x.shape[0]
+        # The ViT backbone output is channel-first; convert it to token-first for the transformer
+ x = einops.rearrange(x, 'b c h w -> b (h w) c')
+
+ init_hand_pose = self.init_hand_pose.expand(batch_size, -1)
+ init_betas = self.init_betas.expand(batch_size, -1)
+ init_cam = self.init_cam.expand(batch_size, -1)
+
+ # TODO: Convert init_hand_pose to aa rep if needed
+ if self.joint_rep_type == 'aa':
+ raise NotImplementedError
+
+ pred_hand_pose = init_hand_pose
+ pred_betas = init_betas
+ pred_cam = init_cam
+ pred_hand_pose_list = []
+ pred_betas_list = []
+ pred_cam_list = []
+ for i in range(self.cfg.MODEL.MANO_HEAD.get('IEF_ITERS', 1)):
+            # Input token to the transformer: the current parameter estimate if TRANSFORMER_INPUT is 'mean_shape', otherwise a zero token
+ if self.input_is_mean_shape:
+ token = torch.cat([pred_hand_pose, pred_betas, pred_cam], dim=1)[:,None,:]
+ else:
+ token = torch.zeros(batch_size, 1, 1).to(x.device)
+
+ # Pass through transformer
+ token_out = self.transformer(token, context=x)
+ token_out = token_out.squeeze(1) # (B, C)
+
+ # Readout from token_out
+ pred_hand_pose = self.decpose(token_out) + pred_hand_pose
+ pred_betas = self.decshape(token_out) + pred_betas
+ pred_cam = self.deccam(token_out) + pred_cam
+ pred_hand_pose_list.append(pred_hand_pose)
+ pred_betas_list.append(pred_betas)
+ pred_cam_list.append(pred_cam)
+
+ # Convert self.joint_rep_type -> rotmat
+ joint_conversion_fn = {
+ '6d': rot6d_to_rotmat,
+ 'aa': lambda x: aa_to_rotmat(x.view(-1, 3).contiguous())
+ }[self.joint_rep_type]
+
+ pred_mano_params_list = {}
+ pred_mano_params_list['hand_pose'] = torch.cat([joint_conversion_fn(pbp).view(batch_size, -1, 3, 3)[:, 1:, :, :] for pbp in pred_hand_pose_list], dim=0)
+ pred_mano_params_list['betas'] = torch.cat(pred_betas_list, dim=0)
+ pred_mano_params_list['cam'] = torch.cat(pred_cam_list, dim=0)
+ pred_hand_pose = joint_conversion_fn(pred_hand_pose).view(batch_size, self.cfg.MANO.NUM_HAND_JOINTS+1, 3, 3)
+
+ pred_mano_params = {'global_orient': pred_hand_pose[:, [0]],
+ 'hand_pose': pred_hand_pose[:, 1:],
+ 'betas': pred_betas}
+ return pred_mano_params, pred_cam, pred_mano_params_list
diff --git a/hamer/models/losses.py b/hamer/models/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6e493c081a4d99b97b5641e85152c4d56072a58
--- /dev/null
+++ b/hamer/models/losses.py
@@ -0,0 +1,92 @@
+import torch
+import torch.nn as nn
+
+class Keypoint2DLoss(nn.Module):
+
+ def __init__(self, loss_type: str = 'l1'):
+ """
+ 2D keypoint loss module.
+ Args:
+ loss_type (str): Choose between l1 and l2 losses.
+ """
+ super(Keypoint2DLoss, self).__init__()
+ if loss_type == 'l1':
+ self.loss_fn = nn.L1Loss(reduction='none')
+ elif loss_type == 'l2':
+ self.loss_fn = nn.MSELoss(reduction='none')
+ else:
+ raise NotImplementedError('Unsupported loss function')
+
+ def forward(self, pred_keypoints_2d: torch.Tensor, gt_keypoints_2d: torch.Tensor) -> torch.Tensor:
+ """
+ Compute 2D reprojection loss on the keypoints.
+ Args:
+            pred_keypoints_2d (torch.Tensor): Tensor of shape [B, N, 2] containing the projected 2D keypoints (B: batch size, N: number of keypoints).
+            gt_keypoints_2d (torch.Tensor): Tensor of shape [B, N, 3] containing the ground truth 2D keypoints and per-keypoint confidence.
+ Returns:
+ torch.Tensor: 2D keypoint loss.
+ """
+ conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
+ batch_size = conf.shape[0]
+ loss = (conf * self.loss_fn(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).sum(dim=(1,2))
+ return loss.sum()
+
+
+class Keypoint3DLoss(nn.Module):
+
+ def __init__(self, loss_type: str = 'l1'):
+ """
+ 3D keypoint loss module.
+ Args:
+ loss_type (str): Choose between l1 and l2 losses.
+ """
+ super(Keypoint3DLoss, self).__init__()
+ if loss_type == 'l1':
+ self.loss_fn = nn.L1Loss(reduction='none')
+ elif loss_type == 'l2':
+ self.loss_fn = nn.MSELoss(reduction='none')
+ else:
+ raise NotImplementedError('Unsupported loss function')
+
+ def forward(self, pred_keypoints_3d: torch.Tensor, gt_keypoints_3d: torch.Tensor, pelvis_id: int = 0):
+ """
+ Compute 3D keypoint loss.
+ Args:
+            pred_keypoints_3d (torch.Tensor): Tensor of shape [B, N, 3] containing the predicted 3D keypoints (B: batch size, N: number of keypoints).
+            gt_keypoints_3d (torch.Tensor): Tensor of shape [B, N, 4] containing the ground truth 3D keypoints and per-keypoint confidence.
+ Returns:
+ torch.Tensor: 3D keypoint loss.
+ """
+ batch_size = pred_keypoints_3d.shape[0]
+ gt_keypoints_3d = gt_keypoints_3d.clone()
+ pred_keypoints_3d = pred_keypoints_3d - pred_keypoints_3d[:, pelvis_id, :].unsqueeze(dim=1)
+ gt_keypoints_3d[:, :, :-1] = gt_keypoints_3d[:, :, :-1] - gt_keypoints_3d[:, pelvis_id, :-1].unsqueeze(dim=1)
+ conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
+ gt_keypoints_3d = gt_keypoints_3d[:, :, :-1]
+ loss = (conf * self.loss_fn(pred_keypoints_3d, gt_keypoints_3d)).sum(dim=(1,2))
+ return loss.sum()
+
+class ParameterLoss(nn.Module):
+
+ def __init__(self):
+ """
+ MANO parameter loss module.
+ """
+ super(ParameterLoss, self).__init__()
+ self.loss_fn = nn.MSELoss(reduction='none')
+
+ def forward(self, pred_param: torch.Tensor, gt_param: torch.Tensor, has_param: torch.Tensor):
+ """
+ Compute MANO parameter loss.
+ Args:
+            pred_param (torch.Tensor): Tensor of shape [B, ...] containing the predicted parameters (hand pose / global orientation / betas).
+            gt_param (torch.Tensor): Tensor of shape [B, ...] containing the ground truth MANO parameters.
+            has_param (torch.Tensor): Tensor of shape [B] indicating which samples have ground truth parameters.
+        Returns:
+            torch.Tensor: L2 parameter loss.
+ """
+ batch_size = pred_param.shape[0]
+ num_dims = len(pred_param.shape)
+ mask_dimension = [batch_size] + [1] * (num_dims-1)
+ has_param = has_param.type(pred_param.type()).view(*mask_dimension)
+ loss_param = (has_param * self.loss_fn(pred_param, gt_param))
+ return loss_param.sum()
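+
+
+if __name__ == "__main__":
+    # Illustrative usage added for documentation (random tensors, example sizes).
+    kp2d_loss = Keypoint2DLoss(loss_type='l1')
+    pred_2d = torch.randn(4, 21, 2)
+    gt_2d = torch.cat([torch.randn(4, 21, 2), torch.ones(4, 21, 1)], dim=-1)  # last channel = confidence
+    print(kp2d_loss(pred_2d, gt_2d))  # scalar, confidence-weighted and summed over batch and joints
+
+    param_loss = ParameterLoss()
+    print(param_loss(torch.randn(4, 10), torch.randn(4, 10), torch.ones(4)))  # scalar MSE masked by has_param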
diff --git a/hamer/models/mano_wrapper.py b/hamer/models/mano_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6f0cc336098e9303d2514c571307c56baf3bc86
--- /dev/null
+++ b/hamer/models/mano_wrapper.py
@@ -0,0 +1,40 @@
+import torch
+import numpy as np
+import pickle
+from typing import Optional
+import smplx
+from smplx.lbs import vertices2joints
+from smplx.utils import MANOOutput, to_tensor
+from smplx.vertex_ids import vertex_ids
+
+
+class MANO(smplx.MANOLayer):
+ def __init__(self, *args, joint_regressor_extra: Optional[str] = None, **kwargs):
+ """
+ Extension of the official MANO implementation to support more joints.
+ Args:
+ Same as MANOLayer.
+ joint_regressor_extra (str): Path to extra joint regressor.
+ """
+ super(MANO, self).__init__(*args, **kwargs)
+ mano_to_openpose = [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]
+
+ #2, 3, 5, 4, 1
+ if joint_regressor_extra is not None:
+ self.register_buffer('joint_regressor_extra', torch.tensor(pickle.load(open(joint_regressor_extra, 'rb'), encoding='latin1'), dtype=torch.float32))
+ self.register_buffer('extra_joints_idxs', to_tensor(list(vertex_ids['mano'].values()), dtype=torch.long))
+ self.register_buffer('joint_map', torch.tensor(mano_to_openpose, dtype=torch.long))
+
+ def forward(self, *args, **kwargs) -> MANOOutput:
+ """
+ Run forward pass. Same as MANO and also append an extra set of joints if joint_regressor_extra is specified.
+ """
+ mano_output = super(MANO, self).forward(*args, **kwargs)
+ extra_joints = torch.index_select(mano_output.vertices, 1, self.extra_joints_idxs)
+ joints = torch.cat([mano_output.joints, extra_joints], dim=1)
+ joints = joints[:, self.joint_map, :]
+ if hasattr(self, 'joint_regressor_extra'):
+ extra_joints = vertices2joints(self.joint_regressor_extra, mano_output.vertices)
+ joints = torch.cat([joints, extra_joints], dim=1)
+ mano_output.joints = joints
+ return mano_output
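+
+
+if __name__ == "__main__":
+    # Hedged usage sketch added for documentation. It assumes the MANO model
+    # files are available under _DATA/data/mano and that smplx.MANOLayer fills
+    # in identity rotations when pose arguments are omitted.
+    mano = MANO(model_path='_DATA/data/mano')
+    out = mano(betas=torch.zeros(1, 10))
+    print(out.vertices.shape)  # expected: torch.Size([1, 778, 3])
+    print(out.joints.shape)    # expected: torch.Size([1, 21, 3]) with the extra fingertip joints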
diff --git a/hamer/utils/__init__.py b/hamer/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..09e47cdf8cdb303432d64902fbe58b256273f88a
--- /dev/null
+++ b/hamer/utils/__init__.py
@@ -0,0 +1,25 @@
+import torch
+from typing import Any
+
+from .renderer import Renderer
+from .mesh_renderer import MeshRenderer
+from .skeleton_renderer import SkeletonRenderer
+from .pose_utils import eval_pose, Evaluator
+
+def recursive_to(x: Any, target: torch.device):
+ """
+ Recursively transfer a batch of data to the target device
+ Args:
+ x (Any): Batch of data.
+ target (torch.device): Target device.
+ Returns:
+        Batch of data where all tensors are transferred to the target device.
+ """
+ if isinstance(x, dict):
+ return {k: recursive_to(v, target) for k, v in x.items()}
+ elif isinstance(x, torch.Tensor):
+ return x.to(target)
+ elif isinstance(x, list):
+ return [recursive_to(i, target) for i in x]
+ else:
+ return x
diff --git a/hamer/utils/download.py b/hamer/utils/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..84d9b34a4546aa8f456e9ceae2276ecbe1f60fb6
--- /dev/null
+++ b/hamer/utils/download.py
@@ -0,0 +1,66 @@
+import os
+import re
+import sys
+from urllib import request as urlrequest
+
+
+def _progress_bar(count, total):
+ """Report download progress. Credit:
+ https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
+ """
+ bar_len = 60
+ filled_len = int(round(bar_len * count / float(total)))
+ percents = round(100.0 * count / float(total), 1)
+ bar = "=" * filled_len + "-" * (bar_len - filled_len)
+ sys.stdout.write(
+ " [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024)
+ )
+ sys.stdout.flush()
+ if count >= total:
+ sys.stdout.write("\n")
+
+
+def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
+ """Download url and write it to dst_file_path. Credit:
+ https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
+ """
+ # url = url + "?dl=1" if "dropbox" in url else url
+ req = urlrequest.Request(url)
+ response = urlrequest.urlopen(req)
+ total_size = response.info().get("Content-Length")
+ if total_size is None:
+ raise ValueError("Cannot determine size of download from {}".format(url))
+ total_size = int(total_size.strip())
+ bytes_so_far = 0
+
+ with open(dst_file_path, "wb") as f:
+ while 1:
+ chunk = response.read(chunk_size)
+ bytes_so_far += len(chunk)
+ if not chunk:
+ break
+
+ if progress_hook:
+ progress_hook(bytes_so_far, total_size)
+
+ f.write(chunk)
+ return bytes_so_far
+
+
+def cache_url(url_or_file, cache_file_path, download=True):
+ """Download the file specified by the URL to the cache_dir and return the path to
+ the cached file. If the argument is not a URL, simply return it as is.
+ """
+ is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None
+ if not is_url:
+ return url_or_file
+ url = url_or_file
+ if os.path.exists(cache_file_path):
+ return cache_file_path
+ cache_file_dir = os.path.dirname(cache_file_path)
+ if not os.path.exists(cache_file_dir):
+ os.makedirs(cache_file_dir)
+ if download:
+ print("Downloading remote file {} to {}".format(url, cache_file_path))
+ download_url(url, cache_file_path)
+ return cache_file_path
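+
+
+if __name__ == "__main__":
+    # Illustrative call added for documentation: a plain local path (not a URL)
+    # is returned unchanged, so callers can pass either a URL or a file path.
+    # The paths below are placeholders, not files shipped with HaMeR.
+    print(cache_url("/tmp/example_checkpoint.ckpt", "/tmp/cache/example_checkpoint.ckpt"))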
diff --git a/hamer/utils/geometry.py b/hamer/utils/geometry.py
new file mode 100644
index 0000000000000000000000000000000000000000..7929ef52608618a4682788487008e73c5736101b
--- /dev/null
+++ b/hamer/utils/geometry.py
@@ -0,0 +1,102 @@
+from typing import Optional
+import torch
+from torch.nn import functional as F
+
+def aa_to_rotmat(theta: torch.Tensor):
+ """
+ Convert axis-angle representation to rotation matrix.
+ Works by first converting it to a quaternion.
+ Args:
+ theta (torch.Tensor): Tensor of shape (B, 3) containing axis-angle representations.
+ Returns:
+ torch.Tensor: Corresponding rotation matrices with shape (B, 3, 3).
+ """
+ norm = torch.norm(theta + 1e-8, p = 2, dim = 1)
+ angle = torch.unsqueeze(norm, -1)
+ normalized = torch.div(theta, angle)
+ angle = angle * 0.5
+ v_cos = torch.cos(angle)
+ v_sin = torch.sin(angle)
+ quat = torch.cat([v_cos, v_sin * normalized], dim = 1)
+ return quat_to_rotmat(quat)
+
+def quat_to_rotmat(quat: torch.Tensor) -> torch.Tensor:
+ """
+ Convert quaternion representation to rotation matrix.
+ Args:
+        quat (torch.Tensor): Tensor of shape (B, 4) containing quaternions in (w, x, y, z) order.
+ Returns:
+ torch.Tensor: Corresponding rotation matrices with shape (B, 3, 3).
+ """
+ norm_quat = quat
+ norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True)
+ w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3]
+
+ B = quat.size(0)
+
+ w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
+ wx, wy, wz = w*x, w*y, w*z
+ xy, xz, yz = x*y, x*z, y*z
+
+ rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz,
+ 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx,
+ 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3)
+ return rotMat
+
+
+def rot6d_to_rotmat(x: torch.Tensor) -> torch.Tensor:
+ """
+ Convert 6D rotation representation to 3x3 rotation matrix.
+ Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019
+ Args:
+ x (torch.Tensor): (B,6) Batch of 6-D rotation representations.
+ Returns:
+ torch.Tensor: Batch of corresponding rotation matrices with shape (B,3,3).
+ """
+ x = x.reshape(-1,2,3).permute(0, 2, 1).contiguous()
+ a1 = x[:, :, 0]
+ a2 = x[:, :, 1]
+ b1 = F.normalize(a1)
+ b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1)
+ b3 = torch.cross(b1, b2)
+ return torch.stack((b1, b2, b3), dim=-1)
+
+def perspective_projection(points: torch.Tensor,
+ translation: torch.Tensor,
+ focal_length: torch.Tensor,
+ camera_center: Optional[torch.Tensor] = None,
+ rotation: Optional[torch.Tensor] = None) -> torch.Tensor:
+ """
+ Computes the perspective projection of a set of 3D points.
+ Args:
+ points (torch.Tensor): Tensor of shape (B, N, 3) containing the input 3D points.
+ translation (torch.Tensor): Tensor of shape (B, 3) containing the 3D camera translation.
+ focal_length (torch.Tensor): Tensor of shape (B, 2) containing the focal length in pixels.
+ camera_center (torch.Tensor): Tensor of shape (B, 2) containing the camera center in pixels.
+ rotation (torch.Tensor): Tensor of shape (B, 3, 3) containing the camera rotation.
+ Returns:
+ torch.Tensor: Tensor of shape (B, N, 2) containing the projection of the input points.
+ """
+ batch_size = points.shape[0]
+ if rotation is None:
+ rotation = torch.eye(3, device=points.device, dtype=points.dtype).unsqueeze(0).expand(batch_size, -1, -1)
+ if camera_center is None:
+ camera_center = torch.zeros(batch_size, 2, device=points.device, dtype=points.dtype)
+ # Populate intrinsic camera matrix K.
+ K = torch.zeros([batch_size, 3, 3], device=points.device, dtype=points.dtype)
+ K[:,0,0] = focal_length[:,0]
+ K[:,1,1] = focal_length[:,1]
+ K[:,2,2] = 1.
+ K[:,:-1, -1] = camera_center
+
+ # Transform points
+ points = torch.einsum('bij,bkj->bki', rotation, points)
+ points = points + translation.unsqueeze(1)
+
+ # Apply perspective distortion
+ projected_points = points / points[:,:,-1].unsqueeze(-1)
+
+ # Apply camera intrinsics
+ projected_points = torch.einsum('bij,bkj->bki', K, projected_points)
+
+ return projected_points[:, :, :-1]
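+
+
+if __name__ == "__main__":
+    # Illustrative checks added for documentation (random inputs, example values):
+    # rot6d_to_rotmat should return valid rotation matrices, and
+    # perspective_projection maps 3D points in front of the camera to 2D.
+    R = rot6d_to_rotmat(torch.randn(4, 6))
+    print(torch.allclose(R @ R.transpose(1, 2), torch.eye(3).expand(4, 3, 3), atol=1e-5))  # True
+
+    points = torch.randn(2, 21, 3)
+    translation = torch.tensor([[0.0, 0.0, 10.0], [0.0, 0.0, 10.0]])
+    focal = torch.full((2, 2), 5000.0 / 256.0)  # focal length in normalized image units (example)
+    print(perspective_projection(points, translation, focal).shape)  # torch.Size([2, 21, 2])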
diff --git a/hamer/utils/mesh_renderer.py b/hamer/utils/mesh_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec7a6c1f78f7ec1cd757ce9aa1b47555b67a58d5
--- /dev/null
+++ b/hamer/utils/mesh_renderer.py
@@ -0,0 +1,149 @@
+import os
+#if 'PYOPENGL_PLATFORM' not in os.environ:
+# os.environ['PYOPENGL_PLATFORM'] = 'egl'
+import torch
+from torchvision.utils import make_grid
+import numpy as np
+import pyrender
+import trimesh
+import cv2
+import torch.nn.functional as F
+
+from .render_openpose import render_openpose
+
+def create_raymond_lights():
+ thetas = np.pi * np.array([1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
+ phis = np.pi * np.array([0.0, 2.0 / 3.0, 4.0 / 3.0])
+
+ nodes = []
+
+ for phi, theta in zip(phis, thetas):
+ xp = np.sin(theta) * np.cos(phi)
+ yp = np.sin(theta) * np.sin(phi)
+ zp = np.cos(theta)
+
+ z = np.array([xp, yp, zp])
+ z = z / np.linalg.norm(z)
+ x = np.array([-z[1], z[0], 0.0])
+ if np.linalg.norm(x) == 0:
+ x = np.array([1.0, 0.0, 0.0])
+ x = x / np.linalg.norm(x)
+ y = np.cross(z, x)
+
+ matrix = np.eye(4)
+ matrix[:3,:3] = np.c_[x,y,z]
+ nodes.append(pyrender.Node(
+ light=pyrender.DirectionalLight(color=np.ones(3), intensity=1.0),
+ matrix=matrix
+ ))
+
+ return nodes
+
+class MeshRenderer:
+
+ def __init__(self, cfg, faces=None):
+ self.cfg = cfg
+ self.focal_length = cfg.EXTRA.FOCAL_LENGTH
+ self.img_res = cfg.MODEL.IMAGE_SIZE
+ self.renderer = pyrender.OffscreenRenderer(viewport_width=self.img_res,
+ viewport_height=self.img_res,
+ point_size=1.0)
+
+ self.camera_center = [self.img_res // 2, self.img_res // 2]
+ self.faces = faces
+
+ def visualize(self, vertices, camera_translation, images, focal_length=None, nrow=3, padding=2):
+ images_np = np.transpose(images, (0,2,3,1))
+ rend_imgs = []
+ for i in range(vertices.shape[0]):
+ fl = self.focal_length
+ rend_img = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=False), (2,0,1))).float()
+ rend_img_side = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=True), (2,0,1))).float()
+ rend_imgs.append(torch.from_numpy(images[i]))
+ rend_imgs.append(rend_img)
+ rend_imgs.append(rend_img_side)
+ rend_imgs = make_grid(rend_imgs, nrow=nrow, padding=padding)
+ return rend_imgs
+
+ def visualize_tensorboard(self, vertices, camera_translation, images, pred_keypoints, gt_keypoints, focal_length=None, nrow=5, padding=2):
+ images_np = np.transpose(images, (0,2,3,1))
+ rend_imgs = []
+ pred_keypoints = np.concatenate((pred_keypoints, np.ones_like(pred_keypoints)[:, :, [0]]), axis=-1)
+ pred_keypoints = self.img_res * (pred_keypoints + 0.5)
+ gt_keypoints[:, :, :-1] = self.img_res * (gt_keypoints[:, :, :-1] + 0.5)
+ #keypoint_matches = [(1, 12), (2, 8), (3, 7), (4, 6), (5, 9), (6, 10), (7, 11), (8, 14), (9, 2), (10, 1), (11, 0), (12, 3), (13, 4), (14, 5)]
+ for i in range(vertices.shape[0]):
+ fl = self.focal_length
+ rend_img = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=False), (2,0,1))).float()
+ rend_img_side = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=True), (2,0,1))).float()
+ hand_keypoints = pred_keypoints[i, :21]
+ #extra_keypoints = pred_keypoints[i, -19:]
+ #for pair in keypoint_matches:
+ # hand_keypoints[pair[0], :] = extra_keypoints[pair[1], :]
+ pred_keypoints_img = render_openpose(255 * images_np[i].copy(), hand_keypoints) / 255
+ hand_keypoints = gt_keypoints[i, :21]
+ #extra_keypoints = gt_keypoints[i, -19:]
+ #for pair in keypoint_matches:
+ # if extra_keypoints[pair[1], -1] > 0 and hand_keypoints[pair[0], -1] == 0:
+ # hand_keypoints[pair[0], :] = extra_keypoints[pair[1], :]
+ gt_keypoints_img = render_openpose(255*images_np[i].copy(), hand_keypoints) / 255
+ rend_imgs.append(torch.from_numpy(images[i]))
+ rend_imgs.append(rend_img)
+ rend_imgs.append(rend_img_side)
+ rend_imgs.append(torch.from_numpy(pred_keypoints_img).permute(2,0,1))
+ rend_imgs.append(torch.from_numpy(gt_keypoints_img).permute(2,0,1))
+ rend_imgs = make_grid(rend_imgs, nrow=nrow, padding=padding)
+ return rend_imgs
+
+ def __call__(self, vertices, camera_translation, image, focal_length=5000, text=None, resize=None, side_view=False, baseColorFactor=(1.0, 1.0, 0.9, 1.0), rot_angle=90):
+ renderer = pyrender.OffscreenRenderer(viewport_width=image.shape[1],
+ viewport_height=image.shape[0],
+ point_size=1.0)
+ material = pyrender.MetallicRoughnessMaterial(
+ metallicFactor=0.0,
+ alphaMode='OPAQUE',
+ baseColorFactor=baseColorFactor)
+
+ camera_translation[0] *= -1.
+
+ mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())
+ if side_view:
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(rot_angle), [0, 1, 0])
+ mesh.apply_transform(rot)
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(180), [1, 0, 0])
+ mesh.apply_transform(rot)
+ mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
+
+ scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0],
+ ambient_light=(0.3, 0.3, 0.3))
+ scene.add(mesh, 'mesh')
+
+ camera_pose = np.eye(4)
+ camera_pose[:3, 3] = camera_translation
+ camera_center = [image.shape[1] / 2., image.shape[0] / 2.]
+ camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
+ cx=camera_center[0], cy=camera_center[1])
+ scene.add(camera, pose=camera_pose)
+
+
+ light_nodes = create_raymond_lights()
+ for node in light_nodes:
+ scene.add_node(node)
+
+ color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
+ color = color.astype(np.float32) / 255.0
+ valid_mask = (color[:, :, -1] > 0)[:, :, np.newaxis]
+ if not side_view:
+ output_img = (color[:, :, :3] * valid_mask +
+ (1 - valid_mask) * image)
+ else:
+ output_img = color[:, :, :3]
+ if resize is not None:
+ output_img = cv2.resize(output_img, resize)
+
+ output_img = output_img.astype(np.float32)
+ renderer.delete()
+ return output_img
diff --git a/hamer/utils/misc.py b/hamer/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffcfe784872b305c264ce6ef67fd0a9e9ad3390f
--- /dev/null
+++ b/hamer/utils/misc.py
@@ -0,0 +1,203 @@
+import time
+import warnings
+from importlib.util import find_spec
+from pathlib import Path
+from typing import Callable, List, Optional
+
+import hydra
+from omegaconf import DictConfig, OmegaConf
+from pytorch_lightning import Callback
+from pytorch_lightning.loggers import Logger
+from pytorch_lightning.utilities import rank_zero_only
+
+from . import pylogger, rich_utils
+
+log = pylogger.get_pylogger(__name__)
+
+
+def task_wrapper(task_func: Callable) -> Callable:
+ """Optional decorator that wraps the task function in extra utilities.
+
+ Makes multirun more resistant to failure.
+
+ Utilities:
+    - Calling `utils.extras()` before the task is started
+    - Calling `utils.close_loggers()` after the task is finished
+    - Logging the exception if one occurs
+    - Logging the total task execution time
+    - Logging the output dir
+ """
+
+ def wrap(cfg: DictConfig):
+
+ # apply extra utilities
+ extras(cfg)
+
+ # execute the task
+ try:
+ start_time = time.time()
+ ret = task_func(cfg=cfg)
+ except Exception as ex:
+ log.exception("") # save exception to `.log` file
+ raise ex
+ finally:
+ path = Path(cfg.paths.output_dir, "exec_time.log")
+ content = f"'{cfg.task_name}' execution time: {time.time() - start_time} (s)"
+ save_file(path, content) # save task execution time (even if exception occurs)
+ close_loggers() # close loggers (even if exception occurs so multirun won't fail)
+
+ log.info(f"Output dir: {cfg.paths.output_dir}")
+
+ return ret
+
+ return wrap
+
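+# NOTE: illustrative sketch, not part of the original module. It shows how a
+# Hydra entry point would typically use @task_wrapper: the decorator applies
+# `extras`, times the run, writes `exec_time.log` to cfg.paths.output_dir and
+# closes loggers even if the task fails. The cfg keys referenced below are
+# assumptions that mirror the keys this file already reads.
+@task_wrapper
+def _example_train_task(cfg: DictConfig) -> dict:
+    """Toy task body; a real task would build the datamodule/model/trainer here."""
+    log.info(f"Running toy task '{cfg.task_name}'")
+    return {"val/loss": 0.0}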
+
+def extras(cfg: DictConfig) -> None:
+ """Applies optional utilities before the task is started.
+
+ Utilities:
+ - Ignoring python warnings
+ - Setting tags from command line
+ - Rich config printing
+ """
+
+ # return if no `extras` config
+ if not cfg.get("extras"):
+        log.warning("Extras config not found!")
+ return
+
+ # disable python warnings
+ if cfg.extras.get("ignore_warnings"):
+        log.info("Disabling python warnings!")
+ warnings.filterwarnings("ignore")
+
+ # prompt user to input tags from command line if none are provided in the config
+ if cfg.extras.get("enforce_tags"):
+        log.info("Enforcing tags!")
+ rich_utils.enforce_tags(cfg, save_to_file=True)
+
+ # pretty print config tree using Rich library
+ if cfg.extras.get("print_config"):
+        log.info("Printing config tree with Rich!")
+ rich_utils.print_config_tree(cfg, resolve=True, save_to_file=True)
+
+
+@rank_zero_only
+def save_file(path: str, content: str) -> None:
+ """Save file in rank zero mode (only on one process in multi-GPU setup)."""
+ with open(path, "w+") as file:
+ file.write(content)
+
+
+def instantiate_callbacks(callbacks_cfg: DictConfig) -> List[Callback]:
+ """Instantiates callbacks from config."""
+ callbacks: List[Callback] = []
+
+ if not callbacks_cfg:
+ log.warning("Callbacks config is empty.")
+ return callbacks
+
+ if not isinstance(callbacks_cfg, DictConfig):
+ raise TypeError("Callbacks config must be a DictConfig!")
+
+ for _, cb_conf in callbacks_cfg.items():
+ if isinstance(cb_conf, DictConfig) and "_target_" in cb_conf:
+ log.info(f"Instantiating callback <{cb_conf._target_}>")
+ callbacks.append(hydra.utils.instantiate(cb_conf))
+
+ return callbacks
+
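+# NOTE: illustrative sketch, not part of the original module. A minimal callbacks
+# config for instantiate_callbacks(); the pytorch_lightning callback targets are
+# assumptions -- any class reachable via `_target_` works the same way.
+def _example_instantiate_callbacks() -> List[Callback]:
+    callbacks_cfg = OmegaConf.create({
+        "model_checkpoint": {
+            "_target_": "pytorch_lightning.callbacks.ModelCheckpoint",
+            "dirpath": "checkpoints/",
+            "monitor": "val/loss",
+        },
+        "lr_monitor": {"_target_": "pytorch_lightning.callbacks.LearningRateMonitor"},
+    })
+    return instantiate_callbacks(callbacks_cfg)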
+
+def instantiate_loggers(logger_cfg: DictConfig) -> List[Logger]:
+ """Instantiates loggers from config."""
+ logger: List[Logger] = []
+
+ if not logger_cfg:
+ log.warning("Logger config is empty.")
+ return logger
+
+ if not isinstance(logger_cfg, DictConfig):
+ raise TypeError("Logger config must be a DictConfig!")
+
+ for _, lg_conf in logger_cfg.items():
+ if isinstance(lg_conf, DictConfig) and "_target_" in lg_conf:
+ log.info(f"Instantiating logger <{lg_conf._target_}>")
+ logger.append(hydra.utils.instantiate(lg_conf))
+
+ return logger
+
+
+@rank_zero_only
+def log_hyperparameters(object_dict: dict) -> None:
+ """Controls which config parts are saved by lightning loggers.
+
+ Additionally saves:
+ - Number of model parameters
+ """
+
+ hparams = {}
+
+ cfg = object_dict["cfg"]
+ model = object_dict["model"]
+ trainer = object_dict["trainer"]
+
+ if not trainer.logger:
+ log.warning("Logger not found! Skipping hyperparameter logging...")
+ return
+
+ # save number of model parameters
+ hparams["model/params/total"] = sum(p.numel() for p in model.parameters())
+ hparams["model/params/trainable"] = sum(
+ p.numel() for p in model.parameters() if p.requires_grad
+ )
+ hparams["model/params/non_trainable"] = sum(
+ p.numel() for p in model.parameters() if not p.requires_grad
+ )
+
+ for k in cfg.keys():
+ hparams[k] = cfg.get(k)
+
+ # Resolve all interpolations
+ def _resolve(_cfg):
+ if isinstance(_cfg, DictConfig):
+ _cfg = OmegaConf.to_container(_cfg, resolve=True)
+ return _cfg
+
+ hparams = {k: _resolve(v) for k, v in hparams.items()}
+
+ # send hparams to all loggers
+ trainer.logger.log_hyperparams(hparams)
+
+
+def get_metric_value(metric_dict: dict, metric_name: str) -> Optional[float]:
+ """Safely retrieves value of the metric logged in LightningModule."""
+
+ if not metric_name:
+ log.info("Metric name is None! Skipping metric value retrieval...")
+ return None
+
+ if metric_name not in metric_dict:
+ raise Exception(
+ f"Metric value not found! \n"
+ "Make sure metric name logged in LightningModule is correct!\n"
+ "Make sure `optimized_metric` name in `hparams_search` config is correct!"
+ )
+
+ metric_value = metric_dict[metric_name].item()
+ log.info(f"Retrieved metric value! <{metric_name}={metric_value}>")
+
+ return metric_value
+
+
+def close_loggers() -> None:
+ """Makes sure all loggers closed properly (prevents logging failure during multirun)."""
+
+ log.info("Closing loggers...")
+
+ if find_spec("wandb"): # if wandb is installed
+ import wandb
+
+ if wandb.run:
+ log.info("Closing wandb!")
+ wandb.finish()
diff --git a/hamer/utils/pose_utils.py b/hamer/utils/pose_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7beb3d626df1c5dc560731da7857a5b5e37a83d
--- /dev/null
+++ b/hamer/utils/pose_utils.py
@@ -0,0 +1,306 @@
+"""
+Code adapted from: https://github.com/akanazawa/hmr/blob/master/src/benchmark/eval_util.py
+"""
+
+import torch
+import numpy as np
+from typing import Optional, Dict, List, Tuple
+
+def compute_similarity_transform(S1: torch.Tensor, S2: torch.Tensor) -> torch.Tensor:
+ """
+    Computes a similarity transform (sR, t), in a batched way, that maps
+    a set of 3D points S1 (B, N, 3) as close as possible to a set of 3D points S2 (B, N, 3),
+    where R is a 3x3 rotation matrix, t a 3x1 translation and s a scale factor,
+    i.e. it solves the orthogonal Procrustes problem.
+ Args:
+ S1 (torch.Tensor): First set of points of shape (B, N, 3).
+ S2 (torch.Tensor): Second set of points of shape (B, N, 3).
+ Returns:
+ (torch.Tensor): The first set of points after applying the similarity transformation.
+ """
+
+ batch_size = S1.shape[0]
+ S1 = S1.permute(0, 2, 1)
+ S2 = S2.permute(0, 2, 1)
+ # 1. Remove mean.
+ mu1 = S1.mean(dim=2, keepdim=True)
+ mu2 = S2.mean(dim=2, keepdim=True)
+ X1 = S1 - mu1
+ X2 = S2 - mu2
+
+ # 2. Compute variance of X1 used for scale.
+ var1 = (X1**2).sum(dim=(1,2))
+
+ # 3. The outer product of X1 and X2.
+ K = torch.matmul(X1, X2.permute(0, 2, 1))
+
+ # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are singular vectors of K.
+ U, s, V = torch.svd(K)
+ Vh = V.permute(0, 2, 1)
+
+ # Construct Z that fixes the orientation of R to get det(R)=1.
+ Z = torch.eye(U.shape[1], device=U.device).unsqueeze(0).repeat(batch_size, 1, 1)
+ Z[:, -1, -1] *= torch.sign(torch.linalg.det(torch.matmul(U, Vh)))
+
+ # Construct R.
+ R = torch.matmul(torch.matmul(V, Z), U.permute(0, 2, 1))
+
+ # 5. Recover scale.
+ trace = torch.matmul(R, K).diagonal(offset=0, dim1=-1, dim2=-2).sum(dim=-1)
+ scale = (trace / var1).unsqueeze(dim=-1).unsqueeze(dim=-1)
+
+ # 6. Recover translation.
+ t = mu2 - scale*torch.matmul(R, mu1)
+
+ # 7. Error:
+ S1_hat = scale*torch.matmul(R, S1) + t
+
+ return S1_hat.permute(0, 2, 1)
+
+def reconstruction_error(S1, S2) -> torch.Tensor:
+    """
+    Computes the mean Euclidean distance between two sets of points S1, S2 after performing Procrustes alignment.
+    Args:
+        S1 (torch.Tensor): First set of points of shape (B, N, 3).
+        S2 (torch.Tensor): Second set of points of shape (B, N, 3).
+    Returns:
+        (torch.Tensor): Reconstruction error per sample, of shape (B,).
+    """
+    S1_hat = compute_similarity_transform(S1, S2)
+    re = torch.sqrt(((S1_hat - S2) ** 2).sum(dim=-1)).mean(dim=-1)
+    return re
+
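+# NOTE: illustrative sketch, not part of the original module. Quick sanity check
+# for the Procrustes alignment above: S2 is a rotated, scaled and translated copy
+# of S1, so the reconstruction error after alignment should be close to zero.
+def _example_procrustes_alignment() -> torch.Tensor:
+    torch.manual_seed(0)
+    S1 = torch.randn(2, 21, 3)                                     # (B, N, 3) source points
+    c, s = np.cos(0.3), np.sin(0.3)                                # rotation about the z-axis
+    R = torch.tensor([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]], dtype=torch.float32)
+    S2 = 1.5 * S1 @ R.T + torch.tensor([0.1, -0.2, 0.3])           # similarity transform of S1
+    return reconstruction_error(S1, S2)                            # ~zero, shape (B,)
+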
+def eval_pose(pred_joints, gt_joints) -> Tuple[np.array, np.array]:
+ """
+ Compute joint errors in mm before and after Procrustes alignment.
+ Args:
+ pred_joints (torch.Tensor): Predicted 3D joints of shape (B, N, 3).
+ gt_joints (torch.Tensor): Ground truth 3D joints of shape (B, N, 3).
+ Returns:
+ Tuple[np.array, np.array]: Joint errors in mm before and after alignment.
+ """
+ # Absolute error (MPJPE)
+ mpjpe = torch.sqrt(((pred_joints - gt_joints) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
+
+ # Reconstruction_error
+ r_error = reconstruction_error(pred_joints, gt_joints).cpu().numpy()
+ return 1000 * mpjpe, 1000 * r_error
+
+class Evaluator:
+
+ def __init__(self,
+ dataset_length: int,
+ keypoint_list: List,
+ pelvis_ind: int,
+ metrics: List = ['mode_mpjpe', 'mode_re', 'min_mpjpe', 'min_re'],
+ pck_thresholds: Optional[List] = None):
+ """
+ Class used for evaluating trained models on different 3D pose datasets.
+ Args:
+            dataset_length (int): Total dataset length.
+            keypoint_list (List): List of keypoints used for evaluation.
+            pelvis_ind (int): Index of the pelvis keypoint; used for aligning the predictions and the ground truth.
+            metrics (List): List of evaluation metrics to record.
+            pck_thresholds (Optional[List]): List of PCK thresholds to evaluate, or None to skip PCK evaluation.
+        """
+ self.dataset_length = dataset_length
+ self.keypoint_list = keypoint_list
+ self.pelvis_ind = pelvis_ind
+ self.metrics = metrics
+ for metric in self.metrics:
+ setattr(self, metric, np.zeros((dataset_length,)))
+ self.counter = 0
+ if pck_thresholds is None:
+ self.pck_evaluator = None
+ else:
+ self.pck_evaluator = EvaluatorPCK(pck_thresholds)
+
+ def log(self):
+ """
+ Print current evaluation metrics
+ """
+ if self.counter == 0:
+ print('Evaluation has not started')
+ return
+ print(f'{self.counter} / {self.dataset_length} samples')
+ if self.pck_evaluator is not None:
+ self.pck_evaluator.log()
+ for metric in self.metrics:
+ if metric in ['mode_mpjpe', 'mode_re', 'min_mpjpe', 'min_re']:
+ unit = 'mm'
+ else:
+ unit = ''
+ print(f'{metric}: {getattr(self, metric)[:self.counter].mean()} {unit}')
+ print('***')
+
+ def get_metrics_dict(self) -> Dict:
+ """
+ Returns:
+ Dict: Dictionary of evaluation metrics.
+ """
+ d1 = {metric: getattr(self, metric)[:self.counter].mean() for metric in self.metrics}
+ if self.pck_evaluator is not None:
+ d2 = self.pck_evaluator.get_metrics_dict()
+ d1.update(d2)
+ return d1
+
+ def __call__(self, output: Dict, batch: Dict, opt_output: Optional[Dict] = None):
+ """
+ Evaluate current batch.
+ Args:
+ output (Dict): Regression output.
+ batch (Dict): Dictionary containing images and their corresponding annotations.
+ opt_output (Dict): Optimization output.
+ """
+ if self.pck_evaluator is not None:
+ self.pck_evaluator(output, batch, opt_output)
+
+ pred_keypoints_3d = output['pred_keypoints_3d'].detach()
+ pred_keypoints_3d = pred_keypoints_3d[:,None,:,:]
+ batch_size = pred_keypoints_3d.shape[0]
+ num_samples = pred_keypoints_3d.shape[1]
+ gt_keypoints_3d = batch['keypoints_3d'][:, :, :-1].unsqueeze(1).repeat(1, num_samples, 1, 1)
+
+ # Align predictions and ground truth such that the pelvis location is at the origin
+ pred_keypoints_3d -= pred_keypoints_3d[:, :, [self.pelvis_ind]]
+ gt_keypoints_3d -= gt_keypoints_3d[:, :, [self.pelvis_ind]]
+
+ # Compute joint errors
+ mpjpe, re = eval_pose(pred_keypoints_3d.reshape(batch_size * num_samples, -1, 3)[:, self.keypoint_list], gt_keypoints_3d.reshape(batch_size * num_samples, -1 ,3)[:, self.keypoint_list])
+ mpjpe = mpjpe.reshape(batch_size, num_samples)
+ re = re.reshape(batch_size, num_samples)
+
+ # Compute 2d keypoint errors
+ pred_keypoints_2d = output['pred_keypoints_2d'].detach()
+ pred_keypoints_2d = pred_keypoints_2d[:,None,:,:]
+ gt_keypoints_2d = batch['keypoints_2d'][:,None,:,:].repeat(1, num_samples, 1, 1)
+ conf = gt_keypoints_2d[:, :, :, -1].clone()
+ kp_err = torch.nn.functional.mse_loss(
+ pred_keypoints_2d,
+ gt_keypoints_2d[:, :, :, :-1],
+ reduction='none'
+ ).sum(dim=3)
+ kp_l2_loss = (conf * kp_err).mean(dim=2)
+ kp_l2_loss = kp_l2_loss.detach().cpu().numpy()
+
+ # Compute joint errors after optimization, if available.
+ if opt_output is not None:
+ opt_keypoints_3d = opt_output['model_joints']
+ opt_keypoints_3d -= opt_keypoints_3d[:, [self.pelvis_ind]]
+ opt_mpjpe, opt_re = eval_pose(opt_keypoints_3d[:, self.keypoint_list], gt_keypoints_3d[:, 0, self.keypoint_list])
+
+ # The 0-th sample always corresponds to the mode
+ if hasattr(self, 'mode_mpjpe'):
+ mode_mpjpe = mpjpe[:, 0]
+ self.mode_mpjpe[self.counter:self.counter+batch_size] = mode_mpjpe
+ if hasattr(self, 'mode_re'):
+ mode_re = re[:, 0]
+ self.mode_re[self.counter:self.counter+batch_size] = mode_re
+ if hasattr(self, 'mode_kpl2'):
+ mode_kpl2 = kp_l2_loss[:, 0]
+ self.mode_kpl2[self.counter:self.counter+batch_size] = mode_kpl2
+ if hasattr(self, 'min_mpjpe'):
+ min_mpjpe = mpjpe.min(axis=-1)
+ self.min_mpjpe[self.counter:self.counter+batch_size] = min_mpjpe
+ if hasattr(self, 'min_re'):
+ min_re = re.min(axis=-1)
+ self.min_re[self.counter:self.counter+batch_size] = min_re
+ if hasattr(self, 'min_kpl2'):
+ min_kpl2 = kp_l2_loss.min(axis=-1)
+ self.min_kpl2[self.counter:self.counter+batch_size] = min_kpl2
+ if hasattr(self, 'opt_mpjpe'):
+ self.opt_mpjpe[self.counter:self.counter+batch_size] = opt_mpjpe
+ if hasattr(self, 'opt_re'):
+ self.opt_re[self.counter:self.counter+batch_size] = opt_re
+
+ self.counter += batch_size
+
+ if hasattr(self, 'mode_mpjpe') and hasattr(self, 'mode_re'):
+ return {
+ 'mode_mpjpe': mode_mpjpe,
+ 'mode_re': mode_re,
+ }
+ else:
+ return {}
+
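+# NOTE: illustrative sketch, not part of the original module. Minimal usage of
+# Evaluator with random predictions for a 21-joint hand; the dict keys mirror the
+# ones read in Evaluator.__call__ above.
+def _example_evaluator_step() -> Dict:
+    batch_size, num_joints = 4, 21
+    evaluator = Evaluator(dataset_length=batch_size,
+                          keypoint_list=list(range(num_joints)),
+                          pelvis_ind=0)
+    output = {
+        'pred_keypoints_3d': torch.randn(batch_size, num_joints, 3),
+        'pred_keypoints_2d': torch.randn(batch_size, num_joints, 2),
+    }
+    batch = {
+        'keypoints_3d': torch.randn(batch_size, num_joints, 4),    # last column: confidence
+        'keypoints_2d': torch.randn(batch_size, num_joints, 3),    # last column: confidence
+    }
+    evaluator(output, batch)
+    return evaluator.get_metrics_dict()                            # e.g. {'mode_mpjpe': ..., 'mode_re': ...}
+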
+
+class EvaluatorPCK:
+
+ def __init__(self, thresholds: List = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5],):
+        """
+        Class used for evaluating 2D keypoint accuracy (PCK) of trained models at different thresholds.
+        Args:
+            thresholds (List): List of PCK thresholds to evaluate.
+        """
+ self.thresholds = thresholds
+ self.pred_kp_2d = []
+ self.gt_kp_2d = []
+ self.gt_conf_2d = []
+ self.counter = 0
+
+ def log(self):
+ """
+ Print current evaluation metrics
+ """
+ if self.counter == 0:
+ print('Evaluation has not started')
+ return
+ print(f'{self.counter} samples')
+ metrics_dict = self.get_metrics_dict()
+ for metric in metrics_dict:
+ print(f'{metric}: {metrics_dict[metric]}')
+ print('***')
+
+ def get_metrics_dict(self) -> Dict:
+ """
+ Returns:
+ Dict: Dictionary of evaluation metrics.
+ """
+ pcks = self.compute_pcks()
+ metrics = {}
+ for thr, (acc,avg_acc,cnt) in zip(self.thresholds, pcks):
+ metrics.update({f'kp{i}_pck_{thr}': float(a) for i, a in enumerate(acc) if a>=0})
+ metrics.update({f'kpAvg_pck_{thr}': float(avg_acc)})
+ return metrics
+
+ def compute_pcks(self):
+ pred_kp_2d = np.concatenate(self.pred_kp_2d, axis=0)
+ gt_kp_2d = np.concatenate(self.gt_kp_2d, axis=0)
+ gt_conf_2d = np.concatenate(self.gt_conf_2d, axis=0)
+ assert pred_kp_2d.shape == gt_kp_2d.shape
+ assert pred_kp_2d[..., 0].shape == gt_conf_2d.shape
+ assert pred_kp_2d.shape[1] == 1 # num_samples
+
+ from mmpose.core.evaluation import keypoint_pck_accuracy
+ pcks = [
+ keypoint_pck_accuracy(
+ pred_kp_2d[:, 0, :, :],
+ gt_kp_2d[:, 0, :, :],
+ gt_conf_2d[:, 0, :]>0.5,
+ thr=thr,
+ normalize = np.ones((len(pred_kp_2d),2)) # Already in [-0.5,0.5] range. No need to normalize
+ )
+ for thr in self.thresholds
+ ]
+ return pcks
+
+ def __call__(self, output: Dict, batch: Dict, opt_output: Optional[Dict] = None):
+ """
+ Evaluate current batch.
+ Args:
+ output (Dict): Regression output.
+ batch (Dict): Dictionary containing images and their corresponding annotations.
+ opt_output (Dict): Optimization output.
+ """
+ pred_keypoints_2d = output['pred_keypoints_2d'].detach()
+ num_samples = 1
+ batch_size = pred_keypoints_2d.shape[0]
+
+ pred_keypoints_2d = pred_keypoints_2d[:,None,:,:]
+ gt_keypoints_2d = batch['keypoints_2d'][:,None,:,:].repeat(1, num_samples, 1, 1)
+
+ self.pred_kp_2d.append(pred_keypoints_2d[:, :, :, :2].detach().cpu().numpy())
+ self.gt_conf_2d.append(gt_keypoints_2d[:, :, :, -1].detach().cpu().numpy())
+ self.gt_kp_2d.append(gt_keypoints_2d[:, :, :, :2].detach().cpu().numpy())
+
+ self.counter += batch_size
diff --git a/hamer/utils/pylogger.py b/hamer/utils/pylogger.py
new file mode 100644
index 0000000000000000000000000000000000000000..92ffa71893ec20acde65e44d899334a38d8d1333
--- /dev/null
+++ b/hamer/utils/pylogger.py
@@ -0,0 +1,17 @@
+import logging
+
+from pytorch_lightning.utilities import rank_zero_only
+
+
+def get_pylogger(name=__name__) -> logging.Logger:
+ """Initializes multi-GPU-friendly python command line logger."""
+
+ logger = logging.getLogger(name)
+
+ # this ensures all logging levels get marked with the rank zero decorator
+ # otherwise logs would get multiplied for each GPU process in multi-GPU setup
+ logging_levels = ("debug", "info", "warning", "error", "exception", "fatal", "critical")
+ for level in logging_levels:
+ setattr(logger, level, rank_zero_only(getattr(logger, level)))
+
+ return logger
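+
+
+# NOTE: illustrative sketch, not part of the original module. Typical usage:
+# one logger per module; under DDP only the rank-zero process emits records.
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    log = get_pylogger(__name__)
+    log.info("multi-GPU-safe logging, emitted on rank 0 only")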
diff --git a/hamer/utils/render_openpose.py b/hamer/utils/render_openpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb1e4b5f17d68edb887c65886d791090c5aa8a59
--- /dev/null
+++ b/hamer/utils/render_openpose.py
@@ -0,0 +1,191 @@
+"""
+Render OpenPose keypoints.
+Code was ported to Python from the official C++ implementation https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/utilities/keypoint.cpp
+"""
+import cv2
+import math
+import numpy as np
+from typing import List, Tuple
+
+def get_keypoints_rectangle(keypoints: np.array, threshold: float) -> Tuple[float, float, float]:
+ """
+ Compute rectangle enclosing keypoints above the threshold.
+ Args:
+ keypoints (np.array): Keypoint array of shape (N, 3).
+ threshold (float): Confidence visualization threshold.
+ Returns:
+ Tuple[float, float, float]: Rectangle width, height and area.
+ """
+ valid_ind = keypoints[:, -1] > threshold
+ if valid_ind.sum() > 0:
+ valid_keypoints = keypoints[valid_ind][:, :-1]
+ max_x = valid_keypoints[:,0].max()
+ max_y = valid_keypoints[:,1].max()
+ min_x = valid_keypoints[:,0].min()
+ min_y = valid_keypoints[:,1].min()
+ width = max_x - min_x
+ height = max_y - min_y
+ area = width * height
+ return width, height, area
+ else:
+ return 0,0,0
+
+def render_keypoints(img: np.array,
+ keypoints: np.array,
+ pairs: List,
+ colors: List,
+ thickness_circle_ratio: float,
+ thickness_line_ratio_wrt_circle: float,
+ pose_scales: List,
+ threshold: float = 0.1,
+ alpha: float = 1.0) -> np.array:
+ """
+ Render keypoints on input image.
+ Args:
+ img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
+ keypoints (np.array): Keypoint array of shape (N, 3).
+ pairs (List): List of keypoint pairs per limb.
+ colors: (List): List of colors per keypoint.
+ thickness_circle_ratio (float): Circle thickness ratio.
+ thickness_line_ratio_wrt_circle (float): Line thickness ratio wrt the circle.
+ pose_scales (List): List of pose scales.
+ threshold (float): Only visualize keypoints with confidence above the threshold.
+ Returns:
+ (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
+ """
+ img_orig = img.copy()
+    height, width = img.shape[0], img.shape[1]  # img is (H, W, 3)
+ area = width * height
+
+ lineType = 8
+ shift = 0
+ numberColors = len(colors)
+ thresholdRectangle = 0.1
+
+ person_width, person_height, person_area = get_keypoints_rectangle(keypoints, thresholdRectangle)
+ if person_area > 0:
+ ratioAreas = min(1, max(person_width / width, person_height / height))
+ thicknessRatio = np.maximum(np.round(math.sqrt(area) * thickness_circle_ratio * ratioAreas), 2)
+ thicknessCircle = np.maximum(1, thicknessRatio if ratioAreas > 0.05 else -np.ones_like(thicknessRatio))
+ thicknessLine = np.maximum(1, np.round(thicknessRatio * thickness_line_ratio_wrt_circle))
+ radius = thicknessRatio / 2
+
+ img = np.ascontiguousarray(img.copy())
+ for i, pair in enumerate(pairs):
+ index1, index2 = pair
+ if keypoints[index1, -1] > threshold and keypoints[index2, -1] > threshold:
+ thicknessLineScaled = int(round(min(thicknessLine[index1], thicknessLine[index2]) * pose_scales[0]))
+ colorIndex = index2
+ color = colors[colorIndex % numberColors]
+                keypoint1 = keypoints[index1, :-1].astype(np.int32)
+                keypoint2 = keypoints[index2, :-1].astype(np.int32)
+ cv2.line(img, tuple(keypoint1.tolist()), tuple(keypoint2.tolist()), tuple(color.tolist()), thicknessLineScaled, lineType, shift)
+ for part in range(len(keypoints)):
+ faceIndex = part
+ if keypoints[faceIndex, -1] > threshold:
+ radiusScaled = int(round(radius[faceIndex] * pose_scales[0]))
+ thicknessCircleScaled = int(round(thicknessCircle[faceIndex] * pose_scales[0]))
+ colorIndex = part
+ color = colors[colorIndex % numberColors]
+                center = keypoints[faceIndex, :-1].astype(np.int32)
+ cv2.circle(img, tuple(center.tolist()), radiusScaled, tuple(color.tolist()), thicknessCircleScaled, lineType, shift)
+ return img
+
+def render_hand_keypoints(img, right_hand_keypoints, threshold=0.1, use_confidence=False, map_fn=lambda x: np.ones_like(x), alpha=1.0):
+ if use_confidence and map_fn is not None:
+ #thicknessCircleRatioLeft = 1./50 * map_fn(left_hand_keypoints[:, -1])
+ thicknessCircleRatioRight = 1./50 * map_fn(right_hand_keypoints[:, -1])
+ else:
+ #thicknessCircleRatioLeft = 1./50 * np.ones(left_hand_keypoints.shape[0])
+ thicknessCircleRatioRight = 1./50 * np.ones(right_hand_keypoints.shape[0])
+ thicknessLineRatioWRTCircle = 0.75
+ pairs = [0,1, 1,2, 2,3, 3,4, 0,5, 5,6, 6,7, 7,8, 0,9, 9,10, 10,11, 11,12, 0,13, 13,14, 14,15, 15,16, 0,17, 17,18, 18,19, 19,20]
+ pairs = np.array(pairs).reshape(-1,2)
+
+ colors = [100., 100., 100.,
+ 100., 0., 0.,
+ 150., 0., 0.,
+ 200., 0., 0.,
+ 255., 0., 0.,
+ 100., 100., 0.,
+ 150., 150., 0.,
+ 200., 200., 0.,
+ 255., 255., 0.,
+ 0., 100., 50.,
+ 0., 150., 75.,
+ 0., 200., 100.,
+ 0., 255., 125.,
+ 0., 50., 100.,
+ 0., 75., 150.,
+ 0., 100., 200.,
+ 0., 125., 255.,
+ 100., 0., 100.,
+ 150., 0., 150.,
+ 200., 0., 200.,
+ 255., 0., 255.]
+ colors = np.array(colors).reshape(-1,3)
+ #colors = np.zeros_like(colors)
+ poseScales = [1]
+ #img = render_keypoints(img, left_hand_keypoints, pairs, colors, thicknessCircleRatioLeft, thicknessLineRatioWRTCircle, poseScales, threshold, alpha=alpha)
+ img = render_keypoints(img, right_hand_keypoints, pairs, colors, thicknessCircleRatioRight, thicknessLineRatioWRTCircle, poseScales, threshold, alpha=alpha)
+ #img = render_keypoints(img, right_hand_keypoints, pairs, colors, thickness_circle_ratio, thickness_line_ratio_wrt_circle, pose_scales, 0.1)
+ return img
+
+def render_body_keypoints(img: np.array,
+ body_keypoints: np.array) -> np.array:
+ """
+ Render OpenPose body keypoints on input image.
+ Args:
+ img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
+ body_keypoints (np.array): Keypoint array of shape (N, 3); 3 <====> (x, y, confidence).
+ Returns:
+ (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
+ """
+
+ thickness_circle_ratio = 1./75. * np.ones(body_keypoints.shape[0])
+ thickness_line_ratio_wrt_circle = 0.75
+ pairs = []
+ pairs = [1,8,1,2,1,5,2,3,3,4,5,6,6,7,8,9,9,10,10,11,8,12,12,13,13,14,1,0,0,15,15,17,0,16,16,18,14,19,19,20,14,21,11,22,22,23,11,24]
+ pairs = np.array(pairs).reshape(-1,2)
+ colors = [255., 0., 85.,
+ 255., 0., 0.,
+ 255., 85., 0.,
+ 255., 170., 0.,
+ 255., 255., 0.,
+ 170., 255., 0.,
+ 85., 255., 0.,
+ 0., 255., 0.,
+ 255., 0., 0.,
+ 0., 255., 85.,
+ 0., 255., 170.,
+ 0., 255., 255.,
+ 0., 170., 255.,
+ 0., 85., 255.,
+ 0., 0., 255.,
+ 255., 0., 170.,
+ 170., 0., 255.,
+ 255., 0., 255.,
+ 85., 0., 255.,
+ 0., 0., 255.,
+ 0., 0., 255.,
+ 0., 0., 255.,
+ 0., 255., 255.,
+ 0., 255., 255.,
+ 0., 255., 255.]
+ colors = np.array(colors).reshape(-1,3)
+ pose_scales = [1]
+ return render_keypoints(img, body_keypoints, pairs, colors, thickness_circle_ratio, thickness_line_ratio_wrt_circle, pose_scales, 0.1)
+
+def render_openpose(img: np.array,
+ hand_keypoints: np.array) -> np.array:
+ """
+ Render keypoints in the OpenPose format on input image.
+ Args:
+ img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
+        hand_keypoints (np.array): Hand keypoint array of shape (N, 3); 3 <====> (x, y, confidence).
+ Returns:
+ (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
+ """
+ #img = render_body_keypoints(img, body_keypoints)
+ img = render_hand_keypoints(img, hand_keypoints)
+ return img
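+
+
+# NOTE: illustrative sketch, not part of the original module. Draws the
+# 21-keypoint OpenPose hand skeleton on a blank canvas; the keypoint locations
+# are random and only meant to exercise the rendering path.
+if __name__ == "__main__":
+    rng = np.random.default_rng(0)
+    canvas = np.zeros((256, 256, 3), dtype=np.float32)             # (H, W, 3), [0,255] range
+    xy = rng.uniform(32, 224, size=(21, 2)).astype(np.float32)     # random pixel coordinates
+    conf = np.ones((21, 1), dtype=np.float32)                      # full confidence
+    keypoints = np.concatenate([xy, conf], axis=1)                 # (21, 3): x, y, confidence
+    vis = render_openpose(canvas, keypoints)
+    cv2.imwrite("openpose_hand_sketch.jpg", vis[:, :, ::-1].astype(np.uint8))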
diff --git a/hamer/utils/renderer.py b/hamer/utils/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c33a36379ccc99099cf141403360d81de01467f
--- /dev/null
+++ b/hamer/utils/renderer.py
@@ -0,0 +1,423 @@
+import os
+#if 'PYOPENGL_PLATFORM' not in os.environ:
+# os.environ['PYOPENGL_PLATFORM'] = 'egl'
+import torch
+import numpy as np
+import pyrender
+import trimesh
+import cv2
+from yacs.config import CfgNode
+from typing import List, Optional
+
+def cam_crop_to_full(cam_bbox, box_center, box_size, img_size, focal_length=5000.):
+ # Convert cam_bbox to full image
+ img_w, img_h = img_size[:, 0], img_size[:, 1]
+ cx, cy, b = box_center[:, 0], box_center[:, 1], box_size
+ w_2, h_2 = img_w / 2., img_h / 2.
+ bs = b * cam_bbox[:, 0] + 1e-9
+ tz = 2 * focal_length / bs
+ tx = (2 * (cx - w_2) / bs) + cam_bbox[:, 1]
+ ty = (2 * (cy - h_2) / bs) + cam_bbox[:, 2]
+ full_cam = torch.stack([tx, ty, tz], dim=-1)
+ return full_cam
+
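+# NOTE: illustrative sketch, not part of the original module. Converts a
+# weak-perspective camera predicted for a 256px crop into a translation in the
+# full-image camera frame; all numbers below are arbitrary placeholders.
+def _example_cam_crop_to_full() -> torch.Tensor:
+    cam_bbox = torch.tensor([[0.9, 0.05, -0.02]])    # (s, tx, ty) predicted for the crop
+    box_center = torch.tensor([[640.0, 360.0]])      # crop center in full-image pixels
+    box_size = torch.tensor([256.0])                 # crop side length in pixels
+    img_size = torch.tensor([[1280.0, 720.0]])       # full image (width, height)
+    return cam_crop_to_full(cam_bbox, box_center, box_size, img_size, focal_length=5000.)
+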
+def get_light_poses(n_lights=5, elevation=np.pi / 3, dist=12):
+ # get lights in a circle around origin at elevation
+ thetas = elevation * np.ones(n_lights)
+ phis = 2 * np.pi * np.arange(n_lights) / n_lights
+ poses = []
+ trans = make_translation(torch.tensor([0, 0, dist]))
+ for phi, theta in zip(phis, thetas):
+ rot = make_rotation(rx=-theta, ry=phi, order="xyz")
+ poses.append((rot @ trans).numpy())
+ return poses
+
+def make_translation(t):
+ return make_4x4_pose(torch.eye(3), t)
+
+def make_rotation(rx=0, ry=0, rz=0, order="xyz"):
+ Rx = rotx(rx)
+ Ry = roty(ry)
+ Rz = rotz(rz)
+ if order == "xyz":
+ R = Rz @ Ry @ Rx
+ elif order == "xzy":
+ R = Ry @ Rz @ Rx
+ elif order == "yxz":
+ R = Rz @ Rx @ Ry
+ elif order == "yzx":
+ R = Rx @ Rz @ Ry
+ elif order == "zyx":
+ R = Rx @ Ry @ Rz
+ elif order == "zxy":
+ R = Ry @ Rx @ Rz
+    else:
+        raise ValueError(f"Unknown rotation order: {order}")
+    return make_4x4_pose(R, torch.zeros(3))
+
+def make_4x4_pose(R, t):
+ """
+ :param R (*, 3, 3)
+ :param t (*, 3)
+ return (*, 4, 4)
+ """
+ dims = R.shape[:-2]
+ pose_3x4 = torch.cat([R, t.view(*dims, 3, 1)], dim=-1)
+ bottom = (
+ torch.tensor([0, 0, 0, 1], device=R.device)
+ .reshape(*(1,) * len(dims), 1, 4)
+ .expand(*dims, 1, 4)
+ )
+ return torch.cat([pose_3x4, bottom], dim=-2)
+
+
+def rotx(theta):
+ return torch.tensor(
+ [
+ [1, 0, 0],
+ [0, np.cos(theta), -np.sin(theta)],
+ [0, np.sin(theta), np.cos(theta)],
+ ],
+ dtype=torch.float32,
+ )
+
+
+def roty(theta):
+ return torch.tensor(
+ [
+ [np.cos(theta), 0, np.sin(theta)],
+ [0, 1, 0],
+ [-np.sin(theta), 0, np.cos(theta)],
+ ],
+ dtype=torch.float32,
+ )
+
+
+def rotz(theta):
+ return torch.tensor(
+ [
+ [np.cos(theta), -np.sin(theta), 0],
+ [np.sin(theta), np.cos(theta), 0],
+ [0, 0, 1],
+ ],
+ dtype=torch.float32,
+ )
+
+
+def create_raymond_lights() -> List[pyrender.Node]:
+ """
+ Return raymond light nodes for the scene.
+ """
+ thetas = np.pi * np.array([1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
+ phis = np.pi * np.array([0.0, 2.0 / 3.0, 4.0 / 3.0])
+
+ nodes = []
+
+ for phi, theta in zip(phis, thetas):
+ xp = np.sin(theta) * np.cos(phi)
+ yp = np.sin(theta) * np.sin(phi)
+ zp = np.cos(theta)
+
+ z = np.array([xp, yp, zp])
+ z = z / np.linalg.norm(z)
+ x = np.array([-z[1], z[0], 0.0])
+ if np.linalg.norm(x) == 0:
+ x = np.array([1.0, 0.0, 0.0])
+ x = x / np.linalg.norm(x)
+ y = np.cross(z, x)
+
+ matrix = np.eye(4)
+ matrix[:3,:3] = np.c_[x,y,z]
+ nodes.append(pyrender.Node(
+ light=pyrender.DirectionalLight(color=np.ones(3), intensity=1.0),
+ matrix=matrix
+ ))
+
+ return nodes
+
+class Renderer:
+
+ def __init__(self, cfg: CfgNode, faces: np.array):
+ """
+ Wrapper around the pyrender renderer to render MANO meshes.
+ Args:
+ cfg (CfgNode): Model config file.
+ faces (np.array): Array of shape (F, 3) containing the mesh faces.
+ """
+ self.cfg = cfg
+ self.focal_length = cfg.EXTRA.FOCAL_LENGTH
+ self.img_res = cfg.MODEL.IMAGE_SIZE
+
+ # add faces that make the hand mesh watertight
+ faces_new = np.array([[92, 38, 234],
+ [234, 38, 239],
+ [38, 122, 239],
+ [239, 122, 279],
+ [122, 118, 279],
+ [279, 118, 215],
+ [118, 117, 215],
+ [215, 117, 214],
+ [117, 119, 214],
+ [214, 119, 121],
+ [119, 120, 121],
+ [121, 120, 78],
+ [120, 108, 78],
+ [78, 108, 79]])
+ faces = np.concatenate([faces, faces_new], axis=0)
+
+ self.camera_center = [self.img_res // 2, self.img_res // 2]
+ self.faces = faces
+ self.faces_left = self.faces[:,[0,2,1]]
+
+ def __call__(self,
+ vertices: np.array,
+ camera_translation: np.array,
+ image: torch.Tensor,
+ full_frame: bool = False,
+ imgname: Optional[str] = None,
+ side_view=False, rot_angle=90,
+ mesh_base_color=(1.0, 1.0, 0.9),
+ scene_bg_color=(0,0,0),
+ return_rgba=False,
+ ) -> np.array:
+ """
+ Render meshes on input image
+ Args:
+ vertices (np.array): Array of shape (V, 3) containing the mesh vertices.
+ camera_translation (np.array): Array of shape (3,) with the camera translation.
+ image (torch.Tensor): Tensor of shape (3, H, W) containing the image crop with normalized pixel values.
+ full_frame (bool): If True, then render on the full image.
+            imgname (Optional[str]): Contains the original image filename. Used only if full_frame == True.
+ """
+
+ if full_frame:
+ image = cv2.imread(imgname).astype(np.float32)[:, :, ::-1] / 255.
+ else:
+ image = image.clone() * torch.tensor(self.cfg.MODEL.IMAGE_STD, device=image.device).reshape(3,1,1)
+ image = image + torch.tensor(self.cfg.MODEL.IMAGE_MEAN, device=image.device).reshape(3,1,1)
+ image = image.permute(1, 2, 0).cpu().numpy()
+
+ renderer = pyrender.OffscreenRenderer(viewport_width=image.shape[1],
+ viewport_height=image.shape[0],
+ point_size=1.0)
+ material = pyrender.MetallicRoughnessMaterial(
+ metallicFactor=0.0,
+ alphaMode='OPAQUE',
+ baseColorFactor=(*mesh_base_color, 1.0))
+
+ camera_translation[0] *= -1.
+
+ mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())
+ if side_view:
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(rot_angle), [0, 1, 0])
+ mesh.apply_transform(rot)
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(180), [1, 0, 0])
+ mesh.apply_transform(rot)
+ mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
+
+ scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
+ ambient_light=(0.3, 0.3, 0.3))
+ scene.add(mesh, 'mesh')
+
+ camera_pose = np.eye(4)
+ camera_pose[:3, 3] = camera_translation
+ camera_center = [image.shape[1] / 2., image.shape[0] / 2.]
+ camera = pyrender.IntrinsicsCamera(fx=self.focal_length, fy=self.focal_length,
+ cx=camera_center[0], cy=camera_center[1], zfar=1e12)
+ scene.add(camera, pose=camera_pose)
+
+
+ light_nodes = create_raymond_lights()
+ for node in light_nodes:
+ scene.add_node(node)
+
+ color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
+ color = color.astype(np.float32) / 255.0
+ renderer.delete()
+
+ if return_rgba:
+ return color
+
+ valid_mask = (color[:, :, -1])[:, :, np.newaxis]
+ if not side_view:
+ output_img = (color[:, :, :3] * valid_mask + (1 - valid_mask) * image)
+ else:
+ output_img = color[:, :, :3]
+
+ output_img = output_img.astype(np.float32)
+ return output_img
+
+ def vertices_to_trimesh(self, vertices, camera_translation, mesh_base_color=(1.0, 1.0, 0.9),
+ rot_axis=[1,0,0], rot_angle=0, is_right=1):
+ # material = pyrender.MetallicRoughnessMaterial(
+ # metallicFactor=0.0,
+ # alphaMode='OPAQUE',
+ # baseColorFactor=(*mesh_base_color, 1.0))
+ vertex_colors = np.array([(*mesh_base_color, 1.0)] * vertices.shape[0])
+ if is_right:
+ mesh = trimesh.Trimesh(vertices.copy() + camera_translation, self.faces.copy(), vertex_colors=vertex_colors)
+ else:
+ mesh = trimesh.Trimesh(vertices.copy() + camera_translation, self.faces_left.copy(), vertex_colors=vertex_colors)
+ # mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())
+
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(rot_angle), rot_axis)
+ mesh.apply_transform(rot)
+
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(180), [1, 0, 0])
+ mesh.apply_transform(rot)
+ return mesh
+
+ def render_rgba(
+ self,
+ vertices: np.array,
+ cam_t = None,
+ rot=None,
+ rot_axis=[1,0,0],
+ rot_angle=0,
+ camera_z=3,
+ # camera_translation: np.array,
+ mesh_base_color=(1.0, 1.0, 0.9),
+ scene_bg_color=(0,0,0),
+ render_res=[256, 256],
+ focal_length=None,
+ is_right=None,
+ ):
+
+ renderer = pyrender.OffscreenRenderer(viewport_width=render_res[0],
+ viewport_height=render_res[1],
+ point_size=1.0)
+ # material = pyrender.MetallicRoughnessMaterial(
+ # metallicFactor=0.0,
+ # alphaMode='OPAQUE',
+ # baseColorFactor=(*mesh_base_color, 1.0))
+
+ focal_length = focal_length if focal_length is not None else self.focal_length
+
+ if cam_t is not None:
+ camera_translation = cam_t.copy()
+ camera_translation[0] *= -1.
+ else:
+ camera_translation = np.array([0, 0, camera_z * focal_length/render_res[1]])
+
+ mesh = self.vertices_to_trimesh(vertices, np.array([0, 0, 0]), mesh_base_color, rot_axis, rot_angle, is_right=is_right)
+ mesh = pyrender.Mesh.from_trimesh(mesh)
+ # mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
+
+ scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
+ ambient_light=(0.3, 0.3, 0.3))
+ scene.add(mesh, 'mesh')
+
+ camera_pose = np.eye(4)
+ camera_pose[:3, 3] = camera_translation
+ camera_center = [render_res[0] / 2., render_res[1] / 2.]
+ camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
+ cx=camera_center[0], cy=camera_center[1], zfar=1e12)
+
+ # Create camera node and add it to pyRender scene
+ camera_node = pyrender.Node(camera=camera, matrix=camera_pose)
+ scene.add_node(camera_node)
+ self.add_point_lighting(scene, camera_node)
+ self.add_lighting(scene, camera_node)
+
+ light_nodes = create_raymond_lights()
+ for node in light_nodes:
+ scene.add_node(node)
+
+ color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
+ color = color.astype(np.float32) / 255.0
+ renderer.delete()
+
+ return color
+
+ def render_rgba_multiple(
+ self,
+ vertices: List[np.array],
+ cam_t: List[np.array],
+ rot_axis=[1,0,0],
+ rot_angle=0,
+ mesh_base_color=(1.0, 1.0, 0.9),
+ scene_bg_color=(0,0,0),
+ render_res=[256, 256],
+ focal_length=None,
+ is_right=None,
+ ):
+
+ renderer = pyrender.OffscreenRenderer(viewport_width=render_res[0],
+ viewport_height=render_res[1],
+ point_size=1.0)
+ # material = pyrender.MetallicRoughnessMaterial(
+ # metallicFactor=0.0,
+ # alphaMode='OPAQUE',
+ # baseColorFactor=(*mesh_base_color, 1.0))
+
+ if is_right is None:
+ is_right = [1 for _ in range(len(vertices))]
+
+ mesh_list = [pyrender.Mesh.from_trimesh(self.vertices_to_trimesh(vvv, ttt.copy(), mesh_base_color, rot_axis, rot_angle, is_right=sss)) for vvv,ttt,sss in zip(vertices, cam_t, is_right)]
+
+ scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
+ ambient_light=(0.3, 0.3, 0.3))
+ for i,mesh in enumerate(mesh_list):
+ scene.add(mesh, f'mesh_{i}')
+
+ camera_pose = np.eye(4)
+ # camera_pose[:3, 3] = camera_translation
+ camera_center = [render_res[0] / 2., render_res[1] / 2.]
+ focal_length = focal_length if focal_length is not None else self.focal_length
+ camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
+ cx=camera_center[0], cy=camera_center[1], zfar=1e12)
+
+ # Create camera node and add it to pyRender scene
+ camera_node = pyrender.Node(camera=camera, matrix=camera_pose)
+ scene.add_node(camera_node)
+ self.add_point_lighting(scene, camera_node)
+ self.add_lighting(scene, camera_node)
+
+ light_nodes = create_raymond_lights()
+ for node in light_nodes:
+ scene.add_node(node)
+
+ color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
+ color = color.astype(np.float32) / 255.0
+ renderer.delete()
+
+ return color
+
+ def add_lighting(self, scene, cam_node, color=np.ones(3), intensity=1.0):
+ # from phalp.visualize.py_renderer import get_light_poses
+ light_poses = get_light_poses()
+ light_poses.append(np.eye(4))
+ cam_pose = scene.get_pose(cam_node)
+ for i, pose in enumerate(light_poses):
+ matrix = cam_pose @ pose
+ node = pyrender.Node(
+ name=f"light-{i:02d}",
+ light=pyrender.DirectionalLight(color=color, intensity=intensity),
+ matrix=matrix,
+ )
+ if scene.has_node(node):
+ continue
+ scene.add_node(node)
+
+ def add_point_lighting(self, scene, cam_node, color=np.ones(3), intensity=1.0):
+ # from phalp.visualize.py_renderer import get_light_poses
+ light_poses = get_light_poses(dist=0.5)
+ light_poses.append(np.eye(4))
+ cam_pose = scene.get_pose(cam_node)
+ for i, pose in enumerate(light_poses):
+ matrix = cam_pose @ pose
+ # node = pyrender.Node(
+ # name=f"light-{i:02d}",
+ # light=pyrender.DirectionalLight(color=color, intensity=intensity),
+ # matrix=matrix,
+ # )
+ node = pyrender.Node(
+ name=f"plight-{i:02d}",
+ light=pyrender.PointLight(color=color, intensity=intensity),
+ matrix=matrix,
+ )
+ if scene.has_node(node):
+ continue
+ scene.add_node(node)
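+
+
+# NOTE: illustrative sketch, not part of the original module. Renders an RGBA
+# view of a dummy mesh with the Renderer above; a MANO model is not needed, any
+# mesh with more than 280 vertices works (the watertight faces appended in
+# __init__ index vertices up to 279). Requires a working offscreen pyrender
+# backend (EGL or OSMesa); the config keys are the ones read in Renderer.__init__.
+def _example_render_rgba() -> np.ndarray:
+    cfg = CfgNode()
+    cfg.EXTRA = CfgNode()
+    cfg.EXTRA.FOCAL_LENGTH = 5000
+    cfg.MODEL = CfgNode()
+    cfg.MODEL.IMAGE_SIZE = 256
+
+    sphere = trimesh.creation.icosphere(subdivisions=3, radius=0.01)   # 642 vertices
+    renderer = Renderer(cfg, faces=np.asarray(sphere.faces))
+    rgba = renderer.render_rgba(np.asarray(sphere.vertices),
+                                cam_t=np.array([0.0, 0.0, 0.5]),
+                                render_res=[256, 256],
+                                is_right=1)
+    return rgba                                                        # (256, 256, 4) floats in [0, 1]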
diff --git a/hamer/utils/rich_utils.py b/hamer/utils/rich_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..19f97494ed2958ec2c3d75c772360b5367f2dc7b
--- /dev/null
+++ b/hamer/utils/rich_utils.py
@@ -0,0 +1,105 @@
+from pathlib import Path
+from typing import Sequence
+
+import rich
+import rich.syntax
+import rich.tree
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig, OmegaConf, open_dict
+from pytorch_lightning.utilities import rank_zero_only
+from rich.prompt import Prompt
+
+from . import pylogger
+
+log = pylogger.get_pylogger(__name__)
+
+
+@rank_zero_only
+def print_config_tree(
+ cfg: DictConfig,
+ print_order: Sequence[str] = (
+ "datamodule",
+ "model",
+ "callbacks",
+ "logger",
+ "trainer",
+ "paths",
+ "extras",
+ ),
+ resolve: bool = False,
+ save_to_file: bool = False,
+) -> None:
+ """Prints content of DictConfig using Rich library and its tree structure.
+
+ Args:
+ cfg (DictConfig): Configuration composed by Hydra.
+ print_order (Sequence[str], optional): Determines in what order config components are printed.
+ resolve (bool, optional): Whether to resolve reference fields of DictConfig.
+ save_to_file (bool, optional): Whether to export config to the hydra output folder.
+ """
+
+ style = "dim"
+ tree = rich.tree.Tree("CONFIG", style=style, guide_style=style)
+
+ queue = []
+
+ # add fields from `print_order` to queue
+ for field in print_order:
+        if field in cfg:
+            queue.append(field)
+        else:
+            log.warning(f"Field '{field}' not found in config. Skipping '{field}' config printing...")
+
+ # add all the other fields to queue (not specified in `print_order`)
+ for field in cfg:
+ if field not in queue:
+ queue.append(field)
+
+ # generate config tree from queue
+ for field in queue:
+ branch = tree.add(field, style=style, guide_style=style)
+
+ config_group = cfg[field]
+ if isinstance(config_group, DictConfig):
+ branch_content = OmegaConf.to_yaml(config_group, resolve=resolve)
+ else:
+ branch_content = str(config_group)
+
+ branch.add(rich.syntax.Syntax(branch_content, "yaml"))
+
+ # print config tree
+ rich.print(tree)
+
+ # save config tree to file
+ if save_to_file:
+ with open(Path(cfg.paths.output_dir, "config_tree.log"), "w") as file:
+ rich.print(tree, file=file)
+
+
+@rank_zero_only
+def enforce_tags(cfg: DictConfig, save_to_file: bool = False) -> None:
+ """Prompts user to input tags from command line if no tags are provided in config."""
+
+ if not cfg.get("tags"):
+ if "id" in HydraConfig().cfg.hydra.job:
+ raise ValueError("Specify tags before launching a multirun!")
+
+ log.warning("No tags provided in config. Prompting user to input tags...")
+ tags = Prompt.ask("Enter a list of comma separated tags", default="dev")
+ tags = [t.strip() for t in tags.split(",") if t != ""]
+
+ with open_dict(cfg):
+ cfg.tags = tags
+
+ log.info(f"Tags: {cfg.tags}")
+
+ if save_to_file:
+ with open(Path(cfg.paths.output_dir, "tags.log"), "w") as file:
+ rich.print(cfg.tags, file=file)
+
+
+if __name__ == "__main__":
+ from hydra import compose, initialize
+
+ with initialize(version_base="1.2", config_path="../../configs"):
+ cfg = compose(config_name="train.yaml", return_hydra_config=False, overrides=[])
+ print_config_tree(cfg, resolve=False, save_to_file=False)
diff --git a/hamer/utils/skeleton_renderer.py b/hamer/utils/skeleton_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..46a5df75bff887eab00984eeb5be3c1f6e752960
--- /dev/null
+++ b/hamer/utils/skeleton_renderer.py
@@ -0,0 +1,124 @@
+import torch
+import numpy as np
+import trimesh
+from typing import Optional
+from yacs.config import CfgNode
+
+from .geometry import perspective_projection
+from .render_openpose import render_openpose
+
+class SkeletonRenderer:
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Object used to render 3D keypoints. Faster for use during training.
+ Args:
+ cfg (CfgNode): Model config file.
+ """
+ self.cfg = cfg
+
+ def __call__(self,
+ pred_keypoints_3d: torch.Tensor,
+ gt_keypoints_3d: torch.Tensor,
+ gt_keypoints_2d: torch.Tensor,
+ images: Optional[np.array] = None,
+ camera_translation: Optional[torch.Tensor] = None) -> np.array:
+ """
+ Render batch of 3D keypoints.
+ Args:
+ pred_keypoints_3d (torch.Tensor): Tensor of shape (B, S, N, 3) containing a batch of predicted 3D keypoints, with S samples per image.
+ gt_keypoints_3d (torch.Tensor): Tensor of shape (B, N, 4) containing corresponding ground truth 3D keypoints; last value is the confidence.
+ gt_keypoints_2d (torch.Tensor): Tensor of shape (B, N, 3) containing corresponding ground truth 2D keypoints.
+            images (Optional[np.array]): Array of shape (B, H, W, 3) containing images with values in the [0,255] range.
+            camera_translation (Optional[torch.Tensor]): Tensor of shape (B, 3) containing the camera translation.
+        Returns:
+            np.array: Image with the following layout. Each row contains:
+                a) the input image,
+                b) the image with the gt 2D keypoints,
+                c) the image with the projected gt 3D keypoints,
+                d_1, ..., d_S) the images with the projected predicted 3D keypoints,
+                e) the gt 3D keypoints rendered from a side view,
+                f_1, ..., f_S) the predicted 3D keypoints rendered from a side view.
+        """
+ batch_size = pred_keypoints_3d.shape[0]
+# num_samples = pred_keypoints_3d.shape[1]
+ pred_keypoints_3d = pred_keypoints_3d.clone().cpu().float()
+ gt_keypoints_3d = gt_keypoints_3d.clone().cpu().float()
+ gt_keypoints_3d[:, :, :-1] = gt_keypoints_3d[:, :, :-1] - gt_keypoints_3d[:, [0], :-1] + pred_keypoints_3d[:, [0]]
+ gt_keypoints_2d = gt_keypoints_2d.clone().cpu().float().numpy()
+ gt_keypoints_2d[:, :, :-1] = self.cfg.MODEL.IMAGE_SIZE * (gt_keypoints_2d[:, :, :-1] + 1.0) / 2.0
+
+ #openpose_indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+ #gt_indices = [12, 8, 7, 6, 9, 10, 11, 14, 2, 1, 0, 3, 4, 5]
+ #gt_indices = [25 + i for i in gt_indices]
+ openpose_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+ gt_indices = openpose_indices
+ keypoints_to_render = torch.ones(batch_size, gt_keypoints_3d.shape[1], 1)
+ rotation = torch.eye(3).unsqueeze(0)
+ if camera_translation is None:
+ camera_translation = torch.tensor([0.0, 0.0, 2 * self.cfg.EXTRA.FOCAL_LENGTH / (0.8 * self.cfg.MODEL.IMAGE_SIZE)]).unsqueeze(0).repeat(batch_size, 1)
+ else:
+ camera_translation = camera_translation.cpu()
+
+ if images is None:
+ images = np.zeros((batch_size, self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE, 3))
+ focal_length = torch.tensor([self.cfg.EXTRA.FOCAL_LENGTH, self.cfg.EXTRA.FOCAL_LENGTH]).reshape(1, 2)
+ camera_center = torch.tensor([self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE], dtype=torch.float).reshape(1, 2) / 2.
+ gt_keypoints_3d_proj = perspective_projection(gt_keypoints_3d[:, :, :-1], rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation[:, :], focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1))
+ pred_keypoints_3d_proj = perspective_projection(pred_keypoints_3d.reshape(batch_size, -1, 3), rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation.reshape(batch_size, -1), focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1)).reshape(batch_size, -1, 2)
+ gt_keypoints_3d_proj = torch.cat([gt_keypoints_3d_proj, gt_keypoints_3d[:, :, [-1]]], dim=-1).cpu().numpy()
+ pred_keypoints_3d_proj = torch.cat([pred_keypoints_3d_proj, keypoints_to_render.reshape(batch_size, -1, 1)], dim=-1).cpu().numpy()
+ rows = []
+ # Rotate keypoints to visualize side view
+ R = torch.tensor(trimesh.transformations.rotation_matrix(np.radians(90), [0, 1, 0])[:3, :3]).float()
+ gt_keypoints_3d_side = gt_keypoints_3d.clone()
+ gt_keypoints_3d_side[:, :, :-1] = torch.einsum('bni,ij->bnj', gt_keypoints_3d_side[:, :, :-1], R)
+ pred_keypoints_3d_side = pred_keypoints_3d.clone()
+ pred_keypoints_3d_side = torch.einsum('bni,ij->bnj', pred_keypoints_3d_side, R)
+ gt_keypoints_3d_proj_side = perspective_projection(gt_keypoints_3d_side[:, :, :-1], rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation[:, :], focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1))
+ pred_keypoints_3d_proj_side = perspective_projection(pred_keypoints_3d_side.reshape(batch_size, -1, 3), rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation.reshape(batch_size, -1), focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1)).reshape(batch_size, -1, 2)
+ gt_keypoints_3d_proj_side = torch.cat([gt_keypoints_3d_proj_side, gt_keypoints_3d_side[:, :, [-1]]], dim=-1).cpu().numpy()
+ pred_keypoints_3d_proj_side = torch.cat([pred_keypoints_3d_proj_side, keypoints_to_render.reshape(batch_size, -1, 1)], dim=-1).cpu().numpy()
+ for i in range(batch_size):
+ img = images[i]
+ side_img = np.zeros((self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE, 3))
+ # gt 2D keypoints
+ body_keypoints_2d = gt_keypoints_2d[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_2d[i, gt, -1] > body_keypoints_2d[op, -1]:
+ body_keypoints_2d[op] = gt_keypoints_2d[i, gt]
+ gt_keypoints_img = render_openpose(img, body_keypoints_2d) / 255.
+ # gt 3D keypoints
+ body_keypoints_3d_proj = gt_keypoints_3d_proj[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_3d_proj[i, gt, -1] > body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = gt_keypoints_3d_proj[i, gt]
+ gt_keypoints_3d_proj_img = render_openpose(img, body_keypoints_3d_proj) / 255.
+ # gt 3D keypoints from the side
+ body_keypoints_3d_proj = gt_keypoints_3d_proj_side[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_3d_proj_side[i, gt, -1] > body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = gt_keypoints_3d_proj_side[i, gt]
+ gt_keypoints_3d_proj_img_side = render_openpose(side_img, body_keypoints_3d_proj) / 255.
+ # pred 3D keypoints
+ pred_keypoints_3d_proj_imgs = []
+ body_keypoints_3d_proj = pred_keypoints_3d_proj[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if pred_keypoints_3d_proj[i, gt, -1] >= body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = pred_keypoints_3d_proj[i, gt]
+ pred_keypoints_3d_proj_imgs.append(render_openpose(img, body_keypoints_3d_proj) / 255.)
+ pred_keypoints_3d_proj_img = np.concatenate(pred_keypoints_3d_proj_imgs, axis=1)
+ # gt 3D keypoints from the side
+ pred_keypoints_3d_proj_imgs_side = []
+ body_keypoints_3d_proj = pred_keypoints_3d_proj_side[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if pred_keypoints_3d_proj_side[i, gt, -1] >= body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = pred_keypoints_3d_proj_side[i, gt]
+ pred_keypoints_3d_proj_imgs_side.append(render_openpose(side_img, body_keypoints_3d_proj) / 255.)
+ pred_keypoints_3d_proj_img_side = np.concatenate(pred_keypoints_3d_proj_imgs_side, axis=1)
+ rows.append(np.concatenate((gt_keypoints_img, gt_keypoints_3d_proj_img, pred_keypoints_3d_proj_img, gt_keypoints_3d_proj_img_side, pred_keypoints_3d_proj_img_side), axis=1))
+ # Concatenate images
+ img = np.concatenate(rows, axis=0)
+ img[:, ::self.cfg.MODEL.IMAGE_SIZE, :] = 1.0
+ img[::self.cfg.MODEL.IMAGE_SIZE, :, :] = 1.0
+ img[:, (1+1+1)*self.cfg.MODEL.IMAGE_SIZE, :] = 0.5
+ return img
diff --git a/hamer/utils/utils_detectron2.py b/hamer/utils/utils_detectron2.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe01e02f8edbcbd5d545c6f3cb65aeb688a1dff4
--- /dev/null
+++ b/hamer/utils/utils_detectron2.py
@@ -0,0 +1,93 @@
+import detectron2.data.transforms as T
+import torch
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode, instantiate
+from detectron2.data import MetadataCatalog
+from detectron2.modeling import build_model
+from omegaconf import OmegaConf
+
+
+class DefaultPredictor_Lazy:
+    """Create a simple end-to-end predictor with the given config that runs on a
+    single device for a single input image.
+
+    Compared to using the model directly, this class adds the following:
+
+ 1. Load checkpoint from the weights specified in config (cfg.MODEL.WEIGHTS).
+ 2. Always take BGR image as the input and apply format conversion internally.
+ 3. Apply resizing defined by the config (`cfg.INPUT.{MIN,MAX}_SIZE_TEST`).
+ 4. Take one input image and produce a single output, instead of a batch.
+
+ This is meant for simple demo purposes, so it does the above steps automatically.
+ This is not meant for benchmarks or running complicated inference logic.
+ If you'd like to do anything more complicated, please refer to its source code as
+ examples to build and use the model manually.
+
+ Attributes:
+ metadata (Metadata): the metadata of the underlying dataset, obtained from
+ test dataset name in the config.
+
+
+ Examples:
+ ::
+        pred = DefaultPredictor_Lazy(cfg)
+ inputs = cv2.imread("input.jpg")
+ outputs = pred(inputs)
+ """
+
+ def __init__(self, cfg):
+ """
+ Args:
+ cfg: a yacs CfgNode or a omegaconf dict object.
+ """
+ if isinstance(cfg, CfgNode):
+ self.cfg = cfg.clone() # cfg can be modified by model
+            self.model = build_model(self.cfg)
+ if len(cfg.DATASETS.TEST):
+ test_dataset = cfg.DATASETS.TEST[0]
+
+ checkpointer = DetectionCheckpointer(self.model)
+ checkpointer.load(cfg.MODEL.WEIGHTS)
+
+ self.aug = T.ResizeShortestEdge(
+ [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
+ )
+
+ self.input_format = cfg.INPUT.FORMAT
+ else: # new LazyConfig
+ self.cfg = cfg
+ self.model = instantiate(cfg.model)
+ test_dataset = OmegaConf.select(cfg, "dataloader.test.dataset.names", default=None)
+ if isinstance(test_dataset, (list, tuple)):
+ test_dataset = test_dataset[0]
+
+ checkpointer = DetectionCheckpointer(self.model)
+ checkpointer.load(OmegaConf.select(cfg, "train.init_checkpoint", default=""))
+
+ mapper = instantiate(cfg.dataloader.test.mapper)
+ self.aug = mapper.augmentations
+ self.input_format = mapper.image_format
+
+ self.model.eval().cuda()
+ if test_dataset:
+ self.metadata = MetadataCatalog.get(test_dataset)
+ assert self.input_format in ["RGB", "BGR"], self.input_format
+
+ def __call__(self, original_image):
+ """
+ Args:
+ original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+
+ Returns:
+ predictions (dict):
+ the output of the model for one image only.
+ See :doc:`/tutorials/models` for details about the format.
+ """
+ with torch.no_grad():
+ if self.input_format == "RGB":
+ original_image = original_image[:, :, ::-1]
+ height, width = original_image.shape[:2]
+ image = self.aug(T.AugInput(original_image)).apply_image(original_image)
+ image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
+ inputs = {"image": image, "height": height, "width": width}
+ predictions = self.model([inputs])[0]
+ return predictions
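+
+
+# NOTE: illustrative sketch, not part of the original module. Builds a person
+# detector from a detectron2 LazyConfig and runs it on one BGR image; requires a
+# CUDA device. The model-zoo config name and the 0.5 score threshold are
+# assumptions (a RegNetY Mask R-CNN baseline) -- swap in whichever detector
+# config your pipeline actually uses.
+if __name__ == "__main__":
+    import cv2
+    from detectron2 import model_zoo
+
+    detectron2_cfg = model_zoo.get_config("new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py", trained=True)
+    detectron2_cfg.model.roi_heads.box_predictor.test_score_thresh = 0.5
+    detector = DefaultPredictor_Lazy(detectron2_cfg)
+
+    img_bgr = cv2.imread("example.jpg")                                # any test image (hypothetical path)
+    instances = detector(img_bgr)["instances"]
+    person_boxes = instances.pred_boxes[instances.pred_classes == 0]   # COCO class 0 = person
+    print(person_boxes.tensor.cpu().numpy())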
diff --git a/mmcv_custom/.DS_Store b/mmcv_custom/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..baeca02aabd894e760b28a1df88cda953704650e
Binary files /dev/null and b/mmcv_custom/.DS_Store differ
diff --git a/mmcv_custom/__init__.py b/mmcv_custom/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..23cb66e9336d6e87483eba5313976c3aa2de5e61
--- /dev/null
+++ b/mmcv_custom/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+from .checkpoint import load_checkpoint
+from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
+from .apex_runner.optimizer import DistOptimizerHook_custom
+
+__all__ = ['load_checkpoint', 'LayerDecayOptimizerConstructor', 'DistOptimizerHook_custom']
diff --git a/mmcv_custom/__pycache__/__init__.cpython-310.pyc b/mmcv_custom/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6a73b236c7a631337b1f3a86d84470412fef0496
Binary files /dev/null and b/mmcv_custom/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmcv_custom/__pycache__/checkpoint.cpython-310.pyc b/mmcv_custom/__pycache__/checkpoint.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7f5d0154731a976c7b6050237e40398eaf228ad6
Binary files /dev/null and b/mmcv_custom/__pycache__/checkpoint.cpython-310.pyc differ
diff --git a/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-310.pyc b/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41f317495d2f177e821e7d52d452fadaf589ebd0
Binary files /dev/null and b/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-310.pyc differ
diff --git a/mmcv_custom/apex_runner/__init__.py b/mmcv_custom/apex_runner/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b90d2cbaa978c67c83ce3a8393d172d5714e210
--- /dev/null
+++ b/mmcv_custom/apex_runner/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+from .checkpoint import save_checkpoint
+from .apex_iter_based_runner import IterBasedRunnerAmp
+
+
+__all__ = [
+ 'save_checkpoint', 'IterBasedRunnerAmp',
+]
diff --git a/mmcv_custom/apex_runner/__pycache__/__init__.cpython-310.pyc b/mmcv_custom/apex_runner/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6705d251ab2de8157062e7a8589841b3e4e4028b
Binary files /dev/null and b/mmcv_custom/apex_runner/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmcv_custom/apex_runner/__pycache__/apex_iter_based_runner.cpython-310.pyc b/mmcv_custom/apex_runner/__pycache__/apex_iter_based_runner.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6611a00e4743076333ff435dde00a5ba2af75c8d
Binary files /dev/null and b/mmcv_custom/apex_runner/__pycache__/apex_iter_based_runner.cpython-310.pyc differ
diff --git a/mmcv_custom/apex_runner/__pycache__/checkpoint.cpython-310.pyc b/mmcv_custom/apex_runner/__pycache__/checkpoint.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07a04502e7c957a0bbd177cbd4d3615567afbd43
Binary files /dev/null and b/mmcv_custom/apex_runner/__pycache__/checkpoint.cpython-310.pyc differ
diff --git a/mmcv_custom/apex_runner/__pycache__/optimizer.cpython-310.pyc b/mmcv_custom/apex_runner/__pycache__/optimizer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..daf3266708bcb4d7171517fc7cd460ad2b1571ad
Binary files /dev/null and b/mmcv_custom/apex_runner/__pycache__/optimizer.cpython-310.pyc differ
diff --git a/mmcv_custom/apex_runner/apex_iter_based_runner.py b/mmcv_custom/apex_runner/apex_iter_based_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..571733b091574607ba1ba39648da6a051a769d34
--- /dev/null
+++ b/mmcv_custom/apex_runner/apex_iter_based_runner.py
@@ -0,0 +1,103 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import os.path as osp
+import platform
+import shutil
+
+import torch
+from torch.optim import Optimizer
+
+import mmcv
+from mmcv.runner import RUNNERS, IterBasedRunner
+from .checkpoint import save_checkpoint
+
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+@RUNNERS.register_module()
+class IterBasedRunnerAmp(IterBasedRunner):
+ """Iteration-based Runner with AMP support.
+
+    This runner trains models iteration by iteration.
+ """
+
+ def save_checkpoint(self,
+ out_dir,
+ filename_tmpl='iter_{}.pth',
+ meta=None,
+ save_optimizer=True,
+ create_symlink=False):
+ """Save checkpoint to file.
+
+ Args:
+ out_dir (str): Directory to save checkpoint files.
+ filename_tmpl (str, optional): Checkpoint file template.
+ Defaults to 'iter_{}.pth'.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ Defaults to None.
+            save_optimizer (bool, optional): Whether to save the optimizer.
+                Defaults to True.
+            create_symlink (bool, optional): Whether to create a symlink to
+                the latest checkpoint file. Defaults to False.
+ """
+ if meta is None:
+ meta = dict(iter=self.iter + 1, epoch=self.epoch + 1)
+ elif isinstance(meta, dict):
+ meta.update(iter=self.iter + 1, epoch=self.epoch + 1)
+ else:
+ raise TypeError(
+ f'meta should be a dict or None, but got {type(meta)}')
+ if self.meta is not None:
+ meta.update(self.meta)
+
+ filename = filename_tmpl.format(self.iter + 1)
+ filepath = osp.join(out_dir, filename)
+ optimizer = self.optimizer if save_optimizer else None
+ save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
+ # in some environments, `os.symlink` is not supported, you may need to
+ # set `create_symlink` to False
+ # if create_symlink:
+ # dst_file = osp.join(out_dir, 'latest.pth')
+ # if platform.system() != 'Windows':
+ # mmcv.symlink(filename, dst_file)
+ # else:
+ # shutil.copy(filepath, dst_file)
+
+ def resume(self,
+ checkpoint,
+ resume_optimizer=True,
+ map_location='default'):
+ if map_location == 'default':
+ if torch.cuda.is_available():
+ device_id = torch.cuda.current_device()
+ checkpoint = self.load_checkpoint(
+ checkpoint,
+ map_location=lambda storage, loc: storage.cuda(device_id))
+ else:
+ checkpoint = self.load_checkpoint(checkpoint)
+ else:
+ checkpoint = self.load_checkpoint(
+ checkpoint, map_location=map_location)
+
+ self._epoch = checkpoint['meta']['epoch']
+ self._iter = checkpoint['meta']['iter']
+ self._inner_iter = checkpoint['meta']['iter']
+ if 'optimizer' in checkpoint and resume_optimizer:
+ if isinstance(self.optimizer, Optimizer):
+ self.optimizer.load_state_dict(checkpoint['optimizer'])
+ elif isinstance(self.optimizer, dict):
+ for k in self.optimizer.keys():
+ self.optimizer[k].load_state_dict(
+ checkpoint['optimizer'][k])
+ else:
+ raise TypeError(
+ 'Optimizer should be dict or torch.optim.Optimizer '
+ f'but got {type(self.optimizer)}')
+
+ if 'amp' in checkpoint:
+ apex.amp.load_state_dict(checkpoint['amp'])
+ self.logger.info('load amp state dict')
+
+ self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}')
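+
+
+# Illustrative usage sketch (assumption, not part of the original file): once
+# this module is imported, the runner can be selected from an mmcv config by
+# its registered type name, e.g.
+#
+#   runner = dict(type='IterBasedRunnerAmp', max_iters=210000)
+#
+# `save_checkpoint` stores the apex AMP state under the 'amp' key, and
+# `resume` restores it when apex is available.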
diff --git a/mmcv_custom/apex_runner/checkpoint.py b/mmcv_custom/apex_runner/checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..b04167e0fc5f16bc33e793830ebb9c4ef15ef1ed
--- /dev/null
+++ b/mmcv_custom/apex_runner/checkpoint.py
@@ -0,0 +1,85 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import os.path as osp
+import time
+from tempfile import TemporaryDirectory
+
+import torch
+from torch.optim import Optimizer
+
+import mmcv
+from mmcv.parallel import is_module_wrapper
+from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
+
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+def save_checkpoint(model, filename, optimizer=None, meta=None):
+ """Save checkpoint to file.
+
+    The checkpoint will have 4 fields: ``meta``, ``state_dict``,
+    ``optimizer`` and ``amp``. By default ``meta`` will contain version
+    and time info.
+
+ Args:
+ model (Module): Module whose params are to be saved.
+ filename (str): Checkpoint filename.
+ optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ """
+ if meta is None:
+ meta = {}
+ elif not isinstance(meta, dict):
+ raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
+ meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
+
+ if is_module_wrapper(model):
+ model = model.module
+
+ if hasattr(model, 'CLASSES') and model.CLASSES is not None:
+ # save class name to the meta
+ meta.update(CLASSES=model.CLASSES)
+
+ checkpoint = {
+ 'meta': meta,
+ 'state_dict': weights_to_cpu(get_state_dict(model))
+ }
+ # save optimizer state dict in the checkpoint
+ if isinstance(optimizer, Optimizer):
+ checkpoint['optimizer'] = optimizer.state_dict()
+ elif isinstance(optimizer, dict):
+ checkpoint['optimizer'] = {}
+ for name, optim in optimizer.items():
+ checkpoint['optimizer'][name] = optim.state_dict()
+
+ # save amp state dict in the checkpoint
+ checkpoint['amp'] = apex.amp.state_dict()
+
+ if filename.startswith('pavi://'):
+ try:
+ from pavi import modelcloud
+ from pavi.exception import NodeNotFoundError
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ model_path = filename[7:]
+ root = modelcloud.Folder()
+ model_dir, model_name = osp.split(model_path)
+ try:
+ model = modelcloud.get(model_dir)
+ except NodeNotFoundError:
+ model = root.create_training_model(model_dir)
+ with TemporaryDirectory() as tmp_dir:
+ checkpoint_file = osp.join(tmp_dir, model_name)
+ with open(checkpoint_file, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
+ model.create_file(checkpoint_file, name=model_name)
+ else:
+ mmcv.mkdir_or_exist(osp.dirname(filename))
+ # immediately flush buffer
+ with open(filename, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
diff --git a/mmcv_custom/apex_runner/optimizer.py b/mmcv_custom/apex_runner/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbc42989b569e63bbf008bbbd2700fe217399e9f
--- /dev/null
+++ b/mmcv_custom/apex_runner/optimizer.py
@@ -0,0 +1,33 @@
+from mmcv.runner import OptimizerHook, HOOKS
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+@HOOKS.register_module()
+class DistOptimizerHook_custom(OptimizerHook):
+ """Optimizer hook for distributed training."""
+
+ def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+ self.update_interval = update_interval
+ self.use_fp16 = use_fp16
+
+ def before_run(self, runner):
+ runner.optimizer.zero_grad()
+
+ def after_train_iter(self, runner):
+ runner.outputs['loss'] /= self.update_interval
+ if self.use_fp16:
+ with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss:
+ scaled_loss.backward()
+ else:
+ runner.outputs['loss'].backward()
+ if self.every_n_iters(runner, self.update_interval):
+ if self.grad_clip is not None:
+ self.clip_grads(runner.model.parameters())
+ runner.optimizer.step()
+ runner.optimizer.zero_grad()
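+
+
+# Illustrative config sketch (assumption, not part of the original file): with
+# update_interval > 1 the hook accumulates gradients over several iterations
+# before stepping the optimizer, optionally scaling the loss via apex AMP.
+#
+#   optimizer_config = dict(
+#       type='DistOptimizerHook_custom',
+#       update_interval=2,                          # doubles the effective batch size
+#       grad_clip=dict(max_norm=1.0, norm_type=2),  # forwarded to clip_grads
+#       use_fp16=True)                              # requires apex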
diff --git a/mmcv_custom/checkpoint.py b/mmcv_custom/checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..52c9bac8a5eb89a4009e837ea338cd271e0a5bc7
--- /dev/null
+++ b/mmcv_custom/checkpoint.py
@@ -0,0 +1,552 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import io
+import os
+import os.path as osp
+import pkgutil
+import time
+import warnings
+from collections import OrderedDict
+from importlib import import_module
+from tempfile import TemporaryDirectory
+
+import torch
+import torchvision
+from torch.optim import Optimizer
+from torch.utils import model_zoo
+from torch.nn import functional as F
+
+import mmcv
+from mmcv.fileio import FileClient
+from mmcv.fileio import load as load_file
+from mmcv.parallel import is_module_wrapper
+from mmcv.utils import mkdir_or_exist
+from mmcv.runner import get_dist_info
+
+from scipy import interpolate
+import numpy as np
+import math
+import re
+import copy
+
+ENV_MMCV_HOME = 'MMCV_HOME'
+ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
+DEFAULT_CACHE_DIR = '~/.cache'
+
+
+def _get_mmcv_home():
+ mmcv_home = os.path.expanduser(
+ os.getenv(
+ ENV_MMCV_HOME,
+ os.path.join(
+ os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')))
+
+ mkdir_or_exist(mmcv_home)
+ return mmcv_home
+
+
+def load_state_dict(module, state_dict, strict=False, logger=None):
+ """Load state_dict to a module.
+
+ This method is modified from :meth:`torch.nn.Module.load_state_dict`.
+ Default value for ``strict`` is set to ``False`` and the message for
+ param mismatch will be shown even if strict is False.
+
+ Args:
+ module (Module): Module that receives the state_dict.
+ state_dict (OrderedDict): Weights.
+ strict (bool): whether to strictly enforce that the keys
+ in :attr:`state_dict` match the keys returned by this module's
+ :meth:`~torch.nn.Module.state_dict` function. Default: ``False``.
+ logger (:obj:`logging.Logger`, optional): Logger to log the error
+ message. If not specified, print function will be used.
+ """
+ unexpected_keys = []
+ all_missing_keys = []
+ err_msg = []
+
+ metadata = getattr(state_dict, '_metadata', None)
+ state_dict = state_dict.copy()
+ if metadata is not None:
+ state_dict._metadata = metadata
+
+ # use _load_from_state_dict to enable checkpoint version control
+ def load(module, prefix=''):
+ # recursively check parallel module in case that the model has a
+ # complicated structure, e.g., nn.Module(nn.Module(DDP))
+ if is_module_wrapper(module):
+ module = module.module
+ local_metadata = {} if metadata is None else metadata.get(
+ prefix[:-1], {})
+ module._load_from_state_dict(state_dict, prefix, local_metadata, True,
+ all_missing_keys, unexpected_keys,
+ err_msg)
+ for name, child in module._modules.items():
+ if child is not None:
+ load(child, prefix + name + '.')
+
+ load(module)
+ load = None # break load->load reference cycle
+
+ # ignore "num_batches_tracked" of BN layers
+ missing_keys = [
+ key for key in all_missing_keys if 'num_batches_tracked' not in key
+ ]
+
+ if unexpected_keys:
+ err_msg.append('unexpected key in source '
+ f'state_dict: {", ".join(unexpected_keys)}\n')
+ if missing_keys:
+ err_msg.append(
+ f'missing keys in source state_dict: {", ".join(missing_keys)}\n')
+
+ rank, _ = get_dist_info()
+ if len(err_msg) > 0 and rank == 0:
+ err_msg.insert(
+ 0, 'The model and loaded state dict do not match exactly\n')
+ err_msg = '\n'.join(err_msg)
+ if strict:
+ raise RuntimeError(err_msg)
+ elif logger is not None:
+ logger.warning(err_msg)
+ else:
+ print(err_msg)
+
+
+def load_url_dist(url, model_dir=None, map_location="cpu"):
+ """In distributed setting, this function only download checkpoint at local
+ rank 0."""
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ if rank == 0:
+ checkpoint = model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ checkpoint = model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)
+ return checkpoint
+
+
+def load_pavimodel_dist(model_path, map_location=None):
+ """In distributed setting, this function only download checkpoint at local
+ rank 0."""
+ try:
+ from pavi import modelcloud
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ if rank == 0:
+ model = modelcloud.get(model_path)
+ with TemporaryDirectory() as tmp_dir:
+ downloaded_file = osp.join(tmp_dir, model.name)
+ model.download(downloaded_file)
+ checkpoint = torch.load(downloaded_file, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ model = modelcloud.get(model_path)
+ with TemporaryDirectory() as tmp_dir:
+ downloaded_file = osp.join(tmp_dir, model.name)
+ model.download(downloaded_file)
+ checkpoint = torch.load(
+ downloaded_file, map_location=map_location)
+ return checkpoint
+
+
+def load_fileclient_dist(filename, backend, map_location):
+ """In distributed setting, this function only download checkpoint at local
+ rank 0."""
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ allowed_backends = ['ceph']
+ if backend not in allowed_backends:
+ raise ValueError(f'Load from Backend {backend} is not supported.')
+ if rank == 0:
+ fileclient = FileClient(backend=backend)
+ buffer = io.BytesIO(fileclient.get(filename))
+ checkpoint = torch.load(buffer, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ fileclient = FileClient(backend=backend)
+ buffer = io.BytesIO(fileclient.get(filename))
+ checkpoint = torch.load(buffer, map_location=map_location)
+ return checkpoint
+
+
+def get_torchvision_models():
+ model_urls = dict()
+ for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__):
+ if ispkg:
+ continue
+ _zoo = import_module(f'torchvision.models.{name}')
+ if hasattr(_zoo, 'model_urls'):
+ _urls = getattr(_zoo, 'model_urls')
+ model_urls.update(_urls)
+ return model_urls
+
+
+def get_external_models():
+ mmcv_home = _get_mmcv_home()
+ default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')
+ default_urls = load_file(default_json_path)
+ assert isinstance(default_urls, dict)
+ external_json_path = osp.join(mmcv_home, 'open_mmlab.json')
+ if osp.exists(external_json_path):
+ external_urls = load_file(external_json_path)
+ assert isinstance(external_urls, dict)
+ default_urls.update(external_urls)
+
+ return default_urls
+
+
+def get_mmcls_models():
+ mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json')
+ mmcls_urls = load_file(mmcls_json_path)
+
+ return mmcls_urls
+
+
+def get_deprecated_model_names():
+ deprecate_json_path = osp.join(mmcv.__path__[0],
+ 'model_zoo/deprecated.json')
+ deprecate_urls = load_file(deprecate_json_path)
+ assert isinstance(deprecate_urls, dict)
+
+ return deprecate_urls
+
+
+def _process_mmcls_checkpoint(checkpoint):
+ state_dict = checkpoint['state_dict']
+ new_state_dict = OrderedDict()
+ for k, v in state_dict.items():
+ if k.startswith('backbone.'):
+ new_state_dict[k[9:]] = v
+ new_checkpoint = dict(state_dict=new_state_dict)
+
+ return new_checkpoint
+
+
+def _load_checkpoint(filename, map_location=None):
+ """Load checkpoint from somewhere (modelzoo, file, url).
+
+ Args:
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
+ details.
+ map_location (str | None): Same as :func:`torch.load`. Default: None.
+
+ Returns:
+ dict | OrderedDict: The loaded checkpoint. It can be either an
+ OrderedDict storing model weights or a dict containing other
+ information, which depends on the checkpoint.
+ """
+ if filename.startswith('modelzoo://'):
+ warnings.warn('The URL scheme of "modelzoo://" is deprecated, please '
+ 'use "torchvision://" instead')
+ model_urls = get_torchvision_models()
+ model_name = filename[11:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ elif filename.startswith('torchvision://'):
+ model_urls = get_torchvision_models()
+ model_name = filename[14:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ elif filename.startswith('open-mmlab://'):
+ model_urls = get_external_models()
+ model_name = filename[13:]
+ deprecated_urls = get_deprecated_model_names()
+ if model_name in deprecated_urls:
+ warnings.warn(f'open-mmlab://{model_name} is deprecated in favor '
+ f'of open-mmlab://{deprecated_urls[model_name]}')
+ model_name = deprecated_urls[model_name]
+ model_url = model_urls[model_name]
+ # check if is url
+ if model_url.startswith(('http://', 'https://')):
+ checkpoint = load_url_dist(model_url)
+ else:
+ filename = osp.join(_get_mmcv_home(), model_url)
+ if not osp.isfile(filename):
+ raise IOError(f'{filename} is not a checkpoint file')
+ checkpoint = torch.load(filename, map_location=map_location)
+ elif filename.startswith('mmcls://'):
+ model_urls = get_mmcls_models()
+ model_name = filename[8:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ checkpoint = _process_mmcls_checkpoint(checkpoint)
+ elif filename.startswith(('http://', 'https://')):
+ checkpoint = load_url_dist(filename)
+ elif filename.startswith('pavi://'):
+ model_path = filename[7:]
+ checkpoint = load_pavimodel_dist(model_path, map_location=map_location)
+ elif filename.startswith('s3://'):
+ checkpoint = load_fileclient_dist(
+ filename, backend='ceph', map_location=map_location)
+ else:
+ if not osp.isfile(filename):
+ raise IOError(f'{filename} is not a checkpoint file')
+ checkpoint = torch.load(filename, map_location=map_location)
+ return checkpoint
+
+
+def cosine_scheduler(base_value, final_value, epochs, niter_per_ep, warmup_epochs=0,
+ start_warmup_value=0, warmup_steps=-1):
+ warmup_schedule = np.array([])
+ warmup_iters = warmup_epochs * niter_per_ep
+ if warmup_steps > 0:
+ warmup_iters = warmup_steps
+ print("Set warmup steps = %d" % warmup_iters)
+ if warmup_epochs > 0:
+ warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)
+
+ iters = np.arange(epochs * niter_per_ep - warmup_iters)
+ schedule = np.array(
+ [final_value + 0.5 * (base_value - final_value) * (1 + math.cos(math.pi * i / (len(iters)))) for i in iters])
+
+ schedule = np.concatenate((warmup_schedule, schedule))
+
+ assert len(schedule) == epochs * niter_per_ep
+ return schedule
+
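+# Illustrative example (assumption, not part of the original file):
+#
+#   sched = cosine_scheduler(base_value=1e-3, final_value=1e-5,
+#                            epochs=10, niter_per_ep=100, warmup_epochs=1)
+#   # len(sched) == 1000; the first 100 values ramp linearly from 0 to 1e-3,
+#   # the remaining 900 decay from 1e-3 toward 1e-5 along a cosine curve.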
+
+def load_checkpoint(model,
+ filename,
+ map_location='cpu',
+ strict=False,
+ logger=None,
+ patch_padding='pad',
+ part_features=None
+ ):
+ """Load checkpoint from a file or URI.
+
+ Args:
+ model (Module): Module to load checkpoint.
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
+ details.
+ map_location (str): Same as :func:`torch.load`.
+ strict (bool): Whether to allow different params for the model and
+ checkpoint.
+ logger (:mod:`logging.Logger` or None): The logger for error message.
+        patch_padding (str): 'pad', 'bilinear' or 'bicubic'; how to resize the
+            patch embedding kernel (e.g. from 14x14 to 16x16) when its size
+            differs from the model's.
+        part_features (int | None): If set, split each fc2 weight so that its
+            last ``part_features`` output channels initialize the expert
+            layers (``mlp.experts``) and the remaining channels stay in fc2.
+
+ Returns:
+ dict or OrderedDict: The loaded checkpoint.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict = checkpoint['state_dict']
+ elif 'model' in checkpoint:
+ state_dict = checkpoint['model']
+ elif 'module' in checkpoint:
+ state_dict = checkpoint['module']
+ else:
+ state_dict = checkpoint
+ # strip prefix of state_dict
+ if list(state_dict.keys())[0].startswith('module.'):
+ state_dict = {k[7:]: v for k, v in state_dict.items()}
+
+ # for MoBY, load model of online branch
+ if sorted(list(state_dict.keys()))[0].startswith('encoder'):
+ state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')}
+
+ rank, _ = get_dist_info()
+
+ if 'patch_embed.proj.weight' in state_dict:
+ proj_weight = state_dict['patch_embed.proj.weight']
+ orig_size = proj_weight.shape[2:]
+ current_size = model.patch_embed.proj.weight.shape[2:]
+ padding_size = current_size[0] - orig_size[0]
+ padding_l = padding_size // 2
+ padding_r = padding_size - padding_l
+ if orig_size != current_size:
+ if 'pad' in patch_padding:
+ proj_weight = torch.nn.functional.pad(proj_weight, (padding_l, padding_r, padding_l, padding_r))
+ elif 'bilinear' in patch_padding:
+ proj_weight = torch.nn.functional.interpolate(proj_weight, size=current_size, mode='bilinear', align_corners=False)
+ elif 'bicubic' in patch_padding:
+ proj_weight = torch.nn.functional.interpolate(proj_weight, size=current_size, mode='bicubic', align_corners=False)
+ state_dict['patch_embed.proj.weight'] = proj_weight
+
+ if 'pos_embed' in state_dict:
+ pos_embed_checkpoint = state_dict['pos_embed']
+ embedding_size = pos_embed_checkpoint.shape[-1]
+ H, W = model.patch_embed.patch_shape
+ num_patches = model.patch_embed.num_patches
+ num_extra_tokens = model.pos_embed.shape[-2] - num_patches
+ # height (== width) for the checkpoint position embedding
+ orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
+ if rank == 0:
+ print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, H, W))
+ extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
+ # only the position tokens are interpolated
+ pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
+ pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
+ pos_tokens = torch.nn.functional.interpolate(
+ pos_tokens, size=(H, W), mode='bicubic', align_corners=False)
+ pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
+ new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
+ state_dict['pos_embed'] = new_pos_embed
+
+ new_state_dict = copy.deepcopy(state_dict)
+ if part_features is not None:
+ current_keys = list(model.state_dict().keys())
+ for key in current_keys:
+ if "mlp.experts" in key:
+ source_key = re.sub(r'experts.\d+.', 'fc2.', key)
+ new_state_dict[key] = state_dict[source_key][-part_features:]
+ elif 'fc2' in key:
+ new_state_dict[key] = state_dict[key][:-part_features]
+
+ # load state_dict
+ load_state_dict(model, new_state_dict, strict, logger)
+ return checkpoint
+
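+# Illustrative usage sketch (assumption, not part of the original file;
+# `vit_model` and `ckpt_path` are placeholders):
+#
+#   checkpoint = load_checkpoint(vit_model, ckpt_path, map_location='cpu',
+#                                strict=False, patch_padding='bilinear')
+#   # patch_embed.proj.weight and pos_embed are resized to the model's
+#   # resolution before the weights are loaded with load_state_dict.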
+
+def weights_to_cpu(state_dict):
+ """Copy a model state_dict to cpu.
+
+ Args:
+ state_dict (OrderedDict): Model weights on GPU.
+
+ Returns:
+        OrderedDict: Model weights on CPU.
+ """
+ state_dict_cpu = OrderedDict()
+ for key, val in state_dict.items():
+ state_dict_cpu[key] = val.cpu()
+ return state_dict_cpu
+
+
+def _save_to_state_dict(module, destination, prefix, keep_vars):
+ """Saves module state to `destination` dictionary.
+
+ This method is modified from :meth:`torch.nn.Module._save_to_state_dict`.
+
+ Args:
+ module (nn.Module): The module to generate state_dict.
+ destination (dict): A dict where state will be stored.
+ prefix (str): The prefix for parameters and buffers used in this
+ module.
+ """
+ for name, param in module._parameters.items():
+ if param is not None:
+ destination[prefix + name] = param if keep_vars else param.detach()
+ for name, buf in module._buffers.items():
+ # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d
+ if buf is not None:
+ destination[prefix + name] = buf if keep_vars else buf.detach()
+
+
+def get_state_dict(module, destination=None, prefix='', keep_vars=False):
+ """Returns a dictionary containing a whole state of the module.
+
+ Both parameters and persistent buffers (e.g. running averages) are
+ included. Keys are corresponding parameter and buffer names.
+
+ This method is modified from :meth:`torch.nn.Module.state_dict` to
+ recursively check parallel module in case that the model has a complicated
+ structure, e.g., nn.Module(nn.Module(DDP)).
+
+ Args:
+ module (nn.Module): The module to generate state_dict.
+ destination (OrderedDict): Returned dict for the state of the
+ module.
+ prefix (str): Prefix of the key.
+ keep_vars (bool): Whether to keep the variable property of the
+ parameters. Default: False.
+
+ Returns:
+ dict: A dictionary containing a whole state of the module.
+ """
+ # recursively check parallel module in case that the model has a
+ # complicated structure, e.g., nn.Module(nn.Module(DDP))
+ if is_module_wrapper(module):
+ module = module.module
+
+ # below is the same as torch.nn.Module.state_dict()
+ if destination is None:
+ destination = OrderedDict()
+ destination._metadata = OrderedDict()
+ destination._metadata[prefix[:-1]] = local_metadata = dict(
+ version=module._version)
+ _save_to_state_dict(module, destination, prefix, keep_vars)
+ for name, child in module._modules.items():
+ if child is not None:
+ get_state_dict(
+ child, destination, prefix + name + '.', keep_vars=keep_vars)
+ for hook in module._state_dict_hooks.values():
+ hook_result = hook(module, destination, prefix, local_metadata)
+ if hook_result is not None:
+ destination = hook_result
+ return destination
+
+
+def save_checkpoint(model, filename, optimizer=None, meta=None):
+ """Save checkpoint to file.
+
+ The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
+ ``optimizer``. By default ``meta`` will contain version and time info.
+
+ Args:
+ model (Module): Module whose params are to be saved.
+ filename (str): Checkpoint filename.
+ optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ """
+ if meta is None:
+ meta = {}
+ elif not isinstance(meta, dict):
+ raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
+ meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
+
+ if is_module_wrapper(model):
+ model = model.module
+
+ if hasattr(model, 'CLASSES') and model.CLASSES is not None:
+ # save class name to the meta
+ meta.update(CLASSES=model.CLASSES)
+
+ checkpoint = {
+ 'meta': meta,
+ 'state_dict': weights_to_cpu(get_state_dict(model))
+ }
+ # save optimizer state dict in the checkpoint
+ if isinstance(optimizer, Optimizer):
+ checkpoint['optimizer'] = optimizer.state_dict()
+ elif isinstance(optimizer, dict):
+ checkpoint['optimizer'] = {}
+ for name, optim in optimizer.items():
+ checkpoint['optimizer'][name] = optim.state_dict()
+
+ if filename.startswith('pavi://'):
+ try:
+ from pavi import modelcloud
+ from pavi.exception import NodeNotFoundError
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ model_path = filename[7:]
+ root = modelcloud.Folder()
+ model_dir, model_name = osp.split(model_path)
+ try:
+ model = modelcloud.get(model_dir)
+ except NodeNotFoundError:
+ model = root.create_training_model(model_dir)
+ with TemporaryDirectory() as tmp_dir:
+ checkpoint_file = osp.join(tmp_dir, model_name)
+ with open(checkpoint_file, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
+ model.create_file(checkpoint_file, name=model_name)
+ else:
+ mmcv.mkdir_or_exist(osp.dirname(filename))
+ # immediately flush buffer
+ with open(filename, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
diff --git a/mmcv_custom/layer_decay_optimizer_constructor.py b/mmcv_custom/layer_decay_optimizer_constructor.py
new file mode 100644
index 0000000000000000000000000000000000000000..1357082e66d0a91c2544ee83440745f0e93b5175
--- /dev/null
+++ b/mmcv_custom/layer_decay_optimizer_constructor.py
@@ -0,0 +1,78 @@
+import json
+from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
+from mmcv.runner import get_dist_info
+
+
+def get_num_layer_for_vit(var_name, num_max_layer):
+ if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
+ return 0
+ elif var_name.startswith("backbone.patch_embed"):
+ return 0
+ elif var_name.startswith("backbone.blocks"):
+ layer_id = int(var_name.split('.')[2])
+ return layer_id + 1
+ else:
+ return num_max_layer - 1
+
+@OPTIMIZER_BUILDERS.register_module()
+class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
+ def add_params(self, params, module, prefix='', is_dcn_module=None):
+ """Add all parameters of module to the params list.
+ The parameters of the given module will be added to the list of param
+ groups, with specific rules defined by paramwise_cfg.
+ Args:
+ params (list[dict]): A list of param groups, it will be modified
+ in place.
+ module (nn.Module): The module to be added.
+ prefix (str): The prefix of the module
+ is_dcn_module (int|float|None): If the current module is a
+ submodule of DCN, `is_dcn_module` will be passed to
+ control conv_offset layer's learning rate. Defaults to None.
+ """
+ parameter_groups = {}
+ print(self.paramwise_cfg)
+ num_layers = self.paramwise_cfg.get('num_layers') + 2
+ layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
+ print("Build LayerDecayOptimizerConstructor %f - %d" % (layer_decay_rate, num_layers))
+ weight_decay = self.base_wd
+
+ for name, param in module.named_parameters():
+ if not param.requires_grad:
+ continue # frozen weights
+ if len(param.shape) == 1 or name.endswith(".bias") or 'pos_embed' in name:
+ group_name = "no_decay"
+ this_weight_decay = 0.
+ else:
+ group_name = "decay"
+ this_weight_decay = weight_decay
+
+ layer_id = get_num_layer_for_vit(name, num_layers)
+ group_name = "layer_%d_%s" % (layer_id, group_name)
+
+ if group_name not in parameter_groups:
+ scale = layer_decay_rate ** (num_layers - layer_id - 1)
+
+ parameter_groups[group_name] = {
+ "weight_decay": this_weight_decay,
+ "params": [],
+ "param_names": [],
+ "lr_scale": scale,
+ "group_name": group_name,
+ "lr": scale * self.base_lr,
+ }
+
+ parameter_groups[group_name]["params"].append(param)
+ parameter_groups[group_name]["param_names"].append(name)
+ rank, _ = get_dist_info()
+ if rank == 0:
+ to_display = {}
+ for key in parameter_groups:
+ to_display[key] = {
+ "param_names": parameter_groups[key]["param_names"],
+ "lr_scale": parameter_groups[key]["lr_scale"],
+ "lr": parameter_groups[key]["lr"],
+ "weight_decay": parameter_groups[key]["weight_decay"],
+ }
+ print("Param groups = %s" % json.dumps(to_display, indent=2))
+
+ params.extend(parameter_groups.values())
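+
+
+# Illustrative config sketch (assumption, not part of the original file): for a
+# 12-block ViT backbone, num_layers becomes 12 + 2 = 14, and a parameter in
+# backbone.blocks.i gets lr_scale = layer_decay_rate ** (12 - i), so deeper
+# blocks keep a larger fraction of the base learning rate.
+#
+#   optimizer = dict(
+#       type='AdamW', lr=5e-4, weight_decay=0.1,
+#       constructor='LayerDecayOptimizerConstructor',
+#       paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.75))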
diff --git a/mmpose/.DS_Store b/mmpose/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5e79dedb71c88c440699ad1843879498c7e7b4be
Binary files /dev/null and b/mmpose/.DS_Store differ
diff --git a/mmpose/__init__.py b/mmpose/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e52beb9ddfd6534895ae93bdaa1ab7098f510d81
--- /dev/null
+++ b/mmpose/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+
+from .version import __version__, short_version
+
+
+def digit_version(version_str):
+ digit_version = []
+ for x in version_str.split('.'):
+ if x.isdigit():
+ digit_version.append(int(x))
+ elif x.find('rc') != -1:
+ patch_version = x.split('rc')
+ digit_version.append(int(patch_version[0]) - 1)
+ digit_version.append(int(patch_version[1]))
+ return digit_version
+
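+# Illustrative examples (assumption, not part of the original file):
+#   digit_version('1.3.8')    -> [1, 3, 8]
+#   digit_version('1.3.0rc1') -> [1, 3, -1, 1]
+# so release candidates compare lower than the corresponding release.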
+
+mmcv_minimum_version = '1.3.8'
+mmcv_maximum_version = '1.5.0'
+mmcv_version = digit_version(mmcv.__version__)
+
+
+assert (mmcv_version >= digit_version(mmcv_minimum_version)
+ and mmcv_version <= digit_version(mmcv_maximum_version)), \
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+ f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
+
+__all__ = ['__version__', 'short_version']
diff --git a/mmpose/__pycache__/__init__.cpython-310.pyc b/mmpose/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df170bc1b980775b3babf1b03e38edb364db7c0a
Binary files /dev/null and b/mmpose/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/__pycache__/deprecated.cpython-310.pyc b/mmpose/__pycache__/deprecated.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0f4a18640979e83ad6a42e50f9c336784929b51
Binary files /dev/null and b/mmpose/__pycache__/deprecated.cpython-310.pyc differ
diff --git a/mmpose/__pycache__/version.cpython-310.pyc b/mmpose/__pycache__/version.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95c7970a908aadbf817bc787ca1e07b287c20832
Binary files /dev/null and b/mmpose/__pycache__/version.cpython-310.pyc differ
diff --git a/mmpose/apis/__init__.py b/mmpose/apis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e263edc4d6aa0a3380a3c2e8dc85e1a696bb164
--- /dev/null
+++ b/mmpose/apis/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .inference import (inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_pose_result)
+from .inference_3d import (extract_pose_sequence, inference_interhand_3d_model,
+ inference_mesh_model, inference_pose_lifter_model,
+ vis_3d_mesh_result, vis_3d_pose_result)
+from .inference_tracking import get_track_id, vis_pose_tracking_result
+from .test import multi_gpu_test, single_gpu_test
+from .train import init_random_seed, train_model
+
+__all__ = [
+ 'train_model', 'init_pose_model', 'inference_top_down_pose_model',
+ 'inference_bottom_up_pose_model', 'multi_gpu_test', 'single_gpu_test',
+ 'vis_pose_result', 'get_track_id', 'vis_pose_tracking_result',
+ 'inference_pose_lifter_model', 'vis_3d_pose_result',
+ 'inference_interhand_3d_model', 'extract_pose_sequence',
+ 'inference_mesh_model', 'vis_3d_mesh_result', 'process_mmdet_results',
+ 'init_random_seed'
+]
diff --git a/mmpose/apis/__pycache__/__init__.cpython-310.pyc b/mmpose/apis/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6814be3dbec6169ce2fbf3778ac29b027d331fdd
Binary files /dev/null and b/mmpose/apis/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/apis/__pycache__/inference.cpython-310.pyc b/mmpose/apis/__pycache__/inference.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98d6665a15384599d3922fe40d17d88df4f6ec52
Binary files /dev/null and b/mmpose/apis/__pycache__/inference.cpython-310.pyc differ
diff --git a/mmpose/apis/__pycache__/inference_3d.cpython-310.pyc b/mmpose/apis/__pycache__/inference_3d.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fd5eed1be5c8f702569452e946f3ec13cad234f1
Binary files /dev/null and b/mmpose/apis/__pycache__/inference_3d.cpython-310.pyc differ
diff --git a/mmpose/apis/__pycache__/inference_tracking.cpython-310.pyc b/mmpose/apis/__pycache__/inference_tracking.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d52ac9bf149eee1953b954c33e9b4f2d67dce09e
Binary files /dev/null and b/mmpose/apis/__pycache__/inference_tracking.cpython-310.pyc differ
diff --git a/mmpose/apis/__pycache__/test.cpython-310.pyc b/mmpose/apis/__pycache__/test.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..212996ca2c8e247ac06f37a2959e1f3d0c6a1340
Binary files /dev/null and b/mmpose/apis/__pycache__/test.cpython-310.pyc differ
diff --git a/mmpose/apis/__pycache__/train.cpython-310.pyc b/mmpose/apis/__pycache__/train.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..66f32ee2bd0af8c46180d0b02885331341495789
Binary files /dev/null and b/mmpose/apis/__pycache__/train.cpython-310.pyc differ
diff --git a/mmpose/apis/inference.py b/mmpose/apis/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..5363d40c3f8680af79b470f59b5144941a0c4436
--- /dev/null
+++ b/mmpose/apis/inference.py
@@ -0,0 +1,833 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+from PIL import Image
+
+from mmpose.core.post_processing import oks_nms
+from mmpose.datasets.dataset_info import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+from mmpose.models import build_posenet
+from mmpose.utils.hooks import OutputHook
+
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
+
+def init_pose_model(config, checkpoint=None, device='cuda:0'):
+ """Initialize a pose model from config file.
+
+ Args:
+ config (str or :obj:`mmcv.Config`): Config file path or the config
+ object.
+        checkpoint (str, optional): Checkpoint path. If left as None, the model
+            will not load any weights.
+        device (str): The device to place the model on. Default: 'cuda:0'.
+
+    Returns:
+        nn.Module: The constructed pose model.
+ """
+ if isinstance(config, str):
+ config = mmcv.Config.fromfile(config)
+ elif not isinstance(config, mmcv.Config):
+ raise TypeError('config must be a filename or Config object, '
+ f'but got {type(config)}')
+ config.model.pretrained = None
+ model = build_posenet(config.model)
+ if checkpoint is not None:
+ # load model checkpoint
+ load_checkpoint(model, checkpoint, map_location='cpu')
+ # save the config in the model for convenience
+ model.cfg = config
+ model.to(device)
+ model.eval()
+ return model
+
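+# Illustrative usage sketch (assumption, not part of the original file; config
+# and checkpoint paths are placeholders):
+#
+#   pose_model = init_pose_model('configs/some_pose_config.py',
+#                                'checkpoints/some_pose_ckpt.pth',
+#                                device='cpu')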
+
+def _xyxy2xywh(bbox_xyxy):
+ """Transform the bbox format from x1y1x2y2 to xywh.
+
+ Args:
+ bbox_xyxy (np.ndarray): Bounding boxes (with scores), shaped (n, 4) or
+ (n, 5). (left, top, right, bottom, [score])
+
+ Returns:
+ np.ndarray: Bounding boxes (with scores),
+ shaped (n, 4) or (n, 5). (left, top, width, height, [score])
+ """
+ bbox_xywh = bbox_xyxy.copy()
+ bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0] + 1
+ bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1] + 1
+
+ return bbox_xywh
+
+
+def _xywh2xyxy(bbox_xywh):
+ """Transform the bbox format from xywh to x1y1x2y2.
+
+ Args:
+ bbox_xywh (ndarray): Bounding boxes (with scores),
+ shaped (n, 4) or (n, 5). (left, top, width, height, [score])
+ Returns:
+ np.ndarray: Bounding boxes (with scores), shaped (n, 4) or
+ (n, 5). (left, top, right, bottom, [score])
+ """
+ bbox_xyxy = bbox_xywh.copy()
+ bbox_xyxy[:, 2] = bbox_xyxy[:, 2] + bbox_xyxy[:, 0] - 1
+ bbox_xyxy[:, 3] = bbox_xyxy[:, 3] + bbox_xyxy[:, 1] - 1
+
+ return bbox_xyxy
+
+
+def _box2cs(cfg, box):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h
+
+ Returns:
+ tuple: A tuple containing center and scale.
+
+ - np.ndarray[float32](2,): Center of the bbox (x, y).
+ - np.ndarray[float32](2,): Scale of the bbox w & h.
+ """
+
+ x, y, w, h = box[:4]
+ input_size = cfg.data_cfg['image_size']
+ aspect_ratio = input_size[0] / input_size[1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ scale = scale * 1.25
+
+ return center, scale
+
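+# Worked example (assumption, not part of the original file): with
+# image_size = [192, 256] the aspect ratio is 192 / 256 = 0.75, so a square box
+# [50, 50, 100, 100] is stretched vertically to keep that ratio:
+#   center = [100., 100.]
+#   h -> 100 / 0.75 = 133.33
+#   scale = [100 / 200, 133.33 / 200] * 1.25 ~= [0.625, 0.833]
+# The factor 1.25 pads the crop; 200 is the fixed pixel std.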
+
+def _inference_single_pose_model(model,
+ img_or_path,
+ bboxes,
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ return_heatmap=False):
+ """Inference human bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+ bboxes (list | np.ndarray): All bounding boxes (with scores),
+ shaped (N, 4) or (N, 5). (left, top, width, height, [score])
+ where N is number of bounding boxes.
+ dataset (str): Dataset name. Deprecated.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+        return_heatmap (bool): Flag to return the output heatmap.
+            Default: False.
+
+ Returns:
+ ndarray[NxKx3]: Predicted pose x, y, score.
+ heatmap[N, K, H, W]: Model output heatmap.
+ """
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset_info is not None:
+ dataset_name = dataset_info.dataset_name
+ flip_pairs = dataset_info.flip_pairs
+ else:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset in ('TopDownCocoDataset', 'TopDownOCHumanDataset',
+ 'AnimalMacaqueDataset'):
+ flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
+ [13, 14], [15, 16]]
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
+ [13, 14], [15, 16]]
+ foot = [[17, 20], [18, 21], [19, 22]]
+
+ face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
+ [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
+ [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
+ [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
+ [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
+
+ hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
+ [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
+ [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
+ [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
+ [111, 132]]
+ flip_pairs = body + foot + face + hand
+ elif dataset == 'TopDownAicDataset':
+ flip_pairs = [[0, 3], [1, 4], [2, 5], [6, 9], [7, 10], [8, 11]]
+ elif dataset == 'TopDownMpiiDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
+ elif dataset == 'TopDownMpiiTrbDataset':
+ flip_pairs = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11],
+ [14, 15], [16, 22], [28, 34], [17, 23], [29, 35],
+ [18, 24], [30, 36], [19, 25], [31, 37], [20, 26],
+ [32, 38], [21, 27], [33, 39]]
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset', 'InterHand2DDataset'):
+ flip_pairs = []
+ elif dataset in 'Face300WDataset':
+ flip_pairs = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11],
+ [6, 10], [7, 9], [17, 26], [18, 25], [19, 24],
+ [20, 23], [21, 22], [31, 35], [32, 34], [36, 45],
+ [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
+ [48, 54], [49, 53], [50, 52], [61, 63], [60, 64],
+ [67, 65], [58, 56], [59, 55]]
+
+ elif dataset in 'FaceAFLWDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9],
+ [12, 14], [15, 17]]
+
+ elif dataset in 'FaceCOFWDataset':
+ flip_pairs = [[0, 1], [4, 6], [2, 3], [5, 7], [8, 9], [10, 11],
+ [12, 14], [16, 17], [13, 15], [18, 19], [22, 23]]
+
+ elif dataset in 'FaceWFLWDataset':
+ flip_pairs = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27],
+ [6, 26], [7, 25], [8, 24], [9, 23], [10, 22],
+ [11, 21], [12, 20], [13, 19], [14, 18], [15, 17],
+ [33, 46], [34, 45], [35, 44], [36, 43], [37, 42],
+ [38, 50], [39, 49], [40, 48], [41, 47], [60, 72],
+ [61, 71], [62, 70], [63, 69], [64, 68], [65, 75],
+ [66, 74], [67, 73], [55, 59], [56, 58], [76, 82],
+ [77, 81], [78, 80], [87, 83], [86, 84], [88, 92],
+ [89, 91], [95, 93], [96, 97]]
+
+ elif dataset in 'AnimalFlyDataset':
+ flip_pairs = [[1, 2], [6, 18], [7, 19], [8, 20], [9, 21], [10, 22],
+ [11, 23], [12, 24], [13, 25], [14, 26], [15, 27],
+ [16, 28], [17, 29], [30, 31]]
+ elif dataset in 'AnimalHorse10Dataset':
+ flip_pairs = []
+
+ elif dataset in 'AnimalLocustDataset':
+ flip_pairs = [[5, 20], [6, 21], [7, 22], [8, 23], [9, 24],
+ [10, 25], [11, 26], [12, 27], [13, 28], [14, 29],
+ [15, 30], [16, 31], [17, 32], [18, 33], [19, 34]]
+
+ elif dataset in 'AnimalZebraDataset':
+ flip_pairs = [[3, 4], [5, 6]]
+
+ elif dataset in 'AnimalPoseDataset':
+ flip_pairs = [[0, 1], [2, 3], [8, 9], [10, 11], [12, 13], [14, 15],
+ [16, 17], [18, 19]]
+ else:
+ raise NotImplementedError()
+ dataset_name = dataset
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+ data = {
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'bbox_score':
+ bbox[4] if len(bbox) == 5 else 1,
+ 'bbox_id':
+ 0, # need to be assigned if batch_size > 1
+ 'dataset':
+ dataset_name,
+ 'joints_3d':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'rotation':
+ 0,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_pairs': flip_pairs
+ }
+ }
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, [device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False,
+ return_heatmap=return_heatmap)
+
+ return result['preds'], result['output_heatmap']
+
+
+def inference_top_down_pose_model(model,
+ img_or_path,
+ person_results=None,
+ bbox_thr=None,
+ format='xywh',
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ return_heatmap=False,
+ outputs=None):
+ """Inference a single image with a list of person bounding boxes.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ model (nn.Module): The loaded pose model.
+        img_or_path (str | np.ndarray): Image filename or loaded image.
+ person_results (list(dict), optional): a list of detected persons that
+ contains ``bbox`` and/or ``track_id``:
+
+ - ``bbox`` (4, ) or (5, ): The person bounding box, which contains
+ 4 box coordinates (and score).
+ - ``track_id`` (int): The unique id for each human instance. If
+ not provided, a dummy person result with a bbox covering
+ the entire image will be used. Default: None.
+ bbox_thr (float | None): Threshold for bounding boxes. Only bboxes
+ with higher scores will be fed into the pose detector.
+ If bbox_thr is None, all boxes will be used.
+ format (str): bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+
+ - `xyxy` means (left, top, right, bottom),
+ - `xywh` means (left, top, width, height).
+ dataset (str): Dataset name, e.g. 'TopDownCocoDataset'.
+ It is deprecated. Please use dataset_info instead.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ return_heatmap (bool) : Flag to return heatmap, default: False
+ outputs (list(str) | tuple(str)) : Names of layers whose outputs
+ need to be returned. Default: None.
+
+ Returns:
+ tuple:
+ - pose_results (list[dict]): The bbox & pose info. \
+ Each item in the list is a dictionary, \
+ containing the bbox: (left, top, right, bottom, [score]) \
+ and the pose (ndarray[Kx3]): x, y, score.
+ - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
+ torch.Tensor[N, K, H, W]]]): \
+ Output feature maps from layers specified in `outputs`. \
+ Includes 'heatmap' if `return_heatmap` is True.
+ """
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663'
+ ' for details.', DeprecationWarning)
+
+    # only two bbox formats ('xyxy' and 'xywh') are supported.
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+ returned_outputs = []
+
+ if person_results is None:
+ # create dummy person results
+ if isinstance(img_or_path, str):
+ width, height = Image.open(img_or_path).size
+ else:
+ height, width = img_or_path.shape[:2]
+ person_results = [{'bbox': np.array([0, 0, width, height])}]
+
+ if len(person_results) == 0:
+ return pose_results, returned_outputs
+
+ # Change for-loop preprocess each bbox to preprocess all bboxes at once.
+ bboxes = np.array([box['bbox'] for box in person_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ person_results = [person_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+ # if bbox_thr remove all bounding box
+ if len(bboxes_xywh) == 0:
+ return [], []
+
+ with OutputHook(model, outputs=outputs, as_tensor=False) as h:
+ # poses is results['pred'] # N x 17x 3
+ poses, heatmap = _inference_single_pose_model(
+ model,
+ img_or_path,
+ bboxes_xywh,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap)
+
+ if return_heatmap:
+ h.layer_outputs['heatmap'] = heatmap
+
+ returned_outputs.append(h.layer_outputs)
+
+ assert len(poses) == len(person_results), print(
+ len(poses), len(person_results), len(bboxes_xyxy))
+ for pose, person_result, bbox_xyxy in zip(poses, person_results,
+ bboxes_xyxy):
+ pose_result = person_result.copy()
+ pose_result['keypoints'] = pose
+ pose_result['bbox'] = bbox_xyxy
+ pose_results.append(pose_result)
+
+ return pose_results, returned_outputs
+
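+# Illustrative usage sketch (assumption, not part of the original file; the
+# image path and `pose_model` are placeholders):
+#
+#   person_results = [{'bbox': np.array([0, 0, 640, 480, 0.99])}]
+#   pose_results, _ = inference_top_down_pose_model(
+#       pose_model, 'demo.jpg', person_results, bbox_thr=0.3, format='xyxy')
+#   # each entry carries 'bbox' (x1, y1, x2, y2, score) and 'keypoints'
+#   # of shape (K, 3) holding (x, y, score) per joint.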
+
+def inference_bottom_up_pose_model(model,
+ img_or_path,
+ dataset='BottomUpCocoDataset',
+ dataset_info=None,
+ pose_nms_thr=0.9,
+ return_heatmap=False,
+ outputs=None):
+ """Inference a single image with a bottom-up pose model.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ model (nn.Module): The loaded pose model.
+        img_or_path (str | np.ndarray): Image filename or loaded image.
+ dataset (str): Dataset name, e.g. 'BottomUpCocoDataset'.
+ It is deprecated. Please use dataset_info instead.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ pose_nms_thr (float): retain oks overlap < pose_nms_thr, default: 0.9.
+ return_heatmap (bool) : Flag to return heatmap, default: False.
+ outputs (list(str) | tuple(str)) : Names of layers whose outputs
+ need to be returned, default: None.
+
+ Returns:
+ tuple:
+ - pose_results (list[np.ndarray]): The predicted pose info. \
+ The length of the list is the number of people (P). \
+ Each item in the list is a ndarray, containing each \
+ person's pose (np.ndarray[Kx3]): x, y, score.
+ - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
+ torch.Tensor[N, K, H, W]]]): \
+ Output feature maps from layers specified in `outputs`. \
+ Includes 'heatmap' if `return_heatmap` is True.
+ """
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+
+ if dataset_info is not None:
+ dataset_name = dataset_info.dataset_name
+ flip_index = dataset_info.flip_index
+ sigmas = getattr(dataset_info, 'sigmas', None)
+ else:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ assert (dataset == 'BottomUpCocoDataset')
+ dataset_name = dataset
+ flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+ sigmas = None
+
+ pose_results = []
+ returned_outputs = []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ # prepare data
+ data = {
+ 'dataset': dataset_name,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_index': flip_index,
+ }
+ }
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ data = collate([data], samples_per_gpu=1)
+ data = scatter(data, [device])[0]
+
+ with OutputHook(model, outputs=outputs, as_tensor=False) as h:
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=data['img'],
+ img_metas=data['img_metas'],
+ return_loss=False,
+ return_heatmap=return_heatmap)
+
+ if return_heatmap:
+ h.layer_outputs['heatmap'] = result['output_heatmap']
+
+ returned_outputs.append(h.layer_outputs)
+
+ for idx, pred in enumerate(result['preds']):
+ area = (np.max(pred[:, 0]) - np.min(pred[:, 0])) * (
+ np.max(pred[:, 1]) - np.min(pred[:, 1]))
+ pose_results.append({
+ 'keypoints': pred[:, :3],
+ 'score': result['scores'][idx],
+ 'area': area,
+ })
+
+ # pose nms
+ score_per_joint = cfg.model.test_cfg.get('score_per_joint', False)
+ keep = oks_nms(
+ pose_results,
+ pose_nms_thr,
+ sigmas,
+ score_per_joint=score_per_joint)
+ pose_results = [pose_results[_keep] for _keep in keep]
+
+ return pose_results, returned_outputs
+
+
+def vis_pose_result(model,
+ img,
+ result,
+ radius=4,
+ thickness=1,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ show=False,
+ out_file=None):
+ """Visualize the detection results on the image.
+
+ Args:
+ model (nn.Module): The loaded detector.
+ img (str | np.ndarray): Image filename or loaded image.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ kpt_score_thr (float): The threshold to visualize the keypoints.
+        bbox_color (str): Color of the bounding boxes. Default: 'green'.
+        dataset (str): Dataset name. Deprecated; use ``dataset_info`` instead.
+        dataset_info (DatasetInfo): A class containing all dataset info.
+        show (bool): Whether to show the image. Default: False.
+ out_file (str|None): The filename of the output visualization image.
+ """
+
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+
+ if dataset_info is not None:
+ skeleton = dataset_info.skeleton
+ pose_kpt_color = dataset_info.pose_kpt_color
+ pose_link_color = dataset_info.pose_link_color
+ else:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255],
+ [255, 0, 0], [255, 255, 255]])
+
+ if dataset in ('TopDownCocoDataset', 'BottomUpCocoDataset',
+ 'TopDownOCHumanDataset', 'AnimalMacaqueDataset'):
+ # show the results
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [4, 6]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+ ]]
+ pose_kpt_color = palette[[
+ 16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0
+ ]]
+
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ # show the results
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2],
+ [1, 3], [2, 4], [3, 5], [4, 6], [15, 17], [15, 18],
+ [15, 19], [16, 20], [16, 21], [16, 22], [91, 92],
+ [92, 93], [93, 94], [94, 95], [91, 96], [96, 97],
+ [97, 98], [98, 99], [91, 100], [100, 101], [101, 102],
+ [102, 103], [91, 104], [104, 105], [105, 106],
+ [106, 107], [91, 108], [108, 109], [109, 110],
+ [110, 111], [112, 113], [113, 114], [114, 115],
+ [115, 116], [112, 117], [117, 118], [118, 119],
+ [119, 120], [112, 121], [121, 122], [122, 123],
+ [123, 124], [112, 125], [125, 126], [126, 127],
+ [127, 128], [112, 129], [129, 130], [130, 131],
+ [131, 132]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+ ] + [16, 16, 16, 16, 16, 16] + [
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ] + [
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ]]
+ pose_kpt_color = palette[
+ [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] +
+ [0, 0, 0, 0, 0, 0] + [19] * (68 + 42)]
+
+ elif dataset == 'TopDownAicDataset':
+ skeleton = [[2, 1], [1, 0], [0, 13], [13, 3], [3, 4], [4, 5],
+ [8, 7], [7, 6], [6, 9], [9, 10], [10, 11], [12, 13],
+ [0, 6], [3, 9]]
+
+ pose_link_color = palette[[
+ 9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 0, 7, 7
+ ]]
+ pose_kpt_color = palette[[
+ 9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 0, 0
+ ]]
+
+ elif dataset == 'TopDownMpiiDataset':
+ skeleton = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5], [6, 7],
+ [7, 8], [8, 9], [8, 12], [12, 11], [11, 10], [8, 13],
+ [13, 14], [14, 15]]
+
+ pose_link_color = palette[[
+ 16, 16, 16, 16, 16, 16, 7, 7, 0, 9, 9, 9, 9, 9, 9
+ ]]
+ pose_kpt_color = palette[[
+ 16, 16, 16, 16, 16, 16, 7, 7, 0, 0, 9, 9, 9, 9, 9, 9
+ ]]
+
+ elif dataset == 'TopDownMpiiTrbDataset':
+ skeleton = [[12, 13], [13, 0], [13, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [0, 6], [1, 7], [6, 7], [6, 8], [7,
+ 9], [8, 10],
+ [9, 11], [14, 15], [16, 17], [18, 19], [20, 21],
+ [22, 23], [24, 25], [26, 27], [28, 29], [30, 31],
+ [32, 33], [34, 35], [36, 37], [38, 39]]
+
+ pose_link_color = palette[[16] * 14 + [19] * 13]
+ pose_kpt_color = palette[[16] * 14 + [0] * 26]
+
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset'):
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7],
+ [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13],
+ [13, 14], [14, 15], [15, 16], [0, 17], [17, 18],
+ [18, 19], [19, 20]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ]]
+ pose_kpt_color = palette[[
+ 0, 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
+ 16, 16
+ ]]
+
+ elif dataset == 'InterHand2DDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [4, 5], [5, 6], [6, 7], [8, 9],
+ [9, 10], [10, 11], [12, 13], [13, 14], [14, 15],
+ [16, 17], [17, 18], [18, 19], [3, 20], [7, 20],
+ [11, 20], [15, 20], [19, 20]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 0, 4, 8, 12,
+ 16
+ ]]
+ pose_kpt_color = palette[[
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16, 0
+ ]]
+
+ elif dataset == 'Face300WDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 68]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceAFLWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 19]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceCOFWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 29]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceWFLWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 98]
+ kpt_score_thr = 0
+
+ elif dataset == 'AnimalHorse10Dataset':
+ skeleton = [[0, 1], [1, 12], [12, 16], [16, 21], [21, 17],
+ [17, 11], [11, 10], [10, 8], [8, 9], [9, 12], [2, 3],
+ [3, 4], [5, 6], [6, 7], [13, 14], [14, 15], [18, 19],
+ [19, 20]]
+
+ pose_link_color = palette[[4] * 10 + [6] * 2 + [6] * 2 + [7] * 2 +
+ [7] * 2]
+ pose_kpt_color = palette[[
+ 4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 7, 7, 7, 4, 4, 7, 7, 7,
+ 4
+ ]]
+
+ elif dataset == 'AnimalFlyDataset':
+ skeleton = [[1, 0], [2, 0], [3, 0], [4, 3], [5, 4], [7, 6], [8, 7],
+ [9, 8], [11, 10], [12, 11], [13, 12], [15, 14],
+ [16, 15], [17, 16], [19, 18], [20, 19], [21, 20],
+ [23, 22], [24, 23], [25, 24], [27, 26], [28, 27],
+ [29, 28], [30, 3], [31, 3]]
+
+ pose_link_color = palette[[0] * 25]
+ pose_kpt_color = palette[[0] * 32]
+
+ elif dataset == 'AnimalLocustDataset':
+ skeleton = [[1, 0], [2, 1], [3, 2], [4, 3], [6, 5], [7, 6], [9, 8],
+ [10, 9], [11, 10], [13, 12], [14, 13], [15, 14],
+ [17, 16], [18, 17], [19, 18], [21, 20], [22, 21],
+ [24, 23], [25, 24], [26, 25], [28, 27], [29, 28],
+ [30, 29], [32, 31], [33, 32], [34, 33]]
+
+ pose_link_color = palette[[0] * 26]
+ pose_kpt_color = palette[[0] * 35]
+
+ elif dataset == 'AnimalZebraDataset':
+ skeleton = [[1, 0], [2, 1], [3, 2], [4, 2], [5, 7], [6, 7], [7, 2],
+ [8, 7]]
+
+ pose_link_color = palette[[0] * 8]
+ pose_kpt_color = palette[[0] * 9]
+
+    elif dataset == 'AnimalPoseDataset':
+ skeleton = [[0, 1], [0, 2], [1, 3], [0, 4], [1, 4], [4, 5], [5, 7],
+ [6, 7], [5, 8], [8, 12], [12, 16], [5, 9], [9, 13],
+ [13, 17], [6, 10], [10, 14], [14, 18], [6, 11],
+ [11, 15], [15, 19]]
+
+ pose_link_color = palette[[0] * 20]
+ pose_kpt_color = palette[[0] * 20]
+ else:
+        raise NotImplementedError()
+
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(
+ img,
+ result,
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ kpt_score_thr=kpt_score_thr,
+ bbox_color=bbox_color,
+ show=show,
+ out_file=out_file)
+
+ return img
+
+
+def process_mmdet_results(mmdet_results, cat_id=1):
+ """Process mmdet results, and return a list of bboxes.
+
+ Args:
+ mmdet_results (list|tuple): mmdet results.
+ cat_id (int): category id (default: 1 for human)
+
+ Returns:
+ person_results (list): a list of detected bounding boxes
+ """
+ if isinstance(mmdet_results, tuple):
+ det_results = mmdet_results[0]
+ else:
+ det_results = mmdet_results
+
+ bboxes = det_results[cat_id - 1]
+
+ person_results = []
+ for bbox in bboxes:
+ person = {}
+ person['bbox'] = bbox
+ person_results.append(person)
+
+ return person_results
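+
+# Illustrative sketch, not from the original module: `process_mmdet_results`
+# expects per-class detections, i.e. a list indexed by category where each
+# entry is an ndarray of shape (num_boxes, 5). The toy input below is made up
+# purely for demonstration:
+#
+#   mmdet_results = [np.array([[50., 60., 200., 400., 0.98]])]  # class 1: person
+#   person_results = process_mmdet_results(mmdet_results, cat_id=1)
+#   # person_results == [{'bbox': mmdet_results[0][0]}]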
diff --git a/mmpose/apis/inference_3d.py b/mmpose/apis/inference_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..f59f20a1d0794f542c60c2bcfc20bfa4a014a55a
--- /dev/null
+++ b/mmpose/apis/inference_3d.py
@@ -0,0 +1,791 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+import torch
+from mmcv.parallel import collate, scatter
+
+from mmpose.datasets.pipelines import Compose
+from .inference import _box2cs, _xywh2xyxy, _xyxy2xywh
+
+
+def extract_pose_sequence(pose_results, frame_idx, causal, seq_len, step=1):
+ """Extract the target frame from 2D pose results, and pad the sequence to a
+ fixed length.
+
+ Args:
+ pose_results (list[list[dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required \
+ when ``with_track_id==True``.
+            - bbox ((4, ) or (5, )): left, top, right, bottom, [score]
+
+ frame_idx (int): The index of the frame in the original video.
+ causal (bool): If True, the target frame is the last frame in
+ a sequence. Otherwise, the target frame is in the middle of
+ a sequence.
+ seq_len (int): The number of frames in the input sequence.
+ step (int): Step size to extract frames from the video.
+
+ Returns:
+ list[list[dict]]: Multi-frame pose detection results stored \
+ in a nested list with a length of seq_len.
+ """
+
+ if causal:
+ frames_left = seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (seq_len - 1) // 2
+ frames_right = frames_left
+ num_frames = len(pose_results)
+
+ # get the padded sequence
+ pad_left = max(0, frames_left - frame_idx // step)
+ pad_right = max(0, frames_right - (num_frames - 1 - frame_idx) // step)
+ start = max(frame_idx % step, frame_idx - frames_left * step)
+ end = min(num_frames - (num_frames - 1 - frame_idx) % step,
+ frame_idx + frames_right * step + 1)
+ pose_results_seq = [pose_results[0]] * pad_left + \
+ pose_results[start:end:step] + [pose_results[-1]] * pad_right
+ return pose_results_seq
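+
+# Illustrative sketch, not from the original module: a small worked example of
+# the padding behaviour. With 5 frames of 2D results, ``seq_len=3``,
+# ``causal=False`` and ``step=1``, requesting ``frame_idx=0`` pads on the left
+# by replicating the first frame:
+#
+#   seq = extract_pose_sequence(pose_results, frame_idx=0, causal=False,
+#                               seq_len=3, step=1)
+#   # seq == [pose_results[0], pose_results[0], pose_results[1]]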
+
+
+def _gather_pose_lifter_inputs(pose_results,
+ bbox_center,
+ bbox_scale,
+ norm_pose_2d=False):
+ """Gather input data (keypoints and track_id) for pose lifter model.
+
+ Note:
+ - The temporal length of the pose detection results: T
+ - The number of the person instances: N
+ - The number of the keypoints: K
+ - The channel number of each keypoint: C
+
+ Args:
+ pose_results (List[List[Dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+            - track_id (int): unique id of each person, required when
+                ``with_track_id==True``
+            - bbox ((4, ) or (5, )): left, top, right, bottom, [score]
+
+ bbox_center (ndarray[1, 2]): x, y. The average center coordinate of the
+ bboxes in the dataset.
+ bbox_scale (int|float): The average scale of the bboxes in the dataset.
+ norm_pose_2d (bool): If True, scale the bbox (along with the 2D
+ pose) to bbox_scale, and move the bbox (along with the 2D pose) to
+ bbox_center. Default: False.
+
+ Returns:
+ list[list[dict]]: Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required when
+ ``with_track_id==True``
+ """
+ sequence_inputs = []
+ for frame in pose_results:
+ frame_inputs = []
+ for res in frame:
+ inputs = dict()
+
+ if norm_pose_2d:
+ bbox = res['bbox']
+ center = np.array([[(bbox[0] + bbox[2]) / 2,
+ (bbox[1] + bbox[3]) / 2]])
+ scale = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
+ inputs['keypoints'] = (res['keypoints'][:, :2] - center) \
+ / scale * bbox_scale + bbox_center
+ else:
+ inputs['keypoints'] = res['keypoints'][:, :2]
+
+ if res['keypoints'].shape[1] == 3:
+ inputs['keypoints'] = np.concatenate(
+ [inputs['keypoints'], res['keypoints'][:, 2:]], axis=1)
+
+ if 'track_id' in res:
+ inputs['track_id'] = res['track_id']
+ frame_inputs.append(inputs)
+ sequence_inputs.append(frame_inputs)
+ return sequence_inputs
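+
+# Illustrative note, not from the original module: with ``norm_pose_2d=True``
+# each keypoint is mapped into the dataset's average bbox frame via
+# (kpt - instance_center) / instance_scale * bbox_scale + bbox_center.
+# E.g. for bbox [0, 0, 200, 100] (center [100, 50], scale 200) and the H36M
+# defaults bbox_center=[[528, 427]], bbox_scale=400 used below, the keypoint
+# [150, 50] maps to [628, 427].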
+
+
+def _collate_pose_sequence(pose_results, with_track_id=True, target_frame=-1):
+ """Reorganize multi-frame pose detection results into individual pose
+ sequences.
+
+ Note:
+ - The temporal length of the pose detection results: T
+ - The number of the person instances: N
+ - The number of the keypoints: K
+ - The channel number of each keypoint: C
+
+ Args:
+ pose_results (List[List[Dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required when
+                ``with_track_id==True``
+
+ with_track_id (bool): If True, the element in pose_results is expected
+ to contain "track_id", which will be used to gather the pose
+ sequence of a person from multiple frames. Otherwise, the pose
+ results in each frame are expected to have a consistent number and
+ order of identities. Default is True.
+ target_frame (int): The index of the target frame. Default: -1.
+ """
+ T = len(pose_results)
+ assert T > 0
+
+ target_frame = (T + target_frame) % T # convert negative index to positive
+
+ N = len(pose_results[target_frame]) # use identities in the target frame
+ if N == 0:
+ return []
+
+ K, C = pose_results[target_frame][0]['keypoints'].shape
+
+ track_ids = None
+ if with_track_id:
+ track_ids = [res['track_id'] for res in pose_results[target_frame]]
+
+ pose_sequences = []
+ for idx in range(N):
+ pose_seq = dict()
+ # gather static information
+ for k, v in pose_results[target_frame][idx].items():
+ if k != 'keypoints':
+ pose_seq[k] = v
+ # gather keypoints
+ if not with_track_id:
+ pose_seq['keypoints'] = np.stack(
+ [frame[idx]['keypoints'] for frame in pose_results])
+ else:
+ keypoints = np.zeros((T, K, C), dtype=np.float32)
+ keypoints[target_frame] = pose_results[target_frame][idx][
+ 'keypoints']
+ # find the left most frame containing track_ids[idx]
+ for frame_idx in range(target_frame - 1, -1, -1):
+ contains_idx = False
+ for res in pose_results[frame_idx]:
+ if res['track_id'] == track_ids[idx]:
+ keypoints[frame_idx] = res['keypoints']
+ contains_idx = True
+ break
+ if not contains_idx:
+ # replicate the left most frame
+ keypoints[:frame_idx + 1] = keypoints[frame_idx + 1]
+ break
+ # find the right most frame containing track_idx[idx]
+ for frame_idx in range(target_frame + 1, T):
+ contains_idx = False
+ for res in pose_results[frame_idx]:
+ if res['track_id'] == track_ids[idx]:
+ keypoints[frame_idx] = res['keypoints']
+ contains_idx = True
+ break
+ if not contains_idx:
+ # replicate the right most frame
+ keypoints[frame_idx + 1:] = keypoints[frame_idx]
+ break
+ pose_seq['keypoints'] = keypoints
+ pose_sequences.append(pose_seq)
+
+ return pose_sequences
+
+
+def inference_pose_lifter_model(model,
+ pose_results_2d,
+ dataset=None,
+ dataset_info=None,
+ with_track_id=True,
+ image_size=None,
+ norm_pose_2d=False):
+ """Inference 3D pose from 2D pose sequences using a pose lifter model.
+
+ Args:
+ model (nn.Module): The loaded pose lifter model
+ pose_results_2d (list[list[dict]]): The 2D pose sequences stored in a
+ nested list. Each element of the outer list is the 2D pose results
+ of a single frame, and each element of the inner list is the 2D
+ pose of one person, which contains:
+
+ - "keypoints" (ndarray[K, 2 or 3]): x, y, [score]
+ - "track_id" (int)
+ dataset (str): Dataset name, e.g. 'Body3DH36MDataset'
+ with_track_id: If True, the element in pose_results_2d is expected to
+ contain "track_id", which will be used to gather the pose sequence
+ of a person from multiple frames. Otherwise, the pose results in
+ each frame are expected to have a consistent number and order of
+ identities. Default is True.
+ image_size (tuple|list): image width, image height. If None, image size
+ will not be contained in dict ``data``.
+ norm_pose_2d (bool): If True, scale the bbox (along with the 2D
+ pose) to the average bbox scale of the dataset, and move the bbox
+ (along with the 2D pose) to the average bbox center of the dataset.
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element is the result of \
+ an instance, which contains:
+
+ - "keypoints_3d" (ndarray[K, 3]): predicted 3D keypoints
+ - "keypoints" (ndarray[K, 2 or 3]): from the last frame in \
+ ``pose_results_2d``.
+ - "track_id" (int): from the last frame in ``pose_results_2d``. \
+ If there is no valid instance, an empty list will be \
+ returned.
+ """
+ cfg = model.cfg
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ if dataset_info is not None:
+ flip_pairs = dataset_info.flip_pairs
+ assert 'stats_info' in dataset_info._dataset_info
+ bbox_center = dataset_info._dataset_info['stats_info']['bbox_center']
+ bbox_scale = dataset_info._dataset_info['stats_info']['bbox_scale']
+ else:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset == 'Body3DH36MDataset':
+ flip_pairs = [[1, 4], [2, 5], [3, 6], [11, 14], [12, 15], [13, 16]]
+ bbox_center = np.array([[528, 427]], dtype=np.float32)
+ bbox_scale = 400
+ else:
+ raise NotImplementedError()
+
+ target_idx = -1 if model.causal else len(pose_results_2d) // 2
+ pose_lifter_inputs = _gather_pose_lifter_inputs(pose_results_2d,
+ bbox_center, bbox_scale,
+ norm_pose_2d)
+ pose_sequences_2d = _collate_pose_sequence(pose_lifter_inputs,
+ with_track_id, target_idx)
+
+ if not pose_sequences_2d:
+ return []
+
+ batch_data = []
+ for seq in pose_sequences_2d:
+ pose_2d = seq['keypoints'].astype(np.float32)
+ T, K, C = pose_2d.shape
+
+ input_2d = pose_2d[..., :2]
+ if C > 2:
+ input_2d_visible = pose_2d[..., 2:3]
+ else:
+ input_2d_visible = np.ones((T, K, 1), dtype=np.float32)
+
+ # TODO: Will be removed in the later versions
+ # Dummy 3D input
+ # This is for compatibility with configs in mmpose<=v0.14.0, where a
+ # 3D input is required to generate denormalization parameters. This
+ # part will be removed in the future.
+ target = np.zeros((K, 3), dtype=np.float32)
+ target_visible = np.ones((K, 1), dtype=np.float32)
+
+ # Dummy image path
+ # This is for compatibility with configs in mmpose<=v0.14.0, where
+ # target_image_path is required. This part will be removed in the
+ # future.
+ target_image_path = None
+
+ data = {
+ 'input_2d': input_2d,
+ 'input_2d_visible': input_2d_visible,
+ 'target': target,
+ 'target_visible': target_visible,
+ 'target_image_path': target_image_path,
+ 'ann_info': {
+ 'num_joints': K,
+ 'flip_pairs': flip_pairs
+ }
+ }
+
+ if image_size is not None:
+ assert len(image_size) == 2
+ data['image_width'] = image_size[0]
+ data['image_height'] = image_size[1]
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, target_gpus=[device])[0]
+
+ with torch.no_grad():
+ result = model(
+ input=batch_data['input'],
+ metas=batch_data['metas'],
+ return_loss=False)
+
+ poses_3d = result['preds']
+ if poses_3d.shape[-1] != 4:
+ assert poses_3d.shape[-1] == 3
+ dummy_score = np.ones(
+ poses_3d.shape[:-1] + (1, ), dtype=poses_3d.dtype)
+ poses_3d = np.concatenate((poses_3d, dummy_score), axis=-1)
+ pose_results = []
+ for pose_2d, pose_3d in zip(pose_sequences_2d, poses_3d):
+ pose_result = pose_2d.copy()
+ pose_result['keypoints_3d'] = pose_3d
+ pose_results.append(pose_result)
+
+ return pose_results
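+
+# Illustrative sketch, not from the original module: a hedged usage example.
+# ``lift_model`` is assumed to be a loaded pose-lifter (e.g. via
+# ``init_pose_model``, not shown here) and ``pose_results_2d`` a list of
+# per-frame 2D results with ``track_id`` already assigned; ``lift_dataset_info``
+# and ``seq_len=27`` are assumptions for this sketch:
+#
+#   seq = extract_pose_sequence(pose_results_2d, frame_idx=i,
+#                               causal=lift_model.causal, seq_len=27, step=1)
+#   poses_3d = inference_pose_lifter_model(lift_model, seq,
+#                                          dataset_info=lift_dataset_info,
+#                                          with_track_id=True,
+#                                          norm_pose_2d=True)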
+
+
+def vis_3d_pose_result(model,
+ result,
+ img=None,
+ dataset='Body3DH36MDataset',
+ dataset_info=None,
+ kpt_score_thr=0.3,
+ radius=8,
+ thickness=2,
+ num_instances=-1,
+ show=False,
+ out_file=None):
+ """Visualize the 3D pose estimation results.
+
+ Args:
+ model (nn.Module): The loaded model.
+        result (list[dict]): The 3D pose estimation results.
+ """
+
+ if dataset_info is not None:
+ skeleton = dataset_info.skeleton
+ pose_kpt_color = dataset_info.pose_kpt_color
+ pose_link_color = dataset_info.pose_link_color
+ else:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255],
+ [255, 0, 0], [255, 255, 255]])
+
+ if dataset == 'Body3DH36MDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7],
+ [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13],
+ [8, 14], [14, 15], [15, 16]]
+
+ pose_kpt_color = palette[[
+ 9, 0, 0, 0, 16, 16, 16, 9, 9, 9, 9, 16, 16, 16, 0, 0, 0
+ ]]
+ pose_link_color = palette[[
+ 0, 0, 0, 16, 16, 16, 9, 9, 9, 9, 16, 16, 16, 0, 0, 0
+ ]]
+
+ elif dataset == 'InterHand3DDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 20], [4, 5], [5, 6],
+ [6, 7], [7, 20], [8, 9], [9, 10], [10, 11], [11, 20],
+ [12, 13], [13, 14], [14, 15], [15, 20], [16, 17],
+ [17, 18], [18, 19], [19, 20], [21, 22], [22, 23],
+ [23, 24], [24, 41], [25, 26], [26, 27], [27, 28],
+ [28, 41], [29, 30], [30, 31], [31, 32], [32, 41],
+ [33, 34], [34, 35], [35, 36], [36, 41], [37, 38],
+ [38, 39], [39, 40], [40, 41]]
+
+ pose_kpt_color = [[14, 128, 250], [14, 128, 250], [14, 128, 250],
+ [14, 128, 250], [80, 127, 255], [80, 127, 255],
+ [80, 127, 255], [80, 127, 255], [71, 99, 255],
+ [71, 99, 255], [71, 99, 255], [71, 99, 255],
+ [0, 36, 255], [0, 36, 255], [0, 36, 255],
+ [0, 36, 255], [0, 0, 230], [0, 0, 230],
+ [0, 0, 230], [0, 0, 230], [0, 0, 139],
+ [237, 149, 100], [237, 149, 100],
+ [237, 149, 100], [237, 149, 100], [230, 128, 77],
+ [230, 128, 77], [230, 128, 77], [230, 128, 77],
+ [255, 144, 30], [255, 144, 30], [255, 144, 30],
+ [255, 144, 30], [153, 51, 0], [153, 51, 0],
+ [153, 51, 0], [153, 51, 0], [255, 51, 13],
+ [255, 51, 13], [255, 51, 13], [255, 51, 13],
+ [103, 37, 8]]
+
+ pose_link_color = [[14, 128, 250], [14, 128, 250], [14, 128, 250],
+ [14, 128, 250], [80, 127, 255], [80, 127, 255],
+ [80, 127, 255], [80, 127, 255], [71, 99, 255],
+ [71, 99, 255], [71, 99, 255], [71, 99, 255],
+ [0, 36, 255], [0, 36, 255], [0, 36, 255],
+ [0, 36, 255], [0, 0, 230], [0, 0, 230],
+ [0, 0, 230], [0, 0, 230], [237, 149, 100],
+ [237, 149, 100], [237, 149, 100],
+ [237, 149, 100], [230, 128, 77], [230, 128, 77],
+ [230, 128, 77], [230, 128, 77], [255, 144, 30],
+ [255, 144, 30], [255, 144, 30], [255, 144, 30],
+ [153, 51, 0], [153, 51, 0], [153, 51, 0],
+ [153, 51, 0], [255, 51, 13], [255, 51, 13],
+ [255, 51, 13], [255, 51, 13]]
+ else:
+ raise NotImplementedError
+
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(
+ result,
+ img,
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ num_instances=num_instances,
+ show=show,
+ out_file=out_file)
+
+ return img
+
+
+def inference_interhand_3d_model(model,
+ img_or_path,
+ det_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset='InterHand3DDataset'):
+ """Inference a single image with a list of hand bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+ det_results (list[dict]): The 2D bbox sequences stored in a list.
+            Each element of the list is the bbox of one person, whose
+ shape is (ndarray[4 or 5]), containing 4 box coordinates
+ (and score).
+ dataset (str): Dataset name.
+ format: bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+ 'xyxy' means (left, top, right, bottom),
+ 'xywh' means (left, top, width, height).
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element is the result \
+ of an instance, which contains the predicted 3D keypoints with \
+ shape (ndarray[K,3]). If there is no valid instance, an \
+ empty list will be returned.
+ """
+
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+
+ if len(det_results) == 0:
+ return pose_results
+
+    # Instead of preprocessing each bbox in a for-loop, preprocess all bboxes at once.
+ bboxes = np.array([box['bbox'] for box in det_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ det_results = [det_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+    # bbox_thr may have filtered out all bounding boxes
+ if len(bboxes_xywh) == 0:
+ return []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset == 'InterHand3DDataset':
+ flip_pairs = [[i, 21 + i] for i in range(21)]
+ else:
+ raise NotImplementedError()
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+ data = {
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'bbox_score':
+ bbox[4] if len(bbox) == 5 else 1,
+ 'bbox_id':
+ 0, # need to be assigned if batch_size > 1
+ 'dataset':
+ dataset,
+ 'joints_3d':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'rotation':
+ 0,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_pairs': flip_pairs,
+ 'heatmap3d_depth_bound': cfg.data_cfg['heatmap3d_depth_bound'],
+ 'heatmap_size_root': cfg.data_cfg['heatmap_size_root'],
+ 'root_depth_bound': cfg.data_cfg['root_depth_bound']
+ }
+ }
+
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, [device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False)
+
+ poses_3d = result['preds']
+ rel_root_depth = result['rel_root_depth']
+ hand_type = result['hand_type']
+ if poses_3d.shape[-1] != 4:
+ assert poses_3d.shape[-1] == 3
+ dummy_score = np.ones(
+ poses_3d.shape[:-1] + (1, ), dtype=poses_3d.dtype)
+ poses_3d = np.concatenate((poses_3d, dummy_score), axis=-1)
+
+ # add relative root depth to left hand joints
+ poses_3d[:, 21:, 2] += rel_root_depth
+
+ # set joint scores according to hand type
+ poses_3d[:, :21, 3] *= hand_type[:, [0]]
+ poses_3d[:, 21:, 3] *= hand_type[:, [1]]
+
+ pose_results = []
+ for pose_3d, person_res, bbox_xyxy in zip(poses_3d, det_results,
+ bboxes_xyxy):
+ pose_res = person_res.copy()
+ pose_res['keypoints_3d'] = pose_3d
+ pose_res['bbox'] = bbox_xyxy
+ pose_results.append(pose_res)
+
+ return pose_results
+
+
+def inference_mesh_model(model,
+ img_or_path,
+ det_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset='MeshH36MDataset'):
+ """Inference a single image with a list of bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+ - num_vertices: V
+ - num_faces: F
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+ det_results (list[dict]): The 2D bbox sequences stored in a list.
+ Each element of the list is the bbox of one person.
+ "bbox" (ndarray[4 or 5]): The person bounding box,
+ which contains 4 box coordinates (and score).
+ bbox_thr (float | None): Threshold for bounding boxes.
+ Only bboxes with higher scores will be fed into the pose
+ detector. If bbox_thr is None, all boxes will be used.
+ format (str): bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+
+ - 'xyxy' means (left, top, right, bottom),
+ - 'xywh' means (left, top, width, height).
+ dataset (str): Dataset name.
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element \
+ is the result of an instance, which contains:
+
+ - 'bbox' (ndarray[4]): instance bounding bbox
+ - 'center' (ndarray[2]): bbox center
+ - 'scale' (ndarray[2]): bbox scale
+ - 'keypoints_3d' (ndarray[K,3]): predicted 3D keypoints
+ - 'camera' (ndarray[3]): camera parameters
+ - 'vertices' (ndarray[V, 3]): predicted 3D vertices
+ - 'faces' (ndarray[F, 3]): mesh faces
+
+ If there is no valid instance, an empty list
+ will be returned.
+ """
+
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+
+ if len(det_results) == 0:
+ return pose_results
+
+    # Instead of preprocessing each bbox in a for-loop, preprocess all bboxes at once.
+ bboxes = np.array([box['bbox'] for box in det_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ det_results = [det_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+    # bbox_thr may have filtered out all bounding boxes
+ if len(bboxes_xywh) == 0:
+ return []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset == 'MeshH36MDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9],
+ [20, 21], [22, 23]]
+ else:
+ raise NotImplementedError()
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+ data = {
+ 'image_file':
+ img_or_path,
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'rotation':
+ 0,
+ 'bbox_score':
+ bbox[4] if len(bbox) == 5 else 1,
+ 'dataset':
+ dataset,
+ 'joints_2d':
+ np.zeros((cfg.data_cfg.num_joints, 2), dtype=np.float32),
+ 'joints_2d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 1), dtype=np.float32),
+ 'joints_3d':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'pose':
+ np.zeros(72, dtype=np.float32),
+ 'beta':
+ np.zeros(10, dtype=np.float32),
+ 'has_smpl':
+ 0,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_pairs': flip_pairs,
+ }
+ }
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, target_gpus=[device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ preds = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False,
+ return_vertices=True,
+ return_faces=True)
+
+ for idx in range(len(det_results)):
+ pose_res = det_results[idx].copy()
+ pose_res['bbox'] = bboxes_xyxy[idx]
+ pose_res['center'] = batch_data['img_metas'][idx]['center']
+ pose_res['scale'] = batch_data['img_metas'][idx]['scale']
+ pose_res['keypoints_3d'] = preds['keypoints_3d'][idx]
+ pose_res['camera'] = preds['camera'][idx]
+ pose_res['vertices'] = preds['vertices'][idx]
+ pose_res['faces'] = preds['faces']
+ pose_results.append(pose_res)
+ return pose_results
+
+
+def vis_3d_mesh_result(model, result, img=None, show=False, out_file=None):
+ """Visualize the 3D mesh estimation results.
+
+ Args:
+ model (nn.Module): The loaded model.
+ result (list[dict]): 3D mesh estimation results.
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(result, img, show=show, out_file=out_file)
+
+ return img
diff --git a/mmpose/apis/inference_tracking.py b/mmpose/apis/inference_tracking.py
new file mode 100644
index 0000000000000000000000000000000000000000..9494fbaa75ca54840bd2c3f8bbbfcc7955e3a05d
--- /dev/null
+++ b/mmpose/apis/inference_tracking.py
@@ -0,0 +1,347 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+
+from mmpose.core import OneEuroFilter, oks_iou
+
+
+def _compute_iou(bboxA, bboxB):
+    """Compute the Intersection over Union (IoU) between two boxes.
+
+ Args:
+ bboxA (list): The first bbox info (left, top, right, bottom, score).
+ bboxB (list): The second bbox info (left, top, right, bottom, score).
+
+ Returns:
+ float: The IoU value.
+ """
+
+ x1 = max(bboxA[0], bboxB[0])
+ y1 = max(bboxA[1], bboxB[1])
+ x2 = min(bboxA[2], bboxB[2])
+ y2 = min(bboxA[3], bboxB[3])
+
+ inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+
+ bboxA_area = (bboxA[2] - bboxA[0]) * (bboxA[3] - bboxA[1])
+ bboxB_area = (bboxB[2] - bboxB[0]) * (bboxB[3] - bboxB[1])
+ union_area = float(bboxA_area + bboxB_area - inter_area)
+ if union_area == 0:
+ union_area = 1e-5
+ warnings.warn('union_area=0 is unexpected')
+
+ iou = inter_area / union_area
+
+ return iou
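+
+# Worked example, not from the original module: for bboxA = [0, 0, 10, 10]
+# and bboxB = [5, 5, 15, 15] the intersection is 5 * 5 = 25 and the union is
+# 100 + 100 - 25 = 175, so the IoU is 25 / 175 ~= 0.143.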
+
+
+def _track_by_iou(res, results_last, thr):
+ """Get track id using IoU tracking greedily.
+
+ Args:
+ res (dict): The bbox & pose results of the person instance.
+ results_last (list[dict]): The bbox & pose & track_id info of the
+ last frame (bbox_result, pose_result, track_id).
+ thr (float): The threshold for iou tracking.
+
+ Returns:
+ int: The track id for the new person instance.
+ list[dict]: The bbox & pose & track_id info of the persons
+ that have not been matched on the last frame.
+ dict: The matched person instance on the last frame.
+ """
+
+ bbox = list(res['bbox'])
+
+ max_iou_score = -1
+ max_index = -1
+ match_result = {}
+ for index, res_last in enumerate(results_last):
+ bbox_last = list(res_last['bbox'])
+
+ iou_score = _compute_iou(bbox, bbox_last)
+ if iou_score > max_iou_score:
+ max_iou_score = iou_score
+ max_index = index
+
+ if max_iou_score > thr:
+ track_id = results_last[max_index]['track_id']
+ match_result = results_last[max_index]
+ del results_last[max_index]
+ else:
+ track_id = -1
+
+ return track_id, results_last, match_result
+
+
+def _track_by_oks(res, results_last, thr):
+ """Get track id using OKS tracking greedily.
+
+ Args:
+ res (dict): The pose results of the person instance.
+ results_last (list[dict]): The pose & track_id info of the
+ last frame (pose_result, track_id).
+ thr (float): The threshold for oks tracking.
+
+ Returns:
+ int: The track id for the new person instance.
+ list[dict]: The pose & track_id info of the persons
+ that have not been matched on the last frame.
+ dict: The matched person instance on the last frame.
+ """
+ pose = res['keypoints'].reshape((-1))
+ area = res['area']
+ max_index = -1
+ match_result = {}
+
+ if len(results_last) == 0:
+ return -1, results_last, match_result
+
+ pose_last = np.array(
+ [res_last['keypoints'].reshape((-1)) for res_last in results_last])
+ area_last = np.array([res_last['area'] for res_last in results_last])
+
+ oks_score = oks_iou(pose, pose_last, area, area_last)
+
+ max_index = np.argmax(oks_score)
+
+ if oks_score[max_index] > thr:
+ track_id = results_last[max_index]['track_id']
+ match_result = results_last[max_index]
+ del results_last[max_index]
+ else:
+ track_id = -1
+
+ return track_id, results_last, match_result
+
+
+def _get_area(results):
+    """Get the bbox and area of each person instance on the current frame.
+
+ Args:
+ results (list[dict]): The pose results of the current frame
+ (pose_result).
+ Returns:
+ list[dict]: The bbox & pose info of the current frame
+ (bbox_result, pose_result, area).
+ """
+ for result in results:
+ if 'bbox' in result:
+ result['area'] = ((result['bbox'][2] - result['bbox'][0]) *
+ (result['bbox'][3] - result['bbox'][1]))
+ else:
+ xmin = np.min(
+ result['keypoints'][:, 0][result['keypoints'][:, 0] > 0],
+ initial=1e10)
+ xmax = np.max(result['keypoints'][:, 0])
+ ymin = np.min(
+ result['keypoints'][:, 1][result['keypoints'][:, 1] > 0],
+ initial=1e10)
+ ymax = np.max(result['keypoints'][:, 1])
+ result['area'] = (xmax - xmin) * (ymax - ymin)
+ result['bbox'] = np.array([xmin, ymin, xmax, ymax])
+ return results
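+
+# Worked example, not from the original module: a result without a 'bbox' but
+# with keypoints [[10, 20, 1.], [30, 60, 1.]] is assigned bbox [10, 20, 30, 60]
+# and area (30 - 10) * (60 - 20) = 800, derived from the keypoint extent.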
+
+
+def _temporal_refine(result, match_result, fps=None):
+    """Refine keypoints using the tracked person instance on the last frame.
+
+    Args:
+        result (dict): The pose results of the current frame
+            (pose_result).
+        match_result (dict): The pose results of the last frame
+            (match_result).
+    Returns:
+        np.ndarray: The person keypoints after refinement.
+ """
+ if 'one_euro' in match_result:
+ result['keypoints'][:, :2] = match_result['one_euro'](
+ result['keypoints'][:, :2])
+ result['one_euro'] = match_result['one_euro']
+ else:
+ result['one_euro'] = OneEuroFilter(result['keypoints'][:, :2], fps=fps)
+ return result['keypoints']
+
+
+def get_track_id(results,
+ results_last,
+ next_id,
+ min_keypoints=3,
+ use_oks=False,
+ tracking_thr=0.3,
+ use_one_euro=False,
+ fps=None):
+ """Get track id for each person instance on the current frame.
+
+ Args:
+ results (list[dict]): The bbox & pose results of the current frame
+ (bbox_result, pose_result).
+ results_last (list[dict]): The bbox & pose & track_id info of the
+ last frame (bbox_result, pose_result, track_id).
+ next_id (int): The track id for the new person instance.
+        min_keypoints (int): Minimum number of keypoints recognized as a
+            person. Default: 3.
+        use_oks (bool): Whether to use OKS-based tracking. Default: False.
+        tracking_thr (float): The threshold for tracking.
+        use_one_euro (bool): Whether to apply the one-euro filter for temporal
+            smoothing. Default: False.
+        fps (float, optional): The video frame rate, used to set the d_cutoff
+            parameter of the one-euro filter when the input is a video.
+
+ Returns:
+ tuple:
+ - results (list[dict]): The bbox & pose & track_id info of the \
+ current frame (bbox_result, pose_result, track_id).
+ - next_id (int): The track id for the new person instance.
+ """
+ results = _get_area(results)
+
+ if use_oks:
+ _track = _track_by_oks
+ else:
+ _track = _track_by_iou
+
+ for result in results:
+ track_id, results_last, match_result = _track(result, results_last,
+ tracking_thr)
+ if track_id == -1:
+ if np.count_nonzero(result['keypoints'][:, 1]) > min_keypoints:
+ result['track_id'] = next_id
+ next_id += 1
+ else:
+ # If the number of keypoints detected is small,
+ # delete that person instance.
+ result['keypoints'][:, 1] = -10
+ result['bbox'] *= 0
+ result['track_id'] = -1
+ else:
+ result['track_id'] = track_id
+ if use_one_euro:
+ result['keypoints'] = _temporal_refine(
+ result, match_result, fps=fps)
+ del match_result
+
+ return results, next_id
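+
+# Illustrative sketch, not from the original module: typical frame-by-frame
+# usage, where ``pose_results_per_frame`` is a hypothetical list of per-frame
+# 2D pose results:
+#
+#   next_id = 0
+#   results_last = []
+#   for results in pose_results_per_frame:
+#       results, next_id = get_track_id(results, results_last, next_id,
+#                                       use_oks=False, tracking_thr=0.3)
+#       results_last = results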
+
+
+def vis_pose_tracking_result(model,
+ img,
+ result,
+ radius=4,
+ thickness=1,
+ kpt_score_thr=0.3,
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ show=False,
+ out_file=None):
+ """Visualize the pose tracking results on the image.
+
+ Args:
+ model (nn.Module): The loaded detector.
+ img (str | np.ndarray): Image filename or loaded image.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ kpt_score_thr (float): The threshold to visualize the keypoints.
+ skeleton (list[tuple]): Default None.
+        show (bool): Whether to show the image. Default: False.
+ out_file (str|None): The filename of the output visualization image.
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
+ [255, 255, 255]])
+
+ if dataset_info is None and dataset is not None:
+ warnings.warn(
+ 'dataset is deprecated.'
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset in ('TopDownCocoDataset', 'BottomUpCocoDataset',
+ 'TopDownOCHumanDataset'):
+ kpt_num = 17
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [4, 6]]
+
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ kpt_num = 133
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2],
+ [1, 3], [2, 4], [3, 5], [4, 6], [15, 17], [15, 18],
+ [15, 19], [16, 20], [16, 21], [16, 22], [91, 92],
+ [92, 93], [93, 94], [94, 95], [91, 96], [96, 97],
+ [97, 98], [98, 99], [91, 100], [100, 101], [101, 102],
+ [102, 103], [91, 104], [104, 105], [105, 106],
+ [106, 107], [91, 108], [108, 109], [109, 110],
+ [110, 111], [112, 113], [113, 114], [114, 115],
+ [115, 116], [112, 117], [117, 118], [118, 119],
+ [119, 120], [112, 121], [121, 122], [122, 123],
+ [123, 124], [112, 125], [125, 126], [126, 127],
+ [127, 128], [112, 129], [129, 130], [130, 131],
+ [131, 132]]
+ radius = 1
+
+ elif dataset == 'TopDownAicDataset':
+ kpt_num = 14
+ skeleton = [[2, 1], [1, 0], [0, 13], [13, 3], [3, 4], [4, 5],
+ [8, 7], [7, 6], [6, 9], [9, 10], [10, 11], [12, 13],
+ [0, 6], [3, 9]]
+
+ elif dataset == 'TopDownMpiiDataset':
+ kpt_num = 16
+ skeleton = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5], [6, 7],
+ [7, 8], [8, 9], [8, 12], [12, 11], [11, 10], [8, 13],
+ [13, 14], [14, 15]]
+
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset'):
+ kpt_num = 21
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7],
+ [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13],
+ [13, 14], [14, 15], [15, 16], [0, 17], [17, 18],
+ [18, 19], [19, 20]]
+
+ elif dataset == 'InterHand2DDataset':
+ kpt_num = 21
+ skeleton = [[0, 1], [1, 2], [2, 3], [4, 5], [5, 6], [6, 7], [8, 9],
+ [9, 10], [10, 11], [12, 13], [13, 14], [14, 15],
+ [16, 17], [17, 18], [18, 19], [3, 20], [7, 20],
+ [11, 20], [15, 20], [19, 20]]
+
+ else:
+ raise NotImplementedError()
+
+ elif dataset_info is not None:
+ kpt_num = dataset_info.keypoint_num
+ skeleton = dataset_info.skeleton
+
+ for res in result:
+ track_id = res['track_id']
+ bbox_color = palette[track_id % len(palette)]
+ pose_kpt_color = palette[[track_id % len(palette)] * kpt_num]
+ pose_link_color = palette[[track_id % len(palette)] * len(skeleton)]
+ img = model.show_result(
+ img, [res],
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ bbox_color=tuple(bbox_color.tolist()),
+ kpt_score_thr=kpt_score_thr,
+ show=show,
+ out_file=out_file)
+
+ return img
diff --git a/mmpose/apis/test.py b/mmpose/apis/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3843b5a594c03cf82144f6c3b3805a9221f16d72
--- /dev/null
+++ b/mmpose/apis/test.py
@@ -0,0 +1,191 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.runner import get_dist_info
+
+
+def single_gpu_test(model, data_loader):
+ """Test model with a single gpu.
+
+    This method tests the model with a single gpu and displays a test progress bar.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+
+
+ Returns:
+ list: The prediction results.
+ """
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for data in data_loader:
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.append(result)
+
+ # use the first key as main key to calculate the batch size
+ batch_size = len(next(iter(data.values())))
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
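+
+# Illustrative sketch, not from the original module: the model is usually
+# wrapped (e.g. in ``MMDataParallel``) and the loader built with
+# ``build_dataloader`` elsewhere; both names are assumptions here:
+#
+#   model = MMDataParallel(model, device_ids=[0])
+#   outputs = single_gpu_test(model, data_loader)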
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+    This method tests the model with multiple gpus and collects the results
+    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
+    it encodes results to gpu tensors and uses gpu communication for results
+    collection. In cpu mode it saves the results on different gpus to 'tmpdir'
+ and collects them by the rank 0 worker.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for data in data_loader:
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.append(result)
+
+ if rank == 0:
+ # use the first key as main key to calculate the batch size
+ batch_size = len(next(iter(data.values())))
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ """Collect results in cpu mode.
+
+ It saves the results on different gpus to 'tmpdir' and collects
+ them by the rank 0 worker.
+
+ Args:
+ result_part (list): Results to be collected
+ size (int): Result size.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode. Default: None
+
+ Returns:
+ list: Ordered results.
+ """
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ mmcv.mkdir_or_exist('.dist_test')
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # synchronizes all processes to make sure tmpdir exist
+ dist.barrier()
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
+ # synchronizes all processes for loading pickle file
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ """Collect results in gpu mode.
+
+    It encodes results to gpu tensors and uses gpu communication for results
+ collection.
+
+ Args:
+ result_part (list): Results to be collected
+ size (int): Result size.
+
+ Returns:
+ list: Ordered results.
+ """
+
+ rank, world_size = get_dist_info()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+ part_send[:shape_tensor[0]] = part_tensor
+ part_recv_list = [
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
+ ]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_list.append(
+ pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
+ return None
diff --git a/mmpose/apis/train.py b/mmpose/apis/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c31f8b0b1ace6d27feb14b8d441fec6436ad9e2
--- /dev/null
+++ b/mmpose/apis/train.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
+ get_dist_info)
+from mmcv.utils import digit_version
+
+from mmpose.core import DistEvalHook, EvalHook, build_optimizers
+from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.utils import get_root_logger
+
+try:
+ from mmcv.runner import Fp16OptimizerHook
+except ImportError:
+ warnings.warn(
+ 'Fp16OptimizerHook from mmpose will be deprecated from '
+ 'v0.15.0. Please install mmcv>=1.1.4', DeprecationWarning)
+ from mmpose.core import Fp16OptimizerHook
+
+
+def init_random_seed(seed=None, device='cuda'):
+ """Initialize random seed.
+
+ If the seed is not set, the seed will be automatically randomized,
+ and then broadcast to all processes to prevent some potential bugs.
+
+ Args:
+ seed (int, Optional): The seed. Default to None.
+ device (str): The device where the seed will be put on.
+ Default to 'cuda'.
+
+ Returns:
+ int: Seed to be used.
+ """
+ if seed is not None:
+ return seed
+
+ # Make sure all ranks share the same random seed to prevent
+ # some potential bugs. Please refer to
+ # https://github.com/open-mmlab/mmdetection/issues/6339
+ rank, world_size = get_dist_info()
+ seed = np.random.randint(2**31)
+ if world_size == 1:
+ return seed
+
+ if rank == 0:
+ random_num = torch.tensor(seed, dtype=torch.int32, device=device)
+ else:
+ random_num = torch.tensor(0, dtype=torch.int32, device=device)
+ dist.broadcast(random_num, src=0)
+ return random_num.item()
+
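+
+# Illustrative sketch, not from the original module: the returned seed is
+# normally applied to every RNG before datasets and models are built; ``cfg``
+# is a hypothetical config object here:
+#
+#   seed = init_random_seed(cfg.get('seed'))
+#   np.random.seed(seed)
+#   torch.manual_seed(seed)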
+
+def train_model(model,
+ dataset,
+ cfg,
+ distributed=False,
+ validate=False,
+ timestamp=None,
+ meta=None):
+ """Train model entry function.
+
+ Args:
+ model (nn.Module): The model to be trained.
+ dataset (Dataset): Train dataset.
+ cfg (dict): The config dict for training.
+ distributed (bool): Whether to use distributed training.
+ Default: False.
+ validate (bool): Whether to do evaluation. Default: False.
+ timestamp (str | None): Local time for runner. Default: None.
+ meta (dict | None): Meta dict to record some important information.
+ Default: None
+ """
+ logger = get_root_logger(cfg.log_level)
+
+ # prepare data loaders
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+ # step 1: give default values and override (if exist) from cfg.data
+ loader_cfg = {
+ **dict(
+ seed=cfg.get('seed'),
+ drop_last=False,
+ dist=distributed,
+ num_gpus=len(cfg.gpu_ids)),
+ **({} if torch.__version__ != 'parrots' else dict(
+ prefetch_num=2,
+ pin_memory=False,
+ )),
+ **dict((k, cfg.data[k]) for k in [
+ 'samples_per_gpu',
+ 'workers_per_gpu',
+ 'shuffle',
+ 'seed',
+ 'drop_last',
+ 'prefetch_num',
+ 'pin_memory',
+ 'persistent_workers',
+ ] if k in cfg.data)
+ }
+
+ # step 2: cfg.data.train_dataloader has highest priority
+ train_loader_cfg = dict(loader_cfg, **cfg.data.get('train_dataloader', {}))
+
+ data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]
+
+    # determine whether to use adversarial training or not
+ use_adverserial_train = cfg.get('use_adversarial_train', False)
+
+ # put model on gpus
+ if distributed:
+ find_unused_parameters = cfg.get('find_unused_parameters', False)
+ # Sets the `find_unused_parameters` parameter in
+ # torch.nn.parallel.DistributedDataParallel
+
+ if use_adverserial_train:
+ # Use DistributedDataParallelWrapper for adversarial training
+ model = DistributedDataParallelWrapper(
+ model,
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters)
+ else:
+ model = MMDistributedDataParallel(
+ model.cuda(),
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters)
+ else:
+ if digit_version(mmcv.__version__) >= digit_version(
+ '1.4.4') or torch.cuda.is_available():
+ model = MMDataParallel(model, device_ids=cfg.gpu_ids)
+ else:
+ warnings.warn(
+                'We recommend using MMCV >= 1.4.4 for CPU training. '
+ 'See https://github.com/open-mmlab/mmpose/pull/1157 for '
+ 'details.')
+
+ # build runner
+ optimizer = build_optimizers(model, cfg.optimizer)
+
+ runner = EpochBasedRunner(
+ model,
+ optimizer=optimizer,
+ work_dir=cfg.work_dir,
+ logger=logger,
+ meta=meta)
+ # an ugly workaround to make .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ if use_adverserial_train:
+ # The optimizer step process is included in the train_step function
+ # of the model, so the runner should NOT include optimizer hook.
+ optimizer_config = None
+ else:
+ # fp16 setting
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ optimizer_config = Fp16OptimizerHook(
+ **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
+ elif distributed and 'type' not in cfg.optimizer_config:
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
+ else:
+ optimizer_config = cfg.optimizer_config
+
+ # register hooks
+ runner.register_training_hooks(cfg.lr_config, optimizer_config,
+ cfg.checkpoint_config, cfg.log_config,
+ cfg.get('momentum_config', None))
+ if distributed:
+ runner.register_hook(DistSamplerSeedHook())
+
+ # register eval hooks
+ if validate:
+ eval_cfg = cfg.get('evaluation', {})
+ val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+ dataloader_setting = dict(
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
+ # cfg.gpus will be ignored if distributed
+ num_gpus=len(cfg.gpu_ids),
+ dist=distributed,
+ drop_last=False,
+ shuffle=False)
+ dataloader_setting = dict(dataloader_setting,
+ **cfg.data.get('val_dataloader', {}))
+ val_dataloader = build_dataloader(val_dataset, **dataloader_setting)
+ eval_hook = DistEvalHook if distributed else EvalHook
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
diff --git a/mmpose/core/__init__.py b/mmpose/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..66185b72c47c99a0d296bf65c72f50a47f2d080c
--- /dev/null
+++ b/mmpose/core/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .camera import * # noqa: F401, F403
+from .evaluation import * # noqa: F401, F403
+from .fp16 import * # noqa: F401, F403
+from .optimizer import * # noqa: F401, F403
+from .post_processing import * # noqa: F401, F403
+from .utils import * # noqa: F401, F403
+from .visualization import * # noqa: F401, F403
diff --git a/mmpose/core/__pycache__/__init__.cpython-310.pyc b/mmpose/core/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1dc7d301dd0d2529709d228391a6ae4a15a284a2
Binary files /dev/null and b/mmpose/core/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/__pycache__/distributed_wrapper.cpython-310.pyc b/mmpose/core/__pycache__/distributed_wrapper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b87ae9de58e270ff3aee52c3e869279a1cae3ad
Binary files /dev/null and b/mmpose/core/__pycache__/distributed_wrapper.cpython-310.pyc differ
diff --git a/mmpose/core/camera/__init__.py b/mmpose/core/camera/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4a3c5526560996791a85f0d84a72a66286486ca
--- /dev/null
+++ b/mmpose/core/camera/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .camera_base import CAMERAS
+from .single_camera import SimpleCamera
+from .single_camera_torch import SimpleCameraTorch
+
+__all__ = ['CAMERAS', 'SimpleCamera', 'SimpleCameraTorch']
diff --git a/mmpose/core/camera/__pycache__/__init__.cpython-310.pyc b/mmpose/core/camera/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d5d339d9a4fd101b90fbeaa328196d0200ef03a0
Binary files /dev/null and b/mmpose/core/camera/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/camera/__pycache__/camera_base.cpython-310.pyc b/mmpose/core/camera/__pycache__/camera_base.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..37f2e218762c89053d75e52f579408d02765152f
Binary files /dev/null and b/mmpose/core/camera/__pycache__/camera_base.cpython-310.pyc differ
diff --git a/mmpose/core/camera/__pycache__/single_camera.cpython-310.pyc b/mmpose/core/camera/__pycache__/single_camera.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8bce9ccac38a7c528a12ebb755063b02f953992a
Binary files /dev/null and b/mmpose/core/camera/__pycache__/single_camera.cpython-310.pyc differ
diff --git a/mmpose/core/camera/__pycache__/single_camera_torch.cpython-310.pyc b/mmpose/core/camera/__pycache__/single_camera_torch.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5011104e6cedef26a94d69d97da9fcf192c3cf71
Binary files /dev/null and b/mmpose/core/camera/__pycache__/single_camera_torch.cpython-310.pyc differ
diff --git a/mmpose/core/camera/camera_base.py b/mmpose/core/camera/camera_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..28b23e7c6279e3613265a949df91f6ced0413b99
--- /dev/null
+++ b/mmpose/core/camera/camera_base.py
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+
+from mmcv.utils import Registry
+
+CAMERAS = Registry('camera')
+
+
+class SingleCameraBase(metaclass=ABCMeta):
+ """Base class for single camera model.
+
+ Args:
+ param (dict): Camera parameters
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_world: Project points from camera coordinates to world
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ @abstractmethod
+ def __init__(self, param):
+ """Load camera parameters and check validity."""
+
+ def world_to_camera(self, X):
+ """Project points from world coordinates to camera coordinates."""
+ raise NotImplementedError
+
+ def camera_to_world(self, X):
+ """Project points from camera coordinates to world coordinates."""
+ raise NotImplementedError
+
+ def camera_to_pixel(self, X):
+ """Project points from camera coordinates to pixel coordinates."""
+ raise NotImplementedError
+
+ def world_to_pixel(self, X):
+ """Project points from world coordinates to pixel coordinates."""
+ _X = self.world_to_camera(X)
+ return self.camera_to_pixel(_X)
diff --git a/mmpose/core/camera/single_camera.py b/mmpose/core/camera/single_camera.py
new file mode 100644
index 0000000000000000000000000000000000000000..cabd79941af5c81110876e94ce6103cc02ea5078
--- /dev/null
+++ b/mmpose/core/camera/single_camera.py
@@ -0,0 +1,123 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from .camera_base import CAMERAS, SingleCameraBase
+
+
+@CAMERAS.register_module()
+class SimpleCamera(SingleCameraBase):
+ """Camera model to calculate coordinate transformation with given
+ intrinsic/extrinsic camera parameters.
+
+ Note:
+ The keypoint coordinate should be an np.ndarray with a shape of
+ [...,J, C] where J is the keypoint number of an instance, and C is
+ the coordinate dimension. For example:
+
+ [J, C]: shape of joint coordinates of a person with J joints.
+ [N, J, C]: shape of a batch of person joint coordinates.
+ [N, T, J, C]: shape of a batch of pose sequences.
+
+ Args:
+ param (dict): camera parameters including:
+ - R: 3x3, camera rotation matrix (camera-to-world)
+ - T: 3x1, camera translation (camera-to-world)
+ - K: (optional) 2x3, camera intrinsic matrix
+ - k: (optional) nx1, camera radial distortion coefficients
+ - p: (optional) mx1, camera tangential distortion coefficients
+ - f: (optional) 2x1, camera focal length
+ - c: (optional) 2x1, camera center
+ if K is not provided, it will be calculated from f and c.
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ def __init__(self, param):
+
+ self.param = {}
+ # extrinsic param
+ R = np.array(param['R'], dtype=np.float32)
+ T = np.array(param['T'], dtype=np.float32)
+ assert R.shape == (3, 3)
+ assert T.shape == (3, 1)
+ # The camera matrices are transposed in advance because the joint
+ # coordinates are stored as row vectors.
+ self.param['R_c2w'] = R.T
+ self.param['T_c2w'] = T.T
+ self.param['R_w2c'] = R
+ self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']
+
+ # intrinsic param
+ if 'K' in param:
+ K = np.array(param['K'], dtype=np.float32)
+ assert K.shape == (2, 3)
+ self.param['K'] = K.T
+ self.param['f'] = np.array([K[0, 0], K[1, 1]])[:, np.newaxis]
+ self.param['c'] = np.array([K[0, 2], K[1, 2]])[:, np.newaxis]
+ elif 'f' in param and 'c' in param:
+ f = np.array(param['f'], dtype=np.float32)
+ c = np.array(param['c'], dtype=np.float32)
+ assert f.shape == (2, 1)
+ assert c.shape == (2, 1)
+ self.param['K'] = np.concatenate((np.diagflat(f), c), axis=-1).T
+ self.param['f'] = f
+ self.param['c'] = c
+ else:
+ raise ValueError('Camera intrinsic parameters are missing. '
+ 'Either "K" or "f"&"c" should be provided.')
+
+ # distortion param
+ if 'k' in param and 'p' in param:
+ self.undistortion = True
+ self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
+ self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
+ assert self.param['k'].size in {3, 6}
+ assert self.param['p'].size == 2
+ else:
+ self.undistortion = False
+
+ def world_to_camera(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_w2c'] + self.param['T_w2c']
+
+ def camera_to_world(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_c2w'] + self.param['T_c2w']
+
+ def camera_to_pixel(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+
+ _X = X / X[..., 2:]
+
+ if self.undistortion:
+ k = self.param['k']
+ p = self.param['p']
+ _X_2d = _X[..., :2]
+ r2 = (_X_2d**2).sum(-1)
+ radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
+ if k.size == 6:
+ radial /= 1 + sum(
+ (ki * r2**(i + 1) for i, ki in enumerate(k[3:])))
+
+ tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])
+
+ _X[..., :2] = _X_2d * (radial + tangential)[..., None] + np.outer(
+ r2, p[::-1]).reshape(_X_2d.shape)
+ return _X @ self.param['K']
+
+ def pixel_to_camera(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ _X = X.copy()
+        _X[:, :2] = ((X[:, :2] - self.param['c'].T) / self.param['f'].T *
+                     X[:, [2]])
+ return _X
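A minimal usage sketch for `SimpleCamera` with hypothetical camera parameters; joints are row vectors, as documented above.

```python
# Minimal sketch (hypothetical values): project joints from world coordinates
# to pixel coordinates. Assumes the mmpose package from this diff is importable.
import numpy as np

from mmpose.core.camera.single_camera import SimpleCamera

param = dict(
    R=np.eye(3),                     # camera-to-world rotation
    T=np.zeros((3, 1)),              # camera-to-world translation
    f=np.array([[1000.], [1000.]]),  # focal lengths (fx, fy)
    c=np.array([[320.], [240.]]),    # principal point (cx, cy)
)
camera = SimpleCamera(param)

joints_world = np.array([[[0.1, 0.2, 2.0],
                          [0.0, 0.0, 2.5]]])      # [N=1, J=2, C=3]
joints_pixel = camera.world_to_pixel(joints_world)
print(joints_pixel)                               # (u, v) per joint, shape [1, 2, 2]
```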
diff --git a/mmpose/core/camera/single_camera_torch.py b/mmpose/core/camera/single_camera_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..22eb72f23d6eecf1b5c5a9b570a4f142fcf6e02a
--- /dev/null
+++ b/mmpose/core/camera/single_camera_torch.py
@@ -0,0 +1,118 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from .camera_base import CAMERAS, SingleCameraBase
+
+
+@CAMERAS.register_module()
+class SimpleCameraTorch(SingleCameraBase):
+ """Camera model to calculate coordinate transformation with given
+ intrinsic/extrinsic camera parameters.
+
+    Note:
+        The keypoint coordinate should be a torch.Tensor with a shape of
+ [...,J, C] where J is the keypoint number of an instance, and C is
+ the coordinate dimension. For example:
+
+ [J, C]: shape of joint coordinates of a person with J joints.
+ [N, J, C]: shape of a batch of person joint coordinates.
+ [N, T, J, C]: shape of a batch of pose sequences.
+
+ Args:
+ param (dict): camera parameters including:
+ - R: 3x3, camera rotation matrix (camera-to-world)
+ - T: 3x1, camera translation (camera-to-world)
+ - K: (optional) 2x3, camera intrinsic matrix
+ - k: (optional) nx1, camera radial distortion coefficients
+ - p: (optional) mx1, camera tangential distortion coefficients
+ - f: (optional) 2x1, camera focal length
+ - c: (optional) 2x1, camera center
+ if K is not provided, it will be calculated from f and c.
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ def __init__(self, param, device):
+
+ self.param = {}
+ # extrinsic param
+ R = torch.tensor(param['R'], device=device)
+ T = torch.tensor(param['T'], device=device)
+
+ assert R.shape == (3, 3)
+ assert T.shape == (3, 1)
+ # The camera matrices are transposed in advance because the joint
+ # coordinates are stored as row vectors.
+ self.param['R_c2w'] = R.T
+ self.param['T_c2w'] = T.T
+ self.param['R_w2c'] = R
+ self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']
+
+ # intrinsic param
+ if 'K' in param:
+ K = torch.tensor(param['K'], device=device)
+ assert K.shape == (2, 3)
+ self.param['K'] = K.T
+ self.param['f'] = torch.tensor([[K[0, 0]], [K[1, 1]]],
+ device=device)
+ self.param['c'] = torch.tensor([[K[0, 2]], [K[1, 2]]],
+ device=device)
+ elif 'f' in param and 'c' in param:
+ f = torch.tensor(param['f'], device=device)
+ c = torch.tensor(param['c'], device=device)
+ assert f.shape == (2, 1)
+ assert c.shape == (2, 1)
+ self.param['K'] = torch.cat([torch.diagflat(f), c], dim=-1).T
+ self.param['f'] = f
+ self.param['c'] = c
+ else:
+ raise ValueError('Camera intrinsic parameters are missing. '
+ 'Either "K" or "f"&"c" should be provided.')
+
+ # distortion param
+ if 'k' in param and 'p' in param:
+ self.undistortion = True
+ self.param['k'] = torch.tensor(param['k'], device=device).view(-1)
+ self.param['p'] = torch.tensor(param['p'], device=device).view(-1)
+ assert len(self.param['k']) in {3, 6}
+ assert len(self.param['p']) == 2
+ else:
+ self.undistortion = False
+
+ def world_to_camera(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_w2c'] + self.param['T_w2c']
+
+ def camera_to_world(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_c2w'] + self.param['T_c2w']
+
+ def camera_to_pixel(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+
+ _X = X / X[..., 2:]
+
+ if self.undistortion:
+ k = self.param['k']
+ p = self.param['p']
+ _X_2d = _X[..., :2]
+ r2 = (_X_2d**2).sum(-1)
+ radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
+            if k.size(0) == 6:
+ radial /= 1 + sum(
+ (ki * r2**(i + 1) for i, ki in enumerate(k[3:])))
+
+ tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])
+
+ _X[..., :2] = _X_2d * (radial + tangential)[..., None] + torch.ger(
+ r2, p.flip([0])).reshape(_X_2d.shape)
+ return _X @ self.param['K']
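The torch variant mirrors the numpy API but keeps its parameters on a chosen device; a minimal sketch with hypothetical values, on CPU for illustration.

```python
# Minimal sketch (hypothetical values) for SimpleCameraTorch on CPU.
import torch

from mmpose.core.camera.single_camera_torch import SimpleCameraTorch

param = dict(
    R=torch.eye(3).tolist(),
    T=[[0.0], [0.0], [0.0]],
    f=[[1000.0], [1000.0]],
    c=[[320.0], [240.0]],
)
camera = SimpleCameraTorch(param, device='cpu')

joints_world = torch.tensor([[[0.1, 0.2, 2.0],
                              [0.0, 0.0, 2.5]]])   # [N=1, J=2, C=3]
joints_pixel = camera.world_to_pixel(joints_world)
print(joints_pixel.shape)                          # torch.Size([1, 2, 2])
```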
diff --git a/mmpose/core/distributed_wrapper.py b/mmpose/core/distributed_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..c67aceec992085e9952ea70c62009e9ec1db30ca
--- /dev/null
+++ b/mmpose/core/distributed_wrapper.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.parallel import MODULE_WRAPPERS as MMCV_MODULE_WRAPPERS
+from mmcv.parallel import MMDistributedDataParallel
+from mmcv.parallel.scatter_gather import scatter_kwargs
+from mmcv.utils import Registry
+from torch.cuda._utils import _get_device_index
+
+MODULE_WRAPPERS = Registry('module wrapper', parent=MMCV_MODULE_WRAPPERS)
+
+
+@MODULE_WRAPPERS.register_module()
+class DistributedDataParallelWrapper(nn.Module):
+ """A DistributedDataParallel wrapper for models in 3D mesh estimation task.
+
+    In the 3D mesh estimation task, different modules of the model need to be
+    wrapped with separate DistributedDataParallel instances; otherwise GAN
+    training will fail.
+    More specifically, a GAN model usually has two sub-modules: a generator
+    and a discriminator. If both are wrapped in one standard
+    DistributedDataParallel, errors will occur during training, because when
+    the parameters of the generator (or discriminator) are updated, the
+    parameters of the discriminator (or generator) are not, which is not
+    allowed by DistributedDataParallel.
+    So this wrapper wraps the generator and the discriminator with separate
+    DistributedDataParallel instances.
+
+ In this wrapper, we perform two operations:
+ 1. Wrap the modules in the models with separate MMDistributedDataParallel.
+ Note that only modules with parameters will be wrapped.
+ 2. Do scatter operation for 'forward', 'train_step' and 'val_step'.
+
+    Note that the arguments of this wrapper are the same as those in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+
+ Args:
+ module (nn.Module): Module that needs to be wrapped.
+ device_ids (list[int | `torch.device`]): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ dim (int, optional): Same as that in the official scatter function in
+ pytorch. Defaults to 0.
+ broadcast_buffers (bool): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ Defaults to False.
+ find_unused_parameters (bool, optional): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ Traverse the autograd graph of all tensors contained in returned
+ value of the wrapped module’s forward function. Defaults to False.
+ kwargs (dict): Other arguments used in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ """
+
+ def __init__(self,
+ module,
+ device_ids,
+ dim=0,
+ broadcast_buffers=False,
+ find_unused_parameters=False,
+ **kwargs):
+ super().__init__()
+ assert len(device_ids) == 1, (
+            'Currently, DistributedDataParallelWrapper only supports a '
+            'single CUDA device for each process. '
+            f'The length of device_ids must be 1, but got {len(device_ids)}.')
+ self.module = module
+ self.dim = dim
+ self.to_ddp(
+ device_ids=device_ids,
+ dim=dim,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ **kwargs)
+ self.output_device = _get_device_index(device_ids[0], True)
+
+ def to_ddp(self, device_ids, dim, broadcast_buffers,
+ find_unused_parameters, **kwargs):
+ """Wrap models with separate MMDistributedDataParallel.
+
+ It only wraps the modules with parameters.
+ """
+ for name, module in self.module._modules.items():
+ if next(module.parameters(), None) is None:
+ module = module.cuda()
+ elif all(not p.requires_grad for p in module.parameters()):
+ module = module.cuda()
+ else:
+ module = MMDistributedDataParallel(
+ module.cuda(),
+ device_ids=device_ids,
+ dim=dim,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ **kwargs)
+ self.module._modules[name] = module
+
+ def scatter(self, inputs, kwargs, device_ids):
+ """Scatter function.
+
+ Args:
+ inputs (Tensor): Input Tensor.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ device_ids (int): Device id.
+ """
+ return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
+
+ def forward(self, *inputs, **kwargs):
+ """Forward function.
+
+ Args:
+ inputs (tuple): Input data.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ return self.module(*inputs[0], **kwargs[0])
+
+ def train_step(self, *inputs, **kwargs):
+ """Train step function.
+
+ Args:
+ inputs (Tensor): Input Tensor.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ output = self.module.train_step(*inputs[0], **kwargs[0])
+ return output
+
+ def val_step(self, *inputs, **kwargs):
+ """Validation step function.
+
+ Args:
+ inputs (tuple): Input data.
+ kwargs (dict): Args for ``scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ output = self.module.val_step(*inputs[0], **kwargs[0])
+ return output
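Because constructing `MMDistributedDataParallel` requires an initialized process group and CUDA devices, the sketch below only replicates the selection rule from `to_ddp` on a hypothetical toy module (`ToyGAN` is not part of this diff) to show which submodules would be wrapped; it runs on CPU with plain PyTorch.

```python
# Illustrative sketch only: replicate the to_ddp decision rule on a toy model.
import torch.nn as nn


class ToyGAN(nn.Module):

    def __init__(self):
        super().__init__()
        self.generator = nn.Linear(8, 8)      # trainable -> would be wrapped
        self.discriminator = nn.Linear(8, 1)  # trainable -> would be wrapped
        self.renderer = nn.Identity()         # no parameters -> left as-is
        frozen = nn.Linear(8, 8)
        for p in frozen.parameters():
            p.requires_grad = False
        self.frozen_backbone = frozen         # frozen -> left as-is


model = ToyGAN()
for name, module in model._modules.items():
    if next(module.parameters(), None) is None:
        decision = 'kept as-is (no parameters)'
    elif all(not p.requires_grad for p in module.parameters()):
        decision = 'kept as-is (all parameters frozen)'
    else:
        decision = 'wrapped in MMDistributedDataParallel'
    print(f'{name}: {decision}')
```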
diff --git a/mmpose/core/evaluation/__init__.py b/mmpose/core/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f9378429c8ddaa15f7ac17446bc9d484987df16
--- /dev/null
+++ b/mmpose/core/evaluation/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_eval import (aggregate_scale, aggregate_stage_flip,
+ flip_feature_maps, get_group_preds,
+ split_ae_outputs)
+from .eval_hooks import DistEvalHook, EvalHook
+from .mesh_eval import compute_similarity_transform
+from .pose3d_eval import keypoint_3d_auc, keypoint_3d_pck, keypoint_mpjpe
+from .top_down_eval import (keypoint_auc, keypoint_epe, keypoint_pck_accuracy,
+ keypoints_from_heatmaps, keypoints_from_heatmaps3d,
+ keypoints_from_regression,
+ multilabel_classification_accuracy,
+ pose_pck_accuracy, post_dark_udp)
+
+__all__ = [
+ 'EvalHook', 'DistEvalHook', 'pose_pck_accuracy', 'keypoints_from_heatmaps',
+ 'keypoints_from_regression', 'keypoint_pck_accuracy', 'keypoint_3d_pck',
+ 'keypoint_3d_auc', 'keypoint_auc', 'keypoint_epe', 'get_group_preds',
+ 'split_ae_outputs', 'flip_feature_maps', 'aggregate_stage_flip',
+ 'aggregate_scale', 'compute_similarity_transform', 'post_dark_udp',
+ 'keypoint_mpjpe', 'keypoints_from_heatmaps3d',
+ 'multilabel_classification_accuracy'
+]
diff --git a/mmpose/core/evaluation/__pycache__/__init__.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46bf770446930fe6dd8df02e2804a7488a300365
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/__pycache__/bottom_up_eval.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/bottom_up_eval.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be70877a209393db5fafd72dd69f1af2af61b2e4
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/bottom_up_eval.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/__pycache__/eval_hooks.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/eval_hooks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4abc8ba013bae3e7449b175eef7992234dc37ad4
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/eval_hooks.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/__pycache__/mesh_eval.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/mesh_eval.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb32644547c382ba2926e9ea1a401a9828055a86
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/mesh_eval.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/__pycache__/pose3d_eval.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/pose3d_eval.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..51315ff76962b6d8fa520dd2badf41537adc7377
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/pose3d_eval.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/__pycache__/top_down_eval.cpython-310.pyc b/mmpose/core/evaluation/__pycache__/top_down_eval.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8ab0ef308747f10e269b6b912fa48c5c5ef59ae
Binary files /dev/null and b/mmpose/core/evaluation/__pycache__/top_down_eval.cpython-310.pyc differ
diff --git a/mmpose/core/evaluation/bottom_up_eval.py b/mmpose/core/evaluation/bottom_up_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b37d7c98e684284e3863922e7c7d2abedce0e24
--- /dev/null
+++ b/mmpose/core/evaluation/bottom_up_eval.py
@@ -0,0 +1,333 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.core.post_processing import (get_warp_matrix, transform_preds,
+ warp_affine_joints)
+
+
+def split_ae_outputs(outputs, num_joints, with_heatmaps, with_ae,
+ select_output_index):
+ """Split multi-stage outputs into heatmaps & tags.
+
+ Args:
+ outputs (list(Tensor)): Outputs of network
+ num_joints (int): Number of joints
+ with_heatmaps (list[bool]): Option to output
+ heatmaps for different stages.
+ with_ae (list[bool]): Option to output
+ ae tags for different stages.
+ select_output_index (list[int]): Output keep the selected index
+
+ Returns:
+ tuple: A tuple containing multi-stage outputs.
+
+ - list[Tensor]: multi-stage heatmaps.
+ - list[Tensor]: multi-stage tags.
+ """
+
+ heatmaps = []
+ tags = []
+
+ # aggregate heatmaps from different stages
+ for i, output in enumerate(outputs):
+ if i not in select_output_index:
+ continue
+        # starting index of the associative embeddings
+ offset_feat = num_joints if with_heatmaps[i] else 0
+ if with_heatmaps[i]:
+ heatmaps.append(output[:, :num_joints])
+ if with_ae[i]:
+ tags.append(output[:, offset_feat:])
+
+ return heatmaps, tags
+
+
+def flip_feature_maps(feature_maps, flip_index=None):
+ """Flip the feature maps and swap the channels.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ flip_index (list[int] | None): Channel-flip indexes.
+ If None, do not flip channels.
+
+ Returns:
+ list[Tensor]: Flipped feature_maps.
+ """
+ flipped_feature_maps = []
+ for feature_map in feature_maps:
+ feature_map = torch.flip(feature_map, [3])
+ if flip_index is not None:
+ flipped_feature_maps.append(feature_map[:, flip_index, :, :])
+ else:
+ flipped_feature_maps.append(feature_map)
+
+ return flipped_feature_maps
+
+
+def _resize_average(feature_maps, align_corners, index=-1, resize_size=None):
+ """Resize the feature maps and compute the average.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+ list[Tensor]: Averaged feature_maps.
+ """
+
+ if feature_maps is None:
+ return None
+ feature_maps_avg = 0
+
+ feature_map_list = _resize_concate(
+ feature_maps, align_corners, index=index, resize_size=resize_size)
+ for feature_map in feature_map_list:
+ feature_maps_avg += feature_map
+
+ feature_maps_avg /= len(feature_map_list)
+ return [feature_maps_avg]
+
+
+def _resize_unsqueeze_concat(feature_maps,
+ align_corners,
+ index=-1,
+ resize_size=None):
+ """Resize, unsqueeze and concatenate the feature_maps.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+        list[Tensor]: Unsqueezed and concatenated feature_maps.
+ """
+ if feature_maps is None:
+ return None
+ feature_map_list = _resize_concate(
+ feature_maps, align_corners, index=index, resize_size=resize_size)
+
+ feat_dim = len(feature_map_list[0].shape) - 1
+ output_feature_maps = torch.cat(
+ [torch.unsqueeze(fmap, dim=feat_dim + 1) for fmap in feature_map_list],
+ dim=feat_dim + 1)
+ return [output_feature_maps]
+
+
+def _resize_concate(feature_maps, align_corners, index=-1, resize_size=None):
+ """Resize and concatenate the feature_maps.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+        list[Tensor]: Resized feature_maps.
+ """
+ if feature_maps is None:
+ return None
+
+ feature_map_list = []
+
+ if index < 0:
+ index += len(feature_maps)
+
+ if resize_size is None:
+ resize_size = (feature_maps[index].size(2),
+ feature_maps[index].size(3))
+
+ for feature_map in feature_maps:
+ ori_size = (feature_map.size(2), feature_map.size(3))
+ if ori_size != resize_size:
+ feature_map = torch.nn.functional.interpolate(
+ feature_map,
+ size=resize_size,
+ mode='bilinear',
+ align_corners=align_corners)
+
+ feature_map_list.append(feature_map)
+
+ return feature_map_list
+
+
+def aggregate_stage_flip(feature_maps,
+ feature_maps_flip,
+ index=-1,
+ project2image=True,
+ size_projected=None,
+ align_corners=False,
+ aggregate_stage='concat',
+ aggregate_flip='average'):
+ """Inference the model to get multi-stage outputs (heatmaps & tags), and
+ resize them to base sizes.
+
+ Args:
+ feature_maps (list[Tensor]): feature_maps can be heatmaps,
+ tags, and pafs.
+ feature_maps_flip (list[Tensor] | None): flipped feature_maps.
+ feature maps can be heatmaps, tags, and pafs.
+ project2image (bool): Option to resize to base scale.
+ size_projected (list[int, int]): Base size of heatmaps [w, h].
+ align_corners (bool): Align corners when performing interpolation.
+ aggregate_stage (str): Methods to aggregate multi-stage feature maps.
+            Options: 'concat', 'average'. Default: 'concat'.
+
+ - 'concat': Concatenate the original and the flipped feature maps.
+ - 'average': Get the average of the original and the flipped
+ feature maps.
+ aggregate_flip (str): Methods to aggregate the original and
+ the flipped feature maps. Options: 'concat', 'average', 'none'.
+            Default: 'average'.
+
+            - 'concat': Concatenate the original and the flipped feature maps.
+            - 'average': Get the average of the original and the flipped
+                feature maps.
+ - 'none': no flipped feature maps.
+
+ Returns:
+ list[Tensor]: Aggregated feature maps with shape [NxKxWxH].
+ """
+
+ if feature_maps_flip is None:
+ aggregate_flip = 'none'
+
+ output_feature_maps = []
+
+ if aggregate_stage == 'average':
+ _aggregate_stage_func = _resize_average
+ elif aggregate_stage == 'concat':
+ _aggregate_stage_func = _resize_concate
+ else:
+        raise NotImplementedError()
+
+ if project2image and size_projected:
+ _origin = _aggregate_stage_func(
+ feature_maps,
+ align_corners,
+ index=index,
+ resize_size=(size_projected[1], size_projected[0]))
+
+ _flipped = _aggregate_stage_func(
+ feature_maps_flip,
+ align_corners,
+ index=index,
+ resize_size=(size_projected[1], size_projected[0]))
+ else:
+ _origin = _aggregate_stage_func(
+ feature_maps, align_corners, index=index, resize_size=None)
+ _flipped = _aggregate_stage_func(
+ feature_maps_flip, align_corners, index=index, resize_size=None)
+
+ if aggregate_flip == 'average':
+ assert feature_maps_flip is not None
+ for _ori, _fli in zip(_origin, _flipped):
+ output_feature_maps.append((_ori + _fli) / 2.0)
+
+    elif aggregate_flip == 'concat':
+        assert feature_maps_flip is not None
+        # use extend (not append(*...)) so multi-map outputs are handled
+        output_feature_maps.extend(_origin)
+        output_feature_maps.extend(_flipped)
+
+    elif aggregate_flip == 'none':
+        if isinstance(_origin, list):
+            output_feature_maps.extend(_origin)
+        else:
+            output_feature_maps.append(_origin)
+    else:
+        raise NotImplementedError()
+
+ return output_feature_maps
+
+
+def aggregate_scale(feature_maps_list,
+ align_corners=False,
+ aggregate_scale='average'):
+ """Aggregate multi-scale outputs.
+
+ Note:
+ batch size: N
+ keypoints num : K
+ heatmap width: W
+ heatmap height: H
+
+ Args:
+        feature_maps_list (list[Tensor]): Aggregated feature maps.
+        align_corners (bool): Align corners when performing interpolation.
+        aggregate_scale (str): Methods to aggregate multi-scale feature maps.
+            Options: 'average', 'unsqueeze_concat'. Default: 'average'.
+
+            - 'average': Get the average of the feature maps.
+            - 'unsqueeze_concat': Concatenate the feature maps along a new
+                axis.
+
+ Returns:
+ Tensor: Aggregated feature maps.
+ """
+
+ if aggregate_scale == 'average':
+ output_feature_maps = _resize_average(
+ feature_maps_list, align_corners, index=0, resize_size=None)
+
+ elif aggregate_scale == 'unsqueeze_concat':
+ output_feature_maps = _resize_unsqueeze_concat(
+ feature_maps_list, align_corners, index=0, resize_size=None)
+ else:
+        raise NotImplementedError()
+
+ return output_feature_maps[0]
+
+
+def get_group_preds(grouped_joints,
+ center,
+ scale,
+ heatmap_size,
+ use_udp=False):
+ """Transform the grouped joints back to the image.
+
+ Args:
+ grouped_joints (list): Grouped person joints.
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ heatmap_size (np.ndarray[2, ]): Size of the destination heatmaps.
+ use_udp (bool): Unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR'2020).
+
+ Returns:
+ list: List of the pose result for each person.
+ """
+ if len(grouped_joints) == 0:
+ return []
+
+ if use_udp:
+ if grouped_joints[0].shape[0] > 0:
+ heatmap_size_t = np.array(heatmap_size, dtype=np.float32) - 1.0
+ trans = get_warp_matrix(
+ theta=0,
+ size_input=heatmap_size_t,
+ size_dst=scale,
+ size_target=heatmap_size_t)
+ grouped_joints[0][..., :2] = \
+ warp_affine_joints(grouped_joints[0][..., :2], trans)
+ results = [person for person in grouped_joints[0]]
+ else:
+ results = []
+ for person in grouped_joints[0]:
+ joints = transform_preds(person, center, scale, heatmap_size)
+ results.append(joints)
+
+ return results
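A minimal sketch of `split_ae_outputs` on dummy tensors (hypothetical shapes); it assumes the mmpose package from this diff and its dependencies (mmcv, OpenCV) are installed.

```python
# Split a single-stage bottom-up output into heatmaps and associative-embedding
# tags stacked along the channel axis.
import torch

from mmpose.core.evaluation import split_ae_outputs

num_joints = 17
# one stage, batch of 2: K heatmap channels followed by K tag channels
outputs = [torch.rand(2, 2 * num_joints, 64, 48)]

heatmaps, tags = split_ae_outputs(
    outputs,
    num_joints=num_joints,
    with_heatmaps=[True],
    with_ae=[True],
    select_output_index=[0])
print(heatmaps[0].shape, tags[0].shape)  # both torch.Size([2, 17, 64, 48])
```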
diff --git a/mmpose/core/evaluation/eval_hooks.py b/mmpose/core/evaluation/eval_hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf36a038859ee7d7a77b68706ee96c2154fc39cc
--- /dev/null
+++ b/mmpose/core/evaluation/eval_hooks.py
@@ -0,0 +1,98 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv.runner import DistEvalHook as _DistEvalHook
+from mmcv.runner import EvalHook as _EvalHook
+
+MMPOSE_GREATER_KEYS = [
+ 'acc', 'ap', 'ar', 'pck', 'auc', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc'
+]
+MMPOSE_LESS_KEYS = ['loss', 'epe', 'nme', 'mpjpe', 'p-mpjpe', 'n-mpjpe']
+
+
+class EvalHook(_EvalHook):
+
+ def __init__(self,
+ dataloader,
+ start=None,
+ interval=1,
+ by_epoch=True,
+ save_best=None,
+ rule=None,
+ test_fn=None,
+ greater_keys=MMPOSE_GREATER_KEYS,
+ less_keys=MMPOSE_LESS_KEYS,
+ **eval_kwargs):
+
+ if test_fn is None:
+ from mmpose.apis import single_gpu_test
+ test_fn = single_gpu_test
+
+ # to be compatible with the config before v0.16.0
+
+ # remove "gpu_collect" from eval_kwargs
+ if 'gpu_collect' in eval_kwargs:
+ warnings.warn(
+ '"gpu_collect" will be deprecated in EvalHook.'
+ 'Please remove it from the config.', DeprecationWarning)
+ _ = eval_kwargs.pop('gpu_collect')
+
+ # update "save_best" according to "key_indicator" and remove the
+ # latter from eval_kwargs
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
+ warnings.warn(
+ '"key_indicator" will be deprecated in EvalHook.'
+ 'Please use "save_best" to specify the metric key,'
+ 'e.g., save_best="AP".', DeprecationWarning)
+
+ key_indicator = eval_kwargs.pop('key_indicator', 'AP')
+ if save_best is True and key_indicator is None:
+ raise ValueError('key_indicator should not be None, when '
+ 'save_best is set to True.')
+ save_best = key_indicator
+
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
+ rule, test_fn, greater_keys, less_keys, **eval_kwargs)
+
+
+class DistEvalHook(_DistEvalHook):
+
+ def __init__(self,
+ dataloader,
+ start=None,
+ interval=1,
+ by_epoch=True,
+ save_best=None,
+ rule=None,
+ test_fn=None,
+ greater_keys=MMPOSE_GREATER_KEYS,
+ less_keys=MMPOSE_LESS_KEYS,
+ broadcast_bn_buffer=True,
+ tmpdir=None,
+ gpu_collect=False,
+ **eval_kwargs):
+
+ if test_fn is None:
+ from mmpose.apis import multi_gpu_test
+ test_fn = multi_gpu_test
+
+ # to be compatible with the config before v0.16.0
+
+ # update "save_best" according to "key_indicator" and remove the
+ # latter from eval_kwargs
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
+ warnings.warn(
+ '"key_indicator" will be deprecated in EvalHook.'
+ 'Please use "save_best" to specify the metric key,'
+ 'e.g., save_best="AP".', DeprecationWarning)
+
+ key_indicator = eval_kwargs.pop('key_indicator', 'AP')
+ if save_best is True and key_indicator is None:
+ raise ValueError('key_indicator should not be None, when '
+ 'save_best is set to True.')
+ save_best = key_indicator
+
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
+ rule, test_fn, greater_keys, less_keys,
+ broadcast_bn_buffer, tmpdir, gpu_collect,
+ **eval_kwargs)
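A hypothetical config sketch (not taken from this repo's configs) showing the deprecated options these hooks still accept versus the current style, and the metric-key lists they hand to mmcv for comparison-rule inference.

```python
# Sketch of evaluation options; values are hypothetical. Assumes mmpose/mmcv
# from this diff are importable.
from mmpose.core.evaluation.eval_hooks import (MMPOSE_GREATER_KEYS,
                                               MMPOSE_LESS_KEYS)

# pre-v0.16.0 style: "key_indicator" (both hooks) and "gpu_collect"
# (EvalHook only) trigger DeprecationWarnings
old_style = dict(interval=10, key_indicator='AP', gpu_collect=True)
# current style: "save_best" names the metric key directly
new_style = dict(interval=10, save_best='AP')

# The hooks forward these lists to mmcv's EvalHook, which uses them to decide
# whether a larger or a smaller value of the chosen metric is better.
print(MMPOSE_GREATER_KEYS)
print(MMPOSE_LESS_KEYS)
```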
diff --git a/mmpose/core/evaluation/mesh_eval.py b/mmpose/core/evaluation/mesh_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..683b4539b29d1829a324de424c6d9f85a7037e5d
--- /dev/null
+++ b/mmpose/core/evaluation/mesh_eval.py
@@ -0,0 +1,66 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/akanazawa/hmr
+# Original licence: Copyright (c) 2018 akanazawa, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+
+
+def compute_similarity_transform(source_points, target_points):
+ """Computes a similarity transform (sR, t) that takes a set of 3D points
+ source_points (N x 3) closest to a set of 3D points target_points, where R
+ is an 3x3 rotation matrix, t 3x1 translation, s scale. And return the
+ transformed 3D points source_points_hat (N x 3). i.e. solves the orthogonal
+ Procrutes problem.
+
+ Note:
+ Points number: N
+
+ Args:
+ source_points (np.ndarray): Source point set with shape [N, 3].
+ target_points (np.ndarray): Target point set with shape [N, 3].
+
+ Returns:
+ np.ndarray: Transformed source point set with shape [N, 3].
+ """
+
+ assert target_points.shape[0] == source_points.shape[0]
+ assert target_points.shape[1] == 3 and source_points.shape[1] == 3
+
+ source_points = source_points.T
+ target_points = target_points.T
+
+ # 1. Remove mean.
+ mu1 = source_points.mean(axis=1, keepdims=True)
+ mu2 = target_points.mean(axis=1, keepdims=True)
+ X1 = source_points - mu1
+ X2 = target_points - mu2
+
+ # 2. Compute variance of X1 used for scale.
+ var1 = np.sum(X1**2)
+
+ # 3. The outer product of X1 and X2.
+ K = X1.dot(X2.T)
+
+ # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are
+ # singular vectors of K.
+ U, _, Vh = np.linalg.svd(K)
+ V = Vh.T
+ # Construct Z that fixes the orientation of R to get det(R)=1.
+ Z = np.eye(U.shape[0])
+ Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T)))
+ # Construct R.
+ R = V.dot(Z.dot(U.T))
+
+ # 5. Recover scale.
+ scale = np.trace(R.dot(K)) / var1
+
+ # 6. Recover translation.
+ t = mu2 - scale * (R.dot(mu1))
+
+ # 7. Transform the source points:
+ source_points_hat = scale * R.dot(source_points) + t
+
+ source_points_hat = source_points_hat.T
+
+ return source_points_hat
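A quick sanity-check sketch for `compute_similarity_transform`: the target is built from a known similarity transform of the source, so the alignment should recover it almost exactly (all values hypothetical; assumes the mmpose package is importable).

```python
import numpy as np

from mmpose.core.evaluation import compute_similarity_transform

rng = np.random.default_rng(0)
source = rng.normal(size=(17, 3))                     # 17 "joints"

theta = np.pi / 6                                     # known rotation about z
R = np.array([[np.cos(theta), -np.sin(theta), 0.],
              [np.sin(theta), np.cos(theta), 0.],
              [0., 0., 1.]])
target = 2.0 * source @ R.T + np.array([0.1, -0.3, 0.5])

aligned = compute_similarity_transform(source, target)
print(np.abs(aligned - target).max())                 # ~1e-15: near-exact fit
```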
diff --git a/mmpose/core/evaluation/pose3d_eval.py b/mmpose/core/evaluation/pose3d_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..545778ca7441c2d3e8ec58449c8ca7b162322e9e
--- /dev/null
+++ b/mmpose/core/evaluation/pose3d_eval.py
@@ -0,0 +1,171 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from .mesh_eval import compute_similarity_transform
+
+
+def keypoint_mpjpe(pred, gt, mask, alignment='none'):
+ """Calculate the mean per-joint position error (MPJPE) and the error after
+ rigid alignment with the ground truth (P-MPJPE).
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - keypoint_dims: C
+
+ Args:
+ pred (np.ndarray): Predicted keypoint location with shape [N, K, C].
+ gt (np.ndarray): Groundtruth keypoint location with shape [N, K, C].
+ mask (np.ndarray): Visibility of the target with shape [N, K].
+ False for invisible joints, and True for visible.
+ Invisible joints will be ignored for accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in
+ scale, rotation and translation.
+ Returns:
+        float: mean per-joint position error under the chosen alignment:
+
+        - ``'none'``: plain MPJPE.
+        - ``'scale'``: MPJPE after scale alignment.
+        - ``'procrustes'``: MPJPE after rigid alignment (P-MPJPE).
+ """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)[mask].mean()
+
+ return error
+
+
+def keypoint_3d_pck(pred, gt, mask, alignment='none', threshold=0.15):
+ """Calculate the Percentage of Correct Keypoints (3DPCK) w. or w/o rigid
+ alignment.
+
+    Paper ref: 'Monocular 3D Human Pose Estimation In The Wild Using Improved
+    CNN Supervision', 3DV 2017.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - keypoint_dims: C
+
+ Args:
+ pred (np.ndarray[N, K, C]): Predicted keypoint location.
+ gt (np.ndarray[N, K, C]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in scale,
+ rotation and translation.
+
+        threshold: If the L2 distance between the prediction and the
+            groundtruth is less than this threshold, the prediction is
+            considered correct. Default: 0.15 (m).
+
+ Returns:
+ pck: percentage of correct keypoints.
+ """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)
+ pck = (error < threshold).astype(np.float32)[mask].mean() * 100
+
+ return pck
+
+
+def keypoint_3d_auc(pred, gt, mask, alignment='none'):
+ """Calculate the Area Under the Curve (3DAUC) computed for a range of 3DPCK
+ thresholds.
+
+    Paper ref: 'Monocular 3D Human Pose Estimation In The Wild Using Improved
+    CNN Supervision', 3DV 2017.
+ This implementation is derived from mpii_compute_3d_pck.m, which is
+ provided as part of the MPI-INF-3DHP test data release.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ keypoint_dims: C
+
+ Args:
+ pred (np.ndarray[N, K, C]): Predicted keypoint location.
+ gt (np.ndarray[N, K, C]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in scale,
+ rotation and translation.
+
+ Returns:
+ auc: AUC computed for a range of 3DPCK thresholds.
+ """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)
+
+ thresholds = np.linspace(0., 0.15, 31)
+ pck_values = np.zeros(len(thresholds))
+ for i in range(len(thresholds)):
+ pck_values[i] = (error < thresholds[i]).astype(np.float32)[mask].mean()
+
+ auc = pck_values.mean() * 100
+
+ return auc
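A toy sketch for `keypoint_mpjpe` (hypothetical values): a constant per-axis offset of 0.01 gives an MPJPE of sqrt(3)*0.01 ≈ 0.0173, and the error vanishes after Procrustes alignment because the offset is a pure translation.

```python
import numpy as np

from mmpose.core.evaluation import keypoint_mpjpe

rng = np.random.default_rng(0)
gt = rng.random((4, 17, 3))              # N=4 poses, K=17 joints
pred = gt + 0.01                         # constant translation error
mask = np.ones((4, 17), dtype=bool)

print(keypoint_mpjpe(pred, gt, mask, alignment='none'))        # ~0.0173
print(keypoint_mpjpe(pred, gt, mask, alignment='procrustes'))  # ~0.0
```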
diff --git a/mmpose/core/evaluation/top_down_eval.py b/mmpose/core/evaluation/top_down_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee6a2501cf1eec1b16f7d58bf9fd62da0fa48ccf
--- /dev/null
+++ b/mmpose/core/evaluation/top_down_eval.py
@@ -0,0 +1,684 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import transform_preds
+
+
+def _calc_distances(preds, targets, mask, normalize):
+ """Calculate the normalized distances between preds and target.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (normally, D=2 or D=3)
+
+ Args:
+ preds (np.ndarray[N, K, D]): Predicted keypoint location.
+ targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize (np.ndarray[N, D]): Typical value is heatmap_size
+
+ Returns:
+ np.ndarray[K, N]: The normalized distances. \
+ If target keypoints are missing, the distance is -1.
+ """
+ N, K, _ = preds.shape
+ # set mask=0 when normalize==0
+ _mask = mask.copy()
+ _mask[np.where((normalize == 0).sum(1))[0], :] = False
+ distances = np.full((N, K), -1, dtype=np.float32)
+ # handle invalid values
+ normalize[np.where(normalize <= 0)] = 1e6
+ distances[_mask] = np.linalg.norm(
+ ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
+ return distances.T
+
+
+def _distance_acc(distances, thr=0.5):
+ """Return the percentage below the distance threshold, while ignoring
+ distances values with -1.
+
+ Note:
+ batch_size: N
+ Args:
+ distances (np.ndarray[N, ]): The normalized distances.
+ thr (float): Threshold of the distances.
+
+ Returns:
+ float: Percentage of distances below the threshold. \
+ If all target keypoints are missing, return -1.
+ """
+ distance_valid = distances != -1
+ num_distance_valid = distance_valid.sum()
+ if num_distance_valid > 0:
+ return (distances[distance_valid] < thr).sum() / num_distance_valid
+ return -1
+
+
+def _get_max_preds(heatmaps):
+ """Get keypoint predictions from score maps.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+
+ Returns:
+ tuple: A tuple containing aggregated results.
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ assert isinstance(heatmaps,
+ np.ndarray), ('heatmaps should be numpy.ndarray')
+ assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+
+ N, K, _, W = heatmaps.shape
+ heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+ idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+ maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+
+ preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+ preds[:, :, 0] = preds[:, :, 0] % W
+ preds[:, :, 1] = preds[:, :, 1] // W
+
+ preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
+ return preds, maxvals
+
+
+def _get_max_preds_3d(heatmaps):
+ """Get keypoint predictions from 3D score maps.
+
+ Note:
+ batch size: N
+ num keypoints: K
+ heatmap depth size: D
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
+
+ Returns:
+ tuple: A tuple containing aggregated results.
+
+ - preds (np.ndarray[N, K, 3]): Predicted keypoint location.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ assert isinstance(heatmaps, np.ndarray), \
+ ('heatmaps should be numpy.ndarray')
+ assert heatmaps.ndim == 5, 'heatmaps should be 5-ndim'
+
+ N, K, D, H, W = heatmaps.shape
+ heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+ idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+ maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+
+ preds = np.zeros((N, K, 3), dtype=np.float32)
+ _idx = idx[..., 0]
+ preds[..., 2] = _idx // (H * W)
+ preds[..., 1] = (_idx // W) % H
+ preds[..., 0] = _idx % W
+
+ preds = np.where(maxvals > 0.0, preds, -1)
+ return preds, maxvals
+
+
+def pose_pck_accuracy(output, target, mask, thr=0.05, normalize=None):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints from heatmaps.
+
+ Note:
+ PCK metric measures accuracy of the localization of the body joints.
+ The distances between predicted positions and the ground-truth ones
+ are typically normalized by the bounding box size.
+ The threshold (thr) of the normalized distance is commonly set
+ as 0.05, 0.1 or 0.2 etc.
+
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ output (np.ndarray[N, K, H, W]): Model output heatmaps.
+ target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ thr (float): Threshold of PCK calculation. Default 0.05.
+ normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+
+ Returns:
+ tuple: A tuple containing keypoint accuracy.
+
+ - np.ndarray[K]: Accuracy of each keypoint.
+ - float: Averaged accuracy across all keypoints.
+ - int: Number of valid keypoints.
+ """
+ N, K, H, W = output.shape
+ if K == 0:
+ return None, 0, 0
+ if normalize is None:
+ normalize = np.tile(np.array([[H, W]]), (N, 1))
+
+ pred, _ = _get_max_preds(output)
+ gt, _ = _get_max_preds(target)
+ return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)
+
+
+def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ PCK metric measures accuracy of the localization of the body joints.
+ The distances between predicted positions and the ground-truth ones
+ are typically normalized by the bounding box size.
+ The threshold (thr) of the normalized distance is commonly set
+ as 0.05, 0.1 or 0.2 etc.
+
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ thr (float): Threshold of PCK calculation.
+ normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+
+ Returns:
+ tuple: A tuple containing keypoint accuracy.
+
+ - acc (np.ndarray[K]): Accuracy of each keypoint.
+ - avg_acc (float): Averaged accuracy across all keypoints.
+ - cnt (int): Number of valid keypoints.
+ """
+ distances = _calc_distances(pred, gt, mask, normalize)
+
+ acc = np.array([_distance_acc(d, thr) for d in distances])
+ valid_acc = acc[acc >= 0]
+ cnt = len(valid_acc)
+ avg_acc = valid_acc.mean() if cnt > 0 else 0
+ return acc, avg_acc, cnt
+
+
+def keypoint_auc(pred, gt, mask, normalize, num_step=20):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+        normalize (float): Normalization factor.
+        num_step (int): Number of thresholds sampled in [0, 1) to compute
+            the AUC. Default: 20.
+
+ Returns:
+ float: Area under curve.
+ """
+ nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
+ x = [1.0 * i / num_step for i in range(num_step)]
+ y = []
+ for thr in x:
+ _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
+ y.append(avg_acc)
+
+ auc = 0
+ for i in range(num_step):
+ auc += 1.0 / num_step * y[i]
+ return auc
+
+
+def keypoint_nme(pred, gt, mask, normalize_factor):
+ """Calculate the normalized mean error (NME).
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize_factor (np.ndarray[N, 2]): Normalization factor.
+
+ Returns:
+ float: normalized mean error
+ """
+ distances = _calc_distances(pred, gt, mask, normalize_factor)
+ distance_valid = distances[distances != -1]
+ return distance_valid.sum() / max(1, len(distance_valid))
+
+
+def keypoint_epe(pred, gt, mask):
+ """Calculate the end-point error.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+
+ Returns:
+ float: Average end-point error.
+ """
+
+ distances = _calc_distances(
+ pred, gt, mask,
+ np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
+ distance_valid = distances[distances != -1]
+ return distance_valid.sum() / max(1, len(distance_valid))
+
+
+def _taylor(heatmap, coord):
+ """Distribution aware coordinate decoding method.
+
+ Note:
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmap (np.ndarray[H, W]): Heatmap of a particular joint type.
+ coord (np.ndarray[2,]): Coordinates of the predicted keypoints.
+
+ Returns:
+ np.ndarray[2,]: Updated coordinates.
+ """
+ H, W = heatmap.shape[:2]
+ px, py = int(coord[0]), int(coord[1])
+ if 1 < px < W - 2 and 1 < py < H - 2:
+ dx = 0.5 * (heatmap[py][px + 1] - heatmap[py][px - 1])
+ dy = 0.5 * (heatmap[py + 1][px] - heatmap[py - 1][px])
+ dxx = 0.25 * (
+ heatmap[py][px + 2] - 2 * heatmap[py][px] + heatmap[py][px - 2])
+ dxy = 0.25 * (
+ heatmap[py + 1][px + 1] - heatmap[py - 1][px + 1] -
+ heatmap[py + 1][px - 1] + heatmap[py - 1][px - 1])
+ dyy = 0.25 * (
+ heatmap[py + 2 * 1][px] - 2 * heatmap[py][px] +
+ heatmap[py - 2 * 1][px])
+ derivative = np.array([[dx], [dy]])
+ hessian = np.array([[dxx, dxy], [dxy, dyy]])
+ if dxx * dyy - dxy**2 != 0:
+ hessianinv = np.linalg.inv(hessian)
+ offset = -hessianinv @ derivative
+ offset = np.squeeze(np.array(offset.T), axis=0)
+ coord += offset
+ return coord
+
+
+def post_dark_udp(coords, batch_heatmaps, kernel=3):
+ """DARK post-pocessing. Implemented by udp. Paper ref: Huang et al. The
+ Devil is in the Details: Delving into Unbiased Data Processing for Human
+ Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+
+ Note:
+ - batch size: B
+ - num keypoints: K
+ - num persons: N
+ - height of heatmaps: H
+ - width of heatmaps: W
+
+ B=1 for bottom_up paradigm where all persons share the same heatmap.
+ B=N for top_down paradigm where each person has its own heatmaps.
+
+ Args:
+ coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
+ batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
+ kernel (int): Gaussian kernel size (K) for modulation.
+
+ Returns:
+ np.ndarray([N, K, 2]): Refined coordinates.
+ """
+ if not isinstance(batch_heatmaps, np.ndarray):
+ batch_heatmaps = batch_heatmaps.cpu().numpy()
+ B, K, H, W = batch_heatmaps.shape
+ N = coords.shape[0]
+ assert (B == 1 or B == N)
+ for heatmaps in batch_heatmaps:
+ for heatmap in heatmaps:
+ cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
+ np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
+ np.log(batch_heatmaps, batch_heatmaps)
+
+ batch_heatmaps_pad = np.pad(
+ batch_heatmaps, ((0, 0), (0, 0), (1, 1), (1, 1)),
+ mode='edge').flatten()
+
+ index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
+ index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
+ index = index.astype(int).reshape(-1, 1)
+ i_ = batch_heatmaps_pad[index]
+ ix1 = batch_heatmaps_pad[index + 1]
+ iy1 = batch_heatmaps_pad[index + W + 2]
+ ix1y1 = batch_heatmaps_pad[index + W + 3]
+ ix1_y1_ = batch_heatmaps_pad[index - W - 3]
+ ix1_ = batch_heatmaps_pad[index - 1]
+ iy1_ = batch_heatmaps_pad[index - 2 - W]
+
+ dx = 0.5 * (ix1 - ix1_)
+ dy = 0.5 * (iy1 - iy1_)
+ derivative = np.concatenate([dx, dy], axis=1)
+ derivative = derivative.reshape(N, K, 2, 1)
+ dxx = ix1 - 2 * i_ + ix1_
+ dyy = iy1 - 2 * i_ + iy1_
+ dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
+ hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
+ hessian = hessian.reshape(N, K, 2, 2)
+ hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
+ coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
+ return coords
+
+
+def _gaussian_blur(heatmaps, kernel=11):
+ """Modulate heatmap distribution with Gaussian.
+ sigma = 0.3*((kernel_size-1)*0.5-1)+0.8
+ sigma~=3 if k=17
+ sigma=2 if k=11;
+ sigma~=1.5 if k=7;
+ sigma~=1 if k=3;
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ kernel (int): Gaussian kernel size (K) for modulation, which should
+ match the heatmap gaussian sigma when training.
+ K=17 for sigma=3 and k=11 for sigma=2.
+
+ Returns:
+ np.ndarray ([N, K, H, W]): Modulated heatmap distribution.
+ """
+ assert kernel % 2 == 1
+
+ border = (kernel - 1) // 2
+ batch_size = heatmaps.shape[0]
+ num_joints = heatmaps.shape[1]
+ height = heatmaps.shape[2]
+ width = heatmaps.shape[3]
+ for i in range(batch_size):
+ for j in range(num_joints):
+ origin_max = np.max(heatmaps[i, j])
+ dr = np.zeros((height + 2 * border, width + 2 * border),
+ dtype=np.float32)
+ dr[border:-border, border:-border] = heatmaps[i, j].copy()
+ dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
+ heatmaps[i, j] = dr[border:-border, border:-border].copy()
+ heatmaps[i, j] *= origin_max / np.max(heatmaps[i, j])
+ return heatmaps
+
+
+def keypoints_from_regression(regression_preds, center, scale, img_size):
+ """Get final keypoint predictions from regression vectors and transform
+ them back to the image.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ regression_preds (np.ndarray[N, K, 2]): model prediction.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+ img_size (list(img_width, img_height)): model input image size.
+
+ Returns:
+ tuple:
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ N, K, _ = regression_preds.shape
+ preds, maxvals = regression_preds, np.ones((N, K, 1), dtype=np.float32)
+
+ preds = preds * img_size
+
+ # Transform back to the image
+ for i in range(N):
+ preds[i] = transform_preds(preds[i], center[i], scale[i], img_size)
+
+ return preds, maxvals
+
+
+def keypoints_from_heatmaps(heatmaps,
+ center,
+ scale,
+ unbiased=False,
+ post_process='default',
+ kernel=11,
+ valid_radius_factor=0.0546875,
+ use_udp=False,
+ target_type='GaussianHeatmap'):
+ """Get final keypoint predictions from heatmaps and transform them back to
+ the image.
+
+ Note:
+ - batch size: N
+ - num keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+ post_process (str/None): Choice of methods to post-process
+ heatmaps. Currently supported: None, 'default', 'unbiased',
+ 'megvii'.
+ unbiased (bool): Option to use unbiased decoding. Mutually
+ exclusive with megvii.
+ Note: this arg is deprecated and unbiased=True can be replaced
+ by post_process='unbiased'
+ Paper ref: Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+ kernel (int): Gaussian kernel size (K) for modulation, which should
+ match the heatmap gaussian sigma when training.
+ K=17 for sigma=3 and k=11 for sigma=2.
+ valid_radius_factor (float): The radius factor of the positive area
+ in classification heatmap for UDP.
+ use_udp (bool): Use unbiased data processing.
+ target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
+ GaussianHeatmap: Classification target with gaussian distribution.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Returns:
+ tuple: A tuple containing keypoint predictions and scores.
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ # Avoid being affected
+ heatmaps = heatmaps.copy()
+
+ # detect conflicts
+ if unbiased:
+ assert post_process not in [False, None, 'megvii']
+ if post_process in ['megvii', 'unbiased']:
+ assert kernel > 0
+ if use_udp:
+ assert not post_process == 'megvii'
+
+ # normalize configs
+ if post_process is False:
+ warnings.warn(
+ 'post_process=False is deprecated, '
+ 'please use post_process=None instead', DeprecationWarning)
+ post_process = None
+ elif post_process is True:
+ if unbiased is True:
+ warnings.warn(
+ 'post_process=True, unbiased=True is deprecated,'
+ " please use post_process='unbiased' instead",
+ DeprecationWarning)
+ post_process = 'unbiased'
+ else:
+ warnings.warn(
+ 'post_process=True, unbiased=False is deprecated, '
+ "please use post_process='default' instead",
+ DeprecationWarning)
+ post_process = 'default'
+ elif post_process == 'default':
+ if unbiased is True:
+ warnings.warn(
+ 'unbiased=True is deprecated, please use '
+ "post_process='unbiased' instead", DeprecationWarning)
+ post_process = 'unbiased'
+
+ # start processing
+ if post_process == 'megvii':
+ heatmaps = _gaussian_blur(heatmaps, kernel=kernel)
+
+ N, K, H, W = heatmaps.shape
+ if use_udp:
+ if target_type.lower() == 'GaussianHeatMap'.lower():
+ preds, maxvals = _get_max_preds(heatmaps)
+ preds = post_dark_udp(preds, heatmaps, kernel=kernel)
+ elif target_type.lower() == 'CombinedTarget'.lower():
+ for person_heatmaps in heatmaps:
+ for i, heatmap in enumerate(person_heatmaps):
+ kt = 2 * kernel + 1 if i % 3 == 0 else kernel
+ cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
+ # valid radius is in direct proportion to the height of heatmap.
+ valid_radius = valid_radius_factor * H
+ offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
+ offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
+ heatmaps = heatmaps[:, ::3, :]
+ preds, maxvals = _get_max_preds(heatmaps)
+ index = preds[..., 0] + preds[..., 1] * W
+ index += W * H * np.arange(0, N * K / 3)
+ index = index.astype(int).reshape(N, K // 3, 1)
+ preds += np.concatenate((offset_x[index], offset_y[index]), axis=2)
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+ else:
+ preds, maxvals = _get_max_preds(heatmaps)
+ if post_process == 'unbiased': # alleviate biased coordinate
+ # apply Gaussian distribution modulation.
+ heatmaps = np.log(
+ np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
+ for n in range(N):
+ for k in range(K):
+ preds[n][k] = _taylor(heatmaps[n][k], preds[n][k])
+ elif post_process is not None:
+ # add +/-0.25 shift to the predicted locations for higher acc.
+ for n in range(N):
+ for k in range(K):
+ heatmap = heatmaps[n][k]
+ px = int(preds[n][k][0])
+ py = int(preds[n][k][1])
+ if 1 < px < W - 1 and 1 < py < H - 1:
+ diff = np.array([
+ heatmap[py][px + 1] - heatmap[py][px - 1],
+ heatmap[py + 1][px] - heatmap[py - 1][px]
+ ])
+ preds[n][k] += np.sign(diff) * .25
+ if post_process == 'megvii':
+ preds[n][k] += 0.5
+
+ # Transform back to the image
+ for i in range(N):
+ preds[i] = transform_preds(
+ preds[i], center[i], scale[i], [W, H], use_udp=use_udp)
+
+ if post_process == 'megvii':
+ maxvals = maxvals / 255.0 + 0.5
+
+ return preds, maxvals
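# --- Editor's note: illustrative usage sketch, not part of the diff above. ---
# Decoding a batch of dummy heatmaps with the decoder defined above (assuming
# the usual mmpose signature keypoints_from_heatmaps(heatmaps, center, scale,
# ...)); all shapes and values below are made-up assumptions.
import numpy as np

_N, _K, _H, _W = 2, 17, 64, 48
_heatmaps = np.random.rand(_N, _K, _H, _W).astype(np.float32)
_center = np.array([[320., 240.], [100., 90.]])   # bbox centers (x, y)
_scale = np.array([[1.0, 1.33], [0.8, 1.07]])     # bbox size / 200 (pixel_std)
_preds, _maxvals = keypoints_from_heatmaps(
    _heatmaps, _center, _scale, post_process='default')
# _preds: (N, K, 2) keypoints in image space, _maxvals: (N, K, 1) confidences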
+
+
+def keypoints_from_heatmaps3d(heatmaps, center, scale):
+ """Get final keypoint predictions from 3d heatmaps and transform them back
+ to the image.
+
+ Note:
+ - batch size: N
+ - num keypoints: K
+ - heatmap depth size: D
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+
+ Returns:
+ tuple: A tuple containing keypoint predictions and scores.
+
+ - preds (np.ndarray[N, K, 3]): Predicted 3d keypoint location \
+ in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ N, K, D, H, W = heatmaps.shape
+ preds, maxvals = _get_max_preds_3d(heatmaps)
+ # Transform back to the image
+ for i in range(N):
+ preds[i, :, :2] = transform_preds(preds[i, :, :2], center[i], scale[i],
+ [W, H])
+ return preds, maxvals
+
+
+def multilabel_classification_accuracy(pred, gt, mask, thr=0.5):
+ """Get multi-label classification accuracy.
+
+ Note:
+ - batch size: N
+ - label number: L
+
+ Args:
+ pred (np.ndarray[N, L, 2]): model predicted labels.
+ gt (np.ndarray[N, L, 2]): ground-truth labels.
+ mask (np.ndarray[N, 1] or np.ndarray[N, L] ): reliability of
+ ground-truth labels.
+
+ Returns:
+ float: multi-label classification accuracy.
+ """
+ # we only compute accuracy on the samples with ground-truth of all labels.
+ valid = (mask > 0).min(axis=1) if mask.ndim == 2 else (mask > 0)
+ pred, gt = pred[valid], gt[valid]
+
+ if pred.shape[0] == 0:
+ acc = 0.0 # when no sample is with gt labels, set acc to 0.
+ else:
+ # The classification of a sample is regarded as correct
+ # only if it's correct for all labels.
+ acc = (((pred - thr) * (gt - thr)) > 0).all(axis=1).mean()
+ return acc
diff --git a/mmpose/core/fp16/__init__.py b/mmpose/core/fp16/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cb054810870626496ab4145446b17cf2c2e0b5d
--- /dev/null
+++ b/mmpose/core/fp16/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .decorators import auto_fp16, force_fp32
+from .hooks import Fp16OptimizerHook, wrap_fp16_model
+from .utils import cast_tensor_type
+
+__all__ = [
+ 'auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model',
+ 'cast_tensor_type'
+]
diff --git a/mmpose/core/fp16/__pycache__/__init__.cpython-310.pyc b/mmpose/core/fp16/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..338daed29cdbca8adaca6829f8fba47cff31be10
Binary files /dev/null and b/mmpose/core/fp16/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/fp16/__pycache__/decorators.cpython-310.pyc b/mmpose/core/fp16/__pycache__/decorators.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d51ec29b81f7f2b68dd1bcf145939ed21d372736
Binary files /dev/null and b/mmpose/core/fp16/__pycache__/decorators.cpython-310.pyc differ
diff --git a/mmpose/core/fp16/__pycache__/hooks.cpython-310.pyc b/mmpose/core/fp16/__pycache__/hooks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a3cb56f76db474e0c089e0373cfd12eb3e73702
Binary files /dev/null and b/mmpose/core/fp16/__pycache__/hooks.cpython-310.pyc differ
diff --git a/mmpose/core/fp16/__pycache__/utils.cpython-310.pyc b/mmpose/core/fp16/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..39af6cf4a989cafc16f42e7e1cc4aa0ec9804e5f
Binary files /dev/null and b/mmpose/core/fp16/__pycache__/utils.cpython-310.pyc differ
diff --git a/mmpose/core/fp16/decorators.py b/mmpose/core/fp16/decorators.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d70ddf533c069b26f08ef3a973328790843def5
--- /dev/null
+++ b/mmpose/core/fp16/decorators.py
@@ -0,0 +1,175 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools
+import warnings
+from inspect import getfullargspec
+
+import torch
+
+from .utils import cast_tensor_type
+
+
+def auto_fp16(apply_to=None, out_fp32=False):
+ """Decorator to enable fp16 training automatically.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If input arguments are fp32 tensors, they will
+ be converted to fp16 automatically. Arguments other than fp32 tensors are
+ ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp32 (bool): Whether to convert the output back to fp32.
+
+ Example:
+
+ >>> import torch.nn as nn
+ >>> class MyModule1(nn.Module):
+ >>>
+ >>> # Convert x and y to fp16
+ >>> @auto_fp16()
+ >>> def forward(self, x, y):
+ >>> pass
+
+ >>> import torch.nn as nn
+ >>> class MyModule2(nn.Module):
+ >>>
+ >>> # convert pred to fp16
+ >>> @auto_fp16(apply_to=('pred', ))
+ >>> def do_something(self, pred, others):
+ >>> pass
+ """
+
+ warnings.warn(
+ 'auto_fp16 in mmpose will be deprecated in the next release. '
+ 'Please use mmcv.runner.auto_fp16 instead (mmcv>=1.3.1).',
+ DeprecationWarning)
+
+ def auto_fp16_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@auto_fp16 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be cast
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ # NOTE: default args are not taken into consideration
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.float, torch.half))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = {}
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.float, torch.half)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp32 if necessary
+ if out_fp32:
+ output = cast_tensor_type(output, torch.half, torch.float)
+ return output
+
+ return new_func
+
+ return auto_fp16_wrapper
+
+
+def force_fp32(apply_to=None, out_fp16=False):
+ """Decorator to convert input arguments to fp32 in force.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If there are some inputs that must be processed
+ in fp32 mode, then this decorator can handle it. If inputs arguments are
+ fp16 tensors, they will be converted to fp32 automatically. Arguments other
+ than fp16 tensors are ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp16 (bool): Whether to convert the output back to fp16.
+
+ Example:
+
+ >>> import torch.nn as nn
+ >>> class MyModule1(nn.Module):
+ >>>
+ >>> # Convert x and y to fp32
+ >>> @force_fp32()
+ >>> def loss(self, x, y):
+ >>> pass
+
+ >>> import torch.nn as nn
+ >>> class MyModule2(nn.Module):
+ >>>
+ >>> # convert pred to fp32
+ >>> @force_fp32(apply_to=('pred', ))
+ >>> def post_process(self, pred, others):
+ >>> pass
+ """
+ warnings.warn(
+ 'force_fp32 in mmpose will be deprecated in the next release. '
+ 'Please use mmcv.runner.force_fp32 instead (mmcv>=1.3.1).',
+ DeprecationWarning)
+
+ def force_fp32_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@force_fp32 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be cast
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.half, torch.float))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = dict()
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.half, torch.float)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp32 if necessary
+ if out_fp16:
+ output = cast_tensor_type(output, torch.float, torch.half)
+ return output
+
+ return new_func
+
+ return force_fp32_wrapper
diff --git a/mmpose/core/fp16/hooks.py b/mmpose/core/fp16/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..74081a9b73b95ebb20cabf07cfaeab86cc874780
--- /dev/null
+++ b/mmpose/core/fp16/hooks.py
@@ -0,0 +1,167 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+from mmcv.runner import OptimizerHook
+from mmcv.utils import _BatchNorm
+
+from ..utils.dist_utils import allreduce_grads
+from .utils import cast_tensor_type
+
+
+class Fp16OptimizerHook(OptimizerHook):
+ """FP16 optimizer hook.
+
+ The steps of the fp16 optimizer are as follows.
+ 1. Scale the loss value.
+ 2. Backpropagate through the fp16 model.
+ 3. Copy gradients from the fp16 model to the fp32 weight copy.
+ 4. Update the fp32 weights.
+ 5. Copy the updated parameters from the fp32 weights back to the fp16 model.
+
+ Refer to https://arxiv.org/abs/1710.03740 for more details.
+
+ Args:
+ loss_scale (float): Scale factor multiplied with loss.
+ """
+
+ def __init__(self,
+ grad_clip=None,
+ coalesce=True,
+ bucket_size_mb=-1,
+ loss_scale=512.,
+ distributed=True):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+ self.loss_scale = loss_scale
+ self.distributed = distributed
+
+ def before_run(self, runner):
+ """Preparing steps before Mixed Precision Training.
+
+ 1. Make a master copy of fp32 weights for optimization.
+ 2. Convert the main model from fp32 to fp16.
+
+ Args:
+ runner (:obj:`mmcv.Runner`): The underlying training runner.
+ """
+ # keep a copy of fp32 weights
+ runner.optimizer.param_groups = copy.deepcopy(
+ runner.optimizer.param_groups)
+ # convert model to fp16
+ wrap_fp16_model(runner.model)
+
+ @staticmethod
+ def copy_grads_to_fp32(fp16_net, fp32_weights):
+ """Copy gradients from fp16 model to fp32 weight copy."""
+ for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()):
+ if fp16_param.grad is not None:
+ if fp32_param.grad is None:
+ fp32_param.grad = fp32_param.data.new(fp32_param.size())
+ fp32_param.grad.copy_(fp16_param.grad)
+
+ @staticmethod
+ def copy_params_to_fp16(fp16_net, fp32_weights):
+ """Copy updated params from fp32 weight copy to fp16 model."""
+ for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights):
+ fp16_param.data.copy_(fp32_param.data)
+
+ def after_train_iter(self, runner):
+ """Backward optimization steps for Mixed Precision Training.
+
+ 1. Scale the loss by a scale factor.
+ 2. Backward the loss to obtain the gradients (fp16).
+ 3. Copy gradients from the model to the fp32 weight copy.
+ 4. Scale the gradients back and update the fp32 weight copy.
+ 5. Copy back the params from fp32 weight copy to the fp16 model.
+
+ Args:
+ runner (:obj:`mmcv.Runner`): The underlying training runner.
+ """
+ # clear grads of last iteration
+ runner.model.zero_grad()
+ runner.optimizer.zero_grad()
+ # scale the loss value
+ scaled_loss = runner.outputs['loss'] * self.loss_scale
+ scaled_loss.backward()
+ # copy fp16 grads in the model to fp32 params in the optimizer
+ fp32_weights = []
+ for param_group in runner.optimizer.param_groups:
+ fp32_weights += param_group['params']
+ self.copy_grads_to_fp32(runner.model, fp32_weights)
+ # allreduce grads
+ if self.distributed:
+ allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb)
+ # scale the gradients back
+ for param in fp32_weights:
+ if param.grad is not None:
+ param.grad.div_(self.loss_scale)
+ if self.grad_clip is not None:
+ self.clip_grads(fp32_weights)
+ # update fp32 params
+ runner.optimizer.step()
+ # copy fp32 params to the fp16 model
+ self.copy_params_to_fp16(runner.model, fp32_weights)
+
+
+def wrap_fp16_model(model):
+ """Wrap the FP32 model to FP16.
+
+ 1. Convert FP32 model to FP16.
+ 2. Keep some necessary layers (e.g., normalization layers) in FP32.
+
+ Args:
+ model (nn.Module): Model in FP32.
+ """
+ # convert model to fp16
+ model.half()
+ # patch the normalization layers to make it work in fp32 mode
+ patch_norm_fp32(model)
+ # set `fp16_enabled` flag
+ for m in model.modules():
+ if hasattr(m, 'fp16_enabled'):
+ m.fp16_enabled = True
+
+
+def patch_norm_fp32(module):
+ """Recursively convert normalization layers from FP16 to FP32.
+
+ Args:
+ module (nn.Module): The FP16 module whose normalization layers are to be converted.
+
+ Returns:
+ nn.Module: The converted module, with its normalization layers cast to FP32.
+ """
+ if isinstance(module, (_BatchNorm, nn.GroupNorm)):
+ module.float()
+ module.forward = patch_forward_method(module.forward, torch.half,
+ torch.float)
+ for child in module.children():
+ patch_norm_fp32(child)
+ return module
+
+
+def patch_forward_method(func, src_type, dst_type, convert_output=True):
+ """Patch the forward method of a module.
+
+ Args:
+ func (callable): The original forward method.
+ src_type (torch.dtype): Type of input arguments to be converted from.
+ dst_type (torch.dtype): Type of input arguments to be converted to.
+ convert_output (bool): Whether to convert the output back to src_type.
+
+ Returns:
+ callable: The patched forward method.
+ """
+
+ def new_forward(*args, **kwargs):
+ output = func(*cast_tensor_type(args, src_type, dst_type),
+ **cast_tensor_type(kwargs, src_type, dst_type))
+ if convert_output:
+ output = cast_tensor_type(output, dst_type, src_type)
+ return output
+
+ return new_forward
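# --- Editor's note: illustrative sketch, not part of the diff above. ---
# wrap_fp16_model casts parameters to fp16 while patch_norm_fp32 keeps
# normalization layers in fp32; the tiny module below is an arbitrary choice
# used only for demonstration.
import torch
import torch.nn as nn

_demo = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
wrap_fp16_model(_demo)
assert _demo[0].weight.dtype == torch.float16   # conv weights cast to fp16
assert _demo[1].weight.dtype == torch.float32   # BN kept in fp32 for stability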
diff --git a/mmpose/core/fp16/utils.py b/mmpose/core/fp16/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1ec3d328328560c7959ae5e77621feb77692068
--- /dev/null
+++ b/mmpose/core/fp16/utils.py
@@ -0,0 +1,34 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import abc
+
+import numpy as np
+import torch
+
+
+def cast_tensor_type(inputs, src_type, dst_type):
+ """Recursively convert Tensor in inputs from src_type to dst_type.
+
+ Args:
+ inputs: Inputs to be cast.
+ src_type (torch.dtype): Source type.
+ dst_type (torch.dtype): Destination type.
+
+ Returns:
+ The same type as inputs, with all contained Tensors cast to dst_type.
+ """
+ if isinstance(inputs, torch.Tensor):
+ return inputs.to(dst_type)
+ elif isinstance(inputs, str):
+ return inputs
+ elif isinstance(inputs, np.ndarray):
+ return inputs
+ elif isinstance(inputs, abc.Mapping):
+ return type(inputs)({
+ k: cast_tensor_type(v, src_type, dst_type)
+ for k, v in inputs.items()
+ })
+ elif isinstance(inputs, abc.Iterable):
+ return type(inputs)(
+ cast_tensor_type(item, src_type, dst_type) for item in inputs)
+
+ return inputs
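# --- Editor's note: illustrative sketch, not part of the diff above. ---
# cast_tensor_type recurses into mappings and iterables, converting only
# torch.Tensor leaves and passing strings and ndarrays through untouched;
# the sample batch below is an assumption for demonstration.
import numpy as np
import torch

_batch = {'img': torch.zeros(1, 3, 4, 4), 'meta': ['frame_0', np.zeros(2)]}
_half = cast_tensor_type(_batch, torch.float, torch.half)
assert _half['img'].dtype == torch.half          # tensor converted
assert _half['meta'][1].dtype == np.float64      # ndarray left as-is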
diff --git a/mmpose/core/optimizer/__init__.py b/mmpose/core/optimizer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4340ffc075afdcdf3d9f7a398ead394ca5a168a1
--- /dev/null
+++ b/mmpose/core/optimizer/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import OPTIMIZERS, build_optimizers
+
+__all__ = ['build_optimizers', 'OPTIMIZERS']
diff --git a/mmpose/core/optimizer/__pycache__/__init__.cpython-310.pyc b/mmpose/core/optimizer/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c60786a05b284ae7ba530e33f4cd04a684fd58ca
Binary files /dev/null and b/mmpose/core/optimizer/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/optimizer/__pycache__/builder.cpython-310.pyc b/mmpose/core/optimizer/__pycache__/builder.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ed567c546800dd66298710dfc8351466bb7523ab
Binary files /dev/null and b/mmpose/core/optimizer/__pycache__/builder.cpython-310.pyc differ
diff --git a/mmpose/core/optimizer/builder.py b/mmpose/core/optimizer/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d6accd707db0728142dbcfccee15d902e3632a3
--- /dev/null
+++ b/mmpose/core/optimizer/builder.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.runner import build_optimizer
+from mmcv.utils import Registry
+
+OPTIMIZERS = Registry('optimizers')
+
+
+def build_optimizers(model, cfgs):
+ """Build multiple optimizers from configs.
+
+ If `cfgs` contains several dicts keyed by sub-module name, a dict of the
+ constructed optimizers (one per key) is returned.
+ If `cfgs` contains only a single optimizer config, the constructed
+ optimizer itself is returned.
+
+ For example,
+
+ 1) Multiple optimizer configs:
+
+ .. code-block:: python
+
+ optimizer_cfg = dict(
+ model1=dict(type='SGD', lr=lr),
+ model2=dict(type='SGD', lr=lr))
+
+ The return dict is
+ ``dict('model1': torch.optim.Optimizer, 'model2': torch.optim.Optimizer)``
+
+ 2) Single optimizer config:
+
+ .. code-block:: python
+
+ optimizer_cfg = dict(type='SGD', lr=lr)
+
+ The return is ``torch.optim.Optimizer``.
+
+ Args:
+ model (:obj:`nn.Module`): The model with parameters to be optimized.
+ cfgs (dict): The config dict of the optimizer.
+
+ Returns:
+ dict[:obj:`torch.optim.Optimizer`] | :obj:`torch.optim.Optimizer`:
+ The initialized optimizers.
+ """
+ optimizers = {}
+ if hasattr(model, 'module'):
+ model = model.module
+ # determine whether 'cfgs' has several dicts for optimizers
+ if all(isinstance(v, dict) for v in cfgs.values()):
+ for key, cfg in cfgs.items():
+ cfg_ = cfg.copy()
+ module = getattr(model, key)
+ optimizers[key] = build_optimizer(module, cfg_)
+ return optimizers
+
+ return build_optimizer(model, cfgs)
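# --- Editor's note: illustrative sketch, not part of the diff above. ---
# With one config per sub-module name, build_optimizers returns a dict of
# optimizers; the attribute names ('backbone', 'head') and hyper-parameters
# below are assumptions.
import torch.nn as nn


class _DemoModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(8, 8)
        self.head = nn.Linear(8, 2)


_cfgs = dict(
    backbone=dict(type='SGD', lr=1e-3, momentum=0.9),
    head=dict(type='Adam', lr=1e-4))
_optims = build_optimizers(_DemoModel(), _cfgs)
# -> {'backbone': torch.optim.SGD, 'head': torch.optim.Adam}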
diff --git a/mmpose/core/post_processing/__init__.py b/mmpose/core/post_processing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ee6858d953134a9b870b1a3635968729a4762ea
--- /dev/null
+++ b/mmpose/core/post_processing/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .nms import oks_iou, oks_nms, soft_oks_nms
+from .one_euro_filter import OneEuroFilter
+from .post_transforms import (affine_transform, flip_back, fliplr_joints,
+ fliplr_regression, get_affine_transform,
+ get_warp_matrix, rotate_point, transform_preds,
+ warp_affine_joints)
+
+__all__ = [
+ 'oks_nms', 'soft_oks_nms', 'affine_transform', 'rotate_point', 'flip_back',
+ 'fliplr_joints', 'fliplr_regression', 'transform_preds',
+ 'get_affine_transform', 'get_warp_matrix', 'warp_affine_joints',
+ 'OneEuroFilter', 'oks_iou'
+]
diff --git a/mmpose/core/post_processing/__pycache__/__init__.cpython-310.pyc b/mmpose/core/post_processing/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..129ff97fc65bf7c4386968de26ecc40cd28188d8
Binary files /dev/null and b/mmpose/core/post_processing/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/post_processing/__pycache__/group.cpython-310.pyc b/mmpose/core/post_processing/__pycache__/group.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7dc3405d75a9f835ee2a46fe603b4504073649c6
Binary files /dev/null and b/mmpose/core/post_processing/__pycache__/group.cpython-310.pyc differ
diff --git a/mmpose/core/post_processing/__pycache__/nms.cpython-310.pyc b/mmpose/core/post_processing/__pycache__/nms.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59706886e3e80bdb1343c0f085d2774160c69b48
Binary files /dev/null and b/mmpose/core/post_processing/__pycache__/nms.cpython-310.pyc differ
diff --git a/mmpose/core/post_processing/__pycache__/one_euro_filter.cpython-310.pyc b/mmpose/core/post_processing/__pycache__/one_euro_filter.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6447df077538d73ab24508b6ede527e63272ec91
Binary files /dev/null and b/mmpose/core/post_processing/__pycache__/one_euro_filter.cpython-310.pyc differ
diff --git a/mmpose/core/post_processing/__pycache__/post_transforms.cpython-310.pyc b/mmpose/core/post_processing/__pycache__/post_transforms.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9791f5b5222d232d255f038722de70fcca62490
Binary files /dev/null and b/mmpose/core/post_processing/__pycache__/post_transforms.cpython-310.pyc differ
diff --git a/mmpose/core/post_processing/group.py b/mmpose/core/post_processing/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..6235dbc111eae55e8bc1d34671db84152bc7c542
--- /dev/null
+++ b/mmpose/core/post_processing/group.py
@@ -0,0 +1,410 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/princeton-vl/pose-ae-train/
+# Original licence: Copyright (c) 2017, umich-vl, under BSD 3-Clause License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+import torch
+from munkres import Munkres
+
+from mmpose.core.evaluation import post_dark_udp
+
+
+def _py_max_match(scores):
+ """Apply munkres algorithm to get the best match.
+
+ Args:
+ scores(np.ndarray): cost matrix.
+
+ Returns:
+ np.ndarray: best match.
+ """
+ m = Munkres()
+ tmp = m.compute(scores)
+ tmp = np.array(tmp).astype(int)
+ return tmp
+
+
+def _match_by_tag(inp, params):
+ """Match joints by tags. Use Munkres algorithm to calculate the best match
+ for keypoints grouping.
+
+ Note:
+ number of keypoints: K
+ max number of people in an image: M (M=30 by default)
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ inp(tuple):
+ tag_k (np.ndarray[KxMxL]): tag corresponding to the
+ top k values of feature map per keypoint.
+ loc_k (np.ndarray[KxMx2]): top k locations of the
+ feature maps for keypoint.
+ val_k (np.ndarray[KxM]): top k value of the
+ feature maps per keypoint.
+ params (_Params): An instance of the _Params class.
+
+ Returns:
+ np.ndarray: result of pose groups.
+ """
+ assert isinstance(params, _Params), 'params should be class _Params()'
+
+ tag_k, loc_k, val_k = inp
+
+ default_ = np.zeros((params.num_joints, 3 + tag_k.shape[2]),
+ dtype=np.float32)
+
+ joint_dict = {}
+ tag_dict = {}
+ for i in range(params.num_joints):
+ idx = params.joint_order[i]
+
+ tags = tag_k[idx]
+ joints = np.concatenate((loc_k[idx], val_k[idx, :, None], tags), 1)
+ mask = joints[:, 2] > params.detection_threshold
+ tags = tags[mask]
+ joints = joints[mask]
+
+ if joints.shape[0] == 0:
+ continue
+
+ if i == 0 or len(joint_dict) == 0:
+ for tag, joint in zip(tags, joints):
+ key = tag[0]
+ joint_dict.setdefault(key, np.copy(default_))[idx] = joint
+ tag_dict[key] = [tag]
+ else:
+ grouped_keys = list(joint_dict.keys())[:params.max_num_people]
+ grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys]
+
+ if (params.ignore_too_much
+ and len(grouped_keys) == params.max_num_people):
+ continue
+
+ diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :]
+ diff_normed = np.linalg.norm(diff, ord=2, axis=2)
+ diff_saved = np.copy(diff_normed)
+
+ if params.use_detection_val:
+ diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3]
+
+ num_added = diff.shape[0]
+ num_grouped = diff.shape[1]
+
+ if num_added > num_grouped:
+ diff_normed = np.concatenate(
+ (diff_normed,
+ np.zeros((num_added, num_added - num_grouped),
+ dtype=np.float32) + 1e10),
+ axis=1)
+
+ pairs = _py_max_match(diff_normed)
+ for row, col in pairs:
+ if (row < num_added and col < num_grouped
+ and diff_saved[row][col] < params.tag_threshold):
+ key = grouped_keys[col]
+ joint_dict[key][idx] = joints[row]
+ tag_dict[key].append(tags[row])
+ else:
+ key = tags[row][0]
+ joint_dict.setdefault(key, np.copy(default_))[idx] = \
+ joints[row]
+ tag_dict[key] = [tags[row]]
+
+ results = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32)
+ return results
+
+
+class _Params:
+ """A class of parameter.
+
+ Args:
+ cfg(Config): config.
+ """
+
+ def __init__(self, cfg):
+ self.num_joints = cfg['num_joints']
+ self.max_num_people = cfg['max_num_people']
+
+ self.detection_threshold = cfg['detection_threshold']
+ self.tag_threshold = cfg['tag_threshold']
+ self.use_detection_val = cfg['use_detection_val']
+ self.ignore_too_much = cfg['ignore_too_much']
+
+ if self.num_joints == 17:
+ self.joint_order = [
+ i - 1 for i in
+ [1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17]
+ ]
+ else:
+ self.joint_order = list(np.arange(self.num_joints))
+
+
+class HeatmapParser:
+ """The heatmap parser for post processing."""
+
+ def __init__(self, cfg):
+ self.params = _Params(cfg)
+ self.tag_per_joint = cfg['tag_per_joint']
+ self.pool = torch.nn.MaxPool2d(cfg['nms_kernel'], 1,
+ cfg['nms_padding'])
+ self.use_udp = cfg.get('use_udp', False)
+ self.score_per_joint = cfg.get('score_per_joint', False)
+
+ def nms(self, heatmaps):
+ """Non-Maximum Suppression for heatmaps.
+
+ Args:
+ heatmaps (torch.Tensor): Heatmaps before nms.
+
+ Returns:
+ torch.Tensor: Heatmaps after nms.
+ """
+
+ maxm = self.pool(heatmaps)
+ maxm = torch.eq(maxm, heatmaps).float()
+ heatmaps = heatmaps * maxm
+
+ return heatmaps
+
+ def match(self, tag_k, loc_k, val_k):
+ """Group keypoints to human poses in a batch.
+
+ Args:
+ tag_k (np.ndarray[NxKxMxL]): tag corresponding to the
+ top k values of feature map per keypoint.
+ loc_k (np.ndarray[NxKxMx2]): top k locations of the
+ feature maps for keypoint.
+ val_k (np.ndarray[NxKxM]): top k value of the
+ feature maps per keypoint.
+
+ Returns:
+ list
+ """
+
+ def _match(x):
+ return _match_by_tag(x, self.params)
+
+ return list(map(_match, zip(tag_k, loc_k, val_k)))
+
+ def top_k(self, heatmaps, tags):
+ """Find top_k values in an image.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ max number of people: M
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmaps (torch.Tensor[NxKxHxW])
+ tags (torch.Tensor[NxKxHxWxL])
+
+ Returns:
+ dict: A dict containing top_k values.
+
+ - tag_k (np.ndarray[NxKxMxL]):
+ tag corresponding to the top k values of
+ feature map per keypoint.
+ - loc_k (np.ndarray[NxKxMx2]):
+ top k location of feature map per keypoint.
+ - val_k (np.ndarray[NxKxM]):
+ top k value of feature map per keypoint.
+ """
+ heatmaps = self.nms(heatmaps)
+ N, K, H, W = heatmaps.size()
+ heatmaps = heatmaps.view(N, K, -1)
+ val_k, ind = heatmaps.topk(self.params.max_num_people, dim=2)
+
+ tags = tags.view(tags.size(0), tags.size(1), W * H, -1)
+ if not self.tag_per_joint:
+ tags = tags.expand(-1, self.params.num_joints, -1, -1)
+
+ tag_k = torch.stack(
+ [torch.gather(tags[..., i], 2, ind) for i in range(tags.size(3))],
+ dim=3)
+
+ x = ind % W
+ y = ind // W
+
+ ind_k = torch.stack((x, y), dim=3)
+
+ results = {
+ 'tag_k': tag_k.cpu().numpy(),
+ 'loc_k': ind_k.cpu().numpy(),
+ 'val_k': val_k.cpu().numpy()
+ }
+
+ return results
+
+ @staticmethod
+ def adjust(results, heatmaps):
+ """Adjust the coordinates for better accuracy.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ results (list(np.ndarray)): Keypoint predictions.
+ heatmaps (torch.Tensor[NxKxHxW]): Heatmaps.
+ """
+ _, _, H, W = heatmaps.shape
+ for batch_id, people in enumerate(results):
+ for people_id, people_i in enumerate(people):
+ for joint_id, joint in enumerate(people_i):
+ if joint[2] > 0:
+ x, y = joint[0:2]
+ xx, yy = int(x), int(y)
+ tmp = heatmaps[batch_id][joint_id]
+ if tmp[min(H - 1, yy + 1), xx] > tmp[max(0, yy - 1),
+ xx]:
+ y += 0.25
+ else:
+ y -= 0.25
+
+ if tmp[yy, min(W - 1, xx + 1)] > tmp[yy,
+ max(0, xx - 1)]:
+ x += 0.25
+ else:
+ x -= 0.25
+ results[batch_id][people_id, joint_id,
+ 0:2] = (x + 0.5, y + 0.5)
+ return results
+
+ @staticmethod
+ def refine(heatmap, tag, keypoints, use_udp=False):
+ """Given initial keypoint predictions, we identify missing joints.
+
+ Note:
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmap: np.ndarray(K, H, W).
+ tag: np.ndarray(K, H, W) | np.ndarray(K, H, W, L)
+ keypoints: np.ndarray of size (K, 3 + L)
+ last dim is (x, y, score, tag).
+ use_udp (bool): Whether to use unbiased data processing.
+
+ Returns:
+ np.ndarray: The refined keypoints.
+ """
+
+ K, H, W = heatmap.shape
+ if len(tag.shape) == 3:
+ tag = tag[..., None]
+
+ tags = []
+ for i in range(K):
+ if keypoints[i, 2] > 0:
+ # save tag value of detected keypoint
+ x, y = keypoints[i][:2].astype(int)
+ x = np.clip(x, 0, W - 1)
+ y = np.clip(y, 0, H - 1)
+ tags.append(tag[i, y, x])
+
+ # mean tag of current detected people
+ prev_tag = np.mean(tags, axis=0)
+ results = []
+
+ for _heatmap, _tag in zip(heatmap, tag):
+ # distance of all tag values with mean tag of
+ # current detected people
+ distance_tag = (((_tag -
+ prev_tag[None, None, :])**2).sum(axis=2)**0.5)
+ norm_heatmap = _heatmap - np.round(distance_tag)
+
+ # find maximum position
+ y, x = np.unravel_index(np.argmax(norm_heatmap), _heatmap.shape)
+ xx = x.copy()
+ yy = y.copy()
+ # detection score at maximum position
+ val = _heatmap[y, x]
+ if not use_udp:
+ # offset by 0.5
+ x += 0.5
+ y += 0.5
+
+ # add a quarter offset
+ if _heatmap[yy, min(W - 1, xx + 1)] > _heatmap[yy, max(0, xx - 1)]:
+ x += 0.25
+ else:
+ x -= 0.25
+
+ if _heatmap[min(H - 1, yy + 1), xx] > _heatmap[max(0, yy - 1), xx]:
+ y += 0.25
+ else:
+ y -= 0.25
+
+ results.append((x, y, val))
+ results = np.array(results)
+
+ if results is not None:
+ for i in range(K):
+ # add keypoint if it is not detected
+ if results[i, 2] > 0 and keypoints[i, 2] == 0:
+ keypoints[i, :3] = results[i, :3]
+
+ return keypoints
+
+ def parse(self, heatmaps, tags, adjust=True, refine=True):
+ """Group keypoints into poses given heatmap and tag.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmaps (torch.Tensor[NxKxHxW]): model output heatmaps.
+ tags (torch.Tensor[NxKxHxWxL]): model output tagmaps.
+
+ Returns:
+ tuple: A tuple containing keypoint grouping results.
+
+ - results (list(np.ndarray)): Pose results.
+ - scores (list/list(np.ndarray)): Score of people.
+ """
+ results = self.match(**self.top_k(heatmaps, tags))
+
+ if adjust:
+ if self.use_udp:
+ for i in range(len(results)):
+ if results[i].shape[0] > 0:
+ results[i][..., :2] = post_dark_udp(
+ results[i][..., :2].copy(), heatmaps[i:i + 1, :])
+ else:
+ results = self.adjust(results, heatmaps)
+
+ if self.score_per_joint:
+ scores = [i[:, 2] for i in results[0]]
+ else:
+ scores = [i[:, 2].mean() for i in results[0]]
+
+ if refine:
+ results = results[0]
+ # for every detected person
+ for i in range(len(results)):
+ heatmap_numpy = heatmaps[0].cpu().numpy()
+ tag_numpy = tags[0].cpu().numpy()
+ if not self.tag_per_joint:
+ tag_numpy = np.tile(tag_numpy,
+ (self.params.num_joints, 1, 1, 1))
+ results[i] = self.refine(
+ heatmap_numpy, tag_numpy, results[i], use_udp=self.use_udp)
+ results = [results]
+
+ return results, scores
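# --- Editor's note: illustrative sketch, not part of the diff above. ---
# Running the parser on random associative-embedding outputs; every config
# value and tensor shape below is an assumption for demonstration only.
import torch

_cfg = dict(
    num_joints=17, max_num_people=5, detection_threshold=0.1,
    tag_threshold=1.0, use_detection_val=True, ignore_too_much=False,
    tag_per_joint=True, nms_kernel=5, nms_padding=2)
_parser = HeatmapParser(_cfg)
_heatmaps = torch.rand(1, 17, 64, 48)          # N x K x H x W
_tags = torch.rand(1, 17, 64, 48, 1)           # N x K x H x W x L
_poses, _scores = _parser.parse(_heatmaps, _tags, adjust=True, refine=True)
# _poses[0]: (num_people, 17, 4) arrays of (x, y, score, tag)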
diff --git a/mmpose/core/post_processing/nms.py b/mmpose/core/post_processing/nms.py
new file mode 100644
index 0000000000000000000000000000000000000000..86a0ab35e0e26d27bb0bb55071018ffc5ac9af1d
--- /dev/null
+++ b/mmpose/core/post_processing/nms.py
@@ -0,0 +1,207 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+
+
+def nms(dets, thr):
+ """Greedily select boxes with high confidence and overlap <= thr.
+
+ Args:
+ dets: [[x1, y1, x2, y2, score]].
+ thr: Retain overlap < thr.
+
+ Returns:
+ list: Indexes to keep.
+ """
+ if len(dets) == 0:
+ return []
+
+ x1 = dets[:, 0]
+ y1 = dets[:, 1]
+ x2 = dets[:, 2]
+ y2 = dets[:, 3]
+ scores = dets[:, 4]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while len(order) > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= thr)[0]
+ order = order[inds + 1]
+
+ return keep
+
+
+def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None):
+ """Calculate oks ious.
+
+ Args:
+ g: Ground truth keypoints.
+ d: Detected keypoints.
+ a_g: Area of the ground truth object.
+ a_d: Area of the detected object.
+ sigmas: standard deviation of keypoint labelling.
+ vis_thr: threshold of the keypoint visibility.
+
+ Returns:
+ np.ndarray: The oks ious.
+ """
+ if sigmas is None:
+ sigmas = np.array([
+ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
+ .87, .87, .89, .89
+ ]) / 10.0
+ vars = (sigmas * 2)**2
+ xg = g[0::3]
+ yg = g[1::3]
+ vg = g[2::3]
+ ious = np.zeros(len(d), dtype=np.float32)
+ for n_d in range(0, len(d)):
+ xd = d[n_d, 0::3]
+ yd = d[n_d, 1::3]
+ vd = d[n_d, 2::3]
+ dx = xd - xg
+ dy = yd - yg
+ e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
+ if vis_thr is not None:
+ ind = list(vg > vis_thr) and list(vd > vis_thr)
+ e = e[ind]
+ ious[n_d] = np.sum(np.exp(-e)) / len(e) if len(e) != 0 else 0.0
+ return ious
+
+
+def oks_nms(kpts_db, thr, sigmas=None, vis_thr=None, score_per_joint=False):
+ """OKS NMS implementations.
+
+ Args:
+ kpts_db: keypoints.
+ thr: Retain overlap < thr.
+ sigmas: standard deviation of keypoint labelling.
+ vis_thr: threshold of the keypoint visibility.
+ score_per_joint: the input scores (in kpts_db) are per joint scores
+
+ Returns:
+ np.ndarray: indexes to keep.
+ """
+ if len(kpts_db) == 0:
+ return []
+
+ if score_per_joint:
+ scores = np.array([k['score'].mean() for k in kpts_db])
+ else:
+ scores = np.array([k['score'] for k in kpts_db])
+
+ kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
+ areas = np.array([k['area'] for k in kpts_db])
+
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while len(order) > 0:
+ i = order[0]
+ keep.append(i)
+
+ oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
+ sigmas, vis_thr)
+
+ inds = np.where(oks_ovr <= thr)[0]
+ order = order[inds + 1]
+
+ keep = np.array(keep)
+
+ return keep
+
+
+def _rescore(overlap, scores, thr, type='gaussian'):
+ """Rescoring mechanism gaussian or linear.
+
+ Args:
+ overlap: calculated ious
+ scores: target scores.
+ thr: retain oks overlap < thr.
+ type: 'gaussian' or 'linear'
+
+ Returns:
+ np.ndarray: rescored scores.
+ """
+ assert len(overlap) == len(scores)
+ assert type in ['gaussian', 'linear']
+
+ if type == 'linear':
+ inds = np.where(overlap >= thr)[0]
+ scores[inds] = scores[inds] * (1 - overlap[inds])
+ else:
+ scores = scores * np.exp(-overlap**2 / thr)
+
+ return scores
+
+
+def soft_oks_nms(kpts_db,
+ thr,
+ max_dets=20,
+ sigmas=None,
+ vis_thr=None,
+ score_per_joint=False):
+ """Soft OKS NMS implementations.
+
+ Args:
+ kpts_db: keypoints, scores and areas of the detections.
+ thr: retain oks overlap < thr.
+ max_dets: max number of detections to keep.
+ sigmas: Keypoint labelling uncertainty.
+ vis_thr: threshold of the keypoint visibility.
+ score_per_joint: the input scores (in kpts_db) are per-joint scores.
+
+ Returns:
+ np.ndarray: indexes to keep.
+ """
+ if len(kpts_db) == 0:
+ return []
+
+ if score_per_joint:
+ scores = np.array([k['score'].mean() for k in kpts_db])
+ else:
+ scores = np.array([k['score'] for k in kpts_db])
+
+ kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
+ areas = np.array([k['area'] for k in kpts_db])
+
+ order = scores.argsort()[::-1]
+ scores = scores[order]
+
+ keep = np.zeros(max_dets, dtype=np.intp)
+ keep_cnt = 0
+ while len(order) > 0 and keep_cnt < max_dets:
+ i = order[0]
+
+ oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
+ sigmas, vis_thr)
+
+ order = order[1:]
+ scores = _rescore(oks_ovr, scores[1:], thr)
+
+ tmp = scores.argsort()[::-1]
+ order = order[tmp]
+ scores = scores[tmp]
+
+ keep[keep_cnt] = i
+ keep_cnt += 1
+
+ keep = keep[:keep_cnt]
+
+ return keep
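# --- Editor's note: illustrative sketch, not part of the diff above. ---
# Suppressing a near-duplicate person detection with OKS NMS; the keypoints,
# scores, areas and thresholds below are made up for demonstration.
import numpy as np

_person = np.zeros((17, 3))
_person[:, :2] = np.random.rand(17, 2) * 100    # (x, y)
_person[:, 2] = 2                               # visibility flag
_dup = _person.copy()
_dup[:, :2] += 1.0                              # almost the same pose
_kpts_db = [
    dict(keypoints=_person, score=0.9, area=100. * 100.),
    dict(keypoints=_dup, score=0.8, area=100. * 100.),
]
print(oks_nms(_kpts_db, thr=0.9, vis_thr=0.3))  # -> [0], duplicate removed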
diff --git a/mmpose/core/post_processing/one_euro_filter.py b/mmpose/core/post_processing/one_euro_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..01ffa5fda9b1669e3611f14643ed731669b3b421
--- /dev/null
+++ b/mmpose/core/post_processing/one_euro_filter.py
@@ -0,0 +1,102 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HoBeom/OneEuroFilter-Numpy
+# Original licence: Copyright (c) HoBeom Jeon, under the MIT License.
+# ------------------------------------------------------------------------------
+from time import time
+
+import numpy as np
+
+
+def smoothing_factor(t_e, cutoff):
+ r = 2 * np.pi * cutoff * t_e
+ return r / (r + 1)
+
+
+def exponential_smoothing(a, x, x_prev):
+ return a * x + (1 - a) * x_prev
+
+
+class OneEuroFilter:
+
+ def __init__(self,
+ x0,
+ dx0=0.0,
+ min_cutoff=1.7,
+ beta=0.3,
+ d_cutoff=30.0,
+ fps=None):
+ """One Euro Filter for keypoints smoothing.
+
+ Args:
+ x0 (np.ndarray[K, 2]): Initial keypoint values.
+ dx0 (float): Initial derivative estimate. Default: 0.0.
+ min_cutoff (float): Minimum cutoff frequency; lower values give
+ stronger smoothing of slow motion.
+ beta (float): Speed coefficient; larger values reduce lag for
+ fast motion.
+ d_cutoff (float): Cutoff frequency of the derivative filter,
+ interpreted as the input data FPS in realtime mode.
+ fps (float): Video FPS for offline (video) inference.
+ """
+
+ # The parameters.
+ self.data_shape = x0.shape
+ self.min_cutoff = np.full(x0.shape, min_cutoff)
+ self.beta = np.full(x0.shape, beta)
+ self.d_cutoff = np.full(x0.shape, d_cutoff)
+ # Previous values.
+ self.x_prev = x0.astype(np.float32)
+ self.dx_prev = np.full(x0.shape, dx0)
+ self.mask_prev = np.ma.masked_where(x0 <= 0, x0)
+ self.realtime = True
+ if fps is None:
+ # Using in realtime inference
+ self.t_e = None
+ self.skip_frame_factor = d_cutoff
+ else:
+ # fps using video inference
+ self.realtime = False
+ self.d_cutoff = np.full(x0.shape, float(fps))
+ self.t_prev = time()
+
+ def __call__(self, x, t_e=1.0):
+ """Compute the filtered signal.
+
+ Hyper-parameters (cutoff, beta) follow the defaults used in VNect.
+
+ In realtime mode, the camera fps (d_cutoff) defaults to 30.0.
+
+ Args:
+ x (np.ndarray[K, 2]): keypoints results in frame
+ t_e (Optional): video skip frame count for posetrack
+ evaluation
+ """
+ assert x.shape == self.data_shape
+
+ t = 0
+ if self.realtime:
+ t = time()
+ t_e = (t - self.t_prev) * self.skip_frame_factor
+ t_e = np.full(x.shape, t_e)
+
+ # missing keypoints mask
+ mask = np.ma.masked_where(x <= 0, x)
+
+ # The filtered derivative of the signal.
+ a_d = smoothing_factor(t_e, self.d_cutoff)
+ dx = (x - self.x_prev) / t_e
+ dx_hat = exponential_smoothing(a_d, dx, self.dx_prev)
+
+ # The filtered signal.
+ cutoff = self.min_cutoff + self.beta * np.abs(dx_hat)
+ a = smoothing_factor(t_e, cutoff)
+ x_hat = exponential_smoothing(a, x, self.x_prev)
+
+ # missing keypoints remove
+ np.copyto(x_hat, -10, where=mask.mask)
+
+ # Memorize the previous values.
+ self.x_prev = x_hat
+ self.dx_prev = dx_hat
+ self.t_prev = t
+ self.mask_prev = mask
+
+ return x_hat
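# --- Editor's note: illustrative sketch, not part of the diff above. ---
# Smoothing per-frame keypoints of a 30-FPS video; shapes, coordinates and
# the jitter level are assumptions.
import numpy as np

_kpts = np.random.rand(17, 2).astype(np.float32) * 100 + 1.0
_smoother = OneEuroFilter(_kpts, min_cutoff=1.7, beta=0.3, fps=30)
for _ in range(5):
    _noisy = _kpts + np.random.randn(17, 2) * 2   # jittered detections
    _kpts_smoothed = _smoother(_noisy)            # same shape, less jitter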
diff --git a/mmpose/core/post_processing/post_transforms.py b/mmpose/core/post_processing/post_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..93063fb1c1a60519a527037795654b0278a880e4
--- /dev/null
+++ b/mmpose/core/post_processing/post_transforms.py
@@ -0,0 +1,366 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import math
+
+import cv2
+import numpy as np
+import torch
+
+
+def fliplr_joints(joints_3d, joints_3d_visible, img_width, flip_pairs):
+ """Flip human joints horizontally.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ joints_3d_visible (np.ndarray([K, 1])): Visibility of keypoints.
+ img_width (int): Image width.
+ flip_pairs (list[tuple]): Pairs of keypoints which are mirrored
+ (for example, left ear and right ear).
+
+ Returns:
+ tuple: Flipped human joints.
+
+ - joints_3d_flipped (np.ndarray([K, 3])): Flipped joints.
+ - joints_3d_visible_flipped (np.ndarray([K, 1])): Joint visibility.
+ """
+
+ assert len(joints_3d) == len(joints_3d_visible)
+ assert img_width > 0
+
+ joints_3d_flipped = joints_3d.copy()
+ joints_3d_visible_flipped = joints_3d_visible.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ joints_3d_flipped[left, :] = joints_3d[right, :]
+ joints_3d_flipped[right, :] = joints_3d[left, :]
+
+ joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
+ joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]
+
+ # Flip horizontally
+ joints_3d_flipped[:, 0] = img_width - 1 - joints_3d_flipped[:, 0]
+ joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped
+
+ return joints_3d_flipped, joints_3d_visible_flipped
+
+
+def fliplr_regression(regression,
+ flip_pairs,
+ center_mode='static',
+ center_x=0.5,
+ center_index=0):
+ """Flip human joints horizontally.
+
+ Note:
+ - batch_size: N
+ - num_keypoint: K
+
+ Args:
+ regression (np.ndarray([..., K, C])): Coordinates of keypoints, where K
+ is the joint number and C is the dimension. Example shapes are:
+
+ - [N, K, C]: a batch of keypoints where N is the batch size.
+ - [N, T, K, C]: a batch of pose sequences, where T is the frame
+ number.
+ flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+ center_mode (str): The mode to set the center location on the x-axis
+ to flip around. Options are:
+
+ - static: use a static x value (see center_x also)
+ - root: use a root joint (see center_index also)
+ center_x (float): Set the x-axis location of the flip center. Only used
+ when center_mode=static.
+ center_index (int): Set the index of the root joint, whose x location
+ will be used as the flip center. Only used when center_mode=root.
+
+ Returns:
+ np.ndarray([..., K, C]): Flipped joints.
+ """
+ assert regression.ndim >= 2, f'Invalid pose shape {regression.shape}'
+
+ allowed_center_mode = {'static', 'root'}
+ assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \
+ f'{center_mode}, allowed choices are {allowed_center_mode}'
+
+ if center_mode == 'static':
+ x_c = center_x
+ elif center_mode == 'root':
+ assert regression.shape[-2] > center_index
+ x_c = regression[..., center_index:center_index + 1, 0]
+
+ regression_flipped = regression.copy()
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ regression_flipped[..., left, :] = regression[..., right, :]
+ regression_flipped[..., right, :] = regression[..., left, :]
+
+ # Flip horizontally
+ regression_flipped[..., 0] = x_c * 2 - regression_flipped[..., 0]
+ return regression_flipped
+
+
+def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatmap'):
+ """Flip the flipped heatmaps back to the original form.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ output_flipped (np.ndarray[N, K, H, W]): The output heatmaps obtained
+ from the flipped images.
+ flip_pairs (list[tuple()): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+ target_type (str): GaussianHeatmap or CombinedTarget
+
+ Returns:
+ np.ndarray: heatmaps that flipped back to the original image
+ """
+ assert output_flipped.ndim == 4, \
+ 'output_flipped should be [batch_size, num_keypoints, height, width]'
+ shape_ori = output_flipped.shape
+ channels = 1
+ if target_type.lower() == 'CombinedTarget'.lower():
+ channels = 3
+ output_flipped[:, 1::3, ...] = -output_flipped[:, 1::3, ...]
+ output_flipped = output_flipped.reshape(shape_ori[0], -1, channels,
+ shape_ori[2], shape_ori[3])
+ output_flipped_back = output_flipped.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ output_flipped_back[:, left, ...] = output_flipped[:, right, ...]
+ output_flipped_back[:, right, ...] = output_flipped[:, left, ...]
+ output_flipped_back = output_flipped_back.reshape(shape_ori)
+ # Flip horizontally
+ output_flipped_back = output_flipped_back[..., ::-1]
+ return output_flipped_back
+
+
+def transform_preds(coords, center, scale, output_size, use_udp=False):
+ """Get final keypoint predictions from heatmaps and apply scaling and
+ translation to map them back to the image.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ coords (np.ndarray[K, ndims]):
+
+ * If ndims=2, coords are predicted keypoint locations.
+ * If ndims=4, coords are composed of (x, y, scores, tags)
+ * If ndims=5, coords are composed of (x, y, scores, tags,
+ flipped_tags)
+
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ output_size (np.ndarray[2, ] | list(2,)): Size of the
+ destination heatmaps.
+ use_udp (bool): Use unbiased data processing
+
+ Returns:
+ np.ndarray: Predicted coordinates in the images.
+ """
+ assert coords.shape[1] in (2, 4, 5)
+ assert len(center) == 2
+ assert len(scale) == 2
+ assert len(output_size) == 2
+
+ # Recover the scale which is normalized by a factor of 200.
+ scale = scale * 200.0
+
+ if use_udp:
+ scale_x = scale[0] / (output_size[0] - 1.0)
+ scale_y = scale[1] / (output_size[1] - 1.0)
+ else:
+ scale_x = scale[0] / output_size[0]
+ scale_y = scale[1] / output_size[1]
+
+ target_coords = np.ones_like(coords)
+ target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5
+ target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5
+
+ return target_coords
+
+
+def get_affine_transform(center,
+ scale,
+ rot,
+ output_size,
+ shift=(0., 0.),
+ inv=False):
+ """Get the affine transform matrix, given the center/scale/rot/output_size.
+
+ Args:
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ rot (float): Rotation angle (degree).
+ output_size (np.ndarray[2, ] | list(2,)): Size of the
+ destination heatmaps.
+ shift (0-100%): Shift translation ratio wrt the width/height.
+ Default (0., 0.).
+ inv (bool): Option to inverse the affine transform direction.
+ (inv=False: src->dst or inv=True: dst->src)
+
+ Returns:
+ np.ndarray: The transform matrix.
+ """
+ assert len(center) == 2
+ assert len(scale) == 2
+ assert len(output_size) == 2
+ assert len(shift) == 2
+
+ # pixel_std is 200.
+ scale_tmp = scale * 200.0
+
+ shift = np.array(shift)
+ src_w = scale_tmp[0]
+ dst_w = output_size[0]
+ dst_h = output_size[1]
+
+ rot_rad = np.pi * rot / 180
+ src_dir = rotate_point([0., src_w * -0.5], rot_rad)
+ dst_dir = np.array([0., dst_w * -0.5])
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ src[0, :] = center + scale_tmp * shift
+ src[1, :] = center + src_dir + scale_tmp * shift
+ src[2, :] = _get_3rd_point(src[0, :], src[1, :])
+
+ dst = np.zeros((3, 2), dtype=np.float32)
+ dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
+ dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+ dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
+
+ if inv:
+ trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+ else:
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ return trans
+
+
+def affine_transform(pt, trans_mat):
+ """Apply an affine transformation to the points.
+
+ Args:
+ pt (np.ndarray): a 2 dimensional point to be transformed
+ trans_mat (np.ndarray): 2x3 matrix of an affine transform
+
+ Returns:
+ np.ndarray: Transformed points.
+ """
+ assert len(pt) == 2
+ new_pt = np.array(trans_mat) @ np.array([pt[0], pt[1], 1.])
+
+ return new_pt
+
+
+def _get_3rd_point(a, b):
+ """To calculate the affine matrix, three pairs of points are required. This
+ function is used to get the 3rd point, given 2D points a & b.
+
+ The 3rd point is defined by rotating vector `a - b` by 90 degrees
+ anticlockwise, using b as the rotation center.
+
+ Args:
+ a (np.ndarray): point(x,y)
+ b (np.ndarray): point(x,y)
+
+ Returns:
+ np.ndarray: The 3rd point.
+ """
+ assert len(a) == 2
+ assert len(b) == 2
+ direction = a - b
+ third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
+
+ return third_pt
+
+
+def rotate_point(pt, angle_rad):
+ """Rotate a point by an angle.
+
+ Args:
+ pt (list[float]): 2 dimensional point to be rotated
+ angle_rad (float): rotation angle by radian
+
+ Returns:
+ list[float]: Rotated point.
+ """
+ assert len(pt) == 2
+ sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+ new_x = pt[0] * cs - pt[1] * sn
+ new_y = pt[0] * sn + pt[1] * cs
+ rotated_pt = [new_x, new_y]
+
+ return rotated_pt
+
+
+def get_warp_matrix(theta, size_input, size_dst, size_target):
+ """Calculate the transformation matrix under the constraint of unbiased.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
+ Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Args:
+ theta (float): Rotation angle in degrees.
+ size_input (np.ndarray): Size of input image [w, h].
+ size_dst (np.ndarray): Size of output image [w, h].
+ size_target (np.ndarray): Size of ROI in input plane [w, h].
+
+ Returns:
+ np.ndarray: A matrix for transformation.
+ """
+ theta = np.deg2rad(theta)
+ matrix = np.zeros((2, 3), dtype=np.float32)
+ scale_x = size_dst[0] / size_target[0]
+ scale_y = size_dst[1] / size_target[1]
+ matrix[0, 0] = math.cos(theta) * scale_x
+ matrix[0, 1] = -math.sin(theta) * scale_x
+ matrix[0, 2] = scale_x * (-0.5 * size_input[0] * math.cos(theta) +
+ 0.5 * size_input[1] * math.sin(theta) +
+ 0.5 * size_target[0])
+ matrix[1, 0] = math.sin(theta) * scale_y
+ matrix[1, 1] = math.cos(theta) * scale_y
+ matrix[1, 2] = scale_y * (-0.5 * size_input[0] * math.sin(theta) -
+ 0.5 * size_input[1] * math.cos(theta) +
+ 0.5 * size_target[1])
+ return matrix
+
+
+def warp_affine_joints(joints, mat):
+ """Apply affine transformation defined by the transform matrix on the
+ joints.
+
+ Args:
+ joints (np.ndarray[..., 2]): Origin coordinate of joints.
+ mat (np.ndarray[3, 2]): The affine matrix.
+
+ Returns:
+ np.ndarray[..., 2]: Result coordinate of joints.
+ """
+ joints = np.array(joints)
+ shape = joints.shape
+ joints = joints.reshape(-1, 2)
+ return np.dot(
+ np.concatenate((joints, joints[:, 0:1] * 0 + 1), axis=1),
+ mat.T).reshape(shape)
+
+
+def affine_transform_torch(pts, t):
+ npts = pts.shape[0]
+ pts_homo = torch.cat([pts, torch.ones(npts, 1, device=pts.device)], dim=1)
+ out = torch.mm(t, torch.t(pts_homo))
+ return torch.t(out[:2, :])
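# --- Editor's note: illustrative sketch, not part of the diff above. ---
# Cropping a 256x256 patch around a bounding box with get_affine_transform
# and mapping a point into patch coordinates; all values are assumptions.
import cv2
import numpy as np

_img = np.zeros((480, 640, 3), dtype=np.uint8)
_center = np.array([320., 240.])
_scale = np.array([1.0, 1.0])          # bbox of 200x200 px (pixel_std = 200)
_out_size = [256, 256]
_trans = get_affine_transform(_center, _scale, 0., _out_size)
_patch = cv2.warpAffine(_img, _trans, (_out_size[0], _out_size[1]))
_pt = affine_transform(np.array([320., 240.]), _trans)   # ~ [128., 128.]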
diff --git a/mmpose/core/utils/__init__.py b/mmpose/core/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd6c0277a0647e605eaf29ccac41c1f9a37a05ac
--- /dev/null
+++ b/mmpose/core/utils/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .dist_utils import allreduce_grads
+from .regularizations import WeightNormClipHook
+
+__all__ = ['allreduce_grads', 'WeightNormClipHook']
diff --git a/mmpose/core/utils/__pycache__/__init__.cpython-310.pyc b/mmpose/core/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae020db75d712cac04929430e4cc7a8268ca14cc
Binary files /dev/null and b/mmpose/core/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/utils/__pycache__/dist_utils.cpython-310.pyc b/mmpose/core/utils/__pycache__/dist_utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e2130a15c7f7758dc5dec2f288b6ac9f6cb5b08
Binary files /dev/null and b/mmpose/core/utils/__pycache__/dist_utils.cpython-310.pyc differ
diff --git a/mmpose/core/utils/__pycache__/regularizations.cpython-310.pyc b/mmpose/core/utils/__pycache__/regularizations.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aa553732de8ea302bc9c8a99496d7e18a5a5e0a7
Binary files /dev/null and b/mmpose/core/utils/__pycache__/regularizations.cpython-310.pyc differ
diff --git a/mmpose/core/utils/dist_utils.py b/mmpose/core/utils/dist_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e76e591050284b1e9c541ea4ee8ee66708b8e7fb
--- /dev/null
+++ b/mmpose/core/utils/dist_utils.py
@@ -0,0 +1,51 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+
+import torch.distributed as dist
+from torch._utils import (_flatten_dense_tensors, _take_tensors,
+ _unflatten_dense_tensors)
+
+
+def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
+ """Allreduce parameters as a whole."""
+ if bucket_size_mb > 0:
+ bucket_size_bytes = bucket_size_mb * 1024 * 1024
+ buckets = _take_tensors(tensors, bucket_size_bytes)
+ else:
+ buckets = OrderedDict()
+ for tensor in tensors:
+ tp = tensor.type()
+ if tp not in buckets:
+ buckets[tp] = []
+ buckets[tp].append(tensor)
+ buckets = buckets.values()
+
+ for bucket in buckets:
+ flat_tensors = _flatten_dense_tensors(bucket)
+ dist.all_reduce(flat_tensors)
+ flat_tensors.div_(world_size)
+ for tensor, synced in zip(
+ bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
+ tensor.copy_(synced)
+
+
+def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
+ """Allreduce gradients.
+
+ Args:
+ params (list[torch.nn.Parameter]): List of parameters of a model.
+ coalesce (bool, optional): Whether allreduce parameters as a whole.
+ Default: True.
+ bucket_size_mb (int, optional): Size of bucket, the unit is MB.
+ Default: -1.
+ """
+ grads = [
+ param.grad.data for param in params
+ if param.requires_grad and param.grad is not None
+ ]
+ world_size = dist.get_world_size()
+ if coalesce:
+ _allreduce_coalesced(grads, world_size, bucket_size_mb)
+ else:
+ for tensor in grads:
+ dist.all_reduce(tensor.div_(world_size))
diff --git a/mmpose/core/utils/regularizations.py b/mmpose/core/utils/regularizations.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8c7449038066016f6efb60e126111ace962fe98
--- /dev/null
+++ b/mmpose/core/utils/regularizations.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod, abstractproperty
+
+import torch
+
+
+class PytorchModuleHook(metaclass=ABCMeta):
+ """Base class for PyTorch module hook registers.
+
+ An instance of a subclass of PytorchModuleHook can be used to
+ register a hook to a PyTorch module using the `register` method, e.g.:
+ hook_register.register(module)
+
+ Subclasses should add/overwrite the following methods:
+ - __init__
+ - hook
+ - hook_type
+ """
+
+ @abstractmethod
+ def hook(self, *args, **kwargs):
+ """Hook function."""
+
+ @abstractproperty
+ def hook_type(self) -> str:
+ """Hook type Subclasses should overwrite this function to return a
+ string value in.
+
+ {`forward`, `forward_pre`, `backward`}
+ """
+
+ def register(self, module):
+ """Register the hook function to the module.
+
+ Args:
+ module (pytorch module): the module to register the hook.
+
+ Returns:
+ handle (torch.utils.hooks.RemovableHandle): a handle to remove
+ the hook by calling handle.remove()
+ """
+ assert isinstance(module, torch.nn.Module)
+
+ if self.hook_type == 'forward':
+ h = module.register_forward_hook(self.hook)
+ elif self.hook_type == 'forward_pre':
+ h = module.register_forward_pre_hook(self.hook)
+ elif self.hook_type == 'backward':
+ h = module.register_backward_hook(self.hook)
+ else:
+            raise ValueError(f'Invalid hook type {self.hook_type}')
+
+ return h
+
+
+class WeightNormClipHook(PytorchModuleHook):
+ """Apply weight norm clip regularization.
+
+    The module's parameter will be clipped to a given maximum norm before each
+ forward pass.
+
+ Args:
+ max_norm (float): The maximum norm of the parameter.
+ module_param_names (str|list): The parameter name (or name list) to
+ apply weight norm clip.
+ """
+
+ def __init__(self, max_norm=1.0, module_param_names='weight'):
+ self.module_param_names = module_param_names if isinstance(
+ module_param_names, list) else [module_param_names]
+ self.max_norm = max_norm
+
+ @property
+ def hook_type(self):
+ return 'forward_pre'
+
+ def hook(self, module, _input):
+ for name in self.module_param_names:
+ assert name in module._parameters, f'{name} is not a parameter' \
+ f' of the module {type(module)}'
+ param = module._parameters[name]
+
+ with torch.no_grad():
+ m = param.norm().item()
+ if m > self.max_norm:
+ param.mul_(self.max_norm / (m + 1e-6))
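
A short sketch of registering `WeightNormClipHook` on a single layer; the layer sizes and `max_norm` value are arbitrary choices for illustration:

    import torch
    from mmpose.core.utils.regularizations import WeightNormClipHook

    layer = torch.nn.Linear(128, 64)
    hook = WeightNormClipHook(max_norm=1.0, module_param_names='weight')
    handle = hook.register(layer)

    y = layer(torch.randn(4, 128))  # weight norm is clipped to <= 1.0 just before this call
    handle.remove()                 # detach the hook once it is no longer needed
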
diff --git a/mmpose/core/visualization/__init__.py b/mmpose/core/visualization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9705494bc8ef4dfb49e6a8db21ab6f243f3bb6d2
--- /dev/null
+++ b/mmpose/core/visualization/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .effects import apply_bugeye_effect, apply_sunglasses_effect
+from .image import (imshow_bboxes, imshow_keypoints, imshow_keypoints_3d,
+ imshow_mesh_3d)
+
+__all__ = [
+ 'imshow_keypoints',
+ 'imshow_keypoints_3d',
+ 'imshow_bboxes',
+ 'apply_bugeye_effect',
+ 'apply_sunglasses_effect',
+ 'imshow_mesh_3d',
+]
diff --git a/mmpose/core/visualization/__pycache__/__init__.cpython-310.pyc b/mmpose/core/visualization/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eb7db60d43972be123c7fde0401aa02c95583052
Binary files /dev/null and b/mmpose/core/visualization/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/core/visualization/__pycache__/effects.cpython-310.pyc b/mmpose/core/visualization/__pycache__/effects.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c5b03fff2d400815e15f52cefc50ec92809dd4b
Binary files /dev/null and b/mmpose/core/visualization/__pycache__/effects.cpython-310.pyc differ
diff --git a/mmpose/core/visualization/__pycache__/image.cpython-310.pyc b/mmpose/core/visualization/__pycache__/image.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..70d705abba576390840b3bcd859e8d443abe2bf5
Binary files /dev/null and b/mmpose/core/visualization/__pycache__/image.cpython-310.pyc differ
diff --git a/mmpose/core/visualization/effects.py b/mmpose/core/visualization/effects.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3add7d95dafe4d072b7945823aaa75664622994
--- /dev/null
+++ b/mmpose/core/visualization/effects.py
@@ -0,0 +1,111 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+
+def apply_bugeye_effect(img,
+ pose_results,
+ left_eye_index,
+ right_eye_index,
+ kpt_thr=0.5):
+ """Apply bug-eye effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "bbox" ([K, 4(or 5)]): detection bbox in
+ [x1, y1, x2, y2, (score)]
+ - "keypoints" ([K,3]): keypoint detection result in [x, y, score]
+ left_eye_index (int): Keypoint index of left eye
+ right_eye_index (int): Keypoint index of right eye
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ xx, yy = np.meshgrid(np.arange(img.shape[1]), np.arange(img.shape[0]))
+ xx = xx.astype(np.float32)
+ yy = yy.astype(np.float32)
+
+ for pose in pose_results:
+ bbox = pose['bbox']
+ kpts = pose['keypoints']
+
+ if kpts[left_eye_index, 2] < kpt_thr or kpts[right_eye_index,
+ 2] < kpt_thr:
+ continue
+
+ kpt_leye = kpts[left_eye_index, :2]
+ kpt_reye = kpts[right_eye_index, :2]
+ for xc, yc in [kpt_leye, kpt_reye]:
+
+ # distortion parameters
+ k1 = 0.001
+ epe = 1e-5
+
+ scale = (bbox[2] - bbox[0])**2 + (bbox[3] - bbox[1])**2
+ r2 = ((xx - xc)**2 + (yy - yc)**2)
+ r2 = (r2 + epe) / scale # normalized by bbox scale
+
+ xx = (xx - xc) / (1 + k1 / r2) + xc
+ yy = (yy - yc) / (1 + k1 / r2) + yc
+
+ img = cv2.remap(
+ img,
+ xx,
+ yy,
+ interpolation=cv2.INTER_AREA,
+ borderMode=cv2.BORDER_REPLICATE)
+ return img
+
+
+def apply_sunglasses_effect(img,
+ pose_results,
+ sunglasses_img,
+ left_eye_index,
+ right_eye_index,
+ kpt_thr=0.5):
+ """Apply sunglasses effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result in [x, y, score]
+ sunglasses_img (np.ndarray): Sunglasses image with white background.
+ left_eye_index (int): Keypoint index of left eye
+ right_eye_index (int): Keypoint index of right eye
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ hm, wm = sunglasses_img.shape[:2]
+ # anchor points in the sunglasses mask
+ pts_src = np.array([[0.3 * wm, 0.3 * hm], [0.3 * wm, 0.7 * hm],
+ [0.7 * wm, 0.3 * hm], [0.7 * wm, 0.7 * hm]],
+ dtype=np.float32)
+
+ for pose in pose_results:
+ kpts = pose['keypoints']
+
+ if kpts[left_eye_index, 2] < kpt_thr or kpts[right_eye_index,
+ 2] < kpt_thr:
+ continue
+
+ kpt_leye = kpts[left_eye_index, :2]
+ kpt_reye = kpts[right_eye_index, :2]
+ # orthogonal vector to the left-to-right eyes
+ vo = 0.5 * (kpt_reye - kpt_leye)[::-1] * [-1, 1]
+
+ # anchor points in the image by eye positions
+ pts_tar = np.vstack(
+ [kpt_reye + vo, kpt_reye - vo, kpt_leye + vo, kpt_leye - vo])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ sunglasses_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # mask the white background area in the patch with a threshold 200
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 200).astype(np.uint8)
+ img = cv2.copyTo(patch, mask, img)
+
+ return img
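
A self-contained sketch of `apply_sunglasses_effect` on synthetic data; the image, the sunglasses patch, and the single-person keypoints below are all fabricated for illustration, with indexes 1 and 2 standing in for the left and right eye:

    import cv2
    import numpy as np
    from mmpose.core.visualization.effects import apply_sunglasses_effect

    img = np.full((240, 320, 3), 128, dtype=np.uint8)        # stand-in photo
    sunglasses = np.full((60, 160, 3), 255, dtype=np.uint8)  # white background
    cv2.rectangle(sunglasses, (10, 20), (150, 40), (0, 0, 0), -1)  # crude lenses

    pose_results = [dict(keypoints=np.array([[160., 110., 0.9],    # 0: nose
                                             [145., 95., 0.9],     # 1: left eye
                                             [175., 95., 0.9]]))]  # 2: right eye

    vis = apply_sunglasses_effect(img, pose_results, sunglasses,
                                  left_eye_index=1, right_eye_index=2, kpt_thr=0.5)
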
diff --git a/mmpose/core/visualization/image.py b/mmpose/core/visualization/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1742cda2644e2fd3d837b15f2eb5f41572e17f0
--- /dev/null
+++ b/mmpose/core/visualization/image.py
@@ -0,0 +1,442 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+import os
+import warnings
+
+import cv2
+import mmcv
+import numpy as np
+from matplotlib import pyplot as plt
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.color import color_val
+
+try:
+ import trimesh
+ has_trimesh = True
+except (ImportError, ModuleNotFoundError):
+ has_trimesh = False
+
+try:
+ #os.environ['PYOPENGL_PLATFORM'] = 'egl'
+ import pyrender
+ has_pyrender = True
+except (ImportError, ModuleNotFoundError):
+ has_pyrender = False
+
+
+def imshow_bboxes(img,
+ bboxes,
+ labels=None,
+ colors='green',
+ text_color='white',
+ thickness=1,
+ font_scale=0.5,
+ show=True,
+ win_name='',
+ wait_time=0,
+ out_file=None):
+ """Draw bboxes with labels (optional) on an image. This is a wrapper of
+ mmcv.imshow_bboxes.
+
+ Args:
+ img (str or ndarray): The image to be displayed.
+ bboxes (ndarray): ndarray of shape (k, 4), each row is a bbox in
+ format [x1, y1, x2, y2].
+ labels (str or list[str], optional): labels of each bbox.
+ colors (list[str or tuple or :obj:`Color`]): A list of colors.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ show (bool): Whether to show the image.
+ win_name (str): The window name.
+ wait_time (int): Value of waitKey param.
+ out_file (str, optional): The filename to write the image.
+
+ Returns:
+ ndarray: The image with bboxes drawn on it.
+ """
+
+ # adapt to mmcv.imshow_bboxes input format
+ bboxes = np.split(
+ bboxes, bboxes.shape[0], axis=0) if bboxes.shape[0] > 0 else []
+ if not isinstance(colors, list):
+ colors = [colors for _ in range(len(bboxes))]
+ colors = [mmcv.color_val(c) for c in colors]
+ assert len(bboxes) == len(colors)
+
+ img = mmcv.imshow_bboxes(
+ img,
+ bboxes,
+ colors,
+ top_k=-1,
+ thickness=thickness,
+ show=False,
+ out_file=None)
+
+ if labels is not None:
+ if not isinstance(labels, list):
+ labels = [labels for _ in range(len(bboxes))]
+ assert len(labels) == len(bboxes)
+
+ for bbox, label, color in zip(bboxes, labels, colors):
+ if label is None:
+ continue
+ bbox_int = bbox[0, :4].astype(np.int32)
+ # roughly estimate the proper font size
+ text_size, text_baseline = cv2.getTextSize(label,
+ cv2.FONT_HERSHEY_DUPLEX,
+ font_scale, thickness)
+ text_x1 = bbox_int[0]
+ text_y1 = max(0, bbox_int[1] - text_size[1] - text_baseline)
+ text_x2 = bbox_int[0] + text_size[0]
+ text_y2 = text_y1 + text_size[1] + text_baseline
+ cv2.rectangle(img, (text_x1, text_y1), (text_x2, text_y2), color,
+ cv2.FILLED)
+ cv2.putText(img, label, (text_x1, text_y2 - text_baseline),
+ cv2.FONT_HERSHEY_DUPLEX, font_scale,
+ mmcv.color_val(text_color), thickness)
+
+ if show:
+ mmcv.imshow(img, win_name, wait_time)
+ if out_file is not None:
+ mmcv.imwrite(img, out_file)
+ return img
+
+
+@deprecated_api_warning({'pose_limb_color': 'pose_link_color'})
+def imshow_keypoints(img,
+ pose_result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=4,
+ thickness=1,
+ show_keypoint_weight=False):
+ """Draw keypoints and links on an image.
+
+ Args:
+        img (str or np.ndarray): The image to draw poses on. If an image array
+            is given, it will be modified in-place.
+        pose_result (list[kpts]): The poses to draw. Each element kpts is
+            a set of K keypoints as a Kx3 numpy.ndarray, where each
+            keypoint is represented as x, y, score.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+        pose_kpt_color (np.array[Nx3]): Color of N keypoints. If None,
+ the keypoint will not be drawn.
+ pose_link_color (np.array[Mx3]): Color of M links. If None, the
+ links will not be drawn.
+ thickness (int): Thickness of lines.
+ """
+
+ img = mmcv.imread(img)
+ img_h, img_w, _ = img.shape
+
+ for kpts in pose_result:
+
+ kpts = np.array(kpts, copy=False)
+
+ # draw each point on image
+ if pose_kpt_color is not None:
+ assert len(pose_kpt_color) == len(kpts)
+ for kid, kpt in enumerate(kpts):
+ x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
+ if kpt_score > kpt_score_thr:
+ color = tuple(int(c) for c in pose_kpt_color[kid])
+ if show_keypoint_weight:
+ img_copy = img.copy()
+ cv2.circle(img_copy, (int(x_coord), int(y_coord)),
+ radius, color, -1)
+ transparency = max(0, min(1, kpt_score))
+ cv2.addWeighted(
+ img_copy,
+ transparency,
+ img,
+ 1 - transparency,
+ 0,
+ dst=img)
+ else:
+ cv2.circle(img, (int(x_coord), int(y_coord)), radius,
+ color, -1)
+
+ # draw links
+ if skeleton is not None and pose_link_color is not None:
+ assert len(pose_link_color) == len(skeleton)
+ for sk_id, sk in enumerate(skeleton):
+ pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
+ pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
+ if (pos1[0] > 0 and pos1[0] < img_w and pos1[1] > 0
+ and pos1[1] < img_h and pos2[0] > 0 and pos2[0] < img_w
+ and pos2[1] > 0 and pos2[1] < img_h
+ and kpts[sk[0], 2] > kpt_score_thr
+ and kpts[sk[1], 2] > kpt_score_thr):
+ color = tuple(int(c) for c in pose_link_color[sk_id])
+ if show_keypoint_weight:
+ img_copy = img.copy()
+ X = (pos1[0], pos2[0])
+ Y = (pos1[1], pos2[1])
+ mX = np.mean(X)
+ mY = np.mean(Y)
+ length = ((Y[0] - Y[1])**2 + (X[0] - X[1])**2)**0.5
+ angle = math.degrees(
+ math.atan2(Y[0] - Y[1], X[0] - X[1]))
+ stickwidth = 2
+ polygon = cv2.ellipse2Poly(
+ (int(mX), int(mY)),
+ (int(length / 2), int(stickwidth)), int(angle), 0,
+ 360, 1)
+ cv2.fillConvexPoly(img_copy, polygon, color)
+ transparency = max(
+ 0, min(1, 0.5 * (kpts[sk[0], 2] + kpts[sk[1], 2])))
+ cv2.addWeighted(
+ img_copy,
+ transparency,
+ img,
+ 1 - transparency,
+ 0,
+ dst=img)
+ else:
+ cv2.line(img, pos1, pos2, color, thickness=thickness)
+
+ return img
+
+
+def imshow_keypoints_3d(
+ pose_result,
+ img=None,
+ skeleton=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ vis_height=400,
+ kpt_score_thr=0.3,
+ num_instances=-1,
+ *,
+ axis_azimuth=70,
+ axis_limit=1.7,
+ axis_dist=10.0,
+ axis_elev=15.0,
+):
+ """Draw 3D keypoints and links in 3D coordinates.
+
+ Args:
+ pose_result (list[dict]): 3D pose results containing:
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "title" (str): Optional. A string to specify the title of the
+ visualization of this pose result
+        img (str|np.ndarray): Optional. The image or image path to show input
+ image and/or 2D pose. Note that the image should be given in BGR
+ channel order.
+ skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
+ links, each is a pair of joint indices.
+        pose_kpt_color (np.ndarray[Nx3]): Color of N keypoints. If None, do
+            not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links. If None, do not
+ draw links.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ kpt_score_thr (float): Minimum score of keypoints to be shown.
+ Default: 0.3.
+ num_instances (int): Number of instances to be shown in 3D. If smaller
+ than 0, all the instances in the pose_result will be shown.
+ Otherwise, pad or truncate the pose_result to a length of
+ num_instances.
+ axis_azimuth (float): axis azimuth angle for 3D visualizations.
+ axis_dist (float): axis distance for 3D visualizations.
+ axis_elev (float): axis elevation view angle for 3D visualizations.
+ axis_limit (float): The axis limit to visualize 3d pose. The xyz
+ range will be set as:
+ - x: [x_c - axis_limit/2, x_c + axis_limit/2]
+ - y: [y_c - axis_limit/2, y_c + axis_limit/2]
+ - z: [0, axis_limit]
+ Where x_c, y_c is the mean value of x and y coordinates
+ """
+
+ show_img = img is not None
+ if num_instances < 0:
+ num_instances = len(pose_result)
+ else:
+ if len(pose_result) > num_instances:
+ pose_result = pose_result[:num_instances]
+ elif len(pose_result) < num_instances:
+ pose_result += [dict()] * (num_instances - len(pose_result))
+ num_axis = num_instances + 1 if show_img else num_instances
+
+ plt.ioff()
+ fig = plt.figure(figsize=(vis_height * num_axis * 0.01, vis_height * 0.01))
+
+ if show_img:
+ img = mmcv.imread(img, channel_order='bgr')
+ img = mmcv.bgr2rgb(img)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ ax_img = fig.add_subplot(1, num_axis, 1)
+ ax_img.get_xaxis().set_visible(False)
+ ax_img.get_yaxis().set_visible(False)
+ ax_img.set_axis_off()
+ ax_img.set_title('Input')
+ ax_img.imshow(img, aspect='equal')
+
+ for idx, res in enumerate(pose_result):
+ dummy = len(res) == 0
+ kpts = np.zeros((1, 3)) if dummy else res['keypoints_3d']
+ if kpts.shape[1] == 3:
+ kpts = np.concatenate([kpts, np.ones((kpts.shape[0], 1))], axis=1)
+ valid = kpts[:, 3] >= kpt_score_thr
+
+ ax_idx = idx + 2 if show_img else idx + 1
+ ax = fig.add_subplot(1, num_axis, ax_idx, projection='3d')
+ ax.view_init(
+ elev=axis_elev,
+ azim=axis_azimuth,
+ )
+ x_c = np.mean(kpts[valid, 0]) if sum(valid) > 0 else 0
+ y_c = np.mean(kpts[valid, 1]) if sum(valid) > 0 else 0
+ ax.set_xlim3d([x_c - axis_limit / 2, x_c + axis_limit / 2])
+ ax.set_ylim3d([y_c - axis_limit / 2, y_c + axis_limit / 2])
+ ax.set_zlim3d([0, axis_limit])
+ ax.set_aspect('auto')
+ ax.set_xticks([])
+ ax.set_yticks([])
+ ax.set_zticks([])
+ ax.set_xticklabels([])
+ ax.set_yticklabels([])
+ ax.set_zticklabels([])
+ ax.dist = axis_dist
+
+ if not dummy and pose_kpt_color is not None:
+ pose_kpt_color = np.array(pose_kpt_color)
+ assert len(pose_kpt_color) == len(kpts)
+ x_3d, y_3d, z_3d = np.split(kpts[:, :3], [1, 2], axis=1)
+ # matplotlib uses RGB color in [0, 1] value range
+ _color = pose_kpt_color[..., ::-1] / 255.
+ ax.scatter(
+ x_3d[valid],
+ y_3d[valid],
+ z_3d[valid],
+ marker='o',
+ color=_color[valid],
+ )
+
+ if not dummy and skeleton is not None and pose_link_color is not None:
+ pose_link_color = np.array(pose_link_color)
+ assert len(pose_link_color) == len(skeleton)
+ for link, link_color in zip(skeleton, pose_link_color):
+ link_indices = [_i for _i in link]
+ xs_3d = kpts[link_indices, 0]
+ ys_3d = kpts[link_indices, 1]
+ zs_3d = kpts[link_indices, 2]
+ kpt_score = kpts[link_indices, 3]
+ if kpt_score.min() > kpt_score_thr:
+ # matplotlib uses RGB color in [0, 1] value range
+ _color = link_color[::-1] / 255.
+ ax.plot(xs_3d, ys_3d, zs_3d, color=_color, zdir='z')
+
+ if 'title' in res:
+ ax.set_title(res['title'])
+
+ # convert figure to numpy array
+ fig.tight_layout()
+ fig.canvas.draw()
+ img_w, img_h = fig.canvas.get_width_height()
+ img_vis = np.frombuffer(
+ fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(img_h, img_w, -1)
+ img_vis = mmcv.rgb2bgr(img_vis)
+
+ plt.close(fig)
+
+ return img_vis
+
+
+def imshow_mesh_3d(img,
+ vertices,
+ faces,
+ camera_center,
+ focal_length,
+ colors=(76, 76, 204)):
+ """Render 3D meshes on background image.
+
+ Args:
+        img (np.ndarray): Background image.
+        vertices (list of np.ndarray): Vertex coordinates in camera space.
+ faces (list of np.ndarray): Faces of meshes.
+ camera_center ([2]): Center pixel.
+ focal_length ([2]): Focal length of camera.
+ colors (list[str or tuple or Color]): A list of mesh colors.
+ """
+
+ H, W, C = img.shape
+
+ if not has_pyrender:
+ warnings.warn('pyrender package is not installed.')
+ return img
+
+ if not has_trimesh:
+ warnings.warn('trimesh package is not installed.')
+ return img
+
+ try:
+ renderer = pyrender.OffscreenRenderer(
+ viewport_width=W, viewport_height=H)
+ except (ImportError, RuntimeError):
+ warnings.warn('pyrender package is not installed correctly.')
+ return img
+
+ if not isinstance(colors, list):
+ colors = [colors for _ in range(len(vertices))]
+ colors = [color_val(c) for c in colors]
+
+ depth_map = np.ones([H, W]) * np.inf
+ output_img = img
+ for idx in range(len(vertices)):
+ color = colors[idx]
+ color = [c / 255.0 for c in color]
+ color.append(1.0)
+ vert = vertices[idx]
+ face = faces[idx]
+
+ material = pyrender.MetallicRoughnessMaterial(
+ metallicFactor=0.2, alphaMode='OPAQUE', baseColorFactor=color)
+
+ mesh = trimesh.Trimesh(vert, face)
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(180), [1, 0, 0])
+ mesh.apply_transform(rot)
+ mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
+
+ scene = pyrender.Scene(ambient_light=(0.5, 0.5, 0.5))
+ scene.add(mesh, 'mesh')
+
+ camera_pose = np.eye(4)
+ camera = pyrender.IntrinsicsCamera(
+ fx=focal_length[0],
+ fy=focal_length[1],
+ cx=camera_center[0],
+ cy=camera_center[1],
+ zfar=1e5)
+ scene.add(camera, pose=camera_pose)
+
+ light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1)
+ light_pose = np.eye(4)
+
+ light_pose[:3, 3] = np.array([0, -1, 1])
+ scene.add(light, pose=light_pose)
+
+ light_pose[:3, 3] = np.array([0, 1, 1])
+ scene.add(light, pose=light_pose)
+
+ light_pose[:3, 3] = np.array([1, 1, 2])
+ scene.add(light, pose=light_pose)
+
+ color, rend_depth = renderer.render(
+ scene, flags=pyrender.RenderFlags.RGBA)
+
+ valid_mask = (rend_depth < depth_map) * (rend_depth > 0)
+ depth_map[valid_mask] = rend_depth[valid_mask]
+ valid_mask = valid_mask[:, :, None]
+ output_img = (
+ valid_mask * color[:, :, :3] + (1 - valid_mask) * output_img)
+
+ return output_img
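
A self-contained sketch for `imshow_keypoints` with a tiny made-up pose (three keypoints, two links) drawn on a blank canvas; coordinates and colors are arbitrary:

    import numpy as np
    from mmpose.core.visualization import imshow_keypoints

    canvas = np.full((240, 320, 3), 255, dtype=np.uint8)
    kpts = np.array([[100., 100., 0.9],
                     [150., 110., 0.8],
                     [125., 160., 0.7]])  # (K, 3): x, y, score
    skeleton = [[0, 1], [1, 2]]
    kpt_color = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])
    link_color = np.array([[0, 128, 255], [255, 128, 0]])

    vis = imshow_keypoints(canvas, [kpts], skeleton=skeleton,
                           pose_kpt_color=kpt_color, pose_link_color=link_color,
                           radius=4, thickness=2)
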
diff --git a/mmpose/datasets/__init__.py b/mmpose/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b9e7cf035e1e7621d82ce98eb8ab372ce8cfc98
--- /dev/null
+++ b/mmpose/datasets/__init__.py
@@ -0,0 +1,42 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
+from .dataset_info import DatasetInfo
+from .pipelines import Compose
+from .samplers import DistributedSampler
+
+from .datasets import ( # isort:skip
+ AnimalATRWDataset, AnimalFlyDataset, AnimalHorse10Dataset,
+ AnimalLocustDataset, AnimalMacaqueDataset, AnimalPoseDataset,
+ AnimalZebraDataset, Body3DH36MDataset, BottomUpAicDataset,
+ BottomUpCocoDataset, BottomUpCocoWholeBodyDataset,
+ BottomUpCrowdPoseDataset, BottomUpMhpDataset, DeepFashionDataset,
+ Face300WDataset, FaceAFLWDataset, FaceCocoWholeBodyDataset,
+ FaceCOFWDataset, FaceWFLWDataset, FreiHandDataset,
+ HandCocoWholeBodyDataset, InterHand2DDataset, InterHand3DDataset,
+ MeshAdversarialDataset, MeshH36MDataset, MeshMixDataset, MoshDataset,
+ OneHand10KDataset, PanopticDataset, TopDownAicDataset, TopDownCocoDataset,
+ TopDownCocoWholeBodyDataset, TopDownCrowdPoseDataset,
+ TopDownFreiHandDataset, TopDownH36MDataset, TopDownJhmdbDataset,
+ TopDownMhpDataset, TopDownMpiiDataset, TopDownMpiiTrbDataset,
+ TopDownOCHumanDataset, TopDownOneHand10KDataset, TopDownPanopticDataset,
+ TopDownPoseTrack18Dataset, TopDownPoseTrack18VideoDataset)
+
+__all__ = [
+ 'TopDownCocoDataset', 'BottomUpCocoDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset', 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset', 'OneHand10KDataset', 'PanopticDataset',
+ 'HandCocoWholeBodyDataset', 'FreiHandDataset', 'InterHand2DDataset',
+ 'InterHand3DDataset', 'TopDownOCHumanDataset', 'TopDownAicDataset',
+ 'TopDownCocoWholeBodyDataset', 'MeshH36MDataset', 'MeshMixDataset',
+ 'MoshDataset', 'MeshAdversarialDataset', 'TopDownCrowdPoseDataset',
+ 'BottomUpCrowdPoseDataset', 'TopDownFreiHandDataset',
+ 'TopDownOneHand10KDataset', 'TopDownPanopticDataset',
+ 'TopDownPoseTrack18Dataset', 'TopDownJhmdbDataset', 'TopDownMhpDataset',
+ 'DeepFashionDataset', 'Face300WDataset', 'FaceAFLWDataset',
+ 'FaceWFLWDataset', 'FaceCOFWDataset', 'FaceCocoWholeBodyDataset',
+ 'Body3DH36MDataset', 'AnimalHorse10Dataset', 'AnimalMacaqueDataset',
+ 'AnimalFlyDataset', 'AnimalLocustDataset', 'AnimalZebraDataset',
+ 'AnimalATRWDataset', 'AnimalPoseDataset', 'TopDownH36MDataset',
+ 'TopDownPoseTrack18VideoDataset', 'build_dataloader', 'build_dataset',
+ 'Compose', 'DistributedSampler', 'DATASETS', 'PIPELINES', 'DatasetInfo'
+]
diff --git a/mmpose/datasets/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..754add8b87658148d8a4c7b87a9d95df13712d51
Binary files /dev/null and b/mmpose/datasets/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/__pycache__/builder.cpython-310.pyc b/mmpose/datasets/__pycache__/builder.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a1772d17d3b60509a7dc45ee38dd5ab262e781a
Binary files /dev/null and b/mmpose/datasets/__pycache__/builder.cpython-310.pyc differ
diff --git a/mmpose/datasets/__pycache__/dataset_info.cpython-310.pyc b/mmpose/datasets/__pycache__/dataset_info.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8497b2cbda5877b36645919b0a0319262339ace9
Binary files /dev/null and b/mmpose/datasets/__pycache__/dataset_info.cpython-310.pyc differ
diff --git a/mmpose/datasets/builder.py b/mmpose/datasets/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..990ba859e010064377f805e6aa3826984cf25b55
--- /dev/null
+++ b/mmpose/datasets/builder.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import platform
+import random
+from functools import partial
+
+import numpy as np
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from mmcv.utils import Registry, build_from_cfg, is_seq_of
+from mmcv.utils.parrots_wrapper import _get_dataloader
+from torch.utils.data.dataset import ConcatDataset
+
+from .samplers import DistributedSampler
+
+if platform.system() != 'Windows':
+ # https://github.com/pytorch/pytorch/issues/973
+ import resource
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+ base_soft_limit = rlimit[0]
+ hard_limit = rlimit[1]
+ soft_limit = min(max(4096, base_soft_limit), hard_limit)
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+
+DATASETS = Registry('dataset')
+PIPELINES = Registry('pipeline')
+
+
+def _concat_dataset(cfg, default_args=None):
+ types = cfg['type']
+ ann_files = cfg['ann_file']
+ img_prefixes = cfg.get('img_prefix', None)
+ dataset_infos = cfg.get('dataset_info', None)
+
+ num_joints = cfg['data_cfg'].get('num_joints', None)
+ dataset_channel = cfg['data_cfg'].get('dataset_channel', None)
+
+ datasets = []
+ num_dset = len(ann_files)
+ for i in range(num_dset):
+ cfg_copy = copy.deepcopy(cfg)
+ cfg_copy['ann_file'] = ann_files[i]
+
+ if isinstance(types, (list, tuple)):
+ cfg_copy['type'] = types[i]
+ if isinstance(img_prefixes, (list, tuple)):
+ cfg_copy['img_prefix'] = img_prefixes[i]
+ if isinstance(dataset_infos, (list, tuple)):
+ cfg_copy['dataset_info'] = dataset_infos[i]
+
+ if isinstance(num_joints, (list, tuple)):
+ cfg_copy['data_cfg']['num_joints'] = num_joints[i]
+
+ if is_seq_of(dataset_channel, list):
+ cfg_copy['data_cfg']['dataset_channel'] = dataset_channel[i]
+
+ datasets.append(build_dataset(cfg_copy, default_args))
+
+ return ConcatDataset(datasets)
+
+
+def build_dataset(cfg, default_args=None):
+ """Build a dataset from config dict.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+ default_args (dict, optional): Default initialization arguments.
+ Default: None.
+
+ Returns:
+ Dataset: The constructed dataset.
+ """
+ from .dataset_wrappers import RepeatDataset
+
+ if isinstance(cfg, (list, tuple)):
+ dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
+ elif cfg['type'] == 'ConcatDataset':
+ dataset = ConcatDataset(
+ [build_dataset(c, default_args) for c in cfg['datasets']])
+ elif cfg['type'] == 'RepeatDataset':
+ dataset = RepeatDataset(
+ build_dataset(cfg['dataset'], default_args), cfg['times'])
+ elif isinstance(cfg.get('ann_file'), (list, tuple)):
+ dataset = _concat_dataset(cfg, default_args)
+ else:
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
+ return dataset
+
+
+def build_dataloader(dataset,
+ samples_per_gpu,
+ workers_per_gpu,
+ num_gpus=1,
+ dist=True,
+ shuffle=True,
+ seed=None,
+ drop_last=True,
+ pin_memory=True,
+ **kwargs):
+ """Build PyTorch DataLoader.
+
+ In distributed training, each GPU/process has a dataloader.
+ In non-distributed training, there is only one dataloader for all GPUs.
+
+ Args:
+ dataset (Dataset): A PyTorch dataset.
+ samples_per_gpu (int): Number of training samples on each GPU, i.e.,
+ batch size of each GPU.
+ workers_per_gpu (int): How many subprocesses to use for data loading
+ for each GPU.
+ num_gpus (int): Number of GPUs. Only used in non-distributed training.
+ dist (bool): Distributed training/test or not. Default: True.
+ shuffle (bool): Whether to shuffle the data at every epoch.
+ Default: True.
+ drop_last (bool): Whether to drop the last incomplete batch in epoch.
+ Default: True
+ pin_memory (bool): Whether to use pin_memory in DataLoader.
+ Default: True
+ kwargs: any keyword argument to be used to initialize DataLoader
+
+ Returns:
+ DataLoader: A PyTorch dataloader.
+ """
+ rank, world_size = get_dist_info()
+ if dist:
+ sampler = DistributedSampler(
+ dataset, world_size, rank, shuffle=shuffle, seed=seed)
+ shuffle = False
+ batch_size = samples_per_gpu
+ num_workers = workers_per_gpu
+ else:
+ sampler = None
+ batch_size = num_gpus * samples_per_gpu
+ num_workers = num_gpus * workers_per_gpu
+
+ init_fn = partial(
+ worker_init_fn, num_workers=num_workers, rank=rank,
+ seed=seed) if seed is not None else None
+
+ _, DataLoader = _get_dataloader()
+ data_loader = DataLoader(
+ dataset,
+ batch_size=batch_size,
+ sampler=sampler,
+ num_workers=num_workers,
+ collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ **kwargs)
+
+ return data_loader
+
+
+def worker_init_fn(worker_id, num_workers, rank, seed):
+ """Init the random seed for various workers."""
+ # The seed of each worker equals to
+ # num_worker * rank + worker_id + user_seed
+ worker_seed = num_workers * rank + worker_id + seed
+ np.random.seed(worker_seed)
+ random.seed(worker_seed)
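
A hedged sketch of `build_dataloader` on a toy torch dataset with `dist=False`, so no process group is required; real mmpose training would instead pass a dataset produced by `build_dataset` from a config file:

    import torch
    from torch.utils.data import TensorDataset
    from mmpose.datasets import build_dataloader

    toy = TensorDataset(torch.randn(64, 3, 256, 192), torch.randint(0, 17, (64,)))
    loader = build_dataloader(toy, samples_per_gpu=8, workers_per_gpu=0,
                              dist=False, shuffle=True, seed=0,
                              drop_last=False, pin_memory=False)

    imgs, labels = next(iter(loader))  # imgs: [8, 3, 256, 192], labels: [8]
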
diff --git a/mmpose/datasets/dataset_info.py b/mmpose/datasets/dataset_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef0d62e43089770797ef565d2153c8d42e4956c5
--- /dev/null
+++ b/mmpose/datasets/dataset_info.py
@@ -0,0 +1,104 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+
+class DatasetInfo:
+
+ def __init__(self, dataset_info):
+ self._dataset_info = dataset_info
+ self.dataset_name = self._dataset_info['dataset_name']
+ self.paper_info = self._dataset_info['paper_info']
+ self.keypoint_info = self._dataset_info['keypoint_info']
+ self.skeleton_info = self._dataset_info['skeleton_info']
+ self.joint_weights = np.array(
+ self._dataset_info['joint_weights'], dtype=np.float32)[:, None]
+
+ self.sigmas = np.array(self._dataset_info['sigmas'])
+
+ self._parse_keypoint_info()
+ self._parse_skeleton_info()
+
+ def _parse_skeleton_info(self):
+ """Parse skeleton information.
+
+ - link_num (int): number of links.
+ - skeleton (list((2,))): list of links (id).
+ - skeleton_name (list((2,))): list of links (name).
+ - pose_link_color (np.ndarray): the color of the link for
+ visualization.
+ """
+ self.link_num = len(self.skeleton_info.keys())
+ self.pose_link_color = []
+
+ self.skeleton_name = []
+ self.skeleton = []
+ for skid in self.skeleton_info.keys():
+ link = self.skeleton_info[skid]['link']
+ self.skeleton_name.append(link)
+ self.skeleton.append([
+ self.keypoint_name2id[link[0]], self.keypoint_name2id[link[1]]
+ ])
+ self.pose_link_color.append(self.skeleton_info[skid].get(
+ 'color', [255, 128, 0]))
+ self.pose_link_color = np.array(self.pose_link_color)
+
+ def _parse_keypoint_info(self):
+ """Parse keypoint information.
+
+ - keypoint_num (int): number of keypoints.
+ - keypoint_id2name (dict): mapping keypoint id to keypoint name.
+ - keypoint_name2id (dict): mapping keypoint name to keypoint id.
+ - upper_body_ids (list): a list of keypoints that belong to the
+ upper body.
+ - lower_body_ids (list): a list of keypoints that belong to the
+ lower body.
+ - flip_index (list): list of flip index (id)
+ - flip_pairs (list((2,))): list of flip pairs (id)
+ - flip_index_name (list): list of flip index (name)
+ - flip_pairs_name (list((2,))): list of flip pairs (name)
+ - pose_kpt_color (np.ndarray): the color of the keypoint for
+ visualization.
+ """
+
+ self.keypoint_num = len(self.keypoint_info.keys())
+ self.keypoint_id2name = {}
+ self.keypoint_name2id = {}
+
+ self.pose_kpt_color = []
+ self.upper_body_ids = []
+ self.lower_body_ids = []
+
+ self.flip_index_name = []
+ self.flip_pairs_name = []
+
+ for kid in self.keypoint_info.keys():
+
+ keypoint_name = self.keypoint_info[kid]['name']
+ self.keypoint_id2name[kid] = keypoint_name
+ self.keypoint_name2id[keypoint_name] = kid
+ self.pose_kpt_color.append(self.keypoint_info[kid].get(
+ 'color', [255, 128, 0]))
+
+ type = self.keypoint_info[kid].get('type', '')
+ if type == 'upper':
+ self.upper_body_ids.append(kid)
+ elif type == 'lower':
+ self.lower_body_ids.append(kid)
+ else:
+ pass
+
+ swap_keypoint = self.keypoint_info[kid].get('swap', '')
+ if swap_keypoint == keypoint_name or swap_keypoint == '':
+ self.flip_index_name.append(keypoint_name)
+ else:
+ self.flip_index_name.append(swap_keypoint)
+ if [swap_keypoint, keypoint_name] not in self.flip_pairs_name:
+ self.flip_pairs_name.append([keypoint_name, swap_keypoint])
+
+ self.flip_pairs = [[
+ self.keypoint_name2id[pair[0]], self.keypoint_name2id[pair[1]]
+ ] for pair in self.flip_pairs_name]
+ self.flip_index = [
+ self.keypoint_name2id[name] for name in self.flip_index_name
+ ]
+ self.pose_kpt_color = np.array(self.pose_kpt_color)
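
A toy `dataset_info` dict with two keypoints and one link, just to show what `DatasetInfo` derives from it; real definitions live in mmpose's configs/_base_/datasets/ files:

    from mmpose.datasets import DatasetInfo

    info = DatasetInfo(dict(
        dataset_name='toy_hand',
        paper_info={},
        keypoint_info={
            0: dict(name='left_tip', color=[255, 0, 0], type='upper', swap='right_tip'),
            1: dict(name='right_tip', color=[0, 255, 0], type='upper', swap='left_tip'),
        },
        skeleton_info={0: dict(link=('left_tip', 'right_tip'), color=[0, 0, 255])},
        joint_weights=[1.0, 1.0],
        sigmas=[0.025, 0.025],
    ))

    print(info.keypoint_num, info.flip_pairs, info.skeleton)  # 2 [[0, 1]] [[0, 1]]
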
diff --git a/mmpose/datasets/dataset_wrappers.py b/mmpose/datasets/dataset_wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..aaaa173b91f2ad63dc7d80b793fa3d9619a4630c
--- /dev/null
+++ b/mmpose/datasets/dataset_wrappers.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class RepeatDataset:
+ """A wrapper of repeated dataset.
+
+ The length of repeated dataset will be `times` larger than the original
+ dataset. This is useful when the data loading time is long but the dataset
+ is small. Using RepeatDataset can reduce the data loading time between
+ epochs.
+
+ Args:
+ dataset (:obj:`Dataset`): The dataset to be repeated.
+ times (int): Repeat times.
+ """
+
+ def __init__(self, dataset, times):
+ self.dataset = dataset
+ self.times = times
+
+ self._ori_len = len(self.dataset)
+
+ def __getitem__(self, idx):
+ """Get data."""
+ return self.dataset[idx % self._ori_len]
+
+ def __len__(self):
+ """Length after repetition."""
+ return self.times * self._ori_len
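
A quick illustration of the index wrapping in `RepeatDataset`, using a trivial list-backed dataset (any object with `__getitem__` and `__len__` works):

    from mmpose.datasets.dataset_wrappers import RepeatDataset

    class TinyDataset:
        def __init__(self, items):
            self.items = items

        def __getitem__(self, idx):
            return self.items[idx]

        def __len__(self):
            return len(self.items)

    ds = RepeatDataset(TinyDataset(['a', 'b', 'c']), times=3)
    print(len(ds))  # 9
    print(ds[4])    # 'b', since index 4 wraps to 4 % 3 = 1 in the original dataset
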
diff --git a/mmpose/datasets/datasets/__init__.py b/mmpose/datasets/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3839e5eaa0c068fec5e86804ce9d75c9e85ae4b
--- /dev/null
+++ b/mmpose/datasets/datasets/__init__.py
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ...deprecated import (TopDownFreiHandDataset, TopDownOneHand10KDataset,
+ TopDownPanopticDataset)
+from .animal import (AnimalATRWDataset, AnimalFlyDataset, AnimalHorse10Dataset,
+ AnimalLocustDataset, AnimalMacaqueDataset,
+ AnimalPoseDataset, AnimalZebraDataset)
+from .body3d import Body3DH36MDataset, Body3DMviewDirectPanopticDataset
+from .bottom_up import (BottomUpAicDataset, BottomUpCocoDataset,
+ BottomUpCocoWholeBodyDataset, BottomUpCrowdPoseDataset,
+ BottomUpMhpDataset)
+from .face import (Face300WDataset, FaceAFLWDataset, FaceCocoWholeBodyDataset,
+ FaceCOFWDataset, FaceWFLWDataset)
+from .fashion import DeepFashionDataset
+from .hand import (FreiHandDataset, HandCocoWholeBodyDataset,
+ InterHand2DDataset, InterHand3DDataset, OneHand10KDataset,
+ PanopticDataset)
+from .mesh import (MeshAdversarialDataset, MeshH36MDataset, MeshMixDataset,
+ MoshDataset)
+from .top_down import (TopDownAicDataset, TopDownCocoDataset,
+ TopDownCocoWholeBodyDataset, TopDownCrowdPoseDataset,
+ TopDownH36MDataset, TopDownHalpeDataset,
+ TopDownJhmdbDataset, TopDownMhpDataset,
+ TopDownMpiiDataset, TopDownMpiiTrbDataset,
+ TopDownOCHumanDataset, TopDownPoseTrack18Dataset,
+ TopDownPoseTrack18VideoDataset)
+
+__all__ = [
+ 'TopDownCocoDataset', 'BottomUpCocoDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset', 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset', 'OneHand10KDataset', 'PanopticDataset',
+ 'HandCocoWholeBodyDataset', 'FreiHandDataset', 'InterHand2DDataset',
+ 'InterHand3DDataset', 'TopDownOCHumanDataset', 'TopDownAicDataset',
+ 'TopDownCocoWholeBodyDataset', 'MeshH36MDataset', 'MeshMixDataset',
+ 'MoshDataset', 'MeshAdversarialDataset', 'TopDownCrowdPoseDataset',
+ 'BottomUpCrowdPoseDataset', 'TopDownFreiHandDataset',
+ 'TopDownOneHand10KDataset', 'TopDownPanopticDataset',
+ 'TopDownPoseTrack18Dataset', 'TopDownJhmdbDataset', 'TopDownMhpDataset',
+ 'DeepFashionDataset', 'Face300WDataset', 'FaceAFLWDataset',
+ 'FaceWFLWDataset', 'FaceCOFWDataset', 'FaceCocoWholeBodyDataset',
+ 'Body3DH36MDataset', 'AnimalHorse10Dataset', 'AnimalMacaqueDataset',
+ 'AnimalFlyDataset', 'AnimalLocustDataset', 'AnimalZebraDataset',
+ 'AnimalATRWDataset', 'AnimalPoseDataset', 'TopDownH36MDataset',
+ 'TopDownHalpeDataset', 'TopDownPoseTrack18VideoDataset',
+ 'Body3DMviewDirectPanopticDataset'
+]
diff --git a/mmpose/datasets/datasets/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e93aa1a1f079e0a61c04a7684d3790c5bdf49248
Binary files /dev/null and b/mmpose/datasets/datasets/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__init__.py b/mmpose/datasets/datasets/animal/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..185b935ced4cf072975ec37701b5e8a3aa1d7939
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .animal_ap10k_dataset import AnimalAP10KDataset
+from .animal_atrw_dataset import AnimalATRWDataset
+from .animal_fly_dataset import AnimalFlyDataset
+from .animal_horse10_dataset import AnimalHorse10Dataset
+from .animal_locust_dataset import AnimalLocustDataset
+from .animal_macaque_dataset import AnimalMacaqueDataset
+from .animal_pose_dataset import AnimalPoseDataset
+from .animal_zebra_dataset import AnimalZebraDataset
+
+__all__ = [
+ 'AnimalHorse10Dataset', 'AnimalMacaqueDataset', 'AnimalFlyDataset',
+ 'AnimalLocustDataset', 'AnimalZebraDataset', 'AnimalATRWDataset',
+ 'AnimalPoseDataset', 'AnimalAP10KDataset'
+]
diff --git a/mmpose/datasets/datasets/animal/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e80fdcbc3bbe9eec554d655de71631b0b3b5deb
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_ap10k_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_ap10k_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd7f4215c71a54cdeebf13a7bdcbbaf25b96e41b
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_ap10k_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_atrw_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_atrw_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c9c8418ee5d35e725d1fc28504717644b8145edc
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_atrw_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_fly_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_fly_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f9404587672f604e0019d66802c403cea84694c4
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_fly_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_horse10_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_horse10_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9fcd80d1f034a33d7a9a3a07e22a4b87faf8bbf8
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_horse10_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_locust_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_locust_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ed3dd6a009555fd7b32bf94c61d02ef0cd6dc5cc
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_locust_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_macaque_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_macaque_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e810986f8cf88a9353f685fcfd6992ccb6c29ced
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_macaque_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_pose_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_pose_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5207fc9488f67de430dbcbe2123a174dbe3decd6
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_pose_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/__pycache__/animal_zebra_dataset.cpython-310.pyc b/mmpose/datasets/datasets/animal/__pycache__/animal_zebra_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..50c189c46762c3f9089d9ee1b090741af5a20f78
Binary files /dev/null and b/mmpose/datasets/datasets/animal/__pycache__/animal_zebra_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py b/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..11a1e73ed0c72f5c3fc4ccdab010b53acd2a57c4
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py
@@ -0,0 +1,367 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalAP10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AP-10K dataset for animal pose estimation.
+
+ "AP-10K: A Benchmark for Animal Pose Estimation in the Wild"
+ Neurips Dataset Track'2021.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AP-10K keypoint indexes::
+
+ 0: 'L_Eye',
+ 1: 'R_Eye',
+ 2: 'Nose',
+ 3: 'Neck',
+ 4: 'root of tail',
+ 5: 'L_Shoulder',
+ 6: 'L_Elbow',
+ 7: 'L_F_Paw',
+ 8: 'R_Shoulder',
+ 9: 'R_Elbow',
+        10: 'R_F_Paw',
+ 11: 'L_Hip',
+ 12: 'L_Knee',
+ 13: 'L_B_Paw',
+ 14: 'R_Hip',
+ 15: 'R_Knee',
+ 16: 'R_B_Paw'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/ap10k.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db, self.id2Cat = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db, id2Cat = self._load_coco_keypoint_annotations()
+ return gt_db, id2Cat
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db, id2Cat = [], dict()
+ for img_id in self.img_ids:
+ db_tmp, id2Cat_tmp = self._load_coco_keypoint_annotation_kernel(
+ img_id)
+ gt_db.extend(db_tmp)
+ id2Cat.update({img_id: id2Cat_tmp})
+ return gt_db, id2Cat
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+            tuple: database entries and bbox_id-to-category records.
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ id2Cat = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ category = obj['category_id']
+ id2Cat.append({
+ 'image_file': image_file,
+ 'bbox_id': bbox_id,
+ 'category': category,
+ })
+ bbox_id = bbox_id + 1
+
+ return rec, id2Cat
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ cat = self.id2Cat[image_id][bbox_ids[i]]['category']
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i],
+ 'category': cat
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': img_kpt['category'],
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
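
A hypothetical config fragment showing how `AnimalAP10KDataset` is typically instantiated through the `DATASETS` registry; the paths, image sizes, and empty pipeline below are placeholders that would normally come from an mmpose AP-10K config:

    data_cfg = dict(
        image_size=[256, 256],
        heatmap_size=[64, 64],
        num_joints=17,
        use_gt_bbox=True,
        bbox_file='',
        det_bbox_thr=0.0,
        use_nms=True,
        soft_nms=False,
        nms_thr=1.0,
        oks_thr=0.9,
        vis_thr=0.2,
    )
    val_data = dict(
        type='AnimalAP10KDataset',
        ann_file='data/ap10k/annotations/ap10k-val-split1.json',
        img_prefix='data/ap10k/data/',
        data_cfg=data_cfg,
        pipeline=[],  # transform configs in a real setup
    )
    # build_dataset(val_data) instantiates the class via the registry; with
    # dataset_info omitted it falls back to configs/_base_/datasets/ap10k.py
    # and emits a deprecation warning.
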
diff --git a/mmpose/datasets/datasets/animal/animal_atrw_dataset.py b/mmpose/datasets/datasets/animal/animal_atrw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..edfd3f96c6571cda4bd39b223c3382f8cff17f51
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_atrw_dataset.py
@@ -0,0 +1,353 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalATRWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """ATRW dataset for animal pose estimation.
+
+ "ATRW: A Benchmark for Amur Tiger Re-identification in the Wild"
+ ACM MM'2020.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ ATRW keypoint indexes::
+
+ 0: "left_ear",
+ 1: "right_ear",
+ 2: "nose",
+ 3: "right_shoulder",
+ 4: "right_front_paw",
+ 5: "left_shoulder",
+ 6: "left_front_paw",
+ 7: "right_hip",
+ 8: "right_knee",
+ 9: "right_back_paw",
+ 10: "left_hip",
+ 11: "left_knee",
+ 12: "left_back_paw",
+ 13: "tail",
+ 14: "center"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/atrw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+            list[dict]: the database entries for the image.
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4], padding=1.0)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
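
For reference, the rescoring step inside `evaluate` above reduces to a few lines of numpy: keypoint confidences above `vis_thr` are averaged and multiplied by the detection box score before OKS-NMS. A minimal standalone sketch with made-up numbers, not part of the patch itself:

```python
# Sketch of the rescoring rule used in evaluate() above, applied to one
# hypothetical detection. Values are dummies.
import numpy as np

vis_thr = 0.2                      # assumed visibility threshold from data_cfg
box_score = 0.9                    # detector confidence for this instance
keypoints = np.array([             # (K, 3): x, y, confidence
    [10.0, 12.0, 0.8],
    [15.0, 20.0, 0.1],             # below vis_thr, ignored
    [30.0, 42.0, 0.6],
])

visible = keypoints[:, 2] > vis_thr
kpt_score = keypoints[visible, 2].mean() if visible.any() else 0.0
final_score = kpt_score * box_score
print(final_score)                 # 0.7 * 0.9 = 0.63
```
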
diff --git a/mmpose/datasets/datasets/animal/animal_base_dataset.py b/mmpose/datasets/datasets/animal/animal_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e191882f3424167e9bd07693498f36cd57905fd0
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class AnimalBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise ImportError(
+ 'AnimalBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgTopDownDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/mmpose/datasets/datasets/animal/animal_fly_dataset.py b/mmpose/datasets/datasets/animal/animal_fly_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4141176142e0d12c1c65b772f4e48c873f04c47
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_fly_dataset.py
@@ -0,0 +1,215 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalFlyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalFlyDataset for animal pose estimation.
+
+ "Fast animal pose estimation using deep neural networks"
+ Nature methods'2019. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Vinegar Fly keypoint indexes::
+
+ 0: "head",
+ 1: "eyeL",
+ 2: "eyeR",
+ 3: "neck",
+ 4: "thorax",
+ 5: "abdomen",
+ 6: "forelegR1",
+ 7: "forelegR2",
+ 8: "forelegR3",
+ 9: "forelegR4",
+ 10: "midlegR1",
+ 11: "midlegR2",
+ 12: "midlegR3",
+ 13: "midlegR4",
+ 14: "hindlegR1",
+ 15: "hindlegR2",
+ 16: "hindlegR3",
+ 17: "hindlegR4",
+ 18: "forelegL1",
+ 19: "forelegL2",
+ 20: "forelegL3",
+ 21: "forelegL4",
+ 22: "midlegL1",
+ 23: "midlegL2",
+ 24: "midlegL3",
+ 25: "midlegL4",
+ 26: "hindlegL1",
+ 27: "hindlegL2",
+ 28: "hindlegL3",
+ 29: "hindlegL4",
+ 30: "wingL",
+ 31: "wingR"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/fly.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 192x192
+ center, scale = self._xywh2cs(0, 0, 192, 192, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate Fly keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
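
For anyone wiring this dataset into a config, the sketch below shows roughly how `AnimalFlyDataset` could be described in an mmpose-style config fragment. The `data_cfg` keys mirror common mmpose top-down configs; the annotation paths, heatmap size, and empty pipeline are placeholders rather than values taken from this patch. Passing `dataset_info=None` triggers the deprecation fallback shown in `__init__` above.

```python
# Hypothetical config fragment (assumed keys and placeholder paths).
channel_cfg = dict(
    num_output_channels=32,                 # 32 vinegar-fly keypoints
    dataset_joints=32,
    dataset_channel=[list(range(32))],
    inference_channel=list(range(32)))

data_cfg = dict(
    image_size=[192, 192],                  # images are 192x192, per _get_db above
    heatmap_size=[48, 48],                  # assumed 1/4 resolution
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'])

test_dataset = dict(
    type='AnimalFlyDataset',
    ann_file='data/fly/annotations/fly_test.json',   # placeholder path
    img_prefix='data/fly/images/',                    # placeholder path
    data_cfg=data_cfg,
    pipeline=[],              # real configs list the test transforms here
    dataset_info=None)        # None falls back to configs/_base_/datasets/fly.py
```
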
diff --git a/mmpose/datasets/datasets/animal/animal_horse10_dataset.py b/mmpose/datasets/datasets/animal/animal_horse10_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2bf1986edb75f8f5e60c4ddd45bfb45d5e38d9c
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_horse10_dataset.py
@@ -0,0 +1,220 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalHorse10Dataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalHorse10Dataset for animal pose estimation.
+
+ "Pretraining boosts out-of-domain robustness for pose estimation"
+ WACV'2021. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Horse-10 keypoint indexes::
+
+ 0: 'Nose',
+ 1: 'Eye',
+ 2: 'Nearknee',
+ 3: 'Nearfrontfetlock',
+ 4: 'Nearfrontfoot',
+ 5: 'Offknee',
+ 6: 'Offfrontfetlock',
+ 7: 'Offfrontfoot',
+ 8: 'Shoulder',
+ 9: 'Midshoulder',
+ 10: 'Elbow',
+ 11: 'Girth',
+ 12: 'Wither',
+ 13: 'Nearhindhock',
+ 14: 'Nearhindfetlock',
+ 15: 'Nearhindfoot',
+ 16: 'Hip',
+ 17: 'Stifle',
+ 18: 'Offhindhock',
+ 19: 'Offhindfetlock',
+ 20: 'Offhindfoot',
+ 21: 'Ischium'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/horse10.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 0, :] - gts[:, 1, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate horse-10 keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
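
The `_get_normalize_factor` above boils down to the distance between the first two ground-truth keypoints of each instance, tiled to shape (N, 2). A standalone numpy sketch with dummy coordinates:

```python
# Standalone illustration of the normalization factor computed by
# _get_normalize_factor() above.
import numpy as np

gts = np.array([                     # (N, K, 2) ground-truth keypoints, dummy values
    [[10.0, 10.0], [13.0, 14.0], [50.0, 60.0]],
    [[ 0.0,  0.0], [ 6.0,  8.0], [20.0, 25.0]],
])

interocular = np.linalg.norm(gts[:, 0, :] - gts[:, 1, :], axis=1, keepdims=True)
normalize = np.tile(interocular, [1, 2])
print(normalize)                     # [[ 5.  5.]
                                     #  [10. 10.]]
```
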
diff --git a/mmpose/datasets/datasets/animal/animal_locust_dataset.py b/mmpose/datasets/datasets/animal/animal_locust_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..95fb6ac896e7d0553efb6c479fca92684d87ac22
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_locust_dataset.py
@@ -0,0 +1,218 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalLocustDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalLocustDataset for animal pose estimation.
+
+ "DeepPoseKit, a software toolkit for fast and robust animal
+ pose estimation using deep learning" Elife'2019.
+ More details can be found in the paper.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Desert Locust keypoint indexes::
+
+ 0: "head",
+ 1: "neck",
+ 2: "thorax",
+ 3: "abdomen1",
+ 4: "abdomen2",
+ 5: "anttipL",
+ 6: "antbaseL",
+ 7: "eyeL",
+ 8: "forelegL1",
+ 9: "forelegL2",
+ 10: "forelegL3",
+ 11: "forelegL4",
+ 12: "midlegL1",
+ 13: "midlegL2",
+ 14: "midlegL3",
+ 15: "midlegL4",
+ 16: "hindlegL1",
+ 17: "hindlegL2",
+ 18: "hindlegL3",
+ 19: "hindlegL4",
+ 20: "anttipR",
+ 21: "antbaseR",
+ 22: "eyeR",
+ 23: "forelegR1",
+ 24: "forelegR2",
+ 25: "forelegR3",
+ 26: "forelegR4",
+ 27: "midlegR1",
+ 28: "midlegR2",
+ 29: "midlegR3",
+ 30: "midlegR4",
+ 31: "hindlegR1",
+ 32: "hindlegR2",
+ 33: "hindlegR3",
+ 34: "hindlegR4"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/locust.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 160x160
+ center, scale = self._xywh2cs(0, 0, 160, 160, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate Fly keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/animal/animal_macaque_dataset.py b/mmpose/datasets/datasets/animal/animal_macaque_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..359fecaa2b6e29f24e2bdb01a3a8715f12c5925f
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_macaque_dataset.py
@@ -0,0 +1,355 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalMacaqueDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MacaquePose dataset for animal pose estimation.
+
+ "MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset
+ for markerless motion capture" bioRxiv'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Macaque keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/macaque.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
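
Each entry that `_coco_keypoint_results_one_category_kernel` writes into `result_keypoints.json` is a flat COCO-style record. An illustrative record with dummy values (the 17 macaque keypoints flattened to 51 numbers):

```python
# Illustration only: the shape of one JSON record emitted above.
import json

record = {
    'image_id': 123,
    'category_id': 1,
    'keypoints': [0.0] * (17 * 3),   # x1, y1, s1, x2, y2, s2, ...
    'score': 0.87,                   # rescored keypoint * box confidence
    'center': [320.0, 240.0],
    'scale': [1.2, 1.6],
}
print(json.dumps(record)[:80])
```
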
diff --git a/mmpose/datasets/datasets/animal/animal_pose_dataset.py b/mmpose/datasets/datasets/animal/animal_pose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ced5703f3771597f21123b44c77a53a02a48e78
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_pose_dataset.py
@@ -0,0 +1,359 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalPoseDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Animal-Pose dataset for animal pose estimation.
+
+ "Cross-domain Adaptation For Animal Pose Estimation" ICCV'2019
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Animal-Pose keypoint indexes::
+
+ 0: 'L_Eye',
+ 1: 'R_Eye',
+ 2: 'L_EarBase',
+ 3: 'R_EarBase',
+ 4: 'Nose',
+ 5: 'Throat',
+ 6: 'TailBase',
+ 7: 'Withers',
+ 8: 'L_F_Elbow',
+ 9: 'R_F_Elbow',
+ 10: 'L_B_Elbow',
+ 11: 'R_B_Elbow',
+ 12: 'L_F_Knee',
+ 13: 'R_F_Knee',
+ 14: 'L_B_Knee',
+ 15: 'R_B_Knee',
+ 16: 'L_F_Paw',
+ 17: 'R_F_Paw',
+ 18: 'L_B_Paw',
+ 19: 'R_B_Paw'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/animalpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+
+ Args:
+ img_id: coco image id
+
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
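
`_sort_and_unique_bboxes` simply orders each image's predictions by `bbox_id` and drops exact duplicates, which can appear, for example, when results are gathered from padded distributed samplers. A standalone sketch:

```python
# Standalone sketch mirroring _sort_and_unique_bboxes() above.
kpts = {
    42: [{'bbox_id': 2, 'score': 0.7},
         {'bbox_id': 0, 'score': 0.9},
         {'bbox_id': 2, 'score': 0.7}],   # duplicate of the first entry
}

for img_id, persons in kpts.items():
    persons.sort(key=lambda x: x['bbox_id'])
    for i in range(len(persons) - 1, 0, -1):
        if persons[i]['bbox_id'] == persons[i - 1]['bbox_id']:
            del persons[i]

print(kpts[42])   # [{'bbox_id': 0, 'score': 0.9}, {'bbox_id': 2, 'score': 0.7}]
```
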
diff --git a/mmpose/datasets/datasets/animal/animal_zebra_dataset.py b/mmpose/datasets/datasets/animal/animal_zebra_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c5e3b73c885f86c13e7a5ebf02b03441b2dc93d
--- /dev/null
+++ b/mmpose/datasets/datasets/animal/animal_zebra_dataset.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalZebraDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalZebraDataset for animal pose estimation.
+
+ "DeepPoseKit, a software toolkit for fast and robust animal
+ pose estimation using deep learning" Elife'2019.
+ More details can be found in the paper.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Zebra keypoint indexes::
+
+ 0: "snout",
+ 1: "head",
+ 2: "neck",
+ 3: "forelegL1",
+ 4: "forelegR1",
+ 5: "hindlegL1",
+ 6: "hindlegR1",
+ 7: "tailbase",
+ 8: "tailtip"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/zebra.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 160x160
+ center, scale = self._xywh2cs(0, 0, 160, 160, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate Fly keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
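
The `evaluate` methods in these animal datasets all consume the same `results` structure produced by the mmpose test loop. A sketch of one dummy entry follows (nine zebra keypoints assumed; the call at the end is commented out because it needs a constructed dataset):

```python
# Illustration of the `results` list passed to evaluate(); values are dummies.
import numpy as np

num_joints = 9                     # zebra keypoints
results = [{
    'preds': np.zeros((1, num_joints, 3)),   # (N, K, 3): x, y, score per keypoint
    'boxes': np.array([[80.0, 80.0, 0.64, 0.64, 16384.0, 1.0]]),  # center, scale, area, score
    'image_paths': ['Test/source/0.jpg'],
    'bbox_ids': [0],
    'output_heatmap': None,
}]
# dataset.evaluate(results, res_folder=None, metric=['PCK', 'AUC', 'EPE'])
```
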
diff --git a/mmpose/datasets/datasets/base/__init__.py b/mmpose/datasets/datasets/base/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5f9a0899cdfde4132b068e6408ca721a59dc9b4
--- /dev/null
+++ b/mmpose/datasets/datasets/base/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .kpt_2d_sview_rgb_img_bottom_up_dataset import \
+ Kpt2dSviewRgbImgBottomUpDataset
+from .kpt_2d_sview_rgb_img_top_down_dataset import \
+ Kpt2dSviewRgbImgTopDownDataset
+from .kpt_2d_sview_rgb_vid_top_down_dataset import \
+ Kpt2dSviewRgbVidTopDownDataset
+from .kpt_3d_mview_rgb_img_direct_dataset import Kpt3dMviewRgbImgDirectDataset
+from .kpt_3d_sview_kpt_2d_dataset import Kpt3dSviewKpt2dDataset
+from .kpt_3d_sview_rgb_img_top_down_dataset import \
+ Kpt3dSviewRgbImgTopDownDataset
+
+__all__ = [
+ 'Kpt3dMviewRgbImgDirectDataset', 'Kpt2dSviewRgbImgTopDownDataset',
+ 'Kpt3dSviewRgbImgTopDownDataset', 'Kpt2dSviewRgbImgBottomUpDataset',
+ 'Kpt3dSviewKpt2dDataset', 'Kpt2dSviewRgbVidTopDownDataset'
+]
diff --git a/mmpose/datasets/datasets/base/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cee42152016068da8ffc3b7f76ee7d4440f8e322
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_bottom_up_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_bottom_up_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc2e62a75a098e35edc9d4aa0622636934888ef2
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_bottom_up_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_top_down_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_top_down_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a8a77ff2f102f6cadef5257f0f15b9228bf402e6
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_img_top_down_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_vid_top_down_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_vid_top_down_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..75da0ac01e04e3298a32b0abcb91a21195f72ad3
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_2d_sview_rgb_vid_top_down_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_3d_mview_rgb_img_direct_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_mview_rgb_img_direct_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b35be39bad4c0b84dfcb3a38653872495a23d26
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_mview_rgb_img_direct_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_kpt_2d_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_kpt_2d_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b8bbeb7b8d08e3c580818747481b7f1575c7db27
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_kpt_2d_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_rgb_img_top_down_dataset.cpython-310.pyc b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_rgb_img_top_down_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..34aff4953002e8b8e726fd4ba184e2bd003cdb3f
Binary files /dev/null and b/mmpose/datasets/datasets/base/__pycache__/kpt_3d_sview_rgb_img_top_down_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..99306214db3a36465bdc8a24ebec41db58a6ca68
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py
@@ -0,0 +1,188 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import xtcocotools
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbImgBottomUpDataset(Dataset, metaclass=ABCMeta):
+ """Base class for bottom-up datasets.
+
+ All datasets should subclass it.
+ All subclasses should overwrite:
+ Methods:`_get_single`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Set True when building the test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ # bottom-up
+ self.base_size = data_cfg['base_size']
+ self.base_sigma = data_cfg['base_sigma']
+ self.int_sigma = False
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ self.ann_info['num_scales'] = data_cfg['num_scales']
+ self.ann_info['scale_aware_sigma'] = data_cfg['scale_aware_sigma']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.use_nms = data_cfg.get('use_nms', False)
+ self.soft_nms = data_cfg.get('soft_nms', True)
+ self.oks_thr = data_cfg.get('oks_thr', 0.9)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ if not test_mode:
+ self.img_ids = [
+ img_id for img_id in self.img_ids if
+ len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
+ ]
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _get_mask(self, anno, idx):
+ """Get ignore masks to mask out losses."""
+ coco = self.coco
+ img_info = coco.loadImgs(self.img_ids[idx])[0]
+
+ m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
+
+ for obj in anno:
+ if 'segmentation' in obj:
+ if obj['iscrowd']:
+ rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
+ img_info['height'],
+ img_info['width'])
+ m += xtcocotools.mask.decode(rle)
+ elif obj['num_keypoints'] == 0:
+ rles = xtcocotools.mask.frPyObjects(
+ obj['segmentation'], img_info['height'],
+ img_info['width'])
+ for rle in rles:
+ m += xtcocotools.mask.decode(rle)
+
+ return m < 0.5
+
+ @abstractmethod
+ def _get_single(self, idx):
+ """Get anno for a single image."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ def prepare_train_img(self, idx):
+ """Prepare image for training given the index."""
+ results = copy.deepcopy(self._get_single(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def prepare_test_img(self, idx):
+ """Prepare image for testing given the index."""
+ results = copy.deepcopy(self._get_single(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def __len__(self):
+ """Get dataset length."""
+ return len(self.img_ids)
+
+ def __getitem__(self, idx):
+ """Get the sample for either training or testing given index."""
+ if self.test_mode:
+ return self.prepare_test_img(idx)
+
+ return self.prepare_train_img(idx)
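+
+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (hypothetical, not part of the upstream mmpose
+# module): the id/name lookup built in __init__ is a plain bidirectional
+# mapping over COCO-style image records, and the boolean array returned by
+# _get_mask() is True exactly where the loss should be kept, i.e. pixels not
+# covered by crowd regions or keypoint-less instances (the ``m < 0.5``).
+# The toy ``imgs`` dict below is made up for demonstration only.
+if __name__ == '__main__':
+    _toy_imgs = {
+        1: {'file_name': '000000000001.jpg'},
+        2: {'file_name': '000000000002.jpg'},
+    }
+    _id2name, _name2id = \
+        Kpt2dSviewRgbImgBottomUpDataset._get_mapping_id_name(_toy_imgs)
+    assert _id2name[1] == '000000000001.jpg'
+    assert _name2id['000000000002.jpg'] == 2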
diff --git a/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb281f1bcf1a3771aea4fb5335487b17d5994168
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
@@ -0,0 +1,287 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.core.evaluation.top_down_eval import (keypoint_auc, keypoint_epe,
+ keypoint_nme,
+ keypoint_pck_accuracy)
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbImgTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 2D top-down pose estimation with single-view RGB
+ image as the input.
+
+    All datasets should subclass it.
+ All subclasses should overwrite:
+ Methods:`_get_db`, 'evaluate'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.ann_info['max_num_joints'] = data_cfg.get('max_num_joints', None)
+ self.ann_info['dataset_idx'] = data_cfg.get('dataset_idx', 0)
+
+ self.ann_info['use_different_joint_weights'] = data_cfg.get(
+ 'use_different_joint_weights', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get the normalize factor. generally inter-ocular distance measured
+ as the Euclidean distance between the outer corners of the eyes is
+ used. This function should be overrode, to measure NME.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+ return np.ones([gts.shape[0], 2], dtype=np.float32)
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self,
+ res_file,
+ metrics,
+ pck_thr=0.2,
+ pckh_thr=0.7,
+ auc_nor=30):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE', 'NME'.
+ pck_thr (float): PCK threshold, default as 0.2.
+ pckh_thr (float): PCKh threshold, default as 0.7.
+            auc_nor (float): AUC normalization factor, default as 30 pixels.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ outputs = []
+ gts = []
+ masks = []
+ box_sizes = []
+ threshold_bbox = []
+ threshold_head_box = []
+
+ for pred, item in zip(preds, self.db):
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
+ gts.append(np.array(item['joints_3d'])[:, :-1])
+ masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
+ if 'PCK' in metrics:
+ bbox = np.array(item['bbox'])
+ bbox_thr = np.max(bbox[2:])
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+ if 'PCKh' in metrics:
+ head_box_thr = item['head_size']
+ threshold_head_box.append(
+ np.array([head_box_thr, head_box_thr]))
+ box_sizes.append(item.get('box_size', 1))
+
+ outputs = np.array(outputs)
+ gts = np.array(gts)
+ masks = np.array(masks)
+ threshold_bbox = np.array(threshold_bbox)
+ threshold_head_box = np.array(threshold_head_box)
+ box_sizes = np.array(box_sizes).reshape([-1, 1])
+
+ if 'PCK' in metrics:
+ _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_bbox)
+ info_str.append(('PCK', pck))
+
+ if 'PCKh' in metrics:
+ _, pckh, _ = keypoint_pck_accuracy(outputs, gts, masks, pckh_thr,
+ threshold_head_box)
+ info_str.append(('PCKh', pckh))
+
+ if 'AUC' in metrics:
+ info_str.append(('AUC', keypoint_auc(outputs, gts, masks,
+ auc_nor)))
+
+ if 'EPE' in metrics:
+ info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
+
+ if 'NME' in metrics:
+ normalize_factor = self._get_normalize_factor(
+ gts=gts, box_sizes=box_sizes)
+ info_str.append(
+ ('NME', keypoint_nme(outputs, gts, masks, normalize_factor)))
+
+ return info_str
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
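+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# a standalone reproduction of the bbox -> (center, scale) arithmetic used by
+# _xywh2cs, without the training-time random center shift. The fixed pixel
+# std of 200 and the 1.25 padding follow the method above; the sample numbers
+# are made up.
+def _xywh2cs_sketch(x, y, w, h, image_size=(192, 256), padding=1.25):
+    """Return (center, scale) for a bbox with pixel std fixed at 200."""
+    aspect_ratio = image_size[0] / image_size[1]
+    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+    if w > aspect_ratio * h:
+        h = w / aspect_ratio          # box too wide: grow height
+    elif w < aspect_ratio * h:
+        w = h * aspect_ratio          # box too tall: grow width
+    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) * padding
+    return center, scale
+
+
+if __name__ == '__main__':
+    _center, _scale = _xywh2cs_sketch(10, 20, 50, 100)
+    # bbox (10, 20, 50, 100) with a 192x256 input resolution gives
+    # center (35, 70) and scale (0.469, 0.625) after the context padding.
+    assert np.allclose(_center, [35.0, 70.0])
+    assert np.allclose(_scale, [0.46875, 0.625])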
diff --git a/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e52927032d87e93021307804dfabe08a5b7ee3b6
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbVidTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 2D top-down pose estimation with single-view RGB
+ video as the input.
+
+    All datasets should subclass it.
+ All subclasses should overwrite:
+ Methods:`_get_db`, 'evaluate'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where videos/images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.ann_info['use_different_joint_weights'] = data_cfg.get(
+ 'use_different_joint_weights', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ @abstractmethod
+ def _write_keypoint_results(keypoint_results, gt_folder, pred_folder):
+ """Write results into a json file."""
+
+ @abstractmethod
+ def _do_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation.
+ Args:
+ gt_folder (str): The folder of the json files storing
+ ground truth keypoint annotations.
+ pred_folder (str): The folder of the json files storing
+ prediction results.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
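+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# in the video variant the keypoint results are grouped per image id, so the
+# sort/de-duplication runs inside each group. The method does not touch
+# ``self``, so it can be exercised directly; the toy input is made up.
+if __name__ == '__main__':
+    _toy_kpts = {
+        'vid0_frame000.jpg': [{'bbox_id': 1}, {'bbox_id': 0}, {'bbox_id': 1}],
+    }
+    _deduped = Kpt2dSviewRgbVidTopDownDataset._sort_and_unique_bboxes(
+        None, _toy_kpts)
+    assert [p['bbox_id'] for p in _deduped['vid0_frame000.jpg']] == [0, 1]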
diff --git a/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py b/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..94cc1c22e97b8e5e798e366dfc69b611fa742d6e
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dMviewRgbImgDirectDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 3D top-down pose estimation with multi-view RGB
+ images as the input.
+
+ All subclasses should overwrite:
+ Methods:`_get_db`, 'evaluate'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['space_size'] = data_cfg['space_size']
+ self.ann_info['space_center'] = data_cfg['space_center']
+ self.ann_info['cube_size'] = data_cfg['cube_size']
+ self.ann_info['scale_aware_sigma'] = data_cfg.get(
+ 'scale_aware_sigma', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] <= dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['num_scales'] = 1
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ self.load_config(data_cfg)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+ self.num_joints = data_cfg['num_joints']
+ self.num_cameras = data_cfg['num_cameras']
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.subset = data_cfg.get('subset', 'train')
+ self.need_2d_label = data_cfg.get('need_2d_label', False)
+ self.need_camera_param = True
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db) // self.num_cameras
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = {}
+ # return self.pipeline(results)
+ for c in range(self.num_cameras):
+ result = copy.deepcopy(self.db[self.num_cameras * idx + c])
+ result['ann_info'] = self.ann_info
+ results[c] = result
+
+ return self.pipeline(results)
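+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# the multi-view dataset stores one db record per (frame, camera) pair, so
+# sample ``idx`` gathers the records at indices
+# ``idx * num_cameras, ..., idx * num_cameras + num_cameras - 1`` and
+# __len__ divides the db length by the camera count. The toy db is made up.
+if __name__ == '__main__':
+    _num_cameras = 3
+    _toy_db = [{'frame': f, 'cam': c}
+               for f in range(2) for c in range(_num_cameras)]
+    _idx = 1  # the second multi-view sample
+    _sample = {
+        c: _toy_db[_num_cameras * _idx + c] for c in range(_num_cameras)
+    }
+    assert all(rec['frame'] == 1 for rec in _sample.values())
+    assert len(_toy_db) // _num_cameras == 2  # dataset length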
diff --git a/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py b/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbdb9989e83d9b8ff91cfd99f2fec6d87b13aceb
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py
@@ -0,0 +1,226 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dSviewKpt2dDataset(Dataset, metaclass=ABCMeta):
+ """Base class for 3D human pose datasets.
+
+    Subclasses should consider overwriting the following methods:
+ - load_config
+ - load_annotations
+ - build_sample_indices
+ - evaluate
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ - num_joints: Number of joints.
+ - seq_len: Number of frames in a sequence. Default: 1.
+ - seq_frame_interval: Extract frames from the video at certain
+ intervals. Default: 1.
+ - causal: If set to True, the rightmost input frame will be the
+ target frame. Otherwise, the middle input frame will be the
+ target frame. Default: True.
+ - temporal_padding: Whether to pad the video so that poses will be
+ predicted for every frame in the video. Default: False
+ - subset: Reduce dataset size by fraction. Default: 1.
+ - need_2d_label: Whether need 2D joint labels or not.
+ Default: False.
+
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.data_cfg = copy.deepcopy(data_cfg)
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+ self.ann_info = {}
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ self.load_config(self.data_cfg)
+
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ self.data_info = self.load_annotations()
+ self.sample_indices = self.build_sample_indices()
+ self.pipeline = Compose(pipeline)
+
+ self.name2id = {
+ name: i
+ for i, name in enumerate(self.data_info['imgnames'])
+ }
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+
+ self.num_joints = data_cfg['num_joints']
+ self.seq_len = data_cfg.get('seq_len', 1)
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.causal = data_cfg.get('causal', True)
+ self.temporal_padding = data_cfg.get('temporal_padding', False)
+ self.subset = data_cfg.get('subset', 1)
+ self.need_2d_label = data_cfg.get('need_2d_label', False)
+ self.need_camera_param = False
+
+ def load_annotations(self):
+ """Load data annotation."""
+ data = np.load(self.ann_file)
+
+ # get image info
+ _imgnames = data['imgname']
+ num_imgs = len(_imgnames)
+ num_joints = self.ann_info['num_joints']
+
+ if 'scale' in data:
+ _scales = data['scale'].astype(np.float32)
+ else:
+ _scales = np.zeros(num_imgs, dtype=np.float32)
+
+ if 'center' in data:
+ _centers = data['center'].astype(np.float32)
+ else:
+ _centers = np.zeros((num_imgs, 2), dtype=np.float32)
+
+ # get 3D pose
+ if 'S' in data.keys():
+ _joints_3d = data['S'].astype(np.float32)
+ else:
+ _joints_3d = np.zeros((num_imgs, num_joints, 4), dtype=np.float32)
+
+ # get 2D pose
+ if 'part' in data.keys():
+ _joints_2d = data['part'].astype(np.float32)
+ else:
+ _joints_2d = np.zeros((num_imgs, num_joints, 3), dtype=np.float32)
+
+ data_info = {
+ 'imgnames': _imgnames,
+ 'joints_3d': _joints_3d,
+ 'joints_2d': _joints_2d,
+ 'scales': _scales,
+ 'centers': _centers,
+ }
+
+ return data_info
+
+ def build_sample_indices(self):
+ """Build sample indices.
+
+        The default method creates sample indices such that each sample is a
+        single frame (i.e. seq_len=1). Override this method in the subclass
+        to define how frames are sampled to form data samples.
+
+ Outputs:
+ sample_indices [list(tuple)]: the frame indices of each sample.
+ For a sample, all frames will be treated as an input sequence,
+ and the ground-truth pose of the last frame will be the target.
+ """
+ sample_indices = []
+ if self.seq_len == 1:
+            num_imgs = len(self.data_info['imgnames'])
+ sample_indices = [(idx, ) for idx in range(num_imgs)]
+ else:
+ raise NotImplementedError('Multi-frame data sample unsupported!')
+ return sample_indices
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ def prepare_data(self, idx):
+ """Get data sample."""
+ data = self.data_info
+
+ frame_ids = self.sample_indices[idx]
+ assert len(frame_ids) == self.seq_len
+
+ # get the 3D/2D pose sequence
+ _joints_3d = data['joints_3d'][frame_ids]
+ _joints_2d = data['joints_2d'][frame_ids]
+
+ # get the image info
+ _imgnames = data['imgnames'][frame_ids]
+ _centers = data['centers'][frame_ids]
+ _scales = data['scales'][frame_ids]
+ if _scales.ndim == 1:
+ _scales = np.stack([_scales, _scales], axis=1)
+
+ target_idx = -1 if self.causal else int(self.seq_len) // 2
+
+ results = {
+ 'input_2d': _joints_2d[:, :, :2],
+ 'input_2d_visible': _joints_2d[:, :, -1:],
+ 'input_3d': _joints_3d[:, :, :3],
+ 'input_3d_visible': _joints_3d[:, :, -1:],
+ 'target': _joints_3d[target_idx, :, :3],
+ 'target_visible': _joints_3d[target_idx, :, -1:],
+ 'image_paths': _imgnames,
+ 'target_image_path': _imgnames[target_idx],
+ 'scales': _scales,
+ 'centers': _centers,
+ }
+
+ if self.need_2d_label:
+ results['target_2d'] = _joints_2d[target_idx, :, :2]
+
+ if self.need_camera_param:
+ _cam_param = self.get_camera_param(_imgnames[0])
+ results['camera_param'] = _cam_param
+ # get image size from camera parameters
+ if 'w' in _cam_param and 'h' in _cam_param:
+ results['image_width'] = _cam_param['w']
+ results['image_height'] = _cam_param['h']
+
+ return results
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.sample_indices)
+
+ def __getitem__(self, idx):
+ """Get a sample with given index."""
+ results = copy.deepcopy(self.prepare_data(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ raise NotImplementedError
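+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# with the default seq_len=1 every frame becomes its own single-frame sample,
+# and for longer sequences the target frame picked in prepare_data() is the
+# last frame when ``causal`` is True and the middle frame otherwise.
+if __name__ == '__main__':
+    _num_imgs = 4
+    _sample_indices = [(idx, ) for idx in range(_num_imgs)]
+    assert _sample_indices == [(0, ), (1, ), (2, ), (3, )]
+
+    _seq_len, _causal = 27, False
+    _target_idx = -1 if _causal else int(_seq_len) // 2
+    assert _target_idx == 13  # middle frame of a 27-frame window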
diff --git a/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py b/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..af01e81868d0a918da474be896525cbe47ef006d
--- /dev/null
+++ b/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py
@@ -0,0 +1,256 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dSviewRgbImgTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 3D top-down pose estimation with single-view RGB
+ image as the input.
+
+    All datasets should subclass it.
+ All subclasses should overwrite:
+ Methods:`_get_db`, 'evaluate'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _cam2pixel(cam_coord, f, c):
+ """Transform the joints from their camera coordinates to their pixel
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ img_coord (ndarray[N, 3]): the coordinates (x, y, 0)
+ in the image plane.
+ """
+ x = cam_coord[:, 0] / (cam_coord[:, 2] + 1e-8) * f[0] + c[0]
+ y = cam_coord[:, 1] / (cam_coord[:, 2] + 1e-8) * f[1] + c[1]
+ z = np.zeros_like(x)
+ img_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return img_coord
+
+ @staticmethod
+ def _world2cam(world_coord, R, T):
+ """Transform the joints from their world coordinates to their camera
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ world_coord (ndarray[3, N]): 3D joints coordinates
+ in the world coordinate system
+ R (ndarray[3, 3]): camera rotation matrix
+ T (ndarray[3, 1]): camera position (x, y, z)
+
+ Returns:
+ cam_coord (ndarray[3, N]): 3D joints coordinates
+ in the camera coordinate system
+ """
+ cam_coord = np.dot(R, world_coord - T)
+ return cam_coord
+
+ @staticmethod
+ def _pixel2cam(pixel_coord, f, c):
+ """Transform the joints from their pixel coordinates to their camera
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ pixel_coord (ndarray[N, 3]): 3D joints coordinates
+ in the pixel coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ """
+ x = (pixel_coord[:, 0] - c[0]) / f[0] * pixel_coord[:, 2]
+ y = (pixel_coord[:, 1] - c[1]) / f[1] * pixel_coord[:, 2]
+ z = pixel_coord[:, 2]
+ cam_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return cam_coord
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
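+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# round trip between camera and pixel coordinates with the static helpers
+# above. The intrinsics (focal length ``f`` and principal point ``c``) and
+# the joint position are made up.
+if __name__ == '__main__':
+    _f = np.array([1000.0, 1000.0])   # focal lengths in pixels
+    _c = np.array([320.0, 240.0])     # principal point in pixels
+    _cam = np.array([[0.1, -0.2, 2.0]])  # one joint in camera coordinates
+    _pix = Kpt3dSviewRgbImgTopDownDataset._cam2pixel(_cam, _f, _c)
+    # _cam2pixel zeroes the third column, so restore the depth before
+    # back-projecting with _pixel2cam.
+    _pix[:, 2] = _cam[:, 2]
+    _cam_back = Kpt3dSviewRgbImgTopDownDataset._pixel2cam(_pix, _f, _c)
+    assert np.allclose(_cam, _cam_back, atol=1e-6)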
diff --git a/mmpose/datasets/datasets/body3d/__init__.py b/mmpose/datasets/datasets/body3d/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bc25a9ebbbeb936a304c9a0416fb9892b79cbef
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .body3d_h36m_dataset import Body3DH36MDataset
+from .body3d_mpi_inf_3dhp_dataset import Body3DMpiInf3dhpDataset
+from .body3d_mview_direct_panoptic_dataset import \
+ Body3DMviewDirectPanopticDataset
+from .body3d_semi_supervision_dataset import Body3DSemiSupervisionDataset
+
+__all__ = [
+ 'Body3DH36MDataset', 'Body3DSemiSupervisionDataset',
+ 'Body3DMpiInf3dhpDataset', 'Body3DMviewDirectPanopticDataset'
+]
diff --git a/mmpose/datasets/datasets/body3d/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/body3d/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3769a379851f1136f87751b3ec2f964b3e2154fd
Binary files /dev/null and b/mmpose/datasets/datasets/body3d/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/body3d/__pycache__/body3d_h36m_dataset.cpython-310.pyc b/mmpose/datasets/datasets/body3d/__pycache__/body3d_h36m_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e41cd3ed41a4f8e0cf0c48c6d08f72b947c1a1d6
Binary files /dev/null and b/mmpose/datasets/datasets/body3d/__pycache__/body3d_h36m_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/body3d/__pycache__/body3d_mpi_inf_3dhp_dataset.cpython-310.pyc b/mmpose/datasets/datasets/body3d/__pycache__/body3d_mpi_inf_3dhp_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78d27ab02a4bc30d84dd1a6807e6ccdae2228af7
Binary files /dev/null and b/mmpose/datasets/datasets/body3d/__pycache__/body3d_mpi_inf_3dhp_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/body3d/__pycache__/body3d_mview_direct_panoptic_dataset.cpython-310.pyc b/mmpose/datasets/datasets/body3d/__pycache__/body3d_mview_direct_panoptic_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31c00a062f3b96acaef07cd730f350a2e751021e
Binary files /dev/null and b/mmpose/datasets/datasets/body3d/__pycache__/body3d_mview_direct_panoptic_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/body3d/__pycache__/body3d_semi_supervision_dataset.cpython-310.pyc b/mmpose/datasets/datasets/body3d/__pycache__/body3d_semi_supervision_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7711e271671bcf3d02f3a946b441f7067d186cff
Binary files /dev/null and b/mmpose/datasets/datasets/body3d/__pycache__/body3d_semi_supervision_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/body3d/body3d_base_dataset.py b/mmpose/datasets/datasets/body3d/body3d_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c29232cf74e4af2cf5b60cd71bd301e4dca7f3
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/body3d_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class Body3DBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt3dSviewKpt2dDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'Body3DBaseDataset has been replaced by '
+            'Kpt3dSviewKpt2dDataset. '
+            'Check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py b/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae4949d5c5a869bfd37a2f19d47afafc3c1c3eea
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py
@@ -0,0 +1,343 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation import keypoint_mpjpe
+from mmpose.datasets.datasets.base import Kpt3dSviewKpt2dDataset
+from ...builder import DATASETS
+
+
+@DATASETS.register_module()
+class Body3DH36MDataset(Kpt3dSviewKpt2dDataset):
+ """Human3.6M dataset for 3D human pose estimation.
+
+ "Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human
+    Sensing in Natural Environments", TPAMI'2014.
+    More details can be found in the paper.
+
+ Human3.6M keypoint indexes::
+
+ 0: 'root (pelvis)',
+ 1: 'right_hip',
+ 2: 'right_knee',
+ 3: 'right_foot',
+ 4: 'left_hip',
+ 5: 'left_knee',
+ 6: 'left_foot',
+ 7: 'spine',
+ 8: 'thorax',
+ 9: 'neck_base',
+ 10: 'head',
+ 11: 'left_shoulder',
+ 12: 'left_elbow',
+ 13: 'left_wrist',
+ 14: 'right_shoulder',
+ 15: 'right_elbow',
+ 16: 'right_wrist'
+
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ JOINT_NAMES = [
+ 'Root', 'RHip', 'RKnee', 'RFoot', 'LHip', 'LKnee', 'LFoot', 'Spine',
+ 'Thorax', 'NeckBase', 'Head', 'LShoulder', 'LElbow', 'LWrist',
+ 'RShoulder', 'RElbow', 'RWrist'
+ ]
+
+ # 2D joint source options:
+ # "gt": from the annotation file
+ # "detection": from a detection result file of 2D keypoint
+ # "pipeline": will be generate by the pipeline
+ SUPPORTED_JOINT_2D_SRC = {'gt', 'detection', 'pipeline'}
+
+ # metric
+ ALLOWED_METRICS = {'mpjpe', 'p-mpjpe', 'n-mpjpe'}
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/h36m.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ def load_config(self, data_cfg):
+ super().load_config(data_cfg)
+ # h36m specific attributes
+ self.joint_2d_src = data_cfg.get('joint_2d_src', 'gt')
+ if self.joint_2d_src not in self.SUPPORTED_JOINT_2D_SRC:
+ raise ValueError(
+ f'Unsupported joint_2d_src "{self.joint_2d_src}". '
+ f'Supported options are {self.SUPPORTED_JOINT_2D_SRC}')
+
+ self.joint_2d_det_file = data_cfg.get('joint_2d_det_file', None)
+
+ self.need_camera_param = data_cfg.get('need_camera_param', False)
+ if self.need_camera_param:
+ assert 'camera_param_file' in data_cfg
+ self.camera_param = self._load_camera_param(
+ data_cfg['camera_param_file'])
+
+ # h36m specific annotation info
+ ann_info = {}
+ ann_info['use_different_joint_weights'] = False
+ # action filter
+ actions = data_cfg.get('actions', '_all_')
+ self.actions = set(
+ actions if isinstance(actions, (list, tuple)) else [actions])
+
+ # subject filter
+ subjects = data_cfg.get('subjects', '_all_')
+ self.subjects = set(
+ subjects if isinstance(subjects, (list, tuple)) else [subjects])
+
+ self.ann_info.update(ann_info)
+
+ def load_annotations(self):
+ data_info = super().load_annotations()
+
+ # get 2D joints
+ if self.joint_2d_src == 'gt':
+ data_info['joints_2d'] = data_info['joints_2d']
+ elif self.joint_2d_src == 'detection':
+ data_info['joints_2d'] = self._load_joint_2d_detection(
+ self.joint_2d_det_file)
+ assert data_info['joints_2d'].shape[0] == data_info[
+ 'joints_3d'].shape[0]
+ assert data_info['joints_2d'].shape[2] == 3
+ elif self.joint_2d_src == 'pipeline':
+ # joint_2d will be generated in the pipeline
+ pass
+ else:
+ raise NotImplementedError(
+ f'Unhandled joint_2d_src option {self.joint_2d_src}')
+
+ return data_info
+
+ @staticmethod
+ def _parse_h36m_imgname(imgname):
+ """Parse imgname to get information of subject, action and camera.
+
+ A typical h36m image filename is like:
+ S1_Directions_1.54138969_000001.jpg
+ """
+ subj, rest = osp.basename(imgname).split('_', 1)
+ action, rest = rest.split('.', 1)
+ camera, rest = rest.split('_', 1)
+
+ return subj, action, camera
+
+ def build_sample_indices(self):
+ """Split original videos into sequences and build frame indices.
+
+ This method overrides the default one in the base class.
+ """
+
+ # Group frames into videos. Assume that self.data_info is
+ # chronological.
+ video_frames = defaultdict(list)
+ for idx, imgname in enumerate(self.data_info['imgnames']):
+ subj, action, camera = self._parse_h36m_imgname(imgname)
+
+ if '_all_' not in self.actions and action not in self.actions:
+ continue
+
+ if '_all_' not in self.subjects and subj not in self.subjects:
+ continue
+
+ video_frames[(subj, action, camera)].append(idx)
+
+ # build sample indices
+ sample_indices = []
+ _len = (self.seq_len - 1) * self.seq_frame_interval + 1
+ _step = self.seq_frame_interval
+ for _, _indices in sorted(video_frames.items()):
+ n_frame = len(_indices)
+
+ if self.temporal_padding:
+ # Pad the sequence so that every frame in the sequence will be
+ # predicted.
+ if self.causal:
+ frames_left = self.seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (self.seq_len - 1) // 2
+ frames_right = frames_left
+ for i in range(n_frame):
+ pad_left = max(0, frames_left - i // _step)
+ pad_right = max(0,
+ frames_right - (n_frame - 1 - i) // _step)
+ start = max(i % _step, i - frames_left * _step)
+ end = min(n_frame - (n_frame - 1 - i) % _step,
+ i + frames_right * _step + 1)
+ sample_indices.append([_indices[0]] * pad_left +
+ _indices[start:end:_step] +
+ [_indices[-1]] * pad_right)
+ else:
+ seqs_from_video = [
+ _indices[i:(i + _len):_step]
+ for i in range(0, n_frame - _len + 1)
+ ]
+ sample_indices.extend(seqs_from_video)
+
+ # reduce dataset size if self.subset < 1
+ assert 0 < self.subset <= 1
+ subset_size = int(len(sample_indices) * self.subset)
+ start = np.random.randint(0, len(sample_indices) - subset_size + 1)
+ end = start + subset_size
+
+ return sample_indices[start:end]
+
+ def _load_joint_2d_detection(self, det_file):
+ """"Load 2D joint detection results from file."""
+ joints_2d = np.load(det_file).astype(np.float32)
+
+ return joints_2d
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+                    f'Unsupported metric "{_metric}" for Human3.6M '
+                    f'dataset. Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ image_paths = result['target_image_paths']
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ target_id = self.name2id[image_paths[i]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'target_id': target_id,
+ })
+
+ mmcv.dump(kpts, res_file)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts)
+ elif _metric == 'p-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='p-mpjpe')
+ elif _metric == 'n-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='n-mpjpe')
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(_nv_tuples)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ def _report_mpjpe(self, keypoint_results, mode='mpjpe'):
+ """Cauculate mean per joint position error (MPJPE) or its variants like
+ P-MPJPE or N-MPJPE.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DH36MDataset.evaluate' for details.
+ mode (str): Specify mpjpe variants. Supported options are:
+
+ - ``'mpjpe'``: Standard MPJPE.
+ - ``'p-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ - ``'n-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ in scale only.
+ """
+
+ preds = []
+ gts = []
+ masks = []
+ action_category_indices = defaultdict(list)
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+ masks.append(gt_visible)
+
+ action = self._parse_h36m_imgname(
+ self.data_info['imgnames'][target_id])[1]
+ action_category = action.split('_')[0]
+ action_category_indices[action_category].append(idx)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.stack(masks).squeeze(-1) > 0
+
+ err_name = mode.upper()
+ if mode == 'mpjpe':
+ alignment = 'none'
+ elif mode == 'p-mpjpe':
+ alignment = 'procrustes'
+ elif mode == 'n-mpjpe':
+ alignment = 'scale'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_mpjpe(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ for action_category, indices in action_category_indices.items():
+ _error = keypoint_mpjpe(preds[indices], gts[indices],
+ masks[indices])
+ name_value_tuples.append((f'{err_name}_{action_category}', _error))
+
+ return name_value_tuples
+
+ def _load_camera_param(self, camera_param_file):
+ """Load camera parameters from file."""
+ return mmcv.load(camera_param_file)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ assert hasattr(self, 'camera_param')
+ subj, _, camera = self._parse_h36m_imgname(imgname)
+ return self.camera_param[(subj, camera)]
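+
+
+# ---------------------------------------------------------------------------
+# Illustrative sketch (hypothetical, not part of the upstream mmpose module):
+# the filename parser above drives both the action/subject filters and the
+# grouping of frames into per-video sequences. The sample name is made up
+# but follows the documented S<subject>_<action>.<camera>_<frame>.jpg layout.
+if __name__ == '__main__':
+    _name = 'S1_Directions_1.54138969_000001.jpg'
+    _subj, _action, _camera = Body3DH36MDataset._parse_h36m_imgname(_name)
+    assert (_subj, _action, _camera) == ('S1', 'Directions_1', '54138969')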
diff --git a/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py b/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d06fcd2f200e8c5c3d4174be90551990cc6886e
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py
@@ -0,0 +1,417 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation import (keypoint_3d_auc, keypoint_3d_pck,
+ keypoint_mpjpe)
+from mmpose.datasets.datasets.base import Kpt3dSviewKpt2dDataset
+from ...builder import DATASETS
+
+
+@DATASETS.register_module()
+class Body3DMpiInf3dhpDataset(Kpt3dSviewKpt2dDataset):
+ """MPI-INF-3DHP dataset for 3D human pose estimation.
+
+ "Monocular 3D Human Pose Estimation In The Wild Using Improved CNN
+ Supervision", 3DV'2017.
+    More details can be found in the paper.
+
+ MPI-INF-3DHP keypoint indexes:
+
+ 0: 'head_top',
+ 1: 'neck',
+ 2: 'right_shoulder',
+ 3: 'right_elbow',
+ 4: 'right_wrist',
+        5: 'left_shoulder',
+ 6: 'left_elbow',
+ 7: 'left_wrist',
+ 8: 'right_hip',
+ 9: 'right_knee',
+ 10: 'right_ankle',
+ 11: 'left_hip',
+ 12: 'left_knee',
+ 13: 'left_ankle',
+ 14: 'root (pelvis)',
+ 15: 'spine',
+ 16: 'head'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+        data_cfg (dict): Data configurations. Please refer to the docstring of
+            Kpt3dSviewKpt2dDataset for common data attributes. The following
+            are MPI-INF-3DHP specific attributes.
+ - joint_2d_src: 2D joint source. Options include:
+ "gt": from the annotation file
+ "detection": from a detection result file of 2D keypoint
+ "pipeline": will be generate by the pipeline
+ Default: "gt".
+ - joint_2d_det_file: Path to the detection result file of 2D
+ keypoint. Only used when joint_2d_src == "detection".
+ - need_camera_param: Whether need camera parameters or not.
+ Default: False.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ JOINT_NAMES = [
+ 'HeadTop', 'Neck', 'RShoulder', 'RElbow', 'RWrist', 'LShoulder',
+ 'LElbow', 'LWrist', 'RHip', 'RKnee', 'RAnkle', 'LHip', 'LKnee',
+ 'LAnkle', 'Root', 'Spine', 'Head'
+ ]
+
+ # 2D joint source options:
+ # "gt": from the annotation file
+ # "detection": from a detection result file of 2D keypoint
+ # "pipeline": will be generate by the pipeline
+ SUPPORTED_JOINT_2D_SRC = {'gt', 'detection', 'pipeline'}
+
+ # metric
+ ALLOWED_METRICS = {
+ 'mpjpe', 'p-mpjpe', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc'
+ }
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpi_inf_3dhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ def load_config(self, data_cfg):
+ super().load_config(data_cfg)
+ # mpi-inf-3dhp specific attributes
+ self.joint_2d_src = data_cfg.get('joint_2d_src', 'gt')
+ if self.joint_2d_src not in self.SUPPORTED_JOINT_2D_SRC:
+ raise ValueError(
+ f'Unsupported joint_2d_src "{self.joint_2d_src}". '
+ f'Supported options are {self.SUPPORTED_JOINT_2D_SRC}')
+
+ self.joint_2d_det_file = data_cfg.get('joint_2d_det_file', None)
+
+ self.need_camera_param = data_cfg.get('need_camera_param', False)
+ if self.need_camera_param:
+ assert 'camera_param_file' in data_cfg
+ self.camera_param = self._load_camera_param(
+ data_cfg['camera_param_file'])
+
+ # mpi-inf-3dhp specific annotation info
+ ann_info = {}
+ ann_info['use_different_joint_weights'] = False
+
+ self.ann_info.update(ann_info)
+
+ def load_annotations(self):
+ data_info = super().load_annotations()
+
+ # get 2D joints
+ if self.joint_2d_src == 'gt':
+ data_info['joints_2d'] = data_info['joints_2d']
+ elif self.joint_2d_src == 'detection':
+ data_info['joints_2d'] = self._load_joint_2d_detection(
+ self.joint_2d_det_file)
+ assert data_info['joints_2d'].shape[0] == data_info[
+ 'joints_3d'].shape[0]
+ assert data_info['joints_2d'].shape[2] == 3
+ elif self.joint_2d_src == 'pipeline':
+ # joint_2d will be generated in the pipeline
+ pass
+ else:
+ raise NotImplementedError(
+ f'Unhandled joint_2d_src option {self.joint_2d_src}')
+
+ return data_info
+
+ @staticmethod
+ def _parse_mpi_inf_3dhp_imgname(imgname):
+ """Parse imgname to get information of subject, sequence and camera.
+
+ A typical mpi-inf-3dhp training image filename is like:
+ S1_Seq1_Cam0_000001.jpg. A typical mpi-inf-3dhp testing image filename
+ is like: TS1_000001.jpg
+ """
+ if imgname[0] == 'S':
+ subj, rest = imgname.split('_', 1)
+ seq, rest = rest.split('_', 1)
+ camera, rest = rest.split('_', 1)
+ return subj, seq, camera
+ else:
+ subj, rest = imgname.split('_', 1)
+ return subj, None, None
+
+ def build_sample_indices(self):
+ """Split original videos into sequences and build frame indices.
+
+ This method overrides the default one in the base class.
+ """
+
+ # Group frames into videos. Assume that self.data_info is
+ # chronological.
+ video_frames = defaultdict(list)
+ for idx, imgname in enumerate(self.data_info['imgnames']):
+ subj, seq, camera = self._parse_mpi_inf_3dhp_imgname(imgname)
+ if seq is not None:
+ video_frames[(subj, seq, camera)].append(idx)
+ else:
+ video_frames[subj].append(idx)
+
+ # build sample indices
+ sample_indices = []
+ _len = (self.seq_len - 1) * self.seq_frame_interval + 1
+ _step = self.seq_frame_interval
+ for _, _indices in sorted(video_frames.items()):
+ n_frame = len(_indices)
+
+ if self.temporal_padding:
+ # Pad the sequence so that every frame in the sequence will be
+ # predicted.
+ if self.causal:
+ frames_left = self.seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (self.seq_len - 1) // 2
+ frames_right = frames_left
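+ # frames_left/frames_right are the numbers of context frames needed on
+ # each side of the target frame; missing context at the video
+ # boundaries is padded below by repeating the first/last frame.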
+ for i in range(n_frame):
+ pad_left = max(0, frames_left - i // _step)
+ pad_right = max(0,
+ frames_right - (n_frame - 1 - i) // _step)
+ start = max(i % _step, i - frames_left * _step)
+ end = min(n_frame - (n_frame - 1 - i) % _step,
+ i + frames_right * _step + 1)
+ sample_indices.append([_indices[0]] * pad_left +
+ _indices[start:end:_step] +
+ [_indices[-1]] * pad_right)
+ else:
+ seqs_from_video = [
+ _indices[i:(i + _len):_step]
+ for i in range(0, n_frame - _len + 1)
+ ]
+ sample_indices.extend(seqs_from_video)
+
+ # reduce dataset size if self.subset < 1
+ assert 0 < self.subset <= 1
+ subset_size = int(len(sample_indices) * self.subset)
+ start = np.random.randint(0, len(sample_indices) - subset_size + 1)
+ end = start + subset_size
+
+ return sample_indices[start:end]
+
+ def _load_joint_2d_detection(self, det_file):
+ """"Load 2D joint detection results from file."""
+ joints_2d = np.load(det_file).astype(np.float32)
+
+ return joints_2d
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+ f'Unsupported metric "{_metric}" for mpi-inf-3dhp dataset. '
+ f'Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ image_paths = result['target_image_paths']
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ target_id = self.name2id[image_paths[i]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'target_id': target_id,
+ })
+
+ mmcv.dump(kpts, res_file)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts)
+ elif _metric == 'p-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='p-mpjpe')
+ elif _metric == '3dpck':
+ _nv_tuples = self._report_3d_pck(kpts)
+ elif _metric == 'p-3dpck':
+ _nv_tuples = self._report_3d_pck(kpts, mode='p-3dpck')
+ elif _metric == '3dauc':
+ _nv_tuples = self._report_3d_auc(kpts)
+ elif _metric == 'p-3dauc':
+ _nv_tuples = self._report_3d_auc(kpts, mode='p-3dauc')
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(_nv_tuples)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ def _report_mpjpe(self, keypoint_results, mode='mpjpe'):
+ """Cauculate mean per joint position error (MPJPE) or its variants
+ P-MPJPE.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+ mode (str): Specify mpjpe variants. Supported options are:
+ - ``'mpjpe'``: Standard MPJPE.
+ - ``'p-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
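+ # All joints are treated as valid when computing the metric.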
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == 'mpjpe':
+ alignment = 'none'
+ elif mode == 'p-mpjpe':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_mpjpe(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+ def _report_3d_pck(self, keypoint_results, mode='3dpck'):
+ """Cauculate Percentage of Correct Keypoints (3DPCK) w. or w/o
+ Procrustes alignment.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+ mode (str): Specify 3DPCK variants. Supported options are:
+ - ``'3dpck'``: Standard 3DPCK.
+ - ``'p-3dpck'``: 3DPCK after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == '3dpck':
+ alignment = 'none'
+ elif mode == 'p-3dpck':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_3d_pck(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+ def _report_3d_auc(self, keypoint_results, mode='3dauc'):
+ """Cauculate the Area Under the Curve (AUC) computed for a range of
+ 3DPCK thresholds.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+ mode (str): Specify 3DAUC variants. Supported options are:
+
+ - ``'3dauc'``: Standard 3DAUC.
+ - ``'p-3dauc'``: 3DAUC after aligning prediction to
+ groundtruth via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == '3dauc':
+ alignment = 'none'
+ elif mode == 'p-3dauc':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_3d_auc(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+ def _load_camera_param(self, camera_param_file):
+ """Load camera parameters from file."""
+ return mmcv.load(camera_param_file)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ assert hasattr(self, 'camera_param')
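+ # Strip the 11-character frame suffix (e.g. '_000001.jpg') so the
+ # remaining prefix matches the keys in the camera parameter file.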
+ return self.camera_param[imgname[:-11]]
diff --git a/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py b/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5bf92d182b972cd1821990bb3fc673d99f624e3
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py
@@ -0,0 +1,493 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import glob
+import json
+import os.path as osp
+import pickle
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.camera import SimpleCamera
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.datasets.base import Kpt3dMviewRgbImgDirectDataset
+
+
+@DATASETS.register_module()
+class Body3DMviewDirectPanopticDataset(Kpt3dMviewRgbImgDirectDataset):
+ """Panoptic dataset for direct multi-view human pose estimation.
+
+ "Panoptic Studio: A Massively Multiview System for Social Motion
+ Capture", ICCV'2015.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads both 2D and 3D annotations as well as camera parameters.
+
+ Panoptic keypoint indexes::
+
+ 'neck': 0,
+ 'nose': 1,
+ 'mid-hip': 2,
+ 'l-shoulder': 3,
+ 'l-elbow': 4,
+ 'l-wrist': 5,
+ 'l-hip': 6,
+ 'l-knee': 7,
+ 'l-ankle': 8,
+ 'r-shoulder': 9,
+ 'r-elbow': 10,
+ 'r-wrist': 11,
+ 'r-hip': 12,
+ 'r-knee': 13,
+ 'r-ankle': 14,
+ 'l-eye': 15,
+ 'l-ear': 16,
+ 'r-eye': 17,
+ 'r-ear': 18,
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+ ALLOWED_METRICS = {'mpjpe', 'mAP'}
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/panoptic_body3d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.load_config(data_cfg)
+ self.ann_info['use_different_joint_weights'] = False
+
+ if ann_file is None:
+ self.db_file = osp.join(
+ img_prefix, f'group_{self.subset}_cam{self.num_cameras}.pkl')
+ else:
+ self.db_file = ann_file
+
+ if osp.exists(self.db_file):
+ with open(self.db_file, 'rb') as f:
+ info = pickle.load(f)
+ assert info['sequence_list'] == self.seq_list
+ assert info['interval'] == self.seq_frame_interval
+ assert info['cam_list'] == self.cam_list
+ self.db = info['db']
+ else:
+ self.db = self._get_db()
+ info = {
+ 'sequence_list': self.seq_list,
+ 'interval': self.seq_frame_interval,
+ 'cam_list': self.cam_list,
+ 'db': self.db
+ }
+ with open(self.db_file, 'wb') as f:
+ pickle.dump(info, f)
+
+ self.db_size = len(self.db)
+
+ print(f'=> load {len(self.db)} samples')
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+ self.num_joints = data_cfg['num_joints']
+ assert self.num_joints <= 19
+ self.seq_list = data_cfg['seq_list']
+ self.cam_list = data_cfg['cam_list']
+ self.num_cameras = data_cfg['num_cameras']
+ assert self.num_cameras == len(self.cam_list)
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.subset = data_cfg.get('subset', 'train')
+ self.need_camera_param = True
+ self.root_id = data_cfg.get('root_id', 0)
+ self.max_persons = data_cfg.get('max_num', 10)
+
+ def _get_scale(self, raw_image_size):
+ heatmap_size = self.ann_info['heatmap_size']
+ image_size = self.ann_info['image_size']
+ assert heatmap_size[0][0] / heatmap_size[0][1] \
+ == image_size[0] / image_size[1]
+ w, h = raw_image_size
+ w_resized, h_resized = image_size
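+ # Pad the raw image size to the aspect ratio of the model input and
+ # use the padded size as the scale.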
+ if w / w_resized < h / h_resized:
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ else:
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+
+ scale = np.array([w_pad, h_pad], dtype=np.float32)
+
+ return scale
+
+ def _get_cam(self, seq):
+ """Get camera parameters.
+
+ Args:
+ seq (str): Sequence name.
+
+ Returns: Camera parameters.
+ """
+ cam_file = osp.join(self.img_prefix, seq,
+ 'calibration_{:s}.json'.format(seq))
+ with open(cam_file) as cfile:
+ calib = json.load(cfile)
+
+ M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
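+ # M permutes the world axes (swap y and z with a sign flip) to match
+ # the coordinate convention used by the rest of the pipeline.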
+ cameras = {}
+ for cam in calib['cameras']:
+ if (cam['panel'], cam['node']) in self.cam_list:
+ sel_cam = {}
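+ # Invert the calibration's world-to-camera extrinsics to obtain the
+ # camera-to-world R/T stored in the parameter dict.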
+ R_w2c = np.array(cam['R']).dot(M)
+ T_w2c = np.array(cam['t']).reshape((3, 1)) * 10.0 # cm to mm
+ R_c2w = R_w2c.T
+ T_c2w = -R_w2c.T @ T_w2c
+ sel_cam['R'] = R_c2w.tolist()
+ sel_cam['T'] = T_c2w.tolist()
+ sel_cam['K'] = cam['K'][:2]
+ distCoef = cam['distCoef']
+ sel_cam['k'] = [distCoef[0], distCoef[1], distCoef[4]]
+ sel_cam['p'] = [distCoef[2], distCoef[3]]
+ cameras[(cam['panel'], cam['node'])] = sel_cam
+
+ return cameras
+
+ def _get_db(self):
+ """Get dataset base.
+
+ Returns:
+ dict: the dataset base (2D and 3D information)
+ """
+ width = 1920
+ height = 1080
+ db = []
+ sample_id = 0
+ for seq in self.seq_list:
+ cameras = self._get_cam(seq)
+ curr_anno = osp.join(self.img_prefix, seq,
+ 'hdPose3d_stage1_coco19')
+ anno_files = sorted(glob.iglob('{:s}/*.json'.format(curr_anno)))
+ print(f'load sequence: {seq}', flush=True)
+ for i, file in enumerate(anno_files):
+ if i % self.seq_frame_interval == 0:
+ with open(file) as dfile:
+ bodies = json.load(dfile)['bodies']
+ if len(bodies) == 0:
+ continue
+
+ for k, cam_param in cameras.items():
+ single_view_camera = SimpleCamera(cam_param)
+ postfix = osp.basename(file).replace('body3DScene', '')
+ prefix = '{:02d}_{:02d}'.format(k[0], k[1])
+ image_file = osp.join(seq, 'hdImgs', prefix,
+ prefix + postfix)
+ image_file = image_file.replace('json', 'jpg')
+
+ all_poses_3d = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+ all_poses_vis_3d = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+ all_roots_3d = np.zeros((self.max_persons, 3),
+ dtype=np.float32)
+ all_poses = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+
+ cnt = 0
+ person_ids = -np.ones(self.max_persons, dtype=int)  # np.int is removed in recent NumPy
+ for body in bodies:
+ if cnt >= self.max_persons:
+ break
+ pose3d = np.array(body['joints19']).reshape(
+ (-1, 4))
+ pose3d = pose3d[:self.num_joints]
+
+ joints_vis = pose3d[:, -1] > 0.1
+
+ if not joints_vis[self.root_id]:
+ continue
+
+ # Coordinate transformation
+ M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0],
+ [0.0, 1.0, 0.0]])
+ pose3d[:, 0:3] = pose3d[:, 0:3].dot(M) * 10.0
+
+ all_poses_3d[cnt] = pose3d[:, :3]
+ all_roots_3d[cnt] = pose3d[self.root_id, :3]
+ all_poses_vis_3d[cnt] = np.repeat(
+ np.reshape(joints_vis, (-1, 1)), 3, axis=1)
+
+ pose2d = np.zeros((pose3d.shape[0], 3))
+ # get pose_2d from pose_3d
+ pose2d[:, :2] = single_view_camera.world_to_pixel(
+ pose3d[:, :3])
+ x_check = np.bitwise_and(pose2d[:, 0] >= 0,
+ pose2d[:, 0] <= width - 1)
+ y_check = np.bitwise_and(
+ pose2d[:, 1] >= 0, pose2d[:, 1] <= height - 1)
+ check = np.bitwise_and(x_check, y_check)
+ joints_vis[np.logical_not(check)] = 0
+ pose2d[:, -1] = joints_vis
+
+ all_poses[cnt] = pose2d
+ person_ids[cnt] = body['id']
+ cnt += 1
+
+ if cnt > 0:
+ db.append({
+ 'image_file':
+ osp.join(self.img_prefix, image_file),
+ 'joints_3d':
+ all_poses_3d,
+ 'person_ids':
+ person_ids,
+ 'joints_3d_visible':
+ all_poses_vis_3d,
+ 'joints': [all_poses],
+ 'roots_3d':
+ all_roots_3d,
+ 'camera':
+ cam_param,
+ 'num_persons':
+ cnt,
+ 'sample_id':
+ sample_id,
+ 'center':
+ np.array((width / 2, height / 2),
+ dtype=np.float32),
+ 'scale':
+ self._get_scale((width, height))
+ })
+ sample_id += 1
+ return db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ """
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+ - pose_3d (np.ndarray): predicted 3D human pose
+ - sample_id (np.ndarray): sample id of a frame.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Defaults: 'mpjpe'.
+ **kwargs:
+
+ Returns:
+
+ """
+ pose_3ds = np.concatenate([result['pose_3d'] for result in results],
+ axis=0)
+ sample_ids = []
+ for result in results:
+ sample_ids.extend(result['sample_id'])
+
+ _results = [
+ dict(sample_id=sample_id, pose_3d=pose_3d)
+ for (sample_id, pose_3d) in zip(sample_ids, pose_3ds)
+ ]
+ _results = self._sort_and_unique_outputs(_results, key='sample_id')
+
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+ f'Unsupported metric "{_metric}"'
+ f'Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ mmcv.dump(_results, res_file)
+
+ eval_list = []
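+ # The db holds one record per camera view, so the number of evaluated
+ # frames is db_size divided by the number of cameras.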
+ gt_num = self.db_size // self.num_cameras
+ assert len(
+ _results) == gt_num, f'number mismatch: {len(_results)}, {gt_num}'
+
+ total_gt = 0
+ for i in range(gt_num):
+ index = self.num_cameras * i
+ db_rec = copy.deepcopy(self.db[index])
+ joints_3d = db_rec['joints_3d']
+ joints_3d_vis = db_rec['joints_3d_visible']
+
+ if joints_3d_vis.sum() < 1:
+ continue
+
+ pred = _results[i]['pose_3d'].copy()
+ pred = pred[pred[:, 0, 3] >= 0]
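+ # Match each remaining (valid) predicted pose to its closest
+ # ground-truth person by MPJPE.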
+ for pose in pred:
+ mpjpes = []
+ for (gt, gt_vis) in zip(joints_3d, joints_3d_vis):
+ vis = gt_vis[:, 0] > 0
+ if vis.sum() < 1:
+ break
+ mpjpe = np.mean(
+ np.sqrt(
+ np.sum((pose[vis, 0:3] - gt[vis])**2, axis=-1)))
+ mpjpes.append(mpjpe)
+ min_gt = np.argmin(mpjpes)
+ min_mpjpe = np.min(mpjpes)
+ score = pose[0, 4]
+ eval_list.append({
+ 'mpjpe': float(min_mpjpe),
+ 'score': float(score),
+ 'gt_id': int(total_gt + min_gt)
+ })
+
+ total_gt += (joints_3d_vis[:, :, 0].sum(-1) >= 1).sum()
+
+ mpjpe_threshold = np.arange(25, 155, 25)
+ aps = []
+ ars = []
+ for t in mpjpe_threshold:
+ ap, ar = self._eval_list_to_ap(eval_list, total_gt, t)
+ aps.append(ap)
+ ars.append(ar)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ stats_names = ['RECALL 500mm', 'MPJPE 500mm']
+ info_str = list(
+ zip(stats_names, [
+ self._eval_list_to_recall(eval_list, total_gt),
+ self._eval_list_to_mpjpe(eval_list)
+ ]))
+ elif _metric == 'mAP':
+ stats_names = [
+ 'AP 25', 'AP 50', 'AP 75', 'AP 100', 'AP 125', 'AP 150',
+ 'mAP', 'AR 25', 'AR 50', 'AR 75', 'AR 100', 'AR 125',
+ 'AR 150', 'mAR'
+ ]
+ mAP = np.array(aps).mean()
+ mAR = np.array(ars).mean()
+ info_str = list(zip(stats_names, aps + [mAP] + ars + [mAR]))
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ @staticmethod
+ def _eval_list_to_ap(eval_list, total_gt, threshold):
+ """Get Average Precision (AP) and Average Recall at a certain
+ threshold."""
+
+ eval_list.sort(key=lambda k: k['score'], reverse=True)
+ total_num = len(eval_list)
+
+ tp = np.zeros(total_num)
+ fp = np.zeros(total_num)
+ gt_det = []
+ for i, item in enumerate(eval_list):
+ if item['mpjpe'] < threshold and item['gt_id'] not in gt_det:
+ tp[i] = 1
+ gt_det.append(item['gt_id'])
+ else:
+ fp[i] = 1
+ tp = np.cumsum(tp)
+ fp = np.cumsum(fp)
+ recall = tp / (total_gt + 1e-5)
+ precise = tp / (tp + fp + 1e-5)
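+ # Make the precision envelope monotonically non-increasing, then
+ # integrate the precision-recall curve (all-point interpolation).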
+ for n in range(total_num - 2, -1, -1):
+ precise[n] = max(precise[n], precise[n + 1])
+
+ precise = np.concatenate(([0], precise, [0]))
+ recall = np.concatenate(([0], recall, [1]))
+ index = np.where(recall[1:] != recall[:-1])[0]
+ ap = np.sum((recall[index + 1] - recall[index]) * precise[index + 1])
+
+ return ap, recall[-2]
+
+ @staticmethod
+ def _eval_list_to_mpjpe(eval_list, threshold=500):
+ """Get MPJPE within a certain threshold."""
+ eval_list.sort(key=lambda k: k['score'], reverse=True)
+ gt_det = []
+
+ mpjpes = []
+ for i, item in enumerate(eval_list):
+ if item['mpjpe'] < threshold and item['gt_id'] not in gt_det:
+ mpjpes.append(item['mpjpe'])
+ gt_det.append(item['gt_id'])
+
+ return np.mean(mpjpes) if len(mpjpes) > 0 else np.inf
+
+ @staticmethod
+ def _eval_list_to_recall(eval_list, total_gt, threshold=500):
+ """Get Recall at a certain threshold."""
+ gt_ids = [e['gt_id'] for e in eval_list if e['mpjpe'] < threshold]
+
+ return len(np.unique(gt_ids)) / total_gt
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = {}
+ for c in range(self.num_cameras):
+ result = copy.deepcopy(self.db[self.num_cameras * idx + c])
+ result['ann_info'] = self.ann_info
+ width = 1920
+ height = 1080
+ result['mask'] = [np.ones((height, width), dtype=np.float32)]
+ results[c] = result
+
+ return self.pipeline(results)
+
+ @staticmethod
+ def _sort_and_unique_outputs(outputs, key='sample_id'):
+ """sort outputs and remove the repeated ones."""
+ outputs = sorted(outputs, key=lambda x: x[key])
+ num_outputs = len(outputs)
+ for i in range(num_outputs - 1, 0, -1):
+ if outputs[i][key] == outputs[i - 1][key]:
+ del outputs[i]
+
+ return outputs
diff --git a/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py b/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..491d54914d5838a1759b7da7fb16ad2b205ba83c
--- /dev/null
+++ b/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS, build_dataset
+
+
+@DATASETS.register_module()
+class Body3DSemiSupervisionDataset(Dataset):
+ """Mix Dataset for semi-supervised training in 3D human pose estimation
+ task.
+
+ The dataset combines data from two datasets (a labeled one and an unlabeled
+ one) and returns a dict containing data from both.
+
+ Args:
+ labeled_dataset (Dataset): Dataset with 3D keypoint annotations.
+ unlabeled_dataset (Dataset): Dataset without 3D keypoint annotations.
+ """
+
+ def __init__(self, labeled_dataset, unlabeled_dataset):
+ super().__init__()
+ self.labeled_dataset = build_dataset(labeled_dataset)
+ self.unlabeled_dataset = build_dataset(unlabeled_dataset)
+ self.length = len(self.unlabeled_dataset)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, i):
+ """Given index, get the data from unlabeled dataset and randomly sample
+ an item from labeled dataset.
+
+ Return a dict containing data from labeled and unlabeled dataset.
+ """
+ data = self.unlabeled_dataset[i]
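+ # Pair the unlabeled sample with a randomly drawn labeled sample and
+ # merge both dicts into a single training sample.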
+ rand_ind = np.random.randint(0, len(self.labeled_dataset))
+ labeled_data = self.labeled_dataset[rand_ind]
+ data.update(labeled_data)
+ return data
diff --git a/mmpose/datasets/datasets/bottom_up/__init__.py b/mmpose/datasets/datasets/bottom_up/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ac79377f8ef8c66f279e8c68c44c8bd61d87dbb
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_aic import BottomUpAicDataset
+from .bottom_up_coco import BottomUpCocoDataset
+from .bottom_up_coco_wholebody import BottomUpCocoWholeBodyDataset
+from .bottom_up_crowdpose import BottomUpCrowdPoseDataset
+from .bottom_up_mhp import BottomUpMhpDataset
+
+__all__ = [
+ 'BottomUpCocoDataset', 'BottomUpCrowdPoseDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset'
+]
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..723532710b35362ee83a5ad6cdc46e7be277cefc
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_aic.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_aic.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ab762ee60ef2f65e44e28a22a65fda5a8ca938e
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_aic.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d3015c35d750ddbdf80fde32ebe83930327df0b
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco_wholebody.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco_wholebody.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95c7fe3f8b955d6b003606b13a5b69f83503846c
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_coco_wholebody.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_crowdpose.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_crowdpose.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..539bb93f5fd9a916cb18ef1721fa95cc02a7b3d6
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_crowdpose.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_mhp.cpython-310.pyc b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_mhp.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1a124ff98cc338a2b0bc427b82d9dfb3178a871
Binary files /dev/null and b/mmpose/datasets/datasets/bottom_up/__pycache__/bottom_up_mhp.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py b/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..e56b72586f36bc0758876fa5d0ce3016efad3802
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py
@@ -0,0 +1,105 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpAicDataset(BottomUpCocoDataset):
+ """Aic dataset for bottom-up pose estimation.
+
+ "AI Challenger : A Large-scale Dataset for Going Deeper
+ in Image Understanding", arXiv'2017.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AIC keypoint indexes::
+
+ 0: "right_shoulder",
+ 1: "right_elbow",
+ 2: "right_wrist",
+ 3: "left_shoulder",
+ 4: "left_elbow",
+ 5: "left_wrist",
+ 6: "right_hip",
+ 7: "right_knee",
+ 8: "right_ankle",
+ 9: "left_hip",
+ 10: "left_knee",
+ 11: "left_ankle",
+ 12: "head_top",
+ 13: "neck"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aic.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py b/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a2fea5d34b208b0d3703fe9dff1294e053ec950
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from torch.utils.data import Dataset
+
+
+class BottomUpBaseDataset(Dataset):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgBottomUpDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise ImportError(
+ 'BottomUpBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgBottomUpDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py b/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa2967fe22db1427975568aec40e7f1313d1de2d
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py
@@ -0,0 +1,305 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.core.post_processing import oks_nms, soft_oks_nms
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgBottomUpDataset
+
+
+@DATASETS.register_module()
+class BottomUpCocoDataset(Kpt2dSviewRgbImgBottomUpDataset):
+ """COCO dataset for bottom-up pose estimation.
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _get_single(self, idx):
+ """Get anno for a single image.
+
+ Args:
+ idx (int): image idx
+
+ Returns:
+ dict: info for model training
+ """
+ coco = self.coco
+ img_id = self.img_ids[idx]
+ ann_ids = coco.getAnnIds(imgIds=img_id)
+ anno = coco.loadAnns(ann_ids)
+
+ mask = self._get_mask(anno, idx)
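+ # Drop crowd annotations that have no labeled keypoints.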
+ anno = [
+ obj.copy() for obj in anno
+ if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+ ]
+
+ joints = self._get_joints(anno)
+ mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])]
+ joints_list = [
+ joints.copy() for _ in range(self.ann_info['num_scales'])
+ ]
+
+ db_rec = {}
+ db_rec['dataset'] = self.dataset_name
+ db_rec['image_file'] = osp.join(self.img_prefix, self.id2name[img_id])
+ db_rec['mask'] = mask_list
+ db_rec['joints'] = joints_list
+
+ return db_rec
+
+ def _get_joints(self, anno):
+ """Get joints for all people in an image."""
+ num_people = len(anno)
+
+ if self.ann_info['scale_aware_sigma']:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 4),
+ dtype=np.float32)
+ else:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+
+ for i, obj in enumerate(anno):
+ joints[i, :, :3] = \
+ np.array(obj['keypoints']).reshape([-1, 3])
+ if self.ann_info['scale_aware_sigma']:
+ # get person box
+ box = obj['bbox']
+ size = max(box[2], box[3])
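+ # Scale the heatmap Gaussian sigma with the person's box size.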
+ sigma = size / self.base_size * self.base_sigma
+ if self.int_sigma:
+ sigma = int(np.ceil(sigma))
+ assert sigma > 0, sigma
+ joints[i, :, 3] = sigma
+
+ return joints
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (list[np.ndarray(P, K, 3+tag_num)]): \
+ Pose predictions for all people in images.
+ - scores (list[P]): List of person scores.
+ - image_path (list[str]): For example, ['coco/images/\
+ val2017/000000397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ preds = []
+ scores = []
+ image_paths = []
+
+ for result in results:
+ preds.append(result['preds'])
+ scores.append(result['scores'])
+ image_paths.append(result['image_paths'][0])
+
+ kpts = defaultdict(list)
+ # iterate over images
+ for idx, _preds in enumerate(preds):
+ str_image_path = image_paths[idx]
+ image_id = self.name2id[osp.basename(str_image_path)]
+ # iterate over people
+ for idx_person, kpt in enumerate(_preds):
+ # use bbox area
+ area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (
+ np.max(kpt[:, 1]) - np.min(kpt[:, 1]))
+
+ kpts[image_id].append({
+ 'keypoints': kpt[:, 0:3],
+ 'score': scores[idx][idx_person],
+ 'tags': kpt[:, 3],
+ 'image_id': image_id,
+ 'area': area,
+ })
+
+ valid_kpts = []
+ for img in kpts.keys():
+ img_kpts = kpts[img]
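+ # Optionally suppress duplicate person detections with (soft) OKS NMS
+ # before writing the results.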
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, self.oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ for img_kpt, key_point in zip(img_kpts, key_points):
+ kpt = key_point.reshape((self.ann_info['num_joints'], 3))
+ left_top = np.amin(kpt, axis=0)
+ right_bottom = np.amax(kpt, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ cat_results.append({
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': img_kpt['score'],
+ 'bbox': [left_top[0], left_top[1], w, h]
+ })
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py b/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py
new file mode 100644
index 0000000000000000000000000000000000000000..363d2efb2ec93dedb8abbe78430af52970c4afc3
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py
@@ -0,0 +1,238 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpCocoWholeBodyDataset(BottomUpCocoDataset):
+ """CocoWholeBodyDataset dataset for bottom-up pose estimation.
+
+ "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ In total, we have 133 keypoints for wholebody pose estimation.
+
+ COCO-WholeBody keypoint indexes::
+
+ 0-16: 17 body keypoints,
+ 17-22: 6 foot keypoints,
+ 23-90: 68 face keypoints,
+ 91-132: 42 hand keypoints
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco_wholebody.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.body_num = 17
+ self.foot_num = 6
+ self.face_num = 68
+ self.left_hand_num = 21
+ self.right_hand_num = 21
+
+ print(f'=> num_images: {self.num_images}')
+
+ def _get_joints(self, anno):
+ """Get joints for all people in an image."""
+ num_people = len(anno)
+
+ if self.ann_info['scale_aware_sigma']:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 4),
+ dtype=np.float32)
+ else:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+
+ for i, obj in enumerate(anno):
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+
+ joints[i, :self.ann_info['num_joints'], :3] = keypoints
+ if self.ann_info['scale_aware_sigma']:
+ # get person box
+ box = obj['bbox']
+ size = max(box[2], box[3])
+ sigma = size / self.base_size * self.base_sigma
+ if self.int_sigma:
+ sigma = int(np.ceil(sigma))
+ assert sigma > 0, sigma
+ joints[i, :, 3] = sigma
+
+ return joints
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
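+ # Cumulative offsets (in flattened x, y, score triplets) splitting the
+ # 133 keypoints into body/foot/face/left-hand/right-hand groups.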
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num,
+ self.left_hand_num, self.right_hand_num
+ ]) * 3
+
+ for img_kpt, key_point in zip(img_kpts, key_points):
+ kpt = key_point.reshape((self.ann_info['num_joints'], 3))
+ left_top = np.amin(kpt, axis=0)
+ right_bottom = np.amax(kpt, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ cat_results.append({
+ 'image_id':
+ img_kpt['image_id'],
+ 'category_id':
+ cat_id,
+ 'keypoints':
+ key_point[cuts[0]:cuts[1]].tolist(),
+ 'foot_kpts':
+ key_point[cuts[1]:cuts[2]].tolist(),
+ 'face_kpts':
+ key_point[cuts[2]:cuts[3]].tolist(),
+ 'lefthand_kpts':
+ key_point[cuts[3]:cuts[4]].tolist(),
+ 'righthand_kpts':
+ key_point[cuts[4]:cuts[5]].tolist(),
+ 'score':
+ img_kpt['score'],
+ 'bbox': [left_top[0], left_top[1], w, h]
+ })
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num, self.left_hand_num,
+ self.right_hand_num
+ ])
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_body',
+ self.sigmas[cuts[0]:cuts[1]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_foot',
+ self.sigmas[cuts[1]:cuts[2]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_face',
+ self.sigmas[cuts[2]:cuts[3]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_lefthand',
+ self.sigmas[cuts[3]:cuts[4]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_righthand',
+ self.sigmas[cuts[4]:cuts[5]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_wholebody',
+ self.sigmas,
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py b/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebabf3e1ddddd96de8aea9bfe00a095480b3112f
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py
@@ -0,0 +1,109 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpCrowdPoseDataset(BottomUpCocoDataset):
+ """CrowdPose dataset for bottom-up pose estimation.
+
+ "CrowdPose: Efficient Crowded Scenes Pose Estimation and
+ A New Benchmark", CVPR'2019.
+ More details can be found in the `paper
+ `__.
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ CrowdPose keypoint indexes::
+
+ 0: 'left_shoulder',
+ 1: 'right_shoulder',
+ 2: 'left_elbow',
+ 3: 'right_elbow',
+ 4: 'left_wrist',
+ 5: 'right_wrist',
+ 6: 'left_hip',
+ 7: 'right_hip',
+ 8: 'left_knee',
+ 9: 'right_knee',
+ 10: 'left_ankle',
+ 11: 'right_ankle',
+ 12: 'top_head',
+ 13: 'neck'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/crowdpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)',
+ 'AP(H)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_crowd',
+ self.sigmas,
+ use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py b/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..143812332512e56e6962a780d8900d6ca8823c96
--- /dev/null
+++ b/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py
@@ -0,0 +1,108 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpMhpDataset(BottomUpCocoDataset):
+ """MHPv2.0 dataset for top-down pose estimation.
+
+ "Understanding Humans in Crowded Scenes: Deep Nested Adversarial
+ Learning and A New Benchmark for Multi-Human Parsing", ACM MM'2018.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MHP keypoint indexes::
+
+ 0: "right ankle",
+ 1: "right knee",
+ 2: "right hip",
+ 3: "left hip",
+ 4: "left knee",
+ 5: "left ankle",
+ 6: "pelvis",
+ 7: "thorax",
+ 8: "upper neck",
+ 9: "head top",
+ 10: "right wrist",
+ 11: "right elbow",
+ 12: "right shoulder",
+ 13: "left shoulder",
+ 14: "left elbow",
+ 15: "left wrist",
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/face/__init__.py b/mmpose/datasets/datasets/face/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ba42d4413a657080bddf6224850e49a5a24601b
--- /dev/null
+++ b/mmpose/datasets/datasets/face/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .face_300w_dataset import Face300WDataset
+from .face_aflw_dataset import FaceAFLWDataset
+from .face_coco_wholebody_dataset import FaceCocoWholeBodyDataset
+from .face_cofw_dataset import FaceCOFWDataset
+from .face_wflw_dataset import FaceWFLWDataset
+
+__all__ = [
+ 'Face300WDataset', 'FaceAFLWDataset', 'FaceWFLWDataset', 'FaceCOFWDataset',
+ 'FaceCocoWholeBodyDataset'
+]
diff --git a/mmpose/datasets/datasets/face/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d64872444f91bf03bc13d2ea5502006e23deb67e
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/__pycache__/face_300w_dataset.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/face_300w_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a8d2a42546078e3d9c3b7bef3dd91be8813e1bdb
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/face_300w_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/__pycache__/face_aflw_dataset.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/face_aflw_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ece5ccf11ef541b8fde778d8b911df12806aca3
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/face_aflw_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/__pycache__/face_coco_wholebody_dataset.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/face_coco_wholebody_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4133de31e882ea3324db2b725ec891ad71f445db
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/face_coco_wholebody_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/__pycache__/face_cofw_dataset.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/face_cofw_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8b42a8e60a376d95d13edff1c8ee7618d2f0365
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/face_cofw_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/__pycache__/face_wflw_dataset.cpython-310.pyc b/mmpose/datasets/datasets/face/__pycache__/face_wflw_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0302352d1cad6ae6a2b8476fc3bb8847da5b13b9
Binary files /dev/null and b/mmpose/datasets/datasets/face/__pycache__/face_wflw_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/face/face_300w_dataset.py b/mmpose/datasets/datasets/face/face_300w_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5b602e09c2df2469444bec306342dc97a9c3d8d
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_300w_dataset.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class Face300WDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face300W dataset for top-down face keypoint localization.
+
+ "300 faces In-the-wild challenge: Database and results",
+ Image and Vision Computing (IMAVIS) 2019.
+
+ The dataset loads raw images and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The landmark annotations follow the 68 points mark-up. The definition
+ can be found in `https://ibug.doc.ic.ac.uk/resources/300-W/`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/300w.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 36, :] - gts[:, 45, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+            - image_paths (list[str]): For example, ['300W/ibug/\
+ image_018.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
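The `_get_normalize_factor` above drives the NME metric for 300W: keypoint errors are divided by the inter-ocular distance, taken between the outer eye corners (indexes 36 and 45 in the 68-point markup). The real scoring happens in the base class's `_report_metric`, which is not part of this diff, so the helper below is only an illustrative NumPy sketch of that normalization.

```python
import numpy as np

def nme_interocular(preds, gts):
    """Illustrative NME: mean keypoint error divided by the inter-ocular
    distance (outer eye corners 36 and 45 in the 68-point 300W markup),
    mirroring Face300WDataset._get_normalize_factor."""
    # preds, gts: (N, K, 2) predicted / ground-truth keypoint coordinates
    interocular = np.linalg.norm(
        gts[:, 36, :] - gts[:, 45, :], axis=1, keepdims=True)  # (N, 1)
    per_kpt_err = np.linalg.norm(preds - gts, axis=2)           # (N, K)
    return float(np.mean(per_kpt_err / interocular))

# toy usage with random 68-point faces
rng = np.random.default_rng(0)
gts = rng.uniform(0, 256, size=(4, 68, 2))
preds = gts + rng.normal(scale=1.5, size=gts.shape)
print(f'NME: {nme_interocular(preds, gts):.4f}')
```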
diff --git a/mmpose/datasets/datasets/face/face_aflw_dataset.py b/mmpose/datasets/datasets/face/face_aflw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..292d9eece7e33e97467088b8710bd2c7c272fe52
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_aflw_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceAFLWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face AFLW dataset for top-down face keypoint localization.
+
+ "Annotated Facial Landmarks in the Wild: A Large-scale,
+ Real-world Database for Facial Landmark Localization".
+ In Proc. First IEEE International Workshop on Benchmarking
+ Facial Image Analysis Technologies, 2011.
+
+    The dataset loads raw images and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    The landmark annotations follow the 19-point mark-up. The definition
+ can be found in `https://www.tugraz.at/institute/icg/research`
+ `/team-bischof/lrs/downloads/aflw/`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aflw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if self.test_mode:
+ # 'box_size' is used as normalization factor
+ assert 'box_size' in obj
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'box_size': obj['box_size'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, box_sizes, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ box_sizes (np.ndarray[N, 1]): box size
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ return np.tile(box_sizes, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+            - image_paths (list[str]): For example, ['aflw/images/flickr/\
+ 0/image00002.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
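AFLW differs from 300W only in the normalizer: `_get_normalize_factor` tiles the per-instance `box_size` field (asserted to exist in test mode in `_get_db`) instead of an inter-ocular distance, presumably because the 19-point AFLW markup includes profile faces where the eye corners are unreliable. A minimal sketch of that variant, with an illustrative helper name, is below.

```python
import numpy as np

def nme_box_size(preds, gts, box_sizes):
    """Illustrative AFLW-style NME: errors are normalized by the per-face
    'box_size' field (cf. FaceAFLWDataset._get_normalize_factor) rather
    than by an inter-ocular distance."""
    # preds, gts: (N, K, 2); box_sizes: (N, 1)
    per_kpt_err = np.linalg.norm(preds - gts, axis=2)  # (N, K)
    return float(np.mean(per_kpt_err / box_sizes))

rng = np.random.default_rng(1)
gts = rng.uniform(0, 200, size=(3, 19, 2))   # 19-point AFLW markup
preds = gts + rng.normal(scale=2.0, size=gts.shape)
box_sizes = np.full((3, 1), 180.0)
print(f'NME (box size): {nme_box_size(preds, gts, box_sizes):.4f}')
```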
diff --git a/mmpose/datasets/datasets/face/face_base_dataset.py b/mmpose/datasets/datasets/face/face_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..466fabbfcbeaa8ba3abe976269ab8a1de56e4e51
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class FaceBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'FaceBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgTopDownDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 '
+            'for details.')
diff --git a/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py b/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef5117a8a06626cb5bc520795cca06e788bf198d
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py
@@ -0,0 +1,198 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceCocoWholeBodyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoWholeBodyDataset for face keypoint localization.
+
+    "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+    More details can be found in
+    the paper.
+
+    The dataset loads raw features and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    The face landmark annotations follow the 68-point mark-up.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/'
+ 'coco_wholebody_face.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if obj['face_valid'] and max(obj['face_kpts']) > 0:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+
+ keypoints = np.array(obj['face_kpts']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['face_box'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['face_box'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 36, :] - gts[:, 45, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate COCO-WholeBody Face keypoint results. The pose prediction
+ results will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+            - image_paths (list[str]): For example, ['coco/train2017/\
+ 000000000009.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
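The `_get_db` above shows how face crops are mined from COCO-WholeBody annotations: only objects whose `face_valid` flag is set and whose `face_kpts` contain at least one labelled point are kept, the flat keypoint list is split into coordinates plus a clamped visibility flag, and the crop geometry comes from `face_box` padded by 1.25. The sketch below reproduces just that filter on a toy annotation dict (not real COCO data); the helper name is illustrative only.

```python
import numpy as np

def face_sample_from_wholebody(obj, num_joints=68):
    """Sketch of the face-instance filter in _get_db: keep an annotation
    only if its face is valid and labelled, and split the flat keypoint
    list into coordinates plus visibility."""
    if not (obj['face_valid'] and max(obj['face_kpts']) > 0):
        return None
    kpts = np.array(obj['face_kpts'], dtype=np.float32).reshape(-1, 3)
    joints = np.zeros((num_joints, 3), dtype=np.float32)
    visible = np.zeros((num_joints, 3), dtype=np.float32)
    joints[:, :2] = kpts[:, :2]
    visible[:, :2] = np.minimum(1, kpts[:, 2:3])
    return {'joints_3d': joints, 'joints_3d_visible': visible,
            'bbox': obj['face_box']}

# toy annotation with a valid face and 68 labelled keypoints
toy = {'face_valid': True,
       'face_kpts': [10.0, 20.0, 2.0] * 68,
       'face_box': [5.0, 5.0, 50.0, 60.0]}
print(face_sample_from_wholebody(toy)['joints_3d_visible'][:2])
```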
diff --git a/mmpose/datasets/datasets/face/face_cofw_dataset.py b/mmpose/datasets/datasets/face/face_cofw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..456ea0e9adbbadb6ecf4dffb3b5ff5e48cf92123
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_cofw_dataset.py
@@ -0,0 +1,198 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceCOFWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face COFW dataset for top-down face keypoint localization.
+
+ "Robust face landmark estimation under occlusion", ICCV'2013.
+
+    The dataset loads raw images and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    The landmark annotations follow the 29-point mark-up. The definition
+ can be found in `http://www.vision.caltech.edu/xpburgos/ICCV13/`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/cofw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 8, :] - gts[:, 9, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+            - image_paths (list[str]): For example, ['cofw/images/\
+ 000001.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/face/face_wflw_dataset.py b/mmpose/datasets/datasets/face/face_wflw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4611e197bd334a3864d8af99f1778af94c51d16
--- /dev/null
+++ b/mmpose/datasets/datasets/face/face_wflw_dataset.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceWFLWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face WFLW dataset for top-down face keypoint localization.
+
+ "Look at Boundary: A Boundary-Aware Face Alignment Algorithm",
+ CVPR'2018.
+
+    The dataset loads raw images and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    The landmark annotations follow the 98-point mark-up. The definition
+ can be found in `https://wywu.github.io/projects/LAB/WFLW.html`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/wflw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 60, :] - gts[:, 72, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+            - image_paths (list[str]): For example, ['wflw/images/\
+ 0--Parade/0_Parade_marchingband_1_1015.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/fashion/__init__.py b/mmpose/datasets/datasets/fashion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..575d6ed4af94686a87443f5938ed8b0d0809540f
--- /dev/null
+++ b/mmpose/datasets/datasets/fashion/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .deepfashion_dataset import DeepFashionDataset
+
+__all__ = ['DeepFashionDataset']
diff --git a/mmpose/datasets/datasets/fashion/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/fashion/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0d6dfbd12c2c29ca17e86e346853b76ec74cda6b
Binary files /dev/null and b/mmpose/datasets/datasets/fashion/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/fashion/__pycache__/deepfashion_dataset.cpython-310.pyc b/mmpose/datasets/datasets/fashion/__pycache__/deepfashion_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..002377453655dee8c52dbfac1d92710236f6e243
Binary files /dev/null and b/mmpose/datasets/datasets/fashion/__pycache__/deepfashion_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/fashion/deepfashion_dataset.py b/mmpose/datasets/datasets/fashion/deepfashion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fef65528c27e4f4bb6c77100b5fd4e398c9129f
--- /dev/null
+++ b/mmpose/datasets/datasets/fashion/deepfashion_dataset.py
@@ -0,0 +1,225 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class DeepFashionDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """DeepFashion dataset (full-body clothes) for fashion landmark detection.
+
+ "DeepFashion: Powering Robust Clothes Recognition
+ and Retrieval with Rich Annotations", CVPR'2016.
+ "Fashion Landmark Detection in the Wild", ECCV'2016.
+
+    The dataset loads raw features and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    The dataset covers three categories: full-body, upper-body and lower-body.
+
+ Fashion landmark indexes for upper-body clothes::
+
+ 0: 'left collar',
+ 1: 'right collar',
+ 2: 'left sleeve',
+ 3: 'right sleeve',
+ 4: 'left hem',
+ 5: 'right hem'
+
+ Fashion landmark indexes for lower-body clothes::
+
+ 0: 'left waistline',
+ 1: 'right waistline',
+ 2: 'left hem',
+ 3: 'right hem'
+
+ Fashion landmark indexes for full-body clothes::
+
+ 0: 'left collar',
+ 1: 'right collar',
+ 2: 'left sleeve',
+ 3: 'right sleeve',
+ 4: 'left waistline',
+ 5: 'right waistline',
+ 6: 'left hem',
+ 7: 'right hem'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ subset='',
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ if subset != '':
+ warnings.warn(
+                'subset is deprecated. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ if subset == 'upper':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_upper.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+ elif subset == 'lower':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_lower.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+ elif subset == 'full':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_full.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+                # use a 1.25x padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['img_00000001.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
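Unlike the face datasets, DeepFashion is scored with 'PCK', 'AUC' and 'EPE'. The metric implementations live behind the base class's `_report_metric` and are not part of this diff; as a rough illustration, a PCK-style accuracy can be sketched as the fraction of visible keypoints whose normalized error falls below a threshold. The helper below is a hypothetical stand-in, not the library's own implementation.

```python
import numpy as np

def pck(preds, gts, mask, thr, norm):
    """Illustrative PCK: fraction of visible keypoints whose normalized
    distance to the ground truth is below `thr`."""
    # preds, gts: (N, K, 2); mask: (N, K) bool; norm: (N, 2) per-sample scale
    dist = np.linalg.norm((preds - gts) / norm[:, None, :], axis=2)  # (N, K)
    return float((dist[mask] < thr).mean())

rng = np.random.default_rng(2)
gts = rng.uniform(0, 100, size=(5, 8, 2))        # 8 full-body landmarks
preds = gts + rng.normal(scale=1.0, size=gts.shape)
mask = np.ones((5, 8), dtype=bool)
norm = np.full((5, 2), 100.0)                    # e.g. a bbox/image scale
print(f'PCK@0.2: {pck(preds, gts, mask, 0.2, norm):.3f}')
```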
diff --git a/mmpose/datasets/datasets/fashion/fashion_base_dataset.py b/mmpose/datasets/datasets/fashion/fashion_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4e5860a478f5b9fb8d7a30873b6a4b0a32c3533
--- /dev/null
+++ b/mmpose/datasets/datasets/fashion/fashion_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class FashionBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'FashionBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgTopDownDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 '
+            'for details.')
diff --git a/mmpose/datasets/datasets/hand/__init__.py b/mmpose/datasets/datasets/hand/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..49159afa6027e82ead87053f7f807267288b7a94
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .freihand_dataset import FreiHandDataset
+from .hand_coco_wholebody_dataset import HandCocoWholeBodyDataset
+from .interhand2d_dataset import InterHand2DDataset
+from .interhand3d_dataset import InterHand3DDataset
+from .onehand10k_dataset import OneHand10KDataset
+from .panoptic_hand2d_dataset import PanopticDataset
+from .rhd2d_dataset import Rhd2DDataset
+
+__all__ = [
+ 'FreiHandDataset', 'InterHand2DDataset', 'InterHand3DDataset',
+ 'OneHand10KDataset', 'PanopticDataset', 'Rhd2DDataset',
+ 'HandCocoWholeBodyDataset'
+]
diff --git a/mmpose/datasets/datasets/hand/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..af8b741caf655adf4436af13d6182a6a31a70b44
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/freihand_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/freihand_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..480d576292ce863f36597a0540c2f3c7e033df9f
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/freihand_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/hand_coco_wholebody_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/hand_coco_wholebody_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b8419bac1637746bf989531ca0b5eb6221f6a554
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/hand_coco_wholebody_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/interhand2d_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/interhand2d_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ec0a86eb71cbfa27cfc2caebec2b5466f10280a1
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/interhand2d_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/interhand3d_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/interhand3d_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e0b42f52fbb97e26e7866a827a7644efd00a3c33
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/interhand3d_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/onehand10k_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/onehand10k_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b412d713e352aa564af5d46de9f82a629d2c038
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/onehand10k_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/panoptic_hand2d_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/panoptic_hand2d_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ec6e90d3b1ecd6bad316fbcfa87bc130a272f26
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/panoptic_hand2d_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/__pycache__/rhd2d_dataset.cpython-310.pyc b/mmpose/datasets/datasets/hand/__pycache__/rhd2d_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f04e203f26f853259645847e6cb68204ae727e9
Binary files /dev/null and b/mmpose/datasets/datasets/hand/__pycache__/rhd2d_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/hand/freihand_dataset.py b/mmpose/datasets/datasets/hand/freihand_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ceeff2ef61619fa42909526218740dbb89027a
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/freihand_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FreiHandDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """FreiHand dataset for top-down hand pose estimation.
+
+ "FreiHAND: A Dataset for Markerless Capture of Hand Pose
+ and Shape from Single RGB Images", ICCV'2019.
+    More details can be found in
+    the paper.
+
+    The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ FreiHand keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/freihand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+                # the original image is 224x224
+ center, scale = self._xywh2cs(0, 0, 224, 224, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['training/rgb/\
+ 00031426.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
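FreiHand skips per-instance boxes entirely: every sample uses the full 224x224 frame, converted to a center/scale pair via `_xywh2cs(0, 0, 224, 224, 0.8)`, where the 0.8 factor tightens the crop slightly. `_xywh2cs` itself is defined in `Kpt2dSviewRgbImgTopDownDataset` and is not shown in this diff; the sketch below assumes the common convention (aspect-ratio correction plus a scale expressed in units of 200 px) and may differ in detail from the real helper.

```python
import numpy as np

def xywh_to_center_scale(x, y, w, h, padding=1.25, aspect_ratio=1.0,
                         pixel_std=200.0):
    """Hypothetical stand-in for _xywh2cs: recentre the box, match the
    assumed model-input aspect ratio (width / height), and express the
    size in units of `pixel_std` pixels, scaled by `padding`."""
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    # grow the shorter side so the crop matches the input aspect ratio
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    else:
        w = h * aspect_ratio
    scale = np.array([w, h], dtype=np.float32) / pixel_std * padding
    return center, scale

# the FreiHand case: the whole 224x224 image, slightly tightened (0.8)
center, scale = xywh_to_center_scale(0, 0, 224, 224, padding=0.8)
print(center, scale)   # -> [112. 112.] [0.896 0.896]
```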
diff --git a/mmpose/datasets/datasets/hand/hand_base_dataset.py b/mmpose/datasets/datasets/hand/hand_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd20846d40ec8f7d9520902d6a289ebedcb07cae
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/hand_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class HandBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'HandBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgTopDownDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 '
+            'for details.')
diff --git a/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py b/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c95cc09fbbe61b16bc36646cff4d394b72a1711
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
@@ -0,0 +1,211 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class HandCocoWholeBodyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoWholeBodyDataset for top-down hand pose estimation.
+
+ "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+    More details can be found in
+    the paper.
+
+    The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO-WholeBody Hand keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody_hand.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ for type in ['left', 'right']:
+ if obj[f'{type}hand_valid'] and max(
+ obj[f'{type}hand_kpts']) > 0:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+
+ keypoints = np.array(obj[f'{type}hand_kpts']).reshape(
+ -1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(
+ 1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(
+ *obj[f'{type}hand_box'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj[f'{type}hand_box'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate COCO-WholeBody Hand keypoint results. The pose prediction
+ results will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
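The evaluate() methods in this diff all follow the same pattern: flatten the per-batch `results` into one record per box, de-duplicate by `bbox_id`, write `result_keypoints.json`, and hand the file to `_report_metric`. The snippet below only illustrates the record layout with a toy `results` list and a faked `name2id` lookup; the actual writer is the base class's `_write_keypoint_results`, which is not shown in this diff.

```python
import json
import numpy as np

# A toy `results` list shaped like the evaluate() docstrings above:
# each entry carries per-image preds, boxes, image_paths and bbox_ids.
img_prefix = 'data/coco/'
results = [{
    'preds': np.zeros((1, 21, 3)),                     # K=21 hand keypoints
    'boxes': np.array([[112.0, 112.0, 1.0, 1.0, 40000.0, 0.9]]),
    'image_paths': [img_prefix + 'train2017/000000000009.jpg'],
    'bbox_ids': [0],
}]

# Mirror of the record-building loop in evaluate(); name2id is faked here,
# in the dataset it maps the relative image path to a COCO image id.
name2id = {'train2017/000000000009.jpg': 9}
kpts = []
for result in results:
    for i, path in enumerate(result['image_paths']):
        box = result['boxes'][i]
        kpts.append({
            'keypoints': result['preds'][i].tolist(),
            'center': box[0:2].tolist(),
            'scale': box[2:4].tolist(),
            'area': float(box[4]),
            'score': float(box[5]),
            'image_id': name2id[path[len(img_prefix):]],
            'bbox_id': result['bbox_ids'][i],
        })

print(json.dumps(kpts)[:120])  # what gets written to result_keypoints.json
```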
diff --git a/mmpose/datasets/datasets/hand/interhand2d_dataset.py b/mmpose/datasets/datasets/hand/interhand2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fea17fa59aa75ea9846c401a3ad2276fb2b525cc
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/interhand2d_dataset.py
@@ -0,0 +1,306 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class InterHand2DDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """InterHand2.6M 2D dataset for top-down hand pose estimation.
+
+ "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+ Estimation from a Single RGB Image", ECCV'2020.
+    More details can be found in
+    the paper.
+
+    The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ InterHand2.6M keypoint indexes::
+
+ 0: 'thumb4',
+ 1: 'thumb3',
+ 2: 'thumb2',
+ 3: 'thumb1',
+ 4: 'forefinger4',
+ 5: 'forefinger3',
+ 6: 'forefinger2',
+ 7: 'forefinger1',
+ 8: 'middle_finger4',
+ 9: 'middle_finger3',
+ 10: 'middle_finger2',
+ 11: 'middle_finger1',
+ 12: 'ring_finger4',
+ 13: 'ring_finger3',
+ 14: 'ring_finger2',
+ 15: 'ring_finger1',
+ 16: 'pinky_finger4',
+ 17: 'pinky_finger3',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger1',
+ 20: 'wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ camera_file (str): Path to the camera file.
+ joint_file (str): Path to the joint file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+        test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ camera_file,
+ joint_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/interhand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.camera_file = camera_file
+ self.joint_file = joint_file
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @staticmethod
+ def _cam2pixel(cam_coord, f, c):
+ """Transform the joints from their camera coordinates to their pixel
+ coordinates.
+
+ Note:
+ - N: number of joints
+
+ Args:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ img_coord (ndarray[N, 3]): the coordinates (x, y, 0)
+ in the image plane.
+ """
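+        # Standard pinhole projection: u = fx * X / Z + cx, v = fy * Y / Z + cy.
+        # The small epsilon keeps the division finite for near-zero depths;
+        # the third channel is only a zero placeholder.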
+ x = cam_coord[:, 0] / (cam_coord[:, 2] + 1e-8) * f[0] + c[0]
+ y = cam_coord[:, 1] / (cam_coord[:, 2] + 1e-8) * f[1] + c[1]
+ z = np.zeros_like(x)
+ img_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return img_coord
+
+ @staticmethod
+ def _world2cam(world_coord, R, T):
+ """Transform the joints from their world coordinates to their camera
+ coordinates.
+
+ Note:
+ - N: number of joints
+
+ Args:
+ world_coord (ndarray[3, N]): 3D joints coordinates
+ in the world coordinate system
+ R (ndarray[3, 3]): camera rotation matrix
+ T (ndarray[3]): camera position (x, y, z)
+
+ Returns:
+ cam_coord (ndarray[3, N]): 3D joints coordinates
+ in the camera coordinate system
+ """
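+        # Rigid transform into the camera frame: X_cam = R @ (X_world - T).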
+ cam_coord = np.dot(R, world_coord - T)
+ return cam_coord
+
+ def _get_db(self):
+ """Load dataset.
+
+ Adapted from 'https://github.com/facebookresearch/InterHand2.6M/'
+ 'blob/master/data/InterHand2.6M/dataset.py'
+        Copyright (c) Facebook Research, under CC-BY-NC 4.0 license.
+ """
+ with open(self.camera_file, 'r') as f:
+ cameras = json.load(f)
+ with open(self.joint_file, 'r') as f:
+ joints = json.load(f)
+ gt_db = []
+ bbox_id = 0
+ for img_id in self.img_ids:
+ num_joints = self.ann_info['num_joints']
+
+ ann_id = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ ann = self.coco.loadAnns(ann_id)[0]
+ img = self.coco.loadImgs(img_id)[0]
+
+ capture_id = str(img['capture'])
+ camera_name = img['camera']
+ frame_idx = str(img['frame_idx'])
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ camera_pos, camera_rot = np.array(
+ cameras[capture_id]['campos'][camera_name],
+ dtype=np.float32), np.array(
+ cameras[capture_id]['camrot'][camera_name],
+ dtype=np.float32)
+ focal, principal_pt = np.array(
+ cameras[capture_id]['focal'][camera_name],
+ dtype=np.float32), np.array(
+ cameras[capture_id]['princpt'][camera_name],
+ dtype=np.float32)
+ joint_world = np.array(
+ joints[capture_id][frame_idx]['world_coord'], dtype=np.float32)
+ joint_cam = self._world2cam(
+ joint_world.transpose(1, 0), camera_rot,
+ camera_pos.reshape(3, 1)).transpose(1, 0)
+ joint_img = self._cam2pixel(joint_cam, focal, principal_pt)[:, :2]
+ joint_img = joint_img.reshape(2, -1, 2)
+
+ joint_valid = np.array(
+ ann['joint_valid'], dtype=np.float32).reshape(2, -1)
+ # if root is not valid -> root-relative 3D pose is also not valid.
+ # Therefore, mark all joints as invalid
+ for hand in range(2):
+ joint_valid[hand, :] *= joint_valid[hand][-1]
+
+ if np.sum(joint_valid[hand, :]) > 11:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+ joints_3d[:, :2] = joint_img[hand, :, :]
+ joints_3d_visible[:, :2] = np.minimum(
+ 1, joint_valid[hand, :].reshape(-1, 1))
+
+ # use the tightest bbox enclosing all keypoints as bbox
+ bbox = [img['width'], img['height'], 0, 0]
+ for i in range(num_joints):
+ if joints_3d_visible[i][0]:
+ bbox[0] = min(bbox[0], joints_3d[i][0])
+ bbox[1] = min(bbox[1], joints_3d[i][1])
+ bbox[2] = max(bbox[2], joints_3d[i][0])
+ bbox[3] = max(bbox[3], joints_3d[i][1])
+
+ bbox[2] -= bbox[0]
+ bbox[3] -= bbox[1]
+
+ # use 1.5bbox as input
+ center, scale = self._xywh2cs(*bbox, 1.5)
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': bbox,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate interhand2d keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+                - preds (np.ndarray[N,K,3]): The first two entries along \
+                    the last axis are coordinates; the third is the score.
+                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                    scale[1], area, score]
+ - image_paths (list[str]): For example, ['Capture12/\
+ 0390_dh_touchROM/cam410209/image62434.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/hand/interhand3d_dataset.py b/mmpose/datasets/datasets/hand/interhand3d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..318d73fbd561c215aa31c83b4df786030400a4d9
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/interhand3d_dataset.py
@@ -0,0 +1,505 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation.top_down_eval import keypoint_epe
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt3dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class InterHand3DDataset(Kpt3dSviewRgbImgTopDownDataset):
+ """InterHand2.6M 3D dataset for top-down hand pose estimation.
+
+ "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+ Estimation from a Single RGB Image", ECCV'2020.
+    More details can be found in the `paper
+    <https://arxiv.org/abs/2008.09309>`__ .
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ InterHand2.6M keypoint indexes::
+
+ 0: 'r_thumb4',
+ 1: 'r_thumb3',
+ 2: 'r_thumb2',
+ 3: 'r_thumb1',
+ 4: 'r_index4',
+ 5: 'r_index3',
+ 6: 'r_index2',
+ 7: 'r_index1',
+ 8: 'r_middle4',
+ 9: 'r_middle3',
+ 10: 'r_middle2',
+ 11: 'r_middle1',
+ 12: 'r_ring4',
+ 13: 'r_ring3',
+ 14: 'r_ring2',
+ 15: 'r_ring1',
+ 16: 'r_pinky4',
+ 17: 'r_pinky3',
+ 18: 'r_pinky2',
+ 19: 'r_pinky1',
+ 20: 'r_wrist',
+ 21: 'l_thumb4',
+ 22: 'l_thumb3',
+ 23: 'l_thumb2',
+ 24: 'l_thumb1',
+ 25: 'l_index4',
+ 26: 'l_index3',
+ 27: 'l_index2',
+ 28: 'l_index1',
+ 29: 'l_middle4',
+ 30: 'l_middle3',
+ 31: 'l_middle2',
+ 32: 'l_middle1',
+ 33: 'l_ring4',
+ 34: 'l_ring3',
+ 35: 'l_ring2',
+ 36: 'l_ring1',
+ 37: 'l_pinky4',
+ 38: 'l_pinky3',
+ 39: 'l_pinky2',
+ 40: 'l_pinky1',
+ 41: 'l_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ camera_file (str): Path to the camera file.
+ joint_file (str): Path to the joint file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+        use_gt_root_depth (bool): Whether to use the ground-truth depth of
+            the wrist (hand root). If False, the root depth is read from
+            rootnet_result_file.
+ rootnet_result_file (str): Path to the wrist depth file.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+        test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ camera_file,
+ joint_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ use_gt_root_depth=True,
+ rootnet_result_file=None,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/interhand3d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['heatmap3d_depth_bound'] = data_cfg[
+ 'heatmap3d_depth_bound']
+ self.ann_info['heatmap_size_root'] = data_cfg['heatmap_size_root']
+ self.ann_info['root_depth_bound'] = data_cfg['root_depth_bound']
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.camera_file = camera_file
+ self.joint_file = joint_file
+
+ self.use_gt_root_depth = use_gt_root_depth
+ if not self.use_gt_root_depth:
+ assert rootnet_result_file is not None
+ self.rootnet_result_file = rootnet_result_file
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @staticmethod
+ def _encode_handtype(hand_type):
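+        """Encode the hand type string as a (right, left) indicator vector;
+        'interacting' activates both entries."""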
+ if hand_type == 'right':
+ return np.array([1, 0], dtype=np.float32)
+ elif hand_type == 'left':
+ return np.array([0, 1], dtype=np.float32)
+ elif hand_type == 'interacting':
+ return np.array([1, 1], dtype=np.float32)
+ else:
+            assert 0, f'Unsupported hand type: {hand_type}'
+
+ def _get_db(self):
+ """Load dataset.
+
+ Adapted from 'https://github.com/facebookresearch/InterHand2.6M/'
+ 'blob/master/data/InterHand2.6M/dataset.py'
+        Copyright (c) Facebook Research, under CC-BY-NC 4.0 license.
+ """
+ with open(self.camera_file, 'r') as f:
+ cameras = json.load(f)
+ with open(self.joint_file, 'r') as f:
+ joints = json.load(f)
+
+ if not self.use_gt_root_depth:
+ rootnet_result = {}
+ with open(self.rootnet_result_file, 'r') as f:
+ rootnet_annot = json.load(f)
+ for i in range(len(rootnet_annot)):
+ rootnet_result[str(
+ rootnet_annot[i]['annot_id'])] = rootnet_annot[i]
+
+ gt_db = []
+ bbox_id = 0
+ for img_id in self.img_ids:
+ num_joints = self.ann_info['num_joints']
+
+ ann_id = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ ann = self.coco.loadAnns(ann_id)[0]
+ img = self.coco.loadImgs(img_id)[0]
+
+ capture_id = str(img['capture'])
+ camera_name = img['camera']
+ frame_idx = str(img['frame_idx'])
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ camera_pos = np.array(
+ cameras[capture_id]['campos'][camera_name], dtype=np.float32)
+ camera_rot = np.array(
+ cameras[capture_id]['camrot'][camera_name], dtype=np.float32)
+ focal = np.array(
+ cameras[capture_id]['focal'][camera_name], dtype=np.float32)
+ principal_pt = np.array(
+ cameras[capture_id]['princpt'][camera_name], dtype=np.float32)
+ joint_world = np.array(
+ joints[capture_id][frame_idx]['world_coord'], dtype=np.float32)
+ joint_cam = self._world2cam(
+ joint_world.transpose(1, 0), camera_rot,
+ camera_pos.reshape(3, 1)).transpose(1, 0)
+ joint_img = self._cam2pixel(joint_cam, focal, principal_pt)[:, :2]
+
+ joint_valid = np.array(
+ ann['joint_valid'], dtype=np.float32).flatten()
+ hand_type = self._encode_handtype(ann['hand_type'])
+ hand_type_valid = ann['hand_type_valid']
+
+ if self.use_gt_root_depth:
+ bbox = np.array(ann['bbox'], dtype=np.float32)
+ # extend the bbox to include some context
+ center, scale = self._xywh2cs(*bbox, 1.25)
+ abs_depth = [joint_cam[20, 2], joint_cam[41, 2]]
+ else:
+ rootnet_ann_data = rootnet_result[str(ann_id[0])]
+ bbox = np.array(rootnet_ann_data['bbox'], dtype=np.float32)
+ # the bboxes have been extended
+ center, scale = self._xywh2cs(*bbox, 1.0)
+ abs_depth = rootnet_ann_data['abs_depth']
+ # 41: 'l_wrist', left hand root
+ # 20: 'r_wrist', right hand root
+ rel_root_depth = joint_cam[41, 2] - joint_cam[20, 2]
+ # if root is not valid, root-relative 3D depth is also invalid.
+ rel_root_valid = joint_valid[20] * joint_valid[41]
+
+ # if root is not valid -> root-relative 3D pose is also not valid.
+ # Therefore, mark all joints as invalid
+ joint_valid[:20] *= joint_valid[20]
+ joint_valid[21:] *= joint_valid[41]
+
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d[:, :2] = joint_img
+ joints_3d[:21, 2] = joint_cam[:21, 2] - joint_cam[20, 2]
+ joints_3d[21:, 2] = joint_cam[21:, 2] - joint_cam[41, 2]
+ joints_3d_visible[...] = np.minimum(1, joint_valid.reshape(-1, 1))
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'hand_type': hand_type,
+ 'hand_type_valid': hand_type_valid,
+ 'rel_root_depth': rel_root_depth,
+ 'rel_root_valid': rel_root_valid,
+ 'abs_depth': abs_depth,
+ 'joints_cam': joint_cam,
+ 'focal': focal,
+ 'princpt': principal_pt,
+ 'dataset': self.dataset_name,
+ 'bbox': bbox,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='MPJPE', **kwargs):
+        """Evaluate interhand3d keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+                - preds (np.ndarray[N,K,3]): The first two entries along \
+                    the last axis are coordinates; the third is the score.
+                - hand_type (np.ndarray[N, 4]): The first two entries are \
+                    the hand type and the last two are the scores.
+                - rel_root_depth (np.ndarray[N]): The depth of the left \
+                    wrist relative to the right wrist.
+                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                    scale[1], area, score]
+ - image_paths (list[str]): For example, ['Capture6/\
+ 0012_aokay_upright/cam410061/image4996.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'MRRPE', 'MPJPE', 'Handedness_acc'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['MRRPE', 'MPJPE', 'Handedness_acc']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+            preds = result.get('preds')
+            if preds is None and 'MPJPE' in metrics:
+                raise KeyError('metric MPJPE requires "preds" in results')
+
+            hand_type = result.get('hand_type')
+            if hand_type is None and 'Handedness_acc' in metrics:
+                raise KeyError(
+                    'metric Handedness_acc requires "hand_type" in results')
+
+            rel_root_depth = result.get('rel_root_depth')
+            if rel_root_depth is None and 'MRRPE' in metrics:
+                raise KeyError(
+                    'metric MRRPE requires "rel_root_depth" in results')
+
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpt = {
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ }
+
+ if preds is not None:
+ kpt['keypoints'] = preds[i, :, :3].tolist()
+ if hand_type is not None:
+ kpt['hand_type'] = hand_type[i][0:2].tolist()
+ kpt['hand_type_score'] = hand_type[i][2:4].tolist()
+ if rel_root_depth is not None:
+ kpt['rel_root_depth'] = float(rel_root_depth[i])
+
+ kpts.append(kpt)
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _get_accuracy(outputs, gts, masks):
+ """Get accuracy of multi-label classification.
+
+ Note:
+ - batch_size: N
+ - label_num: C
+
+ Args:
+ outputs (np.array[N, C]): predicted multi-label.
+            gts (np.array[N, C]): Ground-truth multi-label.
+ masks (np.array[N, ]): masked outputs will be ignored for
+ accuracy calculation.
+
+ Returns:
+ float: mean accuracy
+ """
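+        # e.g. outputs=[[1, 0], [1, 1]] vs gts=[[1, 0], [0, 1]] with masks all
+        # True gives per-sample matches [True, False], i.e. a mean accuracy
+        # of 0.5.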
+ acc = (outputs == gts).all(axis=1)
+ return np.mean(acc[masks])
+
+ def _report_metric(self, res_file, metrics):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'MRRPE', 'MPJPE', 'Handedness_acc'.
+
+ Returns:
+ list: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ gts_rel_root = []
+ preds_rel_root = []
+ rel_root_masks = []
+ gts_joint_coord_cam = []
+ preds_joint_coord_cam = []
+ single_masks = []
+ interacting_masks = []
+ all_masks = []
+ gts_hand_type = []
+ preds_hand_type = []
+ hand_type_masks = []
+
+ for pred, item in zip(preds, self.db):
+ # mrrpe
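+            # MRRPE measures the error of the predicted position of the left
+            # wrist relative to the right wrist, and is evaluated only when
+            # both wrist annotations are valid.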
+ if 'MRRPE' in metrics:
+ if item['hand_type'].all() and item['joints_3d_visible'][
+ 20, 0] and item['joints_3d_visible'][41, 0]:
+ rel_root_masks.append(True)
+
+ pred_left_root_img = np.array(
+ pred['keypoints'][41], dtype=np.float32)[None, :]
+ pred_left_root_img[:, 2] += item['abs_depth'][0] + pred[
+ 'rel_root_depth']
+ pred_left_root_cam = self._pixel2cam(
+ pred_left_root_img, item['focal'], item['princpt'])
+
+ pred_right_root_img = np.array(
+ pred['keypoints'][20], dtype=np.float32)[None, :]
+ pred_right_root_img[:, 2] += item['abs_depth'][0]
+ pred_right_root_cam = self._pixel2cam(
+ pred_right_root_img, item['focal'], item['princpt'])
+
+ preds_rel_root.append(pred_left_root_cam -
+ pred_right_root_cam)
+ gts_rel_root.append(
+ [item['joints_cam'][41] - item['joints_cam'][20]])
+ else:
+ rel_root_masks.append(False)
+ preds_rel_root.append([[0., 0., 0.]])
+ gts_rel_root.append([[0., 0., 0.]])
+
+ if 'MPJPE' in metrics:
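+                # Lift the 2.5D prediction (pixel x/y plus root-relative
+                # depth) back to camera space, then root-center each hand
+                # before measuring the per-joint error.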
+ pred_joint_coord_img = np.array(
+ pred['keypoints'], dtype=np.float32)
+ gt_joint_coord_cam = item['joints_cam'].copy()
+
+ pred_joint_coord_img[:21, 2] += item['abs_depth'][0]
+ pred_joint_coord_img[21:, 2] += item['abs_depth'][1]
+ pred_joint_coord_cam = self._pixel2cam(pred_joint_coord_img,
+ item['focal'],
+ item['princpt'])
+
+ pred_joint_coord_cam[:21] -= pred_joint_coord_cam[20]
+ pred_joint_coord_cam[21:] -= pred_joint_coord_cam[41]
+ gt_joint_coord_cam[:21] -= gt_joint_coord_cam[20]
+ gt_joint_coord_cam[21:] -= gt_joint_coord_cam[41]
+
+ preds_joint_coord_cam.append(pred_joint_coord_cam)
+ gts_joint_coord_cam.append(gt_joint_coord_cam)
+
+ mask = (np.array(item['joints_3d_visible'])[:, 0]) > 0
+
+ if item['hand_type'].all():
+ single_masks.append(
+ np.zeros(self.ann_info['num_joints'], dtype=bool))
+ interacting_masks.append(mask)
+ all_masks.append(mask)
+ else:
+ single_masks.append(mask)
+ interacting_masks.append(
+ np.zeros(self.ann_info['num_joints'], dtype=bool))
+ all_masks.append(mask)
+
+ if 'Handedness_acc' in metrics:
+ pred_hand_type = np.array(pred['hand_type'], dtype=int)
+ preds_hand_type.append(pred_hand_type)
+ gts_hand_type.append(item['hand_type'])
+ hand_type_masks.append(item['hand_type_valid'] > 0)
+
+ gts_rel_root = np.array(gts_rel_root, dtype=np.float32)
+ preds_rel_root = np.array(preds_rel_root, dtype=np.float32)
+ rel_root_masks = np.array(rel_root_masks, dtype=bool)[:, None]
+ gts_joint_coord_cam = np.array(gts_joint_coord_cam, dtype=np.float32)
+ preds_joint_coord_cam = np.array(
+ preds_joint_coord_cam, dtype=np.float32)
+ single_masks = np.array(single_masks, dtype=bool)
+ interacting_masks = np.array(interacting_masks, dtype=bool)
+ all_masks = np.array(all_masks, dtype=bool)
+ gts_hand_type = np.array(gts_hand_type, dtype=int)
+ preds_hand_type = np.array(preds_hand_type, dtype=int)
+ hand_type_masks = np.array(hand_type_masks, dtype=bool)
+
+ if 'MRRPE' in metrics:
+ info_str.append(('MRRPE',
+ keypoint_epe(preds_rel_root, gts_rel_root,
+ rel_root_masks)))
+
+ if 'MPJPE' in metrics:
+ info_str.append(('MPJPE_all',
+ keypoint_epe(preds_joint_coord_cam,
+ gts_joint_coord_cam, all_masks)))
+ info_str.append(('MPJPE_single',
+ keypoint_epe(preds_joint_coord_cam,
+ gts_joint_coord_cam, single_masks)))
+ info_str.append(
+ ('MPJPE_interacting',
+ keypoint_epe(preds_joint_coord_cam, gts_joint_coord_cam,
+ interacting_masks)))
+
+ if 'Handedness_acc' in metrics:
+ info_str.append(('Handedness_acc',
+ self._get_accuracy(preds_hand_type, gts_hand_type,
+ hand_type_masks)))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/hand/onehand10k_dataset.py b/mmpose/datasets/datasets/hand/onehand10k_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9783cab16c7e3c3a9600005008e985d112e71a07
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/onehand10k_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class OneHand10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """OneHand10K dataset for top-down hand pose estimation.
+
+ "Mask-pose Cascaded CNN for 2D Hand Pose Estimation from
+ Single Color Images", TCSVT'2019.
+ More details can be found in the `paper
+ `__ .
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ OneHand10K keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/onehand10k.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate onehand10k keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+                - preds (np.ndarray[N,K,3]): The first two entries along \
+                    the last axis are coordinates; the third is the score.
+                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                    scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py b/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1d7fc6af1ec0dee22a81e2dff8819827062a3d5
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py
@@ -0,0 +1,208 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class PanopticDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Panoptic dataset for top-down hand pose estimation.
+
+ "Hand Keypoint Detection in Single Images using Multiview
+ Bootstrapping", CVPR'2017.
+    More details can be found in the `paper
+    <https://arxiv.org/abs/1704.07809>`__ .
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Panoptic keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/panoptic_hand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # The bbox is the tightest bbox enclosing keypoints.
+ # The paper uses 2.2 bbox as the input, while
+ # we use 1.76 (2.2 * 0.8) bbox as the input.
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.76)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'head_size': obj['head_size'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate panoptic keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+                - preds (np.ndarray[N,K,3]): The first two entries along \
+                    the last axis are coordinates; the third is the score.
+                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                    scale[1], area, score]
+ - image_paths (list[str]): For example, ['hand_labels/\
+ manual_test/000648952_02_l.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCKh', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/hand/rhd2d_dataset.py b/mmpose/datasets/datasets/hand/rhd2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..3667f5fb672f71b08331706656049734cdfa790d
--- /dev/null
+++ b/mmpose/datasets/datasets/hand/rhd2d_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class Rhd2DDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Rendered Handpose Dataset for top-down hand pose estimation.
+
+ "Learning to Estimate 3D Hand Pose from Single RGB Images",
+ ICCV'2017.
+    More details can be found in the `paper
+    <https://arxiv.org/abs/1705.01389>`__ .
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Rhd keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/rhd2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 224x224
+ center, scale = self._xywh2cs(*obj['bbox'][:4], padding=1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate rhd keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+                - preds (np.ndarray[N,K,3]): The first two entries along \
+                    the last axis are coordinates; the third is the score.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example,
+ ['training/rgb/00031426.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/mmpose/datasets/datasets/mesh/__init__.py b/mmpose/datasets/datasets/mesh/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..14297c7261aed14f814e2e986f315dedd51702be
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .mesh_adv_dataset import MeshAdversarialDataset
+from .mesh_h36m_dataset import MeshH36MDataset
+from .mesh_mix_dataset import MeshMixDataset
+from .mosh_dataset import MoshDataset
+
+__all__ = [
+ 'MeshH36MDataset', 'MoshDataset', 'MeshMixDataset',
+ 'MeshAdversarialDataset'
+]
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fe28f57d4b0a70fc703367fcc55f8c8ee7ff8e8b
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/mesh_adv_dataset.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/mesh_adv_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4eccca4dac6c5aec1a14bcd1b2317796b0cd9d40
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/mesh_adv_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/mesh_base_dataset.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/mesh_base_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8fb9764d3a2413e65af06251c3e9d769a3475326
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/mesh_base_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/mesh_h36m_dataset.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/mesh_h36m_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b807dbd7433f6212cb9a98bebb566e7c6385f0af
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/mesh_h36m_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/mesh_mix_dataset.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/mesh_mix_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b94da257890af051bb6697ec1ec4b97c53d0efa
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/mesh_mix_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/__pycache__/mosh_dataset.cpython-310.pyc b/mmpose/datasets/datasets/mesh/__pycache__/mosh_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb671808c2c57a8285b86ca14ef9d340100bd41b
Binary files /dev/null and b/mmpose/datasets/datasets/mesh/__pycache__/mosh_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py b/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd9ba39d50415d2897cd14e32435feee397c2963
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS, build_dataset
+
+
+@DATASETS.register_module()
+class MeshAdversarialDataset(Dataset):
+    """Mix Dataset for the adversarial training in the 3D human mesh
+    estimation task.
+
+    The dataset combines data from two datasets and returns a dict
+    containing samples from both of them.
+
+ Args:
+ train_dataset (Dataset): Dataset for 3D human mesh estimation.
+ adversarial_dataset (Dataset): Dataset for adversarial learning,
+ provides real SMPL parameters.
+ """
+
+ def __init__(self, train_dataset, adversarial_dataset):
+ super().__init__()
+ self.train_dataset = build_dataset(train_dataset)
+ self.adversarial_dataset = build_dataset(adversarial_dataset)
+ self.length = len(self.train_dataset)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, i):
+        """Given an index, get the data from the train dataset and randomly
+        sample an item from the adversarial dataset.
+
+        Return a dict containing data from both the train and the
+        adversarial dataset.
+ """
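+        # Pair every training sample with a randomly drawn set of real SMPL
+        # parameters so the discriminator sees unmatched real data each step.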
+ data = self.train_dataset[i]
+ ind_adv = np.random.randint(
+ low=0, high=len(self.adversarial_dataset), dtype=int)
+ data.update(self.adversarial_dataset[ind_adv %
+ len(self.adversarial_dataset)])
+ return data
diff --git a/mmpose/datasets/datasets/mesh/mesh_base_dataset.py b/mmpose/datasets/datasets/mesh/mesh_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..79c8a8ac9040463152cb779ffff146ef5391b241
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/mesh_base_dataset.py
@@ -0,0 +1,155 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+import os
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.pipelines import Compose
+
+
+class MeshBaseDataset(Dataset, metaclass=ABCMeta):
+    """Base dataset for the 3D human mesh estimation task. In this task, all
+    datasets share this BaseDataset for training and have their own evaluate
+    function.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ This dataset can only be used for training.
+ For evaluation, subclass should write an extra evaluate function.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['iuv_size'] = np.array(data_cfg['iuv_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ self.ann_info['flip_pairs'] = None
+ self.db = []
+ self.pipeline = Compose(self.pipeline)
+
+ # flip_pairs
+        # For all mesh datasets, we use the same 24 joints as CMR and SPIN.
+ self.ann_info['flip_pairs'] = [[0, 5], [1, 4], [2, 3], [6, 11],
+ [7, 10], [8, 9], [20, 21], [22, 23]]
+ self.ann_info['use_different_joint_weights'] = False
+ assert self.ann_info['num_joints'] == 24
+ self.ann_info['joint_weights'] = np.ones([24, 1], dtype=np.float32)
+
+ self.ann_info['uv_type'] = data_cfg['uv_type']
+ self.ann_info['use_IUV'] = data_cfg['use_IUV']
+ uv_type = self.ann_info['uv_type']
+ self.iuv_prefix = os.path.join(self.img_prefix, f'{uv_type}_IUV_gt')
+ self.db = self._get_db(ann_file)
+
+ def _get_db(self, ann_file):
+ """Load dataset."""
+ data = np.load(ann_file)
+ tmpl = dict(
+ image_file=None,
+ center=None,
+ scale=None,
+ rotation=0,
+ joints_2d=None,
+ joints_2d_visible=None,
+ joints_3d=None,
+ joints_3d_visible=None,
+ gender=None,
+ pose=None,
+ beta=None,
+ has_smpl=0,
+ iuv_file=None,
+ has_iuv=0)
+ gt_db = []
+
+ _imgnames = data['imgname']
+ _scales = data['scale'].astype(np.float32)
+ _centers = data['center'].astype(np.float32)
+ dataset_len = len(_imgnames)
+
+ # Get 2D keypoints
+ if 'part' in data.keys():
+ _keypoints = data['part'].astype(np.float32)
+ else:
+ _keypoints = np.zeros((dataset_len, 24, 3), dtype=np.float32)
+
+ # Get gt 3D joints, if available
+ if 'S' in data.keys():
+ _joints_3d = data['S'].astype(np.float32)
+ else:
+ _joints_3d = np.zeros((dataset_len, 24, 4), dtype=np.float32)
+
+ # Get gt SMPL parameters, if available
+ if 'pose' in data.keys() and 'shape' in data.keys():
+ _poses = data['pose'].astype(np.float32)
+ _betas = data['shape'].astype(np.float32)
+ has_smpl = 1
+ else:
+ _poses = np.zeros((dataset_len, 72), dtype=np.float32)
+ _betas = np.zeros((dataset_len, 10), dtype=np.float32)
+ has_smpl = 0
+
+ # Get gender data, if available
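+        # (encoded as 0 for male and 1 otherwise; -1 marks missing labels)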
+ if 'gender' in data.keys():
+ _genders = data['gender']
+ _genders = np.array([str(g) != 'm' for g in _genders]).astype(int)
+ else:
+ _genders = -1 * np.ones(dataset_len).astype(int)
+
+ # Get IUV image, if available
+ if 'iuv_names' in data.keys():
+ _iuv_names = data['iuv_names']
+ has_iuv = has_smpl
+ else:
+ _iuv_names = [''] * dataset_len
+ has_iuv = 0
+
+ for i in range(len(_imgnames)):
+ newitem = cp.deepcopy(tmpl)
+ newitem['image_file'] = os.path.join(self.img_prefix, _imgnames[i])
+ newitem['scale'] = np.array([_scales[i], _scales[i]])
+ newitem['center'] = _centers[i]
+ newitem['joints_2d'] = _keypoints[i, :, :2]
+ newitem['joints_2d_visible'] = _keypoints[i, :, -1][:, None]
+ newitem['joints_3d'] = _joints_3d[i, :, :3]
+ newitem['joints_3d_visible'] = _joints_3d[i, :, -1][:, None]
+ newitem['pose'] = _poses[i]
+ newitem['beta'] = _betas[i]
+ newitem['has_smpl'] = has_smpl
+ newitem['gender'] = _genders[i]
+ newitem['iuv_file'] = os.path.join(self.iuv_prefix, _iuv_names[i])
+ newitem['has_iuv'] = has_iuv
+ gt_db.append(newitem)
+ return gt_db
+
+ def __len__(self, ):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = cp.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
diff --git a/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py b/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac9ead1f5c1c1de40604c6830f6b0c762ad70eb
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py
@@ -0,0 +1,101 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+
+from mmpose.core.evaluation import keypoint_mpjpe
+from mmpose.datasets.builder import DATASETS
+from .mesh_base_dataset import MeshBaseDataset
+
+
+@DATASETS.register_module()
+class MeshH36MDataset(MeshBaseDataset):
+    """Human3.6M Dataset for 3D human mesh estimation. It inherits all
+    functions from MeshBaseDataset and has its own evaluate function.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def evaluate(self, outputs, res_folder, metric='joint_error', logger=None):
+ """Evaluate 3D keypoint results."""
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['joint_error']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
+ kpts = []
+ for out in outputs:
+ for (keypoints, image_path) in zip(out['keypoints_3d'],
+ out['image_path']):
+ kpts.append({
+ 'keypoints': keypoints.tolist(),
+ 'image': image_path,
+ })
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file)
+ name_value = OrderedDict(info_str)
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file):
+ """Keypoint evaluation.
+
+ Report mean per joint position error (MPJPE) and mean per joint
+ position error after rigid alignment (MPJPE-PA)
+ """
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ pred_joints_3d = [pred['keypoints'] for pred in preds]
+ gt_joints_3d = [item['joints_3d'] for item in self.db]
+ gt_joints_visible = [item['joints_3d_visible'] for item in self.db]
+
+ pred_joints_3d = np.array(pred_joints_3d)
+ gt_joints_3d = np.array(gt_joints_3d)
+ gt_joints_visible = np.array(gt_joints_visible)
+
+ # we only evaluate on 14 lsp joints
+ joint_mapper = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18]
+ pred_joints_3d = pred_joints_3d[:, joint_mapper, :]
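+        # Center each pose at the pelvis (midpoint of the two hip joints) so
+        # that the error is invariant to global translation.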
+ pred_pelvis = (pred_joints_3d[:, 2] + pred_joints_3d[:, 3]) / 2
+ pred_joints_3d = pred_joints_3d - pred_pelvis[:, None, :]
+
+ gt_joints_3d = gt_joints_3d[:, joint_mapper, :]
+ gt_pelvis = (gt_joints_3d[:, 2] + gt_joints_3d[:, 3]) / 2
+ gt_joints_3d = gt_joints_3d - gt_pelvis[:, None, :]
+ gt_joints_visible = gt_joints_visible[:, joint_mapper, 0] > 0
+
+ mpjpe = keypoint_mpjpe(pred_joints_3d, gt_joints_3d, gt_joints_visible)
+ mpjpe_pa = keypoint_mpjpe(
+ pred_joints_3d,
+ gt_joints_3d,
+ gt_joints_visible,
+ alignment='procrustes')
+
+ info_str = []
+ info_str.append(('MPJPE', mpjpe * 1000))
+ info_str.append(('MPJPE-PA', mpjpe_pa * 1000))
+ return info_str
diff --git a/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py b/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..244a7c323c6c69aa2a00e9adfb0a11e08182c004
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import ConcatDataset, Dataset, WeightedRandomSampler
+
+from mmpose.datasets.builder import DATASETS
+from .mesh_base_dataset import MeshBaseDataset
+
+
+@DATASETS.register_module()
+class MeshMixDataset(Dataset, metaclass=ABCMeta):
+ """Mix Dataset for 3D human mesh estimation.
+
+    The dataset combines data from multiple datasets (MeshBaseDataset) and
+    samples from the different datasets with the provided proportions.
+    The dataset loads raw features and applies specified transforms
+    to return a dict containing the image tensors and other information.
+
+ Args:
+ configs (list): List of configs for multiple datasets.
+        partition (list): Sampling proportion of the multiple datasets. The
+            length of partition should be the same as that of configs. Its
+            elements should be non-negative and need not sum to one.
+
+ Example:
+ >>> from mmpose.datasets import MeshMixDataset
+ >>> data_cfg = dict(
+ >>> image_size=[256, 256],
+ >>> iuv_size=[64, 64],
+ >>> num_joints=24,
+ >>> use_IUV=True,
+ >>> uv_type='BF')
+ >>>
+ >>> mix_dataset = MeshMixDataset(
+ >>> configs=[
+ >>> dict(
+ >>> ann_file='tests/data/h36m/test_h36m.npz',
+ >>> img_prefix='tests/data/h36m',
+ >>> data_cfg=data_cfg,
+ >>> pipeline=[]),
+ >>> dict(
+ >>> ann_file='tests/data/h36m/test_h36m.npz',
+ >>> img_prefix='tests/data/h36m',
+ >>> data_cfg=data_cfg,
+ >>> pipeline=[]),
+ >>> ],
+ >>> partition=[0.6, 0.4])
+ """
+
+ def __init__(self, configs, partition):
+ """Load data from multiple datasets."""
+ assert min(partition) >= 0
+ datasets = [MeshBaseDataset(**cfg) for cfg in configs]
+ self.dataset = ConcatDataset(datasets)
+ self.length = max(len(ds) for ds in datasets)
+ weights = [
+ np.ones(len(ds)) * p / len(ds)
+ for (p, ds) in zip(partition, datasets)
+ ]
+ weights = np.concatenate(weights, axis=0)
+ self.sampler = WeightedRandomSampler(weights, 1)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, idx):
+ """Given index, sample the data from multiple datasets with the given
+ proportion."""
+ idx_new = list(self.sampler)[0]
+ return self.dataset[idx_new]
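The mixing logic above boils down to: concatenate the datasets, give every sample of dataset i the weight partition[i] / len(dataset_i), and draw one weighted index per `__getitem__` call. A minimal, self-contained sketch of that sampling behaviour, with toy tensor datasets standing in for MeshBaseDataset:

```python
import numpy as np
import torch
from torch.utils.data import ConcatDataset, TensorDataset, WeightedRandomSampler

# toy stand-ins for MeshBaseDataset instances of different sizes
ds_a = TensorDataset(torch.zeros(100, 1))
ds_b = TensorDataset(torch.ones(400, 1))
partition = [0.6, 0.4]

concat = ConcatDataset([ds_a, ds_b])

# per-sample weights: every item of dataset i gets partition[i] / len(dataset_i)
weights = np.concatenate(
    [np.full(len(ds), p / len(ds)) for p, ds in zip(partition, [ds_a, ds_b])])
sampler = WeightedRandomSampler(torch.from_numpy(weights), num_samples=1)

# one weighted draw per access, as in MeshMixDataset.__getitem__
idx = list(sampler)[0]
sample = concat[idx]

# sanity check: over many draws, roughly 60% of indices fall in ds_a
draws = [list(sampler)[0] for _ in range(1000)]
print('fraction from ds_a:', np.mean([i < len(ds_a) for i in draws]))
```

Note that `__len__` reports the size of the largest constituent dataset, so one "epoch" of the mix re-samples the smaller datasets according to their weights.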
diff --git a/mmpose/datasets/datasets/mesh/mosh_dataset.py b/mmpose/datasets/datasets/mesh/mosh_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..3185265e7d6e666d8c9096244c3df4104bcdb020
--- /dev/null
+++ b/mmpose/datasets/datasets/mesh/mosh_dataset.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.pipelines import Compose
+
+
+@DATASETS.register_module()
+class MoshDataset(Dataset, metaclass=ABCMeta):
+ """Mosh Dataset for the adversarial training in 3D human mesh estimation
+ task.
+
+ The dataset return a dict containing real-world SMPL parameters.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self, ann_file, pipeline, test_mode=False):
+
+ self.ann_file = ann_file
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.db = self._get_db(ann_file)
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_db(ann_file):
+ """Load dataset."""
+ data = np.load(ann_file)
+ _betas = data['shape'].astype(np.float32)
+ _poses = data['pose'].astype(np.float32)
+ tmpl = dict(
+ pose=None,
+ beta=None,
+ )
+ gt_db = []
+ dataset_len = len(_betas)
+
+ for i in range(dataset_len):
+ newitem = cp.deepcopy(tmpl)
+ newitem['pose'] = _poses[i]
+ newitem['beta'] = _betas[i]
+ gt_db.append(newitem)
+ return gt_db
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ item = cp.deepcopy(self.db[idx])
+ trivial, pose, beta = \
+ np.zeros(3, dtype=np.float32), item['pose'], item['beta']
+ results = {
+ 'mosh_theta':
+ np.concatenate((trivial, pose, beta), axis=0).astype(np.float32)
+ }
+ return self.pipeline(results)
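As a rough illustration of the `mosh_theta` layout produced above (a zero 3-vector followed by the SMPL pose and shape parameters), the snippet below builds a tiny stand-in annotation file and assembles the same vector. The 72-dim pose and 10-dim betas are the usual SMPL sizes and are assumed here, as is the toy file name.

```python
import numpy as np

# tiny stand-in for the MoSh annotation file (hypothetical path)
np.savez('mosh_toy.npz',
         pose=np.zeros((5, 72), dtype=np.float32),   # assumed SMPL pose size
         shape=np.zeros((5, 10), dtype=np.float32))  # assumed SMPL beta size

data = np.load('mosh_toy.npz')
pose, beta = data['pose'][0], data['shape'][0]
trivial = np.zeros(3, dtype=np.float32)

# same layout as MoshDataset.__getitem__: [trivial(3) | pose | beta]
mosh_theta = np.concatenate((trivial, pose, beta)).astype(np.float32)
print(mosh_theta.shape)  # (85,) with the assumed sizes
```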
diff --git a/mmpose/datasets/datasets/top_down/__init__.py b/mmpose/datasets/datasets/top_down/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc5b46a8b1e3d68cda6ab6564eb748987a9a9e8d
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/__init__.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .topdown_aic_dataset import TopDownAicDataset
+from .topdown_coco_dataset import TopDownCocoDataset
+from .topdown_coco_wholebody_dataset import TopDownCocoWholeBodyDataset
+from .topdown_crowdpose_dataset import TopDownCrowdPoseDataset
+from .topdown_h36m_dataset import TopDownH36MDataset
+from .topdown_halpe_dataset import TopDownHalpeDataset
+from .topdown_jhmdb_dataset import TopDownJhmdbDataset
+from .topdown_mhp_dataset import TopDownMhpDataset
+from .topdown_mpii_dataset import TopDownMpiiDataset
+from .topdown_mpii_trb_dataset import TopDownMpiiTrbDataset
+from .topdown_ochuman_dataset import TopDownOCHumanDataset
+from .topdown_posetrack18_dataset import TopDownPoseTrack18Dataset
+from .topdown_posetrack18_video_dataset import TopDownPoseTrack18VideoDataset
+
+__all__ = [
+ 'TopDownAicDataset',
+ 'TopDownCocoDataset',
+ 'TopDownCocoWholeBodyDataset',
+ 'TopDownCrowdPoseDataset',
+ 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset',
+ 'TopDownOCHumanDataset',
+ 'TopDownPoseTrack18Dataset',
+ 'TopDownJhmdbDataset',
+ 'TopDownMhpDataset',
+ 'TopDownH36MDataset',
+ 'TopDownHalpeDataset',
+ 'TopDownPoseTrack18VideoDataset',
+]
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a34d6795f0dc6abecfea5fb9aa2ac6722f6c83e6
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_aic_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_aic_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..497109ed29b7c91a22dd48e4fa6c978dd3e3055f
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_aic_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..67a1abb6b5a8d8a22369de54ec16b56099c75a42
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_wholebody_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_wholebody_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..16bc7b24cfebf57ee1b46f804c9d12cd8595a84d
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_coco_wholebody_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_crowdpose_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_crowdpose_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78ddb0d37b335ea5e69583ac267a09fc2a1186b4
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_crowdpose_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_h36m_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_h36m_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..131d384be6f4393a89f65443d64fb0b048423f3d
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_h36m_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_halpe_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_halpe_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..67df5728ce47eb43a8009b6afeaa7ccbc536ae5d
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_halpe_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_jhmdb_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_jhmdb_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db6c4135d3acb9a17314364aa8650d5ce5e21362
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_jhmdb_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_mhp_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mhp_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7914fc8921a14698e30ecdc009fee75b21f38c2f
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mhp_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79781f0edcee078309dfe0982e5abe5db4e3446b
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_trb_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_trb_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4d039558e1fe50082ccb3f1a505d622d34edadc
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_mpii_trb_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_ochuman_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_ochuman_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4da6c60b60814992a56418b363557473379bfac3
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_ochuman_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e8a3119036c8f951dc92417413a9d3c51ed1e6d
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_video_dataset.cpython-310.pyc b/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_video_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1b380f8f107fd286dcad26095e4bc1d703d11c60
Binary files /dev/null and b/mmpose/datasets/datasets/top_down/__pycache__/topdown_posetrack18_video_dataset.cpython-310.pyc differ
diff --git a/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py b/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..13c41dfea92189e113dd291afa3771547881efbc
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownAicDataset(TopDownCocoDataset):
+ """AicDataset dataset for top-down pose estimation.
+
+ "AI Challenger : A Large-scale Dataset for Going Deeper
+ in Image Understanding", arXiv'2017.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AIC keypoint indexes::
+
+ 0: "right_shoulder",
+ 1: "right_elbow",
+ 2: "right_wrist",
+ 3: "left_shoulder",
+ 4: "left_elbow",
+ 5: "left_wrist",
+ 6: "right_hip",
+ 7: "right_knee",
+ 8: "right_ankle",
+ 9: "left_hip",
+ 10: "left_knee",
+ 11: "left_ankle",
+ 12: "head_top",
+ 13: "neck"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aic.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/top_down/topdown_base_dataset.py b/mmpose/datasets/datasets/top_down/topdown_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc99576716ea5fc77af277e3e764c2c9b5dd158f
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class TopDownBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise ImportError(
+ 'TopDownBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgTopDownDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py b/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..664c88149634bb63966438508af52f6d746e9aef
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
@@ -0,0 +1,405 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownCocoDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoDataset dataset for top-down pose estimation.
+
+ "Microsoft COCO: Common Objects in Context", ECCV'2014.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ if (not self.test_mode) or self.use_gt_bbox:
+ # use ground truth bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ else:
+ # use bbox from detection
+ gt_db = self._load_coco_person_detection_results()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+
+ Args:
+ img_id: coco image id
+
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _load_coco_person_detection_results(self):
+ """Load coco person detection results."""
+ num_joints = self.ann_info['num_joints']
+ all_boxes = None
+ with open(self.bbox_file, 'r') as f:
+ all_boxes = json.load(f)
+
+ if not all_boxes:
+ raise ValueError('=> Loading %s failed!' % self.bbox_file)
+
+ print(f'=> Total boxes: {len(all_boxes)}')
+
+ kpt_db = []
+ bbox_id = 0
+ for det_res in all_boxes:
+ if det_res['category_id'] != 1:
+ continue
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[det_res['image_id']])
+ box = det_res['bbox']
+ score = det_res['score']
+
+ if score < self.det_bbox_thr:
+ continue
+
+ center, scale = self._xywh2cs(*box[:4])
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
+ kpt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'bbox': box[:4],
+ 'bbox_score': score,
+ 'dataset': self.dataset_name,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ print(f'=> Total boxes after filter '
+ f'low score@{self.det_bbox_thr}: {bbox_id}')
+ return kpt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
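The rescoring step inside `evaluate()` above replaces each detection's score by the mean confidence of its keypoints above `vis_thr`, multiplied by the original box score, before OKS-NMS is (optionally) applied. A minimal sketch of just that step, with made-up numbers:

```python
import numpy as np

def rescore(keypoints, box_score, vis_thr=0.2):
    # keypoints: (K, 3) array of (x, y, confidence)
    conf = keypoints[:, 2]
    valid = conf > vis_thr
    kpt_score = conf[valid].mean() if valid.any() else 0.0
    return float(kpt_score * box_score)

rng = np.random.default_rng(0)
kpts = np.concatenate(
    [rng.uniform(0, 192, size=(17, 2)), rng.uniform(0, 1, size=(17, 1))], axis=1)
print(rescore(kpts, box_score=0.9))
```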
diff --git a/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py b/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..791a3c5790d68ef480bc54d94cf377c06e5f0383
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py
@@ -0,0 +1,274 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+
+import numpy as np
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownCocoWholeBodyDataset(TopDownCocoDataset):
+ """CocoWholeBodyDataset dataset for top-down pose estimation.
+
+ "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO-WholeBody keypoint indexes::
+
+ 0-16: 17 body keypoints,
+ 17-22: 6 foot keypoints,
+ 23-90: 68 face keypoints,
+ 91-132: 42 hand keypoints
+
+ In total, we have 133 keypoints for wholebody pose estimation.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco_wholebody.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.body_num = 17
+ self.foot_num = 6
+ self.face_num = 68
+ self.left_hand_num = 21
+ self.right_hand_num = 21
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ rec = []
+ bbox_id = 0
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num,
+ self.left_hand_num, self.right_hand_num
+ ]) * 3
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point[cuts[0]:cuts[1]].tolist(),
+ 'foot_kpts': key_point[cuts[1]:cuts[2]].tolist(),
+ 'face_kpts': key_point[cuts[2]:cuts[3]].tolist(),
+ 'lefthand_kpts': key_point[cuts[3]:cuts[4]].tolist(),
+ 'righthand_kpts': key_point[cuts[4]:cuts[5]].tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num, self.left_hand_num,
+ self.right_hand_num
+ ])
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_body',
+ self.sigmas[cuts[0]:cuts[1]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_foot',
+ self.sigmas[cuts[1]:cuts[2]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_face',
+ self.sigmas[cuts[2]:cuts[3]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_lefthand',
+ self.sigmas[cuts[3]:cuts[4]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_righthand',
+ self.sigmas[cuts[4]:cuts[5]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_wholebody',
+ self.sigmas,
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
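The repeated `cuts = np.cumsum(...)` pattern above just computes offsets into the flattened (x, y, score) keypoint vector so that the body, foot, face and hand blocks can be sliced out and evaluated separately. A small sketch of that slicing:

```python
import numpy as np

# part sizes used by TopDownCocoWholeBodyDataset (133 keypoints in total)
sizes = dict(body=17, foot=6, face=68, lefthand=21, righthand=21)
cuts = np.cumsum([0, *sizes.values()]) * 3   # offsets into the flat (x, y, score) vector

flat = np.arange(133 * 3, dtype=np.float32)  # stand-in for one flattened prediction
parts = {name: flat[start:stop]
         for name, start, stop in zip(sizes, cuts[:-1], cuts[1:])}
print({k: v.shape for k, v in parts.items()})
# {'body': (51,), 'foot': (18,), 'face': (204,), 'lefthand': (63,), 'righthand': (63,)}
```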
diff --git a/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py b/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9b196f744aa67d46c420612f9476b1d73c68cf3
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py
@@ -0,0 +1,110 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownCrowdPoseDataset(TopDownCocoDataset):
+ """CrowdPoseDataset dataset for top-down pose estimation.
+
+ "CrowdPose: Efficient Crowded Scenes Pose Estimation and
+ A New Benchmark", CVPR'2019.
+ More details can be found in the `paper
+ `__.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ CrowdPose keypoint indexes::
+
+ 0: 'left_shoulder',
+ 1: 'right_shoulder',
+ 2: 'left_elbow',
+ 3: 'right_elbow',
+ 4: 'left_wrist',
+ 5: 'right_wrist',
+ 6: 'left_hip',
+ 7: 'right_hip',
+ 8: 'left_knee',
+ 9: 'right_knee',
+ 10: 'left_ankle',
+ 11: 'right_ankle',
+ 12: 'top_head',
+ 13: 'neck'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/crowdpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_crowd',
+ self.sigmas,
+ use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)',
+ 'AP(H)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py b/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bc49e3a2994037993bdb44a6ba59e44eeef0270
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py
@@ -0,0 +1,206 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownH36MDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Human3.6M dataset for top-down 2D pose estimation.
+
+ "Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human
+ Sensing in Natural Environments", TPAMI'2014.
+ More details can be found in the `paper
+ `__.
+
+ Human3.6M keypoint indexes::
+
+ 0: 'root (pelvis)',
+ 1: 'right_hip',
+ 2: 'right_knee',
+ 3: 'right_foot',
+ 4: 'left_hip',
+ 5: 'left_knee',
+ 6: 'left_foot',
+ 7: 'spine',
+ 8: 'thorax',
+ 9: 'neck_base',
+ 10: 'head',
+ 11: 'left_shoulder',
+ 12: 'left_elbow',
+ 13: 'left_wrist',
+ 14: 'right_shoulder',
+ 15: 'right_elbow',
+ 16: 'right_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/h36m.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate human3.6m 2d keypoint results. The pose prediction results
+ will be saved in `${res_folder}/result_keypoints.json`.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0],
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'PCK'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
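The 'EPE' metric accepted above is conventionally the mean end-point error in pixels over visible joints; the actual computation lives in the shared `_report_metric` helper of the base class, which is not part of this file, so the snippet below is only a sketch of that definition.

```python
import numpy as np

def epe(pred, gt, mask):
    # mean Euclidean distance over visible keypoints, in pixels
    dist = np.linalg.norm(pred - gt, axis=-1)   # (N, K)
    return dist[mask].mean()

pred = np.array([[[10.0, 10.0], [50.0, 52.0]]])
gt = np.array([[[10.0, 13.0], [50.0, 50.0]]])
mask = np.ones((1, 2), dtype=bool)
print(epe(pred, gt, mask))  # (3 + 2) / 2 = 2.5
```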
diff --git a/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py b/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7042daa29ec2b2b8eafb16a1404be32cf761d678
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownHalpeDataset(TopDownCocoDataset):
+ """HalpeDataset for top-down pose estimation.
+
+ 'https://github.com/Fang-Haoshu/Halpe-FullBody'
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Halpe keypoint indexes::
+
+ 0-19: 20 body keypoints,
+ 20-25: 6 foot keypoints,
+ 26-93: 68 face keypoints,
+ 94-135: 42 hand keypoints
+
+ In total, we have 136 keypoints for wholebody pose estimation.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/halpe.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
diff --git a/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py b/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..5204f04d869c59b9fe9b9f337714d1aa6f555c9e
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py
@@ -0,0 +1,361 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation.top_down_eval import keypoint_pck_accuracy
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownJhmdbDataset(TopDownCocoDataset):
+ """JhmdbDataset dataset for top-down pose estimation.
+
+ "Towards understanding action recognition", ICCV'2013.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ sub-JHMDB keypoint indexes::
+
+ 0: "neck",
+ 1: "belly",
+ 2: "head",
+ 3: "right_shoulder",
+ 4: "left_shoulder",
+ 5: "right_hip",
+ 6: "left_hip",
+ 7: "right_elbow",
+ 8: "left_elbow",
+ 9: "right_knee",
+ 10: "left_knee",
+ 11: "right_wrist",
+ 12: "left_wrist",
+ 13: "right_ankle",
+ 14: "left_ankle"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/jhmdb.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ # JHMDB uses matlab format, index is 1-based,
+ # we should first convert to 0-based index
+ x -= 1
+ y -= 1
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ rec = []
+ bbox_id = 0
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+
+ # JHMDB uses matlab format, index is 1-based,
+ # we should first convert to 0-based index
+ joints_3d[:, :2] = keypoints[:, :2] - 1
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': f'{img_id}_{bbox_id:03}'
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _write_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file, metrics, pck_thr=0.2):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'tPCK'.
+ pck_thr (float): PCK threshold. Default: 0.2.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ outputs = []
+ gts = []
+ masks = []
+ threshold_bbox = []
+ threshold_torso = []
+
+ for pred, item in zip(preds, self.db):
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
+ gts.append(np.array(item['joints_3d'])[:, :-1])
+ masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
+ if 'PCK' in metrics:
+ bbox = np.array(item['bbox'])
+ bbox_thr = np.max(bbox[2:])
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+
+ if 'tPCK' in metrics:
+ torso_thr = np.linalg.norm(item['joints_3d'][4, :2] -
+ item['joints_3d'][5, :2])
+ if torso_thr < 1:
+ torso_thr = np.linalg.norm(
+ np.array(pred['keypoints'])[4, :2] -
+ np.array(pred['keypoints'])[5, :2])
+ warnings.warn('Torso Size < 1.')
+ threshold_torso.append(np.array([torso_thr, torso_thr]))
+
+ outputs = np.array(outputs)
+ gts = np.array(gts)
+ masks = np.array(masks)
+ threshold_bbox = np.array(threshold_bbox)
+ threshold_torso = np.array(threshold_torso)
+
+ if 'PCK' in metrics:
+ pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_bbox)
+
+ stats_names = [
+ 'Head PCK', 'Sho PCK', 'Elb PCK', 'Wri PCK', 'Hip PCK',
+ 'Knee PCK', 'Ank PCK', 'Mean PCK'
+ ]
+
+ stats = [
+ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4],
+ 0.5 * pck_p[7] + 0.5 * pck_p[8],
+ 0.5 * pck_p[11] + 0.5 * pck_p[12],
+ 0.5 * pck_p[5] + 0.5 * pck_p[6],
+ 0.5 * pck_p[9] + 0.5 * pck_p[10],
+ 0.5 * pck_p[13] + 0.5 * pck_p[14], pck
+ ]
+
+ info_str.extend(list(zip(stats_names, stats)))
+
+ if 'tPCK' in metrics:
+ pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_torso)
+
+ stats_names = [
+ 'Head tPCK', 'Sho tPCK', 'Elb tPCK', 'Wri tPCK', 'Hip tPCK',
+ 'Knee tPCK', 'Ank tPCK', 'Mean tPCK'
+ ]
+
+ stats = [
+ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4],
+ 0.5 * pck_p[7] + 0.5 * pck_p[8],
+ 0.5 * pck_p[11] + 0.5 * pck_p[12],
+ 0.5 * pck_p[5] + 0.5 * pck_p[6],
+ 0.5 * pck_p[9] + 0.5 * pck_p[10],
+ 0.5 * pck_p[13] + 0.5 * pck_p[14], pck
+ ]
+
+ info_str.extend(list(zip(stats_names, stats)))
+
+ return info_str
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate onehand10k keypoint results. The pose prediction results
+ will be saved in `${res_folder}/result_keypoints.json`.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_path (list[str])
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'tPCK'.
+ PCK means normalized by the bounding boxes, while tPCK
+ means normalized by the torso size.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'tPCK']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ # convert 0-based index to 1-based index,
+ # and get the first two dimensions.
+ preds[..., :2] += 1.0
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
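Both metrics above are PCK-style: a keypoint counts as correct if its error is below thr times a per-sample normaliser, where 'PCK' uses the larger bounding-box side and 'tPCK' uses the torso size (the distance between keypoints 4 and 5). The simplified sketch below mirrors that definition; it is not mmpose's keypoint_pck_accuracy, just the idea behind it.

```python
import numpy as np

def pck(pred, gt, mask, thr, normalize):
    # normalize: (N, 2) per-sample scale, e.g. (bbox_side, bbox_side) or torso size
    dist = np.linalg.norm((pred - gt) / normalize[:, None, :], axis=-1)
    correct = (dist <= thr) & mask
    return correct.sum() / mask.sum()

pred = np.array([[[12.0, 10.0], [40.0, 80.0]]])
gt = np.array([[[10.0, 10.0], [60.0, 80.0]]])
mask = np.ones((1, 2), dtype=bool)
bbox_norm = np.array([[100.0, 100.0]])   # max bbox side, duplicated for x and y
print(pck(pred, gt, mask, thr=0.2, normalize=bbox_norm))  # 1.0: both errors <= 20 px
```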
diff --git a/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py b/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..050824a88ab520ad44feafd4a8553582689b1fab
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py
@@ -0,0 +1,125 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownMhpDataset(TopDownCocoDataset):
+ """MHPv2.0 dataset for top-down pose estimation.
+
+ "Understanding Humans in Crowded Scenes: Deep Nested Adversarial
+ Learning and A New Benchmark for Multi-Human Parsing", ACM MM'2018.
+ More details can be found in the `paper
+ `__
+
+ Note that the evaluation metric used here is mAP (adapted from COCO),
+ which may be different from the official evaluation codes.
+ 'https://github.com/ZhaoJ9014/Multi-Human-Parsing/tree/master/'
+ 'Evaluation/Multi-Human-Pose'
+ Please be cautious if you use the results in papers.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MHP keypoint indexes::
+
+ 0: "right ankle",
+ 1: "right knee",
+ 2: "right hip",
+ 3: "left hip",
+ 4: "left knee",
+ 5: "left ankle",
+ 6: "pelvis",
+ 7: "thorax",
+ 8: "upper neck",
+ 9: "head top",
+ 10: "right wrist",
+ 11: "right elbow",
+ 12: "right shoulder",
+ 13: "left shoulder",
+ 14: "left elbow",
+ 15: "left wrist",
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ if 'image_thr' in data_cfg:
+ warnings.warn(
+ 'image_thr is deprecated, '
+ 'please use det_bbox_thr instead', DeprecationWarning)
+ self.det_bbox_thr = data_cfg['image_thr']
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
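For reference, the xtcocotools evaluation wrapped by `_do_python_keypoint_eval` above can also be driven directly. This is a hedged sketch: the annotation/result paths and the flat per-keypoint sigmas are placeholders, not files or values shipped with this repo.

```python
import numpy as np
from xtcocotools.coco import COCO
from xtcocotools.cocoeval import COCOeval

# Hypothetical paths; substitute real MHP-style annotation and result files.
coco_gt = COCO('annotations/mhp_val.json')
coco_dt = coco_gt.loadRes('result_keypoints.json')

# Placeholder uniform sigmas for the 16 MHP joints; the dataset class uses
# the per-joint values from its dataset_info config instead.
sigmas = np.full(16, 0.05, dtype=np.float32)

# use_area=False mirrors _do_python_keypoint_eval above.
coco_eval = COCOeval(coco_gt, coco_dt, 'keypoints', sigmas, use_area=False)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
print(dict(zip(['AP', 'AP .5', 'AP .75'], coco_eval.stats[:3])))
```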
diff --git a/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py b/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..751046aa683dd6304b97f639d85cc9489027a6ef
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
@@ -0,0 +1,275 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os.path as osp
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from scipy.io import loadmat, savemat
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownMpiiDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MPII Dataset for top-down pose estimation.
+
+ "2D Human Pose Estimation: New Benchmark and State of the Art Analysis"
+ , CVPR'2014. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MPII keypoint indexes::
+
+ 0: 'right_ankle',
+ 1: 'right_knee',
+ 2: 'right_hip',
+ 3: 'left_hip',
+ 4: 'left_knee',
+ 5: 'left_ankle',
+ 6: 'pelvis',
+ 7: 'thorax',
+ 8: 'upper_neck',
+ 9: 'head_top',
+ 10: 'right_wrist',
+ 11: 'right_elbow',
+ 12: 'right_shoulder',
+ 13: 'left_shoulder',
+ 14: 'left_elbow',
+ 15: 'left_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpii.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ coco_style=False,
+ test_mode=test_mode)
+
+ self.db = self._get_db()
+ self.image_set = set(x['image_file'] for x in self.db)
+ self.num_images = len(self.image_set)
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ # create train/val split
+ with open(self.ann_file) as anno_file:
+ anno = json.load(anno_file)
+
+ gt_db = []
+ bbox_id = 0
+ for a in anno:
+ image_name = a['image']
+
+ center = np.array(a['center'], dtype=np.float32)
+ scale = np.array([a['scale'], a['scale']], dtype=np.float32)
+
+ # Adjust center/scale slightly to avoid cropping limbs
+ if center[0] != -1:
+ center[1] = center[1] + 15 * scale[1]
+ # padding to include proper amount of context
+ scale = scale * 1.25
+
+ # MPII annotations use MATLAB's 1-based indexing,
+ # so convert to 0-based indices first
+ center = center - 1
+
+ joints_3d = np.zeros((self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+ joints_3d_visible = np.zeros((self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+ if not self.test_mode:
+ joints = np.array(a['joints'])
+ joints_vis = np.array(a['joints_vis'])
+ assert len(joints) == self.ann_info['num_joints'], \
+ f'joint num diff: {len(joints)}' + \
+ f' vs {self.ann_info["num_joints"]}'
+
+ joints_3d[:, 0:2] = joints[:, 0:2] - 1
+ joints_3d_visible[:, :2] = joints_vis[:, None]
+ image_file = osp.join(self.img_prefix, image_name)
+ gt_db.append({
+ 'image_file': image_file,
+ 'bbox_id': bbox_id,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate PCKh for MPII dataset. Adapted from
+ https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+ Copyright (c) Microsoft, under the MIT License.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['/val2017/000000\
+ 397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ res_folder (str, optional): The folder to save the testing
+ results. Default: None.
+ metric (str | list[str]): Metrics to be performed.
+ Defaults: 'PCKh'.
+
+ Returns:
+ dict: PCKh for each joint
+ """
+
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ bbox_ids = result['bbox_ids']
+ batch_size = len(bbox_ids)
+ for i in range(batch_size):
+ kpts.append({'keypoints': preds[i], 'bbox_id': bbox_ids[i]})
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ preds = np.stack([kpt['keypoints'] for kpt in kpts])
+
+ # convert 0-based index to 1-based index,
+ # and get the first two dimensions.
+ preds = preds[..., :2] + 1.0
+
+ if res_folder:
+ pred_file = osp.join(res_folder, 'pred.mat')
+ savemat(pred_file, mdict={'preds': preds})
+
+ SC_BIAS = 0.6
+ threshold = 0.5
+
+ gt_file = osp.join(osp.dirname(self.ann_file), 'mpii_gt_val.mat')
+ gt_dict = loadmat(gt_file)
+ dataset_joints = gt_dict['dataset_joints']
+ jnt_missing = gt_dict['jnt_missing']
+ pos_gt_src = gt_dict['pos_gt_src']
+ headboxes_src = gt_dict['headboxes_src']
+
+ pos_pred_src = np.transpose(preds, [1, 2, 0])
+
+ head = np.where(dataset_joints == 'head')[1][0]
+ lsho = np.where(dataset_joints == 'lsho')[1][0]
+ lelb = np.where(dataset_joints == 'lelb')[1][0]
+ lwri = np.where(dataset_joints == 'lwri')[1][0]
+ lhip = np.where(dataset_joints == 'lhip')[1][0]
+ lkne = np.where(dataset_joints == 'lkne')[1][0]
+ lank = np.where(dataset_joints == 'lank')[1][0]
+
+ rsho = np.where(dataset_joints == 'rsho')[1][0]
+ relb = np.where(dataset_joints == 'relb')[1][0]
+ rwri = np.where(dataset_joints == 'rwri')[1][0]
+ rkne = np.where(dataset_joints == 'rkne')[1][0]
+ rank = np.where(dataset_joints == 'rank')[1][0]
+ rhip = np.where(dataset_joints == 'rhip')[1][0]
+
+ jnt_visible = 1 - jnt_missing
+ uv_error = pos_pred_src - pos_gt_src
+ uv_err = np.linalg.norm(uv_error, axis=1)
+ headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
+ headsizes = np.linalg.norm(headsizes, axis=0)
+ headsizes *= SC_BIAS
+ scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
+ scaled_uv_err = uv_err / scale
+ scaled_uv_err = scaled_uv_err * jnt_visible
+ jnt_count = np.sum(jnt_visible, axis=1)
+ less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
+ PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count
+
+ # save
+ rng = np.arange(0, 0.5 + 0.01, 0.01)
+ pckAll = np.zeros((len(rng), 16), dtype=np.float32)
+
+ for r, threshold in enumerate(rng):
+ less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
+ pckAll[r, :] = 100. * np.sum(
+ less_than_threshold, axis=1) / jnt_count
+
+ PCKh = np.ma.array(PCKh, mask=False)
+ PCKh.mask[6:8] = True
+
+ jnt_count = np.ma.array(jnt_count, mask=False)
+ jnt_count.mask[6:8] = True
+ jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
+
+ name_value = [('Head', PCKh[head]),
+ ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
+ ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
+ ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
+ ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
+ ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
+ ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
+ ('PCKh', np.sum(PCKh * jnt_ratio)),
+ ('PCKh@0.1', np.sum(pckAll[10, :] * jnt_ratio))]
+ name_value = OrderedDict(name_value)
+
+ return name_value
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
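The PCKh computation above normalizes joint errors by 0.6 times the head-box size (SC_BIAS) and thresholds at 0.5. A toy NumPy sketch of that normalization; the predictions, ground truth, and head boxes are invented:

```python
import numpy as np

# Toy PCKh@0.5 check mirroring the evaluation above: joint errors are divided
# by 0.6 * head-box size and thresholded at 0.5. All values are invented.
pred = np.array([[[10.0, 10.0], [50.0, 52.0]]])      # (N=1, K=2, xy)
gt = np.array([[[12.0, 11.0], [50.0, 80.0]]])        # ground truth
headboxes = np.array([[[0.0, 0.0], [30.0, 40.0]]])   # (N, 2 corners, xy)

head_size = 0.6 * np.linalg.norm(headboxes[:, 1] - headboxes[:, 0], axis=-1)
scaled_err = np.linalg.norm(pred - gt, axis=-1) / head_size[:, None]
pckh = 100.0 * (scaled_err <= 0.5).mean(axis=0)
print(pckh)  # per-joint PCKh@0.5 -> [100.   0.]
```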
diff --git a/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py b/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0da65b47a27074fac6dc1bfbd98309f75e359a3
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
@@ -0,0 +1,310 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownMpiiTrbDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MPII-TRB dataset for top-down pose estimation.
+
+ "TRB: A Novel Triplet Representation for Understanding 2D Human Body",
+ ICCV'2019. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MPII-TRB keypoint indexes::
+
+ 0: 'left_shoulder'
+ 1: 'right_shoulder'
+ 2: 'left_elbow'
+ 3: 'right_elbow'
+ 4: 'left_wrist'
+ 5: 'right_wrist'
+ 6: 'left_hip'
+ 7: 'right_hip'
+ 8: 'left_knee'
+ 9: 'right_knee'
+ 10: 'left_ankle'
+ 11: 'right_ankle'
+ 12: 'head'
+ 13: 'neck'
+
+ 14: 'right_neck'
+ 15: 'left_neck'
+ 16: 'medial_right_shoulder'
+ 17: 'lateral_right_shoulder'
+ 18: 'medial_right_bow'
+ 19: 'lateral_right_bow'
+ 20: 'medial_right_wrist'
+ 21: 'lateral_right_wrist'
+ 22: 'medial_left_shoulder'
+ 23: 'lateral_left_shoulder'
+ 24: 'medial_left_bow'
+ 25: 'lateral_left_bow'
+ 26: 'medial_left_wrist'
+ 27: 'lateral_left_wrist'
+ 28: 'medial_right_hip'
+ 29: 'lateral_right_hip'
+ 30: 'medial_right_knee'
+ 31: 'lateral_right_knee'
+ 32: 'medial_right_ankle'
+ 33: 'lateral_right_ankle'
+ 34: 'medial_left_hip'
+ 35: 'lateral_left_hip'
+ 36: 'medial_left_knee'
+ 37: 'lateral_left_knee'
+ 38: 'medial_left_ankle'
+ 39: 'lateral_left_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpii_trb.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.db = self._get_db(ann_file)
+ self.image_set = set(x['image_file'] for x in self.db)
+ self.num_images = len(self.image_set)
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self, ann_file):
+ """Load dataset."""
+ with open(ann_file, 'r') as f:
+ data = json.load(f)
+ tmpl = dict(
+ image_file=None,
+ bbox_id=None,
+ center=None,
+ scale=None,
+ rotation=0,
+ joints_3d=None,
+ joints_3d_visible=None,
+ dataset=self.dataset_name)
+
+ imid2info = {
+ int(osp.splitext(x['file_name'])[0]): x
+ for x in data['images']
+ }
+
+ num_joints = self.ann_info['num_joints']
+ gt_db = []
+
+ for anno in data['annotations']:
+ newitem = cp.deepcopy(tmpl)
+ image_id = anno['image_id']
+ newitem['bbox_id'] = anno['id']
+ newitem['image_file'] = osp.join(self.img_prefix,
+ imid2info[image_id]['file_name'])
+
+ if max(anno['keypoints']) == 0:
+ continue
+
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ for ipt in range(num_joints):
+ joints_3d[ipt, 0] = anno['keypoints'][ipt * 3 + 0]
+ joints_3d[ipt, 1] = anno['keypoints'][ipt * 3 + 1]
+ joints_3d[ipt, 2] = 0
+ t_vis = min(anno['keypoints'][ipt * 3 + 2], 1)
+ joints_3d_visible[ipt, :] = (t_vis, t_vis, 0)
+
+ center = np.array(anno['center'], dtype=np.float32)
+ scale = self.ann_info['image_size'] / anno['scale'] / 200.0
+ newitem['center'] = center
+ newitem['scale'] = scale
+ newitem['joints_3d'] = joints_3d
+ newitem['joints_3d_visible'] = joints_3d_visible
+ if 'headbox' in anno:
+ newitem['headbox'] = anno['headbox']
+ gt_db.append(newitem)
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _evaluate_kernel(self, pred, joints_3d, joints_3d_visible, headbox):
+ """Evaluate one example."""
+ num_joints = self.ann_info['num_joints']
+ headbox = np.array(headbox)
+ threshold = np.linalg.norm(headbox[:2] - headbox[2:]) * 0.3
+ hit = np.zeros(num_joints, dtype=np.float32)
+ exist = np.zeros(num_joints, dtype=np.float32)
+
+ for i in range(num_joints):
+ pred_pt = pred[i]
+ gt_pt = joints_3d[i]
+ vis = joints_3d_visible[i][0]
+ if vis:
+ exist[i] = 1
+ else:
+ continue
+ distance = np.linalg.norm(pred_pt[:2] - gt_pt[:2])
+ if distance < threshold:
+ hit[i] = 1
+ return hit, exist
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate PCKh for MPII-TRB dataset.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['/val2017/\
+ 000000397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_ids (list[str]): For example, ['27407'].
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metrics to be performed.
+ Defaults: 'PCKh'.
+
+ Returns:
+ dict: PCKh for each joint
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ str_image_path = image_paths[i]
+ image_id = int(osp.basename(osp.splitext(str_image_path)[0]))
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file):
+ """Keypoint evaluation.
+
+ Report Mean Acc of skeleton, contour and all joints.
+ """
+ num_joints = self.ann_info['num_joints']
+ hit = np.zeros(num_joints, dtype=np.float32)
+ exist = np.zeros(num_joints, dtype=np.float32)
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+
+ assert len(preds) == len(
+ self.db), f'len(preds)={len(preds)}, len(self.db)={len(self.db)}'
+ for pred, item in zip(preds, self.db):
+ h, e = self._evaluate_kernel(pred['keypoints'], item['joints_3d'],
+ item['joints_3d_visible'],
+ item['headbox'])
+ hit += h
+ exist += e
+ skeleton = np.sum(hit[:14]) / np.sum(exist[:14])
+ contour = np.sum(hit[14:]) / np.sum(exist[14:])
+ mean = np.sum(hit) / np.sum(exist)
+
+ info_str = []
+ info_str.append(('Skeleton_acc', skeleton.item()))
+ info_str.append(('Contour_acc', contour.item()))
+ info_str.append(('PCKh', mean.item()))
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
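The MPII-TRB kernel above counts a joint as correct when its error is below 0.3 times the head-box diagonal. A small self-contained sketch with made-up values:

```python
import numpy as np

# Toy version of _evaluate_kernel above: a joint counts as a hit when its
# error is below 0.3 * the head-box diagonal. Values are invented.
headbox = np.array([0.0, 0.0, 30.0, 40.0])                   # x1, y1, x2, y2
threshold = np.linalg.norm(headbox[:2] - headbox[2:]) * 0.3  # 15.0

pred = np.array([[11.0, 11.0], [100.0, 100.0]])
gt = np.array([[10.0, 10.0], [50.0, 50.0]])
vis = np.array([1, 1])

dist = np.linalg.norm(pred - gt, axis=-1)
hit = ((dist < threshold) & (vis > 0)).astype(np.float32)
exist = (vis > 0).astype(np.float32)
print(hit.sum() / exist.sum())  # 0.5
```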
diff --git a/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py b/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ad6b81405e2411bae1a531521208d2cc272fbf3
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
@@ -0,0 +1,97 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownOCHumanDataset(TopDownCocoDataset):
+ """OCHuman dataset for top-down pose estimation.
+
+ "Pose2Seg: Detection Free Human Instance Segmentation", CVPR'2019.
+ More details can be found in the `paper
+ `__ .
+
+ The "Occluded Human (OCHuman)" dataset contains 8110 heavily occluded
+ human instances within 4731 images. OCHuman is designed for validation
+ and testing only: models should be trained on the COCO training set and
+ then evaluated on OCHuman to measure their robustness to occlusion.
+
+ OCHuman keypoint indexes (same as COCO)::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/ochuman.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
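Since OCHuman is evaluated with ground-truth boxes, the `data_cfg` passed to this class mainly needs the bbox/NMS keys read in `__init__` above. A hedged sketch of such a config; the sizes and thresholds are illustrative placeholders, not values taken from this repo's configs:

```python
# Hedged sketch of a data_cfg for TopDownOCHumanDataset. Sizes and thresholds
# are placeholders; adjust them to the actual experiment config.
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[48, 64],
    num_output_channels=17,
    num_joints=17,
    use_gt_bbox=True,   # OCHuman is evaluated with ground-truth boxes
    bbox_file='',       # unused when use_gt_bbox=True
    det_bbox_thr=0.0,
    use_nms=True,
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
)
```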
diff --git a/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py b/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..c690860ac7a11129c9eee50c19eda05279e9ace1
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py
@@ -0,0 +1,312 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+try:
+ from poseval import eval_helpers
+ from poseval.evaluateAP import evaluateAP
+ has_poseval = True
+except (ImportError, ModuleNotFoundError):
+ has_poseval = False
+
+
+@DATASETS.register_module()
+class TopDownPoseTrack18Dataset(TopDownCocoDataset):
+ """PoseTrack18 dataset for top-down pose estimation.
+
+ "Posetrack: A benchmark for human pose estimation and tracking", CVPR'2018.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ PoseTrack2018 keypoint indexes::
+
+ 0: 'nose',
+ 1: 'head_bottom',
+ 2: 'head_top',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/posetrack18.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate posetrack keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['val/010016_mpii_test\
+ /000024.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_id (list(int))
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_folder = tmp_folder.name
+
+ gt_folder = osp.join(
+ osp.dirname(self.ann_file),
+ osp.splitext(self.ann_file.split('_')[-1])[0])
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = defaultdict(list)
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts[image_id].append(
+ [img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts[image_id].append(img_kpts)
+
+ self._write_posetrack18_keypoint_results(valid_kpts, gt_folder,
+ res_folder)
+
+ info_str = self._do_python_keypoint_eval(gt_folder, res_folder)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_posetrack18_keypoint_results(keypoint_results, gt_folder,
+ pred_folder):
+ """Write results into a json file.
+
+ Args:
+ keypoint_results (dict): keypoint results organized by image_id.
+ gt_folder (str): Path of directory for official gt files.
+ pred_folder (str): Path of directory to save the results.
+ """
+ categories = []
+
+ cat = {}
+ cat['supercategory'] = 'person'
+ cat['id'] = 1
+ cat['name'] = 'person'
+ cat['keypoints'] = [
+ 'nose', 'head_bottom', 'head_top', 'left_ear', 'right_ear',
+ 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
+ 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee',
+ 'right_knee', 'left_ankle', 'right_ankle'
+ ]
+ cat['skeleton'] = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13],
+ [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10],
+ [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
+ [4, 6], [5, 7]]
+ categories.append(cat)
+
+ json_files = [
+ pos for pos in os.listdir(gt_folder) if pos.endswith('.json')
+ ]
+ for json_file in json_files:
+
+ with open(osp.join(gt_folder, json_file), 'r') as f:
+ gt = json.load(f)
+
+ annotations = []
+ images = []
+
+ for image in gt['images']:
+ im = {}
+ im['id'] = image['id']
+ im['file_name'] = image['file_name']
+ images.append(im)
+
+ img_kpts = keypoint_results[im['id']]
+
+ if len(img_kpts) == 0:
+ continue
+ for track_id, img_kpt in enumerate(img_kpts[0]):
+ ann = {}
+ ann['image_id'] = img_kpt['image_id']
+ ann['keypoints'] = np.array(
+ img_kpt['keypoints']).reshape(-1).tolist()
+ ann['scores'] = np.array(ann['keypoints']).reshape(
+ [-1, 3])[:, 2].tolist()
+ ann['score'] = float(img_kpt['score'])
+ ann['track_id'] = track_id
+ annotations.append(ann)
+
+ info = {}
+ info['images'] = images
+ info['categories'] = categories
+ info['annotations'] = annotations
+
+ with open(osp.join(pred_folder, json_file), 'w') as f:
+ json.dump(info, f, sort_keys=True, indent=4)
+
+ def _do_python_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation using poseval."""
+
+ if not has_poseval:
+ raise ImportError('Please install the poseval package for '
+ 'evaluation on the PoseTrack dataset '
+ '(see requirements/optional.txt)')
+
+ argv = ['', gt_folder + '/', pred_folder + '/']
+
+ print('Loading data')
+ gtFramesAll, prFramesAll = eval_helpers.load_data_dir(argv)
+
+ print('# gt frames :', len(gtFramesAll))
+ print('# pred frames:', len(prFramesAll))
+
+ # evaluate per-frame multi-person pose estimation (AP)
+ # compute AP
+ print('Evaluation of per-frame multi-person pose estimation')
+ apAll, _, _ = evaluateAP(gtFramesAll, prFramesAll, None, False, False)
+
+ # print AP
+ print('Average Precision (AP) metric:')
+ eval_helpers.printTable(apAll)
+
+ stats = eval_helpers.getCum(apAll)
+
+ stats_names = [
+ 'Head AP', 'Shou AP', 'Elb AP', 'Wri AP', 'Hip AP', 'Knee AP',
+ 'Ankl AP', 'Total AP'
+ ]
+
+ info_str = list(zip(stats_names, stats))
+
+ return info_str
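The poseval call made in `_do_python_keypoint_eval` above can be reproduced outside the dataset class. A hedged sketch; the two folder paths are placeholders, and poseval must be installed (see requirements/optional.txt):

```python
from poseval import eval_helpers
from poseval.evaluateAP import evaluateAP

# Placeholder folders: gt_folder holds the official per-sequence json files,
# pred_folder holds same-named jsons written by
# _write_posetrack18_keypoint_results above.
gt_folder = 'data/posetrack18/annotations/val'
pred_folder = 'work_dirs/posetrack18_preds'

gt_frames, pred_frames = eval_helpers.load_data_dir(
    ['', gt_folder + '/', pred_folder + '/'])
ap_all, _, _ = evaluateAP(gt_frames, pred_frames, None, False, False)
eval_helpers.printTable(ap_all)
print(eval_helpers.getCum(ap_all))  # cumulative AP per part, Total AP last
```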
diff --git a/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py b/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..045148d3e01ed513d9514ee81a85efaba9a72287
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py
@@ -0,0 +1,549 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import deprecated_api_warning
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbVidTopDownDataset
+
+try:
+ from poseval import eval_helpers
+ from poseval.evaluateAP import evaluateAP
+ has_poseval = True
+except (ImportError, ModuleNotFoundError):
+ has_poseval = False
+
+
+@DATASETS.register_module()
+class TopDownPoseTrack18VideoDataset(Kpt2dSviewRgbVidTopDownDataset):
+ """PoseTrack18 dataset for top-down pose estimation.
+
+ "Posetrack: A benchmark for human pose estimation and tracking", CVPR'2018.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ PoseTrack2018 keypoint indexes::
+
+ 0: 'nose',
+ 1: 'head_bottom',
+ 2: 'head_top',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where videos/images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ ph_fill_len (int): The length of the placeholder to fill in the
+ image filenames, default: 6 in PoseTrack18.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False,
+ ph_fill_len=6):
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+ self.frame_weight_train = data_cfg['frame_weight_train']
+ self.frame_weight_test = data_cfg['frame_weight_test']
+ self.frame_weight = self.frame_weight_test \
+ if self.test_mode else self.frame_weight_train
+
+ self.ph_fill_len = ph_fill_len
+
+ # select the frame indices
+ self.frame_index_rand = data_cfg.get('frame_index_rand', True)
+ self.frame_index_range = data_cfg.get('frame_index_range', [-2, 2])
+ self.num_adj_frames = data_cfg.get('num_adj_frames', 1)
+ self.frame_indices_train = data_cfg.get('frame_indices_train', None)
+ self.frame_indices_test = data_cfg.get('frame_indices_test',
+ [-2, -1, 0, 1, 2])
+
+ if self.frame_indices_train is not None:
+ self.frame_indices_train.sort()
+ self.frame_indices_test.sort()
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ if (not self.test_mode) or self.use_gt_bbox:
+ # use ground truth bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ else:
+ # use bbox from detection
+ gt_db = self._load_posetrack_person_detection_results()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ file_name = img_ann['file_name']
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_files = []
+ cur_image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ image_files.append(cur_image_file)
+
+ # "images/val/012834_mpii_test/000000.jpg" -->> "000000.jpg"
+ cur_image_name = file_name.split('/')[-1]
+ ref_idx = int(cur_image_name.replace('.jpg', ''))
+
+ # select the frame indices
+ if not self.test_mode and self.frame_indices_train is not None:
+ indices = self.frame_indices_train
+ elif not self.test_mode and self.frame_index_rand:
+ low, high = self.frame_index_range
+ indices = np.random.randint(low, high + 1, self.num_adj_frames)
+ else:
+ indices = self.frame_indices_test
+
+ for index in indices:
+ if self.test_mode and index == 0:
+ continue
+ # the supporting frame index
+ support_idx = ref_idx + index
+ support_idx = np.clip(support_idx, 0, nframes - 1)
+ sup_image_file = cur_image_file.replace(
+ cur_image_name,
+ str(support_idx).zfill(self.ph_fill_len) + '.jpg')
+
+ if osp.exists(sup_image_file):
+ image_files.append(sup_image_file)
+ else:
+ warnings.warn(
+ f'{sup_image_file} does not exist, '
+ f'use {cur_image_file} instead.', UserWarning)
+ image_files.append(cur_image_file)
+ rec.append({
+ 'image_file': image_files,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id,
+ 'nframes': nframes,
+ 'frame_id': frame_id,
+ 'frame_weight': self.frame_weight
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _load_posetrack_person_detection_results(self):
+ """Load Posetrack person detection results.
+
+ Only in test mode.
+ """
+ num_joints = self.ann_info['num_joints']
+ all_boxes = None
+ with open(self.bbox_file, 'r') as f:
+ all_boxes = json.load(f)
+
+ if not all_boxes:
+ raise ValueError('=> Loading %s failed!' % self.bbox_file)
+
+ print(f'=> Total boxes: {len(all_boxes)}')
+
+ kpt_db = []
+ bbox_id = 0
+ for det_res in all_boxes:
+ if det_res['category_id'] != 1:
+ continue
+
+ score = det_res['score']
+ if score < self.det_bbox_thr:
+ continue
+
+ box = det_res['bbox']
+
+ # deal with different bbox file formats
+ if 'nframes' in det_res and 'frame_id' in det_res:
+ nframes = int(det_res['nframes'])
+ frame_id = int(det_res['frame_id'])
+ elif 'image_name' in det_res:
+ img_id = self.name2id[det_res['image_name']]
+ img_ann = self.coco.loadImgs(img_id)[0]
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+ else:
+ img_id = det_res['image_id']
+ img_ann = self.coco.loadImgs(img_id)[0]
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+
+ image_files = []
+ if 'image_name' in det_res:
+ file_name = det_res['image_name']
+ else:
+ file_name = self.id2name[det_res['image_id']]
+
+ cur_image_file = osp.join(self.img_prefix, file_name)
+ image_files.append(cur_image_file)
+
+ # "images/val/012834_mpii_test/000000.jpg" -->> "000000.jpg"
+ cur_image_name = file_name.split('/')[-1]
+ ref_idx = int(cur_image_name.replace('.jpg', ''))
+
+ indices = self.frame_indices_test
+ for index in indices:
+ if self.test_mode and index == 0:
+ continue
+ # the supporting frame index
+ support_idx = ref_idx + index
+ support_idx = np.clip(support_idx, 0, nframes - 1)
+ sup_image_file = cur_image_file.replace(
+ cur_image_name,
+ str(support_idx).zfill(self.ph_fill_len) + '.jpg')
+
+ if osp.exists(sup_image_file):
+ image_files.append(sup_image_file)
+ else:
+ warnings.warn(f'{sup_image_file} does not exist, '
+ f'use {cur_image_file} instead.')
+ image_files.append(cur_image_file)
+
+ center, scale = self._xywh2cs(*box[:4])
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
+ kpt_db.append({
+ 'image_file': image_files,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'bbox': box[:4],
+ 'bbox_score': score,
+ 'dataset': self.dataset_name,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'bbox_id': bbox_id,
+ 'nframes': nframes,
+ 'frame_id': frame_id,
+ 'frame_weight': self.frame_weight
+ })
+ bbox_id = bbox_id + 1
+ print(f'=> Total boxes after filtering low scores '
+ f'(thr={self.det_bbox_thr}): {bbox_id}')
+ return kpt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate posetrack keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['val/010016_mpii_test\
+ /000024.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_id (list(int))
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_folder = tmp_folder.name
+
+ gt_folder = osp.join(
+ osp.dirname(self.ann_file),
+ osp.splitext(self.ann_file.split('_')[-1])[0])
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ if not isinstance(image_paths[i], list):
+ image_id = self.name2id[image_paths[i]
+ [len(self.img_prefix):]]
+ else:
+ image_id = self.name2id[image_paths[i][0]
+ [len(self.img_prefix):]]
+
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = defaultdict(list)
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts[image_id].append(
+ [img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts[image_id].append(img_kpts)
+
+ self._write_keypoint_results(valid_kpts, gt_folder, res_folder)
+
+ info_str = self._do_keypoint_eval(gt_folder, res_folder)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoint_results, gt_folder, pred_folder):
+ """Write results into a json file.
+
+ Args:
+ keypoint_results (dict): keypoint results organized by image_id.
+ gt_folder (str): Path of directory for official gt files.
+ pred_folder (str): Path of directory to save the results.
+ """
+ categories = []
+
+ cat = {}
+ cat['supercategory'] = 'person'
+ cat['id'] = 1
+ cat['name'] = 'person'
+ cat['keypoints'] = [
+ 'nose', 'head_bottom', 'head_top', 'left_ear', 'right_ear',
+ 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
+ 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee',
+ 'right_knee', 'left_ankle', 'right_ankle'
+ ]
+ cat['skeleton'] = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13],
+ [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10],
+ [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
+ [4, 6], [5, 7]]
+ categories.append(cat)
+
+ json_files = [
+ pos for pos in os.listdir(gt_folder) if pos.endswith('.json')
+ ]
+ for json_file in json_files:
+
+ with open(osp.join(gt_folder, json_file), 'r') as f:
+ gt = json.load(f)
+
+ annotations = []
+ images = []
+
+ for image in gt['images']:
+ im = {}
+ im['id'] = image['id']
+ im['file_name'] = image['file_name']
+ images.append(im)
+
+ img_kpts = keypoint_results[im['id']]
+
+ if len(img_kpts) == 0:
+ continue
+ for track_id, img_kpt in enumerate(img_kpts[0]):
+ ann = {}
+ ann['image_id'] = img_kpt['image_id']
+ ann['keypoints'] = np.array(
+ img_kpt['keypoints']).reshape(-1).tolist()
+ ann['scores'] = np.array(ann['keypoints']).reshape(
+ [-1, 3])[:, 2].tolist()
+ ann['score'] = float(img_kpt['score'])
+ ann['track_id'] = track_id
+ annotations.append(ann)
+
+ info = {}
+ info['images'] = images
+ info['categories'] = categories
+ info['annotations'] = annotations
+
+ with open(osp.join(pred_folder, json_file), 'w') as f:
+ json.dump(info, f, sort_keys=True, indent=4)
+
+ def _do_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation using poseval."""
+
+ if not has_poseval:
+ raise ImportError('Please install the poseval package for '
+ 'evaluation on the PoseTrack dataset '
+ '(see requirements/optional.txt)')
+
+ argv = ['', gt_folder + '/', pred_folder + '/']
+
+ print('Loading data')
+ gtFramesAll, prFramesAll = eval_helpers.load_data_dir(argv)
+
+ print('# gt frames :', len(gtFramesAll))
+ print('# pred frames:', len(prFramesAll))
+
+ # evaluate per-frame multi-person pose estimation (AP)
+ # compute AP
+ print('Evaluation of per-frame multi-person pose estimation')
+ apAll, _, _ = evaluateAP(gtFramesAll, prFramesAll, None, False, False)
+
+ # print AP
+ print('Average Precision (AP) metric:')
+ eval_helpers.printTable(apAll)
+
+ stats = eval_helpers.getCum(apAll)
+
+ stats_names = [
+ 'Head AP', 'Shou AP', 'Elb AP', 'Wri AP', 'Hip AP', 'Knee AP',
+ 'Ankl AP', 'Total AP'
+ ]
+
+ info_str = list(zip(stats_names, stats))
+
+ return info_str
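The supporting-frame selection used by this video dataset boils down to: pick frame offsets (fixed at test time, optionally random at train time), clip them to the sequence length, and zero-pad the frame index into the filename. A toy sketch with invented values:

```python
import numpy as np

# Toy sketch of the supporting-frame selection above: pick frame offsets,
# clip to the sequence length, and zero-pad the filenames. Values are made up.
ref_idx, nframes, ph_fill_len = 3, 5, 6
test_mode = True
indices = [-2, -1, 0, 1, 2] if test_mode else \
    np.random.randint(-2, 3, size=1).tolist()

image_files = []
for index in indices:
    if test_mode and index == 0:
        continue  # the key frame itself is already handled separately
    support_idx = int(np.clip(ref_idx + index, 0, nframes - 1))
    image_files.append(str(support_idx).zfill(ph_fill_len) + '.jpg')

print(image_files)  # ['000001.jpg', '000002.jpg', '000004.jpg', '000004.jpg']
```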
diff --git a/mmpose/datasets/pipelines/__init__.py b/mmpose/datasets/pipelines/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf06db1c9d0656627ed91670d9a91ede66e0254f
--- /dev/null
+++ b/mmpose/datasets/pipelines/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_transform import * # noqa
+from .hand_transform import * # noqa
+from .loading import LoadImageFromFile # noqa
+from .mesh_transform import * # noqa
+from .pose3d_transform import * # noqa
+from .shared_transform import * # noqa
+from .top_down_transform import * # noqa
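These star-imports register every transform with the `PIPELINES` registry, so datasets can build their pipelines from plain config dicts. A hedged sketch of composing a small top-down pipeline; the transform names below are the usual mmpose 0.x ones and should be checked against what this repo actually registers:

```python
# Hedged sketch: build a top-down data pipeline from config dicts via the
# registry populated by the imports above. Transform names/arguments are
# assumptions based on common mmpose 0.x configs.
from mmpose.datasets.pipelines import Compose

pipeline_cfg = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(type='NormalizeTensor',
         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    dict(type='Collect',
         keys=['img'],
         meta_keys=['image_file', 'center', 'scale', 'rotation']),
]
pipeline = Compose(pipeline_cfg)  # callable on a results dict from a dataset
```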
diff --git a/mmpose/datasets/pipelines/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..27c60276c441f5a04c615a0b288ba0753791b8ef
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/bottom_up_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/bottom_up_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..27e73eee79496d02d5767343be616f05e1008529
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/bottom_up_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/hand_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/hand_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbb945d753154a38806b4788a34505539daa2be8
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/hand_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/loading.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/loading.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbe2a9db4bdaa63eb2d035d726a8b2bbfcaefe02
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/loading.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/mesh_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/mesh_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a65ce24aaed37ba42e670e6f9634f37a77fbc59
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/mesh_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/pose3d_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/pose3d_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6a2732458a1e55e73b28cce3e287ccf68dc53a71
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/pose3d_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/shared_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/shared_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cb7bf3df668b4fd70beb4ff315b9da82b02fe322
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/shared_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/__pycache__/top_down_transform.cpython-310.pyc b/mmpose/datasets/pipelines/__pycache__/top_down_transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..66e1c788347612c691b6539c3349ef28c42b0879
Binary files /dev/null and b/mmpose/datasets/pipelines/__pycache__/top_down_transform.cpython-310.pyc differ
diff --git a/mmpose/datasets/pipelines/bottom_up_transform.py b/mmpose/datasets/pipelines/bottom_up_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..032ce4548f5c6c142771405bf84b3a647641b460
--- /dev/null
+++ b/mmpose/datasets/pipelines/bottom_up_transform.py
@@ -0,0 +1,816 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import (get_affine_transform, get_warp_matrix,
+ warp_affine_joints)
+from mmpose.datasets.builder import PIPELINES
+from .shared_transform import Compose
+
+
+def _ceil_to_multiples_of(x, base=64):
+ """Round x up to an integral multiple of the base."""
+ return int(np.ceil(x / base)) * base
+
+
+def _get_multi_scale_size(image,
+ input_size,
+ current_scale,
+ min_scale,
+ use_udp=False):
+ """Get the size for multi-scale training.
+
+ Args:
+ image: Input image.
+ input_size (np.ndarray[2]): Size (w, h) of the image input.
+ current_scale (float): Scale factor.
+ min_scale (float): Minimal scale.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Returns:
+ tuple: A tuple containing multi-scale sizes.
+
+ - (w_resized, h_resized) (tuple(int)): resized width/height
+ - center (np.ndarray): image center
+ - scale (np.ndarray): scales wrt width/height
+ """
+ assert len(input_size) == 2
+ h, w, _ = image.shape
+
+ # calculate the size for min_scale
+ min_input_w = _ceil_to_multiples_of(min_scale * input_size[0], 64)
+ min_input_h = _ceil_to_multiples_of(min_scale * input_size[1], 64)
+ if w < h:
+ w_resized = int(min_input_w * current_scale / min_scale)
+ h_resized = int(
+ _ceil_to_multiples_of(min_input_w / w * h, 64) * current_scale /
+ min_scale)
+ if use_udp:
+ scale_w = w - 1.0
+ scale_h = (h_resized - 1.0) / (w_resized - 1.0) * (w - 1.0)
+ else:
+ scale_w = w / 200.0
+ scale_h = h_resized / w_resized * w / 200.0
+ else:
+ h_resized = int(min_input_h * current_scale / min_scale)
+ w_resized = int(
+ _ceil_to_multiples_of(min_input_h / h * w, 64) * current_scale /
+ min_scale)
+ if use_udp:
+ scale_h = h - 1.0
+ scale_w = (w_resized - 1.0) / (h_resized - 1.0) * (h - 1.0)
+ else:
+ scale_h = h / 200.0
+ scale_w = w_resized / h_resized * h / 200.0
+ if use_udp:
+ center = (scale_w / 2.0, scale_h / 2.0)
+ else:
+ center = np.array([round(w / 2.0), round(h / 2.0)])
+ return (w_resized, h_resized), center, np.array([scale_w, scale_h])
+
+
+def _resize_align_multi_scale(image, input_size, current_scale, min_scale):
+ """Resize the images for multi-scale training.
+
+ Args:
+ image: Input image
+ input_size (np.ndarray[2]): Size (w, h) of the image input
+ current_scale (float): Current scale
+ min_scale (float): Minimal scale
+
+ Returns:
+ tuple: A tuple containing image info.
+
+ - image_resized (np.ndarray): resized image
+ - center (np.ndarray): center of image
+ - scale (np.ndarray): scale
+ """
+ assert len(input_size) == 2
+ size_resized, center, scale = _get_multi_scale_size(
+ image, input_size, current_scale, min_scale)
+
+ trans = get_affine_transform(center, scale, 0, size_resized)
+ image_resized = cv2.warpAffine(image, trans, size_resized)
+
+ return image_resized, center, scale
+
+
+def _resize_align_multi_scale_udp(image, input_size, current_scale, min_scale):
+ """Resize the images for multi-scale training.
+
+ Args:
+ image: Input image
+ input_size (np.ndarray[2]): Size (w, h) of the image input
+ current_scale (float): Current scale
+ min_scale (float): Minimal scale
+
+ Returns:
+ tuple: A tuple containing image info.
+
+ - image_resized (np.ndarray): resized image
+ - center (np.ndarray): center of image
+ - scale (np.ndarray): scale
+ """
+ assert len(input_size) == 2
+ size_resized, _, _ = _get_multi_scale_size(image, input_size,
+ current_scale, min_scale, True)
+
+ _, center, scale = _get_multi_scale_size(image, input_size, min_scale,
+ min_scale, True)
+
+ trans = get_warp_matrix(
+ theta=0,
+ size_input=np.array(scale, dtype=np.float32),
+ size_dst=np.array(size_resized, dtype=np.float32) - 1.0,
+ size_target=np.array(scale, dtype=np.float32))
+ image_resized = cv2.warpAffine(
+ image.copy(), trans, size_resized, flags=cv2.INTER_LINEAR)
+
+ return image_resized, center, scale
+
+
+class HeatmapGenerator:
+ """Generate heatmaps for bottom-up models.
+
+ Args:
+ num_joints (int): Number of keypoints
+ output_size (np.ndarray): Size (w, h) of feature map
+ sigma (int): Sigma of the heatmaps.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, output_size, num_joints, sigma=-1, use_udp=False):
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+ self.output_size = np.array([output_size, output_size],
+ dtype=int)  # np.int was removed in NumPy 1.24
+ self.num_joints = num_joints
+ if sigma < 0:
+ sigma = self.output_size.prod()**0.5 / 64
+ self.sigma = sigma
+ size = 6 * sigma + 3
+ self.use_udp = use_udp
+ if use_udp:
+ self.x = np.arange(0, size, 1, np.float32)
+ self.y = self.x[:, None]
+ else:
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+ x0, y0 = 3 * sigma + 1, 3 * sigma + 1
+ self.g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
+
+ def __call__(self, joints):
+ """Generate heatmaps."""
+ hms = np.zeros(
+ (self.num_joints, self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ sigma = self.sigma
+ for p in joints:
+ for idx, pt in enumerate(p):
+ if pt[2] > 0:
+ x, y = int(pt[0]), int(pt[1])
+ if x < 0 or y < 0 or \
+ x >= self.output_size[0] or y >= self.output_size[1]:
+ continue
+
+ if self.use_udp:
+ x0 = 3 * sigma + 1 + pt[0] - x
+ y0 = 3 * sigma + 1 + pt[1] - y
+ g = np.exp(-((self.x - x0)**2 + (self.y - y0)**2) /
+ (2 * sigma**2))
+ else:
+ g = self.g
+
+ ul = int(np.round(x - 3 * sigma -
+ 1)), int(np.round(y - 3 * sigma - 1))
+ br = int(np.round(x + 3 * sigma +
+ 2)), int(np.round(y + 3 * sigma + 2))
+
+ c, d = max(0,
+ -ul[0]), min(br[0], self.output_size[0]) - ul[0]
+ a, b = max(0,
+ -ul[1]), min(br[1], self.output_size[1]) - ul[1]
+
+ cc, dd = max(0, ul[0]), min(br[0], self.output_size[0])
+ aa, bb = max(0, ul[1]), min(br[1], self.output_size[1])
+ hms[idx, aa:bb,
+ cc:dd] = np.maximum(hms[idx, aa:bb, cc:dd], g[a:b,
+ c:d])
+ return hms
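+
+# Usage sketch (illustrative comment, not part of the original mmpose source):
+# a generator with a 64x64 output and sigma=2 places a Gaussian peak on every
+# visible joint, e.g.
+#
+#     gen = HeatmapGenerator(output_size=64, num_joints=1, sigma=2)
+#     joints = np.array([[[32.0, 32.0, 1.0]]])  # (people, joints, [x, y, vis])
+#     hms = gen(joints)  # shape (1, 64, 64), peak value 1.0 at (y=32, x=32)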
+
+
+class JointsEncoder:
+    """Encodes the visible joints into (coordinate, score) pairs; the
+    coordinate of one joint and its score are of `int` type:
+
+    (idx * output_size**2 + y * output_size + x, 1) or (0, 0).
+
+    Args:
+        max_num_people (int): Max number of people in an image.
+        num_joints (int): Number of keypoints.
+        output_size (np.ndarray): Size (w, h) of feature map.
+        tag_per_joint (bool): Option to use one tag map per joint.
+ """
+
+ def __init__(self, max_num_people, num_joints, output_size, tag_per_joint):
+ self.max_num_people = max_num_people
+ self.num_joints = num_joints
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+            self.output_size = np.array([output_size, output_size],
+                                        dtype=int)
+ self.tag_per_joint = tag_per_joint
+
+ def __call__(self, joints):
+ """
+ Note:
+ - number of people in image: N
+ - number of keypoints: K
+ - max number of people in an image: M
+
+ Args:
+ joints (np.ndarray[N,K,3])
+
+ Returns:
+ visible_kpts (np.ndarray[M,K,2]).
+ """
+ visible_kpts = np.zeros((self.max_num_people, self.num_joints, 2),
+ dtype=np.float32)
+ for i in range(len(joints)):
+ tot = 0
+ for idx, pt in enumerate(joints[i]):
+ x, y = int(pt[0]), int(pt[1])
+ if (pt[2] > 0 and 0 <= y < self.output_size[1]
+ and 0 <= x < self.output_size[0]):
+ if self.tag_per_joint:
+ visible_kpts[i][tot] = \
+ (idx * self.output_size.prod()
+ + y * self.output_size[0] + x, 1)
+ else:
+ visible_kpts[i][tot] = (y * self.output_size[0] + x, 1)
+ tot += 1
+ return visible_kpts
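+
+# Usage sketch (illustrative, not part of the original mmpose source): with a
+# 64x64 feature map and tag_per_joint=True, a visible keypoint `idx` at (x, y)
+# is encoded as the flattened index idx * 64 * 64 + y * 64 + x with score 1.
+#
+#     encoder = JointsEncoder(max_num_people=30, num_joints=17,
+#                             output_size=64, tag_per_joint=True)
+#     visible_kpts = encoder(joints)  # joints: (N, 17, 3) -> output (30, 17, 2)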
+
+
+class PAFGenerator:
+ """Generate part affinity fields.
+
+ Args:
+ output_size (np.ndarray): Size (w, h) of feature map.
+ limb_width (int): Limb width of part affinity fields.
+ skeleton (list[list]): connections of joints.
+ """
+
+ def __init__(self, output_size, limb_width, skeleton):
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+            self.output_size = np.array([output_size, output_size],
+                                        dtype=int)
+ self.limb_width = limb_width
+ self.skeleton = skeleton
+
+ def _accumulate_paf_map_(self, pafs, src, dst, count):
+ """Accumulate part affinity fields between two given joints.
+
+ Args:
+ pafs (np.ndarray[2,H,W]): paf maps (2 dimensions:x axis and
+ y axis) for a certain limb connection. This argument will
+ be modified inplace.
+ src (np.ndarray[2,]): coordinates of the source joint.
+ dst (np.ndarray[2,]): coordinates of the destination joint.
+ count (np.ndarray[H,W]): count map that preserves the number
+ of non-zero vectors at each point. This argument will be
+ modified inplace.
+ """
+ limb_vec = dst - src
+ norm = np.linalg.norm(limb_vec)
+ if norm == 0:
+ unit_limb_vec = np.zeros(2)
+ else:
+ unit_limb_vec = limb_vec / norm
+
+ min_x = max(np.floor(min(src[0], dst[0]) - self.limb_width), 0)
+ max_x = min(
+ np.ceil(max(src[0], dst[0]) + self.limb_width),
+ self.output_size[0] - 1)
+ min_y = max(np.floor(min(src[1], dst[1]) - self.limb_width), 0)
+ max_y = min(
+ np.ceil(max(src[1], dst[1]) + self.limb_width),
+ self.output_size[1] - 1)
+
+ range_x = list(range(int(min_x), int(max_x + 1), 1))
+ range_y = list(range(int(min_y), int(max_y + 1), 1))
+
+ mask = np.zeros_like(count, dtype=bool)
+ if len(range_x) > 0 and len(range_y) > 0:
+ xx, yy = np.meshgrid(range_x, range_y)
+ delta_x = xx - src[0]
+ delta_y = yy - src[1]
+ dist = np.abs(delta_x * unit_limb_vec[1] -
+ delta_y * unit_limb_vec[0])
+ mask_local = (dist < self.limb_width)
+ mask[yy, xx] = mask_local
+
+ pafs[0, mask] += unit_limb_vec[0]
+ pafs[1, mask] += unit_limb_vec[1]
+ count += mask
+
+ return pafs, count
+
+ def __call__(self, joints):
+ """Generate the target part affinity fields."""
+ pafs = np.zeros(
+ (len(self.skeleton) * 2, self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ for idx, sk in enumerate(self.skeleton):
+ count = np.zeros((self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ for p in joints:
+ src = p[sk[0]]
+ dst = p[sk[1]]
+ if src[2] > 0 and dst[2] > 0:
+ self._accumulate_paf_map_(pafs[2 * idx:2 * idx + 2],
+ src[:2], dst[:2], count)
+
+ pafs[2 * idx:2 * idx + 2] /= np.maximum(count, 1)
+
+ return pafs
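+
+# Usage sketch (illustrative, not part of the original mmpose source): every
+# skeleton connection contributes two channels (the x and y components of the
+# unit limb vector), so the output has 2 * len(skeleton) channels.
+#
+#     paf_gen = PAFGenerator(output_size=64, limb_width=1,
+#                            skeleton=[[0, 1], [1, 2]])
+#     pafs = paf_gen(joints)  # joints: (N, K, 3) -> output (4, 64, 64)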
+
+
+@PIPELINES.register_module()
+class BottomUpRandomFlip:
+ """Data augmentation with random image flip for bottom-up.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ image, mask, joints = results['img'], results['mask'], results[
+ 'joints']
+ self.flip_index = results['ann_info']['flip_index']
+ self.output_size = results['ann_info']['heatmap_size']
+
+ assert isinstance(mask, list)
+ assert isinstance(joints, list)
+ assert len(mask) == len(joints)
+ assert len(mask) == len(self.output_size)
+
+ if np.random.random() < self.flip_prob:
+ image = image[:, ::-1].copy() - np.zeros_like(image)
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+                    _output_size = np.array([_output_size, _output_size],
+                                            dtype=int)
+ mask[i] = mask[i][:, ::-1].copy()
+ joints[i] = joints[i][:, self.flip_index]
+ joints[i][:, :, 0] = _output_size[0] - joints[i][:, :, 0] - 1
+ results['img'], results['mask'], results[
+ 'joints'] = image, mask, joints
+ return results
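+
+# Config sketch (illustrative): registered pipeline steps are normally built
+# from config dicts via the PIPELINES registry, e.g.
+#
+#     dict(type='BottomUpRandomFlip', flip_prob=0.5)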
+
+
+@PIPELINES.register_module()
+class BottomUpRandomAffine:
+ """Data augmentation with random scaling & rotating.
+
+ Args:
+        rot_factor (int): Rotating to [-rot_factor, rot_factor].
+        scale_factor (list[float]): The scale is sampled from
+            [scale_factor[0], scale_factor[1]].
+ scale_type: wrt ``long`` or ``short`` length of the image.
+ trans_factor: Translation factor.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self,
+ rot_factor,
+ scale_factor,
+ scale_type,
+ trans_factor,
+ use_udp=False):
+ self.max_rotation = rot_factor
+ self.min_scale = scale_factor[0]
+ self.max_scale = scale_factor[1]
+ self.scale_type = scale_type
+ self.trans_factor = trans_factor
+ self.use_udp = use_udp
+
+ def _get_scale(self, image_size, resized_size):
+ w, h = image_size
+ w_resized, h_resized = resized_size
+ if w / w_resized < h / h_resized:
+ if self.scale_type == 'long':
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ elif self.scale_type == 'short':
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+ else:
+ raise ValueError(f'Unknown scale type: {self.scale_type}')
+ else:
+ if self.scale_type == 'long':
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+ elif self.scale_type == 'short':
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ else:
+ raise ValueError(f'Unknown scale type: {self.scale_type}')
+
+ scale = np.array([w_pad, h_pad], dtype=np.float32)
+
+ return scale
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ image, mask, joints = results['img'], results['mask'], results[
+ 'joints']
+
+ self.input_size = results['ann_info']['image_size']
+ if not isinstance(self.input_size, np.ndarray):
+ self.input_size = np.array(self.input_size)
+ if self.input_size.size > 1:
+ assert len(self.input_size) == 2
+ else:
+ self.input_size = [self.input_size, self.input_size]
+ self.output_size = results['ann_info']['heatmap_size']
+
+ assert isinstance(mask, list)
+ assert isinstance(joints, list)
+ assert len(mask) == len(joints)
+ assert len(mask) == len(self.output_size), (len(mask),
+ len(self.output_size),
+ self.output_size)
+
+ height, width = image.shape[:2]
+ if self.use_udp:
+ center = np.array(((width - 1.0) / 2, (height - 1.0) / 2))
+ else:
+ center = np.array((width / 2, height / 2))
+
+ img_scale = np.array([width, height], dtype=np.float32)
+ aug_scale = np.random.random() * (self.max_scale - self.min_scale) \
+ + self.min_scale
+ img_scale *= aug_scale
+ aug_rot = (np.random.random() * 2 - 1) * self.max_rotation
+
+ if self.trans_factor > 0:
+ dx = np.random.randint(-self.trans_factor * img_scale[0] / 200.0,
+ self.trans_factor * img_scale[0] / 200.0)
+ dy = np.random.randint(-self.trans_factor * img_scale[1] / 200.0,
+ self.trans_factor * img_scale[1] / 200.0)
+
+ center[0] += dx
+ center[1] += dy
+ if self.use_udp:
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+ _output_size = [_output_size, _output_size]
+
+ scale = self._get_scale(img_scale, _output_size)
+
+ trans = get_warp_matrix(
+ theta=aug_rot,
+ size_input=center * 2.0,
+ size_dst=np.array(
+ (_output_size[0], _output_size[1]), dtype=np.float32) -
+ 1.0,
+ size_target=scale)
+ mask[i] = cv2.warpAffine(
+ (mask[i] * 255).astype(np.uint8),
+ trans, (int(_output_size[0]), int(_output_size[1])),
+ flags=cv2.INTER_LINEAR) / 255
+ mask[i] = (mask[i] > 0.5).astype(np.float32)
+ joints[i][:, :, 0:2] = \
+ warp_affine_joints(joints[i][:, :, 0:2].copy(), trans)
+ if results['ann_info']['scale_aware_sigma']:
+ joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale
+ scale = self._get_scale(img_scale, self.input_size)
+ mat_input = get_warp_matrix(
+ theta=aug_rot,
+ size_input=center * 2.0,
+ size_dst=np.array((self.input_size[0], self.input_size[1]),
+ dtype=np.float32) - 1.0,
+ size_target=scale)
+ image = cv2.warpAffine(
+ image,
+ mat_input, (int(self.input_size[0]), int(self.input_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+ _output_size = [_output_size, _output_size]
+ scale = self._get_scale(img_scale, _output_size)
+ mat_output = get_affine_transform(
+ center=center,
+ scale=scale / 200.0,
+ rot=aug_rot,
+ output_size=_output_size)
+ mask[i] = cv2.warpAffine(
+ (mask[i] * 255).astype(np.uint8), mat_output,
+ (int(_output_size[0]), int(_output_size[1]))) / 255
+ mask[i] = (mask[i] > 0.5).astype(np.float32)
+
+ joints[i][:, :, 0:2] = \
+ warp_affine_joints(joints[i][:, :, 0:2], mat_output)
+ if results['ann_info']['scale_aware_sigma']:
+ joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale
+
+ scale = self._get_scale(img_scale, self.input_size)
+ mat_input = get_affine_transform(
+ center=center,
+ scale=scale / 200.0,
+ rot=aug_rot,
+ output_size=self.input_size)
+ image = cv2.warpAffine(image, mat_input, (int(
+ self.input_size[0]), int(self.input_size[1])))
+
+ results['img'], results['mask'], results[
+ 'joints'] = image, mask, joints
+
+ return results
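+
+# Config sketch (illustrative; the numbers are typical values, not taken from
+# this repository's configs):
+#
+#     dict(type='BottomUpRandomAffine', rot_factor=30,
+#          scale_factor=[0.75, 1.5], scale_type='short', trans_factor=40)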
+
+
+@PIPELINES.register_module()
+class BottomUpGenerateHeatmapTarget:
+ """Generate multi-scale heatmap target for bottom-up.
+
+ Args:
+        sigma (int): Sigma of the heatmap Gaussian.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, sigma, use_udp=False):
+ self.sigma = sigma
+ self.use_udp = use_udp
+
+ def _generate(self, num_joints, heatmap_size):
+ """Get heatmap generator."""
+ heatmap_generator = [
+ HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp)
+ for output_size in heatmap_size
+ ]
+ return heatmap_generator
+
+ def __call__(self, results):
+ """Generate multi-scale heatmap target for bottom-up."""
+ heatmap_generator = \
+ self._generate(results['ann_info']['num_joints'],
+ results['ann_info']['heatmap_size'])
+ target_list = list()
+ joints_list = results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ heatmaps = heatmap_generator[scale_id](joints_list[scale_id])
+ target_list.append(heatmaps.astype(np.float32))
+ results['target'] = target_list
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGenerateTarget:
+    """Generate multi-scale heatmap target for associative embedding.
+
+ Args:
+ sigma (int): Sigma of heatmap Gaussian
+ max_num_people (int): Maximum number of people in an image
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, sigma, max_num_people, use_udp=False):
+ self.sigma = sigma
+ self.max_num_people = max_num_people
+ self.use_udp = use_udp
+
+ def _generate(self, num_joints, heatmap_size):
+ """Get heatmap generator and joint encoder."""
+ heatmap_generator = [
+ HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp)
+ for output_size in heatmap_size
+ ]
+ joints_encoder = [
+ JointsEncoder(self.max_num_people, num_joints, output_size, True)
+ for output_size in heatmap_size
+ ]
+ return heatmap_generator, joints_encoder
+
+ def __call__(self, results):
+ """Generate multi-scale heatmap target for bottom-up."""
+ heatmap_generator, joints_encoder = \
+ self._generate(results['ann_info']['num_joints'],
+ results['ann_info']['heatmap_size'])
+ target_list = list()
+ mask_list, joints_list = results['mask'], results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ target_t = heatmap_generator[scale_id](joints_list[scale_id])
+ joints_t = joints_encoder[scale_id](joints_list[scale_id])
+
+ target_list.append(target_t.astype(np.float32))
+ mask_list[scale_id] = mask_list[scale_id].astype(np.float32)
+ joints_list[scale_id] = joints_t.astype(np.int32)
+
+ results['masks'], results['joints'] = mask_list, joints_list
+ results['targets'] = target_list
+
+ return results
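+
+# Config sketch (illustrative): a typical associative-embedding training
+# pipeline generates heatmaps and encoded joints in one step, e.g.
+#
+#     dict(type='BottomUpGenerateTarget', sigma=2, max_num_people=30)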
+
+
+@PIPELINES.register_module()
+class BottomUpGeneratePAFTarget:
+ """Generate multi-scale heatmaps and part affinity fields (PAF) target for
+ bottom-up. Paper ref: Cao et al. Realtime Multi-Person 2D Human Pose
+ Estimation using Part Affinity Fields (CVPR 2017).
+
+ Args:
+        limb_width (int): Limb width of part affinity fields.
+        skeleton (list[list], optional): Connections of joints. If None, it is
+            read from ``results['ann_info']['skeleton']`` at call time.
+ """
+
+ def __init__(self, limb_width, skeleton=None):
+ self.limb_width = limb_width
+ self.skeleton = skeleton
+
+ def _generate(self, heatmap_size, skeleton):
+ """Get PAF generator."""
+ paf_generator = [
+ PAFGenerator(output_size, self.limb_width, skeleton)
+ for output_size in heatmap_size
+ ]
+ return paf_generator
+
+ def __call__(self, results):
+ """Generate multi-scale part affinity fields for bottom-up."""
+ if self.skeleton is None:
+ assert results['ann_info']['skeleton'] is not None
+ self.skeleton = results['ann_info']['skeleton']
+
+ paf_generator = \
+ self._generate(results['ann_info']['heatmap_size'],
+ self.skeleton)
+ target_list = list()
+ joints_list = results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ pafs = paf_generator[scale_id](joints_list[scale_id])
+ target_list.append(pafs.astype(np.float32))
+
+ results['target'] = target_list
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGetImgSize:
+    """Get multi-scale image sizes for bottom-up, including base_size and
+    test_scale_factor. The aspect ratio is kept and the image is resized to
+    `results['ann_info']['image_size']×current_scale`.
+
+    Args:
+        test_scale_factor (List[float]): Multi-scale factors for testing.
+        current_scale (int): The current scale. Default: 1.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, test_scale_factor, current_scale=1, use_udp=False):
+ self.test_scale_factor = test_scale_factor
+ self.min_scale = min(test_scale_factor)
+ self.current_scale = current_scale
+ self.use_udp = use_udp
+
+ def __call__(self, results):
+ """Get multi-scale image sizes for bottom-up."""
+ input_size = results['ann_info']['image_size']
+ if not isinstance(input_size, np.ndarray):
+ input_size = np.array(input_size)
+ if input_size.size > 1:
+ assert len(input_size) == 2
+ else:
+            input_size = np.array([input_size, input_size], dtype=int)
+ img = results['img']
+
+ h, w, _ = img.shape
+
+ # calculate the size for min_scale
+ min_input_w = _ceil_to_multiples_of(self.min_scale * input_size[0], 64)
+ min_input_h = _ceil_to_multiples_of(self.min_scale * input_size[1], 64)
+ if w < h:
+ w_resized = int(min_input_w * self.current_scale / self.min_scale)
+ h_resized = int(
+ _ceil_to_multiples_of(min_input_w / w * h, 64) *
+ self.current_scale / self.min_scale)
+ if self.use_udp:
+ scale_w = w - 1.0
+ scale_h = (h_resized - 1.0) / (w_resized - 1.0) * (w - 1.0)
+ else:
+ scale_w = w / 200.0
+ scale_h = h_resized / w_resized * w / 200.0
+ else:
+ h_resized = int(min_input_h * self.current_scale / self.min_scale)
+ w_resized = int(
+ _ceil_to_multiples_of(min_input_h / h * w, 64) *
+ self.current_scale / self.min_scale)
+ if self.use_udp:
+ scale_h = h - 1.0
+ scale_w = (w_resized - 1.0) / (h_resized - 1.0) * (h - 1.0)
+ else:
+ scale_h = h / 200.0
+ scale_w = w_resized / h_resized * h / 200.0
+ if self.use_udp:
+ center = (scale_w / 2.0, scale_h / 2.0)
+ else:
+ center = np.array([round(w / 2.0), round(h / 2.0)])
+ results['ann_info']['test_scale_factor'] = self.test_scale_factor
+ results['ann_info']['base_size'] = (w_resized, h_resized)
+ results['ann_info']['center'] = center
+ results['ann_info']['scale'] = np.array([scale_w, scale_h])
+
+ return results
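+
+# Config sketch (illustrative): single-scale testing simply uses
+#
+#     dict(type='BottomUpGetImgSize', test_scale_factor=[1])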
+
+
+@PIPELINES.register_module()
+class BottomUpResizeAlign:
+ """Resize multi-scale size and align transform for bottom-up.
+
+ Args:
+        transforms (list): Wrapped transforms, typically ToTensor and
+            NormalizeTensor.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, transforms, use_udp=False):
+ self.transforms = Compose(transforms)
+ if use_udp:
+ self._resize_align_multi_scale = _resize_align_multi_scale_udp
+ else:
+ self._resize_align_multi_scale = _resize_align_multi_scale
+
+ def __call__(self, results):
+ """Resize multi-scale size and align transform for bottom-up."""
+ input_size = results['ann_info']['image_size']
+ if not isinstance(input_size, np.ndarray):
+ input_size = np.array(input_size)
+ if input_size.size > 1:
+ assert len(input_size) == 2
+ else:
+            input_size = np.array([input_size, input_size], dtype=int)
+ test_scale_factor = results['ann_info']['test_scale_factor']
+ aug_data = []
+
+ for _, s in enumerate(sorted(test_scale_factor, reverse=True)):
+ _results = results.copy()
+ image_resized, _, _ = self._resize_align_multi_scale(
+ _results['img'], input_size, s, min(test_scale_factor))
+ _results['img'] = image_resized
+ _results = self.transforms(_results)
+ transformed_img = _results['img'].unsqueeze(0)
+ aug_data.append(transformed_img)
+
+ results['ann_info']['aug_data'] = aug_data
+
+ return results
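+
+# Config sketch (illustrative; mean/std are the usual ImageNet statistics, not
+# values taken from this repository):
+#
+#     dict(type='BottomUpResizeAlign',
+#          transforms=[
+#              dict(type='ToTensor'),
+#              dict(type='NormalizeTensor',
+#                   mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#          ])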
diff --git a/mmpose/datasets/pipelines/hand_transform.py b/mmpose/datasets/pipelines/hand_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..b83e399c4e7a5e5b07650cb01e9426da9d8cee4b
--- /dev/null
+++ b/mmpose/datasets/pipelines/hand_transform.py
@@ -0,0 +1,63 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from mmpose.datasets.builder import PIPELINES
+from .top_down_transform import TopDownRandomFlip
+
+
+@PIPELINES.register_module()
+class HandRandomFlip(TopDownRandomFlip):
+ """Data augmentation with random image flip. A child class of
+ TopDownRandomFlip.
+
+ Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+ 'hand_type', 'rel_root_depth' and 'ann_info'.
+
+ Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+ 'hand_type', 'rel_root_depth'.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ # base flip augmentation
+ super().__call__(results)
+
+ # flip hand type and root depth
+ hand_type = results['hand_type']
+ rel_root_depth = results['rel_root_depth']
+ flipped = results['flipped']
+ if flipped:
+ hand_type[0], hand_type[1] = hand_type[1], hand_type[0]
+ rel_root_depth = -rel_root_depth
+ results['hand_type'] = hand_type
+ results['rel_root_depth'] = rel_root_depth
+ return results
+
+
+@PIPELINES.register_module()
+class HandGenerateRelDepthTarget:
+ """Generate the target relative root depth.
+
+ Required keys: 'rel_root_depth', 'rel_root_valid', 'ann_info'.
+
+ Modified keys: 'target', 'target_weight'.
+ """
+
+ def __init__(self):
+ pass
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ rel_root_depth = results['rel_root_depth']
+ rel_root_valid = results['rel_root_valid']
+ cfg = results['ann_info']
+ D = cfg['heatmap_size_root']
+ root_depth_bound = cfg['root_depth_bound']
+ target = (rel_root_depth / root_depth_bound + 0.5) * D
+ target_weight = rel_root_valid * (target >= 0) * (target <= D)
+ results['target'] = target * np.ones(1, dtype=np.float32)
+ results['target_weight'] = target_weight * np.ones(1, dtype=np.float32)
+ return results
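+
+# Worked example (illustrative; the config values are assumptions, not taken
+# from this repository): with heatmap_size_root D = 64 and
+# root_depth_bound = 400.0, a relative root depth of 0 maps to the centre bin
+# (0 / 400 + 0.5) * 64 = 32, and a depth of +100 maps to bin 48.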
diff --git a/mmpose/datasets/pipelines/loading.py b/mmpose/datasets/pipelines/loading.py
new file mode 100644
index 0000000000000000000000000000000000000000..64750056438e8c06bcc4083dc1e8164f0671cd0f
--- /dev/null
+++ b/mmpose/datasets/pipelines/loading.py
@@ -0,0 +1,91 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class LoadImageFromFile:
+ """Loading image(s) from file.
+
+ Required key: "image_file".
+
+ Added key: "img".
+
+ Args:
+ to_float32 (bool): Whether to convert the loaded image to a float32
+            numpy array. If set to False, the loaded image is a uint8 array.
+ Defaults to False.
+ color_type (str): Flags specifying the color type of a loaded image,
+ candidates are 'color', 'grayscale' and 'unchanged'.
+ channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmcv.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ """
+
+ def __init__(self,
+ to_float32=False,
+ color_type='color',
+ channel_order='rgb',
+ file_client_args=dict(backend='disk')):
+ self.to_float32 = to_float32
+ self.color_type = color_type
+ self.channel_order = channel_order
+ self.file_client_args = file_client_args.copy()
+ self.file_client = None
+
+ def _read_image(self, path):
+ img_bytes = self.file_client.get(path)
+ img = mmcv.imfrombytes(
+ img_bytes, flag=self.color_type, channel_order=self.channel_order)
+ if img is None:
+            raise ValueError(f'Failed to read {path}')
+ if self.to_float32:
+ img = img.astype(np.float32)
+ return img
+
+ def __call__(self, results):
+ """Loading image(s) from file."""
+ if self.file_client is None:
+ self.file_client = mmcv.FileClient(**self.file_client_args)
+
+ image_file = results.get('image_file', None)
+
+ if isinstance(image_file, (list, tuple)):
+ # Load images from a list of paths
+ results['img'] = [self._read_image(path) for path in image_file]
+ elif image_file is not None:
+ # Load single image from path
+ results['img'] = self._read_image(image_file)
+ else:
+ if 'img' not in results:
+                # If `image_file` is not in results, check that `img` exists
+                # and format the image. This is for compatibility when the
+                # image is manually set outside the pipeline.
+ raise KeyError('Either `image_file` or `img` should exist in '
+ 'results.')
+ assert isinstance(results['img'], np.ndarray)
+ if self.color_type == 'color' and self.channel_order == 'rgb':
+ # The original results['img'] is assumed to be image(s) in BGR
+ # order, so we convert the color according to the arguments.
+ if results['img'].ndim == 3:
+ results['img'] = mmcv.bgr2rgb(results['img'])
+ elif results['img'].ndim == 4:
+ results['img'] = np.concatenate(
+ [mmcv.bgr2rgb(img) for img in results['img']], axis=0)
+ else:
+ raise ValueError('results["img"] has invalid shape '
+ f'{results["img"].shape}')
+
+ results['image_file'] = None
+
+ return results
+
+ def __repr__(self):
+ repr_str = (f'{self.__class__.__name__}('
+ f'to_float32={self.to_float32}, '
+ f"color_type='{self.color_type}', "
+ f'file_client_args={self.file_client_args})')
+ return repr_str
diff --git a/mmpose/datasets/pipelines/mesh_transform.py b/mmpose/datasets/pipelines/mesh_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3f32febcf01f37daa4957bfb0f17b8478773d59
--- /dev/null
+++ b/mmpose/datasets/pipelines/mesh_transform.py
@@ -0,0 +1,399 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import mmcv
+import numpy as np
+import torch
+
+from mmpose.core.post_processing import (affine_transform, fliplr_joints,
+ get_affine_transform)
+from mmpose.datasets.builder import PIPELINES
+
+
+def _flip_smpl_pose(pose):
+ """Flip SMPL pose parameters horizontally.
+
+ Args:
+ pose (np.ndarray([72])): SMPL pose parameters
+
+ Returns:
+ pose_flipped
+ """
+
+ flippedParts = [
+ 0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13, 14, 18, 19,
+ 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33, 34, 35, 30, 31, 32, 36, 37,
+ 38, 42, 43, 44, 39, 40, 41, 45, 46, 47, 51, 52, 53, 48, 49, 50, 57, 58,
+ 59, 54, 55, 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 67, 68
+ ]
+ pose_flipped = pose[flippedParts]
+ # Negate the second and the third dimension of the axis-angle
+ pose_flipped[1::3] = -pose_flipped[1::3]
+ pose_flipped[2::3] = -pose_flipped[2::3]
+ return pose_flipped
+
+
+def _flip_iuv(iuv, uv_type='BF'):
+ """Flip IUV image horizontally.
+
+ Note:
+ IUV image height: H
+ IUV image width: W
+
+ Args:
+        iuv (np.ndarray([H, W, 3])): IUV image.
+ uv_type (str): The type of the UV map.
+ Candidate values:
+ 'DP': The UV map used in DensePose project.
+ 'SMPL': The default UV map of SMPL model.
+ 'BF': The UV map used in DecoMR project.
+ Default: 'BF'
+
+ Returns:
+        iuv_flipped (np.ndarray([H, W, 3])): Flipped IUV image.
+ """
+ assert uv_type in ['DP', 'SMPL', 'BF']
+ if uv_type == 'BF':
+ iuv_flipped = iuv[:, ::-1, :]
+ iuv_flipped[:, :, 1] = 255 - iuv_flipped[:, :, 1]
+ else:
+ # The flip of other UV map is complex, not finished yet.
+ raise NotImplementedError(
+ f'The flip of {uv_type} UV map is not implemented yet.')
+
+ return iuv_flipped
+
+
+def _construct_rotation_matrix(rot, size=3):
+ """Construct the in-plane rotation matrix.
+
+ Args:
+ rot (float): Rotation angle (degree).
+ size (int): The size of the rotation matrix.
+ Candidate Values: 2, 3. Defaults to 3.
+
+ Returns:
+        rot_mat (np.ndarray([size, size])): Rotation matrix.
+ """
+ rot_mat = np.eye(size, dtype=np.float32)
+ if rot != 0:
+ rot_rad = np.deg2rad(rot)
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+ rot_mat[0, :2] = [cs, -sn]
+ rot_mat[1, :2] = [sn, cs]
+
+ return rot_mat
+
+
+def _rotate_joints_3d(joints_3d, rot):
+ """Rotate the 3D joints in the local coordinates.
+
+ Note:
+ Joints number: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ rot (float): Rotation angle (degree).
+
+ Returns:
+ joints_3d_rotated
+ """
+ # in-plane rotation
+ # 3D joints are rotated counterclockwise,
+    # so the rotation angle is negated.
+ rot_mat = _construct_rotation_matrix(-rot, 3)
+
+ joints_3d_rotated = np.einsum('ij,kj->ki', rot_mat, joints_3d)
+ joints_3d_rotated = joints_3d_rotated.astype('float32')
+ return joints_3d_rotated
+
+
+def _rotate_smpl_pose(pose, rot):
+ """Rotate SMPL pose parameters. SMPL (https://smpl.is.tue.mpg.de/) is a 3D
+ human model.
+
+ Args:
+ pose (np.ndarray([72])): SMPL pose parameters
+ rot (float): Rotation angle (degree).
+
+ Returns:
+ pose_rotated
+ """
+ pose_rotated = pose.copy()
+ if rot != 0:
+ rot_mat = _construct_rotation_matrix(-rot)
+ orient = pose[:3]
+ # find the rotation of the body in camera frame
+ per_rdg, _ = cv2.Rodrigues(orient)
+ # apply the global rotation to the global orientation
+ res_rot, _ = cv2.Rodrigues(np.dot(rot_mat, per_rdg))
+ pose_rotated[:3] = (res_rot.T)[0]
+
+ return pose_rotated
+
+
+def _flip_joints_3d(joints_3d, joints_3d_visible, flip_pairs):
+ """Flip human joints in 3D space horizontally.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ joints_3d_visible (np.ndarray([K, 1])): Visibility of keypoints.
+ flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+
+ Returns:
+ joints_3d_flipped, joints_3d_visible_flipped
+ """
+
+ assert len(joints_3d) == len(joints_3d_visible)
+
+ joints_3d_flipped = joints_3d.copy()
+ joints_3d_visible_flipped = joints_3d_visible.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ joints_3d_flipped[left, :] = joints_3d[right, :]
+ joints_3d_flipped[right, :] = joints_3d[left, :]
+
+ joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
+ joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]
+
+ # Flip horizontally
+ joints_3d_flipped[:, 0] = -joints_3d_flipped[:, 0]
+ joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped
+
+ return joints_3d_flipped, joints_3d_visible_flipped
+
+
+@PIPELINES.register_module()
+class LoadIUVFromFile:
+ """Loading IUV image from file."""
+
+ def __init__(self, to_float32=False):
+ self.to_float32 = to_float32
+ self.color_type = 'color'
+ # channel relations: iuv->bgr
+ self.channel_order = 'bgr'
+
+ def __call__(self, results):
+ """Loading image from file."""
+ has_iuv = results['has_iuv']
+ use_iuv = results['ann_info']['use_IUV']
+ if has_iuv and use_iuv:
+ iuv_file = results['iuv_file']
+ iuv = mmcv.imread(iuv_file, self.color_type, self.channel_order)
+ if iuv is None:
+                raise ValueError(f'Failed to read {iuv_file}')
+ else:
+ has_iuv = 0
+ iuv = None
+
+ results['has_iuv'] = has_iuv
+ results['iuv'] = iuv
+ return results
+
+
+@PIPELINES.register_module()
+class IUVToTensor:
+    """Transform IUV image to part index mask and uv coordinates image. The 3
+    channels of the IUV image are: part index, u coordinates, v coordinates.
+
+ Required key: 'iuv', 'ann_info'.
+ Modifies key: 'part_index', 'uv_coordinates'.
+
+ Args:
+        results (dict): contains all information about training.
+ """
+
+ def __call__(self, results):
+ iuv = results['iuv']
+ if iuv is None:
+ H, W = results['ann_info']['iuv_size']
+ part_index = torch.zeros([1, H, W], dtype=torch.long)
+ uv_coordinates = torch.zeros([2, H, W], dtype=torch.float32)
+ else:
+ part_index = torch.LongTensor(iuv[:, :, 0])[None, :, :]
+ uv_coordinates = torch.FloatTensor(iuv[:, :, 1:]) / 255
+ uv_coordinates = uv_coordinates.permute(2, 0, 1)
+ results['part_index'] = part_index
+ results['uv_coordinates'] = uv_coordinates
+ return results
+
+
+@PIPELINES.register_module()
+class MeshRandomChannelNoise:
+ """Data augmentation with random channel noise.
+
+ Required keys: 'img'
+ Modifies key: 'img'
+
+ Args:
+        noise_factor (float): Multiply each channel with a factor sampled
+            uniformly from ``[1 - noise_factor, 1 + noise_factor]``.
+ """
+
+ def __init__(self, noise_factor=0.4):
+ self.noise_factor = noise_factor
+
+ def __call__(self, results):
+ """Perform data augmentation with random channel noise."""
+ img = results['img']
+
+ # Each channel is multiplied with a number
+ # in the area [1-self.noise_factor, 1+self.noise_factor]
+ pn = np.random.uniform(1 - self.noise_factor, 1 + self.noise_factor,
+ (1, 3))
+ img = cv2.multiply(img, pn)
+
+ results['img'] = img
+ return results
+
+
+@PIPELINES.register_module()
+class MeshRandomFlip:
+ """Data augmentation with random image flip.
+
+    Required keys: 'img', 'joints_2d', 'joints_2d_visible', 'joints_3d',
+    'joints_3d_visible', 'center', 'pose', 'iuv' and 'ann_info'.
+    Modifies key: 'img', 'joints_2d', 'joints_2d_visible', 'joints_3d',
+    'joints_3d_visible', 'center', 'pose', 'iuv'.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ if np.random.rand() > self.flip_prob:
+ return results
+
+ img = results['img']
+ joints_2d = results['joints_2d']
+ joints_2d_visible = results['joints_2d_visible']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ pose = results['pose']
+ center = results['center']
+
+ img = img[:, ::-1, :]
+ pose = _flip_smpl_pose(pose)
+
+ joints_2d, joints_2d_visible = fliplr_joints(
+ joints_2d, joints_2d_visible, img.shape[1],
+ results['ann_info']['flip_pairs'])
+
+ joints_3d, joints_3d_visible = _flip_joints_3d(
+ joints_3d, joints_3d_visible, results['ann_info']['flip_pairs'])
+ center[0] = img.shape[1] - center[0] - 1
+
+ if 'iuv' in results.keys():
+ iuv = results['iuv']
+ if iuv is not None:
+ iuv = _flip_iuv(iuv, results['ann_info']['uv_type'])
+ results['iuv'] = iuv
+
+ results['img'] = img
+ results['joints_2d'] = joints_2d
+ results['joints_2d_visible'] = joints_2d_visible
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+ results['pose'] = pose
+ results['center'] = center
+ return results
+
+
+@PIPELINES.register_module()
+class MeshGetRandomScaleRotation:
+ """Data augmentation with random scaling & rotating.
+
+ Required key: 'scale'. Modifies key: 'scale' and 'rotation'.
+
+ Args:
+ rot_factor (int): Rotating to ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor (float): Scaling to ``[1-scale_factor, 1+scale_factor]``.
+ rot_prob (float): Probability of random rotation.
+ """
+
+ def __init__(self, rot_factor=30, scale_factor=0.25, rot_prob=0.6):
+ self.rot_factor = rot_factor
+ self.scale_factor = scale_factor
+ self.rot_prob = rot_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ s = results['scale']
+
+ sf = self.scale_factor
+ rf = self.rot_factor
+
+ s_factor = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ s = s * s_factor
+
+ r_factor = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
+ r = r_factor if np.random.rand() <= self.rot_prob else 0
+
+ results['scale'] = s
+ results['rotation'] = r
+
+ return results
+
+
+@PIPELINES.register_module()
+class MeshAffine:
+    """Affine transform the image to get the input image. Also affine
+    transform the 2D keypoints, 3D keypoints and IUV image.
+
+    Required keys: 'img', 'joints_2d', 'joints_2d_visible', 'joints_3d',
+    'joints_3d_visible', 'pose', 'iuv', 'ann_info', 'scale', 'rotation' and
+    'center'. Modifies key: 'img', 'joints_2d', 'joints_2d_visible',
+    'joints_3d', 'pose', 'iuv'.
+ """
+
+ def __call__(self, results):
+ image_size = results['ann_info']['image_size']
+
+ img = results['img']
+ joints_2d = results['joints_2d']
+ joints_2d_visible = results['joints_2d_visible']
+ joints_3d = results['joints_3d']
+ pose = results['pose']
+
+ c = results['center']
+ s = results['scale']
+ r = results['rotation']
+ trans = get_affine_transform(c, s, r, image_size)
+
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+
+ for i in range(results['ann_info']['num_joints']):
+ if joints_2d_visible[i, 0] > 0.0:
+ joints_2d[i] = affine_transform(joints_2d[i], trans)
+
+ joints_3d = _rotate_joints_3d(joints_3d, r)
+ pose = _rotate_smpl_pose(pose, r)
+
+ results['img'] = img
+ results['joints_2d'] = joints_2d
+ results['joints_2d_visible'] = joints_2d_visible
+ results['joints_3d'] = joints_3d
+ results['pose'] = pose
+
+ if 'iuv' in results.keys():
+ iuv = results['iuv']
+ if iuv is not None:
+ iuv_size = results['ann_info']['iuv_size']
+ iuv = cv2.warpAffine(
+ iuv,
+ trans, (int(iuv_size[0]), int(iuv_size[1])),
+ flags=cv2.INTER_NEAREST)
+ results['iuv'] = iuv
+
+ return results
diff --git a/mmpose/datasets/pipelines/pose3d_transform.py b/mmpose/datasets/pipelines/pose3d_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..124937861f71bf8148641d59dbb42bd47457c902
--- /dev/null
+++ b/mmpose/datasets/pipelines/pose3d_transform.py
@@ -0,0 +1,643 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.utils import build_from_cfg
+
+from mmpose.core.camera import CAMERAS
+from mmpose.core.post_processing import fliplr_regression
+from mmpose.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class GetRootCenteredPose:
+ """Zero-center the pose around a given root joint. Optionally, the root
+ joint can be removed from the original pose and stored as a separate item.
+
+ Note that the root-centered joints may no longer align with some annotation
+ information (e.g. flip_pairs, num_joints, inference_channel, etc.) due to
+ the removal of the root joint.
+
+ Args:
+ item (str): The name of the pose to apply root-centering.
+ root_index (int): Root joint index in the pose.
+ visible_item (str): The name of the visibility item.
+        remove_root (bool): If True, remove the root joint from the pose.
+        root_name (str): Optional. If not None, it will be used as the key to
+ store the root position separated from the original pose.
+
+ Required keys:
+ item
+
+ Modified keys:
+ item, visible_item, root_name
+ """
+
+ def __init__(self,
+ item,
+ root_index,
+ visible_item=None,
+ remove_root=False,
+ root_name=None):
+ self.item = item
+ self.root_index = root_index
+ self.remove_root = remove_root
+ self.root_name = root_name
+ self.visible_item = visible_item
+
+ def __call__(self, results):
+ assert self.item in results
+ joints = results[self.item]
+ root_idx = self.root_index
+
+ assert joints.ndim >= 2 and joints.shape[-2] > root_idx,\
+ f'Got invalid joint shape {joints.shape}'
+
+ root = joints[..., root_idx:root_idx + 1, :]
+ joints = joints - root
+
+ results[self.item] = joints
+ if self.root_name is not None:
+ results[self.root_name] = root
+
+ if self.remove_root:
+ results[self.item] = np.delete(
+ results[self.item], root_idx, axis=-2)
+ if self.visible_item is not None:
+ assert self.visible_item in results
+ results[self.visible_item] = np.delete(
+ results[self.visible_item], root_idx, axis=-2)
+            # Add a flag to avoid later transforms that rely on the root
+ # joint or the original joint index
+ results[f'{self.item}_root_removed'] = True
+
+ # Save the root index which is necessary to restore the global pose
+ if self.root_name is not None:
+ results[f'{self.root_name}_index'] = self.root_index
+
+ return results
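+
+# Config sketch (illustrative; the key names are assumptions): centre the 3D
+# pose on a root joint at index 0 and keep the root position separately:
+#
+#     dict(type='GetRootCenteredPose', item='target', root_index=0,
+#          visible_item='target_visible', remove_root=False,
+#          root_name='root_position')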
+
+
+@PIPELINES.register_module()
+class NormalizeJointCoordinate:
+ """Normalize the joint coordinate with given mean and std.
+
+ Args:
+ item (str): The name of the pose to normalize.
+ mean (array): Mean values of joint coordinates in shape [K, C].
+ std (array): Std values of joint coordinates in shape [K, C].
+ norm_param_file (str): Optionally load a dict containing `mean` and
+ `std` from a file using `mmcv.load`.
+
+ Required keys:
+ item
+
+ Modified keys:
+ item
+ """
+
+ def __init__(self, item, mean=None, std=None, norm_param_file=None):
+ self.item = item
+ self.norm_param_file = norm_param_file
+ if norm_param_file is not None:
+ norm_param = mmcv.load(norm_param_file)
+ assert 'mean' in norm_param and 'std' in norm_param
+ mean = norm_param['mean']
+ std = norm_param['std']
+ else:
+ assert mean is not None
+ assert std is not None
+
+ self.mean = np.array(mean, dtype=np.float32)
+ self.std = np.array(std, dtype=np.float32)
+
+ def __call__(self, results):
+ assert self.item in results
+ results[self.item] = (results[self.item] - self.mean) / self.std
+ results[f'{self.item}_mean'] = self.mean.copy()
+ results[f'{self.item}_std'] = self.std.copy()
+ return results
+
+
+@PIPELINES.register_module()
+class ImageCoordinateNormalization:
+ """Normalize the 2D joint coordinate with image width and height. Range [0,
+ w] is mapped to [-1, 1], while preserving the aspect ratio.
+
+ Args:
+ item (str|list[str]): The name of the pose to normalize.
+ norm_camera (bool): Whether to normalize camera intrinsics.
+ Default: False.
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+ item
+
+ Modified keys:
+ item (, camera_param)
+ """
+
+ def __init__(self, item, norm_camera=False, camera_param=None):
+ self.item = item
+ if isinstance(self.item, str):
+ self.item = [self.item]
+
+ self.norm_camera = norm_camera
+
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+
+ def __call__(self, results):
+ center = np.array(
+ [0.5 * results['image_width'], 0.5 * results['image_height']],
+ dtype=np.float32)
+ scale = np.array(0.5 * results['image_width'], dtype=np.float32)
+
+ for item in self.item:
+ results[item] = (results[item] - center) / scale
+
+ if self.norm_camera:
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, \
+ 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'f' in camera_param and 'c' in camera_param
+ camera_param['f'] = camera_param['f'] / scale
+ camera_param['c'] = (camera_param['c'] - center[:, None]) / scale
+ if 'camera_param' not in results:
+ results['camera_param'] = dict()
+ results['camera_param'].update(camera_param)
+
+ return results
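+
+# Worked example (illustrative): for a 1000x1002 image, center = (500, 501) and
+# scale = 500, so a 2D joint at pixel (1000, 501) is mapped to
+# ((1000 - 500) / 500, (501 - 501) / 500) = (1.0, 0.0); the x-range [0, w]
+# becomes [-1, 1] and y is scaled by the same factor.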
+
+
+@PIPELINES.register_module()
+class CollectCameraIntrinsics:
+ """Store camera intrinsics in a 1-dim array, including f, c, k, p.
+
+ Args:
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+        need_distortion (bool): Whether distortion parameters k and p are
+            needed. Default: True.
+
+ Required keys:
+ camera_param (if camera parameters are not given in initialization)
+
+ Modified keys:
+ intrinsics
+ """
+
+ def __init__(self, camera_param=None, need_distortion=True):
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+ self.need_distortion = need_distortion
+
+ def __call__(self, results):
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'f' in camera_param and 'c' in camera_param
+ intrinsics = np.concatenate(
+ [camera_param['f'].reshape(2), camera_param['c'].reshape(2)])
+ if self.need_distortion:
+ assert 'k' in camera_param and 'p' in camera_param
+ intrinsics = np.concatenate([
+ intrinsics, camera_param['k'].reshape(3),
+ camera_param['p'].reshape(2)
+ ])
+ results['intrinsics'] = intrinsics
+
+ return results
+
+
+@PIPELINES.register_module()
+class CameraProjection:
+ """Apply camera projection to joint coordinates.
+
+ Args:
+ item (str): The name of the pose to apply camera projection.
+ mode (str): The type of camera projection, supported options are
+
+ - world_to_camera
+ - world_to_pixel
+ - camera_to_world
+ - camera_to_pixel
+ output_name (str|None): The name of the projected pose. If None
+ (default) is given, the projected pose will be stored in place.
+ camera_type (str): The camera class name (should be registered in
+ CAMERA).
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+
+ - item
+ - camera_param (if camera parameters are not given in initialization)
+
+ Modified keys:
+ output_name
+ """
+
+ def __init__(self,
+ item,
+ mode,
+ output_name=None,
+ camera_type='SimpleCamera',
+ camera_param=None):
+ self.item = item
+ self.mode = mode
+ self.output_name = output_name
+ self.camera_type = camera_type
+ allowed_mode = {
+ 'world_to_camera',
+ 'world_to_pixel',
+ 'camera_to_world',
+ 'camera_to_pixel',
+ }
+ if mode not in allowed_mode:
+ raise ValueError(
+ f'Got invalid mode: {mode}, allowed modes are {allowed_mode}')
+
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera = self._build_camera(camera_param)
+
+ def _build_camera(self, param):
+ cfgs = dict(type=self.camera_type, param=param)
+ return build_from_cfg(cfgs, CAMERAS)
+
+ def __call__(self, results):
+ assert self.item in results
+ joints = results[self.item]
+
+ if self.static_camera:
+ camera = self.camera
+ else:
+ assert 'camera_param' in results, 'Camera parameters are missing.'
+ camera = self._build_camera(results['camera_param'])
+
+ if self.mode == 'world_to_camera':
+ output = camera.world_to_camera(joints)
+ elif self.mode == 'world_to_pixel':
+ output = camera.world_to_pixel(joints)
+ elif self.mode == 'camera_to_world':
+ output = camera.camera_to_world(joints)
+ elif self.mode == 'camera_to_pixel':
+ output = camera.camera_to_pixel(joints)
+ else:
+ raise NotImplementedError
+
+ output_name = self.output_name
+ if output_name is None:
+ output_name = self.item
+
+ results[output_name] = output
+ return results
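+
+# Config sketch (illustrative; the key names are assumptions): project a
+# world-space pose into the camera frame using per-sample camera parameters
+# from results['camera_param']:
+#
+#     dict(type='CameraProjection', item='target', mode='world_to_camera',
+#          output_name='target_camera')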
+
+
+@PIPELINES.register_module()
+class RelativeJointRandomFlip:
+ """Data augmentation with random horizontal joint flip around a root joint.
+
+ Args:
+ item (str|list[str]): The name of the pose to flip.
+ flip_cfg (dict|list[dict]): Configurations of the fliplr_regression
+ function. It should contain the following arguments:
+
+ - ``center_mode``: The mode to set the center location on the \
+ x-axis to flip around.
+ - ``center_x`` or ``center_index``: Set the x-axis location or \
+ the root joint's index to define the flip center.
+
+ Please refer to the docstring of the fliplr_regression function for
+ more details.
+ visible_item (str|list[str]): The name of the visibility item which
+ will be flipped accordingly along with the pose.
+ flip_prob (float): Probability of flip.
+ flip_camera (bool): Whether to flip horizontal distortion coefficients.
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+ item
+
+ Modified keys:
+ item (, camera_param)
+ """
+
+ def __init__(self,
+ item,
+ flip_cfg,
+ visible_item=None,
+ flip_prob=0.5,
+ flip_camera=False,
+ camera_param=None):
+ self.item = item
+ self.flip_cfg = flip_cfg
+ self.vis_item = visible_item
+ self.flip_prob = flip_prob
+ self.flip_camera = flip_camera
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+
+ if isinstance(self.item, str):
+ self.item = [self.item]
+ if isinstance(self.flip_cfg, dict):
+ self.flip_cfg = [self.flip_cfg] * len(self.item)
+ assert len(self.item) == len(self.flip_cfg)
+ if isinstance(self.vis_item, str):
+ self.vis_item = [self.vis_item]
+
+ def __call__(self, results):
+
+ if results.get(f'{self.item}_root_removed', False):
+ raise RuntimeError('The transform RelativeJointRandomFlip should '
+ f'not be applied to {self.item} whose root '
+ 'joint has been removed and joint indices have '
+ 'been changed')
+
+ if np.random.rand() <= self.flip_prob:
+
+ flip_pairs = results['ann_info']['flip_pairs']
+
+ # flip joint coordinates
+ for i, item in enumerate(self.item):
+ assert item in results
+ joints = results[item]
+
+ joints_flipped = fliplr_regression(joints, flip_pairs,
+ **self.flip_cfg[i])
+
+ results[item] = joints_flipped
+
+ # flip joint visibility
+ for vis_item in self.vis_item:
+ assert vis_item in results
+ visible = results[vis_item]
+ visible_flipped = visible.copy()
+ for left, right in flip_pairs:
+ visible_flipped[..., left, :] = visible[..., right, :]
+ visible_flipped[..., right, :] = visible[..., left, :]
+ results[vis_item] = visible_flipped
+
+ # flip horizontal distortion coefficients
+ if self.flip_camera:
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, \
+ 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'c' in camera_param
+ camera_param['c'][0] *= -1
+
+ if 'p' in camera_param:
+ camera_param['p'][0] *= -1
+
+ if 'camera_param' not in results:
+ results['camera_param'] = dict()
+ results['camera_param'].update(camera_param)
+
+ return results
+
+
+@PIPELINES.register_module()
+class PoseSequenceToTensor:
+ """Convert pose sequence from numpy array to Tensor.
+
+ The original pose sequence should have a shape of [T,K,C] or [K,C], where
+ T is the sequence length, K and C are keypoint number and dimension. The
+ converted pose sequence will have a shape of [KxC, T].
+
+ Args:
+ item (str): The name of the pose sequence
+
+ Required keys:
+ item
+
+ Modified keys:
+ item
+ """
+
+ def __init__(self, item):
+ self.item = item
+
+ def __call__(self, results):
+ assert self.item in results
+ seq = results[self.item]
+
+ assert isinstance(seq, np.ndarray)
+ assert seq.ndim in {2, 3}
+
+ if seq.ndim == 2:
+ seq = seq[None, ...]
+
+ T = seq.shape[0]
+ seq = seq.transpose(1, 2, 0).reshape(-1, T)
+ results[self.item] = torch.from_numpy(seq)
+
+ return results
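+
+# Worked example (illustrative): a sequence of T = 27 frames with K = 17 joints
+# and C = 2 coordinates, i.e. shape (27, 17, 2), becomes a tensor of shape
+# (34, 27): K * C = 34 channels over T frames.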
+
+
+@PIPELINES.register_module()
+class Generate3DHeatmapTarget:
+ """Generate the target 3d heatmap.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+ Modified keys: 'target', and 'target_weight'.
+
+ Args:
+        sigma: Sigma of the heatmap Gaussian.
+ joint_indices (list): Indices of joints used for heatmap generation.
+ If None (default) is given, all joints will be used.
+ max_bound (float): The maximal value of heatmap.
+ """
+
+ def __init__(self, sigma=2, joint_indices=None, max_bound=1.0):
+ self.sigma = sigma
+ self.joint_indices = joint_indices
+ self.max_bound = max_bound
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ cfg = results['ann_info']
+ image_size = cfg['image_size']
+ W, H, D = cfg['heatmap_size']
+ heatmap3d_depth_bound = cfg['heatmap3d_depth_bound']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ # select the joints used for target generation
+ if self.joint_indices is not None:
+ joints_3d = joints_3d[self.joint_indices, ...]
+ joints_3d_visible = joints_3d_visible[self.joint_indices, ...]
+ joint_weights = joint_weights[self.joint_indices, ...]
+ num_joints = joints_3d.shape[0]
+
+ # get the joint location in heatmap coordinates
+ mu_x = joints_3d[:, 0] * W / image_size[0]
+ mu_y = joints_3d[:, 1] * H / image_size[1]
+ mu_z = (joints_3d[:, 2] / heatmap3d_depth_bound + 0.5) * D
+
+ target = np.zeros([num_joints, D, H, W], dtype=np.float32)
+
+ target_weight = joints_3d_visible[:, 0].astype(np.float32)
+ target_weight = target_weight * (mu_z >= 0) * (mu_z < D)
+ if use_different_joint_weights:
+ target_weight = target_weight * joint_weights
+ target_weight = target_weight[:, None]
+
+ # only compute the voxel value near the joints location
+ tmp_size = 3 * self.sigma
+
+ # get neighboring voxels coordinates
+ x = y = z = np.arange(2 * tmp_size + 1, dtype=np.float32) - tmp_size
+ zz, yy, xx = np.meshgrid(z, y, x)
+ xx = xx[None, ...].astype(np.float32)
+ yy = yy[None, ...].astype(np.float32)
+ zz = zz[None, ...].astype(np.float32)
+ mu_x = mu_x[..., None, None, None]
+ mu_y = mu_y[..., None, None, None]
+ mu_z = mu_z[..., None, None, None]
+ xx, yy, zz = xx + mu_x, yy + mu_y, zz + mu_z
+
+ # round the coordinates
+ xx = xx.round().clip(0, W - 1)
+ yy = yy.round().clip(0, H - 1)
+ zz = zz.round().clip(0, D - 1)
+
+ # compute the target value near joints
+ local_target = \
+ np.exp(-((xx - mu_x)**2 + (yy - mu_y)**2 + (zz - mu_z)**2) /
+ (2 * self.sigma**2))
+
+ # put the local target value to the full target heatmap
+ local_size = xx.shape[1]
+ idx_joints = np.tile(
+ np.arange(num_joints)[:, None, None, None],
+ [1, local_size, local_size, local_size])
+ idx = np.stack([idx_joints, zz, yy, xx],
+ axis=-1).astype(int).reshape(-1, 4)
+ target[idx[:, 0], idx[:, 1], idx[:, 2],
+ idx[:, 3]] = local_target.reshape(-1)
+ target = target * self.max_bound
+ results['target'] = target
+ results['target_weight'] = target_weight
+ return results
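+
+# Worked example (illustrative; the config values are assumptions): with
+# heatmap_size = [64, 64, 64] and heatmap3d_depth_bound = 400.0, a joint at
+# depth z = 0 falls in the centre depth bin (0 / 400 + 0.5) * 64 = 32, while
+# z = -200 falls in bin 0.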
+
+
+@PIPELINES.register_module()
+class GenerateVoxel3DHeatmapTarget:
+ """Generate the target 3d heatmap.
+
+    Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+    Modified keys: 'targets_3d'.
+
+ Args:
+        sigma: Sigma of the heatmap Gaussian (mm).
+ joint_indices (list): Indices of joints used for heatmap generation.
+ If None (default) is given, all joints will be used.
+ """
+
+ def __init__(self, sigma=200.0, joint_indices=None):
+ self.sigma = sigma # mm
+ self.joint_indices = joint_indices
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ cfg = results['ann_info']
+
+ num_people = len(joints_3d)
+ num_joints = joints_3d[0].shape[0]
+
+ if self.joint_indices is not None:
+ num_joints = len(self.joint_indices)
+ joint_indices = self.joint_indices
+ else:
+ joint_indices = list(range(num_joints))
+
+ space_size = cfg['space_size']
+ space_center = cfg['space_center']
+ cube_size = cfg['cube_size']
+ grids_x = np.linspace(-space_size[0] / 2, space_size[0] / 2,
+ cube_size[0]) + space_center[0]
+ grids_y = np.linspace(-space_size[1] / 2, space_size[1] / 2,
+ cube_size[1]) + space_center[1]
+ grids_z = np.linspace(-space_size[2] / 2, space_size[2] / 2,
+ cube_size[2]) + space_center[2]
+
+ target = np.zeros(
+ (num_joints, cube_size[0], cube_size[1], cube_size[2]),
+ dtype=np.float32)
+
+ for n in range(num_people):
+ for idx, joint_id in enumerate(joint_indices):
+ mu_x = joints_3d[n][joint_id][0]
+ mu_y = joints_3d[n][joint_id][1]
+ mu_z = joints_3d[n][joint_id][2]
+ vis = joints_3d_visible[n][joint_id][0]
+ if vis < 1:
+ continue
+ i_x = [
+ np.searchsorted(grids_x, mu_x - 3 * self.sigma),
+ np.searchsorted(grids_x, mu_x + 3 * self.sigma, 'right')
+ ]
+ i_y = [
+ np.searchsorted(grids_y, mu_y - 3 * self.sigma),
+ np.searchsorted(grids_y, mu_y + 3 * self.sigma, 'right')
+ ]
+ i_z = [
+ np.searchsorted(grids_z, mu_z - 3 * self.sigma),
+ np.searchsorted(grids_z, mu_z + 3 * self.sigma, 'right')
+ ]
+ if i_x[0] >= i_x[1] or i_y[0] >= i_y[1] or i_z[0] >= i_z[1]:
+ continue
+ kernel_xs, kernel_ys, kernel_zs = np.meshgrid(
+ grids_x[i_x[0]:i_x[1]],
+ grids_y[i_y[0]:i_y[1]],
+ grids_z[i_z[0]:i_z[1]],
+ indexing='ij')
+ g = np.exp(-((kernel_xs - mu_x)**2 + (kernel_ys - mu_y)**2 +
+ (kernel_zs - mu_z)**2) / (2 * self.sigma**2))
+ target[idx, i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]] \
+ = np.maximum(target[idx, i_x[0]:i_x[1],
+ i_y[0]:i_y[1], i_z[0]:i_z[1]], g)
+
+ target = np.clip(target, 0, 1)
+ if target.shape[0] == 1:
+ target = target[0]
+
+ results['targets_3d'] = target
+
+ return results
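+
+# Config sketch (illustrative; the values are assumptions): the voxel grid is
+# defined by ann_info's space_size (mm), space_center and cube_size, so the
+# transform itself only needs the Gaussian width and the joints to use, e.g.
+#
+#     dict(type='GenerateVoxel3DHeatmapTarget', sigma=200.0, joint_indices=[11])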
diff --git a/mmpose/datasets/pipelines/shared_transform.py b/mmpose/datasets/pipelines/shared_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4fea806ce84b0484cabb7b44ba09c34cc109be0
--- /dev/null
+++ b/mmpose/datasets/pipelines/shared_transform.py
@@ -0,0 +1,527 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+from collections.abc import Sequence
+
+import mmcv
+import numpy as np
+from mmcv.parallel import DataContainer as DC
+from mmcv.utils import build_from_cfg
+from numpy import random
+from torchvision.transforms import functional as F
+
+from ..builder import PIPELINES
+
+try:
+ import albumentations
+except ImportError:
+ albumentations = None
+
+
+@PIPELINES.register_module()
+class ToTensor:
+ """Transform image to Tensor.
+
+ Required key: 'img'. Modifies key: 'img'.
+
+ Args:
+ results (dict): contain all information about training.
+        results (dict): contains all information about training.
+
+ def __call__(self, results):
+ if isinstance(results['img'], (list, tuple)):
+ results['img'] = [F.to_tensor(img) for img in results['img']]
+ else:
+ results['img'] = F.to_tensor(results['img'])
+
+ return results
+
+
+@PIPELINES.register_module()
+class NormalizeTensor:
+ """Normalize the Tensor image (CxHxW), with mean and std.
+
+ Required key: 'img'. Modifies key: 'img'.
+
+ Args:
+ mean (list[float]): Mean values of 3 channels.
+ std (list[float]): Std values of 3 channels.
+ """
+
+ def __init__(self, mean, std):
+ self.mean = mean
+ self.std = std
+
+ def __call__(self, results):
+ if isinstance(results['img'], (list, tuple)):
+ results['img'] = [
+ F.normalize(img, mean=self.mean, std=self.std)
+ for img in results['img']
+ ]
+ else:
+ results['img'] = F.normalize(
+ results['img'], mean=self.mean, std=self.std)
+
+ return results
+
+
+@PIPELINES.register_module()
+class Compose:
+ """Compose a data pipeline with a sequence of transforms.
+
+ Args:
+ transforms (list[dict | callable]): Either config
+ dicts of transforms or transform objects.
+ """
+
+ def __init__(self, transforms):
+ assert isinstance(transforms, Sequence)
+ self.transforms = []
+ for transform in transforms:
+ if isinstance(transform, dict):
+ transform = build_from_cfg(transform, PIPELINES)
+ self.transforms.append(transform)
+ elif callable(transform):
+ self.transforms.append(transform)
+ else:
+ raise TypeError('transform must be callable or a dict, but got'
+ f' {type(transform)}')
+
+ def __call__(self, data):
+ """Call function to apply transforms sequentially.
+
+ Args:
+ data (dict): A result dict contains the data to transform.
+
+ Returns:
+ dict: Transformed data.
+ """
+ for t in self.transforms:
+ data = t(data)
+ if data is None:
+ return None
+ return data
+
+ def __repr__(self):
+ """Compute the string representation."""
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += f'\n {t}'
+ format_string += '\n)'
+ return format_string
+
+
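+# Illustrative sketch, not part of the upstream module: composes a pipeline
+# from config dicts registered in this file. The mean/std values are the
+# usual ImageNet statistics, used here only for demonstration.
+def _example_compose():
+    pipeline = Compose([
+        dict(type='ToTensor'),
+        dict(
+            type='NormalizeTensor',
+            mean=[0.485, 0.456, 0.406],
+            std=[0.229, 0.224, 0.225]),
+    ])
+    results = {'img': np.zeros((256, 192, 3), dtype=np.uint8)}
+    # ToTensor converts HWC uint8 to CHW float, NormalizeTensor standardizes.
+    return pipeline(results)['img'].shape  # torch.Size([3, 256, 192])
+
+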
+@PIPELINES.register_module()
+class Collect:
+ """Collect data from the loader relevant to the specific task.
+
+    This keeps the items in `keys` as they are, and collects the items in
+    `meta_keys` into a meta item called `meta_name`. This is usually the last
+    stage of the data loader pipeline.
+ For example, when keys='imgs', meta_keys=('filename', 'label',
+ 'original_shape'), meta_name='img_metas', the results will be a dict with
+ keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of
+ another dict with keys 'filename', 'label', 'original_shape'.
+
+ Args:
+ keys (Sequence[str|tuple]): Required keys to be collected. If a tuple
+ (key, key_new) is given as an element, the item retrieved by key will
+ be renamed as key_new in collected data.
+ meta_name (str): The name of the key that contains meta information.
+ This key is always populated. Default: "img_metas".
+ meta_keys (Sequence[str|tuple]): Keys that are collected under
+ meta_name. The contents of the `meta_name` dictionary depends
+ on `meta_keys`.
+ """
+
+ def __init__(self, keys, meta_keys, meta_name='img_metas'):
+ self.keys = keys
+ self.meta_keys = meta_keys
+ self.meta_name = meta_name
+
+ def __call__(self, results):
+ """Performs the Collect formatting.
+
+ Args:
+ results (dict): The resulting dict to be modified and passed
+ to the next transform in pipeline.
+ """
+ if 'ann_info' in results:
+ results.update(results['ann_info'])
+
+ data = {}
+ for key in self.keys:
+ if isinstance(key, tuple):
+ assert len(key) == 2
+ key_src, key_tgt = key[:2]
+ else:
+ key_src = key_tgt = key
+ data[key_tgt] = results[key_src]
+
+ meta = {}
+ if len(self.meta_keys) != 0:
+ for key in self.meta_keys:
+ if isinstance(key, tuple):
+ assert len(key) == 2
+ key_src, key_tgt = key[:2]
+ else:
+ key_src = key_tgt = key
+ meta[key_tgt] = results[key_src]
+ if 'bbox_id' in results:
+ meta['bbox_id'] = results['bbox_id']
+ data[self.meta_name] = DC(meta, cpu_only=True)
+
+ return data
+
+ def __repr__(self):
+ """Compute the string representation."""
+ return (f'{self.__class__.__name__}('
+ f'keys={self.keys}, meta_keys={self.meta_keys})')
+
+
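+# Illustrative sketch, not part of the upstream module: 'image_file' and
+# 'center' are example keys, not a required schema. Shows key renaming and
+# packing of meta information into a DataContainer under 'img_metas'.
+def _example_collect():
+    results = {
+        'img': np.zeros((4, 4, 3), dtype=np.uint8),
+        'image_file': 'demo.jpg',
+        'center': np.array([2.0, 2.0]),
+    }
+    collect = Collect(
+        keys=['img'], meta_keys=['image_file', ('center', 'bbox_center')])
+    data = collect(results)
+    # data now holds 'img' and 'img_metas'; the meta dict contains
+    # 'image_file' and the renamed 'bbox_center'.
+    return data['img_metas'].data['bbox_center']
+
+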
+@PIPELINES.register_module()
+class Albumentation:
+    """Albumentation augmentation (pixel-level transforms only). Adds custom
+    pixel-level transformations from the Albumentations library. Please visit
+    `https://albumentations.readthedocs.io` for more information.
+
+    Note: we only support pixel-level transforms. Please visit
+    `https://github.com/albumentations-team/albumentations#pixel-level-transforms`
+    for more information about pixel-level transforms.
+
+    An example of ``transforms`` is as follows:
+
+ .. code-block:: python
+
+ [
+ dict(
+ type='RandomBrightnessContrast',
+ brightness_limit=[0.1, 0.3],
+ contrast_limit=[0.1, 0.3],
+ p=0.2),
+ dict(type='ChannelShuffle', p=0.1),
+ dict(
+ type='OneOf',
+ transforms=[
+ dict(type='Blur', blur_limit=3, p=1.0),
+ dict(type='MedianBlur', blur_limit=3, p=1.0)
+ ],
+ p=0.1),
+ ]
+
+ Args:
+        transforms (list[dict]): A list of Albumentation transformations.
+        keymap (dict): Mapping from input keys to albumentations-style keys,
+            e.g., {'img': 'image'}.
+ """
+
+ def __init__(self, transforms, keymap=None):
+ if albumentations is None:
+ raise RuntimeError('albumentations is not installed')
+
+ self.transforms = transforms
+ self.filter_lost_elements = False
+
+ self.aug = albumentations.Compose(
+ [self.albu_builder(t) for t in self.transforms])
+
+ if not keymap:
+ self.keymap_to_albu = {
+ 'img': 'image',
+ }
+ else:
+ self.keymap_to_albu = keymap
+ self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}
+
+ def albu_builder(self, cfg):
+ """Import a module from albumentations.
+
+ It resembles some of :func:`build_from_cfg` logic.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+
+ Returns:
+ obj: The constructed object.
+ """
+
+ assert isinstance(cfg, dict) and 'type' in cfg
+ args = cfg.copy()
+
+ obj_type = args.pop('type')
+ if mmcv.is_str(obj_type):
+ if albumentations is None:
+ raise RuntimeError('albumentations is not installed')
+ if not hasattr(albumentations.augmentations.transforms, obj_type):
+                warnings.warn(
+                    f'{obj_type} is not a pixel-level transform. '
+                    'Please use with caution.')
+ obj_cls = getattr(albumentations, obj_type)
+ else:
+ raise TypeError(f'type must be a str, but got {type(obj_type)}')
+
+ if 'transforms' in args:
+ args['transforms'] = [
+ self.albu_builder(transform)
+ for transform in args['transforms']
+ ]
+
+ return obj_cls(**args)
+
+ @staticmethod
+ def mapper(d, keymap):
+ """Dictionary mapper.
+
+ Renames keys according to keymap provided.
+
+ Args:
+ d (dict): old dict
+ keymap (dict): {'old_key':'new_key'}
+
+ Returns:
+ dict: new dict.
+ """
+
+ updated_dict = {keymap.get(k, k): v for k, v in d.items()}
+ return updated_dict
+
+ def __call__(self, results):
+ # dict to albumentations format
+ results = self.mapper(results, self.keymap_to_albu)
+
+ results = self.aug(**results)
+ # back to the original format
+ results = self.mapper(results, self.keymap_back)
+
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__ + f'(transforms={self.transforms})'
+ return repr_str
+
+
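+# Illustrative sketch, not part of the upstream module; it requires the
+# optional albumentations dependency. The brightness/contrast limits are
+# arbitrary demo values.
+def _example_albumentation():
+    aug = Albumentation(transforms=[
+        dict(
+            type='RandomBrightnessContrast',
+            brightness_limit=0.2,
+            contrast_limit=0.2,
+            p=1.0),
+    ])
+    results = {'img': np.random.randint(0, 255, (64, 64, 3), np.uint8)}
+    # 'img' is mapped to albumentations' 'image' key and back again.
+    return aug(results)['img'].shape  # (64, 64, 3)
+
+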
+@PIPELINES.register_module()
+class PhotometricDistortion:
+    """Apply photometric distortions to the image sequentially; every
+    transformation is applied with a probability of 0.5. Random contrast is
+    applied either second or second to last.
+
+ 1. random brightness
+ 2. random contrast (mode 0)
+ 3. convert color from BGR to HSV
+ 4. random saturation
+ 5. random hue
+ 6. convert color from HSV to BGR
+ 7. random contrast (mode 1)
+ 8. randomly swap channels
+
+ Args:
+ brightness_delta (int): delta of brightness.
+ contrast_range (tuple): range of contrast.
+ saturation_range (tuple): range of saturation.
+ hue_delta (int): delta of hue.
+ """
+
+ def __init__(self,
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18):
+ self.brightness_delta = brightness_delta
+ self.contrast_lower, self.contrast_upper = contrast_range
+ self.saturation_lower, self.saturation_upper = saturation_range
+ self.hue_delta = hue_delta
+
+ def convert(self, img, alpha=1, beta=0):
+        """Multiply by alpha, add beta, and clip to [0, 255]."""
+ img = img.astype(np.float32) * alpha + beta
+ img = np.clip(img, 0, 255)
+ return img.astype(np.uint8)
+
+ def brightness(self, img):
+ """Brightness distortion."""
+ if random.randint(2):
+ return self.convert(
+ img,
+ beta=random.uniform(-self.brightness_delta,
+ self.brightness_delta))
+ return img
+
+ def contrast(self, img):
+ """Contrast distortion."""
+ if random.randint(2):
+ return self.convert(
+ img,
+ alpha=random.uniform(self.contrast_lower, self.contrast_upper))
+ return img
+
+ def saturation(self, img):
+ # Apply saturation distortion to hsv-formatted img
+ img[:, :, 1] = self.convert(
+ img[:, :, 1],
+ alpha=random.uniform(self.saturation_lower, self.saturation_upper))
+ return img
+
+ def hue(self, img):
+ # Apply hue distortion to hsv-formatted img
+ img[:, :, 0] = (img[:, :, 0].astype(int) +
+ random.randint(-self.hue_delta, self.hue_delta)) % 180
+ return img
+
+ def swap_channels(self, img):
+ # Apply channel swap
+ if random.randint(2):
+ img = img[..., random.permutation(3)]
+ return img
+
+ def __call__(self, results):
+ """Call function to perform photometric distortion on images.
+
+ Args:
+ results (dict): Result dict from loading pipeline.
+
+ Returns:
+ dict: Result dict with images distorted.
+ """
+
+ img = results['img']
+ # random brightness
+ img = self.brightness(img)
+
+ # mode == 0 --> do random contrast first
+ # mode == 1 --> do random contrast last
+ mode = random.randint(2)
+ if mode == 1:
+ img = self.contrast(img)
+
+ hsv_mode = random.randint(4)
+ if hsv_mode:
+ # random saturation/hue distortion
+ img = mmcv.bgr2hsv(img)
+ if hsv_mode == 1 or hsv_mode == 3:
+ img = self.saturation(img)
+ if hsv_mode == 2 or hsv_mode == 3:
+ img = self.hue(img)
+ img = mmcv.hsv2bgr(img)
+
+ # random contrast
+ if mode == 0:
+ img = self.contrast(img)
+
+ # randomly swap channels
+        img = self.swap_channels(img)
+
+ results['img'] = img
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += (f'(brightness_delta={self.brightness_delta}, '
+ f'contrast_range=({self.contrast_lower}, '
+ f'{self.contrast_upper}), '
+ f'saturation_range=({self.saturation_lower}, '
+ f'{self.saturation_upper}), '
+ f'hue_delta={self.hue_delta})')
+ return repr_str
+
+
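+# Illustrative sketch, not part of the upstream module: applies the
+# distortion to a random BGR image. Each sub-transform fires with
+# probability 0.5, so the output differs from call to call.
+def _example_photometric_distortion():
+    distort = PhotometricDistortion()
+    results = {'img': np.random.randint(0, 255, (64, 64, 3), np.uint8)}
+    return distort(results)['img'].dtype  # uint8
+
+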
+@PIPELINES.register_module()
+class MultiItemProcess:
+ """Process each item and merge multi-item results to lists.
+
+ Args:
+ pipeline (dict): Dictionary to construct pipeline for a single item.
+ """
+
+ def __init__(self, pipeline):
+ self.pipeline = Compose(pipeline)
+
+ def __call__(self, results):
+ results_ = {}
+ for idx, result in results.items():
+ single_result = self.pipeline(result)
+ for k, v in single_result.items():
+ if k in results_:
+ results_[k].append(v)
+ else:
+ results_[k] = [v]
+
+ return results_
+
+
+@PIPELINES.register_module()
+class DiscardDuplicatedItems:
+
+ def __init__(self, keys_list):
+ """Discard duplicated single-item results.
+
+ Args:
+            keys_list (list): List of keys that need to be deduplicated.
+ """
+ self.keys_list = keys_list
+
+ def __call__(self, results):
+ for k, v in results.items():
+ if k in self.keys_list:
+ assert isinstance(v, Sequence)
+ results[k] = v[0]
+
+ return results
+
+
+@PIPELINES.register_module()
+class MultitaskGatherTarget:
+ """Gather the targets for multitask heads.
+
+ Args:
+ pipeline_list (list[list]): List of pipelines for all heads.
+ pipeline_indices (list[int]): Pipeline index of each head.
+ """
+
+ def __init__(self,
+ pipeline_list,
+ pipeline_indices=None,
+ keys=('target', 'target_weight')):
+ self.keys = keys
+ self.pipelines = []
+ for pipeline in pipeline_list:
+ self.pipelines.append(Compose(pipeline))
+ if pipeline_indices is None:
+ self.pipeline_indices = list(range(len(pipeline_list)))
+ else:
+ self.pipeline_indices = pipeline_indices
+
+ def __call__(self, results):
+ # generate target and target weights using all pipelines
+ pipeline_outputs = []
+ for pipeline in self.pipelines:
+ pipeline_output = pipeline(results)
+ pipeline_outputs.append(pipeline_output.copy())
+
+ for key in self.keys:
+ result_key = []
+ for ind in self.pipeline_indices:
+ result_key.append(pipeline_outputs[ind].get(key, None))
+ results[key] = result_key
+ return results
+
+
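+# Illustrative sketch, not part of the upstream module: the two toy callables
+# stand in for real target-generating pipelines. Each per-head pipeline
+# writes its own 'target', and MultitaskGatherTarget collects them into a
+# list ordered by pipeline_indices.
+def _example_multitask_gather():
+
+    def head_a(results):
+        return dict(results, target=np.ones(1), target_weight=np.ones(1))
+
+    def head_b(results):
+        return dict(results, target=np.zeros(1), target_weight=np.ones(1))
+
+    gather = MultitaskGatherTarget(
+        pipeline_list=[[head_a], [head_b]], pipeline_indices=[0, 1, 0])
+    results = gather({})
+    return [t[0] for t in results['target']]  # [1.0, 0.0, 1.0]
+
+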
+@PIPELINES.register_module()
+class RenameKeys:
+ """Rename the keys.
+
+ Args:
+ key_pairs (Sequence[tuple]): Required keys to be renamed.
+ If a tuple (key_src, key_tgt) is given as an element,
+ the item retrieved by key_src will be renamed as key_tgt.
+ """
+
+ def __init__(self, key_pairs):
+ self.key_pairs = key_pairs
+
+ def __call__(self, results):
+ """Rename keys."""
+ for key_pair in self.key_pairs:
+ assert len(key_pair) == 2
+ key_src, key_tgt = key_pair
+ results[key_tgt] = results.pop(key_src)
+ return results
diff --git a/mmpose/datasets/pipelines/top_down_transform.py b/mmpose/datasets/pipelines/top_down_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..1af1ea92d0cc5f973356ab72f300661e30b5d439
--- /dev/null
+++ b/mmpose/datasets/pipelines/top_down_transform.py
@@ -0,0 +1,736 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import (affine_transform, fliplr_joints,
+ get_affine_transform, get_warp_matrix,
+ warp_affine_joints)
+from mmpose.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class TopDownRandomFlip:
+ """Data augmentation with random image flip.
+
+ Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
+ 'ann_info'.
+
+ Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
+ 'flipped'.
+
+ Args:
+        flip_prob (float): Probability of flipping the image.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ img = results['img']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ center = results['center']
+
+ # A flag indicating whether the image is flipped,
+ # which can be used by child class.
+ flipped = False
+ if np.random.rand() <= self.flip_prob:
+ flipped = True
+ if not isinstance(img, list):
+ img = img[:, ::-1, :]
+ else:
+ img = [i[:, ::-1, :] for i in img]
+ if not isinstance(img, list):
+ joints_3d, joints_3d_visible = fliplr_joints(
+ joints_3d, joints_3d_visible, img.shape[1],
+ results['ann_info']['flip_pairs'])
+ center[0] = img.shape[1] - center[0] - 1
+ else:
+ joints_3d, joints_3d_visible = fliplr_joints(
+ joints_3d, joints_3d_visible, img[0].shape[1],
+ results['ann_info']['flip_pairs'])
+ center[0] = img[0].shape[1] - center[0] - 1
+
+ results['img'] = img
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+ results['center'] = center
+ results['flipped'] = flipped
+
+ return results
+
+
+@PIPELINES.register_module()
+class TopDownHalfBodyTransform:
+ """Data augmentation with half-body transform. Keep only the upper body or
+ the lower body at random.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', and 'ann_info'.
+
+ Modifies key: 'scale' and 'center'.
+
+ Args:
+        num_joints_half_body (int): Threshold for performing the half-body
+            transform. If the body has fewer visible joints than
+            ``num_joints_half_body``, this step is skipped.
+ prob_half_body (float): Probability of half-body transform.
+ """
+
+ def __init__(self, num_joints_half_body=8, prob_half_body=0.3):
+ self.num_joints_half_body = num_joints_half_body
+ self.prob_half_body = prob_half_body
+
+ @staticmethod
+ def half_body_transform(cfg, joints_3d, joints_3d_visible):
+ """Get center&scale for half-body transform."""
+ upper_joints = []
+ lower_joints = []
+ for joint_id in range(cfg['num_joints']):
+ if joints_3d_visible[joint_id][0] > 0:
+ if joint_id in cfg['upper_body_ids']:
+ upper_joints.append(joints_3d[joint_id])
+ else:
+ lower_joints.append(joints_3d[joint_id])
+
+ if np.random.randn() < 0.5 and len(upper_joints) > 2:
+ selected_joints = upper_joints
+ elif len(lower_joints) > 2:
+ selected_joints = lower_joints
+ else:
+ selected_joints = upper_joints
+
+ if len(selected_joints) < 2:
+ return None, None
+
+ selected_joints = np.array(selected_joints, dtype=np.float32)
+ center = selected_joints.mean(axis=0)[:2]
+
+ left_top = np.amin(selected_joints, axis=0)
+
+ right_bottom = np.amax(selected_joints, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ aspect_ratio = cfg['image_size'][0] / cfg['image_size'][1]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ scale = scale * 1.5
+ return center, scale
+
+ def __call__(self, results):
+ """Perform data augmentation with half-body transform."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ if (np.sum(joints_3d_visible[:, 0]) > self.num_joints_half_body
+ and np.random.rand() < self.prob_half_body):
+
+ c_half_body, s_half_body = self.half_body_transform(
+ results['ann_info'], joints_3d, joints_3d_visible)
+
+ if c_half_body is not None and s_half_body is not None:
+ results['center'] = c_half_body
+ results['scale'] = s_half_body
+
+ return results
+
+
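+# Illustrative sketch, not part of the upstream module: half_body_transform
+# is a staticmethod, so it can be probed directly with a toy config. The
+# upper_body_ids, image_size and joint values are demo assumptions.
+def _example_half_body():
+    cfg = {
+        'num_joints': 4,
+        'upper_body_ids': (0, 1),
+        'image_size': np.array([192, 256]),
+    }
+    joints_3d = np.array([[10., 10., 0.], [50., 30., 0.],
+                          [60., 200., 0.], [90., 240., 0.]])
+    joints_3d_visible = np.ones((4, 3))
+    center, scale = TopDownHalfBodyTransform.half_body_transform(
+        cfg, joints_3d, joints_3d_visible)
+    # Here the upper-body joints are selected, so center == [30., 20.] and
+    # scale is the padded upper-body box divided by 200.
+    return center, scale
+
+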
+@PIPELINES.register_module()
+class TopDownGetRandomScaleRotation:
+ """Data augmentation with random scaling & rotating.
+
+ Required key: 'scale'.
+
+ Modifies key: 'scale' and 'rotation'.
+
+ Args:
+ rot_factor (int): Rotating to ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor (float): Scaling to ``[1-scale_factor, 1+scale_factor]``.
+ rot_prob (float): Probability of random rotation.
+ """
+
+ def __init__(self, rot_factor=40, scale_factor=0.5, rot_prob=0.6):
+ self.rot_factor = rot_factor
+ self.scale_factor = scale_factor
+ self.rot_prob = rot_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ s = results['scale']
+
+ sf = self.scale_factor
+ rf = self.rot_factor
+
+ s_factor = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ s = s * s_factor
+
+ r_factor = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
+ r = r_factor if np.random.rand() <= self.rot_prob else 0
+
+ results['scale'] = s
+ results['rotation'] = r
+
+ return results
+
+
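+# Illustrative sketch, not part of the upstream module: with rot_factor=40
+# and scale_factor=0.5 the scale is clipped to [0.5, 1.5] times the input
+# and the rotation to [-80, 80] degrees (applied with probability rot_prob).
+def _example_scale_rotation():
+    transform = TopDownGetRandomScaleRotation(
+        rot_factor=40, scale_factor=0.5, rot_prob=0.6)
+    results = transform({'scale': np.array([1.0, 1.0])})
+    return results['scale'], results['rotation']
+
+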
+@PIPELINES.register_module()
+class TopDownAffine:
+    """Affine transform the image to produce the network input.
+
+    Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'ann_info',
+    'scale', 'rotation' and 'center'.
+
+    Modified keys: 'img', 'joints_3d', and 'joints_3d_visible'.
+
+ Args:
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, use_udp=False):
+ self.use_udp = use_udp
+
+ def __call__(self, results):
+ image_size = results['ann_info']['image_size']
+
+ img = results['img']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ c = results['center']
+ s = results['scale']
+ r = results['rotation']
+
+ if self.use_udp:
+ trans = get_warp_matrix(r, c * 2.0, image_size - 1.0, s * 200.0)
+ if not isinstance(img, list):
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ img = [
+ cv2.warpAffine(
+ i,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR) for i in img
+ ]
+
+ joints_3d[:, 0:2] = \
+ warp_affine_joints(joints_3d[:, 0:2].copy(), trans)
+
+ else:
+ trans = get_affine_transform(c, s, r, image_size)
+ if not isinstance(img, list):
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ img = [
+ cv2.warpAffine(
+ i,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR) for i in img
+ ]
+ for i in range(results['ann_info']['num_joints']):
+ if joints_3d_visible[i, 0] > 0.0:
+ joints_3d[i,
+ 0:2] = affine_transform(joints_3d[i, 0:2], trans)
+
+ results['img'] = img
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+
+ return results
+
+
+@PIPELINES.register_module()
+class TopDownGenerateTarget:
+ """Generate the target heatmap.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+
+ Modified keys: 'target', and 'target_weight'.
+
+ Args:
+ sigma: Sigma of heatmap gaussian for 'MSRA' approach.
+ kernel: Kernel of heatmap gaussian for 'Megvii' approach.
+ encoding (str): Approach to generate target heatmaps.
+ Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
+ Default:'MSRA'
+ unbiased_encoding (bool): Option to use unbiased
+ encoding methods.
+ Paper ref: Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+        valid_radius_factor (float): Radius factor of the positive area in
+            the classification heatmap for the UDP 'CombinedTarget'.
+            Paper ref: Huang et al. The Devil is in the Details: Delving into
+            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ target_type (str): supported targets: 'GaussianHeatmap',
+ 'CombinedTarget'. Default:'GaussianHeatmap'
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self,
+ sigma=2,
+ kernel=(11, 11),
+ valid_radius_factor=0.0546875,
+ target_type='GaussianHeatmap',
+ encoding='MSRA',
+ unbiased_encoding=False):
+ self.sigma = sigma
+ self.unbiased_encoding = unbiased_encoding
+ self.kernel = kernel
+ self.valid_radius_factor = valid_radius_factor
+ self.target_type = target_type
+ self.encoding = encoding
+
+ def _msra_generate_target(self, cfg, joints_3d, joints_3d_visible, sigma):
+ """Generate the target heatmap via "MSRA" approach.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray ([num_joints, 3])
+ joints_3d_visible: np.ndarray ([num_joints, 3])
+ sigma: Sigma of heatmap gaussian
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target: Target heatmaps.
+ - target_weight: (1: visible, 0: invisible)
+ """
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ W, H = cfg['heatmap_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ target_weight = np.zeros((num_joints, 1), dtype=np.float32)
+ target = np.zeros((num_joints, H, W), dtype=np.float32)
+
+ # 3-sigma rule
+ tmp_size = sigma * 3
+
+ if self.unbiased_encoding:
+ for joint_id in range(num_joints):
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
+
+ feat_stride = image_size / [W, H]
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
+ # Check that any part of the gaussian is in-bounds
+ ul = [mu_x - tmp_size, mu_y - tmp_size]
+ br = [mu_x + tmp_size + 1, mu_y + tmp_size + 1]
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
+ target_weight[joint_id] = 0
+
+ if target_weight[joint_id] == 0:
+ continue
+
+ x = np.arange(0, W, 1, np.float32)
+ y = np.arange(0, H, 1, np.float32)
+ y = y[:, None]
+
+ if target_weight[joint_id] > 0.5:
+ target[joint_id] = np.exp(-((x - mu_x)**2 +
+ (y - mu_y)**2) /
+ (2 * sigma**2))
+ else:
+ for joint_id in range(num_joints):
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
+
+ feat_stride = image_size / [W, H]
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
+ target_weight[joint_id] = 0
+
+ if target_weight[joint_id] > 0.5:
+ size = 2 * tmp_size + 1
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+ x0 = y0 = size // 2
+ # The gaussian is not normalized,
+ # we want the center value to equal 1
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
+
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], W) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], H) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], W)
+ img_y = max(0, ul[1]), min(br[1], H)
+
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
+
+ def _megvii_generate_target(self, cfg, joints_3d, joints_3d_visible,
+ kernel):
+ """Generate the target heatmap via "Megvii" approach.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray ([num_joints, 3])
+ joints_3d_visible: np.ndarray ([num_joints, 3])
+ kernel: Kernel of heatmap gaussian
+
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target: Target heatmaps.
+ - target_weight: (1: visible, 0: invisible)
+ """
+
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ W, H = cfg['heatmap_size']
+ heatmaps = np.zeros((num_joints, H, W), dtype='float32')
+ target_weight = np.zeros((num_joints, 1), dtype=np.float32)
+
+ for i in range(num_joints):
+ target_weight[i] = joints_3d_visible[i, 0]
+
+ if target_weight[i] < 1:
+ continue
+
+ target_y = int(joints_3d[i, 1] * H / image_size[1])
+ target_x = int(joints_3d[i, 0] * W / image_size[0])
+
+ if (target_x >= W or target_x < 0) \
+ or (target_y >= H or target_y < 0):
+ target_weight[i] = 0
+ continue
+
+ heatmaps[i, target_y, target_x] = 1
+ heatmaps[i] = cv2.GaussianBlur(heatmaps[i], kernel, 0)
+ maxi = heatmaps[i, target_y, target_x]
+
+ heatmaps[i] /= maxi / 255
+
+ return heatmaps, target_weight
+
+ def _udp_generate_target(self, cfg, joints_3d, joints_3d_visible, factor,
+ target_type):
+ """Generate the target heatmap via 'UDP' approach. Paper ref: Huang et
+ al. The Devil is in the Details: Delving into Unbiased Data Processing
+ for Human Pose Estimation (CVPR 2020).
+
+ Note:
+ - num keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+ - num target channels: C
+ - C = K if target_type=='GaussianHeatmap'
+ - C = 3*K if target_type=='CombinedTarget'
+
+ Args:
+ cfg (dict): data config
+ joints_3d (np.ndarray[K, 3]): Annotated keypoints.
+ joints_3d_visible (np.ndarray[K, 3]): Visibility of keypoints.
+ factor (float): kernel factor for GaussianHeatmap target or
+ valid radius factor for CombinedTarget.
+ target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
+ GaussianHeatmap: Heatmap target with gaussian distribution.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target (np.ndarray[C, H, W]): Target heatmaps.
+ - target_weight (np.ndarray[K, 1]): (1: visible, 0: invisible)
+ """
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ heatmap_size = cfg['heatmap_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ target_weight = np.ones((num_joints, 1), dtype=np.float32)
+ target_weight[:, 0] = joints_3d_visible[:, 0]
+
+ if target_type.lower() == 'GaussianHeatmap'.lower():
+ target = np.zeros((num_joints, heatmap_size[1], heatmap_size[0]),
+ dtype=np.float32)
+
+ tmp_size = factor * 3
+
+ # prepare for gaussian
+ size = 2 * tmp_size + 1
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+
+ for joint_id in range(num_joints):
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
+ if ul[0] >= heatmap_size[0] or ul[1] >= heatmap_size[1] \
+ or br[0] < 0 or br[1] < 0:
+                    # Gaussian is fully outside the heatmap; skip this joint
+ target_weight[joint_id] = 0
+ continue
+
+ # # Generate gaussian
+ mu_x_ac = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y_ac = joints_3d[joint_id][1] / feat_stride[1]
+ x0 = y0 = size // 2
+ x0 += mu_x_ac - mu_x
+ y0 += mu_y_ac - mu_y
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * factor**2))
+
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
+ img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
+
+ v = target_weight[joint_id]
+ if v > 0.5:
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+
+ elif target_type.lower() == 'CombinedTarget'.lower():
+ target = np.zeros(
+ (num_joints, 3, heatmap_size[1] * heatmap_size[0]),
+ dtype=np.float32)
+ feat_width = heatmap_size[0]
+ feat_height = heatmap_size[1]
+ feat_x_int = np.arange(0, feat_width)
+ feat_y_int = np.arange(0, feat_height)
+ feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
+ feat_x_int = feat_x_int.flatten()
+ feat_y_int = feat_y_int.flatten()
+ # Calculate the radius of the positive area in classification
+ # heatmap.
+ valid_radius = factor * heatmap_size[1]
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
+ for joint_id in range(num_joints):
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
+ x_offset = (mu_x - feat_x_int) / valid_radius
+ y_offset = (mu_y - feat_y_int) / valid_radius
+ dis = x_offset**2 + y_offset**2
+ keep_pos = np.where(dis <= 1)[0]
+ v = target_weight[joint_id]
+ if v > 0.5:
+ target[joint_id, 0, keep_pos] = 1
+ target[joint_id, 1, keep_pos] = x_offset[keep_pos]
+ target[joint_id, 2, keep_pos] = y_offset[keep_pos]
+ target = target.reshape(num_joints * 3, heatmap_size[1],
+ heatmap_size[0])
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ assert self.encoding in ['MSRA', 'Megvii', 'UDP']
+
+ if self.encoding == 'MSRA':
+ if isinstance(self.sigma, list):
+ num_sigmas = len(self.sigma)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ heatmap_size = cfg['heatmap_size']
+
+ target = np.empty(
+ (0, num_joints, heatmap_size[1], heatmap_size[0]),
+ dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_sigmas):
+ target_i, target_weight_i = self._msra_generate_target(
+ cfg, joints_3d, joints_3d_visible, self.sigma[i])
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._msra_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible,
+ self.sigma)
+
+ elif self.encoding == 'Megvii':
+ if isinstance(self.kernel, list):
+ num_kernels = len(self.kernel)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ W, H = cfg['heatmap_size']
+
+ target = np.empty((0, num_joints, H, W), dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_kernels):
+ target_i, target_weight_i = self._megvii_generate_target(
+ cfg, joints_3d, joints_3d_visible, self.kernel[i])
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._megvii_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible,
+ self.kernel)
+
+ elif self.encoding == 'UDP':
+ if self.target_type.lower() == 'CombinedTarget'.lower():
+ factors = self.valid_radius_factor
+ channel_factor = 3
+ elif self.target_type.lower() == 'GaussianHeatmap'.lower():
+ factors = self.sigma
+ channel_factor = 1
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+ if isinstance(factors, list):
+ num_factors = len(factors)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ W, H = cfg['heatmap_size']
+
+ target = np.empty((0, channel_factor * num_joints, H, W),
+ dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_factors):
+ target_i, target_weight_i = self._udp_generate_target(
+ cfg, joints_3d, joints_3d_visible, factors[i],
+ self.target_type)
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._udp_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible, factors,
+ self.target_type)
+ else:
+ raise ValueError(
+ f'Encoding approach {self.encoding} is not supported!')
+
+ if results['ann_info'].get('max_num_joints', None) is not None:
+ W, H = results['ann_info']['heatmap_size']
+            padded_length = int(results['ann_info'].get('max_num_joints') -
+                                results['ann_info'].get('num_joints'))
+            target_weight = np.concatenate(
+                [target_weight,
+                 np.zeros((padded_length, 1), dtype=np.float32)], 0)
+            target = np.concatenate(
+                [target,
+                 np.zeros((padded_length, H, W), dtype=np.float32)], 0)
+
+ results['target'] = target
+ results['target_weight'] = target_weight
+
+ results['dataset_idx'] = results['ann_info'].get('dataset_idx', 0)
+
+ return results
+
+
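+# Illustrative sketch, not part of the upstream module: generates an MSRA
+# gaussian heatmap for one visible joint. The ann_info below is a minimal
+# made-up config, not the full dataset schema.
+def _example_generate_target():
+    results = {
+        'joints_3d': np.array([[96.0, 128.0, 0.0]]),
+        'joints_3d_visible': np.array([[1.0, 1.0, 0.0]]),
+        'ann_info': {
+            'num_joints': 1,
+            'image_size': np.array([192, 256]),
+            'heatmap_size': np.array([48, 64]),
+            'joint_weights': np.ones((1, 1), dtype=np.float32),
+            'use_different_joint_weights': False,
+        },
+    }
+    results = TopDownGenerateTarget(sigma=2, encoding='MSRA')(results)
+    # target has shape (1, 64, 48) with a peak of 1.0 at the joint location.
+    return results['target'].shape, results['target'].max()
+
+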
+@PIPELINES.register_module()
+class TopDownGenerateTargetRegression:
+ """Generate the target regression vector (coordinates).
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'. Modified keys:
+ 'target', and 'target_weight'.
+ """
+
+ def __init__(self):
+ pass
+
+ def _generate_target(self, cfg, joints_3d, joints_3d_visible):
+ """Generate the target regression vector.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray([num_joints, 3])
+ joints_3d_visible: np.ndarray([num_joints, 3])
+
+ Returns:
+ target, target_weight(1: visible, 0: invisible)
+ """
+ image_size = cfg['image_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ mask = (joints_3d[:, 0] >= 0) * (
+ joints_3d[:, 0] <= image_size[0] - 1) * (joints_3d[:, 1] >= 0) * (
+ joints_3d[:, 1] <= image_size[1] - 1)
+
+ target = joints_3d[:, :2] / image_size
+
+ target = target.astype(np.float32)
+ target_weight = joints_3d_visible[:, :2] * mask[:, None]
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ target, target_weight = self._generate_target(results['ann_info'],
+ joints_3d,
+ joints_3d_visible)
+
+ results['target'] = target
+ results['target_weight'] = target_weight
+
+ return results
+
+
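+# Illustrative sketch, not part of the upstream module: regression targets
+# are the 2D joint coordinates normalized by the image size; joints outside
+# the image receive zero weight. The values below are demo assumptions.
+def _example_regression_target():
+    results = {
+        'joints_3d': np.array([[96.0, 128.0, 0.0]]),
+        'joints_3d_visible': np.array([[1.0, 1.0, 0.0]]),
+        'ann_info': {
+            'image_size': np.array([192, 256]),
+            'joint_weights': np.ones((1, 1), dtype=np.float32),
+            'use_different_joint_weights': False,
+        },
+    }
+    results = TopDownGenerateTargetRegression()(results)
+    return results['target']  # [[0.5, 0.5]]
+
+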
+@PIPELINES.register_module()
+class TopDownRandomTranslation:
+ """Data augmentation with random translation.
+
+    Required keys: 'scale' and 'center'.
+
+ Modifies key: 'center'.
+
+ Note:
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ trans_factor (float): Translating center to
+ ``[-trans_factor, trans_factor] * [W, H] + center``.
+ trans_prob (float): Probability of random translation.
+ """
+
+ def __init__(self, trans_factor=0.15, trans_prob=1.0):
+ self.trans_factor = trans_factor
+ self.trans_prob = trans_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random translation."""
+ center = results['center']
+ scale = results['scale']
+ if np.random.rand() <= self.trans_prob:
+ # reference bbox size is [200, 200] pixels
+ center += self.trans_factor * np.random.uniform(
+ -1, 1, size=2) * scale * 200
+ results['center'] = center
+ return results
diff --git a/mmpose/datasets/registry.py b/mmpose/datasets/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba3cc49e452eb4bceefa3bbb1b994d7f2ab7fff9
--- /dev/null
+++ b/mmpose/datasets/registry.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .builder import DATASETS, PIPELINES
+
+__all__ = ['DATASETS', 'PIPELINES']
+
+warnings.simplefilter('once', DeprecationWarning)
+warnings.warn(
+ 'Registries (DATASETS, PIPELINES) have been moved to '
+ 'mmpose.datasets.builder. Importing from '
+    'mmpose.datasets.registry will be deprecated in the future.',
+ DeprecationWarning)
diff --git a/mmpose/datasets/samplers/__init__.py b/mmpose/datasets/samplers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..da09effaf20fefe1a102277672b98db7d884f002
--- /dev/null
+++ b/mmpose/datasets/samplers/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .distributed_sampler import DistributedSampler
+
+__all__ = ['DistributedSampler']
diff --git a/mmpose/datasets/samplers/__pycache__/__init__.cpython-310.pyc b/mmpose/datasets/samplers/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a92c5ab83ac9f925a3886f824838e26727c09f1a
Binary files /dev/null and b/mmpose/datasets/samplers/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/datasets/samplers/__pycache__/distributed_sampler.cpython-310.pyc b/mmpose/datasets/samplers/__pycache__/distributed_sampler.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..193047a47a499aa0fe3604648032e76c0e2479b5
Binary files /dev/null and b/mmpose/datasets/samplers/__pycache__/distributed_sampler.cpython-310.pyc differ
diff --git a/mmpose/datasets/samplers/distributed_sampler.py b/mmpose/datasets/samplers/distributed_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcb5f522a2252678250385f9b37463ce3a0e24f5
--- /dev/null
+++ b/mmpose/datasets/samplers/distributed_sampler.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.utils.data import DistributedSampler as _DistributedSampler
+
+
+class DistributedSampler(_DistributedSampler):
+ """DistributedSampler inheriting from
+ `torch.utils.data.DistributedSampler`.
+
+    Older versions of PyTorch do not provide a `shuffle` argument; this
+    subclass adds one to DistributedSampler.
+ """
+
+ def __init__(self,
+ dataset,
+ num_replicas=None,
+ rank=None,
+ shuffle=True,
+ seed=0):
+ super().__init__(
+ dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
+        # for compatibility with PyTorch 1.3+
+ self.seed = seed if seed is not None else 0
+
+ def __iter__(self):
+ """Deterministically shuffle based on epoch."""
+ if self.shuffle:
+ g = torch.Generator()
+ g.manual_seed(self.epoch + self.seed)
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
+ else:
+ indices = torch.arange(len(self.dataset)).tolist()
+
+ # add extra samples to make it evenly divisible
+ indices += indices[:(self.total_size - len(indices))]
+ assert len(indices) == self.total_size
+
+ # subsample
+ indices = indices[self.rank:self.total_size:self.num_replicas]
+ assert len(indices) == self.num_samples
+ return iter(indices)
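+
+
+# Illustrative sketch, not part of the upstream module: with num_replicas
+# and rank given explicitly, no distributed process group is required, and
+# set_epoch() re-seeds the shuffle deterministically.
+def _example_distributed_sampler():
+    dataset = list(range(10))
+    sampler = DistributedSampler(
+        dataset, num_replicas=2, rank=0, shuffle=True)
+    sampler.set_epoch(0)
+    return list(sampler)  # 5 deterministic indices out of the 10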
diff --git a/mmpose/deprecated.py b/mmpose/deprecated.py
new file mode 100644
index 0000000000000000000000000000000000000000..b930901722ab8fe57455f8eaf9e7c1c728b4b4f8
--- /dev/null
+++ b/mmpose/deprecated.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .datasets.builder import DATASETS
+from .datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset
+from .models.builder import HEADS, POSENETS
+from .models.detectors import AssociativeEmbedding
+from .models.heads import (AEHigherResolutionHead, AESimpleHead,
+ DeepposeRegressionHead, HMRMeshHead,
+ TopdownHeatmapMSMUHead,
+ TopdownHeatmapMultiStageHead,
+ TopdownHeatmapSimpleHead)
+
+
+@DATASETS.register_module()
+class TopDownFreiHandDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownFreiHandDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+            'TopDownFreiHandDataset has been renamed into FreiHandDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@DATASETS.register_module()
+class TopDownOneHand10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownOneHand10KDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+            'TopDownOneHand10KDataset has been renamed into OneHand10KDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@DATASETS.register_module()
+class TopDownPanopticDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownPanopticDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+            'TopDownPanopticDataset has been renamed into PanopticDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@HEADS.register_module()
+class BottomUpHigherResolutionHead(AEHigherResolutionHead):
+ """Bottom-up head for Higher Resolution.
+
+ BottomUpHigherResolutionHead has been renamed into AEHigherResolutionHead,
+    check https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'BottomUpHigherResolutionHead has been renamed into '
+ 'AEHigherResolutionHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class BottomUpSimpleHead(AESimpleHead):
+ """Bottom-up simple head.
+
+ BottomUpSimpleHead has been renamed into AESimpleHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+            'BottomUpSimpleHead has been renamed into '
+            'AESimpleHead, check '
+            'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownSimpleHead(TopdownHeatmapSimpleHead):
+ """Top-down heatmap simple head.
+
+ TopDownSimpleHead has been renamed into TopdownHeatmapSimpleHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownSimpleHead has been renamed into '
+ 'TopdownHeatmapSimpleHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownMultiStageHead(TopdownHeatmapMultiStageHead):
+ """Top-down heatmap multi-stage head.
+
+ TopDownMultiStageHead has been renamed into TopdownHeatmapMultiStageHead,
+ check https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownMultiStageHead has been renamed into '
+ 'TopdownHeatmapMultiStageHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownMSMUHead(TopdownHeatmapMSMUHead):
+ """Heads for multi-stage multi-unit heads.
+
+ TopDownMSMUHead has been renamed into TopdownHeatmapMSMUHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownMSMUHead has been renamed into '
+ 'TopdownHeatmapMSMUHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class MeshHMRHead(HMRMeshHead):
+ """SMPL parameters regressor head.
+
+ MeshHMRHead has been renamed into HMRMeshHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'MeshHMRHead has been renamed into '
+ 'HMRMeshHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class FcHead(DeepposeRegressionHead):
+ """FcHead (deprecated).
+
+ FcHead has been renamed into DeepposeRegressionHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'FcHead has been renamed into '
+ 'DeepposeRegressionHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@POSENETS.register_module()
+class BottomUp(AssociativeEmbedding):
+ """Associative Embedding.
+
+ BottomUp has been renamed into AssociativeEmbedding, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'BottomUp has been renamed into '
+ 'AssociativeEmbedding, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
diff --git a/mmpose/models/__init__.py b/mmpose/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbec55e439201119145ebb7423f9281b63f0ec07
--- /dev/null
+++ b/mmpose/models/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .backbones import * # noqa
+from .builder import (BACKBONES, HEADS, LOSSES, MESH_MODELS, NECKS, POSENETS,
+ build_backbone, build_head, build_loss, build_mesh_model,
+ build_neck, build_posenet)
+from .detectors import * # noqa
+from .heads import * # noqa
+from .losses import * # noqa
+from .necks import * # noqa
+from .utils import * # noqa
+
+__all__ = [
+ 'BACKBONES', 'HEADS', 'NECKS', 'LOSSES', 'POSENETS', 'MESH_MODELS',
+ 'build_backbone', 'build_head', 'build_loss', 'build_posenet',
+ 'build_neck', 'build_mesh_model'
+]
diff --git a/mmpose/models/__pycache__/__init__.cpython-310.pyc b/mmpose/models/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..47bb905f2a479ff3343f0920fdd4635391a1d871
Binary files /dev/null and b/mmpose/models/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/__pycache__/builder.cpython-310.pyc b/mmpose/models/__pycache__/builder.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31c8656d3dfa70f4144f9b207ce24552fa77fa84
Binary files /dev/null and b/mmpose/models/__pycache__/builder.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__init__.py b/mmpose/models/backbones/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b8efcfbb5ac55e0f3b2de78e96bb799f54eab39
--- /dev/null
+++ b/mmpose/models/backbones/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .alexnet import AlexNet
+from .cpm import CPM
+from .hourglass import HourglassNet
+from .hourglass_ae import HourglassAENet
+from .hrformer import HRFormer
+from .hrnet import HRNet
+from .litehrnet import LiteHRNet
+from .mobilenet_v2 import MobileNetV2
+from .mobilenet_v3 import MobileNetV3
+from .mspn import MSPN
+from .regnet import RegNet
+from .resnest import ResNeSt
+from .resnet import ResNet, ResNetV1d
+from .resnext import ResNeXt
+from .rsn import RSN
+from .scnet import SCNet
+from .seresnet import SEResNet
+from .seresnext import SEResNeXt
+from .shufflenet_v1 import ShuffleNetV1
+from .shufflenet_v2 import ShuffleNetV2
+from .tcn import TCN
+from .v2v_net import V2VNet
+from .vgg import VGG
+from .vipnas_mbv3 import ViPNAS_MobileNetV3
+from .vipnas_resnet import ViPNAS_ResNet
+from .vit import ViT
+from .vit_moe import ViTMoE
+
+__all__ = [
+ 'AlexNet', 'HourglassNet', 'HourglassAENet', 'HRNet', 'MobileNetV2',
+ 'MobileNetV3', 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SCNet',
+ 'SEResNet', 'SEResNeXt', 'ShuffleNetV1', 'ShuffleNetV2', 'CPM', 'RSN',
+ 'MSPN', 'ResNeSt', 'VGG', 'TCN', 'ViPNAS_ResNet', 'ViPNAS_MobileNetV3',
+ 'LiteHRNet', 'V2VNet', 'HRFormer', 'ViT', 'ViTMoE'
+]
diff --git a/mmpose/models/backbones/__pycache__/__init__.cpython-310.pyc b/mmpose/models/backbones/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7439e2570ab01d991e926a543a590f9534361df9
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/alexnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/alexnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6851121a5c9f7259f7dbffcfe7b18aaef45e2a3b
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/alexnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/base_backbone.cpython-310.pyc b/mmpose/models/backbones/__pycache__/base_backbone.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8ccbc0801f4722d1601edc22f0e11d6ae86de3fa
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/base_backbone.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/cpm.cpython-310.pyc b/mmpose/models/backbones/__pycache__/cpm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..621e2b6396941405ba734639e7ca0fc717127992
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/cpm.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/hourglass.cpython-310.pyc b/mmpose/models/backbones/__pycache__/hourglass.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a761d3fda75e6835a97f9b20ec98bc38d1273748
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/hourglass.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/hourglass_ae.cpython-310.pyc b/mmpose/models/backbones/__pycache__/hourglass_ae.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..508b3a15c2f7c56f6b2527b9a32f9e46adf6ee73
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/hourglass_ae.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/hrformer.cpython-310.pyc b/mmpose/models/backbones/__pycache__/hrformer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a12c08a890903349370e2ad71ed4a68941057d41
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/hrformer.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/hrnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/hrnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..43515c17ed2e7a716f6c09bcdf8fc9ae6defdcc9
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/hrnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/litehrnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/litehrnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..57936f9c69c395d1b5e874812a8f063386499bbd
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/litehrnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/mobilenet_v2.cpython-310.pyc b/mmpose/models/backbones/__pycache__/mobilenet_v2.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c6b086703654de85c6b7354d94bf4fad7633eff
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/mobilenet_v2.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/mobilenet_v3.cpython-310.pyc b/mmpose/models/backbones/__pycache__/mobilenet_v3.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ec3004f0dddea043e636d36c307f21e110d4e84
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/mobilenet_v3.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/mspn.cpython-310.pyc b/mmpose/models/backbones/__pycache__/mspn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..042e3cfd9df8c34039fc40b4967ac29182ef127a
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/mspn.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/regnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/regnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..da386b002cb71719aef7c5971e927cd3751089e2
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/regnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/resnest.cpython-310.pyc b/mmpose/models/backbones/__pycache__/resnest.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a0d3636d57a385bd5d8ad7b3c2bf2019bc69f96f
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/resnest.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/resnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/resnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4fe4727aaaaa4b9890140c6b011ed5cd2e33f6bc
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/resnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/resnext.cpython-310.pyc b/mmpose/models/backbones/__pycache__/resnext.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4fc47d81bb84c9118fcc0e8305aaa005346e999d
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/resnext.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/rsn.cpython-310.pyc b/mmpose/models/backbones/__pycache__/rsn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..40acb540c0166e3993aab42a6cc3b9d0de317685
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/rsn.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/scnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/scnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ffcd8f562127237d138a217ba0876478e9da974
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/scnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/seresnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/seresnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..89f1e101207375da919aa1fd0fc66e386b2da669
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/seresnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/seresnext.cpython-310.pyc b/mmpose/models/backbones/__pycache__/seresnext.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91ed2b7294b65ca4a227f634d8b7819c3c85e46b
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/seresnext.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/shufflenet_v1.cpython-310.pyc b/mmpose/models/backbones/__pycache__/shufflenet_v1.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d5c93cb26e992eab43df47be5e477f262b6b4ce2
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/shufflenet_v1.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/shufflenet_v2.cpython-310.pyc b/mmpose/models/backbones/__pycache__/shufflenet_v2.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07515a99646d1a722c0bf301c4a33ca1c120da3a
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/shufflenet_v2.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/tcn.cpython-310.pyc b/mmpose/models/backbones/__pycache__/tcn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cf3ae9333fd6f40a1a90b394b89a3634bb1cde1a
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/tcn.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/v2v_net.cpython-310.pyc b/mmpose/models/backbones/__pycache__/v2v_net.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d0e6007f57906e7386ff4c077a9a012dadfed5d1
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/v2v_net.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/vgg.cpython-310.pyc b/mmpose/models/backbones/__pycache__/vgg.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..692424edecbca72cda760c9a61bb419b27095d6a
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/vgg.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/vipnas_mbv3.cpython-310.pyc b/mmpose/models/backbones/__pycache__/vipnas_mbv3.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a6d4f02e10b3a3ebdc3ea1356b99ec0edd815321
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/vipnas_mbv3.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/vipnas_resnet.cpython-310.pyc b/mmpose/models/backbones/__pycache__/vipnas_resnet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be411c0eee507db71151cb6eed320edb4464e6df
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/vipnas_resnet.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/vit.cpython-310.pyc b/mmpose/models/backbones/__pycache__/vit.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..20590775b2b69af647eaff130be7aa6c5ae2ae48
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/vit.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/__pycache__/vit_moe.cpython-310.pyc b/mmpose/models/backbones/__pycache__/vit_moe.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3b5740d0593e748bba503077b449e66eef8c6acb
Binary files /dev/null and b/mmpose/models/backbones/__pycache__/vit_moe.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/alexnet.py b/mmpose/models/backbones/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8efd74d118f5abe4d9c880ebe80ce7cbd58c6b2
--- /dev/null
+++ b/mmpose/models/backbones/alexnet.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+@BACKBONES.register_module()
+class AlexNet(BaseBackbone):
+    """AlexNet backbone.
+
+ The input for AlexNet is a 224x224 RGB image.
+
+ Args:
+ num_classes (int): number of classes for classification.
+ The default value is -1, which uses the backbone as
+ a feature extractor without the top classifier.
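+
+    Example:
+        >>> # illustrative usage as a feature extractor; the output shape
+        >>> # below assumes a 224x224 input
+        >>> import torch
+        >>> self = AlexNet(num_classes=-1)
+        >>> inputs = torch.rand(1, 3, 224, 224)
+        >>> feats = self.forward(inputs)
+        >>> print(tuple(feats.shape))
+        (1, 256, 6, 6)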
+ """
+
+ def __init__(self, num_classes=-1):
+ super().__init__()
+ self.num_classes = num_classes
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(64, 192, kernel_size=5, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(192, 384, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(384, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ )
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Dropout(),
+ nn.Linear(256 * 6 * 6, 4096),
+ nn.ReLU(inplace=True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(inplace=True),
+ nn.Linear(4096, num_classes),
+ )
+
+ def forward(self, x):
+
+ x = self.features(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), 256 * 6 * 6)
+ x = self.classifier(x)
+
+ return x
diff --git a/mmpose/models/backbones/base_backbone.py b/mmpose/models/backbones/base_backbone.py
new file mode 100644
index 0000000000000000000000000000000000000000..d64dca1da1380aca4521bc1066c76e8a6f56c18c
--- /dev/null
+++ b/mmpose/models/backbones/base_backbone.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+from abc import ABCMeta, abstractmethod
+
+import torch.nn as nn
+
+# from .utils import load_checkpoint
+from mmcv_custom.checkpoint import load_checkpoint
+
+class BaseBackbone(nn.Module, metaclass=ABCMeta):
+ """Base backbone.
+
+ This class defines the basic functions of a backbone. Any backbone that
+ inherits this class should at least define its own `forward` function.
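+
+    Example:
+        >>> # minimal illustrative subclass; concrete backbones override
+        >>> # ``forward`` and usually return a tuple of feature maps
+        >>> import torch
+        >>> class TinyBackbone(BaseBackbone):
+        ...     def forward(self, x):
+        ...         return (x,)
+        >>> model = TinyBackbone()
+        >>> model.init_weights(pretrained=None)
+        >>> feats = model.forward(torch.rand(1, 3, 64, 64))
+        >>> print(tuple(feats[0].shape))
+        (1, 3, 64, 64)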
+ """
+
+ def init_weights(self, pretrained=None, patch_padding='pad', part_features=None):
+ """Init backbone weights.
+
+ Args:
+ pretrained (str | None): If pretrained is a string, then it
+ initializes backbone weights by loading the pretrained
+ checkpoint. If pretrained is None, then it follows default
+ initializer or customized initializer in subclasses.
+ """
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger, patch_padding=patch_padding, part_features=part_features)
+ elif pretrained is None:
+ # use default initializer or customized initializer in subclasses
+ pass
+ else:
+ raise TypeError('pretrained must be a str or None.'
+ f' But received {type(pretrained)}.')
+
+ @abstractmethod
+ def forward(self, x):
+ """Forward function.
+
+ Args:
+ x (Tensor | tuple[Tensor]): x could be a torch.Tensor or a tuple of
+ torch.Tensor, containing input data for forward computation.
+ """
diff --git a/mmpose/models/backbones/cpm.py b/mmpose/models/backbones/cpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..458245d755f930f4ff625a754aadbab5c13494a6
--- /dev/null
+++ b/mmpose/models/backbones/cpm.py
@@ -0,0 +1,186 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint
+
+
+class CpmBlock(nn.Module):
+ """CpmBlock for Convolutional Pose Machine.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ channels (list): Output channels of each conv module.
+ kernels (list): Kernel sizes of each conv module.
+ """
+
+ def __init__(self,
+ in_channels,
+ channels=(128, 128, 128),
+ kernels=(11, 11, 11),
+ norm_cfg=None):
+ super().__init__()
+
+ assert len(channels) == len(kernels)
+ layers = []
+ for i in range(len(channels)):
+ if i == 0:
+ input_channels = in_channels
+ else:
+ input_channels = channels[i - 1]
+ layers.append(
+ ConvModule(
+ input_channels,
+ channels[i],
+ kernels[i],
+ padding=(kernels[i] - 1) // 2,
+ norm_cfg=norm_cfg))
+ self.model = nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Model forward function."""
+ out = self.model(x)
+ return out
+
+
+@BACKBONES.register_module()
+class CPM(BaseBackbone):
+ """CPM backbone.
+
+ Convolutional Pose Machines.
+ More details can be found in the `paper
+    <https://arxiv.org/abs/1602.00134>`__.
+
+ Args:
+ in_channels (int): The input channels of the CPM.
+ out_channels (int): The output channels of the CPM.
+ feat_channels (int): Feature channel of each CPM stage.
+ middle_channels (int): Feature channel of conv after the middle stage.
+ num_stages (int): Number of stages.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import CPM
+ >>> import torch
+ >>> self = CPM(3, 17)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 368, 368)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ feat_channels=128,
+ middle_channels=32,
+ num_stages=6,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ assert in_channels == 3
+
+ self.num_stages = num_stages
+ assert self.num_stages >= 1
+
+ self.stem = nn.Sequential(
+ ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 32, 5, padding=2, norm_cfg=norm_cfg),
+ ConvModule(32, 512, 9, padding=4, norm_cfg=norm_cfg),
+ ConvModule(512, 512, 1, padding=0, norm_cfg=norm_cfg),
+ ConvModule(512, out_channels, 1, padding=0, act_cfg=None))
+
+ self.middle = nn.Sequential(
+ ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ self.cpm_stages = nn.ModuleList([
+ CpmBlock(
+ middle_channels + out_channels,
+ channels=[feat_channels, feat_channels, feat_channels],
+ kernels=[11, 11, 11],
+ norm_cfg=norm_cfg) for _ in range(num_stages - 1)
+ ])
+
+ self.middle_conv = nn.ModuleList([
+ nn.Sequential(
+ ConvModule(
+ 128, middle_channels, 5, padding=2, norm_cfg=norm_cfg))
+ for _ in range(num_stages - 1)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ nn.Sequential(
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 1,
+ padding=0,
+ norm_cfg=norm_cfg),
+ ConvModule(feat_channels, out_channels, 1, act_cfg=None))
+ for _ in range(num_stages - 1)
+ ])
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ stage1_out = self.stem(x)
+ middle_out = self.middle(x)
+ out_feats = []
+
+ out_feats.append(stage1_out)
+
+ for ind in range(self.num_stages - 1):
+ single_stage = self.cpm_stages[ind]
+ out_conv = self.out_convs[ind]
+
+ inp_feat = torch.cat(
+ [out_feats[-1], self.middle_conv[ind](middle_out)], 1)
+ cpm_feat = single_stage(inp_feat)
+ out_feat = out_conv(cpm_feat)
+ out_feats.append(out_feat)
+
+ return out_feats
diff --git a/mmpose/models/backbones/hourglass.py b/mmpose/models/backbones/hourglass.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf75fad9895ebfd3f3c2a6bffedb3d7e4cc77cba
--- /dev/null
+++ b/mmpose/models/backbones/hourglass.py
@@ -0,0 +1,212 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .resnet import BasicBlock, ResLayer
+from .utils import load_checkpoint
+
+
+class HourglassModule(nn.Module):
+ """Hourglass Module for HourglassNet backbone.
+
+ Generate module recursively and use BasicBlock as the base unit.
+
+ Args:
+ depth (int): Depth of current HourglassModule.
+ stage_channels (list[int]): Feature channels of sub-modules in current
+ and follow-up HourglassModule.
+ stage_blocks (list[int]): Number of sub-modules stacked in current and
+ follow-up HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ """
+
+ def __init__(self,
+ depth,
+ stage_channels,
+ stage_blocks,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.depth = depth
+
+ cur_block = stage_blocks[0]
+ next_block = stage_blocks[1]
+
+ cur_channel = stage_channels[0]
+ next_channel = stage_channels[1]
+
+ self.up1 = ResLayer(
+ BasicBlock, cur_block, cur_channel, cur_channel, norm_cfg=norm_cfg)
+
+ self.low1 = ResLayer(
+ BasicBlock,
+ cur_block,
+ cur_channel,
+ next_channel,
+ stride=2,
+ norm_cfg=norm_cfg)
+
+ if self.depth > 1:
+ self.low2 = HourglassModule(depth - 1, stage_channels[1:],
+ stage_blocks[1:])
+ else:
+ self.low2 = ResLayer(
+ BasicBlock,
+ next_block,
+ next_channel,
+ next_channel,
+ norm_cfg=norm_cfg)
+
+ self.low3 = ResLayer(
+ BasicBlock,
+ cur_block,
+ next_channel,
+ cur_channel,
+ norm_cfg=norm_cfg,
+ downsample_first=False)
+
+ self.up2 = nn.Upsample(scale_factor=2)
+
+ def forward(self, x):
+ """Model forward function."""
+ up1 = self.up1(x)
+ low1 = self.low1(x)
+ low2 = self.low2(low1)
+ low3 = self.low3(low2)
+ up2 = self.up2(low3)
+ return up1 + up2
+
+
+@BACKBONES.register_module()
+class HourglassNet(BaseBackbone):
+ """HourglassNet backbone.
+
+ Stacked Hourglass Networks for Human Pose Estimation.
+ More details can be found in the `paper
+    <https://arxiv.org/abs/1603.06937>`__.
+
+ Args:
+ downsample_times (int): Downsample times in a HourglassModule.
+ num_stacks (int): Number of HourglassModule modules stacked,
+ 1 for Hourglass-52, 2 for Hourglass-104.
+ stage_channels (list[int]): Feature channel of each sub-module in a
+ HourglassModule.
+ stage_blocks (list[int]): Number of sub-modules stacked in a
+ HourglassModule.
+ feat_channel (int): Feature channel of conv after a HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import HourglassNet
+ >>> import torch
+ >>> self = HourglassNet()
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 256, 128, 128)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ downsample_times=5,
+ num_stacks=2,
+ stage_channels=(256, 256, 384, 384, 384, 512),
+ stage_blocks=(2, 2, 2, 2, 2, 4),
+ feat_channel=256,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.num_stacks = num_stacks
+ assert self.num_stacks >= 1
+ assert len(stage_channels) == len(stage_blocks)
+ assert len(stage_channels) > downsample_times
+
+ cur_channel = stage_channels[0]
+
+ self.stem = nn.Sequential(
+ ConvModule(3, 128, 7, padding=3, stride=2, norm_cfg=norm_cfg),
+ ResLayer(BasicBlock, 1, 128, 256, stride=2, norm_cfg=norm_cfg))
+
+ self.hourglass_modules = nn.ModuleList([
+ HourglassModule(downsample_times, stage_channels, stage_blocks)
+ for _ in range(num_stacks)
+ ])
+
+ self.inters = ResLayer(
+ BasicBlock,
+ num_stacks - 1,
+ cur_channel,
+ cur_channel,
+ norm_cfg=norm_cfg)
+
+ self.conv1x1s = nn.ModuleList([
+ ConvModule(
+ cur_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
+ for _ in range(num_stacks - 1)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ ConvModule(
+ cur_channel, feat_channel, 3, padding=1, norm_cfg=norm_cfg)
+ for _ in range(num_stacks)
+ ])
+
+ self.remap_convs = nn.ModuleList([
+ ConvModule(
+ feat_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
+ for _ in range(num_stacks - 1)
+ ])
+
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ inter_feat = self.stem(x)
+ out_feats = []
+
+ for ind in range(self.num_stacks):
+ single_hourglass = self.hourglass_modules[ind]
+ out_conv = self.out_convs[ind]
+
+ hourglass_feat = single_hourglass(inter_feat)
+ out_feat = out_conv(hourglass_feat)
+ out_feats.append(out_feat)
+
+ if ind < self.num_stacks - 1:
+ inter_feat = self.conv1x1s[ind](
+ inter_feat) + self.remap_convs[ind](
+ out_feat)
+ inter_feat = self.inters[ind](self.relu(inter_feat))
+
+ return out_feats
diff --git a/mmpose/models/backbones/hourglass_ae.py b/mmpose/models/backbones/hourglass_ae.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a700e5cb2157fd1dc16771145f065e991b270ea
--- /dev/null
+++ b/mmpose/models/backbones/hourglass_ae.py
@@ -0,0 +1,212 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, MaxPool2d, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint
+
+
+class HourglassAEModule(nn.Module):
+ """Modified Hourglass Module for HourglassNet_AE backbone.
+
+    Generate the module recursively and use ConvModule as the base unit.
+
+ Args:
+ depth (int): Depth of current HourglassModule.
+ stage_channels (list[int]): Feature channels of sub-modules in current
+ and follow-up HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ """
+
+ def __init__(self,
+ depth,
+ stage_channels,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.depth = depth
+
+ cur_channel = stage_channels[0]
+ next_channel = stage_channels[1]
+
+ self.up1 = ConvModule(
+ cur_channel, cur_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.pool1 = MaxPool2d(2, 2)
+
+ self.low1 = ConvModule(
+ cur_channel, next_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ if self.depth > 1:
+ self.low2 = HourglassAEModule(depth - 1, stage_channels[1:])
+ else:
+ self.low2 = ConvModule(
+ next_channel, next_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.low3 = ConvModule(
+ next_channel, cur_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
+
+ def forward(self, x):
+ """Model forward function."""
+ up1 = self.up1(x)
+ pool1 = self.pool1(x)
+ low1 = self.low1(pool1)
+ low2 = self.low2(low1)
+ low3 = self.low3(low2)
+ up2 = self.up2(low3)
+ return up1 + up2
+
+
+@BACKBONES.register_module()
+class HourglassAENet(BaseBackbone):
+ """Hourglass-AE Network proposed by Newell et al.
+
+ Associative Embedding: End-to-End Learning for Joint
+ Detection and Grouping.
+
+ More details can be found in the `paper
+    <https://arxiv.org/abs/1611.05424>`__.
+
+    Args:
+        downsample_times (int): Downsample times in a HourglassAEModule.
+        num_stacks (int): Number of HourglassAEModule modules stacked.
+        out_channels (int): Number of output channels of each stack
+            (34 by default).
+        stage_channels (list[int]): Feature channel of each sub-module in a
+            HourglassAEModule.
+        feat_channels (int): Feature channel of the convs after a
+            HourglassAEModule.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import HourglassAENet
+ >>> import torch
+ >>> self = HourglassAENet()
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 512, 512)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 34, 128, 128)
+ """
+
+ def __init__(self,
+ downsample_times=4,
+ num_stacks=1,
+ out_channels=34,
+ stage_channels=(256, 384, 512, 640, 768),
+ feat_channels=256,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.num_stacks = num_stacks
+ assert self.num_stacks >= 1
+ assert len(stage_channels) > downsample_times
+
+ cur_channels = stage_channels[0]
+
+ self.stem = nn.Sequential(
+ ConvModule(3, 64, 7, padding=3, stride=2, norm_cfg=norm_cfg),
+ ConvModule(64, 128, 3, padding=1, norm_cfg=norm_cfg),
+ MaxPool2d(2, 2),
+ ConvModule(128, 128, 3, padding=1, norm_cfg=norm_cfg),
+ ConvModule(128, feat_channels, 3, padding=1, norm_cfg=norm_cfg),
+ )
+
+ self.hourglass_modules = nn.ModuleList([
+ nn.Sequential(
+ HourglassAEModule(
+ downsample_times, stage_channels, norm_cfg=norm_cfg),
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 3,
+ padding=1,
+ norm_cfg=norm_cfg),
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 3,
+ padding=1,
+ norm_cfg=norm_cfg)) for _ in range(num_stacks)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ ConvModule(
+ cur_channels,
+ out_channels,
+ 1,
+ padding=0,
+ norm_cfg=None,
+ act_cfg=None) for _ in range(num_stacks)
+ ])
+
+ self.remap_out_convs = nn.ModuleList([
+ ConvModule(
+ out_channels,
+ feat_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for _ in range(num_stacks - 1)
+ ])
+
+ self.remap_feature_convs = nn.ModuleList([
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for _ in range(num_stacks - 1)
+ ])
+
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ inter_feat = self.stem(x)
+ out_feats = []
+
+ for ind in range(self.num_stacks):
+ single_hourglass = self.hourglass_modules[ind]
+ out_conv = self.out_convs[ind]
+
+ hourglass_feat = single_hourglass(inter_feat)
+ out_feat = out_conv(hourglass_feat)
+ out_feats.append(out_feat)
+
+ if ind < self.num_stacks - 1:
+ inter_feat = inter_feat + self.remap_out_convs[ind](
+ out_feat) + self.remap_feature_convs[ind](
+ hourglass_feat)
+
+ return out_feats
diff --git a/mmpose/models/backbones/hrformer.py b/mmpose/models/backbones/hrformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b843300a9fdb85908678c5a3fd45ce19e97ce2fe
--- /dev/null
+++ b/mmpose/models/backbones/hrformer.py
@@ -0,0 +1,746 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import math
+
+import torch
+import torch.nn as nn
+# from timm.models.layers import to_2tuple, trunc_normal_
+from mmcv.cnn import (build_activation_layer, build_conv_layer,
+ build_norm_layer, trunc_normal_init)
+from mmcv.cnn.bricks.transformer import build_dropout
+from mmcv.runner import BaseModule
+from torch.nn.functional import pad
+
+from ..builder import BACKBONES
+from .hrnet import Bottleneck, HRModule, HRNet
+
+
+def nlc_to_nchw(x, hw_shape):
+ """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.
+
+ Args:
+ x (Tensor): The input tensor of shape [N, L, C] before conversion.
+ hw_shape (Sequence[int]): The height and width of output feature map.
+
+ Returns:
+ Tensor: The output tensor of shape [N, C, H, W] after conversion.
+ """
+ H, W = hw_shape
+ assert len(x.shape) == 3
+ B, L, C = x.shape
+ assert L == H * W, 'The seq_len doesn\'t match H, W'
+ return x.transpose(1, 2).reshape(B, C, H, W)
+
+
+def nchw_to_nlc(x):
+ """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.
+
+ Args:
+ x (Tensor): The input tensor of shape [N, C, H, W] before conversion.
+
+ Returns:
+ Tensor: The output tensor of shape [N, L, C] after conversion.
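+
+    Example:
+        >>> # round-trip sketch: nchw -> nlc -> nchw recovers the shape
+        >>> import torch
+        >>> x = torch.rand(2, 32, 8, 8)
+        >>> tokens = nchw_to_nlc(x)
+        >>> print(tuple(tokens.shape))
+        (2, 64, 32)
+        >>> print(tuple(nlc_to_nchw(tokens, (8, 8)).shape))
+        (2, 32, 8, 8)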
+ """
+ assert len(x.shape) == 4
+ return x.flatten(2).transpose(1, 2).contiguous()
+
+
+def build_drop_path(drop_path_rate):
+ """Build drop path layer."""
+ return build_dropout(dict(type='DropPath', drop_prob=drop_path_rate))
+
+
+class WindowMSA(BaseModule):
+ """Window based multi-head self-attention (W-MSA) module with relative
+ position bias.
+
+ Args:
+ embed_dims (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+ window_size (tuple[int]): The height and width of the window.
+ qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.
+ Default: True.
+ qk_scale (float | None, optional): Override default qk scale of
+ head_dim ** -0.5 if set. Default: None.
+ attn_drop_rate (float, optional): Dropout ratio of attention weight.
+ Default: 0.0
+ proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.
+ with_rpe (bool, optional): If True, use relative position bias.
+ Default: True.
+ init_cfg (dict | None, optional): The Config for initialization.
+ Default: None.
+ """
+
+ def __init__(self,
+ embed_dims,
+ num_heads,
+ window_size,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop_rate=0.,
+ proj_drop_rate=0.,
+ with_rpe=True,
+ init_cfg=None):
+
+ super().__init__(init_cfg=init_cfg)
+ self.embed_dims = embed_dims
+ self.window_size = window_size # Wh, Ww
+ self.num_heads = num_heads
+ head_embed_dims = embed_dims // num_heads
+ self.scale = qk_scale or head_embed_dims**-0.5
+
+ self.with_rpe = with_rpe
+ if self.with_rpe:
+ # define a parameter table of relative position bias
+ self.relative_position_bias_table = nn.Parameter(
+ torch.zeros(
+ (2 * window_size[0] - 1) * (2 * window_size[1] - 1),
+ num_heads)) # 2*Wh-1 * 2*Ww-1, nH
+
+ Wh, Ww = self.window_size
+ rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww)
+ rel_position_index = rel_index_coords + rel_index_coords.T
+ rel_position_index = rel_position_index.flip(1).contiguous()
+ self.register_buffer('relative_position_index', rel_position_index)
+
+ self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias)
+ self.attn_drop = nn.Dropout(attn_drop_rate)
+ self.proj = nn.Linear(embed_dims, embed_dims)
+ self.proj_drop = nn.Dropout(proj_drop_rate)
+
+ self.softmax = nn.Softmax(dim=-1)
+
+ def init_weights(self):
+ trunc_normal_init(self.relative_position_bias_table, std=0.02)
+
+ def forward(self, x, mask=None):
+ """
+ Args:
+
+ x (tensor): input features with shape of (B*num_windows, N, C)
+ mask (tensor | None, Optional): mask with shape of (num_windows,
+ Wh*Ww, Wh*Ww), value should be between (-inf, 0].
+ """
+ B, N, C = x.shape
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
+ C // self.num_heads).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2]
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ if self.with_rpe:
+ relative_position_bias = self.relative_position_bias_table[
+ self.relative_position_index.view(-1)].view(
+ self.window_size[0] * self.window_size[1],
+ self.window_size[0] * self.window_size[1],
+ -1) # Wh*Ww,Wh*Ww,nH
+ relative_position_bias = relative_position_bias.permute(
+ 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
+ attn = attn + relative_position_bias.unsqueeze(0)
+
+ if mask is not None:
+ nW = mask.shape[0]
+ attn = attn.view(B // nW, nW, self.num_heads, N,
+ N) + mask.unsqueeze(1).unsqueeze(0)
+ attn = attn.view(-1, self.num_heads, N, N)
+ attn = self.softmax(attn)
+
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+ return x
+
+ @staticmethod
+ def double_step_seq(step1, len1, step2, len2):
+ seq1 = torch.arange(0, step1 * len1, step1)
+ seq2 = torch.arange(0, step2 * len2, step2)
+ return (seq1[:, None] + seq2[None, :]).reshape(1, -1)
+
+
+class LocalWindowSelfAttention(BaseModule):
+ r""" Local-window Self Attention (LSA) module with relative position bias.
+
+ This module is the short-range self-attention module in the
+    Interlaced Sparse Self-Attention `<https://arxiv.org/abs/1907.12273>`_.
+
+ Args:
+ embed_dims (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+ window_size (tuple[int] | int): The height and width of the window.
+ qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.
+ Default: True.
+ qk_scale (float | None, optional): Override default qk scale of
+ head_dim ** -0.5 if set. Default: None.
+ attn_drop_rate (float, optional): Dropout ratio of attention weight.
+ Default: 0.0
+ proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.
+ with_rpe (bool, optional): If True, use relative position bias.
+ Default: True.
+ with_pad_mask (bool, optional): If True, mask out the padded tokens in
+ the attention process. Default: False.
+ init_cfg (dict | None, optional): The Config for initialization.
+ Default: None.
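+
+    Example:
+        >>> # illustrative sketch on a 14x14 token grid with 7x7 windows
+        >>> import torch
+        >>> self = LocalWindowSelfAttention(32, num_heads=2, window_size=7)
+        >>> x = torch.rand(1, 14 * 14, 32)
+        >>> out = self.forward(x, 14, 14)
+        >>> print(tuple(out.shape))
+        (1, 196, 32)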
+ """
+
+ def __init__(self,
+ embed_dims,
+ num_heads,
+ window_size,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop_rate=0.,
+ proj_drop_rate=0.,
+ with_rpe=True,
+ with_pad_mask=False,
+ init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+ if isinstance(window_size, int):
+ window_size = (window_size, window_size)
+ self.window_size = window_size
+ self.with_pad_mask = with_pad_mask
+ self.attn = WindowMSA(
+ embed_dims=embed_dims,
+ num_heads=num_heads,
+ window_size=window_size,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ attn_drop_rate=attn_drop_rate,
+ proj_drop_rate=proj_drop_rate,
+ with_rpe=with_rpe,
+ init_cfg=init_cfg)
+
+ def forward(self, x, H, W, **kwargs):
+ """Forward function."""
+ B, N, C = x.shape
+ x = x.view(B, H, W, C)
+ Wh, Ww = self.window_size
+
+ # center-pad the feature on H and W axes
+ pad_h = math.ceil(H / Wh) * Wh - H
+ pad_w = math.ceil(W / Ww) * Ww - W
+ x = pad(x, (0, 0, pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+ pad_h - pad_h // 2))
+
+ # permute
+ x = x.view(B, math.ceil(H / Wh), Wh, math.ceil(W / Ww), Ww, C)
+ x = x.permute(0, 1, 3, 2, 4, 5)
+ x = x.reshape(-1, Wh * Ww, C) # (B*num_window, Wh*Ww, C)
+
+ # attention
+ if self.with_pad_mask and pad_h > 0 and pad_w > 0:
+ pad_mask = x.new_zeros(1, H, W, 1)
+ pad_mask = pad(
+ pad_mask, [
+ 0, 0, pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+ pad_h - pad_h // 2
+ ],
+ value=-float('inf'))
+ pad_mask = pad_mask.view(1, math.ceil(H / Wh), Wh,
+ math.ceil(W / Ww), Ww, 1)
+ pad_mask = pad_mask.permute(1, 3, 0, 2, 4, 5)
+ pad_mask = pad_mask.reshape(-1, Wh * Ww)
+ pad_mask = pad_mask[:, None, :].expand([-1, Wh * Ww, -1])
+ out = self.attn(x, pad_mask, **kwargs)
+ else:
+ out = self.attn(x, **kwargs)
+
+ # reverse permutation
+ out = out.reshape(B, math.ceil(H / Wh), math.ceil(W / Ww), Wh, Ww, C)
+ out = out.permute(0, 1, 3, 2, 4, 5)
+ out = out.reshape(B, H + pad_h, W + pad_w, C)
+
+ # de-pad
+ out = out[:, pad_h // 2:H + pad_h // 2, pad_w // 2:W + pad_w // 2]
+ return out.reshape(B, N, C)
+
+
+class CrossFFN(BaseModule):
+ r"""FFN with Depthwise Conv of HRFormer.
+
+ Args:
+ in_features (int): The feature dimension.
+ hidden_features (int, optional): The hidden dimension of FFNs.
+ Defaults: The same as in_features.
+ act_cfg (dict, optional): Config of activation layer.
+ Default: dict(type='GELU').
+ dw_act_cfg (dict, optional): Config of activation layer appended
+ right after DW Conv. Default: dict(type='GELU').
+ norm_cfg (dict, optional): Config of norm layer.
+ Default: dict(type='SyncBN').
+ init_cfg (dict | list | None, optional): The init config.
+ Default: None.
+ """
+
+ def __init__(self,
+ in_features,
+ hidden_features=None,
+ out_features=None,
+ act_cfg=dict(type='GELU'),
+ dw_act_cfg=dict(type='GELU'),
+ norm_cfg=dict(type='SyncBN'),
+ init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Conv2d(in_features, hidden_features, kernel_size=1)
+ self.act1 = build_activation_layer(act_cfg)
+ self.norm1 = build_norm_layer(norm_cfg, hidden_features)[1]
+ self.dw3x3 = nn.Conv2d(
+ hidden_features,
+ hidden_features,
+ kernel_size=3,
+ stride=1,
+ groups=hidden_features,
+ padding=1)
+ self.act2 = build_activation_layer(dw_act_cfg)
+ self.norm2 = build_norm_layer(norm_cfg, hidden_features)[1]
+ self.fc2 = nn.Conv2d(hidden_features, out_features, kernel_size=1)
+ self.act3 = build_activation_layer(act_cfg)
+ self.norm3 = build_norm_layer(norm_cfg, out_features)[1]
+
+        # put the modules together
+ self.layers = [
+ self.fc1, self.norm1, self.act1, self.dw3x3, self.norm2, self.act2,
+ self.fc2, self.norm3, self.act3
+ ]
+
+ def forward(self, x, H, W):
+ """Forward function."""
+ x = nlc_to_nchw(x, (H, W))
+ for layer in self.layers:
+ x = layer(x)
+ x = nchw_to_nlc(x)
+ return x
+
+
+class HRFormerBlock(BaseModule):
+ """High-Resolution Block for HRFormer.
+
+ Args:
+ in_features (int): The input dimension.
+ out_features (int): The output dimension.
+ num_heads (int): The number of head within each LSA.
+ window_size (int, optional): The window size for the LSA.
+ Default: 7
+        mlp_ratio (int, optional): The expansion ratio of the FFN.
+ Default: 4
+ act_cfg (dict, optional): Config of activation layer.
+ Default: dict(type='GELU').
+ norm_cfg (dict, optional): Config of norm layer.
+ Default: dict(type='SyncBN').
+ transformer_norm_cfg (dict, optional): Config of transformer norm
+ layer. Default: dict(type='LN', eps=1e-6).
+ init_cfg (dict | list | None, optional): The init config.
+ Default: None.
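+
+    Example:
+        >>> # illustrative sketch; ``BN`` is used instead of the default
+        >>> # ``SyncBN`` so the block runs in a single non-distributed process
+        >>> import torch
+        >>> self = HRFormerBlock(
+        >>>     32, 32, num_heads=2, window_size=7, norm_cfg=dict(type='BN'))
+        >>> x = torch.rand(1, 32, 14, 14)
+        >>> print(tuple(self.forward(x).shape))
+        (1, 32, 14, 14)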
+ """
+
+ expansion = 1
+
+ def __init__(self,
+ in_features,
+ out_features,
+ num_heads,
+ window_size=7,
+ mlp_ratio=4.0,
+ drop_path=0.0,
+ act_cfg=dict(type='GELU'),
+ norm_cfg=dict(type='SyncBN'),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ init_cfg=None,
+ **kwargs):
+ super(HRFormerBlock, self).__init__(init_cfg=init_cfg)
+ self.num_heads = num_heads
+ self.window_size = window_size
+ self.mlp_ratio = mlp_ratio
+
+ self.norm1 = build_norm_layer(transformer_norm_cfg, in_features)[1]
+ self.attn = LocalWindowSelfAttention(
+ in_features,
+ num_heads=num_heads,
+ window_size=window_size,
+ init_cfg=None,
+ **kwargs)
+
+ self.norm2 = build_norm_layer(transformer_norm_cfg, out_features)[1]
+ self.ffn = CrossFFN(
+ in_features=in_features,
+ hidden_features=int(in_features * mlp_ratio),
+ out_features=out_features,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ dw_act_cfg=act_cfg,
+ init_cfg=None)
+
+ self.drop_path = build_drop_path(
+ drop_path) if drop_path > 0.0 else nn.Identity()
+
+ def forward(self, x):
+ """Forward function."""
+ B, C, H, W = x.size()
+ # Attention
+ x = x.view(B, C, -1).permute(0, 2, 1)
+ x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+ # FFN
+ x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
+ x = x.permute(0, 2, 1).view(B, C, H, W)
+ return x
+
+ def extra_repr(self):
+ """(Optional) Set the extra information about this module."""
+ return 'num_heads={}, window_size={}, mlp_ratio={}'.format(
+ self.num_heads, self.window_size, self.mlp_ratio)
+
+
+class HRFomerModule(HRModule):
+ """High-Resolution Module for HRFormer.
+
+ Args:
+ num_branches (int): The number of branches in the HRFormerModule.
+ block (nn.Module): The building block of HRFormer.
+ The block should be the HRFormerBlock.
+ num_blocks (tuple): The number of blocks in each branch.
+ The length must be equal to num_branches.
+ num_inchannels (tuple): The number of input channels in each branch.
+ The length must be equal to num_branches.
+ num_channels (tuple): The number of channels in each branch.
+ The length must be equal to num_branches.
+ num_heads (tuple): The number of heads within the LSAs.
+ num_window_sizes (tuple): The window size for the LSAs.
+ num_mlp_ratios (tuple): The expansion ratio for the FFNs.
+        drop_paths (list[float] | float, optional): The drop path rates of
+            the HRFormerBlocks in this module. Default: 0.0
+ multiscale_output (bool, optional): Whether to output multi-level
+ features produced by multiple branches. If False, only the first
+ level feature will be output. Default: True.
+ conv_cfg (dict, optional): Config of the conv layers.
+ Default: None.
+ norm_cfg (dict, optional): Config of the norm layers appended
+ right after conv. Default: dict(type='SyncBN', requires_grad=True)
+ transformer_norm_cfg (dict, optional): Config of the norm layers.
+ Default: dict(type='LN', eps=1e-6)
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False
+ upsample_cfg(dict, optional): The config of upsample layers in fuse
+ layers. Default: dict(mode='bilinear', align_corners=False)
+ """
+
+ def __init__(self,
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ num_heads,
+ num_window_sizes,
+ num_mlp_ratios,
+ multiscale_output=True,
+ drop_paths=0.0,
+ with_rpe=True,
+ with_pad_mask=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ with_cp=False,
+ upsample_cfg=dict(mode='bilinear', align_corners=False)):
+
+ self.transformer_norm_cfg = transformer_norm_cfg
+ self.drop_paths = drop_paths
+ self.num_heads = num_heads
+ self.num_window_sizes = num_window_sizes
+ self.num_mlp_ratios = num_mlp_ratios
+ self.with_rpe = with_rpe
+ self.with_pad_mask = with_pad_mask
+
+ super().__init__(num_branches, block, num_blocks, num_inchannels,
+ num_channels, multiscale_output, with_cp, conv_cfg,
+ norm_cfg, upsample_cfg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ """Build one branch."""
+        # HRFormerBlock does not support a downsample layer yet.
+ assert stride == 1 and self.in_channels[branch_index] == num_channels[
+ branch_index]
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ num_heads=self.num_heads[branch_index],
+ window_size=self.num_window_sizes[branch_index],
+ mlp_ratio=self.num_mlp_ratios[branch_index],
+ drop_path=self.drop_paths[0],
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ init_cfg=None,
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask))
+
+ self.in_channels[
+ branch_index] = self.in_channels[branch_index] * block.expansion
+ for i in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ num_heads=self.num_heads[branch_index],
+ window_size=self.num_window_sizes[branch_index],
+ mlp_ratio=self.num_mlp_ratios[branch_index],
+ drop_path=self.drop_paths[i],
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ init_cfg=None,
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask))
+ return nn.Sequential(*layers)
+
+ def _make_fuse_layers(self):
+ """Build fuse layers."""
+ if self.num_branches == 1:
+ return None
+ num_branches = self.num_branches
+ num_inchannels = self.in_channels
+ fuse_layers = []
+ for i in range(num_branches if self.multiscale_output else 1):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_inchannels[i],
+ kernel_size=1,
+ stride=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_inchannels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i),
+ mode=self.upsample_cfg['mode'],
+ align_corners=self.
+ upsample_cfg['align_corners'])))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv3x3s = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ num_outchannels_conv3x3 = num_inchannels[i]
+ with_out_act = False
+ else:
+ num_outchannels_conv3x3 = num_inchannels[j]
+ with_out_act = True
+ sub_modules = [
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_inchannels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=num_inchannels[j],
+ bias=False,
+ ),
+ build_norm_layer(self.norm_cfg,
+ num_inchannels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ kernel_size=1,
+ stride=1,
+ bias=False,
+ ),
+ build_norm_layer(self.norm_cfg,
+ num_outchannels_conv3x3)[1]
+ ]
+ if with_out_act:
+ sub_modules.append(nn.ReLU(False))
+ conv3x3s.append(nn.Sequential(*sub_modules))
+ fuse_layer.append(nn.Sequential(*conv3x3s))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def get_num_inchannels(self):
+ """Return the number of input channels."""
+ return self.in_channels
+
+
+@BACKBONES.register_module()
+class HRFormer(HRNet):
+ """HRFormer backbone.
+
+ This backbone is the implementation of `HRFormer: High-Resolution
+    Transformer for Dense Prediction <https://arxiv.org/abs/2110.09408>`_.
+
+ Args:
+ extra (dict): Detailed configuration for each stage of HRNet.
+ There must be 4 stages, the configuration for each stage must have
+ 5 keys:
+
+ - num_modules (int): The number of HRModule in this stage.
+ - num_branches (int): The number of branches in the HRModule.
+ - block (str): The type of block.
+ - num_blocks (tuple): The number of blocks in each branch.
+ The length must be equal to num_branches.
+ - num_channels (tuple): The number of channels in each branch.
+ The length must be equal to num_branches.
+ in_channels (int): Number of input image channels. Normally 3.
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: None.
+ norm_cfg (dict): Config of norm layer.
+ Use `SyncBN` by default.
+ transformer_norm_cfg (dict): Config of transformer norm layer.
+ Use `LN` by default.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ Example:
+ >>> from mmpose.models import HRFormer
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(2, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+    >>>         block='HRFORMERBLOCK',
+ >>> window_sizes=(7, 7),
+ >>> num_heads=(1, 2),
+ >>> mlp_ratios=(4, 4),
+ >>> num_blocks=(2, 2),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+    >>>         block='HRFORMERBLOCK',
+ >>> window_sizes=(7, 7, 7),
+ >>> num_heads=(1, 2, 4),
+ >>> mlp_ratios=(4, 4, 4),
+ >>> num_blocks=(2, 2, 2),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=2,
+ >>> num_branches=4,
+    >>>         block='HRFORMERBLOCK',
+ >>> window_sizes=(7, 7, 7, 7),
+ >>> num_heads=(1, 2, 4, 8),
+ >>> mlp_ratios=(4, 4, 4, 4),
+ >>> num_blocks=(2, 2, 2, 2),
+    >>>         num_channels=(32, 64, 128, 256)),
+    >>>     drop_path_rate=0.1)
+ >>> self = HRFormer(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ (1, 64, 4, 4)
+ (1, 128, 2, 2)
+ (1, 256, 1, 1)
+ """
+
+ blocks_dict = {'BOTTLENECK': Bottleneck, 'HRFORMERBLOCK': HRFormerBlock}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=False,
+ frozen_stages=-1):
+
+ # stochastic depth
+ depths = [
+ extra[stage]['num_blocks'][0] * extra[stage]['num_modules']
+ for stage in ['stage2', 'stage3', 'stage4']
+ ]
+ depth_s2, depth_s3, _ = depths
+ drop_path_rate = extra['drop_path_rate']
+ dpr = [
+ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+ ]
+ extra['stage2']['drop_path_rates'] = dpr[0:depth_s2]
+ extra['stage3']['drop_path_rates'] = dpr[depth_s2:depth_s2 + depth_s3]
+ extra['stage4']['drop_path_rates'] = dpr[depth_s2 + depth_s3:]
+
+ # HRFormer use bilinear upsample as default
+ upsample_cfg = extra.get('upsample', {
+ 'mode': 'bilinear',
+ 'align_corners': False
+ })
+ extra['upsample'] = upsample_cfg
+ self.transformer_norm_cfg = transformer_norm_cfg
+ self.with_rpe = extra.get('with_rpe', True)
+ self.with_pad_mask = extra.get('with_pad_mask', False)
+
+ super().__init__(extra, in_channels, conv_cfg, norm_cfg, norm_eval,
+ with_cp, zero_init_residual, frozen_stages)
+
+ def _make_stage(self,
+ layer_config,
+ num_inchannels,
+ multiscale_output=True):
+ """Make each stage."""
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+ num_heads = layer_config['num_heads']
+ num_window_sizes = layer_config['window_sizes']
+ num_mlp_ratios = layer_config['mlp_ratios']
+ drop_path_rates = layer_config['drop_path_rates']
+
+ modules = []
+ for i in range(num_modules):
+ # multiscale_output is only used at the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ modules.append(
+ HRFomerModule(
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ num_heads,
+ num_window_sizes,
+ num_mlp_ratios,
+ reset_multiscale_output,
+ drop_paths=drop_path_rates[num_blocks[0] *
+ i:num_blocks[0] * (i + 1)],
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ with_cp=self.with_cp,
+ upsample_cfg=self.upsample_cfg))
+ num_inchannels = modules[-1].get_num_inchannels()
+
+ return nn.Sequential(*modules), num_inchannels
diff --git a/mmpose/models/backbones/hrnet.py b/mmpose/models/backbones/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..87dc8cef555b5e8d78fcc69293047b0cbe2ea8a6
--- /dev/null
+++ b/mmpose/models/backbones/hrnet.py
@@ -0,0 +1,604 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .resnet import BasicBlock, Bottleneck, get_expansion
+from .utils import load_checkpoint
+
+
+class HRModule(nn.Module):
+ """High-Resolution Module for HRNet.
+
+ In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange
+ is in this module.
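+
+    Example:
+        >>> # illustrative two-branch module; the fuse layers exchange
+        >>> # information across the 32x32 and 16x16 resolutions
+        >>> import torch
+        >>> from mmpose.models.backbones.resnet import BasicBlock
+        >>> self = HRModule(2, BasicBlock, (2, 2), [16, 32], (16, 32),
+        >>>                 multiscale_output=True)
+        >>> feats = [torch.rand(1, 16, 32, 32), torch.rand(1, 32, 16, 16)]
+        >>> outs = self.forward(feats)
+        >>> for out in outs:
+        ...     print(tuple(out.shape))
+        (1, 16, 32, 32)
+        (1, 32, 16, 16)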
+ """
+
+ def __init__(self,
+ num_branches,
+ blocks,
+ num_blocks,
+ in_channels,
+ num_channels,
+ multiscale_output=False,
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ upsample_cfg=dict(mode='nearest', align_corners=None)):
+
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self._check_branches(num_branches, num_blocks, in_channels,
+ num_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.multiscale_output = multiscale_output
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.upsample_cfg = upsample_cfg
+ self.with_cp = with_cp
+ self.branches = self._make_branches(num_branches, blocks, num_blocks,
+ num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(inplace=True)
+
+ @staticmethod
+ def _check_branches(num_branches, num_blocks, in_channels, num_channels):
+ """Check input to avoid ValueError."""
+ if num_branches != len(num_blocks):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_BLOCKS({len(num_blocks)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_CHANNELS({len(num_channels)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(in_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_INCHANNELS({len(in_channels)})'
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ """Make one branch."""
+ downsample = None
+ if stride != 1 or \
+ self.in_channels[branch_index] != \
+ num_channels[branch_index] * get_expansion(block):
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(
+ self.norm_cfg,
+ num_channels[branch_index] * get_expansion(block))[1])
+
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ stride=stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ self.in_channels[branch_index] = \
+ num_channels[branch_index] * get_expansion(block)
+ for _ in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ """Make branches."""
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(
+ self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ """Make fuse layer."""
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i),
+ mode=self.upsample_cfg['mode'],
+ align_corners=self.
+ upsample_cfg['align_corners'])))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=True)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+ for i in range(len(self.fuse_layers)):
+ y = 0
+ for j in range(self.num_branches):
+ if i == j:
+ y += x[j]
+ else:
+ y += self.fuse_layers[i][j](x[j])
+ x_fuse.append(self.relu(y))
+ return x_fuse
+
+
+@BACKBONES.register_module()
+class HRNet(nn.Module):
+ """HRNet backbone.
+
+ `High-Resolution Representations for Labeling Pixels and Regions
+    <https://arxiv.org/abs/1904.04514>`__
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Default: 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+
+ Example:
+ >>> from mmpose.models import HRNet
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(4, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=3,
+ >>> num_branches=4,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4, 4),
+ >>> num_channels=(32, 64, 128, 256)))
+ >>> self = HRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ """
+
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=False,
+ frozen_stages=-1):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+ self.zero_init_residual = zero_init_residual
+ self.frozen_stages = frozen_stages
+
+ # stem net
+ self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ 64,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.upsample_cfg = self.extra.get('upsample', {
+ 'mode': 'nearest',
+ 'align_corners': None
+ })
+
+ # stage 1
+ self.stage1_cfg = self.extra['stage1']
+ num_channels = self.stage1_cfg['num_channels'][0]
+ block_type = self.stage1_cfg['block']
+ num_blocks = self.stage1_cfg['num_blocks'][0]
+
+ block = self.blocks_dict[block_type]
+ stage1_out_channels = num_channels * get_expansion(block)
+ self.layer1 = self._make_layer(block, 64, stage1_out_channels,
+ num_blocks)
+
+ # stage 2
+ self.stage2_cfg = self.extra['stage2']
+ num_channels = self.stage2_cfg['num_channels']
+ block_type = self.stage2_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition1 = self._make_transition_layer([stage1_out_channels],
+ num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(
+ self.stage2_cfg, num_channels)
+
+ # stage 3
+ self.stage3_cfg = self.extra['stage3']
+ num_channels = self.stage3_cfg['num_channels']
+ block_type = self.stage3_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition2 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(
+ self.stage3_cfg, num_channels)
+
+ # stage 4
+ self.stage4_cfg = self.extra['stage4']
+ num_channels = self.stage4_cfg['num_channels']
+ block_type = self.stage4_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition3 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg,
+ num_channels,
+ multiscale_output=self.stage4_cfg.get('multiscale_output', False))
+
+ self._freeze_stages()
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ """Make transition layer."""
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU(inplace=True)))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True)))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
+ """Make layer."""
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1])
+
+ layers = []
+ layers.append(
+ block(
+ in_channels,
+ out_channels,
+ stride=stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ for _ in range(1, blocks):
+ layers.append(
+ block(
+ out_channels,
+ out_channels,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, in_channels, multiscale_output=True):
+ """Make stage."""
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+
+ hr_modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used for the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ hr_modules.append(
+ HRModule(
+ num_branches,
+ block,
+ num_blocks,
+ in_channels,
+ num_channels,
+ reset_multiscale_output,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg,
+ upsample_cfg=self.upsample_cfg))
+
+ in_channels = hr_modules[-1].in_channels
+
+ return nn.Sequential(*hr_modules), in_channels
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.norm1.eval()
+ self.norm2.eval()
+
+ for m in [self.conv1, self.norm1, self.conv2, self.norm2]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ if i == 1:
+ m = getattr(self, 'layer1')
+ else:
+ m = getattr(self, f'stage{i}')
+
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if i < 4:
+ m = getattr(self, f'transition{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.norm2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg['num_branches']):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+
+ x_list = []
+ for i in range(self.stage3_cfg['num_branches']):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg['num_branches']):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ return y_list
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
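+# Note on the backbone above (assumption, based on the standard HRNet design):
+# forward() returns a list of multi-resolution feature maps from stage 4 (highest
+# resolution first); with the default multiscale_output=False only the
+# highest-resolution branch is kept.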
diff --git a/mmpose/models/backbones/litehrnet.py b/mmpose/models/backbones/litehrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..954368841eb631e3dc6c77e9810f6980f3739bf3
--- /dev/null
+++ b/mmpose/models/backbones/litehrnet.py
@@ -0,0 +1,984 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HRNet/Lite-HRNet
+# Original licence: Apache License 2.0.
+# ------------------------------------------------------------------------------
+
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
+ build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .utils import channel_shuffle, load_checkpoint
+
+
+class SpatialWeighting(nn.Module):
+ """Spatial weighting module.
+
+ Args:
+ channels (int): The channels of the module.
+ ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: None.
+ act_cfg (dict): Config dict for activation layer.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid')).
+ The last ConvModule uses Sigmoid by default.
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+ self.conv1 = ConvModule(
+ in_channels=channels,
+ out_channels=int(channels / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(channels / ratio),
+ out_channels=channels,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ out = self.global_avgpool(x)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ return x * out
+
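+# Illustrative usage sketch (not part of the upstream Lite-HRNet code): the module
+# behaves like an SE-style gate, so the output keeps the input shape and only
+# rescales channels, e.g.
+#
+#   sw = SpatialWeighting(channels=64, ratio=16)
+#   y = sw(torch.rand(1, 64, 32, 32))   # y.shape == (1, 64, 32, 32)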
+
+class CrossResolutionWeighting(nn.Module):
+ """Cross-resolution channel weighting module.
+
+ Args:
+ channels (int): The channels of the module.
+ ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: None.
+ act_cfg (dict): Config dict for activation layer.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid')).
+ The last ConvModule uses Sigmoid by default.
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.channels = channels
+ total_channel = sum(channels)
+ self.conv1 = ConvModule(
+ in_channels=total_channel,
+ out_channels=int(total_channel / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(total_channel / ratio),
+ out_channels=total_channel,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ mini_size = x[-1].size()[-2:]
+ out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
+ out = torch.cat(out, dim=1)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ out = torch.split(out, self.channels, dim=1)
+ out = [
+ s * F.interpolate(a, size=s.size()[-2:], mode='nearest')
+ for s, a in zip(x, out)
+ ]
+ return out
+
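+# Illustrative usage sketch (assumption, not from the upstream code): the module
+# takes one tensor per branch, pools every branch to the smallest resolution,
+# computes joint channel weights, then broadcasts the weights back per branch:
+#
+#   crw = CrossResolutionWeighting(channels=[40, 80], ratio=8)
+#   outs = crw([torch.rand(1, 40, 32, 32), torch.rand(1, 80, 16, 16)])
+#   # outs[0].shape == (1, 40, 32, 32); outs[1].shape == (1, 80, 16, 16)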
+
+class ConditionalChannelWeighting(nn.Module):
+ """Conditional channel weighting block.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ stride (int): Stride of the 3x3 convolution layer.
+ reduce_ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ stride,
+ reduce_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False):
+ super().__init__()
+ self.with_cp = with_cp
+ self.stride = stride
+ assert stride in [1, 2]
+
+ branch_channels = [channel // 2 for channel in in_channels]
+
+ self.cross_resolution_weighting = CrossResolutionWeighting(
+ branch_channels,
+ ratio=reduce_ratio,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ self.depthwise_convs = nn.ModuleList([
+ ConvModule(
+ channel,
+ channel,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=channel,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for channel in branch_channels
+ ])
+
+ self.spatial_weighting = nn.ModuleList([
+ SpatialWeighting(channels=channel, ratio=4)
+ for channel in branch_channels
+ ])
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ x = [s.chunk(2, dim=1) for s in x]
+ x1 = [s[0] for s in x]
+ x2 = [s[1] for s in x]
+
+ x2 = self.cross_resolution_weighting(x2)
+ x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
+ x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
+
+ out = [torch.cat([s1, s2], dim=1) for s1, s2 in zip(x1, x2)]
+ out = [channel_shuffle(s, 2) for s in out]
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
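+# Illustrative usage sketch (assumption): each branch tensor is split in half along
+# the channel dim; one half passes through unchanged, the other is re-weighted
+# (cross-resolution + spatial) before the halves are concatenated and shuffled:
+#
+#   ccw = ConditionalChannelWeighting([40, 80], stride=1, reduce_ratio=8)
+#   outs = ccw([torch.rand(1, 40, 32, 32), torch.rand(1, 80, 16, 16)])
+#   # per-branch shapes are preserved when stride == 1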
+
+class Stem(nn.Module):
+ """Stem network block.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ stem_channels (int): Output channels of the stem layer.
+ out_channels (int): The output channels of the block.
+ expand_ratio (int): adjusts number of channels of the hidden layer
+ in InvertedResidual by this amount.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ stem_channels,
+ out_channels,
+ expand_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False):
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+
+ self.conv1 = ConvModule(
+ in_channels=in_channels,
+ out_channels=stem_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'))
+
+ mid_channels = int(round(stem_channels * expand_ratio))
+ branch_channels = stem_channels // 2
+ if stem_channels == self.out_channels:
+ inc_channels = self.out_channels - branch_channels
+ else:
+ inc_channels = self.out_channels - stem_channels
+
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ branch_channels,
+ branch_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=branch_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_channels,
+ inc_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU')),
+ )
+
+ self.expand_conv = ConvModule(
+ branch_channels,
+ mid_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'))
+ self.depthwise_conv = ConvModule(
+ mid_channels,
+ mid_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=mid_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+ self.linear_conv = ConvModule(
+ mid_channels,
+ branch_channels
+ if stem_channels == self.out_channels else stem_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ x = self.conv1(x)
+ x1, x2 = x.chunk(2, dim=1)
+
+ x2 = self.expand_conv(x2)
+ x2 = self.depthwise_conv(x2)
+ x2 = self.linear_conv(x2)
+
+ out = torch.cat((self.branch1(x1), x2), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
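+# Illustrative usage sketch (assumption): with the stem configuration used in the
+# LiteHRNet docstring example further below, the stem reduces resolution by 4x:
+#
+#   stem = Stem(in_channels=3, stem_channels=32, out_channels=32, expand_ratio=1)
+#   y = stem(torch.rand(1, 3, 256, 256))   # y.shape == (1, 32, 64, 64)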
+
+class IterativeHead(nn.Module):
+ """Extra iterative head for feature learning.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ """
+
+ def __init__(self, in_channels, norm_cfg=dict(type='BN')):
+ super().__init__()
+ projects = []
+        num_branches = len(in_channels)
+        self.in_channels = in_channels[::-1]
+
+        for i in range(num_branches):
+            if i != num_branches - 1:
+ projects.append(
+ DepthwiseSeparableConvModule(
+ in_channels=self.in_channels[i],
+ out_channels=self.in_channels[i + 1],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ dw_act_cfg=None,
+ pw_act_cfg=dict(type='ReLU')))
+ else:
+ projects.append(
+ DepthwiseSeparableConvModule(
+ in_channels=self.in_channels[i],
+ out_channels=self.in_channels[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ dw_act_cfg=None,
+ pw_act_cfg=dict(type='ReLU')))
+ self.projects = nn.ModuleList(projects)
+
+ def forward(self, x):
+ x = x[::-1]
+
+ y = []
+ last_x = None
+ for i, s in enumerate(x):
+ if last_x is not None:
+ last_x = F.interpolate(
+ last_x,
+ size=s.size()[-2:],
+ mode='bilinear',
+ align_corners=True)
+ s = s + last_x
+ s = self.projects[i](s)
+ y.append(s)
+ last_x = s
+
+ return y[::-1]
+
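+# Illustrative note (assumption): IterativeHead refines features top-down; the
+# deepest (lowest-resolution) branch is projected first, bilinearly upsampled and
+# added to the next branch, and so on. Spatial sizes are preserved per branch and
+# the output list is returned in the original high-to-low resolution order.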
+
+class ShuffleUnit(nn.Module):
+ """InvertedResidual block for ShuffleNetV2 backbone.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ out_channels (int): The output channels of the block.
+ stride (int): Stride of the 3x3 convolution layer. Default: 1
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride=1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ super().__init__()
+ self.stride = stride
+ self.with_cp = with_cp
+
+ branch_features = out_channels // 2
+ if self.stride == 1:
+ assert in_channels == branch_features * 2, (
+ f'in_channels ({in_channels}) should equal to '
+ f'branch_features * 2 ({branch_features * 2}) '
+ 'when stride is 1')
+
+ if in_channels != branch_features * 2:
+ assert self.stride != 1, (
+ f'stride ({self.stride}) should not equal 1 when '
+ f'in_channels != branch_features * 2')
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=in_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ in_channels,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ )
+
+ self.branch2 = nn.Sequential(
+ ConvModule(
+ in_channels if (self.stride > 1) else branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=branch_features,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.stride > 1:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+ else:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
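+# Illustrative usage sketch (assumption): with stride=1 the unit is a standard
+# ShuffleNetV2 block that preserves shape; with stride=2 it halves the spatial
+# size and can change the channel count, e.g.
+#
+#   unit = ShuffleUnit(in_channels=64, out_channels=64, stride=1)
+#   y = unit(torch.rand(1, 64, 32, 32))      # (1, 64, 32, 32)
+#   down = ShuffleUnit(in_channels=64, out_channels=128, stride=2)
+#   z = down(torch.rand(1, 64, 32, 32))      # (1, 128, 16, 16)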
+
+class LiteHRModule(nn.Module):
+ """High-Resolution Module for LiteHRNet.
+
+ It contains conditional channel weighting blocks and
+ shuffle blocks.
+
+
+ Args:
+ num_branches (int): Number of branches in the module.
+ num_blocks (int): Number of blocks in the module.
+        in_channels (list(int)): Number of input channels in each branch.
+        reduce_ratio (int): Channel reduction ratio.
+        module_type (str): 'LITE' or 'NAIVE'.
+ multiscale_output (bool): Whether to output multi-scale features.
+ with_fuse (bool): Whether to use fuse layers.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(
+ self,
+ num_branches,
+ num_blocks,
+ in_channels,
+ reduce_ratio,
+ module_type,
+ multiscale_output=False,
+ with_fuse=True,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False,
+ ):
+ super().__init__()
+ self._check_branches(num_branches, in_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.module_type = module_type
+ self.multiscale_output = multiscale_output
+ self.with_fuse = with_fuse
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.with_cp = with_cp
+
+ if self.module_type.upper() == 'LITE':
+ self.layers = self._make_weighting_blocks(num_blocks, reduce_ratio)
+ elif self.module_type.upper() == 'NAIVE':
+ self.layers = self._make_naive_branches(num_branches, num_blocks)
+ else:
+ raise ValueError("module_type should be either 'LITE' or 'NAIVE'.")
+ if self.with_fuse:
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU()
+
+ def _check_branches(self, num_branches, in_channels):
+ """Check input to avoid ValueError."""
+ if num_branches != len(in_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_INCHANNELS({len(in_channels)})'
+ raise ValueError(error_msg)
+
+ def _make_weighting_blocks(self, num_blocks, reduce_ratio, stride=1):
+ """Make channel weighting blocks."""
+ layers = []
+ for i in range(num_blocks):
+ layers.append(
+ ConditionalChannelWeighting(
+ self.in_channels,
+ stride=stride,
+ reduce_ratio=reduce_ratio,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ with_cp=self.with_cp))
+
+ return nn.Sequential(*layers)
+
+ def _make_one_branch(self, branch_index, num_blocks, stride=1):
+ """Make one branch."""
+ layers = []
+ layers.append(
+ ShuffleUnit(
+ self.in_channels[branch_index],
+ self.in_channels[branch_index],
+ stride=stride,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ with_cp=self.with_cp))
+ for i in range(1, num_blocks):
+ layers.append(
+ ShuffleUnit(
+ self.in_channels[branch_index],
+ self.in_channels[branch_index],
+ stride=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ with_cp=self.with_cp))
+
+ return nn.Sequential(*layers)
+
+ def _make_naive_branches(self, num_branches, num_blocks):
+ """Make branches."""
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(self._make_one_branch(i, num_blocks))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ """Make fuse layer."""
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i), mode='nearest')))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels[j],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels[j],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=True)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.num_branches == 1:
+ return [self.layers[0](x[0])]
+
+ if self.module_type.upper() == 'LITE':
+ out = self.layers(x)
+ elif self.module_type.upper() == 'NAIVE':
+ for i in range(self.num_branches):
+ x[i] = self.layers[i](x[i])
+ out = x
+
+ if self.with_fuse:
+ out_fuse = []
+ for i in range(len(self.fuse_layers)):
+ # `y = 0` will lead to decreased accuracy (0.5~1 mAP)
+ y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
+ for j in range(self.num_branches):
+ if i == j:
+ y += out[j]
+ else:
+ y += self.fuse_layers[i][j](out[j])
+ out_fuse.append(self.relu(y))
+ out = out_fuse
+ if not self.multiscale_output:
+ out = [out[0]]
+ return out
+
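+# Illustrative note (assumption): in 'LITE' mode all branches are processed jointly
+# by a stack of ConditionalChannelWeighting blocks, while 'NAIVE' mode gives every
+# branch its own ShuffleUnit stack; with_fuse then exchanges information across
+# resolutions, similar to the fuse layers of the original HRNet.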
+
+@BACKBONES.register_module()
+class LiteHRNet(nn.Module):
+ """Lite-HRNet backbone.
+
+    `Lite-HRNet: A Lightweight High-Resolution Network
+    <https://arxiv.org/abs/2104.06403>`_.
+
+ Code adapted from 'https://github.com/HRNet/Lite-HRNet'.
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Default: 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+
+ Example:
+ >>> from mmpose.models import LiteHRNet
+ >>> import torch
+ >>> extra=dict(
+ >>> stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ >>> num_stages=3,
+ >>> stages_spec=dict(
+ >>> num_modules=(2, 4, 2),
+ >>> num_branches=(2, 3, 4),
+ >>> num_blocks=(2, 2, 2),
+ >>> module_type=('LITE', 'LITE', 'LITE'),
+ >>> with_fuse=(True, True, True),
+ >>> reduce_ratios=(8, 8, 8),
+ >>> num_channels=(
+ >>> (40, 80),
+ >>> (40, 80, 160),
+ >>> (40, 80, 160, 320),
+ >>> )),
+ >>> with_head=False)
+ >>> self = LiteHRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 40, 8, 8)
+ """
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ norm_eval=False,
+ with_cp=False):
+ super().__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.stem = Stem(
+ in_channels,
+ stem_channels=self.extra['stem']['stem_channels'],
+ out_channels=self.extra['stem']['out_channels'],
+ expand_ratio=self.extra['stem']['expand_ratio'],
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+
+ self.num_stages = self.extra['num_stages']
+ self.stages_spec = self.extra['stages_spec']
+
+ num_channels_last = [
+ self.stem.out_channels,
+ ]
+ for i in range(self.num_stages):
+ num_channels = self.stages_spec['num_channels'][i]
+            num_channels = list(num_channels)
+ setattr(
+ self, f'transition{i}',
+ self._make_transition_layer(num_channels_last, num_channels))
+
+ stage, num_channels_last = self._make_stage(
+ self.stages_spec, i, num_channels, multiscale_output=True)
+ setattr(self, f'stage{i}', stage)
+
+ self.with_head = self.extra['with_head']
+ if self.with_head:
+ self.head_layer = IterativeHead(
+ in_channels=num_channels_last,
+ norm_cfg=self.norm_cfg,
+ )
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ """Make transition layer."""
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_pre_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=num_channels_pre_layer[i],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_pre_layer[i])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU()))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels)[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU()))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_stage(self,
+ stages_spec,
+ stage_index,
+ in_channels,
+ multiscale_output=True):
+ num_modules = stages_spec['num_modules'][stage_index]
+ num_branches = stages_spec['num_branches'][stage_index]
+ num_blocks = stages_spec['num_blocks'][stage_index]
+ reduce_ratio = stages_spec['reduce_ratios'][stage_index]
+ with_fuse = stages_spec['with_fuse'][stage_index]
+ module_type = stages_spec['module_type'][stage_index]
+
+ modules = []
+ for i in range(num_modules):
+            # multi_scale_output is only used for the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ modules.append(
+ LiteHRModule(
+ num_branches,
+ num_blocks,
+ in_channels,
+ reduce_ratio,
+ module_type,
+ multiscale_output=reset_multiscale_output,
+ with_fuse=with_fuse,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ with_cp=self.with_cp))
+ in_channels = modules[-1].in_channels
+
+ return nn.Sequential(*modules), in_channels
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.stem(x)
+
+ y_list = [x]
+ for i in range(self.num_stages):
+ x_list = []
+ transition = getattr(self, f'transition{i}')
+ for j in range(self.stages_spec['num_branches'][i]):
+ if transition[j]:
+ if j >= len(y_list):
+ x_list.append(transition[j](y_list[-1]))
+ else:
+ x_list.append(transition[j](y_list[j]))
+ else:
+ x_list.append(y_list[j])
+ y_list = getattr(self, f'stage{i}')(x_list)
+
+ x = y_list
+ if self.with_head:
+ x = self.head_layer(x)
+
+ return [x[0]]
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/mmpose/models/backbones/mobilenet_v2.py b/mmpose/models/backbones/mobilenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dc0cd1b7dfdec2aa751861e39fc1c1a45ec488e
--- /dev/null
+++ b/mmpose/models/backbones/mobilenet_v2.py
@@ -0,0 +1,275 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint, make_divisible
+
+
+class InvertedResidual(nn.Module):
+ """InvertedResidual block for MobileNetV2.
+
+ Args:
+ in_channels (int): The input channels of the InvertedResidual block.
+ out_channels (int): The output channels of the InvertedResidual block.
+ stride (int): Stride of the middle (first) 3x3 convolution.
+ expand_ratio (int): adjusts number of channels of the hidden layer
+ in InvertedResidual by this amount.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU6').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride,
+ expand_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU6'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stride = stride
+        assert stride in [1, 2], f'stride must be in [1, 2]. ' \
+            f'But received {stride}.'
+ self.with_cp = with_cp
+ self.use_res_connect = self.stride == 1 and in_channels == out_channels
+ hidden_dim = int(round(in_channels * expand_ratio))
+
+ layers = []
+ if expand_ratio != 1:
+ layers.append(
+ ConvModule(
+ in_channels=in_channels,
+ out_channels=hidden_dim,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+ layers.extend([
+ ConvModule(
+ in_channels=hidden_dim,
+ out_channels=hidden_dim,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ groups=hidden_dim,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ in_channels=hidden_dim,
+ out_channels=out_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+ ])
+ self.conv = nn.Sequential(*layers)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.use_res_connect:
+ return x + self.conv(x)
+ return self.conv(x)
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
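+# Illustrative usage sketch (assumption): the residual connection is only used when
+# stride == 1 and the channel count is unchanged, e.g.
+#
+#   block = InvertedResidual(32, 32, stride=1, expand_ratio=6)   # residual path
+#   y = block(torch.rand(1, 32, 56, 56))                         # (1, 32, 56, 56)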
+
+@BACKBONES.register_module()
+class MobileNetV2(BaseBackbone):
+ """MobileNetV2 backbone.
+
+ Args:
+ widen_factor (float): Width multiplier, multiply number of
+ channels in each layer by this amount. Default: 1.0.
+ out_indices (None or Sequence[int]): Output from which stages.
+ Default: (7, ).
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU6').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ # Parameters to build layers. 4 parameters are needed to construct a
+ # layer, from left to right: expand_ratio, channel, num_blocks, stride.
+ arch_settings = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2],
+ [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2],
+ [6, 320, 1, 1]]
+
+ def __init__(self,
+ widen_factor=1.,
+ out_indices=(7, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU6'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.widen_factor = widen_factor
+        self.out_indices = out_indices
+        for index in out_indices:
+            if index not in range(0, 8):
+                raise ValueError('the item in out_indices must be in '
+                                 f'range(0, 8). But received {index}')
+
+        if frozen_stages not in range(-1, 8):
+            raise ValueError('frozen_stages must be in range(-1, 8). '
+                             f'But received {frozen_stages}')
+        self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.in_channels = make_divisible(32 * widen_factor, 8)
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+
+ self.layers = []
+
+ for i, layer_cfg in enumerate(self.arch_settings):
+ expand_ratio, channel, num_blocks, stride = layer_cfg
+ out_channels = make_divisible(channel * widen_factor, 8)
+ inverted_res_layer = self.make_layer(
+ out_channels=out_channels,
+ num_blocks=num_blocks,
+ stride=stride,
+ expand_ratio=expand_ratio)
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, inverted_res_layer)
+ self.layers.append(layer_name)
+
+ if widen_factor > 1.0:
+ self.out_channel = int(1280 * widen_factor)
+ else:
+ self.out_channel = 1280
+
+ layer = ConvModule(
+ in_channels=self.in_channels,
+ out_channels=self.out_channel,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+ self.add_module('conv2', layer)
+ self.layers.append('conv2')
+
+ def make_layer(self, out_channels, num_blocks, stride, expand_ratio):
+ """Stack InvertedResidual blocks to build a layer for MobileNetV2.
+
+ Args:
+ out_channels (int): out_channels of block.
+ num_blocks (int): number of blocks.
+ stride (int): stride of the first block. Default: 1
+ expand_ratio (int): Expand the number of channels of the
+ hidden layer in InvertedResidual by this ratio. Default: 6.
+ """
+ layers = []
+ for i in range(num_blocks):
+ if i >= 1:
+ stride = 1
+ layers.append(
+ InvertedResidual(
+ self.in_channels,
+ out_channels,
+ stride,
+ expand_ratio=expand_ratio,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
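+# Illustrative usage sketch (assumption; mirrors how other mmpose backbones are
+# exercised in their docstrings):
+#
+#   net = MobileNetV2(widen_factor=1.0, out_indices=(7, ))
+#   net.init_weights()
+#   feat = net(torch.rand(1, 3, 224, 224))   # (1, 1280, 7, 7) from the final conv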
diff --git a/mmpose/models/backbones/mobilenet_v3.py b/mmpose/models/backbones/mobilenet_v3.py
new file mode 100644
index 0000000000000000000000000000000000000000..d640abec79f06d689f2d4bc1e92999946bc07261
--- /dev/null
+++ b/mmpose/models/backbones/mobilenet_v3.py
@@ -0,0 +1,188 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import InvertedResidual, load_checkpoint
+
+
+@BACKBONES.register_module()
+class MobileNetV3(BaseBackbone):
+ """MobileNetV3 backbone.
+
+ Args:
+        arch (str): Architecture of MobileNetV3, from {small, big}.
+ Default: small.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ out_indices (None or Sequence[int]): Output from which stages.
+ Default: (-1, ), which means output tensors from final stage.
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save
+ some memory while slowing down the training speed.
+ Default: False.
+ """
+ # Parameters to build each block:
+ # [kernel size, mid channels, out channels, with_se, act type, stride]
+ arch_settings = {
+ 'small': [[3, 16, 16, True, 'ReLU', 2],
+ [3, 72, 24, False, 'ReLU', 2],
+ [3, 88, 24, False, 'ReLU', 1],
+ [5, 96, 40, True, 'HSwish', 2],
+ [5, 240, 40, True, 'HSwish', 1],
+ [5, 240, 40, True, 'HSwish', 1],
+ [5, 120, 48, True, 'HSwish', 1],
+ [5, 144, 48, True, 'HSwish', 1],
+ [5, 288, 96, True, 'HSwish', 2],
+ [5, 576, 96, True, 'HSwish', 1],
+ [5, 576, 96, True, 'HSwish', 1]],
+ 'big': [[3, 16, 16, False, 'ReLU', 1],
+ [3, 64, 24, False, 'ReLU', 2],
+ [3, 72, 24, False, 'ReLU', 1],
+ [5, 72, 40, True, 'ReLU', 2],
+ [5, 120, 40, True, 'ReLU', 1],
+ [5, 120, 40, True, 'ReLU', 1],
+ [3, 240, 80, False, 'HSwish', 2],
+ [3, 200, 80, False, 'HSwish', 1],
+ [3, 184, 80, False, 'HSwish', 1],
+ [3, 184, 80, False, 'HSwish', 1],
+ [3, 480, 112, True, 'HSwish', 1],
+ [3, 672, 112, True, 'HSwish', 1],
+ [5, 672, 160, True, 'HSwish', 1],
+ [5, 672, 160, True, 'HSwish', 2],
+ [5, 960, 160, True, 'HSwish', 1]]
+ } # yapf: disable
+
+ def __init__(self,
+ arch='small',
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ out_indices=(-1, ),
+ frozen_stages=-1,
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert arch in self.arch_settings
+ for index in out_indices:
+ if index not in range(-len(self.arch_settings[arch]),
+ len(self.arch_settings[arch])):
+                raise ValueError(
+                    'the item in out_indices must be in '
+                    f'range(-{len(self.arch_settings[arch])}, '
+                    f'{len(self.arch_settings[arch])}). '
+                    f'But received {index}')
+
+ if frozen_stages not in range(-1, len(self.arch_settings[arch])):
+ raise ValueError('frozen_stages must be in range(-1, '
+ f'{len(self.arch_settings[arch])}). '
+ f'But received {frozen_stages}')
+ self.arch = arch
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.in_channels = 16
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='HSwish'))
+
+ self.layers = self._make_layer()
+ self.feat_dim = self.arch_settings[arch][-1][2]
+
+ def _make_layer(self):
+ layers = []
+ layer_setting = self.arch_settings[self.arch]
+ for i, params in enumerate(layer_setting):
+ (kernel_size, mid_channels, out_channels, with_se, act,
+ stride) = params
+ if with_se:
+ se_cfg = dict(
+ channels=mid_channels,
+ ratio=4,
+ act_cfg=(dict(type='ReLU'), dict(type='HSigmoid')))
+ else:
+ se_cfg = None
+
+ layer = InvertedResidual(
+ in_channels=self.in_channels,
+ out_channels=out_channels,
+ mid_channels=mid_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ se_cfg=se_cfg,
+ with_expand_conv=True,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type=act),
+ with_cp=self.with_cp)
+ self.in_channels = out_channels
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, layer)
+ layers.append(layer_name)
+ return layers
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+ if i in self.out_indices or \
+ i - len(self.layers) in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
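+# Illustrative usage sketch (assumption): with the default 'small' arch and
+# out_indices=(-1, ), the backbone returns the final stage's feature map:
+#
+#   net = MobileNetV3(arch='small')
+#   net.init_weights()
+#   feat = net(torch.rand(1, 3, 224, 224))   # (1, 96, 7, 7), overall stride 32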
diff --git a/mmpose/models/backbones/mspn.py b/mmpose/models/backbones/mspn.py
new file mode 100644
index 0000000000000000000000000000000000000000..71cee34e399780e8b67eac43d862b65a3ce05412
--- /dev/null
+++ b/mmpose/models/backbones/mspn.py
@@ -0,0 +1,513 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+from collections import OrderedDict
+
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (ConvModule, MaxPool2d, constant_init, kaiming_init,
+ normal_init)
+from mmcv.runner.checkpoint import load_state_dict
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .resnet import Bottleneck as _Bottleneck
+from .utils.utils import get_state_dict
+
+
+class Bottleneck(_Bottleneck):
+    """Bottleneck block for MSPN.
+
+    Args:
+        in_channels (int): Input channels of this block.
+        out_channels (int): Output channels of this block.
+        stride (int): stride of the block. Default: 1
+        downsample (nn.Module): downsample operation on identity branch.
+            Default: None
+        norm_cfg (dict): dictionary to construct and config norm layer.
+            Default: dict(type='BN')
+    """
+
+    expansion = 4
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super().__init__(in_channels, out_channels * 4, **kwargs)
+
+
+class DownsampleModule(nn.Module):
+ """Downsample module for MSPN.
+
+ Args:
+ block (nn.Module): Downsample block.
+ num_blocks (list): Number of blocks in each downsample unit.
+ num_units (int): Numbers of downsample units. Default: 4
+ has_skip (bool): Have skip connections from prior upsample
+ module or not. Default:False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the input feature to
+ downsample module. Default: 64
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ num_units=4,
+ has_skip=False,
+ norm_cfg=dict(type='BN'),
+ in_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.has_skip = has_skip
+ self.in_channels = in_channels
+ assert len(num_blocks) == num_units
+ self.num_blocks = num_blocks
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.layer1 = self._make_layer(block, in_channels, num_blocks[0])
+ for i in range(1, num_units):
+ module_name = f'layer{i + 1}'
+ self.add_module(
+ module_name,
+ self._make_layer(
+ block, in_channels * pow(2, i), num_blocks[i], stride=2))
+
+ def _make_layer(self, block, out_channels, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.in_channels != out_channels * block.expansion:
+ downsample = ConvModule(
+ self.in_channels,
+ out_channels * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ units = list()
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ stride=stride,
+ downsample=downsample,
+ norm_cfg=self.norm_cfg))
+ self.in_channels = out_channels * block.expansion
+ for _ in range(1, blocks):
+ units.append(block(self.in_channels, out_channels))
+
+ return nn.Sequential(*units)
+
+ def forward(self, x, skip1, skip2):
+ out = list()
+ for i in range(self.num_units):
+ module_name = f'layer{i + 1}'
+ module_i = getattr(self, module_name)
+ x = module_i(x)
+ if self.has_skip:
+ x = x + skip1[i] + skip2[i]
+ out.append(x)
+ out.reverse()
+
+ return tuple(out)
+
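+# Illustrative note (assumption): the downsample path yields one feature map per
+# unit and reverses the list before returning, so the deepest (lowest-resolution)
+# feature comes first and can be zipped directly with the upsample units.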
+
+class UpsampleUnit(nn.Module):
+ """Upsample unit for upsample module.
+
+ Args:
+ ind (int): Indicates whether to interpolate (>0) and whether to
+ generate feature map for the next hourglass-like module.
+        num_units (int): Number of units that form an upsample module. Along
+            with ind and gen_cross_conv, num_units is used to decide whether
+            to generate a feature map for the next hourglass-like module.
+ in_channels (int): Channel number of the skip-in feature maps from
+ the corresponding downsample unit.
+ unit_channels (int): Channel number in this unit. Default:256.
+        gen_skip (bool): Whether or not to generate skips for the posterior
+            downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of feature output by upsample
+ module. Must equal to in_channels of downsample module. Default:64
+ """
+
+ def __init__(self,
+ ind,
+ num_units,
+ in_channels,
+ unit_channels=256,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.in_skip = ConvModule(
+ in_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.ind = ind
+ if self.ind > 0:
+ self.up_conv = ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ self.gen_skip = gen_skip
+ if self.gen_skip:
+ self.out_skip1 = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.out_skip2 = ConvModule(
+ unit_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.gen_cross_conv = gen_cross_conv
+ if self.ind == num_units - 1 and self.gen_cross_conv:
+ self.cross_conv = ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ def forward(self, x, up_x):
+ out = self.in_skip(x)
+
+ if self.ind > 0:
+ up_x = F.interpolate(
+ up_x,
+ size=(x.size(2), x.size(3)),
+ mode='bilinear',
+ align_corners=True)
+ up_x = self.up_conv(up_x)
+ out = out + up_x
+ out = self.relu(out)
+
+ skip1 = None
+ skip2 = None
+ if self.gen_skip:
+ skip1 = self.out_skip1(x)
+ skip2 = self.out_skip2(out)
+
+ cross_conv = None
+ if self.ind == self.num_units - 1 and self.gen_cross_conv:
+ cross_conv = self.cross_conv(out)
+
+ return out, skip1, skip2, cross_conv
+
+
+class UpsampleModule(nn.Module):
+ """Upsample module for MSPN.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units.
+ Default:256.
+ num_units (int): Numbers of upsample units. Default: 4
+ gen_skip (bool): Whether to generate skip for posterior downsample
+ module or not. Default:False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of feature output by upsample
+ module. Must equal to in_channels of downsample module. Default:64
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_units=4,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = list()
+ for i in range(num_units):
+ self.in_channels.append(Bottleneck.expansion * out_channels *
+ pow(2, i))
+ self.in_channels.reverse()
+ self.num_units = num_units
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.norm_cfg = norm_cfg
+ for i in range(num_units):
+ module_name = f'up{i + 1}'
+ self.add_module(
+ module_name,
+ UpsampleUnit(
+ i,
+ self.num_units,
+ self.in_channels[i],
+ unit_channels,
+ self.gen_skip,
+ self.gen_cross_conv,
+ norm_cfg=self.norm_cfg,
+ out_channels=64))
+
+ def forward(self, x):
+ out = list()
+ skip1 = list()
+ skip2 = list()
+ cross_conv = None
+ for i in range(self.num_units):
+ module_i = getattr(self, f'up{i + 1}')
+ if i == 0:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], None)
+ elif i == self.num_units - 1:
+ outi, skip1_i, skip2_i, cross_conv = module_i(x[i], out[i - 1])
+ else:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], out[i - 1])
+ out.append(outi)
+ skip1.append(skip1_i)
+ skip2.append(skip2_i)
+ skip1.reverse()
+ skip2.reverse()
+
+ return out, skip1, skip2, cross_conv
+
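+# Illustrative note (assumption): the upsample units run from the deepest feature
+# upward; each unit fuses its skip-in feature with the upsampled output of the
+# previous unit, and only the last unit emits cross_conv, which becomes the input
+# of the next single-stage network.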
+
+class SingleStageNetwork(nn.Module):
+    """Single-stage network.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units. Default:256.
+ num_units (int): Numbers of downsample/upsample units. Default: 4
+ gen_skip (bool): Whether to generate skip for posterior downsample
+ module or not. Default:False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ has_skip (bool): Have skip connections from prior upsample
+ module or not. Default:False
+ num_blocks (list): Number of blocks in each downsample unit.
+ Default: [2, 2, 2, 2] Note: Make sure num_units==len(num_blocks)
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the feature from ResNetTop.
+ Default: 64.
+ """
+
+ def __init__(self,
+ has_skip=False,
+ gen_skip=False,
+ gen_cross_conv=False,
+ unit_channels=256,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ in_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ assert len(num_blocks) == num_units
+ self.has_skip = has_skip
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.num_units = num_units
+ self.unit_channels = unit_channels
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ self.downsample = DownsampleModule(Bottleneck, num_blocks, num_units,
+ has_skip, norm_cfg, in_channels)
+ self.upsample = UpsampleModule(unit_channels, num_units, gen_skip,
+ gen_cross_conv, norm_cfg, in_channels)
+
+ def forward(self, x, skip1, skip2):
+ mid = self.downsample(x, skip1, skip2)
+ out, skip1, skip2, cross_conv = self.upsample(mid)
+
+ return out, skip1, skip2, cross_conv
+
+
+class ResNetTop(nn.Module):
+ """ResNet top for MSPN.
+
+ Args:
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ channels (int): Number of channels of the feature output by ResNetTop.
+ """
+
+ def __init__(self, norm_cfg=dict(type='BN'), channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.top = nn.Sequential(
+ ConvModule(
+ 3,
+ channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ norm_cfg=norm_cfg,
+ inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ def forward(self, img):
+ return self.top(img)
+
+
+@BACKBONES.register_module()
+class MSPN(BaseBackbone):
+ """MSPN backbone. Paper ref: Li et al. "Rethinking on Multi-Stage Networks
+ for Human Pose Estimation" (CVPR 2020).
+
+ Args:
+ unit_channels (int): Number of Channels in an upsample unit.
+ Default: 256
+ num_stages (int): Number of stages in a multi-stage MSPN. Default: 4
+ num_units (int): Number of downsample/upsample units in a single-stage
+ network. Default: 4
+ Note: Make sure num_units == len(self.num_blocks)
+ num_blocks (list): Number of bottlenecks in each
+ downsample unit. Default: [2, 2, 2, 2]
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ res_top_channels (int): Number of channels of feature from ResNetTop.
+ Default: 64.
+
+ Example:
+ >>> from mmpose.models import MSPN
+ >>> import torch
+ >>> self = MSPN(num_stages=2,num_units=2,num_blocks=[2,2])
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... for feature in level_output:
+ ... print(tuple(feature.shape))
+ ...
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_stages=4,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ res_top_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ assert self.num_stages > 0
+ assert self.num_units > 1
+ assert self.num_units == len(self.num_blocks)
+ self.top = ResNetTop(norm_cfg=norm_cfg)
+ self.multi_stage_mspn = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if i == 0:
+ has_skip = False
+ else:
+ has_skip = True
+ if i != self.num_stages - 1:
+ gen_skip = True
+ gen_cross_conv = True
+ else:
+ gen_skip = False
+ gen_cross_conv = False
+ self.multi_stage_mspn.append(
+ SingleStageNetwork(has_skip, gen_skip, gen_cross_conv,
+ unit_channels, num_units, num_blocks,
+ norm_cfg, res_top_channels))
+
+ def forward(self, x):
+ """Model forward function."""
+ out_feats = []
+ skip1 = None
+ skip2 = None
+ x = self.top(x)
+ for i in range(self.num_stages):
+ out, skip1, skip2, x = self.multi_stage_mspn[i](x, skip1, skip2)
+ out_feats.append(out)
+
+ return out_feats
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ state_dict_tmp = get_state_dict(pretrained)
+ state_dict = OrderedDict()
+ state_dict['top'] = OrderedDict()
+ state_dict['bottlenecks'] = OrderedDict()
+ for k, v in state_dict_tmp.items():
+ if k.startswith('layer'):
+ if 'downsample.0' in k:
+ state_dict['bottlenecks'][k.replace(
+ 'downsample.0', 'downsample.conv')] = v
+ elif 'downsample.1' in k:
+ state_dict['bottlenecks'][k.replace(
+ 'downsample.1', 'downsample.bn')] = v
+ else:
+ state_dict['bottlenecks'][k] = v
+ elif k.startswith('conv1'):
+ state_dict['top'][k.replace('conv1', 'top.0.conv')] = v
+ elif k.startswith('bn1'):
+ state_dict['top'][k.replace('bn1', 'top.0.bn')] = v
+
+ load_state_dict(
+ self.top, state_dict['top'], strict=False, logger=logger)
+ for i in range(self.num_stages):
+ load_state_dict(
+ self.multi_stage_mspn[i].downsample,
+ state_dict['bottlenecks'],
+ strict=False,
+ logger=logger)
+ else:
+ for m in self.multi_stage_mspn.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ for m in self.top.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
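+
+# Illustrative note: the key remapping in MSPN.init_weights above adapts a
+# torchvision-style ResNet checkpoint to this module layout, e.g.
+#
+#     'layer1.0.downsample.0.weight' -> 'layer1.0.downsample.conv.weight'
+#     'layer1.0.downsample.1.weight' -> 'layer1.0.downsample.bn.weight'
+#     'conv1.weight'                 -> 'top.0.conv.weight'
+#     'bn1.weight'                   -> 'top.0.bn.weight'
+#
+# The remapped 'top' keys initialise ResNetTop, and the 'bottlenecks' keys are
+# loaded (non-strictly) into the downsample module of every stage.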
diff --git a/mmpose/models/backbones/regnet.py b/mmpose/models/backbones/regnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..693417c2d61066e4e9a90989ad61700448028e58
--- /dev/null
+++ b/mmpose/models/backbones/regnet.py
@@ -0,0 +1,317 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import ResNet
+from .resnext import Bottleneck
+
+
+@BACKBONES.register_module()
+class RegNet(ResNet):
+ """RegNet backbone.
+
+    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`__ .
+
+ Args:
+ arch (dict): The parameter of RegNets.
+ - w0 (int): initial width
+ - wa (float): slope of width
+ - wm (float): quantization parameter to quantize the width
+ - depth (int): depth of the backbone
+ - group_w (int): width of group
+ - bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ base_channels (int): Base channels after stem layer.
+ in_channels (int): Number of input image channels. Default: 3.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer. Default: "pytorch".
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters. Default: -1.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN', requires_grad=True).
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import RegNet
+ >>> import torch
+ >>> self = RegNet(
+        ...     arch=dict(
+        ...         w0=88,
+        ...         wa=26.31,
+        ...         wm=2.25,
+        ...         group_w=48,
+        ...         depth=25,
+        ...         bot_mul=1.0),
+        ...     out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 96, 8, 8)
+ (1, 192, 4, 4)
+ (1, 432, 2, 2)
+ (1, 1008, 1, 1)
+ """
+ arch_settings = {
+ 'regnetx_400mf':
+ dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
+ 'regnetx_800mf':
+ dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16, bot_mul=1.0),
+ 'regnetx_1.6gf':
+ dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18, bot_mul=1.0),
+ 'regnetx_3.2gf':
+ dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25, bot_mul=1.0),
+ 'regnetx_4.0gf':
+ dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23, bot_mul=1.0),
+ 'regnetx_6.4gf':
+ dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17, bot_mul=1.0),
+ 'regnetx_8.0gf':
+ dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23, bot_mul=1.0),
+ 'regnetx_12gf':
+ dict(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, bot_mul=1.0),
+ }
+
+ def __init__(self,
+ arch,
+ in_channels=3,
+ stem_channels=32,
+ base_channels=32,
+ strides=(2, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super(ResNet, self).__init__()
+
+ # Generate RegNet parameters first
+ if isinstance(arch, str):
+ assert arch in self.arch_settings, \
+ f'"arch": "{arch}" is not one of the' \
+ ' arch_settings'
+ arch = self.arch_settings[arch]
+ elif not isinstance(arch, dict):
+ raise TypeError('Expect "arch" to be either a string '
+ f'or a dict, got {type(arch)}')
+
+ widths, num_stages = self.generate_regnet(
+ arch['w0'],
+ arch['wa'],
+ arch['wm'],
+ arch['depth'],
+ )
+ # Convert to per stage format
+ stage_widths, stage_blocks = self.get_stages_from_blocks(widths)
+ # Generate group widths and bot muls
+ group_widths = [arch['group_w'] for _ in range(num_stages)]
+ self.bottleneck_ratio = [arch['bot_mul'] for _ in range(num_stages)]
+ # Adjust the compatibility of stage_widths and group_widths
+ stage_widths, group_widths = self.adjust_width_group(
+ stage_widths, self.bottleneck_ratio, group_widths)
+
+ # Group params by stage
+ self.stage_widths = stage_widths
+ self.group_widths = group_widths
+ self.depth = sum(stage_blocks)
+ self.stem_channels = stem_channels
+ self.base_channels = base_channels
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ if self.deep_stem:
+ raise NotImplementedError(
+ 'deep_stem has not been implemented for RegNet')
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.stage_blocks = stage_blocks[:num_stages]
+
+ self._make_stem_layer(in_channels, stem_channels)
+
+ _in_channels = stem_channels
+ self.res_layers = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = self.strides[i]
+ dilation = self.dilations[i]
+ group_width = self.group_widths[i]
+ width = int(round(self.stage_widths[i] * self.bottleneck_ratio[i]))
+ stage_groups = width // group_width
+
+ res_layer = self.make_res_layer(
+ block=Bottleneck,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=self.stage_widths[i],
+ expansion=1,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=self.with_cp,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ base_channels=self.stage_widths[i],
+ groups=stage_groups,
+ width_per_group=group_width)
+ _in_channels = self.stage_widths[i]
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = stage_widths[-1]
+
+ def _make_stem_layer(self, in_channels, base_channels):
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ base_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, base_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+
+ @staticmethod
+ def generate_regnet(initial_width,
+ width_slope,
+ width_parameter,
+ depth,
+ divisor=8):
+ """Generates per block width from RegNet parameters.
+
+ Args:
+ initial_width ([int]): Initial width of the backbone
+ width_slope ([float]): Slope of the quantized linear function
+ width_parameter ([int]): Parameter used to quantize the width.
+ depth ([int]): Depth of the backbone.
+ divisor (int, optional): The divisor of channels. Defaults to 8.
+
+ Returns:
+ list, int: return a list of widths of each stage and the number of
+ stages
+ """
+ assert width_slope >= 0
+ assert initial_width > 0
+ assert width_parameter > 1
+ assert initial_width % divisor == 0
+ widths_cont = np.arange(depth) * width_slope + initial_width
+ ks = np.round(
+ np.log(widths_cont / initial_width) / np.log(width_parameter))
+ widths = initial_width * np.power(width_parameter, ks)
+ widths = np.round(np.divide(widths, divisor)) * divisor
+ num_stages = len(np.unique(widths))
+ widths, widths_cont = widths.astype(int).tolist(), widths_cont.tolist()
+ return widths, num_stages
+
+ @staticmethod
+ def quantize_float(number, divisor):
+        """Converts a float to the closest non-zero int divisible by divisor.
+
+ Args:
+ number (int): Original number to be quantized.
+ divisor (int): Divisor used to quantize the number.
+
+ Returns:
+            int: quantized number that is divisible by divisor.
+ """
+ return int(round(number / divisor) * divisor)
+
+ def adjust_width_group(self, widths, bottleneck_ratio, groups):
+ """Adjusts the compatibility of widths and groups.
+
+ Args:
+ widths (list[int]): Width of each stage.
+ bottleneck_ratio (float): Bottleneck ratio.
+ groups (int): number of groups in each stage
+
+ Returns:
+ tuple(list): The adjusted widths and groups of each stage.
+ """
+ bottleneck_width = [
+ int(w * b) for w, b in zip(widths, bottleneck_ratio)
+ ]
+ groups = [min(g, w_bot) for g, w_bot in zip(groups, bottleneck_width)]
+ bottleneck_width = [
+ self.quantize_float(w_bot, g)
+ for w_bot, g in zip(bottleneck_width, groups)
+ ]
+ widths = [
+ int(w_bot / b)
+ for w_bot, b in zip(bottleneck_width, bottleneck_ratio)
+ ]
+ return widths, groups
+
+ def get_stages_from_blocks(self, widths):
+ """Gets widths/stage_blocks of network at each stage.
+
+ Args:
+ widths (list[int]): Width in each stage.
+
+ Returns:
+ tuple(list): width and depth of each stage
+ """
+ width_diff = [
+ width != width_prev
+ for width, width_prev in zip(widths + [0], [0] + widths)
+ ]
+ stage_widths = [
+ width for width, diff in zip(widths, width_diff[:-1]) if diff
+ ]
+ stage_blocks = np.diff([
+ depth for depth, diff in zip(range(len(width_diff)), width_diff)
+ if diff
+ ]).tolist()
+ return stage_widths, stage_blocks
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
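+
+# Worked example (illustrative, following the helper functions above) for the
+# 'regnetx_400mf' setting (w0=24, wa=24.48, wm=2.54, depth=22, group_w=16,
+# bot_mul=1.0):
+#   generate_regnet        -> per-block widths quantized to {24, 64, 152, 392},
+#                             so num_stages = 4
+#   get_stages_from_blocks -> stage_widths = [24, 64, 152, 392],
+#                             stage_blocks = [1, 2, 7, 12]
+#   adjust_width_group     -> widths rounded to multiples of the group width,
+#                             giving stage widths [32, 64, 160, 384] with
+#                             16-channel groups in every stage
+# so RegNet(arch='regnetx_400mf') should build four Bottleneck stages with
+# those widths and depths.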
diff --git a/mmpose/models/backbones/resnest.py b/mmpose/models/backbones/resnest.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a2d4081df1417155f0626646f5fe3d0dbfc2864
--- /dev/null
+++ b/mmpose/models/backbones/resnest.py
@@ -0,0 +1,338 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResLayer, ResNetV1d
+
+
+class RSoftmax(nn.Module):
+ """Radix Softmax module in ``SplitAttentionConv2d``.
+
+ Args:
+ radix (int): Radix of input.
+ groups (int): Groups of input.
+ """
+
+ def __init__(self, radix, groups):
+ super().__init__()
+ self.radix = radix
+ self.groups = groups
+
+ def forward(self, x):
+ batch = x.size(0)
+ if self.radix > 1:
+ x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2)
+ x = F.softmax(x, dim=1)
+ x = x.reshape(batch, -1)
+ else:
+ x = torch.sigmoid(x)
+ return x
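+
+# Shape sketch (illustrative): with radix=2 and groups=4, an input of shape
+# (B, 2 * 4 * C) is viewed as (B, 4, 2, C), transposed to (B, 2, 4, C), and
+# softmax is taken over dim=1, i.e. across the two radix splits of each group,
+# before being flattened back to (B, 2 * 4 * C).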
+
+
+class SplitAttentionConv2d(nn.Module):
+ """Split-Attention Conv2d.
+
+ Args:
+ in_channels (int): Same as nn.Conv2d.
+        channels (int): Same as out_channels in nn.Conv2d.
+ kernel_size (int | tuple[int]): Same as nn.Conv2d.
+ stride (int | tuple[int]): Same as nn.Conv2d.
+ padding (int | tuple[int]): Same as nn.Conv2d.
+ dilation (int | tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+        radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+ """
+
+ def __init__(self,
+ in_channels,
+ channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ radix=2,
+ reduction_factor=4,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ super().__init__()
+ inter_channels = max(in_channels * radix // reduction_factor, 32)
+ self.radix = radix
+ self.groups = groups
+ self.channels = channels
+ self.conv = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ channels * radix,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups * radix,
+ bias=False)
+ self.norm0_name, norm0 = build_norm_layer(
+ norm_cfg, channels * radix, postfix=0)
+ self.add_module(self.norm0_name, norm0)
+ self.relu = nn.ReLU(inplace=True)
+ self.fc1 = build_conv_layer(
+ None, channels, inter_channels, 1, groups=self.groups)
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, inter_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.fc2 = build_conv_layer(
+ None, inter_channels, channels * radix, 1, groups=self.groups)
+ self.rsoftmax = RSoftmax(radix, groups)
+
+ @property
+ def norm0(self):
+ return getattr(self, self.norm0_name)
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.norm0(x)
+ x = self.relu(x)
+
+ batch, rchannel = x.shape[:2]
+ if self.radix > 1:
+ splits = x.view(batch, self.radix, -1, *x.shape[2:])
+ gap = splits.sum(dim=1)
+ else:
+ gap = x
+ gap = F.adaptive_avg_pool2d(gap, 1)
+ gap = self.fc1(gap)
+
+ gap = self.norm1(gap)
+ gap = self.relu(gap)
+
+ atten = self.fc2(gap)
+ atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
+
+ if self.radix > 1:
+ attens = atten.view(batch, self.radix, -1, *atten.shape[2:])
+ out = torch.sum(attens * splits, dim=1)
+ else:
+ out = atten * x
+ return out.contiguous()
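+
+# Illustrative shape trace, assuming in_channels=channels=64, radix=2, groups=1
+# and reduction_factor=4 (so inter_channels = max(64 * 2 // 4, 32) = 32):
+#   conv     : (B, 64, H, W) -> (B, 128, H, W)      # channels * radix
+#   splits   : viewed as (B, 2, 64, H, W); summed over radix -> gap (B, 64, H, W)
+#   pool/fc1 : adaptive_avg_pool2d -> (B, 64, 1, 1) -> fc1 -> (B, 32, 1, 1)
+#   fc2      : (B, 128, 1, 1) attention logits, normalized by RSoftmax
+#   output   : sum of attention-weighted splits -> (B, 64, H, W)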
+
+
+class Bottleneck(_Bottleneck):
+ """Bottleneck block for ResNeSt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+        radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ avg_down_stride (bool): Whether to use average pool for stride in
+ Bottleneck. Default: True.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ groups=1,
+ width_per_group=4,
+ base_channels=64,
+ radix=2,
+ reduction_factor=4,
+ avg_down_stride=True,
+ **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # For ResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for ResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.avg_down_stride = avg_down_stride and self.conv2_stride > 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = SplitAttentionConv2d(
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=1 if self.avg_down_stride else self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ radix=radix,
+ reduction_factor=reduction_factor,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+ delattr(self, self.norm2_name)
+
+ if self.avg_down_stride:
+ self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)
+
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+
+ if self.avg_down_stride:
+ out = self.avd_layer(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ResNeSt(ResNetV1d):
+ """ResNeSt backbone.
+
+    Please refer to the `paper <https://arxiv.org/abs/2004.08955>`__
+ for details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152, 200}.
+        groups (int): Groups of conv2 in Bottleneck. Default: 1.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+        radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ avg_down_stride (bool): Whether to use average pool for stride in
+ Bottleneck. Default: True.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+        deep_stem (bool): Replace the 7x7 conv in the input stem with three
+            3x3 convs. Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+ """
+
+ arch_settings = {
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3)),
+ 200: (Bottleneck, (3, 24, 36, 3)),
+ 269: (Bottleneck, (3, 30, 48, 8))
+ }
+
+ def __init__(self,
+ depth,
+ groups=1,
+ width_per_group=4,
+ radix=2,
+ reduction_factor=4,
+ avg_down_stride=True,
+ **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ self.radix = radix
+ self.reduction_factor = reduction_factor
+ self.avg_down_stride = avg_down_stride
+ super().__init__(depth=depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ radix=self.radix,
+ reduction_factor=self.reduction_factor,
+ avg_down_stride=self.avg_down_stride,
+ **kwargs)
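+
+# Minimal usage sketch (illustrative): a ResNeSt-50 backbone with the default
+# split-attention settings, returning only the last stage feature map.
+#
+#     import torch
+#     model = ResNeSt(depth=50, radix=2, reduction_factor=4, out_indices=(3, ))
+#     model.init_weights()
+#     model.eval()
+#     feat = model(torch.rand(1, 3, 256, 256))  # expected shape (1, 2048, 8, 8)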
diff --git a/mmpose/models/backbones/resnet.py b/mmpose/models/backbones/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..649496a755020140d94eb32fbe79d1ff135c86ca
--- /dev/null
+++ b/mmpose/models/backbones/resnet.py
@@ -0,0 +1,701 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
+ constant_init, kaiming_init)
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class BasicBlock(nn.Module):
+ """BasicBlock for ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the output channels of conv1. This is a
+ reserved argument in BasicBlock and should always be 1. Default: 1.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): `pytorch` or `caffe`. It is unused and reserved for
+ unified API with Bottleneck.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=1,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert self.expansion == 1
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, out_channels, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ 3,
+ stride=stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ 3,
+ padding=1,
+ bias=False)
+ self.add_module(self.norm2_name, norm2)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ """Bottleneck block for ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the input/output channels of conv2. Default: 4.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
+ stride-two layer is the 3x3 conv layer, otherwise the stride-two
+ layer is the first 1x1 conv layer. Default: "pytorch".
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=4,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert style in ['pytorch', 'caffe']
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ if self.style == 'pytorch':
+ self.conv1_stride = 1
+ self.conv2_stride = stride
+ else:
+ self.conv1_stride = stride
+ self.conv2_stride = 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ @property
+ def norm3(self):
+ """nn.Module: the normalization layer named "norm3" """
+ return getattr(self, self.norm3_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def get_expansion(block, expansion=None):
+ """Get the expansion of a residual block.
+
+ The block expansion will be obtained by the following order:
+
+ 1. If ``expansion`` is given, just return it.
+ 2. If ``block`` has the attribute ``expansion``, then return
+ ``block.expansion``.
+    3. Return the default value according to the block type:
+ 1 for ``BasicBlock`` and 4 for ``Bottleneck``.
+
+ Args:
+ block (class): The block class.
+ expansion (int | None): The given expansion ratio.
+
+ Returns:
+ int: The expansion of the block.
+ """
+ if isinstance(expansion, int):
+ assert expansion > 0
+ elif expansion is None:
+ if hasattr(block, 'expansion'):
+ expansion = block.expansion
+ elif issubclass(block, BasicBlock):
+ expansion = 1
+ elif issubclass(block, Bottleneck):
+ expansion = 4
+ else:
+ raise TypeError(f'expansion is not specified for {block.__name__}')
+ else:
+ raise TypeError('expansion must be an integer or None')
+
+ return expansion
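+
+# Illustrative values implied by the resolution order above:
+#   get_expansion(BasicBlock)              -> 1  (default for BasicBlock)
+#   get_expansion(Bottleneck)              -> 4  (default for Bottleneck)
+#   get_expansion(Bottleneck, expansion=2) -> 2  (explicit value wins)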
+
+
+class ResLayer(nn.Sequential):
+ """ResLayer to build ResNet style backbone.
+
+ Args:
+ block (nn.Module): Residual block used to build ResLayer.
+ num_blocks (int): Number of blocks.
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int, optional): The expansion for BasicBlock/Bottleneck.
+ If not specified, it will firstly be obtained via
+ ``block.expansion``. If the block has no attribute "expansion",
+ the following default values will be used: 1 for BasicBlock and
+ 4 for Bottleneck. Default: None.
+ stride (int): stride of the first block. Default: 1.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ downsample_first (bool): Downsample at the first block or last block.
+ False for Hourglass, True for ResNet. Default: True
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ in_channels,
+ out_channels,
+ expansion=None,
+ stride=1,
+ avg_down=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ downsample_first=True,
+ **kwargs):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ self.block = block
+ self.expansion = get_expansion(block, expansion)
+
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = []
+ conv_stride = stride
+ if avg_down and stride != 1:
+ conv_stride = 1
+ downsample.append(
+ nn.AvgPool2d(
+ kernel_size=stride,
+ stride=stride,
+ ceil_mode=True,
+ count_include_pad=False))
+ downsample.extend([
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=conv_stride,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1]
+ ])
+ downsample = nn.Sequential(*downsample)
+
+ layers = []
+ if downsample_first:
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ in_channels = out_channels
+ for _ in range(1, num_blocks):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ else: # downsample_first=False is for HourglassModule
+ for i in range(0, num_blocks - 1):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+
+ super().__init__(*layers)
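+
+# Illustrative sketch: ResLayer(Bottleneck, num_blocks=3, in_channels=64,
+# out_channels=256) builds a standard ResNet stage of three Bottleneck blocks;
+# because in_channels != out_channels, the first block receives a 1x1 conv +
+# norm downsample on its identity branch, while the remaining blocks keep 256
+# channels with stride 1.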
+
+
+@BACKBONES.register_module()
+class ResNet(BaseBackbone):
+ """ResNet backbone.
+
+    Please refer to the `paper <https://arxiv.org/abs/1512.03385>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {18, 34, 50, 101, 152}.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ base_channels (int): Middle channels of the first stage. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+        deep_stem (bool): Replace the 7x7 conv in the input stem with three
+            3x3 convs. Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import ResNet
+ >>> import torch
+ >>> self = ResNet(depth=18, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 64, 8, 8)
+ (1, 128, 4, 4)
+ (1, 256, 2, 2)
+ (1, 512, 1, 1)
+ """
+
+ arch_settings = {
+ 18: (BasicBlock, (2, 2, 2, 2)),
+ 34: (BasicBlock, (3, 4, 6, 3)),
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self,
+ depth,
+ in_channels=3,
+ stem_channels=64,
+ base_channels=64,
+ expansion=None,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for resnet')
+ self.depth = depth
+ self.stem_channels = stem_channels
+ self.base_channels = base_channels
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.block, stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ self.expansion = get_expansion(self.block, expansion)
+
+ self._make_stem_layer(in_channels, stem_channels)
+
+ self.res_layers = []
+ _in_channels = stem_channels
+ _out_channels = base_channels * self.expansion
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = strides[i]
+ dilation = dilations[i]
+ res_layer = self.make_res_layer(
+ block=self.block,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=_out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+ _in_channels = _out_channels
+ _out_channels *= 2
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = res_layer[-1].out_channels
+
+ def make_res_layer(self, **kwargs):
+ """Make a ResLayer."""
+ return ResLayer(**kwargs)
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ def _make_stem_layer(self, in_channels, stem_channels):
+ """Make stem layer."""
+ if self.deep_stem:
+ self.stem = nn.Sequential(
+ ConvModule(
+ in_channels,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True))
+ else:
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ stem_channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, stem_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ if self.deep_stem:
+ self.stem.eval()
+ for param in self.stem.parameters():
+ param.requires_grad = False
+ else:
+ self.norm1.eval()
+ for m in [self.conv1, self.norm1]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = getattr(self, f'layer{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.deep_stem:
+ x = self.stem(x)
+ else:
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval have effect on BatchNorm only
+ if isinstance(m, _BatchNorm):
+ m.eval()
+
+
+@BACKBONES.register_module()
+class ResNetV1d(ResNet):
+ r"""ResNetV1d variant described in `Bag of Tricks
+    <https://arxiv.org/abs/1812.01187>`__.
+
+ Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in
+ the input stem with three 3x3 convs. And in the downsampling block, a 2x2
+ avg_pool with stride 2 is added before conv, whose stride is changed to 1.
+ """
+
+ def __init__(self, **kwargs):
+ super().__init__(deep_stem=True, avg_down=True, **kwargs)
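+
+# Minimal construction sketch (illustrative):
+#
+#     import torch
+#     backbone = ResNet(depth=50, out_indices=(3, ), frozen_stages=1,
+#                       norm_eval=True)
+#     backbone.init_weights()      # optionally pass a checkpoint path instead
+#     feat = backbone(torch.rand(1, 3, 256, 192))  # expected (1, 2048, 8, 6)
+#
+# ResNetV1d(depth=50) is equivalent to ResNet(depth=50, deep_stem=True,
+# avg_down=True), i.e. a three-conv stem and average-pool downsampling.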
diff --git a/mmpose/models/backbones/resnext.py b/mmpose/models/backbones/resnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..c10dc33f98ac3229c77bf306acf19950c295f904
--- /dev/null
+++ b/mmpose/models/backbones/resnext.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResLayer, ResNet
+
+
+class Bottleneck(_Bottleneck):
+ """Bottleneck block for ResNeXt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ base_channels=64,
+ groups=32,
+ width_per_group=4,
+ **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # For ResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for ResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+
+@BACKBONES.register_module()
+class ResNeXt(ResNet):
+ """ResNeXt backbone.
+
+    Please refer to the `paper <https://arxiv.org/abs/1611.05431>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ groups (int): Groups of conv2 in Bottleneck. Default: 32.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+        deep_stem (bool): Replace the 7x7 conv in the input stem with three
+            3x3 convs. Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import ResNeXt
+ >>> import torch
+ >>> self = ResNeXt(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 8, 8)
+ (1, 512, 4, 4)
+ (1, 1024, 2, 2)
+ (1, 2048, 1, 1)
+ """
+
+ arch_settings = {
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ **kwargs)
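+
+# Width arithmetic sketch (illustrative): for ResNeXt-50 32x4d, the first-stage
+# Bottleneck has out_channels=256 and expansion=4, so the inherited
+# mid_channels is 256 // 4 = 64; with groups=32, width_per_group=4 and
+# base_channels=64 it is re-widened to 32 * 4 * 64 // 64 = 128, i.e. conv2
+# becomes a 3x3 grouped convolution with 128 channels split into 32 groups.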
diff --git a/mmpose/models/backbones/rsn.py b/mmpose/models/backbones/rsn.py
new file mode 100644
index 0000000000000000000000000000000000000000..29038afe2a77dcb3d3b027b1549d478916a50727
--- /dev/null
+++ b/mmpose/models/backbones/rsn.py
@@ -0,0 +1,616 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (ConvModule, MaxPool2d, constant_init, kaiming_init,
+ normal_init)
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class RSB(nn.Module):
+ """Residual Steps block for RSN. Paper ref: Cai et al. "Learning Delicate
+ Local Representations for Multi-Person Pose Estimation" (ECCV 2020).
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ num_steps (int): Numbers of steps in RSB
+ stride (int): stride of the block. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ expand_times (int): Times by which the in_channels are expanded.
+ Default:26.
+ res_top_channels (int): Number of channels of feature output by
+ ResNet_top. Default:64.
+ """
+
+ expansion = 1
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_steps=4,
+ stride=1,
+ downsample=None,
+ with_cp=False,
+ norm_cfg=dict(type='BN'),
+ expand_times=26,
+ res_top_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ assert num_steps > 1
+ self.in_channels = in_channels
+ self.branch_channels = self.in_channels * expand_times
+ self.branch_channels //= res_top_channels
+ self.out_channels = out_channels
+ self.stride = stride
+ self.downsample = downsample
+ self.with_cp = with_cp
+ self.norm_cfg = norm_cfg
+ self.num_steps = num_steps
+ self.conv_bn_relu1 = ConvModule(
+ self.in_channels,
+ self.num_steps * self.branch_channels,
+ kernel_size=1,
+ stride=self.stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ for i in range(self.num_steps):
+ for j in range(i + 1):
+ module_name = f'conv_bn_relu2_{i + 1}_{j + 1}'
+ self.add_module(
+ module_name,
+ ConvModule(
+ self.branch_channels,
+ self.branch_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=self.norm_cfg,
+ inplace=False))
+ self.conv_bn3 = ConvModule(
+ self.num_steps * self.branch_channels,
+ self.out_channels * self.expansion,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ act_cfg=None,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ self.relu = nn.ReLU(inplace=False)
+
+ def forward(self, x):
+ """Forward function."""
+
+ identity = x
+ x = self.conv_bn_relu1(x)
+ spx = torch.split(x, self.branch_channels, 1)
+ outputs = list()
+ outs = list()
+ for i in range(self.num_steps):
+ outputs_i = list()
+ outputs.append(outputs_i)
+ for j in range(i + 1):
+ if j == 0:
+ inputs = spx[i]
+ else:
+ inputs = outputs[i][j - 1]
+ if i > j:
+ inputs = inputs + outputs[i - 1][j]
+ module_name = f'conv_bn_relu2_{i + 1}_{j + 1}'
+ module_i_j = getattr(self, module_name)
+ outputs[i].append(module_i_j(inputs))
+
+ outs.append(outputs[i][i])
+ out = torch.cat(tuple(outs), 1)
+ out = self.conv_bn3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(identity)
+ out = out + identity
+
+ out = self.relu(out)
+
+ return out
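+
+# Channel arithmetic sketch (illustrative): with the defaults in_channels=64,
+# expand_times=26 and res_top_channels=64, branch_channels = 64 * 26 // 64 = 26,
+# so conv_bn_relu1 expands the input to num_steps * 26 = 104 channels;
+# torch.split then yields num_steps=4 branches of 26 channels each, which feed
+# the stepped 3x3 convolutions before the final 1x1 conv_bn3 projection.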
+
+
+class Downsample_module(nn.Module):
+ """Downsample module for RSN.
+
+ Args:
+ block (nn.Module): Downsample block.
+ num_blocks (list): Number of blocks in each downsample unit.
+ num_units (int): Numbers of downsample units. Default: 4
+ has_skip (bool): Have skip connections from prior upsample
+ module or not. Default:False
+ num_steps (int): Number of steps in a block. Default:4
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the input feature to
+ downsample module. Default: 64
+ expand_times (int): Times by which the in_channels are expanded.
+ Default:26.
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ num_steps=4,
+ num_units=4,
+ has_skip=False,
+ norm_cfg=dict(type='BN'),
+ in_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.has_skip = has_skip
+ self.in_channels = in_channels
+ assert len(num_blocks) == num_units
+ self.num_blocks = num_blocks
+ self.num_units = num_units
+ self.num_steps = num_steps
+ self.norm_cfg = norm_cfg
+ self.layer1 = self._make_layer(
+ block,
+ in_channels,
+ num_blocks[0],
+ expand_times=expand_times,
+ res_top_channels=in_channels)
+ for i in range(1, num_units):
+ module_name = f'layer{i + 1}'
+ self.add_module(
+ module_name,
+ self._make_layer(
+ block,
+ in_channels * pow(2, i),
+ num_blocks[i],
+ stride=2,
+ expand_times=expand_times,
+ res_top_channels=in_channels))
+
+ def _make_layer(self,
+ block,
+ out_channels,
+ blocks,
+ stride=1,
+ expand_times=26,
+ res_top_channels=64):
+ downsample = None
+ if stride != 1 or self.in_channels != out_channels * block.expansion:
+ downsample = ConvModule(
+ self.in_channels,
+ out_channels * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ units = list()
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ num_steps=self.num_steps,
+ stride=stride,
+ downsample=downsample,
+ norm_cfg=self.norm_cfg,
+ expand_times=expand_times,
+ res_top_channels=res_top_channels))
+ self.in_channels = out_channels * block.expansion
+ for _ in range(1, blocks):
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ num_steps=self.num_steps,
+ expand_times=expand_times,
+ res_top_channels=res_top_channels))
+
+ return nn.Sequential(*units)
+
+ def forward(self, x, skip1, skip2):
+ out = list()
+ for i in range(self.num_units):
+ module_name = f'layer{i + 1}'
+ module_i = getattr(self, module_name)
+ x = module_i(x)
+ if self.has_skip:
+ x = x + skip1[i] + skip2[i]
+ out.append(x)
+ out.reverse()
+
+ return tuple(out)
+
+
+class Upsample_unit(nn.Module):
+ """Upsample unit for upsample module.
+
+ Args:
+ ind (int): Indicates whether to interpolate (>0) and whether to
+ generate feature map for the next hourglass-like module.
+        num_units (int): Number of units that form an upsample module. Along
+            with ind and gen_cross_conv, num_units is used to decide whether
+ to generate feature map for the next hourglass-like module.
+ in_channels (int): Channel number of the skip-in feature maps from
+ the corresponding downsample unit.
+ unit_channels (int): Channel number in this unit. Default:256.
+        gen_skip (bool): Whether or not to generate skips for the posterior
+ downsample module. Default:False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+        out_channels (int): Number of channels of feature output by upsample
+ module. Must equal to in_channels of downsample module. Default:64
+ """
+
+ def __init__(self,
+ ind,
+ num_units,
+ in_channels,
+ unit_channels=256,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.in_skip = ConvModule(
+ in_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.ind = ind
+ if self.ind > 0:
+ self.up_conv = ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ self.gen_skip = gen_skip
+ if self.gen_skip:
+ self.out_skip1 = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.out_skip2 = ConvModule(
+ unit_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.gen_cross_conv = gen_cross_conv
+ if self.ind == num_units - 1 and self.gen_cross_conv:
+ self.cross_conv = ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ def forward(self, x, up_x):
+ out = self.in_skip(x)
+
+ if self.ind > 0:
+ up_x = F.interpolate(
+ up_x,
+ size=(x.size(2), x.size(3)),
+ mode='bilinear',
+ align_corners=True)
+ up_x = self.up_conv(up_x)
+ out = out + up_x
+ out = self.relu(out)
+
+ skip1 = None
+ skip2 = None
+ if self.gen_skip:
+ skip1 = self.out_skip1(x)
+ skip2 = self.out_skip2(out)
+
+ cross_conv = None
+ if self.ind == self.num_units - 1 and self.gen_cross_conv:
+ cross_conv = self.cross_conv(out)
+
+ return out, skip1, skip2, cross_conv
+
+
+class Upsample_module(nn.Module):
+ """Upsample module for RSN.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units.
+ Default:256.
+ num_units (int): Numbers of upsample units. Default: 4
+ gen_skip (bool): Whether to generate skip for posterior downsample
+ module or not. Default:False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of feature output by upsample
+ module. Must equal to in_channels of downsample module. Default:64
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_units=4,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = list()
+ for i in range(num_units):
+ self.in_channels.append(RSB.expansion * out_channels * pow(2, i))
+ self.in_channels.reverse()
+ self.num_units = num_units
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.norm_cfg = norm_cfg
+ for i in range(num_units):
+ module_name = f'up{i + 1}'
+ self.add_module(
+ module_name,
+ Upsample_unit(
+ i,
+ self.num_units,
+ self.in_channels[i],
+ unit_channels,
+ self.gen_skip,
+ self.gen_cross_conv,
+ norm_cfg=self.norm_cfg,
+ out_channels=64))
+
+ def forward(self, x):
+ out = list()
+ skip1 = list()
+ skip2 = list()
+ cross_conv = None
+ for i in range(self.num_units):
+ module_i = getattr(self, f'up{i + 1}')
+ if i == 0:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], None)
+ elif i == self.num_units - 1:
+ outi, skip1_i, skip2_i, cross_conv = module_i(x[i], out[i - 1])
+ else:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], out[i - 1])
+ out.append(outi)
+ skip1.append(skip1_i)
+ skip2.append(skip2_i)
+ skip1.reverse()
+ skip2.reverse()
+
+ return out, skip1, skip2, cross_conv
+
+
+class Single_stage_RSN(nn.Module):
+ """Single_stage Residual Steps Network.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units. Default:256.
+ num_units (int): Numbers of downsample/upsample units. Default: 4
+ gen_skip (bool): Whether to generate skip for posterior downsample
+ module or not. Default:False
+ gen_cross_conv (bool): Whether to generate feature map for the next
+ hourglass-like module. Default:False
+ has_skip (bool): Whether to use skip connections from the prior upsample
+ module. Default: False
+ num_steps (int): Number of steps in RSB. Default: 4
+ num_blocks (list): Number of blocks in each downsample unit.
+ Default: [2, 2, 2, 2] Note: Make sure num_units==len(num_blocks)
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the feature from ResNet_Top.
+ Default: 64.
+ expand_times (int): Times by which the in_channels are expanded in RSB.
+ Default:26.
+ """
+
+ def __init__(self,
+ has_skip=False,
+ gen_skip=False,
+ gen_cross_conv=False,
+ unit_channels=256,
+ num_units=4,
+ num_steps=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ in_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ assert len(num_blocks) == num_units
+ self.has_skip = has_skip
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.num_units = num_units
+ self.num_steps = num_steps
+ self.unit_channels = unit_channels
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ self.downsample = Downsample_module(RSB, num_blocks, num_steps,
+ num_units, has_skip, norm_cfg,
+ in_channels, expand_times)
+ self.upsample = Upsample_module(unit_channels, num_units, gen_skip,
+ gen_cross_conv, norm_cfg, in_channels)
+
+ def forward(self, x, skip1, skip2):
+ mid = self.downsample(x, skip1, skip2)
+ out, skip1, skip2, cross_conv = self.upsample(mid)
+
+ return out, skip1, skip2, cross_conv
+
+
+class ResNet_top(nn.Module):
+ """ResNet top for RSN.
+
+ Args:
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ channels (int): Number of channels of the feature output by ResNet_top.
+ """
+
+ def __init__(self, norm_cfg=dict(type='BN'), channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.top = nn.Sequential(
+ ConvModule(
+ 3,
+ channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ norm_cfg=norm_cfg,
+ inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ def forward(self, img):
+ return self.top(img)
+
+
+@BACKBONES.register_module()
+class RSN(BaseBackbone):
+ """Residual Steps Network backbone. Paper ref: Cai et al. "Learning
+ Delicate Local Representations for Multi-Person Pose Estimation" (ECCV
+ 2020).
+
+ Args:
+ unit_channels (int): Number of channels in an upsample unit.
+ Default: 256
+ num_stages (int): Number of stages in a multi-stage RSN. Default: 4
+ num_units (int): Number of downsample/upsample units in a single-stage
+ RSN. Default: 4 Note: Make sure num_units == len(self.num_blocks)
+ num_blocks (list): Number of RSBs (Residual Steps Block) in each
+ downsample unit. Default: [2, 2, 2, 2]
+ num_steps (int): Number of steps in a RSB. Default:4
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ res_top_channels (int): Number of channels of feature from ResNet_top.
+ Default: 64.
+ expand_times (int): Times by which the in_channels are expanded in RSB.
+ Default:26.
+ Example:
+ >>> from mmpose.models import RSN
+ >>> import torch
+ >>> self = RSN(num_stages=2,num_units=2,num_blocks=[2,2])
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... for feature in level_output:
+ ... print(tuple(feature.shape))
+ ...
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_stages=4,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ num_steps=4,
+ norm_cfg=dict(type='BN'),
+ res_top_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+ self.num_blocks = num_blocks
+ self.num_steps = num_steps
+ self.norm_cfg = norm_cfg
+
+ assert self.num_stages > 0
+ assert self.num_steps > 1
+ assert self.num_units > 1
+ assert self.num_units == len(self.num_blocks)
+ self.top = ResNet_top(norm_cfg=norm_cfg)
+ self.multi_stage_rsn = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if i == 0:
+ has_skip = False
+ else:
+ has_skip = True
+ if i != self.num_stages - 1:
+ gen_skip = True
+ gen_cross_conv = True
+ else:
+ gen_skip = False
+ gen_cross_conv = False
+ self.multi_stage_rsn.append(
+ Single_stage_RSN(has_skip, gen_skip, gen_cross_conv,
+ unit_channels, num_units, num_steps,
+ num_blocks, norm_cfg, res_top_channels,
+ expand_times))
+
+ def forward(self, x):
+ """Model forward function."""
+ out_feats = []
+ skip1 = None
+ skip2 = None
+ x = self.top(x)
+ for i in range(self.num_stages):
+ out, skip1, skip2, x = self.multi_stage_rsn[i](x, skip1, skip2)
+ out_feats.append(out)
+
+ return out_feats
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ for m in self.multi_stage_rsn.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ for m in self.top.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
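The RSN forward above returns a list with one entry per stage, each entry itself a list of per-unit feature maps. A minimal sketch of that nested structure, assuming torch and this vendored mmpose copy are importable (an illustration only, not how HaMeR consumes the backbone):

```python
import torch
from mmpose.models import RSN

backbone = RSN(num_stages=2, num_units=2, num_blocks=[2, 2])
backbone.init_weights()
backbone.eval()

with torch.no_grad():
    stages = backbone(torch.rand(1, 3, 256, 256))

# One list entry per stage; each entry is a list of per-unit feature maps,
# ordered from low to high resolution (compare the Example in the docstring).
for stage_idx, units in enumerate(stages):
    for unit_idx, feat in enumerate(units):
        print(stage_idx, unit_idx, tuple(feat.shape))
```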
diff --git a/mmpose/models/backbones/scnet.py b/mmpose/models/backbones/scnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3786c5731d685638cfa64a83e5d4a5e2eee545de
--- /dev/null
+++ b/mmpose/models/backbones/scnet.py
@@ -0,0 +1,248 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck, ResNet
+
+
+class SCConv(nn.Module):
+ """SCConv (Self-calibrated Convolution)
+
+ Args:
+ in_channels (int): The input channels of the SCConv.
+ out_channels (int): The output channel of the SCConv.
+ stride (int): stride of SCConv.
+ pooling_r (int): size of pooling for scconv.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride,
+ pooling_r,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', momentum=0.1)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ assert in_channels == out_channels
+
+ self.k2 = nn.Sequential(
+ nn.AvgPool2d(kernel_size=pooling_r, stride=pooling_r),
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, in_channels)[1],
+ )
+ self.k3 = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, in_channels)[1],
+ )
+ self.k4 = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True),
+ )
+
+ def forward(self, x):
+ """Forward function."""
+ identity = x
+
+ out = torch.sigmoid(
+ torch.add(identity, F.interpolate(self.k2(x),
+ identity.size()[2:])))
+ out = torch.mul(self.k3(x), out)
+ out = self.k4(out)
+
+ return out
+
+
+class SCBottleneck(Bottleneck):
+ """SC(Self-calibrated) Bottleneck.
+
+ Args:
+ in_channels (int): The input channels of the SCBottleneck block.
+ out_channels (int): The output channel of the SCBottleneck block.
+ """
+
+ pooling_r = 4
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.mid_channels = out_channels // self.expansion // 2
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+
+ self.k1 = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, self.mid_channels)[1],
+ nn.ReLU(inplace=True))
+
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm2_name, norm2)
+
+ self.scconv = SCConv(self.mid_channels, self.mid_channels, self.stride,
+ self.pooling_r, self.conv_cfg, self.norm_cfg)
+
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels * 2,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out_a = self.conv1(x)
+ out_a = self.norm1(out_a)
+ out_a = self.relu(out_a)
+
+ out_a = self.k1(out_a)
+
+ out_b = self.conv2(x)
+ out_b = self.norm2(out_b)
+ out_b = self.relu(out_b)
+
+ out_b = self.scconv(out_b)
+
+ out = self.conv3(torch.cat([out_a, out_b], dim=1))
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class SCNet(ResNet):
+ """SCNet backbone.
+
+ Improving Convolutional Networks with Self-Calibrated Convolutions,
+ Jiang-Jiang Liu, Qibin Hou, Ming-Ming Cheng, Changhu Wang, Jiashi Feng,
+ IEEE CVPR, 2020.
+ http://mftp.mmcheng.net/Papers/20cvprSCNet.pdf
+
+ Args:
+ depth (int): Depth of scnet, from {50, 101}.
+ in_channels (int): Number of input image channels. Normally 3.
+ base_channels (int): Number of base channels of hidden layer.
+ num_stages (int): SCNet stages, normally 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmpose.models import SCNet
+ >>> import torch
+ >>> self = SCNet(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SCBottleneck, [3, 4, 6, 3]),
+ 101: (SCBottleneck, [3, 4, 23, 3])
+ }
+
+ def __init__(self, depth, **kwargs):
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for SCNet')
+ super().__init__(depth, **kwargs)
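The self-calibration in `SCConv.forward` reads as `out = k4(k3(x) * sigmoid(x + upsample(k2(avgpool(x)))))`. A simplified sketch of that equation with plain `Conv2d` layers standing in for the ConvModule/norm stacks above (that substitution is an assumption made only to keep the snippet self-contained):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

channels, pooling_r = 32, 4
k2 = nn.Conv2d(channels, channels, 3, padding=1)  # applied to the pooled input
k3 = nn.Conv2d(channels, channels, 3, padding=1)
k4 = nn.Conv2d(channels, channels, 3, padding=1)

x = torch.rand(1, channels, 56, 56)
pooled = F.avg_pool2d(x, kernel_size=pooling_r, stride=pooling_r)
# Calibration mask: sigmoid(x + upsample(k2(avgpool(x))))
mask = torch.sigmoid(x + F.interpolate(k2(pooled), x.shape[2:]))
out = k4(k3(x) * mask)  # calibrated response; same spatial size for stride 1
print(out.shape)  # torch.Size([1, 32, 56, 56])
```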
diff --git a/mmpose/models/backbones/seresnet.py b/mmpose/models/backbones/seresnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac2d53b40a4593bce96d5c7c3bb4e06d38353d0b
--- /dev/null
+++ b/mmpose/models/backbones/seresnet.py
@@ -0,0 +1,125 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.utils.checkpoint as cp
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck, ResLayer, ResNet
+from .utils.se_layer import SELayer
+
+
+class SEBottleneck(Bottleneck):
+ """SEBottleneck block for SEResNet.
+
+ Args:
+ in_channels (int): The input channels of the SEBottleneck block.
+ out_channels (int): The output channel of the SEBottleneck block.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16
+ """
+
+ def __init__(self, in_channels, out_channels, se_ratio=16, **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.se_layer = SELayer(out_channels, ratio=se_ratio)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ out = self.se_layer(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class SEResNet(ResNet):
+ """SEResNet backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1709.01507>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned; if
+ multiple stages are specified, a tuple of tensors will be returned.
+ Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import SEResNet
+ >>> import torch
+ >>> self = SEResNet(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SEBottleneck, (3, 4, 6, 3)),
+ 101: (SEBottleneck, (3, 4, 23, 3)),
+ 152: (SEBottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, se_ratio=16, **kwargs):
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for SEResNet')
+ self.se_ratio = se_ratio
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(se_ratio=self.se_ratio, **kwargs)
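Because SEResNet is registered in `BACKBONES`, it can also be built from a config dict, which is how mmpose-style configs normally reference it. A hedged sketch, assuming `build_backbone` is exported by this mmpose copy as in upstream mmpose 0.x:

```python
import torch
from mmpose.models import build_backbone

cfg = dict(type='SEResNet', depth=50, se_ratio=16, out_indices=(3, ))
backbone = build_backbone(cfg)
backbone.init_weights()
backbone.eval()

with torch.no_grad():
    feat = backbone(torch.rand(1, 3, 224, 224))
print(tuple(feat.shape))  # should be (1, 2048, 7, 7) for depth=50
```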
diff --git a/mmpose/models/backbones/seresnext.py b/mmpose/models/backbones/seresnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5c4e4ce03684f8a9bd0c6166969c01bace54bd2
--- /dev/null
+++ b/mmpose/models/backbones/seresnext.py
@@ -0,0 +1,168 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import ResLayer
+from .seresnet import SEBottleneck as _SEBottleneck
+from .seresnet import SEResNet
+
+
+class SEBottleneck(_SEBottleneck):
+ """SEBottleneck block for SEResNeXt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ base_channels (int): Middle channels of the first stage. Default: 64.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ base_channels=64,
+ groups=32,
+ width_per_group=4,
+ se_ratio=16,
+ **kwargs):
+ super().__init__(in_channels, out_channels, se_ratio, **kwargs)
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # We follow the same rationale as ResNeXt to compute mid_channels.
+ # For SEResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for SEResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+
+@BACKBONES.register_module()
+class SEResNeXt(SEResNet):
+ """SEResNeXt backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1709.01507>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ groups (int): Groups of conv2 in Bottleneck. Default: 32.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned; if
+ multiple stages are specified, a tuple of tensors will be returned.
+ Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import SEResNeXt
+ >>> import torch
+ >>> self = SEResNeXt(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SEBottleneck, (3, 4, 6, 3)),
+ 101: (SEBottleneck, (3, 4, 23, 3)),
+ 152: (SEBottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ **kwargs)
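To make the mid_channels comment above concrete, here is the arithmetic for the default 32x4d setting, using the standard Bottleneck expansion of 4:

```python
expansion = 4                 # standard ResNet Bottleneck expansion
base_channels = 64
groups, width_per_group = 32, 4

for out_channels in (256, 512, 1024, 2048):      # ResNet-50 stage widths
    mid = out_channels // expansion               # plain SEResNet mid_channels
    mid_resnext = groups * width_per_group * mid // base_channels
    print(out_channels, mid, mid_resnext)
# 256 64 128 / 512 128 256 / 1024 256 512 / 2048 512 1024
```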
diff --git a/mmpose/models/backbones/shufflenet_v1.py b/mmpose/models/backbones/shufflenet_v1.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f98cbd2132250ec13adcce6e642c966b0dbd7cc
--- /dev/null
+++ b/mmpose/models/backbones/shufflenet_v1.py
@@ -0,0 +1,329 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, build_activation_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import channel_shuffle, load_checkpoint, make_divisible
+
+
+class ShuffleUnit(nn.Module):
+ """ShuffleUnit block.
+
+ ShuffleNet unit with pointwise group convolution (GConv) and channel
+ shuffle.
+
+ Args:
+ in_channels (int): The input channels of the ShuffleUnit.
+ out_channels (int): The output channels of the ShuffleUnit.
+ groups (int, optional): The number of groups to be used in grouped 1x1
+ convolutions in each ShuffleUnit. Default: 3
+ first_block (bool, optional): Whether it is the first ShuffleUnit of a
+ sequence of ShuffleUnits. Default: True, which means not using the
+ grouped 1x1 convolution.
+ combine (str, optional): The ways to combine the input and output
+ branches. Default: 'add'.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool, optional): Use checkpoint or not. Using checkpoint
+ will save some memory while slowing down the training speed.
+ Default: False.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ groups=3,
+ first_block=True,
+ combine='add',
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.first_block = first_block
+ self.combine = combine
+ self.groups = groups
+ self.bottleneck_channels = self.out_channels // 4
+ self.with_cp = with_cp
+
+ if self.combine == 'add':
+ self.depthwise_stride = 1
+ self._combine_func = self._add
+ assert in_channels == out_channels, (
+ 'in_channels must be equal to out_channels when combine '
+ 'is add')
+ elif self.combine == 'concat':
+ self.depthwise_stride = 2
+ self._combine_func = self._concat
+ self.out_channels -= self.in_channels
+ self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
+ else:
+ raise ValueError(f'Cannot combine tensors with {self.combine}. '
+ 'Only "add" and "concat" are supported')
+
+ self.first_1x1_groups = 1 if first_block else self.groups
+ self.g_conv_1x1_compress = ConvModule(
+ in_channels=self.in_channels,
+ out_channels=self.bottleneck_channels,
+ kernel_size=1,
+ groups=self.first_1x1_groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ self.depthwise_conv3x3_bn = ConvModule(
+ in_channels=self.bottleneck_channels,
+ out_channels=self.bottleneck_channels,
+ kernel_size=3,
+ stride=self.depthwise_stride,
+ padding=1,
+ groups=self.bottleneck_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ self.g_conv_1x1_expand = ConvModule(
+ in_channels=self.bottleneck_channels,
+ out_channels=self.out_channels,
+ kernel_size=1,
+ groups=self.groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ self.act = build_activation_layer(act_cfg)
+
+ @staticmethod
+ def _add(x, out):
+ # residual connection
+ return x + out
+
+ @staticmethod
+ def _concat(x, out):
+ # concatenate along channel axis
+ return torch.cat((x, out), 1)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ residual = x
+
+ out = self.g_conv_1x1_compress(x)
+ out = self.depthwise_conv3x3_bn(out)
+
+ if self.groups > 1:
+ out = channel_shuffle(out, self.groups)
+
+ out = self.g_conv_1x1_expand(out)
+
+ if self.combine == 'concat':
+ residual = self.avgpool(residual)
+ out = self.act(out)
+ out = self._combine_func(residual, out)
+ else:
+ out = self._combine_func(residual, out)
+ out = self.act(out)
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ShuffleNetV1(BaseBackbone):
+ """ShuffleNetV1 backbone.
+
+ Args:
+ groups (int, optional): The number of groups to be used in grouped 1x1
+ convolutions in each ShuffleUnit. Default: 3.
+ widen_factor (float, optional): Width multiplier - adjusts the number
+ of channels in each layer by this amount. Default: 1.0.
+ out_indices (Sequence[int]): Output from which stages.
+ Default: (2, )
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ groups=3,
+ widen_factor=1.0,
+ out_indices=(2, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stage_blocks = [4, 8, 4]
+ self.groups = groups
+
+ for index in out_indices:
+ if index not in range(0, 3):
+ raise ValueError('the item in out_indices must in '
+ f'range(0, 3). But received {index}')
+
+ if frozen_stages not in range(-1, 3):
+ raise ValueError('frozen_stages must be in range(-1, 3). '
+ f'But received {frozen_stages}')
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ if groups == 1:
+ channels = (144, 288, 576)
+ elif groups == 2:
+ channels = (200, 400, 800)
+ elif groups == 3:
+ channels = (240, 480, 960)
+ elif groups == 4:
+ channels = (272, 544, 1088)
+ elif groups == 8:
+ channels = (384, 768, 1536)
+ else:
+ raise ValueError(f'{groups} groups is not supported for 1x1 '
+ 'Grouped Convolutions')
+
+ channels = [make_divisible(ch * widen_factor, 8) for ch in channels]
+
+ self.in_channels = int(24 * widen_factor)
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layers = nn.ModuleList()
+ for i, num_blocks in enumerate(self.stage_blocks):
+ first_block = (i == 0)
+ layer = self.make_layer(channels[i], num_blocks, first_block)
+ self.layers.append(layer)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(self.frozen_stages):
+ layer = self.layers[i]
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for name, m in self.named_modules():
+ if isinstance(m, nn.Conv2d):
+ if 'conv1' in name:
+ normal_init(m, mean=0, std=0.01)
+ else:
+ normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, val=1, bias=0.0001)
+ if isinstance(m, _BatchNorm):
+ if m.running_mean is not None:
+ nn.init.constant_(m.running_mean, 0)
+ else:
+ raise TypeError('pretrained must be a str or None. But received '
+ f'{type(pretrained)}')
+
+ def make_layer(self, out_channels, num_blocks, first_block=False):
+ """Stack ShuffleUnit blocks to make a layer.
+
+ Args:
+ out_channels (int): out_channels of the block.
+ num_blocks (int): Number of blocks.
+ first_block (bool, optional): Whether it is the first ShuffleUnit of a
+ sequence of ShuffleUnits. Default: False, which means using
+ the grouped 1x1 convolution.
+ """
+ layers = []
+ for i in range(num_blocks):
+ first_block = first_block if i == 0 else False
+ combine_mode = 'concat' if i == 0 else 'add'
+ layers.append(
+ ShuffleUnit(
+ self.in_channels,
+ out_channels,
+ groups=self.groups,
+ first_block=first_block,
+ combine=combine_mode,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.maxpool(x)
+
+ outs = []
+ for i, layer in enumerate(self.layers):
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
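ShuffleNetV1's docstring carries no Example block, so here is a hedged usage sketch; the printed shapes assume the default groups=3 channel table (240/480/960) and a 224x224 input, and this mmpose copy being importable:

```python
import torch
from mmpose.models import ShuffleNetV1

backbone = ShuffleNetV1(groups=3, widen_factor=1.0, out_indices=(0, 1, 2))
backbone.init_weights()
backbone.eval()

with torch.no_grad():
    outs = backbone(torch.rand(1, 3, 224, 224))
for out in outs:
    print(tuple(out.shape))
# expected roughly: (1, 240, 28, 28), (1, 480, 14, 14), (1, 960, 7, 7)
```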
diff --git a/mmpose/models/backbones/shufflenet_v2.py b/mmpose/models/backbones/shufflenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..e93533367afe4efa01fa67d14cafcca006c990e8
--- /dev/null
+++ b/mmpose/models/backbones/shufflenet_v2.py
@@ -0,0 +1,302 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import channel_shuffle, load_checkpoint
+
+
+class InvertedResidual(nn.Module):
+ """InvertedResidual block for ShuffleNetV2 backbone.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ out_channels (int): The output channels of the block.
+ stride (int): Stride of the 3x3 convolution layer. Default: 1
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride=1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stride = stride
+ self.with_cp = with_cp
+
+ branch_features = out_channels // 2
+ if self.stride == 1:
+ assert in_channels == branch_features * 2, (
+ f'in_channels ({in_channels}) should equal to '
+ f'branch_features * 2 ({branch_features * 2}) '
+ 'when stride is 1')
+
+ if in_channels != branch_features * 2:
+ assert self.stride != 1, (
+ f'stride ({self.stride}) should not equal 1 when '
+ f'in_channels != branch_features * 2')
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=in_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ in_channels,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ )
+
+ self.branch2 = nn.Sequential(
+ ConvModule(
+ in_channels if (self.stride > 1) else branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=branch_features,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.stride > 1:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+ else:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ShuffleNetV2(BaseBackbone):
+ """ShuffleNetV2 backbone.
+
+ Args:
+ widen_factor (float): Width multiplier - adjusts the number of
+ channels in each layer by this amount. Default: 1.0.
+ out_indices (Sequence[int]): Output from which stages.
+ Default: (3, ).
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ widen_factor=1.0,
+ out_indices=(3, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stage_blocks = [4, 8, 4]
+ for index in out_indices:
+ if index not in range(0, 4):
+ raise ValueError('the item in out_indices must in '
+ f'range(0, 4). But received {index}')
+
+ if frozen_stages not in range(-1, 4):
+ raise ValueError('frozen_stages must be in range(-1, 4). '
+ f'But received {frozen_stages}')
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ if widen_factor == 0.5:
+ channels = [48, 96, 192, 1024]
+ elif widen_factor == 1.0:
+ channels = [116, 232, 464, 1024]
+ elif widen_factor == 1.5:
+ channels = [176, 352, 704, 1024]
+ elif widen_factor == 2.0:
+ channels = [244, 488, 976, 2048]
+ else:
+ raise ValueError('widen_factor must be in [0.5, 1.0, 1.5, 2.0]. '
+ f'But received {widen_factor}')
+
+ self.in_channels = 24
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layers = nn.ModuleList()
+ for i, num_blocks in enumerate(self.stage_blocks):
+ layer = self._make_layer(channels[i], num_blocks)
+ self.layers.append(layer)
+
+ output_channels = channels[-1]
+ self.layers.append(
+ ConvModule(
+ in_channels=self.in_channels,
+ out_channels=output_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def _make_layer(self, out_channels, num_blocks):
+ """Stack blocks to make a layer.
+
+ Args:
+ out_channels (int): out_channels of the block.
+ num_blocks (int): number of blocks.
+ """
+ layers = []
+ for i in range(num_blocks):
+ stride = 2 if i == 0 else 1
+ layers.append(
+ InvertedResidual(
+ in_channels=self.in_channels,
+ out_channels=out_channels,
+ stride=stride,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+
+ for i in range(self.frozen_stages):
+ m = self.layers[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for name, m in self.named_modules():
+ if isinstance(m, nn.Conv2d):
+ if 'conv1' in name:
+ normal_init(m, mean=0, std=0.01)
+ else:
+ normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, val=1, bias=0.0001)
+ if isinstance(m, _BatchNorm):
+ if m.running_mean is not None:
+ nn.init.constant_(m.running_mean, 0)
+ else:
+ raise TypeError('pretrained must be a str or None. But received '
+ f'{type(pretrained)}')
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.maxpool(x)
+
+ outs = []
+ for i, layer in enumerate(self.layers):
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, nn.BatchNorm2d):
+ m.eval()
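A matching sketch for ShuffleNetV2 with its default `out_indices=(3, )`, which selects the final 1x1 conv appended after the three stages (again assuming this mmpose copy is importable):

```python
import torch
from mmpose.models import ShuffleNetV2

backbone = ShuffleNetV2(widen_factor=1.0)
backbone.init_weights()
backbone.eval()

with torch.no_grad():
    feat = backbone(torch.rand(1, 3, 224, 224))
print(tuple(feat.shape))  # expected (1, 1024, 7, 7)
```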
diff --git a/mmpose/models/backbones/tcn.py b/mmpose/models/backbones/tcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..deca2290aeb1830bc3e241b819157369371aaf27
--- /dev/null
+++ b/mmpose/models/backbones/tcn.py
@@ -0,0 +1,267 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, build_conv_layer, constant_init, kaiming_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.core import WeightNormClipHook
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class BasicTemporalBlock(nn.Module):
+ """Basic block for VideoPose3D.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ mid_channels (int): The output channels of conv1. Default: 1024.
+ kernel_size (int): Size of the convolving kernel. Default: 3.
+ dilation (int): Spacing between kernel elements. Default: 3.
+ dropout (float): Dropout rate. Default: 0.25.
+ causal (bool): Use causal convolutions instead of symmetric
+ convolutions (for real-time applications). Default: False.
+ residual (bool): Use residual connection. Default: True.
+ use_stride_conv (bool): Use the optimized TCN designed
+ specifically for single-frame batching, i.e. where batches have
+ input length = receptive field, and output length = 1. This
+ implementation replaces dilated convolutions with strided
+ convolutions to avoid generating unused intermediate results.
+ Default: False.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: dict(type='Conv1d').
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN1d').
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ mid_channels=1024,
+ kernel_size=3,
+ dilation=3,
+ dropout=0.25,
+ causal=False,
+ residual=True,
+ use_stride_conv=False,
+ conv_cfg=dict(type='Conv1d'),
+ norm_cfg=dict(type='BN1d')):
+ # Protect mutable default arguments
+ conv_cfg = copy.deepcopy(conv_cfg)
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.mid_channels = mid_channels
+ self.kernel_size = kernel_size
+ self.dilation = dilation
+ self.dropout = dropout
+ self.causal = causal
+ self.residual = residual
+ self.use_stride_conv = use_stride_conv
+
+ self.pad = (kernel_size - 1) * dilation // 2
+ if use_stride_conv:
+ self.stride = kernel_size
+ self.causal_shift = kernel_size // 2 if causal else 0
+ self.dilation = 1
+ else:
+ self.stride = 1
+ self.causal_shift = kernel_size // 2 * dilation if causal else 0
+
+ self.conv1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ mid_channels,
+ kernel_size=kernel_size,
+ stride=self.stride,
+ dilation=self.dilation,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+ self.conv2 = nn.Sequential(
+ ConvModule(
+ mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+
+ if residual and in_channels != out_channels:
+ self.short_cut = build_conv_layer(conv_cfg, in_channels,
+ out_channels, 1)
+ else:
+ self.short_cut = None
+
+ self.dropout = nn.Dropout(dropout) if dropout > 0 else None
+
+ def forward(self, x):
+ """Forward function."""
+ if self.use_stride_conv:
+ assert self.causal_shift + self.kernel_size // 2 < x.shape[2]
+ else:
+ assert 0 <= self.pad + self.causal_shift < x.shape[2] - \
+ self.pad + self.causal_shift <= x.shape[2]
+
+ out = self.conv1(x)
+ if self.dropout is not None:
+ out = self.dropout(out)
+
+ out = self.conv2(out)
+ if self.dropout is not None:
+ out = self.dropout(out)
+
+ if self.residual:
+ if self.use_stride_conv:
+ res = x[:, :, self.causal_shift +
+ self.kernel_size // 2::self.kernel_size]
+ else:
+ res = x[:, :,
+ (self.pad + self.causal_shift):(x.shape[2] - self.pad +
+ self.causal_shift)]
+
+ if self.short_cut is not None:
+ res = self.short_cut(res)
+ out = out + res
+
+ return out
+
+
+@BACKBONES.register_module()
+class TCN(BaseBackbone):
+ """TCN backbone.
+
+ Temporal Convolutional Networks.
+ More details can be found in the
+ `paper <https://arxiv.org/abs/1811.11742>`__.
+
+ Args:
+ in_channels (int): Number of input channels, which equals to
+ num_keypoints * num_features.
+ stem_channels (int): Number of feature channels. Default: 1024.
+ num_blocks (int): Number of basic temporal convolutional blocks.
+ Default: 2.
+ kernel_sizes (Sequence[int]): Sizes of the convolving kernel of
+ each basic block. Default: ``(3, 3, 3)``.
+ dropout (float): Dropout rate. Default: 0.25.
+ causal (bool): Use causal convolutions instead of symmetric
+ convolutions (for real-time applications).
+ Default: False.
+ residual (bool): Use residual connection. Default: True.
+ use_stride_conv (bool): Use TCN backbone optimized for
+ single-frame batching, i.e. where batches have input length =
+ receptive field, and output length = 1. This implementation
+ replaces dilated convolutions with strided convolutions to avoid
+ generating unused intermediate results. The weights are
+ interchangeable with the reference implementation. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: dict(type='Conv1d').
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN1d').
+ max_norm (float|None): if not None, the weight of convolution layers
+ will be clipped to have a maximum norm of max_norm.
+
+ Example:
+ >>> from mmpose.models import TCN
+ >>> import torch
+ >>> self = TCN(in_channels=34)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 34, 243)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 1024, 235)
+ (1, 1024, 217)
+ """
+
+ def __init__(self,
+ in_channels,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ causal=False,
+ residual=True,
+ use_stride_conv=False,
+ conv_cfg=dict(type='Conv1d'),
+ norm_cfg=dict(type='BN1d'),
+ max_norm=None):
+ # Protect mutable default arguments
+ conv_cfg = copy.deepcopy(conv_cfg)
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.stem_channels = stem_channels
+ self.num_blocks = num_blocks
+ self.kernel_sizes = kernel_sizes
+ self.dropout = dropout
+ self.causal = causal
+ self.residual = residual
+ self.use_stride_conv = use_stride_conv
+ self.max_norm = max_norm
+
+ assert num_blocks == len(kernel_sizes) - 1
+ for ks in kernel_sizes:
+ assert ks % 2 == 1, 'Only odd filter widths are supported.'
+
+ self.expand_conv = ConvModule(
+ in_channels,
+ stem_channels,
+ kernel_size=kernel_sizes[0],
+ stride=kernel_sizes[0] if use_stride_conv else 1,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ dilation = kernel_sizes[0]
+ self.tcn_blocks = nn.ModuleList()
+ for i in range(1, num_blocks + 1):
+ self.tcn_blocks.append(
+ BasicTemporalBlock(
+ in_channels=stem_channels,
+ out_channels=stem_channels,
+ mid_channels=stem_channels,
+ kernel_size=kernel_sizes[i],
+ dilation=dilation,
+ dropout=dropout,
+ causal=causal,
+ residual=residual,
+ use_stride_conv=use_stride_conv,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+ dilation *= kernel_sizes[i]
+
+ if self.max_norm is not None:
+ # Apply weight norm clip to conv layers
+ weight_clip = WeightNormClipHook(self.max_norm)
+ for module in self.modules():
+ if isinstance(module, nn.modules.conv._ConvNd):
+ weight_clip.register(module)
+
+ self.dropout = nn.Dropout(dropout) if dropout > 0 else None
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.expand_conv(x)
+
+ if self.dropout is not None:
+ x = self.dropout(x)
+
+ outs = []
+ for i in range(self.num_blocks):
+ x = self.tcn_blocks[i](x)
+ outs.append(x)
+
+ return tuple(outs)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights."""
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.modules.conv._ConvNd):
+ kaiming_init(m, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
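The docstring example's output lengths (243 -> 235 -> 217) follow directly from the dilation schedule: the expand conv and each block trim (kernel_size - 1) * dilation frames. A few lines of arithmetic reproduce them:

```python
kernel_sizes = (3, 3, 3)
seq_len = 243 - (kernel_sizes[0] - 1)        # after expand_conv: 241
dilation = kernel_sizes[0]
for ks in kernel_sizes[1:]:
    seq_len -= (ks - 1) * dilation           # block outputs: 235, then 217
    dilation *= ks
    print(seq_len)
# The full receptive field is 3 * 3 * 3 = 27 frames, so with
# use_stride_conv=True a 27-frame input collapses to a single output frame.
```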
diff --git a/mmpose/models/backbones/utils/__init__.py b/mmpose/models/backbones/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..52a30ca9f7c8e90b6c6fa2fd8a9705ca0403b259
--- /dev/null
+++ b/mmpose/models/backbones/utils/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .channel_shuffle import channel_shuffle
+from .inverted_residual import InvertedResidual
+from .make_divisible import make_divisible
+from .se_layer import SELayer
+from .utils import load_checkpoint
+
+__all__ = [
+ 'channel_shuffle', 'make_divisible', 'InvertedResidual', 'SELayer',
+ 'load_checkpoint'
+]
diff --git a/mmpose/models/backbones/utils/__pycache__/__init__.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..69e945f7f742fb8d4c64f1335cc80c64b7541b7c
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/__pycache__/channel_shuffle.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/channel_shuffle.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..944e3a25210775d06fc3f87d9d1378930c6d01ca
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/channel_shuffle.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/__pycache__/inverted_residual.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/inverted_residual.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7c08771e0e31acb44a357c5d8a31d6d18f5ab627
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/inverted_residual.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/__pycache__/make_divisible.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/make_divisible.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..080d0213a7c23afdd163ac8bd6a314ca1b52b37c
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/make_divisible.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/__pycache__/se_layer.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/se_layer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..086a592c0d0bade501917457b49f90ba2c0824e3
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/se_layer.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/__pycache__/utils.cpython-310.pyc b/mmpose/models/backbones/utils/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2ee2f65f2c9913c5c72b463f495ec9029b83997
Binary files /dev/null and b/mmpose/models/backbones/utils/__pycache__/utils.cpython-310.pyc differ
diff --git a/mmpose/models/backbones/utils/channel_shuffle.py b/mmpose/models/backbones/utils/channel_shuffle.py
new file mode 100644
index 0000000000000000000000000000000000000000..27006a8065db35a14c4207ce6613104374b064ad
--- /dev/null
+++ b/mmpose/models/backbones/utils/channel_shuffle.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+
+def channel_shuffle(x, groups):
+ """Channel Shuffle operation.
+
+ This function enables cross-group information flow for multiple groups
+ convolution layers.
+
+ Args:
+ x (Tensor): The input tensor.
+ groups (int): The number of groups to divide the input tensor
+ in the channel dimension.
+
+ Returns:
+ Tensor: The output tensor after channel shuffle operation.
+ """
+
+ batch_size, num_channels, height, width = x.size()
+ assert (num_channels % groups == 0), ('num_channels should be '
+ 'divisible by groups')
+ channels_per_group = num_channels // groups
+
+ x = x.view(batch_size, groups, channels_per_group, height, width)
+ x = torch.transpose(x, 1, 2).contiguous()
+ x = x.view(batch_size, -1, height, width)
+
+ return x
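A tiny worked example makes the shuffle order visible: with 6 channels in 2 groups, the channel order (0 1 2 | 3 4 5) becomes (0 3 1 4 2 5):

```python
import torch
from mmpose.models.backbones.utils import channel_shuffle

x = torch.arange(6).float().reshape(1, 6, 1, 1)  # channel c holds the value c
print(channel_shuffle(x, 2).flatten().tolist())
# [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]
```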
diff --git a/mmpose/models/backbones/utils/inverted_residual.py b/mmpose/models/backbones/utils/inverted_residual.py
new file mode 100644
index 0000000000000000000000000000000000000000..dff762c570550e4a738ae1833a4c82c18777115d
--- /dev/null
+++ b/mmpose/models/backbones/utils/inverted_residual.py
@@ -0,0 +1,128 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule
+
+from .se_layer import SELayer
+
+
+class InvertedResidual(nn.Module):
+ """Inverted Residual Block.
+
+ Args:
+ in_channels (int): The input channels of this Module.
+ out_channels (int): The output channels of this Module.
+ mid_channels (int): The input channels of the depthwise convolution.
+ kernel_size (int): The kernel size of the depthwise convolution.
+ Default: 3.
+ groups (None or int): The group number of the depthwise convolution.
+ Default: None, which means group number = mid_channels.
+ stride (int): The stride of the depthwise convolution. Default: 1.
+ se_cfg (dict): Config dict for se layer. Default: None, which means no
+ se layer.
+ with_expand_conv (bool): Use expand conv or not. If set False,
+ mid_channels must be the same with in_channels.
+ Default: True.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ mid_channels,
+ kernel_size=3,
+ groups=None,
+ stride=1,
+ se_cfg=None,
+ with_expand_conv=True,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
+ assert stride in [1, 2]
+ self.with_cp = with_cp
+ self.with_se = se_cfg is not None
+ self.with_expand_conv = with_expand_conv
+
+ if groups is None:
+ groups = mid_channels
+
+ if self.with_se:
+ assert isinstance(se_cfg, dict)
+ if not self.with_expand_conv:
+ assert mid_channels == in_channels
+
+ if self.with_expand_conv:
+ self.expand_conv = ConvModule(
+ in_channels=in_channels,
+ out_channels=mid_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ self.depthwise_conv = ConvModule(
+ in_channels=mid_channels,
+ out_channels=mid_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=kernel_size // 2,
+ groups=groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ if self.with_se:
+ self.se = SELayer(**se_cfg)
+ self.linear_conv = ConvModule(
+ in_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ out = x
+
+ if self.with_expand_conv:
+ out = self.expand_conv(out)
+
+ out = self.depthwise_conv(out)
+
+ if self.with_se:
+ out = self.se(out)
+
+ out = self.linear_conv(out)
+
+ if self.with_res_shortcut:
+ return x + out
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
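
For orientation, a minimal pure-PyTorch sketch of the same expand -> depthwise -> linear pattern with the conditional residual shortcut; the SE branch and the mmcv `ConvModule` wrappers are omitted for brevity, and `TinyInvertedResidual` is an illustrative name, not part of this codebase:

```python
import torch
import torch.nn as nn

class TinyInvertedResidual(nn.Module):
    def __init__(self, in_ch, out_ch, mid_ch, kernel_size=3, stride=1):
        super().__init__()
        # shortcut only when the block keeps resolution and channel count, as above
        self.use_res = stride == 1 and in_ch == out_ch
        self.expand = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 1), nn.BatchNorm2d(mid_ch), nn.ReLU())
        self.depthwise = nn.Sequential(
            nn.Conv2d(mid_ch, mid_ch, kernel_size, stride, kernel_size // 2, groups=mid_ch),
            nn.BatchNorm2d(mid_ch), nn.ReLU())
        self.linear = nn.Sequential(nn.Conv2d(mid_ch, out_ch, 1), nn.BatchNorm2d(out_ch))

    def forward(self, x):
        out = self.linear(self.depthwise(self.expand(x)))
        return x + out if self.use_res else out

block = TinyInvertedResidual(16, 16, 64)
print(block(torch.randn(1, 16, 32, 32)).shape)  # torch.Size([1, 16, 32, 32])
```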
diff --git a/mmpose/models/backbones/utils/make_divisible.py b/mmpose/models/backbones/utils/make_divisible.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7666be65939d5c76057e73927c230029cb1871d
--- /dev/null
+++ b/mmpose/models/backbones/utils/make_divisible.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
+ """Make divisible function.
+
+ This function rounds the channel number to the nearest value divisible by
+ the divisor, without dropping below ``min_ratio`` of the original value.
+
+ Args:
+ value (int): The original channel number.
+ divisor (int): The divisor to fully divide the channel number.
+ min_value (int, optional): The minimum value of the output channel.
+ Default: None, means that the minimum value equal to the divisor.
+ min_ratio (float, optional): The minimum ratio of the rounded channel
+ number to the original channel number. Default: 0.9.
+ Returns:
+ int: The modified output channel number
+ """
+
+ if min_value is None:
+ min_value = divisor
+ new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
+ # Make sure that round down does not go down by more than (1-min_ratio).
+ if new_value < min_ratio * value:
+ new_value += divisor
+ return new_value
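
The rounding rule can be exercised directly; this reuses the function body above, so nothing else is needed:

```python
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    if min_value is None:
        min_value = divisor
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    if new_value < min_ratio * value:
        new_value += divisor
    return new_value

print(make_divisible(30, 8))  # 32: rounded up to the nearest multiple of 8
print(make_divisible(35, 8))  # 32: rounded down, still >= 0.9 * 35, so no bump
print(make_divisible(10, 8))  # 16: 8 would lose more than 10%, so one divisor is added back
```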
diff --git a/mmpose/models/backbones/utils/se_layer.py b/mmpose/models/backbones/utils/se_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..07f70802eb1b98b1f22516ba62b1533557f428ed
--- /dev/null
+++ b/mmpose/models/backbones/utils/se_layer.py
@@ -0,0 +1,54 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+
+class SELayer(nn.Module):
+ """Squeeze-and-Excitation Module.
+
+ Args:
+ channels (int): The input (and output) channels of the SE layer.
+ ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
+ ``int(channels/ratio)``. Default: 16.
+ conv_cfg (None or dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ act_cfg (dict or Sequence[dict]): Config dict for activation layer.
+ If act_cfg is a dict, two activation layers will be configurated
+ by this dict. If act_cfg is a sequence of dicts, the first
+ activation layer will be configurated by the first dict and the
+ second activation layer will be configurated by the second dict.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid'))
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+ self.conv1 = ConvModule(
+ in_channels=channels,
+ out_channels=int(channels / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(channels / ratio),
+ out_channels=channels,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ out = self.global_avgpool(x)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ return x * out
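
A plain-PyTorch sketch of the same squeeze-and-excitation gating (ReLU then Sigmoid, channel reduction by `ratio`), without the mmcv `ConvModule` wrapper; `TinySELayer` is illustrative only:

```python
import torch
import torch.nn as nn

class TinySELayer(nn.Module):
    def __init__(self, channels, ratio=16):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.gate = nn.Sequential(
            nn.Conv2d(channels, channels // ratio, 1), nn.ReLU(),
            nn.Conv2d(channels // ratio, channels, 1), nn.Sigmoid())

    def forward(self, x):
        return x * self.gate(self.pool(x))  # per-channel gate in [0, 1]

x = torch.randn(2, 64, 16, 16)
print(TinySELayer(64)(x).shape)  # torch.Size([2, 64, 16, 16])
```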
diff --git a/mmpose/models/backbones/utils/utils.py b/mmpose/models/backbones/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9ac948653adeb849e0f510bc1014664741fe6f9
--- /dev/null
+++ b/mmpose/models/backbones/utils/utils.py
@@ -0,0 +1,87 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+
+from mmcv.runner.checkpoint import _load_checkpoint, load_state_dict
+
+
+def load_checkpoint(model,
+ filename,
+ map_location='cpu',
+ strict=False,
+ logger=None):
+ """Load checkpoint from a file or URI.
+
+ Args:
+ model (Module): Module to load checkpoint.
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``.
+ map_location (str): Same as :func:`torch.load`.
+ strict (bool): Whether to strictly enforce that the keys of the
+ checkpoint match the keys of the model.
+ logger (:mod:`logging.Logger` or None): The logger for error message.
+
+ Returns:
+ dict or OrderedDict: The loaded checkpoint.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict_tmp = checkpoint['state_dict']
+ else:
+ state_dict_tmp = checkpoint
+
+ state_dict = OrderedDict()
+ # strip prefix of state_dict
+ for k, v in state_dict_tmp.items():
+ if k.startswith('module.backbone.'):
+ state_dict[k[16:]] = v
+ elif k.startswith('module.'):
+ state_dict[k[7:]] = v
+ elif k.startswith('backbone.'):
+ state_dict[k[9:]] = v
+ else:
+ state_dict[k] = v
+ # load state_dict
+ load_state_dict(model, state_dict, strict, logger)
+ return checkpoint
+
+
+def get_state_dict(filename, map_location='cpu'):
+ """Get state_dict from a file or URI.
+
+ Args:
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``.
+ map_location (str): Same as :func:`torch.load`.
+
+ Returns:
+ OrderedDict: The state_dict.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict_tmp = checkpoint['state_dict']
+ else:
+ state_dict_tmp = checkpoint
+
+ state_dict = OrderedDict()
+ # strip prefix of state_dict
+ for k, v in state_dict_tmp.items():
+ if k.startswith('module.backbone.'):
+ state_dict[k[16:]] = v
+ elif k.startswith('module.'):
+ state_dict[k[7:]] = v
+ elif k.startswith('backbone.'):
+ state_dict[k[9:]] = v
+ else:
+ state_dict[k] = v
+
+ return state_dict
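
The prefix-stripping rule shared by both helpers, shown on plain dict keys (toy data only): keys saved by DataParallel or by a full pose model are remapped so they load into a bare backbone:

```python
raw = {
    'module.backbone.conv1.weight': 0,    # DataParallel + full model
    'module.head.fc.weight': 1,           # DataParallel only
    'backbone.layer1.0.conv1.weight': 2,  # full model only
    'conv1.bias': 3,                      # already a bare backbone key
}
stripped = {}
for k, v in raw.items():
    if k.startswith('module.backbone.'):
        stripped[k[16:]] = v
    elif k.startswith('module.'):
        stripped[k[7:]] = v
    elif k.startswith('backbone.'):
        stripped[k[9:]] = v
    else:
        stripped[k] = v
print(list(stripped))
# ['conv1.weight', 'head.fc.weight', 'layer1.0.conv1.weight', 'conv1.bias']
```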
diff --git a/mmpose/models/backbones/v2v_net.py b/mmpose/models/backbones/v2v_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..99462af711069a34c13628364e2c466163507861
--- /dev/null
+++ b/mmpose/models/backbones/v2v_net.py
@@ -0,0 +1,257 @@
+# ------------------------------------------------------------------------------
+# Copyright and License Information
+# Adapted from
+# https://github.com/microsoft/voxelpose-pytorch/blob/main/lib/models/v2v_net.py
+# Original Licence: MIT License
+# ------------------------------------------------------------------------------
+
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class Basic3DBlock(nn.Module):
+ """A basic 3D convolutional block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the convolution operation
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: dict(type='Conv3d')
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN3d')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ conv_cfg=dict(type='Conv3d'),
+ norm_cfg=dict(type='BN3d')):
+ super(Basic3DBlock, self).__init__()
+ self.block = ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ bias=True)
+
+ def forward(self, x):
+ """Forward function."""
+ return self.block(x)
+
+
+class Res3DBlock(nn.Module):
+ """A residual 3D convolutional block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the convolution operation
+ Default: 3
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: dict(type='Conv3d')
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN3d')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ conv_cfg=dict(type='Conv3d'),
+ norm_cfg=dict(type='BN3d')):
+ super(Res3DBlock, self).__init__()
+ self.res_branch = nn.Sequential(
+ ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ bias=True),
+ ConvModule(
+ out_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ bias=True))
+
+ if in_channels == out_channels:
+ self.skip_con = nn.Sequential()
+ else:
+ self.skip_con = ConvModule(
+ in_channels,
+ out_channels,
+ 1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ bias=True)
+
+ def forward(self, x):
+ """Forward function."""
+ res = self.res_branch(x)
+ skip = self.skip_con(x)
+ return F.relu(res + skip, True)
+
+
+class Pool3DBlock(nn.Module):
+ """A 3D max-pool block.
+
+ Args:
+ pool_size (int): Pool size of the 3D max-pool layer
+ """
+
+ def __init__(self, pool_size):
+ super(Pool3DBlock, self).__init__()
+ self.pool_size = pool_size
+
+ def forward(self, x):
+ """Forward function."""
+ return F.max_pool3d(
+ x, kernel_size=self.pool_size, stride=self.pool_size)
+
+
+class Upsample3DBlock(nn.Module):
+ """A 3D upsample block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the transposed convolution operation.
+ Default: 2
+ stride (int): Stride of the transposed convolution operation.
+ Default: 2
+ """
+
+ def __init__(self, in_channels, out_channels, kernel_size=2, stride=2):
+ super(Upsample3DBlock, self).__init__()
+ assert kernel_size == 2
+ assert stride == 2
+ self.block = nn.Sequential(
+ nn.ConvTranspose3d(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=0,
+ output_padding=0), nn.BatchNorm3d(out_channels), nn.ReLU(True))
+
+ def forward(self, x):
+ """Forward function."""
+ return self.block(x)
+
+
+class EncoderDecorder(nn.Module):
+ """An encoder-decoder block.
+
+ Args:
+ in_channels (int): Input channels of this block
+ """
+
+ def __init__(self, in_channels=32):
+ super(EncoderDecorder, self).__init__()
+
+ self.encoder_pool1 = Pool3DBlock(2)
+ self.encoder_res1 = Res3DBlock(in_channels, in_channels * 2)
+ self.encoder_pool2 = Pool3DBlock(2)
+ self.encoder_res2 = Res3DBlock(in_channels * 2, in_channels * 4)
+
+ self.mid_res = Res3DBlock(in_channels * 4, in_channels * 4)
+
+ self.decoder_res2 = Res3DBlock(in_channels * 4, in_channels * 4)
+ self.decoder_upsample2 = Upsample3DBlock(in_channels * 4,
+ in_channels * 2, 2, 2)
+ self.decoder_res1 = Res3DBlock(in_channels * 2, in_channels * 2)
+ self.decoder_upsample1 = Upsample3DBlock(in_channels * 2, in_channels,
+ 2, 2)
+
+ self.skip_res1 = Res3DBlock(in_channels, in_channels)
+ self.skip_res2 = Res3DBlock(in_channels * 2, in_channels * 2)
+
+ def forward(self, x):
+ """Forward function."""
+ skip_x1 = self.skip_res1(x)
+ x = self.encoder_pool1(x)
+ x = self.encoder_res1(x)
+
+ skip_x2 = self.skip_res2(x)
+ x = self.encoder_pool2(x)
+ x = self.encoder_res2(x)
+
+ x = self.mid_res(x)
+
+ x = self.decoder_res2(x)
+ x = self.decoder_upsample2(x)
+ x = x + skip_x2
+
+ x = self.decoder_res1(x)
+ x = self.decoder_upsample1(x)
+ x = x + skip_x1
+
+ return x
+
+
+@BACKBONES.register_module()
+class V2VNet(BaseBackbone):
+ """V2VNet.
+
+ Please refer to the paper for details.
+
+ Args:
+ input_channels (int):
+ Number of channels of the input feature volume.
+ output_channels (int):
+ Number of channels of the output volume.
+ mid_channels (int):
+ Input and output channels of the encoder-decoder block.
+ """
+
+ def __init__(self, input_channels, output_channels, mid_channels=32):
+ super(V2VNet, self).__init__()
+
+ self.front_layers = nn.Sequential(
+ Basic3DBlock(input_channels, mid_channels // 2, 7),
+ Res3DBlock(mid_channels // 2, mid_channels),
+ )
+
+ self.encoder_decoder = EncoderDecorder(in_channels=mid_channels)
+
+ self.output_layer = nn.Conv3d(
+ mid_channels, output_channels, kernel_size=1, stride=1, padding=0)
+
+ self._initialize_weights()
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.front_layers(x)
+ x = self.encoder_decoder(x)
+ x = self.output_layer(x)
+
+ return x
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv3d):
+ nn.init.normal_(m.weight, 0, 0.001)
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.ConvTranspose3d):
+ nn.init.normal_(m.weight, 0, 0.001)
+ nn.init.constant_(m.bias, 0)
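
A quick shape check of the encoder/decoder symmetry above (plain PyTorch, no mmcv): a 2x max-pool halves each spatial dimension and a kernel-2, stride-2 transposed convolution doubles it, which is why the two skip additions in `EncoderDecorder` see matching resolutions:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 32, 64, 64, 64)                                # (N, C, D, H, W) feature volume
pooled = F.max_pool3d(x, kernel_size=2, stride=2)                 # what Pool3DBlock(2) does
up = nn.ConvTranspose3d(32, 32, kernel_size=2, stride=2)(pooled)  # core of Upsample3DBlock
print(pooled.shape)  # torch.Size([1, 32, 32, 32, 32])
print(up.shape)      # torch.Size([1, 32, 64, 64, 64]) -> same as x, so `x + skip` is valid
```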
diff --git a/mmpose/models/backbones/vgg.py b/mmpose/models/backbones/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7d467017a5520f399c84b1235ec64c99b805b42
--- /dev/null
+++ b/mmpose/models/backbones/vgg.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init, normal_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+def make_vgg_layer(in_channels,
+ out_channels,
+ num_blocks,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ dilation=1,
+ with_norm=False,
+ ceil_mode=False):
+ layers = []
+ for _ in range(num_blocks):
+ layer = ConvModule(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=3,
+ dilation=dilation,
+ padding=dilation,
+ bias=True,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ layers.append(layer)
+ in_channels = out_channels
+ layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))
+
+ return layers
+
+
+@BACKBONES.register_module()
+class VGG(BaseBackbone):
+ """VGG backbone.
+
+ Args:
+ depth (int): Depth of vgg, from {11, 13, 16, 19}.
+ with_norm (bool): Use BatchNorm or not.
+ num_classes (int): number of classes for classification.
+ num_stages (int): VGG stages, normally 5.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned;
+ if multiple stages are specified, a tuple of tensors is
+ returned. When it is None, the default behavior depends on
+ whether num_classes is specified. If num_classes <= 0, the default
+ value is (4, ), outputting the last feature map before classifier.
+ If num_classes > 0, the default value is (5, ), outputting the
+ classification score. Default: None.
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ ceil_mode (bool): Whether to use ceil_mode of MaxPool. Default: False.
+ with_last_pool (bool): Whether to keep the last pooling before
+ classifier. Default: True.
+ """
+
+ # Parameters to build layers. Each element specifies the number of conv in
+ # each stage. For example, VGG11 contains 11 layers with learnable
+ # parameters. 11 is computed as 11 = (1 + 1 + 2 + 2 + 2) + 3,
+ # where 3 indicates the last three fully-connected layers.
+ arch_settings = {
+ 11: (1, 1, 2, 2, 2),
+ 13: (2, 2, 2, 2, 2),
+ 16: (2, 2, 3, 3, 3),
+ 19: (2, 2, 4, 4, 4)
+ }
+
+ def __init__(self,
+ depth,
+ num_classes=-1,
+ num_stages=5,
+ dilations=(1, 1, 1, 1, 1),
+ out_indices=None,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ ceil_mode=False,
+ with_last_pool=True):
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for vgg')
+ assert num_stages >= 1 and num_stages <= 5
+ stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ assert len(dilations) == num_stages
+
+ self.num_classes = num_classes
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ with_norm = norm_cfg is not None
+
+ if out_indices is None:
+ out_indices = (5, ) if num_classes > 0 else (4, )
+ assert max(out_indices) <= num_stages
+ self.out_indices = out_indices
+
+ self.in_channels = 3
+ start_idx = 0
+ vgg_layers = []
+ self.range_sub_modules = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ num_modules = num_blocks + 1
+ end_idx = start_idx + num_modules
+ dilation = dilations[i]
+ out_channels = 64 * 2**i if i < 4 else 512
+ vgg_layer = make_vgg_layer(
+ self.in_channels,
+ out_channels,
+ num_blocks,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ dilation=dilation,
+ with_norm=with_norm,
+ ceil_mode=ceil_mode)
+ vgg_layers.extend(vgg_layer)
+ self.in_channels = out_channels
+ self.range_sub_modules.append([start_idx, end_idx])
+ start_idx = end_idx
+ if not with_last_pool:
+ vgg_layers.pop(-1)
+ self.range_sub_modules[-1][1] -= 1
+ self.module_name = 'features'
+ self.add_module(self.module_name, nn.Sequential(*vgg_layers))
+
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Linear(512 * 7 * 7, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, num_classes),
+ )
+
+ def init_weights(self, pretrained=None):
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ def forward(self, x):
+ outs = []
+ vgg_layers = getattr(self, self.module_name)
+ for i in range(len(self.stage_blocks)):
+ for j in range(*self.range_sub_modules[i]):
+ vgg_layer = vgg_layers[j]
+ x = vgg_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), -1)
+ x = self.classifier(x)
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ else:
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ vgg_layers = getattr(self, self.module_name)
+ for i in range(self.frozen_stages):
+ for j in range(*self.range_sub_modules[i]):
+ m = vgg_layers[j]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval has an effect on BatchNorm only
+ if isinstance(m, _BatchNorm):
+ m.eval()
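
A small check (plain Python) of how `arch_settings` relates to the depth in the name and to the per-stage widths built in `__init__` (`64 * 2**i`, capped at 512):

```python
arch_settings = {11: (1, 1, 2, 2, 2), 13: (2, 2, 2, 2, 2),
                 16: (2, 2, 3, 3, 3), 19: (2, 2, 4, 4, 4)}
for depth, blocks in sorted(arch_settings.items()):
    widths = [64 * 2**i if i < 4 else 512 for i in range(len(blocks))]
    assert sum(blocks) + 3 == depth  # conv layers + the three fully connected layers
    print(depth, blocks, widths)
# 16 (2, 2, 3, 3, 3) [64, 128, 256, 512, 512], and so on for the other depths
```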
diff --git a/mmpose/models/backbones/vipnas_mbv3.py b/mmpose/models/backbones/vipnas_mbv3.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed990e3966b27301dbaf081e3ec0e908704dfc8b
--- /dev/null
+++ b/mmpose/models/backbones/vipnas_mbv3.py
@@ -0,0 +1,179 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import InvertedResidual, load_checkpoint
+
+
+@BACKBONES.register_module()
+class ViPNAS_MobileNetV3(BaseBackbone):
+ """ViPNAS_MobileNetV3 backbone.
+
+ "ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"
+ More details can be found in the `paper
+ `__ .
+
+ Args:
+ wid (list(int)): Searched width config for each stage.
+ expan (list(int)): Searched expansion ratio config for each stage.
+ dep (list(int)): Searched depth config for each stage.
+ ks (list(int)): Searched kernel size config for each stage.
+ group (list(int)): Searched group number config for each stage.
+ att (list(bool)): Searched attention config for each stage.
+ stride (list(int)): Stride config for each stage.
+ act (list(dict)): Activation config for each stage.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save
+ some memory while slowing down the training speed.
+ Default: False.
+ """
+
+ def __init__(self,
+ wid=[16, 16, 24, 40, 80, 112, 160],
+ expan=[None, 1, 5, 4, 5, 5, 6],
+ dep=[None, 1, 4, 4, 4, 4, 4],
+ ks=[3, 3, 7, 7, 5, 7, 5],
+ group=[None, 8, 120, 20, 100, 280, 240],
+ att=[None, True, True, False, True, True, True],
+ stride=[2, 1, 2, 2, 2, 1, 2],
+ act=[
+ 'HSwish', 'ReLU', 'ReLU', 'ReLU', 'HSwish', 'HSwish',
+ 'HSwish'
+ ],
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ frozen_stages=-1,
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.wid = wid
+ self.expan = expan
+ self.dep = dep
+ self.ks = ks
+ self.group = group
+ self.att = att
+ self.stride = stride
+ self.act = act
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.wid[0],
+ kernel_size=self.ks[0],
+ stride=self.stride[0],
+ padding=self.ks[0] // 2,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type=self.act[0]))
+
+ self.layers = self._make_layer()
+
+ def _make_layer(self):
+ layers = []
+ layer_index = 0
+ for i, dep in enumerate(self.dep[1:]):
+ mid_channels = self.wid[i + 1] * self.expan[i + 1]
+
+ if self.att[i + 1]:
+ se_cfg = dict(
+ channels=mid_channels,
+ ratio=4,
+ act_cfg=(dict(type='ReLU'), dict(type='HSigmoid')))
+ else:
+ se_cfg = None
+
+ if self.expan[i + 1] == 1:
+ with_expand_conv = False
+ else:
+ with_expand_conv = True
+
+ for j in range(dep):
+ if j == 0:
+ stride = self.stride[i + 1]
+ in_channels = self.wid[i]
+ else:
+ stride = 1
+ in_channels = self.wid[i + 1]
+
+ layer = InvertedResidual(
+ in_channels=in_channels,
+ out_channels=self.wid[i + 1],
+ mid_channels=mid_channels,
+ kernel_size=self.ks[i + 1],
+ groups=self.group[i + 1],
+ stride=stride,
+ se_cfg=se_cfg,
+ with_expand_conv=with_expand_conv,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type=self.act[i + 1]),
+ with_cp=self.with_cp)
+ layer_index += 1
+ layer_name = f'layer{layer_index}'
+ self.add_module(layer_name, layer)
+ layers.append(layer_name)
+ return layers
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.normal_(m.weight, std=0.001)
+ for name, _ in m.named_parameters():
+ if name in ['bias']:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+
+ return x
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
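
The searched configuration above, unrolled stage by stage (plain Python; this only mirrors the indexing in `_make_layer`): for stage i >= 1, `mid_channels = wid[i] * expan[i]`, the first block takes `wid[i-1]` input channels at `stride[i]`, and the remaining `dep[i] - 1` blocks keep `wid[i]` channels at stride 1:

```python
wid = [16, 16, 24, 40, 80, 112, 160]
expan = [None, 1, 5, 4, 5, 5, 6]
dep = [None, 1, 4, 4, 4, 4, 4]
stride = [2, 1, 2, 2, 2, 1, 2]

for i in range(1, len(wid)):
    print(f'stage {i}: {dep[i]} block(s), {wid[i - 1]} -> {wid[i]} channels '
          f'(mid {wid[i] * expan[i]}), first stride {stride[i]}')
# e.g. stage 2: 4 block(s), 16 -> 24 channels (mid 120), first stride 2
```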
diff --git a/mmpose/models/backbones/vipnas_resnet.py b/mmpose/models/backbones/vipnas_resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..81b028ed5f5caad5f59c68b7f82c1a4661cf4d6f
--- /dev/null
+++ b/mmpose/models/backbones/vipnas_resnet.py
@@ -0,0 +1,589 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, build_conv_layer, build_norm_layer
+from mmcv.cnn.bricks import ContextBlock
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class ViPNAS_Bottleneck(nn.Module):
+ """Bottleneck block for ViPNAS_ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the input/output channels of conv2. Default: 4.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
+ stride-two layer is the 3x3 conv layer, otherwise the stride-two
+ layer is the first 1x1 conv layer. Default: "pytorch".
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ kernel_size (int): kernel size of conv2 searched in ViPNAS.
+ groups (int): group number of conv2 searched in ViPNAS.
+ attention (bool): whether to use attention module in the end of
+ the block.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=4,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ kernel_size=3,
+ groups=1,
+ attention=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert style in ['pytorch', 'caffe']
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ if self.style == 'pytorch':
+ self.conv1_stride = 1
+ self.conv2_stride = stride
+ else:
+ self.conv1_stride = stride
+ self.conv2_stride = 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=kernel_size,
+ stride=self.conv2_stride,
+ padding=kernel_size // 2,
+ groups=groups,
+ dilation=dilation,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ if attention:
+ self.attention = ContextBlock(out_channels,
+ max(1.0 / 16, 16.0 / out_channels))
+ else:
+ self.attention = None
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ @property
+ def norm3(self):
+ """nn.Module: the normalization layer named "norm3" """
+ return getattr(self, self.norm3_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.attention is not None:
+ out = self.attention(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def get_expansion(block, expansion=None):
+ """Get the expansion of a residual block.
+
+ The block expansion will be obtained by the following order:
+
+ 1. If ``expansion`` is given, just return it.
+ 2. If ``block`` has the attribute ``expansion``, then return
+ ``block.expansion``.
+ 3. Return the default value according to the block type:
+ 1 for ``ViPNAS_Bottleneck``.
+
+ Args:
+ block (class): The block class.
+ expansion (int | None): The given expansion ratio.
+
+ Returns:
+ int: The expansion of the block.
+ """
+ if isinstance(expansion, int):
+ assert expansion > 0
+ elif expansion is None:
+ if hasattr(block, 'expansion'):
+ expansion = block.expansion
+ elif issubclass(block, ViPNAS_Bottleneck):
+ expansion = 1
+ else:
+ raise TypeError(f'expansion is not specified for {block.__name__}')
+ else:
+ raise TypeError('expansion must be an integer or None')
+
+ return expansion
+
+
+class ViPNAS_ResLayer(nn.Sequential):
+ """ViPNAS_ResLayer to build ResNet style backbone.
+
+ Args:
+ block (nn.Module): Residual block used to build ViPNAS ResLayer.
+ num_blocks (int): Number of blocks.
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int, optional): The expansion of the residual block.
+ If not specified, it will first be obtained via
+ ``block.expansion``. If the block has no attribute "expansion",
+ the default value of 1 for ``ViPNAS_Bottleneck`` is used.
+ Default: None.
+ stride (int): stride of the first block. Default: 1.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ downsample_first (bool): Downsample at the first block or last block.
+ False for Hourglass, True for ResNet. Default: True
+ kernel_size (int): Kernel Size of the corresponding convolution layer
+ searched in the block.
+ groups (int): Group number of the corresponding convolution layer
+ searched in the block.
+ attention (bool): Whether to use attention module in the end of the
+ block.
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ in_channels,
+ out_channels,
+ expansion=None,
+ stride=1,
+ avg_down=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ downsample_first=True,
+ kernel_size=3,
+ groups=1,
+ attention=False,
+ **kwargs):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ self.block = block
+ self.expansion = get_expansion(block, expansion)
+
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = []
+ conv_stride = stride
+ if avg_down and stride != 1:
+ conv_stride = 1
+ downsample.append(
+ nn.AvgPool2d(
+ kernel_size=stride,
+ stride=stride,
+ ceil_mode=True,
+ count_include_pad=False))
+ downsample.extend([
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=conv_stride,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1]
+ ])
+ downsample = nn.Sequential(*downsample)
+
+ layers = []
+ if downsample_first:
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ in_channels = out_channels
+ for _ in range(1, num_blocks):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ else: # downsample_first=False is for HourglassModule
+ for i in range(0, num_blocks - 1):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+
+ super().__init__(*layers)
+
+
+@BACKBONES.register_module()
+class ViPNAS_ResNet(BaseBackbone):
+ """ViPNAS_ResNet backbone.
+
+ "ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"
+ More details can be found in the `paper
+ `__ .
+
+ Args:
+ depth (int): Network depth, from {18, 34, 50, 101, 152}.
+ in_channels (int): Number of input image channels. Default: 3.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned;
+ if multiple stages are specified, a tuple of tensors is
+ returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+ wid (list(int)): Searched width config for each stage.
+ expan (list(int)): Searched expansion ratio config for each stage.
+ dep (list(int)): Searched depth config for each stage.
+ ks (list(int)): Searched kernel size config for each stage.
+ group (list(int)): Searched group number config for each stage.
+ att (list(bool)): Searched attention config for each stage.
+ """
+
+ arch_settings = {
+ 50: ViPNAS_Bottleneck,
+ }
+
+ def __init__(self,
+ depth,
+ in_channels=3,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True,
+ wid=[48, 80, 160, 304, 608],
+ expan=[None, 1, 1, 1, 1],
+ dep=[None, 4, 6, 7, 3],
+ ks=[7, 3, 5, 5, 5],
+ group=[None, 16, 16, 16, 16],
+ att=[None, True, False, True, True]):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for resnet')
+ self.depth = depth
+ self.stem_channels = dep[0]
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.block = self.arch_settings[depth]
+ self.stage_blocks = dep[1:1 + num_stages]
+
+ self._make_stem_layer(in_channels, wid[0], ks[0])
+
+ self.res_layers = []
+ _in_channels = wid[0]
+ for i, num_blocks in enumerate(self.stage_blocks):
+ expansion = get_expansion(self.block, expan[i + 1])
+ _out_channels = wid[i + 1] * expansion
+ stride = strides[i]
+ dilation = dilations[i]
+ res_layer = self.make_res_layer(
+ block=self.block,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=_out_channels,
+ expansion=expansion,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=ks[i + 1],
+ groups=group[i + 1],
+ attention=att[i + 1])
+ _in_channels = _out_channels
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = res_layer[-1].out_channels
+
+ def make_res_layer(self, **kwargs):
+ """Make a ViPNAS ResLayer."""
+ return ViPNAS_ResLayer(**kwargs)
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ def _make_stem_layer(self, in_channels, stem_channels, kernel_size):
+ """Make stem layer."""
+ if self.deep_stem:
+ self.stem = nn.Sequential(
+ ConvModule(
+ in_channels,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True))
+ else:
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ stem_channels,
+ kernel_size=kernel_size,
+ stride=2,
+ padding=kernel_size // 2,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, stem_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ if self.deep_stem:
+ self.stem.eval()
+ for param in self.stem.parameters():
+ param.requires_grad = False
+ else:
+ self.norm1.eval()
+ for m in [self.conv1, self.norm1]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = getattr(self, f'layer{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.normal_(m.weight, std=0.001)
+ for name, _ in m.named_parameters():
+ if name in ['bias']:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.deep_stem:
+ x = self.stem(x)
+ else:
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval has an effect on BatchNorm only
+ if isinstance(m, _BatchNorm):
+ m.eval()
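
The resolution order of `get_expansion`, mirrored with hypothetical stand-in classes so the snippet runs on its own (`DummyBottleneck` and `DummyBlockWithAttr` are illustrative, not part of this codebase):

```python
class DummyBottleneck:      # stands in for ViPNAS_Bottleneck here
    pass

class DummyBlockWithAttr:
    expansion = 2

def get_expansion(block, expansion=None):
    # same precedence as above: explicit value > class attribute > per-type default
    if isinstance(expansion, int):
        assert expansion > 0
    elif expansion is None:
        if hasattr(block, 'expansion'):
            expansion = block.expansion
        elif issubclass(block, DummyBottleneck):
            expansion = 1
        else:
            raise TypeError(f'expansion is not specified for {block.__name__}')
    else:
        raise TypeError('expansion must be an integer or None')
    return expansion

print(get_expansion(DummyBottleneck, 4))  # 4: an explicit value wins
print(get_expansion(DummyBlockWithAttr))  # 2: taken from the class attribute
print(get_expansion(DummyBottleneck))     # 1: the default for the bottleneck type
```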
diff --git a/mmpose/models/backbones/vit.py b/mmpose/models/backbones/vit.py
new file mode 100644
index 0000000000000000000000000000000000000000..2719d1a6991b67e1b0832247c2f1259bbacda3f6
--- /dev/null
+++ b/mmpose/models/backbones/vit.py
@@ -0,0 +1,341 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
+ Args:
+ abs_pos (Tensor): absolute positional embeddings with shape (1, num_position, C).
+ has_cls_token (bool): If true, abs_pos has one extra embedding for the cls token.
+ h, w / ori_h, ori_w (int): target and original sizes of the token grid.
+
+ Returns:
+ Tensor: absolute positional embeddings after processing, with shape (1, new_num_position, C).
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+ def forward(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
+@BACKBONES.register_module()
+class ViT(BaseBackbone):
+
+ def __init__(self,
+ img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+ num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+ drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+ frozen_stages=-1, ratio=1, last_norm=True,
+ patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+ ):
+ # Protect mutable default arguments
+ super(ViT, self).__init__()
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+ self.num_classes = num_classes
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.frozen_stages = frozen_stages
+ self.use_checkpoint = use_checkpoint
+ self.patch_padding = patch_padding
+ self.freeze_attn = freeze_attn
+ self.freeze_ffn = freeze_ffn
+ self.depth = depth
+
+ if hybrid_backbone is not None:
+ self.patch_embed = HybridEmbed(
+ hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+ else:
+ self.patch_embed = PatchEmbed(
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+ # since the pretraining model has class token
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
+
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+ )
+ for i in range(depth)])
+
+ self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=.02)
+
+ self._freeze_stages()
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = self.blocks[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if self.freeze_attn:
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.attn.eval()
+ m.norm1.eval()
+ for param in m.attn.parameters():
+ param.requires_grad = False
+ for param in m.norm1.parameters():
+ param.requires_grad = False
+
+ if self.freeze_ffn:
+ self.pos_embed.requires_grad = False
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.mlp.eval()
+ m.norm2.eval()
+ for param in m.mlp.parameters():
+ param.requires_grad = False
+ for param in m.norm2.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained, patch_padding=self.patch_padding)
+
+ if pretrained is None:
+ def _init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ self.apply(_init_weights)
+
+ def get_num_layers(self):
+ return len(self.blocks)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ def forward_features(self, x):
+ B, C, H, W = x.shape
+ x, (Hp, Wp) = self.patch_embed(x)
+
+ if self.pos_embed is not None:
+ # fit for multiple GPU training
+ # since the first element for pos embed (sin-cos manner) is zero, it will cause no difference
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ if self.use_checkpoint:
+ x = checkpoint.checkpoint(blk, x)
+ else:
+ x = blk(x)
+
+ x = self.last_norm(x)
+
+ xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+
+ return xp
+
+ def forward(self, x):
+ x = self.forward_features(x)
+ return x
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
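
A shape walk-through of the patch embedding and positional-embedding addition above, assuming a 256x192 input crop (a common pose-estimation resolution, not fixed by this code) with patch size 16 and ratio 1, for which the conv padding works out to `4 + 2*(1//2 - 1) = 2`:

```python
import torch
import torch.nn as nn

img = torch.randn(1, 3, 256, 192)
proj = nn.Conv2d(3, 768, kernel_size=16, stride=16, padding=2)  # the layer PatchEmbed builds for ratio=1
feat = proj(img)
print(feat.shape)                         # torch.Size([1, 768, 16, 12])
tokens = feat.flatten(2).transpose(1, 2)
print(tokens.shape)                       # torch.Size([1, 192, 768]) -> 16 * 12 patch tokens

pos_embed = torch.zeros(1, tokens.shape[1] + 1, 768)  # extra slot kept for the cls token
x = tokens + pos_embed[:, 1:] + pos_embed[:, :1]      # same broadcast as in forward_features
print(x.shape)                            # torch.Size([1, 192, 768])
```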
diff --git a/mmpose/models/backbones/vit_moe.py b/mmpose/models/backbones/vit_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..880a58fbb2ac2892ef6e1e349f4ef98e38c1d274
--- /dev/null
+++ b/mmpose/models/backbones/vit_moe.py
@@ -0,0 +1,385 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
+ Args:
+ abs_pos (Tensor): absolute positional embeddings with shape (1, num_position, C).
+ has_cls_token (bool): If true, abs_pos has one extra embedding for the cls token.
+ h, w / ori_h, ori_w (int): target and original sizes of the token grid.
+
+ Returns:
+ Tensor: absolute positional embeddings after processing, with shape (1, new_num_position, C).
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class MoEMlp(nn.Module):
+ def __init__(self, num_expert=1, in_features=1024, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., part_features=256):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.part_features = part_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features - part_features)
+ self.drop = nn.Dropout(drop)
+
+ self.num_expert = num_expert
+ experts = []
+
+ for i in range(num_expert):
+ experts.append(
+ nn.Linear(hidden_features, part_features)
+ )
+ self.experts = nn.ModuleList(experts)
+
+ def forward(self, x, indices):
+
+ expert_x = torch.zeros_like(x[:, :, -self.part_features:], device=x.device, dtype=x.dtype)
+
+ x = self.fc1(x)
+ x = self.act(x)
+ shared_x = self.fc2(x)
+ indices = indices.view(-1, 1, 1)
+
+ # to support ddp training
+ for i in range(self.num_expert):
+ selectedIndex = (indices == i)
+ current_x = self.experts[i](x) * selectedIndex
+ expert_x = expert_x + current_x
+
+ x = torch.cat([shared_x, expert_x], dim=-1)
+
+ return x
+
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None, num_expert=1, part_features=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = MoEMlp(num_expert=num_expert, in_features=dim, hidden_features=mlp_hidden_dim,
+ act_layer=act_layer, drop=drop, part_features=part_features)
+
+ def forward(self, x, indices=None):
+
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x), indices))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
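+        # a stride of patch_size // ratio gives a patch grid that is ratio times
+        # denser than the vanilla ViT grid; the padding above keeps the output
+        # spatial size equal to self.patch_shape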
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
+@BACKBONES.register_module()
+class ViTMoE(BaseBackbone):
+
+ def __init__(self,
+ img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+ num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+ drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+ frozen_stages=-1, ratio=1, last_norm=True,
+ patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+ num_expert=1, part_features=None
+ ):
+ # Protect mutable default arguments
+ super(ViTMoE, self).__init__()
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+ self.num_classes = num_classes
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.frozen_stages = frozen_stages
+ self.use_checkpoint = use_checkpoint
+ self.patch_padding = patch_padding
+ self.freeze_attn = freeze_attn
+ self.freeze_ffn = freeze_ffn
+ self.depth = depth
+
+ if hybrid_backbone is not None:
+ self.patch_embed = HybridEmbed(
+ hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+ else:
+ self.patch_embed = PatchEmbed(
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+ self.part_features = part_features
+
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
+
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+ num_expert=num_expert, part_features=part_features
+ )
+ for i in range(depth)])
+
+ self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=.02)
+
+ self._freeze_stages()
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = self.blocks[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if self.freeze_attn:
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.attn.eval()
+ m.norm1.eval()
+ for param in m.attn.parameters():
+ param.requires_grad = False
+ for param in m.norm1.parameters():
+ param.requires_grad = False
+
+ if self.freeze_ffn:
+ self.pos_embed.requires_grad = False
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.mlp.eval()
+ m.norm2.eval()
+ for param in m.mlp.parameters():
+ param.requires_grad = False
+ for param in m.norm2.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained, patch_padding=self.patch_padding, part_features=self.part_features)
+
+ if pretrained is None:
+ def _init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ self.apply(_init_weights)
+
+ def get_num_layers(self):
+ return len(self.blocks)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ def forward_features(self, x, dataset_source=None):
+ B, C, H, W = x.shape
+ x, (Hp, Wp) = self.patch_embed(x)
+
+ if self.pos_embed is not None:
+            # also add the cls-token slot so every element of pos_embed takes part
+            # in the computation (needed for multi-GPU/DDP training); for sin-cos
+            # embeddings the first element is zero, so this makes no numerical difference
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ if self.use_checkpoint:
+ x = checkpoint.checkpoint(blk, x, dataset_source)
+ else:
+ x = blk(x, dataset_source)
+
+ x = self.last_norm(x)
+
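+        # fold the token sequence back into a 2D feature map (B, C, Hp, Wp) for the head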
+ xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+
+ return xp
+
+ def forward(self, x, dataset_source=None):
+ x = self.forward_features(x, dataset_source)
+ return x
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
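
The `MoEMlp` block above is the part of this backbone that differs from a vanilla ViT: the FFN output is the concatenation of a shared part and a dataset-specific part produced by one expert per sample. The sketch below is not repository code; it uses toy shapes and a hypothetical `indices` tensor purely to illustrate the mask-and-sum routing that keeps every expert in the autograd graph.

```python
import torch
import torch.nn as nn

num_expert, hidden, part = 3, 8, 4
experts = nn.ModuleList([nn.Linear(hidden, part) for _ in range(num_expert)])

x = torch.randn(2, 5, hidden)        # (batch, tokens, hidden)
indices = torch.tensor([0, 2])       # which expert each sample routes to
indices = indices.view(-1, 1, 1)     # broadcast over tokens and channels

expert_x = torch.zeros(2, 5, part)
for i, expert in enumerate(experts):
    # every expert sees the full batch; the boolean mask zeroes out samples
    # that belong to other experts, so unused experts still receive (zero) grads
    expert_x = expert_x + expert(x) * (indices == i)

print(expert_x.shape)                # torch.Size([2, 5, 4])
```
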
diff --git a/mmpose/models/builder.py b/mmpose/models/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..220839d47d6b1e66a06eb143b1f1ef8145c6a3be
--- /dev/null
+++ b/mmpose/models/builder.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import MODELS as MMCV_MODELS
+from mmcv.cnn import build_model_from_cfg
+from mmcv.utils import Registry
+
+MODELS = Registry(
+ 'models', build_func=build_model_from_cfg, parent=MMCV_MODELS)
+
+BACKBONES = MODELS
+NECKS = MODELS
+HEADS = MODELS
+LOSSES = MODELS
+POSENETS = MODELS
+MESH_MODELS = MODELS
+
+
+def build_backbone(cfg):
+ """Build backbone."""
+ return BACKBONES.build(cfg)
+
+
+def build_neck(cfg):
+ """Build neck."""
+ return NECKS.build(cfg)
+
+
+def build_head(cfg):
+ """Build head."""
+ return HEADS.build(cfg)
+
+
+def build_loss(cfg):
+ """Build loss."""
+ return LOSSES.build(cfg)
+
+
+def build_posenet(cfg):
+ """Build posenet."""
+ return POSENETS.build(cfg)
+
+
+def build_mesh_model(cfg):
+ """Build mesh model."""
+ return MESH_MODELS.build(cfg)
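
For readers unfamiliar with mmcv registries: all of the names above alias a single `MODELS` registry, so `@BACKBONES.register_module()` and `build_backbone(cfg)` are just registration and config-driven construction. A hedged usage sketch follows; the `TinyBackbone` class is hypothetical and the snippet assumes mmcv is installed and this package is importable.

```python
import torch.nn as nn

from mmpose.models.builder import BACKBONES, build_backbone


@BACKBONES.register_module()
class TinyBackbone(nn.Module):
    """Hypothetical backbone, registered only to show the mechanism."""

    def __init__(self, channels=16):
        super().__init__()
        self.conv = nn.Conv2d(3, channels, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)


# 'type' selects the registered class; the remaining keys become constructor kwargs
backbone = build_backbone(dict(type='TinyBackbone', channels=32))
```
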
diff --git a/mmpose/models/detectors/__init__.py b/mmpose/models/detectors/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0982094c96295f3f8a0e63e1e0a15964c2c286a
--- /dev/null
+++ b/mmpose/models/detectors/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .associative_embedding import AssociativeEmbedding
+from .interhand_3d import Interhand3D
+from .mesh import ParametricMesh
+from .multi_task import MultiTask
+from .multiview_pose import (DetectAndRegress, VoxelCenterDetector,
+ VoxelSinglePose)
+from .pose_lifter import PoseLifter
+from .posewarper import PoseWarper
+from .top_down import TopDown
+from .top_down_moe import TopDownMoE
+
+__all__ = [
+ 'TopDown', 'AssociativeEmbedding', 'ParametricMesh', 'MultiTask',
+ 'PoseLifter', 'Interhand3D', 'PoseWarper', 'DetectAndRegress',
+ 'VoxelCenterDetector', 'VoxelSinglePose', 'TopDownMoE'
+]
diff --git a/mmpose/models/detectors/__pycache__/__init__.cpython-310.pyc b/mmpose/models/detectors/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f04933b799275b48fa6700d775f9113515d7d67
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/associative_embedding.cpython-310.pyc b/mmpose/models/detectors/__pycache__/associative_embedding.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68efd8b8a0b51077fb4461c77e325ff40a494f49
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/associative_embedding.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/base.cpython-310.pyc b/mmpose/models/detectors/__pycache__/base.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7f362f02710d80470c4894a44bf87a70faaa3f7e
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/base.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/interhand_3d.cpython-310.pyc b/mmpose/models/detectors/__pycache__/interhand_3d.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1502c9746f5f35947b64feaff436dde66dd393e9
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/interhand_3d.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/mesh.cpython-310.pyc b/mmpose/models/detectors/__pycache__/mesh.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b28dff9129f86dc0f3b9a506466ee907f13492d0
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/mesh.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/multi_task.cpython-310.pyc b/mmpose/models/detectors/__pycache__/multi_task.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..27a332524a8bdaa78722e91c244d0dbcf980faa0
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/multi_task.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/multiview_pose.cpython-310.pyc b/mmpose/models/detectors/__pycache__/multiview_pose.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2929d6f9e1bd21149fb4827ff11d508c11a327d
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/multiview_pose.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/pose_lifter.cpython-310.pyc b/mmpose/models/detectors/__pycache__/pose_lifter.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..956277230816c9b52a049ee01e8c858fb38503f2
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/pose_lifter.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/posewarper.cpython-310.pyc b/mmpose/models/detectors/__pycache__/posewarper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..415a6464ab7fb1e97509605ea75f0dc817a52b64
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/posewarper.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/top_down.cpython-310.pyc b/mmpose/models/detectors/__pycache__/top_down.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b47ce719c2aa4834ee2fdf2e0b63d5754de11d76
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/top_down.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/__pycache__/top_down_moe.cpython-310.pyc b/mmpose/models/detectors/__pycache__/top_down_moe.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7602da21b66453a9502e60dbdc6c23983dc631b8
Binary files /dev/null and b/mmpose/models/detectors/__pycache__/top_down_moe.cpython-310.pyc differ
diff --git a/mmpose/models/detectors/associative_embedding.py b/mmpose/models/detectors/associative_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..100c7806d361d323abb720eb8ad5649ddc3c1a03
--- /dev/null
+++ b/mmpose/models/detectors/associative_embedding.py
@@ -0,0 +1,420 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import torch
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core.evaluation import (aggregate_scale, aggregate_stage_flip,
+ flip_feature_maps, get_group_preds,
+ split_ae_outputs)
+from mmpose.core.post_processing.group import HeatmapParser
+from mmpose.core.visualization import imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+                  'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class AssociativeEmbedding(BasePose):
+ """Associative embedding pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+        loss_pose (None): Deprecated argument. Please use
+            ``loss_keypoint`` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ if keypoint_head is not None:
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for BottomUp is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+ self.use_udp = test_cfg.get('use_udp', False)
+ self.parser = HeatmapParser(self.test_cfg)
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img=None,
+ targets=None,
+ masks=None,
+ joints=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss is True.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C
+ - img_width: imgW
+ - img_height: imgH
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input image.
+ targets (list(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (list(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (list(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ img_metas (dict): Information about val & test.
+ By default it includes:
+
+ - "image_file": image path
+ - "aug_data": input
+ - "test_scale_factor": test scale factor
+ - "base_size": base size of input
+ - "center": center of image
+ - "scale": scale of image
+ - "flip_index": flip index of keypoints
+            return_loss (bool): ``return_loss=True`` for training,
+ ``return_loss=False`` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if 'return_loss' is true, then return losses. \
+ Otherwise, return predicted poses, scores, image \
+ paths and heatmaps.
+ """
+
+ if return_loss:
+ return self.forward_train(img, targets, masks, joints, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, img, targets, masks, joints, img_metas, **kwargs):
+ """Forward the bottom-up model and calculate the loss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+            heatmaps width: W
+ heatmaps height: H
+ max_num_people: M
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input image.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+            img_metas (dict): Information about val & test.
+ By default this includes:
+ - "image_file": image path
+ - "aug_data": input
+ - "test_scale_factor": test scale factor
+ - "base_size": base size of input
+ - "center": center of image
+ - "scale": scale of image
+ - "flip_index": flip index of keypoints
+
+ Returns:
+ dict: The total loss for bottom-up
+ """
+
+ output = self.backbone(img)
+
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, targets, masks, joints)
+ losses.update(keypoint_losses)
+
+ return losses
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Outputs.
+ """
+ output = self.backbone(img)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Inference the bottom-up model.
+
+ Note:
+            - batch_size: N (currently only batch_size=1 is supported)
+ - num_img_channel: C
+ - img_width: imgW
+ - img_height: imgH
+
+ Args:
+ flip_index (List(int)):
+ aug_data (List(Tensor[NxCximgHximgW])): Multi-scale image
+ test_scale_factor (List(float)): Multi-scale factor
+ base_size (Tuple(int)): Base size of image when scale is 1
+ center (np.ndarray): center of image
+ scale (np.ndarray): the scale of image
+ """
+ assert img.size(0) == 1
+ assert len(img_metas) == 1
+
+ img_metas = img_metas[0]
+
+ aug_data = img_metas['aug_data']
+
+ test_scale_factor = img_metas['test_scale_factor']
+ base_size = img_metas['base_size']
+ center = img_metas['center']
+ scale = img_metas['scale']
+
+ result = {}
+
+ scale_heatmaps_list = []
+ scale_tags_list = []
+
+ for idx, s in enumerate(sorted(test_scale_factor, reverse=True)):
+ image_resized = aug_data[idx].to(img.device)
+
+ features = self.backbone(image_resized)
+ if self.with_keypoint:
+ outputs = self.keypoint_head(features)
+
+ heatmaps, tags = split_ae_outputs(
+ outputs, self.test_cfg['num_joints'],
+ self.test_cfg['with_heatmaps'], self.test_cfg['with_ae'],
+ self.test_cfg.get('select_output_index', range(len(outputs))))
+
+ if self.test_cfg.get('flip_test', True):
+ # use flip test
+ features_flipped = self.backbone(
+ torch.flip(image_resized, [3]))
+ if self.with_keypoint:
+ outputs_flipped = self.keypoint_head(features_flipped)
+
+ heatmaps_flipped, tags_flipped = split_ae_outputs(
+ outputs_flipped, self.test_cfg['num_joints'],
+ self.test_cfg['with_heatmaps'], self.test_cfg['with_ae'],
+ self.test_cfg.get('select_output_index',
+ range(len(outputs))))
+
+ heatmaps_flipped = flip_feature_maps(
+ heatmaps_flipped, flip_index=img_metas['flip_index'])
+ if self.test_cfg['tag_per_joint']:
+ tags_flipped = flip_feature_maps(
+ tags_flipped, flip_index=img_metas['flip_index'])
+ else:
+ tags_flipped = flip_feature_maps(
+ tags_flipped, flip_index=None, flip_output=True)
+
+ else:
+ heatmaps_flipped = None
+ tags_flipped = None
+
+ aggregated_heatmaps = aggregate_stage_flip(
+ heatmaps,
+ heatmaps_flipped,
+ index=-1,
+ project2image=self.test_cfg['project2image'],
+ size_projected=base_size,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_stage='average',
+ aggregate_flip='average')
+
+ aggregated_tags = aggregate_stage_flip(
+ tags,
+ tags_flipped,
+ index=-1,
+ project2image=self.test_cfg['project2image'],
+ size_projected=base_size,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_stage='concat',
+ aggregate_flip='concat')
+
+ if s == 1 or len(test_scale_factor) == 1:
+ if isinstance(aggregated_tags, list):
+ scale_tags_list.extend(aggregated_tags)
+ else:
+ scale_tags_list.append(aggregated_tags)
+
+ if isinstance(aggregated_heatmaps, list):
+ scale_heatmaps_list.extend(aggregated_heatmaps)
+ else:
+ scale_heatmaps_list.append(aggregated_heatmaps)
+
+ aggregated_heatmaps = aggregate_scale(
+ scale_heatmaps_list,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_scale='average')
+
+ aggregated_tags = aggregate_scale(
+ scale_tags_list,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_scale='unsqueeze_concat')
+
+ heatmap_size = aggregated_heatmaps.shape[2:4]
+ tag_size = aggregated_tags.shape[2:4]
+ if heatmap_size != tag_size:
+ tmp = []
+ for idx in range(aggregated_tags.shape[-1]):
+ tmp.append(
+ torch.nn.functional.interpolate(
+ aggregated_tags[..., idx],
+ size=heatmap_size,
+ mode='bilinear',
+ align_corners=self.test_cfg.get('align_corners',
+ True)).unsqueeze(-1))
+ aggregated_tags = torch.cat(tmp, dim=-1)
+
+ # perform grouping
+ grouped, scores = self.parser.parse(aggregated_heatmaps,
+ aggregated_tags,
+ self.test_cfg['adjust'],
+ self.test_cfg['refine'])
+
+ preds = get_group_preds(
+ grouped,
+ center,
+ scale, [aggregated_heatmaps.size(3),
+ aggregated_heatmaps.size(2)],
+ use_udp=self.use_udp)
+
+ image_paths = []
+ image_paths.append(img_metas['image_file'])
+
+ if return_heatmap:
+ output_heatmap = aggregated_heatmaps.detach().cpu().numpy()
+ else:
+ output_heatmap = None
+
+ result['preds'] = preds
+ result['scores'] = scores
+ result['image_paths'] = image_paths
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='AssociativeEmbedding')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+            skeleton (list[list]): The connections between keypoints,
+                given as 0-based keypoint indices.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized image only if not `show` or `out_file`
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+ img_h, img_w, _ = img.shape
+
+ pose_result = []
+ for res in result:
+ pose_result.append(res['keypoints'])
+
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius, thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
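
The flip-test branch in `forward_test` above is easier to see in isolation: run the backbone on a mirrored image, mirror the heatmaps back, swap left/right channels with `flip_index`, then average. The following is a standalone sketch with assumed shapes, not the repository's `flip_feature_maps`/`aggregate_stage_flip` helpers.

```python
import torch


def flip_test(model, img, flip_index):
    """Average heatmaps from the original and the horizontally flipped image."""
    heatmaps = model(img)                                  # (N, K, H, W)
    heatmaps_flip = model(torch.flip(img, dims=[3]))       # run on mirrored input
    heatmaps_flip = torch.flip(heatmaps_flip, dims=[3])    # mirror heatmaps back
    heatmaps_flip = heatmaps_flip[:, flip_index]           # swap left/right joints
    return 0.5 * (heatmaps + heatmaps_flip)
```
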
diff --git a/mmpose/models/detectors/base.py b/mmpose/models/detectors/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d459b42de66012c88ff37d7d845265d06efebc7
--- /dev/null
+++ b/mmpose/models/detectors/base.py
@@ -0,0 +1,131 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+from collections import OrderedDict
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+
+
+class BasePose(nn.Module, metaclass=ABCMeta):
+ """Base class for pose detectors.
+
+    All pose detectors should subclass it.
+    All subclasses should override:
+        ``forward_train``, which defines the forward pass during training.
+        ``forward_test``, which defines the forward pass during testing.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ head (dict): Head modules to give output.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ """
+
+ @abstractmethod
+ def forward_train(self, img, img_metas, **kwargs):
+ """Defines the computation performed at training."""
+
+ @abstractmethod
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at testing."""
+
+ @abstractmethod
+ def forward(self, img, img_metas, return_loss=True, **kwargs):
+ """Forward function."""
+
+ @staticmethod
+ def _parse_losses(losses):
+ """Parse the raw outputs (losses) of the network.
+
+ Args:
+ losses (dict): Raw output of the network, which usually contain
+ losses and other necessary information.
+
+ Returns:
+ tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor \
+ which may be a weighted sum of all losses, log_vars \
+ contains all the variables to be sent to the logger.
+ """
+ log_vars = OrderedDict()
+ for loss_name, loss_value in losses.items():
+ if isinstance(loss_value, torch.Tensor):
+ log_vars[loss_name] = loss_value.mean()
+ elif isinstance(loss_value, float):
+ log_vars[loss_name] = loss_value
+ elif isinstance(loss_value, list):
+ log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
+ else:
+ raise TypeError(
+ f'{loss_name} is not a tensor or list of tensors or float')
+
+ loss = sum(_value for _key, _value in log_vars.items()
+ if 'loss' in _key)
+
+ log_vars['loss'] = loss
+ for loss_name, loss_value in log_vars.items():
+ # reduce loss when distributed training
+ if not isinstance(loss_value, float):
+ if dist.is_available() and dist.is_initialized():
+ loss_value = loss_value.data.clone()
+ dist.all_reduce(loss_value.div_(dist.get_world_size()))
+ log_vars[loss_name] = loss_value.item()
+ else:
+ log_vars[loss_name] = loss_value
+
+ return loss, log_vars
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during training.
+
+ This method defines an iteration step during training, except for the
+ back propagation and optimizer updating, which are done in an optimizer
+ hook. Note that in some complicated cases or models, the whole process
+ including back propagation and optimizer updating is also defined in
+ this method, such as GAN.
+
+ Args:
+ data_batch (dict): The output of dataloader.
+ optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+ runner is passed to ``train_step()``. This argument is unused
+ and reserved.
+
+ Returns:
+ dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+ ``num_samples``.
+ ``loss`` is a tensor for back propagation, which can be a
+ weighted sum of multiple losses.
+ ``log_vars`` contains all the variables to be sent to the
+ logger.
+ ``num_samples`` indicates the batch size (when the model is
+ DDP, it means the batch size on each GPU), which is used for
+ averaging the logs.
+ """
+ losses = self.forward(**data_batch)
+
+ loss, log_vars = self._parse_losses(losses)
+
+ outputs = dict(
+ loss=loss,
+ log_vars=log_vars,
+ num_samples=len(next(iter(data_batch.values()))))
+
+ return outputs
+
+ def val_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during validation.
+
+ This method shares the same signature as :func:`train_step`, but used
+ during val epochs. Note that the evaluation after training epochs is
+ not implemented with this method, but an evaluation hook.
+ """
+ results = self.forward(return_loss=False, **data_batch)
+
+ outputs = dict(results=results)
+
+ return outputs
+
+ @abstractmethod
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
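
`_parse_losses` above is the glue between the loss dicts returned by heads and the scalar used for backprop: tensor entries are mean-reduced, every key containing `'loss'` is summed into the total, and everything lands in `log_vars`. A small sketch with made-up values:

```python
import torch

losses = {
    'heatmap_loss': torch.tensor([0.8, 1.2]),  # per-sample loss values
    'acc_pose': 0.75,                          # plain floats are only logged
}

log_vars = {}
for name, value in losses.items():
    log_vars[name] = value.mean() if isinstance(value, torch.Tensor) else value

# only keys containing 'loss' contribute to the scalar used for backprop
total_loss = sum(v for k, v in log_vars.items() if 'loss' in k)
log_vars['loss'] = total_loss
print(float(total_loss))  # ~1.0
```
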
diff --git a/mmpose/models/detectors/interhand_3d.py b/mmpose/models/detectors/interhand_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a4d6bde1b097d1649a65de8075744ac1978ad15
--- /dev/null
+++ b/mmpose/models/detectors/interhand_3d.py
@@ -0,0 +1,227 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+from mmcv.utils.misc import deprecated_api_warning
+
+from mmpose.core import imshow_keypoints, imshow_keypoints_3d
+from ..builder import POSENETS
+from .top_down import TopDown
+
+
+@POSENETS.register_module()
+class Interhand3D(TopDown):
+    """Top-down interhand 3D pose detector, following Gyeongsik Moon et al.:
+
+    "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+    Estimation from a Single RGB Image". A child class of the TopDown detector.
+ """
+
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. list[Tensor], list[list[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (list[torch.Tensor]): Target heatmaps, relative hand
+ root depth and hand type.
+ target_weight (list[torch.Tensor]): Weights for target
+ heatmaps, relative hand root depth and hand type.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ - "heatmap3d_depth_bound": depth bound of hand keypoint 3D
+ heatmap
+ - "root_depth_bound": depth bound of relative root depth 1D
+ heatmap
+            return_loss (bool): Option to return loss. `return_loss=True`
+                for training, `return_loss=False` for validation & test.
+
+ Returns:
+            dict|tuple: if `return_loss` is True, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths, \
+ heatmaps, relative hand root depth and hand type.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ features = self.backbone(img)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output = [(out + out_flipped) * 0.5
+ for out, out_flipped in zip(output, output_flipped)]
+
+ if self.with_keypoint:
+ result = self.keypoint_head.decode(
+ img_metas, output, img_size=[img_width, img_height])
+ else:
+ result = {}
+ return result
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='Interhand3D')
+ def show_result(self,
+ result,
+ img=None,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ radius=8,
+ bbox_color='green',
+ thickness=2,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ vis_height=400,
+ num_instances=-1,
+ win_name='',
+ show=False,
+ wait_time=0,
+ out_file=None):
+ """Visualize 3D pose estimation results.
+
+ Args:
+ result (list[dict]): The pose estimation results containing:
+
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
+ 2D inputs. If a sequence is given, only the last frame
+ will be used for visualization
+ - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
+ - "title" (str): title for the subplot
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
+ links, each is a pair of joint indices.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ radius (int): Radius of circles.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ thickness (int): Thickness of lines.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M limbs.
+ If None, do not draw limbs.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ num_instances (int): Number of instances to be shown in 3D. If
+ smaller than 0, all the instances in the pose_result will be
+ shown. Otherwise, pad or truncate the pose_result to a length
+ of num_instances.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ if num_instances < 0:
+ assert len(result) > 0
+ result = sorted(result, key=lambda x: x.get('track_id', 0))
+
+ # draw image and 2d poses
+ if img is not None:
+ img = mmcv.imread(img)
+
+ bbox_result = []
+ pose_2d = []
+ for res in result:
+ if 'bbox' in res:
+ bbox = np.array(res['bbox'])
+ if bbox.ndim != 1:
+ assert bbox.ndim == 2
+ bbox = bbox[-1] # Get bbox from the last frame
+ bbox_result.append(bbox)
+ if 'keypoints' in res:
+ kpts = np.array(res['keypoints'])
+ if kpts.ndim != 2:
+ assert kpts.ndim == 3
+ kpts = kpts[-1] # Get 2D keypoints from the last frame
+ pose_2d.append(kpts)
+
+ if len(bbox_result) > 0:
+ bboxes = np.vstack(bbox_result)
+ mmcv.imshow_bboxes(
+ img,
+ bboxes,
+ colors=bbox_color,
+ top_k=-1,
+ thickness=2,
+ show=False)
+ if len(pose_2d) > 0:
+ imshow_keypoints(
+ img,
+ pose_2d,
+ skeleton,
+ kpt_score_thr=kpt_score_thr,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ radius=radius,
+ thickness=thickness)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ img_vis = imshow_keypoints_3d(
+ result,
+ img,
+ skeleton,
+ pose_kpt_color,
+ pose_link_color,
+ vis_height,
+ axis_limit=300,
+ axis_azimuth=-115,
+ axis_elev=15,
+ kpt_score_thr=kpt_score_thr,
+ num_instances=num_instances)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
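
The flip test in `forward_test` above differs from the single-heatmap case because the head returns several outputs (hand heatmaps, relative root depth, hand type); each one is averaged element-wise with its flipped counterpart, as in this toy sketch (shapes are illustrative only):

```python
import torch

output = [torch.rand(1, 42, 64, 64), torch.rand(1, 1), torch.rand(1, 2)]
output_flipped = [torch.rand_like(o) for o in output]

# same element-wise averaging as `(out + out_flipped) * 0.5` in forward_test
output = [(o + o_flip) * 0.5 for o, o_flip in zip(output, output_flipped)]
```
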
diff --git a/mmpose/models/detectors/mesh.py b/mmpose/models/detectors/mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..0af18e3844659c7d2a3755ab891819bbf7ef4c22
--- /dev/null
+++ b/mmpose/models/detectors/mesh.py
@@ -0,0 +1,438 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import mmcv
+import numpy as np
+import torch
+
+from mmpose.core.visualization.image import imshow_mesh_3d
+from mmpose.models.misc.discriminator import SMPLDiscriminator
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+
+def set_requires_grad(nets, requires_grad=False):
+    """Set requires_grad for all the networks.
+
+ Args:
+ nets (nn.Module | list[nn.Module]): A list of networks or a single
+ network.
+ requires_grad (bool): Whether the networks require gradients or not
+ """
+ if not isinstance(nets, list):
+ nets = [nets]
+ for net in nets:
+ if net is not None:
+ for param in net.parameters():
+ param.requires_grad = requires_grad
+
+
+@POSENETS.register_module()
+class ParametricMesh(BasePose):
+ """Model-based 3D human mesh detector. Take a single color image as input
+ and output 3D joints, SMPL parameters and camera parameters.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ mesh_head (dict): Mesh head to process feature.
+ smpl (dict): Config for SMPL model.
+ disc (dict): Discriminator for SMPL parameters. Default: None.
+ loss_gan (dict): Config for adversarial loss. Default: None.
+ loss_mesh (dict): Config for mesh loss. Default: None.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ """
+
+ def __init__(self,
+ backbone,
+ mesh_head,
+ smpl,
+ disc=None,
+ loss_gan=None,
+ loss_mesh=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super().__init__()
+
+ self.backbone = builder.build_backbone(backbone)
+ self.mesh_head = builder.build_head(mesh_head)
+ self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)
+
+ self.smpl = builder.build_mesh_model(smpl)
+
+ self.with_gan = disc is not None and loss_gan is not None
+ if self.with_gan:
+ self.discriminator = SMPLDiscriminator(**disc)
+ self.loss_gan = builder.build_loss(loss_gan)
+ self.disc_step_count = 0
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ self.loss_mesh = builder.build_loss(loss_mesh)
+ self.init_weights(pretrained=pretrained)
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ self.mesh_head.init_weights()
+ if self.with_gan:
+ self.discriminator.init_weights()
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """Train step function.
+
+ In this function, the detector will finish the train step following
+ the pipeline:
+
+ 1. get fake and real SMPL parameters
+ 2. optimize discriminator (if have)
+ 3. optimize generator
+
+        If `self.train_cfg.disc_step > 1`, each train step optimizes the
+        discriminator on a different batch, and the generator is optimized
+        only once every `disc_step` iterations.
+
+ Args:
+ data_batch (torch.Tensor): Batch of data as input.
+ optimizer (dict[torch.optim.Optimizer]): Dict with optimizers for
+ generator and discriminator (if have).
+
+ Returns:
+ outputs (dict): Dict with loss, information for logger,
+ the number of samples.
+ """
+
+ img = data_batch['img']
+ pred_smpl = self.generator(img)
+ pred_pose, pred_beta, pred_camera = pred_smpl
+
+ # optimize discriminator (if have)
+ if self.train_cfg['disc_step'] > 0 and self.with_gan:
+ set_requires_grad(self.discriminator, True)
+ fake_data = (pred_camera.detach(), pred_pose.detach(),
+ pred_beta.detach())
+ mosh_theta = data_batch['mosh_theta']
+            real_data = (mosh_theta[:, :3], mosh_theta[:, 3:75],
+                         mosh_theta[:, 75:])
+ fake_score = self.discriminator(fake_data)
+ real_score = self.discriminator(real_data)
+
+ disc_losses = {}
+ disc_losses['real_loss'] = self.loss_gan(
+ real_score, target_is_real=True, is_disc=True)
+ disc_losses['fake_loss'] = self.loss_gan(
+ fake_score, target_is_real=False, is_disc=True)
+ loss_disc, log_vars_d = self._parse_losses(disc_losses)
+
+ optimizer['discriminator'].zero_grad()
+ loss_disc.backward()
+ optimizer['discriminator'].step()
+ self.disc_step_count = \
+ (self.disc_step_count + 1) % self.train_cfg['disc_step']
+
+ if self.disc_step_count != 0:
+ outputs = dict(
+ loss=loss_disc,
+ log_vars=log_vars_d,
+ num_samples=len(next(iter(data_batch.values()))))
+ return outputs
+
+ # optimize generator
+ pred_out = self.smpl(
+ betas=pred_beta,
+ body_pose=pred_pose[:, 1:],
+ global_orient=pred_pose[:, :1])
+ pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
+ 'joints']
+
+ gt_beta = data_batch['beta']
+ gt_pose = data_batch['pose']
+ gt_vertices = self.smpl(
+ betas=gt_beta,
+ body_pose=gt_pose[:, 3:],
+ global_orient=gt_pose[:, :3])['vertices']
+
+ pred = dict(
+ pose=pred_pose,
+ beta=pred_beta,
+ camera=pred_camera,
+ vertices=pred_vertices,
+ joints_3d=pred_joints_3d)
+
+ target = {
+ key: data_batch[key]
+ for key in [
+ 'pose', 'beta', 'has_smpl', 'joints_3d', 'joints_2d',
+ 'joints_3d_visible', 'joints_2d_visible'
+ ]
+ }
+ target['vertices'] = gt_vertices
+
+ losses = self.loss_mesh(pred, target)
+
+ if self.with_gan:
+ set_requires_grad(self.discriminator, False)
+ pred_theta = (pred_camera, pred_pose, pred_beta)
+ pred_score = self.discriminator(pred_theta)
+ loss_adv = self.loss_gan(
+ pred_score, target_is_real=True, is_disc=False)
+ losses['adv_loss'] = loss_adv
+
+ loss, log_vars = self._parse_losses(losses)
+ optimizer['generator'].zero_grad()
+ loss.backward()
+ optimizer['generator'].step()
+
+ outputs = dict(
+ loss=loss,
+ log_vars=log_vars,
+ num_samples=len(next(iter(data_batch.values()))))
+
+ return outputs
+
+ def forward_train(self, *args, **kwargs):
+ """Forward function for training.
+
+ For ParametricMesh, we do not use this interface.
+ """
+ raise NotImplementedError('This interface should not be used in '
+ 'current training schedule. Please use '
+ '`train_step` for training.')
+
+ def val_step(self, data_batch, **kwargs):
+ """Forward function for evaluation.
+
+ Args:
+ data_batch (dict): Contain data for forward.
+
+ Returns:
+ dict: Contain the results from model.
+ """
+ output = self.forward_test(**data_batch, **kwargs)
+ return output
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Outputs.
+ """
+ output = self.generator(img)
+ return output
+
+ def forward_test(self,
+ img,
+ img_metas,
+ return_vertices=False,
+ return_faces=False,
+ **kwargs):
+ """Defines the computation performed at every call when testing."""
+
+ pred_smpl = self.generator(img)
+ pred_pose, pred_beta, pred_camera = pred_smpl
+ pred_out = self.smpl(
+ betas=pred_beta,
+ body_pose=pred_pose[:, 1:],
+ global_orient=pred_pose[:, :1])
+ pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
+ 'joints']
+
+ all_preds = {}
+ all_preds['keypoints_3d'] = pred_joints_3d.detach().cpu().numpy()
+ all_preds['smpl_pose'] = pred_pose.detach().cpu().numpy()
+ all_preds['smpl_beta'] = pred_beta.detach().cpu().numpy()
+ all_preds['camera'] = pred_camera.detach().cpu().numpy()
+
+ if return_vertices:
+ all_preds['vertices'] = pred_vertices.detach().cpu().numpy()
+ if return_faces:
+ all_preds['faces'] = self.smpl.get_faces()
+
+ all_boxes = []
+ image_path = []
+ for img_meta in img_metas:
+ box = np.zeros(6, dtype=np.float32)
+ c = img_meta['center']
+ s = img_meta['scale']
+            if 'bbox_score' in img_meta:
+                score = np.array(img_meta['bbox_score']).reshape(-1)
+ else:
+ score = 1.0
+ box[0:2] = c
+ box[2:4] = s
+ box[4] = np.prod(s * 200.0, axis=0)
+ box[5] = score
+ all_boxes.append(box)
+ image_path.append(img_meta['image_file'])
+
+ all_preds['bboxes'] = np.stack(all_boxes, axis=0)
+ all_preds['image_path'] = image_path
+ return all_preds
+
+ def get_3d_joints_from_mesh(self, vertices):
+ """Get 3D joints from 3D mesh using predefined joints regressor."""
+ return torch.matmul(
+ self.joints_regressor.to(vertices.device), vertices)
+
+ def forward(self, img, img_metas=None, return_loss=False, **kwargs):
+ """Forward function.
+
+ Calls either forward_train or forward_test depending on whether
+ return_loss=True.
+
+ Note:
+ - batch_size: N
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+
+ Args:
+ img (torch.Tensor[N x C x imgH x imgW]): Input images.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+            return_loss (bool): Option to return loss. `return_loss=True`
+                for training, `return_loss=False` for validation & test.
+
+ Returns:
+ Return predicted 3D joints, SMPL parameters, boxes and image paths.
+ """
+
+ if return_loss:
+ return self.forward_train(img, img_metas, **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def show_result(self,
+ result,
+ img,
+ show=False,
+ out_file=None,
+ win_name='',
+ wait_time=0,
+ bbox_color='green',
+ mesh_color=(76, 76, 204),
+ **kwargs):
+ """Visualize 3D mesh estimation results.
+
+ Args:
+ result (list[dict]): The mesh estimation results containing:
+
+ - "bbox" (ndarray[4]): instance bounding bbox
+ - "center" (ndarray[2]): bbox center
+ - "scale" (ndarray[2]): bbox scale
+ - "keypoints_3d" (ndarray[K,3]): predicted 3D keypoints
+ - "camera" (ndarray[3]): camera parameters
+ - "vertices" (ndarray[V, 3]): predicted 3D vertices
+ - "faces" (ndarray[F, 3]): mesh faces
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ wait_time (int): Value of waitKey param. Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ mesh_color (str or tuple or :obj:`Color`): Color of mesh surface.
+
+ Returns:
+ ndarray: Visualized img, only if not `show` or `out_file`.
+ """
+
+ if img is not None:
+ img = mmcv.imread(img)
+
+ focal_length = self.loss_mesh.focal_length
+ H, W, C = img.shape
+ img_center = np.array([[0.5 * W], [0.5 * H]])
+
+ # show bounding boxes
+ bboxes = [res['bbox'] for res in result]
+ bboxes = np.vstack(bboxes)
+ mmcv.imshow_bboxes(
+ img, bboxes, colors=bbox_color, top_k=-1, thickness=2, show=False)
+
+ vertex_list = []
+ face_list = []
+ for res in result:
+ vertices = res['vertices']
+ faces = res['faces']
+ camera = res['camera']
+ camera_center = res['center']
+ scale = res['scale']
+
+ # predicted vertices are in root-relative space,
+ # we need to translate them to camera space.
+ translation = np.array([
+ camera[1], camera[2],
+ 2 * focal_length / (scale[0] * 200.0 * camera[0] + 1e-9)
+ ])
+ mean_depth = vertices[:, -1].mean() + translation[-1]
+ translation[:2] += (camera_center -
+ img_center[:, 0]) / focal_length * mean_depth
+ vertices += translation[None, :]
+
+ vertex_list.append(vertices)
+ face_list.append(faces)
+
+ # render from front view
+ img_vis = imshow_mesh_3d(
+ img,
+ vertex_list,
+ face_list,
+ img_center, [focal_length, focal_length],
+ colors=mesh_color)
+
+ # render from side view
+ # rotate mesh vertices
+ R = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0]
+ rot_vertex_list = [np.dot(vert, R) for vert in vertex_list]
+
+ # get the 3D bbox containing all meshes
+ rot_vertices = np.concatenate(rot_vertex_list, axis=0)
+ min_corner = rot_vertices.min(0)
+ max_corner = rot_vertices.max(0)
+
+ center_3d = 0.5 * (min_corner + max_corner)
+ ratio = 0.8
+ bbox3d_size = max_corner - min_corner
+
+ # set appropriate translation to make all meshes appear in the image
+ z_x = bbox3d_size[0] * focal_length / (ratio * W) - min_corner[2]
+ z_y = bbox3d_size[1] * focal_length / (ratio * H) - min_corner[2]
+ z = max(z_x, z_y)
+ translation = -center_3d
+ translation[2] = z
+ translation = translation[None, :]
+ rot_vertex_list = [
+ rot_vert + translation for rot_vert in rot_vertex_list
+ ]
+
+ # render from side view
+ img_side = imshow_mesh_3d(
+ np.ones_like(img) * 255, rot_vertex_list, face_list, img_center,
+ [focal_length, focal_length])
+
+        # merge the images from the front view and the side view
+ img_vis = np.concatenate([img_vis, img_side], axis=1)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
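
The only non-obvious math in `show_result` above is turning the weak-perspective camera `(s, tx, ty)` predicted by the mesh head into a camera-space translation for rendering: depth is recovered as `2 * focal_length / (s * bbox_size)`. A numpy sketch with illustrative values only (the real `focal_length` comes from `self.loss_mesh`):

```python
import numpy as np

focal_length = 5000.0                  # illustrative; taken from loss_mesh in the model
camera = np.array([0.9, 0.05, -0.1])   # predicted (s, tx, ty)
scale = np.array([1.2, 1.2])           # bbox scale, in units of 200 px

translation = np.array([
    camera[1],
    camera[2],
    2 * focal_length / (scale[0] * 200.0 * camera[0] + 1e-9),
])
print(translation)  # z ~= 46.3 for these toy numbers
```
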
diff --git a/mmpose/models/detectors/multi_task.py b/mmpose/models/detectors/multi_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b6f3178a4b0413f5118eee27b535f46a1baaf84
--- /dev/null
+++ b/mmpose/models/detectors/multi_task.py
@@ -0,0 +1,187 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+
+from .. import builder
+from ..builder import POSENETS
+
+
+@POSENETS.register_module()
+class MultiTask(nn.Module):
+ """Multi-task detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ heads (list[dict]): heads to output predictions.
+ necks (list[dict] | None): necks to process feature.
+ head2neck (dict{int:int}): head index to neck index.
+ pretrained (str): Path to the pretrained models.
+ """
+
+ def __init__(self,
+ backbone,
+ heads,
+ necks=None,
+ head2neck=None,
+ pretrained=None):
+ super().__init__()
+
+ self.backbone = builder.build_backbone(backbone)
+
+ if head2neck is None:
+ assert necks is None
+ head2neck = {}
+
+ self.head2neck = {}
+ for i in range(len(heads)):
+ self.head2neck[i] = head2neck[i] if i in head2neck else -1
+
+ self.necks = nn.ModuleList([])
+ if necks is not None:
+ for neck in necks:
+ self.necks.append(builder.build_neck(neck))
+ self.necks.append(nn.Identity())
+
+ self.heads = nn.ModuleList([])
+ assert heads is not None
+ for head in heads:
+ assert head is not None
+ self.heads.append(builder.build_head(head))
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_necks(self):
+        """Check if has necks."""
+ return hasattr(self, 'necks')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_necks:
+ for neck in self.necks:
+ if hasattr(neck, 'init_weights'):
+ neck.init_weights()
+
+ for head in self.heads:
+ if hasattr(head, 'init_weights'):
+ head.init_weights()
+
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+            - img width: imgW
+            - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input images.
+ target (list[torch.Tensor]): Targets.
+ target_weight (List[torch.Tensor]): Weights.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+            return_loss (bool): Option to return loss. `return_loss=True`
+                for training, `return_loss=False` for validation & test.
+
+ Returns:
+            dict|tuple: if `return_loss` is True, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ features = self.backbone(img)
+ outputs = []
+
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ outputs.append(head(self.necks[neck_id](features)))
+
+ # if return loss
+ losses = dict()
+
+ for head, output, gt, gt_weight in zip(self.heads, outputs, target,
+ target_weight):
+ loss = head.get_loss(output, gt, gt_weight)
+ assert len(set(losses.keys()).intersection(set(loss.keys()))) == 0
+ losses.update(loss)
+
+ if hasattr(head, 'get_accuracy'):
+ acc = head.get_accuracy(output, gt, gt_weight)
+ assert len(set(losses.keys()).intersection(set(
+ acc.keys()))) == 0
+ losses.update(acc)
+
+ return losses
+
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ results = {}
+
+ features = self.backbone(img)
+ outputs = []
+
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ if hasattr(head, 'inference_model'):
+ head_output = head.inference_model(
+ self.necks[neck_id](features), flip_pairs=None)
+ else:
+ head_output = head(
+ self.necks[neck_id](features)).detach().cpu().numpy()
+ outputs.append(head_output)
+
+ for head, output in zip(self.heads, outputs):
+ result = head.decode(
+ img_metas, output, img_size=[img_width, img_height])
+ results.update(result)
+ return results
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ list[Tensor]: Outputs.
+ """
+ features = self.backbone(img)
+ outputs = []
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ outputs.append(head(self.necks[neck_id](features)))
+ return outputs
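
`MultiTask` routes the backbone features to each head through an optional neck; heads without an assigned neck get index `-1`, which resolves to the `nn.Identity()` appended at the end of `self.necks`, so the features pass through unchanged. A toy sketch of that routing:

```python
import torch
import torch.nn as nn

necks = nn.ModuleList([nn.Conv2d(8, 8, kernel_size=1)])  # one real neck
necks.append(nn.Identity())                              # fallback for index -1

head2neck = {0: 0}      # head 0 uses neck 0; head 1 falls back to the identity
num_heads = 2
routing = {i: head2neck.get(i, -1) for i in range(num_heads)}

features = torch.randn(1, 8, 16, 16)
per_head_inputs = [necks[routing[i]](features) for i in range(num_heads)]
print([t.shape for t in per_head_inputs])
```
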
diff --git a/mmpose/models/detectors/multiview_pose.py b/mmpose/models/detectors/multiview_pose.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3d2221eee4198d0cbaad7c8e7031f85dc35cf33
--- /dev/null
+++ b/mmpose/models/detectors/multiview_pose.py
@@ -0,0 +1,889 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.runner import load_checkpoint
+
+from mmpose.core.camera import SimpleCameraTorch
+from mmpose.core.post_processing.post_transforms import (
+ affine_transform_torch, get_affine_transform)
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+
+class ProjectLayer(nn.Module):
+
+ def __init__(self, image_size, heatmap_size):
+ """Project layer to get voxel feature. Adapted from
+ https://github.com/microsoft/voxelpose-
+ pytorch/blob/main/lib/models/project_layer.py.
+
+ Args:
+ image_size (int or list): input size of the 2D model
+ heatmap_size (int or list): output size of the 2D model
+ """
+ super(ProjectLayer, self).__init__()
+ self.image_size = image_size
+ self.heatmap_size = heatmap_size
+ if isinstance(self.image_size, int):
+ self.image_size = [self.image_size, self.image_size]
+ if isinstance(self.heatmap_size, int):
+ self.heatmap_size = [self.heatmap_size, self.heatmap_size]
+
+ def compute_grid(self, box_size, box_center, num_bins, device=None):
+ if isinstance(box_size, int) or isinstance(box_size, float):
+ box_size = [box_size, box_size, box_size]
+ if isinstance(num_bins, int):
+ num_bins = [num_bins, num_bins, num_bins]
+
+ grid_1D_x = torch.linspace(
+ -box_size[0] / 2, box_size[0] / 2, num_bins[0], device=device)
+ grid_1D_y = torch.linspace(
+ -box_size[1] / 2, box_size[1] / 2, num_bins[1], device=device)
+ grid_1D_z = torch.linspace(
+ -box_size[2] / 2, box_size[2] / 2, num_bins[2], device=device)
+ grid_x, grid_y, grid_z = torch.meshgrid(
+ grid_1D_x + box_center[0],
+ grid_1D_y + box_center[1],
+ grid_1D_z + box_center[2],
+ )
+ grid_x = grid_x.contiguous().view(-1, 1)
+ grid_y = grid_y.contiguous().view(-1, 1)
+ grid_z = grid_z.contiguous().view(-1, 1)
+ grid = torch.cat([grid_x, grid_y, grid_z], dim=1)
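+        # grid: (num_bins[0] * num_bins[1] * num_bins[2], 3) world-space
+        # coordinates of every voxel center in the box around box_center.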
+
+ return grid
+
+ def get_voxel(self, feature_maps, meta, grid_size, grid_center, cube_size):
+ device = feature_maps[0].device
+ batch_size = feature_maps[0].shape[0]
+ num_channels = feature_maps[0].shape[1]
+ num_bins = cube_size[0] * cube_size[1] * cube_size[2]
+ n = len(feature_maps)
+ cubes = torch.zeros(
+ batch_size, num_channels, 1, num_bins, n, device=device)
+ w, h = self.heatmap_size
+ grids = torch.zeros(batch_size, num_bins, 3, device=device)
+ bounding = torch.zeros(batch_size, 1, 1, num_bins, n, device=device)
+ for i in range(batch_size):
+ if len(grid_center[0]) == 3 or grid_center[i][3] >= 0:
+ if len(grid_center) == 1:
+ grid = self.compute_grid(
+ grid_size, grid_center[0], cube_size, device=device)
+ else:
+ grid = self.compute_grid(
+ grid_size, grid_center[i], cube_size, device=device)
+ grids[i:i + 1] = grid
+ for c in range(n):
+ center = meta[i]['center'][c]
+ scale = meta[i]['scale'][c]
+
+ width, height = center * 2
+ trans = torch.as_tensor(
+ get_affine_transform(center, scale / 200.0, 0,
+ self.image_size),
+ dtype=torch.float,
+ device=device)
+
+ cam_param = meta[i]['camera'][c].copy()
+
+ single_view_camera = SimpleCameraTorch(
+ param=cam_param, device=device)
+ xy = single_view_camera.world_to_pixel(grid)
+
+ bounding[i, 0, 0, :, c] = (xy[:, 0] >= 0) & (
+ xy[:, 1] >= 0) & (xy[:, 0] < width) & (
+ xy[:, 1] < height)
+ xy = torch.clamp(xy, -1.0, max(width, height))
+ xy = affine_transform_torch(xy, trans)
+ xy = xy * torch.tensor(
+ [w, h], dtype=torch.float,
+ device=device) / torch.tensor(
+ self.image_size, dtype=torch.float, device=device)
+ sample_grid = xy / torch.tensor([w - 1, h - 1],
+ dtype=torch.float,
+ device=device) * 2.0 - 1.0
+ sample_grid = torch.clamp(
+ sample_grid.view(1, 1, num_bins, 2), -1.1, 1.1)
+
+ cubes[i:i + 1, :, :, :, c] += F.grid_sample(
+ feature_maps[c][i:i + 1, :, :, :],
+ sample_grid,
+ align_corners=True)
+
+ cubes = torch.sum(
+ torch.mul(cubes, bounding), dim=-1) / (
+ torch.sum(bounding, dim=-1) + 1e-6)
+ cubes[cubes != cubes] = 0.0
+ cubes = cubes.clamp(0.0, 1.0)
+
+ cubes = cubes.view(batch_size, num_channels, cube_size[0],
+ cube_size[1], cube_size[2])
+ return cubes, grids
+
+ def forward(self, feature_maps, meta, grid_size, grid_center, cube_size):
+ cubes, grids = self.get_voxel(feature_maps, meta, grid_size,
+ grid_center, cube_size)
+ return cubes, grids
+
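+# Illustrative usage sketch for ProjectLayer (not part of the original file;
+# the sizes below are assumptions chosen only to show the call pattern):
+#
+#   layer = ProjectLayer(image_size=[960, 512], heatmap_size=[240, 128])
+#   # feature_maps: list of per-camera heatmaps, each of shape [N, K, 128, 240]
+#   # meta[i]['center'][c], ['scale'][c] and ['camera'][c] describe camera c
+#   cubes, grids = layer(feature_maps, meta,
+#                        grid_size=[8000, 8000, 2000],       # mm
+#                        grid_center=[[0.0, -500.0, 800.0]],  # shared center
+#                        cube_size=[80, 80, 20])              # voxel bins
+#   # cubes: [N, K, 80, 80, 20] voxel features; grids: [N, 80*80*20, 3] coords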
+
+@POSENETS.register_module()
+class DetectAndRegress(BasePose):
+ """DetectAndRegress approach for multiview human pose detection.
+
+ Args:
+ backbone (ConfigDict): Dictionary to construct the 2D pose detector
+ human_detector (ConfigDict): dictionary to construct human detector
+ pose_regressor (ConfigDict): dictionary to construct pose regressor
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained 2D model. Default: None.
+ freeze_2d (bool): Whether to freeze the 2D model in training.
+ Default: True.
+ """
+
+ def __init__(self,
+ backbone,
+ human_detector,
+ pose_regressor,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ freeze_2d=True):
+ super(DetectAndRegress, self).__init__()
+ if backbone is not None:
+ self.backbone = builder.build_posenet(backbone)
+ if self.training and pretrained is not None:
+ load_checkpoint(self.backbone, pretrained)
+ else:
+ self.backbone = None
+
+ self.freeze_2d = freeze_2d
+ self.human_detector = builder.MODELS.build(human_detector)
+ self.pose_regressor = builder.MODELS.build(pose_regressor)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ @staticmethod
+ def _freeze(model):
+ """Freeze parameters."""
+ model.eval()
+ for param in model.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ """Sets the module in training mode.
+ Args:
+ mode (bool): whether to set training mode (``True``)
+ or evaluation mode (``False``). Default: ``True``.
+
+ Returns:
+ Module: self
+ """
+ super().train(mode)
+ if mode and self.freeze_2d and self.backbone is not None:
+ self._freeze(self.backbone)
+
+ return self
+
+ def forward(self,
+ img=None,
+ img_metas=None,
+ return_loss=True,
+ targets=None,
+ masks=None,
+ targets_3d=None,
+ input_heatmaps=None,
+ **kwargs):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ targets (list(torch.Tensor[NxKxHxW])):
+ Multi-camera target feature_maps of the 2D model.
+ masks (list(torch.Tensor[NxHxW])):
+ Multi-camera masks of the input to the 2D model.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+ **kwargs:
+
+ Returns:
+ dict: if 'return_loss' is true, then return losses.
+ Otherwise, return predicted poses, human centers and sample_id
+
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, targets, masks,
+ targets_3d, input_heatmaps)
+ else:
+ return self.forward_test(img, img_metas, input_heatmaps)
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during training.
+
+ This method defines an iteration step during training, except for the
+ back propagation and optimizer updating, which are done in an optimizer
+ hook. Note that in some complicated cases or models, the whole process
+ including back propagation and optimizer updating is also defined in
+ this method, such as GAN.
+
+ Args:
+ data_batch (dict): The output of dataloader.
+ optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+ runner is passed to ``train_step()``. This argument is unused
+ and reserved.
+
+ Returns:
+ dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+ ``num_samples``.
+ ``loss`` is a tensor for back propagation, which can be a
+ weighted sum of multiple losses.
+ ``log_vars`` contains all the variables to be sent to the
+ logger.
+ ``num_samples`` indicates the batch size (when the model is
+ DDP, it means the batch size on each GPU), which is used for
+ averaging the logs.
+ """
+ losses = self.forward(**data_batch)
+
+ loss, log_vars = self._parse_losses(losses)
+ if 'img' in data_batch:
+ batch_size = data_batch['img'][0].shape[0]
+ else:
+ assert 'input_heatmaps' in data_batch
+ batch_size = data_batch['input_heatmaps'][0][0].shape[0]
+
+ outputs = dict(loss=loss, log_vars=log_vars, num_samples=batch_size)
+
+ return outputs
+
+ def forward_train(self,
+ img,
+ img_metas,
+ targets=None,
+ masks=None,
+ targets_3d=None,
+ input_heatmaps=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ targets (list(torch.Tensor[NxKxHxW])):
+ Multi-camera target feature_maps of the 2D model.
+ masks (list(torch.Tensor[NxHxW])):
+ Multi-camera masks of the input to the 2D model.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+
+ Returns:
+ dict: losses.
+
+ """
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ losses = dict()
+ human_candidates, human_loss = self.human_detector.forward_train(
+ None, img_metas, feature_maps, targets_3d, return_preds=True)
+ losses.update(human_loss)
+
+ pose_loss = self.pose_regressor(
+ None,
+ img_metas,
+ return_loss=True,
+ feature_maps=feature_maps,
+ human_candidates=human_candidates)
+ losses.update(pose_loss)
+
+ if not self.freeze_2d:
+ losses_2d = {}
+ heatmaps_tensor = torch.cat(feature_maps, dim=0)
+ targets_tensor = torch.cat(targets, dim=0)
+ masks_tensor = torch.cat(masks, dim=0)
+ losses_2d_ = self.backbone.get_loss(heatmaps_tensor,
+ targets_tensor, masks_tensor)
+ for k, v in losses_2d_.items():
+ losses_2d[k + '_2d'] = v
+ losses.update(losses_2d)
+
+ return losses
+
+ def forward_test(
+ self,
+ img,
+ img_metas,
+ input_heatmaps=None,
+ ):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+
+ Returns:
+ dict: predicted poses, human centers and sample_id
+
+ """
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ human_candidates = self.human_detector.forward_test(
+ None, img_metas, feature_maps)
+
+ human_poses = self.pose_regressor(
+ None,
+ img_metas,
+ return_loss=False,
+ feature_maps=feature_maps,
+ human_candidates=human_candidates)
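+        # Shapes (following VoxelCenterDetector / VoxelSinglePose below):
+        # human_candidates: [N, num_candidates, 5] detected centers, where
+        # column 3 is a validity flag (0 valid / -1 invalid) and column 4 the
+        # detection score; human_poses: [N, num_candidates, num_joints, 5]
+        # with per-joint (x, y, z) in columns 0:3 and the candidate's
+        # flag / score broadcast into columns 3:5.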
+
+ result = {}
+ result['pose_3d'] = human_poses.cpu().numpy()
+ result['human_detection_3d'] = human_candidates.cpu().numpy()
+ result['sample_id'] = [img_meta['sample_id'] for img_meta in img_metas]
+
+ return result
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, img, input_heatmaps=None, num_candidates=5):
+ """Used for computing network FLOPs."""
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ _ = self.human_detector.forward_dummy(feature_maps)
+
+ _ = self.pose_regressor.forward_dummy(feature_maps, num_candidates)
+
+
+@POSENETS.register_module()
+class VoxelSinglePose(BasePose):
+ """VoxelPose Please refer to the `paper `
+ for details.
+
+ Args:
+ image_size (list): input size of the 2D model.
+ heatmap_size (list): output size of the 2D model.
+ sub_space_size (list): Size of the cuboid human proposal.
+ sub_cube_size (list): Size of the input volume to the pose net.
+ pose_net (ConfigDict): Dictionary to construct the pose net.
+ pose_head (ConfigDict): Dictionary to construct the pose head.
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ """
+
+ def __init__(
+ self,
+ image_size,
+ heatmap_size,
+ sub_space_size,
+ sub_cube_size,
+ num_joints,
+ pose_net,
+ pose_head,
+ train_cfg=None,
+ test_cfg=None,
+ ):
+ super(VoxelSinglePose, self).__init__()
+ self.project_layer = ProjectLayer(image_size, heatmap_size)
+ self.pose_net = builder.build_backbone(pose_net)
+ self.pose_head = builder.build_head(pose_head)
+
+ self.sub_space_size = sub_space_size
+ self.sub_cube_size = sub_cube_size
+
+ self.num_joints = num_joints
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ def forward(self,
+ img,
+ img_metas,
+ return_loss=True,
+ feature_maps=None,
+ human_candidates=None,
+ **kwargs):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, feature_maps,
+ human_candidates)
+ else:
+ return self.forward_test(img, img_metas, feature_maps,
+ human_candidates)
+
+ def forward_train(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ human_candidates=None,
+ return_preds=False,
+ **kwargs):
+ """Defines the computation performed at training.
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+ return_preds (bool): Whether to return prediction results
+
+ Returns:
+ dict: losses.
+
+ """
+ batch_size, num_candidates, _ = human_candidates.shape
+ pred = human_candidates.new_zeros(batch_size, num_candidates,
+ self.num_joints, 5)
+ pred[:, :, :, 3:] = human_candidates[:, :, None, 3:]
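+        # pred layout: [N, num_candidates, num_joints, 5]; columns 0:3 are
+        # filled with the regressed per-joint xyz below, while columns 3:5
+        # are broadcast from each candidate's assigned gt index and score.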
+
+ device = feature_maps[0].device
+ gt_3d = torch.stack([
+ torch.tensor(img_meta['joints_3d'], device=device)
+ for img_meta in img_metas
+ ])
+ gt_3d_vis = torch.stack([
+ torch.tensor(img_meta['joints_3d_visible'], device=device)
+ for img_meta in img_metas
+ ])
+ valid_preds = []
+ valid_targets = []
+ valid_weights = []
+
+ for n in range(num_candidates):
+ index = pred[:, n, 0, 3] >= 0
+ num_valid = index.sum()
+ if num_valid > 0:
+ pose_input_cube, coordinates \
+ = self.project_layer(feature_maps,
+ img_metas,
+ self.sub_space_size,
+ human_candidates[:, n, :3],
+ self.sub_cube_size)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d[index],
+ coordinates[index])
+
+ pred[index, n, :, 0:3] = pose_3d.detach()
+ valid_targets.append(gt_3d[index, pred[index, n, 0, 3].long()])
+ valid_weights.append(gt_3d_vis[index, pred[index, n, 0,
+ 3].long(), :,
+ 0:1].float())
+ valid_preds.append(pose_3d)
+
+ losses = dict()
+ if len(valid_preds) > 0:
+ valid_targets = torch.cat(valid_targets, dim=0)
+ valid_weights = torch.cat(valid_weights, dim=0)
+ valid_preds = torch.cat(valid_preds, dim=0)
+ losses.update(
+ self.pose_head.get_loss(valid_preds, valid_targets,
+ valid_weights))
+ else:
+ pose_input_cube = feature_maps[0].new_zeros(
+ batch_size, self.num_joints, *self.sub_cube_size)
+ coordinates = feature_maps[0].new_zeros(batch_size,
+ *self.sub_cube_size,
+ 3).view(batch_size, -1, 3)
+ pseudo_targets = feature_maps[0].new_zeros(batch_size,
+ self.num_joints, 3)
+ pseudo_weights = feature_maps[0].new_zeros(batch_size,
+ self.num_joints, 1)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d, coordinates)
+ losses.update(
+ self.pose_head.get_loss(pose_3d, pseudo_targets,
+ pseudo_weights))
+ if return_preds:
+ return pred, losses
+ else:
+ return losses
+
+ def forward_test(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ human_candidates=None,
+ **kwargs):
+ """Defines the computation performed at training.
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+
+ Returns:
+            Tensor: predicted 3D poses of the human candidates.
+
+ """
+ batch_size, num_candidates, _ = human_candidates.shape
+ pred = human_candidates.new_zeros(batch_size, num_candidates,
+ self.num_joints, 5)
+ pred[:, :, :, 3:] = human_candidates[:, :, None, 3:]
+
+ for n in range(num_candidates):
+ index = pred[:, n, 0, 3] >= 0
+ num_valid = index.sum()
+ if num_valid > 0:
+ pose_input_cube, coordinates \
+ = self.project_layer(feature_maps,
+ img_metas,
+ self.sub_space_size,
+ human_candidates[:, n, :3],
+ self.sub_cube_size)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d[index],
+ coordinates[index])
+
+ pred[index, n, :, 0:3] = pose_3d.detach()
+
+ return pred
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, feature_maps, num_candidates=5):
+ """Used for computing network FLOPs."""
+ batch_size, num_channels = feature_maps[0].shape
+ pose_input_cube = feature_maps[0].new_zeros(batch_size, num_channels,
+ *self.sub_cube_size)
+ for n in range(num_candidates):
+ _ = self.pose_net(pose_input_cube)
+
+
+@POSENETS.register_module()
+class VoxelCenterDetector(BasePose):
+ """Detect human center by 3D CNN on voxels.
+
+    Please refer to the VoxelPose paper for details.
+
+ Args:
+ image_size (list): input size of the 2D model.
+ heatmap_size (list): output size of the 2D model.
+ space_size (list): Size of the 3D space.
+ cube_size (list): Size of the input volume to the 3D CNN.
+ space_center (list): Coordinate of the center of the 3D space.
+ center_net (ConfigDict): Dictionary to construct the center net.
+ center_head (ConfigDict): Dictionary to construct the center head.
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ """
+
+ def __init__(
+ self,
+ image_size,
+ heatmap_size,
+ space_size,
+ cube_size,
+ space_center,
+ center_net,
+ center_head,
+ train_cfg=None,
+ test_cfg=None,
+ ):
+ super(VoxelCenterDetector, self).__init__()
+ self.project_layer = ProjectLayer(image_size, heatmap_size)
+ self.center_net = builder.build_backbone(center_net)
+ self.center_head = builder.build_head(center_head)
+
+ self.space_size = space_size
+ self.cube_size = cube_size
+ self.space_center = space_center
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ def assign2gt(self, center_candidates, gt_centers, gt_num_persons):
+ """"Assign gt id to each valid human center candidate."""
+ det_centers = center_candidates[..., :3]
+ batch_size = center_candidates.shape[0]
+ cand_num = center_candidates.shape[1]
+ cand2gt = torch.zeros(batch_size, cand_num)
+
+ for i in range(batch_size):
+ cand = det_centers[i].view(cand_num, 1, -1)
+ gt = gt_centers[None, i, :gt_num_persons[i]]
+
+ dist = torch.sqrt(torch.sum((cand - gt)**2, dim=-1))
+ min_dist, min_gt = torch.min(dist, dim=-1)
+
+ cand2gt[i] = min_gt
+ cand2gt[i][min_dist > self.train_cfg['dist_threshold']] = -1.0
+
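+        # Column 3 of each candidate stores the index of its matched
+        # ground-truth person, or -1 if the nearest gt center is farther
+        # away than train_cfg['dist_threshold'].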
+ center_candidates[:, :, 3] = cand2gt
+
+ return center_candidates
+
+ def forward(self,
+ img,
+ img_metas,
+ return_loss=True,
+ feature_maps=None,
+ targets_3d=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ Returns:
+ dict: if 'return_loss' is true, then return losses.
+ Otherwise, return predicted poses
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, feature_maps, targets_3d)
+ else:
+ return self.forward_test(img, img_metas, feature_maps)
+
+ def forward_train(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ targets_3d=None,
+ return_preds=False):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ return_preds (bool): Whether to return prediction results
+ Returns:
+            dict: if 'return_preds' is true, then return losses
+ and human centers. Otherwise, return losses only
+ """
+ initial_cubes, _ = self.project_layer(feature_maps, img_metas,
+ self.space_size,
+ [self.space_center],
+ self.cube_size)
+ center_heatmaps_3d = self.center_net(initial_cubes)
+ center_heatmaps_3d = center_heatmaps_3d.squeeze(1)
+ center_candidates = self.center_head(center_heatmaps_3d)
+
+ device = center_candidates.device
+
+ gt_centers = torch.stack([
+ torch.tensor(img_meta['roots_3d'], device=device)
+ for img_meta in img_metas
+ ])
+ gt_num_persons = torch.stack([
+ torch.tensor(img_meta['num_persons'], device=device)
+ for img_meta in img_metas
+ ])
+ center_candidates = self.assign2gt(center_candidates, gt_centers,
+ gt_num_persons)
+
+ losses = dict()
+ losses.update(
+ self.center_head.get_loss(center_heatmaps_3d, targets_3d))
+
+ if return_preds:
+ return center_candidates, losses
+ else:
+ return losses
+
+ def forward_test(self, img, img_metas, feature_maps=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ Returns:
+ human centers
+ """
+ initial_cubes, _ = self.project_layer(feature_maps, img_metas,
+ self.space_size,
+ [self.space_center],
+ self.cube_size)
+ center_heatmaps_3d = self.center_net(initial_cubes)
+ center_heatmaps_3d = center_heatmaps_3d.squeeze(1)
+ center_candidates = self.center_head(center_heatmaps_3d)
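+        # Column 3 marks validity: 0.0 where the detection score (column 4)
+        # exceeds test_cfg['center_threshold'], -1.0 otherwise.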
+ center_candidates[..., 3] = \
+ (center_candidates[..., 4] >
+ self.test_cfg['center_threshold']).float() - 1.0
+
+ return center_candidates
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, feature_maps):
+ """Used for computing network FLOPs."""
+ batch_size, num_channels, _, _ = feature_maps[0].shape
+ initial_cubes = feature_maps[0].new_zeros(batch_size, num_channels,
+ *self.cube_size)
+ _ = self.center_net(initial_cubes)
diff --git a/mmpose/models/detectors/pose_lifter.py b/mmpose/models/detectors/pose_lifter.py
new file mode 100644
index 0000000000000000000000000000000000000000..ace6b9f3e8b0363666da5d96858b3864213aeabe
--- /dev/null
+++ b/mmpose/models/detectors/pose_lifter.py
@@ -0,0 +1,392 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+from mmcv.utils.misc import deprecated_api_warning
+
+from mmpose.core import imshow_bboxes, imshow_keypoints, imshow_keypoints_3d
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class PoseLifter(BasePose):
+ """Pose lifter that lifts 2D pose to 3D pose.
+
+ The basic model is a pose model that predicts root-relative pose. If
+ traj_head is not None, a trajectory model that predicts absolute root joint
+ position is also built.
+
+ Args:
+ backbone (dict): Config for the backbone of pose model.
+ neck (dict|None): Config for the neck of pose model.
+ keypoint_head (dict|None): Config for the head of pose model.
+ traj_backbone (dict|None): Config for the backbone of trajectory model.
+ If traj_backbone is None and traj_head is not None, trajectory
+ model will share backbone with pose model.
+ traj_neck (dict|None): Config for the neck of trajectory model.
+ traj_head (dict|None): Config for the head of trajectory model.
+ loss_semi (dict|None): Config for semi-supervision loss.
+ train_cfg (dict|None): Config for keypoint head during training.
+ test_cfg (dict|None): Config for keypoint head during testing.
+ pretrained (str|None): Path to pretrained weights.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ traj_backbone=None,
+ traj_neck=None,
+ traj_head=None,
+ loss_semi=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ # pose model
+ self.backbone = builder.build_backbone(backbone)
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ # trajectory model
+ if traj_head is not None:
+ self.traj_head = builder.build_head(traj_head)
+
+ if traj_backbone is not None:
+ self.traj_backbone = builder.build_backbone(traj_backbone)
+ else:
+ self.traj_backbone = self.backbone
+
+ if traj_neck is not None:
+ self.traj_neck = builder.build_neck(traj_neck)
+
+ # semi-supervised learning
+ self.semi = loss_semi is not None
+ if self.semi:
+ assert keypoint_head is not None and traj_head is not None
+ self.loss_semi = builder.build_loss(loss_semi)
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has keypoint_neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ @property
+ def with_traj_backbone(self):
+ """Check if has trajectory_backbone."""
+ return hasattr(self, 'traj_backbone')
+
+ @property
+ def with_traj_neck(self):
+ """Check if has trajectory_neck."""
+ return hasattr(self, 'traj_neck')
+
+ @property
+ def with_traj(self):
+ """Check if has trajectory_head."""
+ return hasattr(self, 'traj_head')
+
+ @property
+ def causal(self):
+ if hasattr(self.backbone, 'causal'):
+ return self.backbone.causal
+ else:
+            raise AttributeError('A PoseLifter\'s backbone should have '
+                                 'the bool attribute "causal" to indicate '
+                                 'if it performs causal inference.')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+ if self.with_traj_backbone:
+ self.traj_backbone.init_weights(pretrained)
+ if self.with_traj_neck:
+ self.traj_neck.init_weights()
+ if self.with_traj:
+ self.traj_head.init_weights()
+
+ @auto_fp16(apply_to=('input', ))
+ def forward(self,
+ input,
+ target=None,
+ target_weight=None,
+ metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True.
+
+ Note:
+ - batch_size: N
+ - num_input_keypoints: Ki
+ - input_keypoint_dim: Ci
+ - input_sequence_len: Ti
+ - num_output_keypoints: Ko
+ - output_keypoint_dim: Co
+ - input_sequence_len: To
+
+ Args:
+ input (torch.Tensor[NxKixCixTi]): Input keypoint coordinates.
+ target (torch.Tensor[NxKoxCoxTo]): Output keypoint coordinates.
+ Defaults to None.
+ target_weight (torch.Tensor[NxKox1]): Weights across different
+ joint types. Defaults to None.
+ metas (list(dict)): Information about data augmentation
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ Returns:
+            dict|Tensor: If `return_loss` is true, return losses. \
+ Otherwise return predicted poses.
+ """
+ if return_loss:
+ return self.forward_train(input, target, target_weight, metas,
+ **kwargs)
+ else:
+ return self.forward_test(input, metas, **kwargs)
+
+ def forward_train(self, input, target, target_weight, metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ assert input.size(0) == len(metas)
+
+ # supervised learning
+ # pose model
+ features = self.backbone(input)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head(features)
+
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight, metas)
+ losses.update(keypoint_losses)
+ losses.update(keypoint_accuracy)
+
+ # trajectory model
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_traj_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head(traj_features)
+
+ traj_losses = self.traj_head.get_loss(traj_output,
+ kwargs['traj_target'], None)
+ losses.update(traj_losses)
+
+ # semi-supervised learning
+ if self.semi:
+ ul_input = kwargs['unlabeled_input']
+ ul_features = self.backbone(ul_input)
+ if self.with_neck:
+ ul_features = self.neck(ul_features)
+ ul_output = self.keypoint_head(ul_features)
+
+ ul_traj_features = self.traj_backbone(ul_input)
+ if self.with_traj_neck:
+ ul_traj_features = self.traj_neck(ul_traj_features)
+ ul_traj_output = self.traj_head(ul_traj_features)
+
+ output_semi = dict(
+ labeled_pose=output,
+ unlabeled_pose=ul_output,
+ unlabeled_traj=ul_traj_output)
+ target_semi = dict(
+ unlabeled_target_2d=kwargs['unlabeled_target_2d'],
+ intrinsics=kwargs['intrinsics'])
+
+ semi_losses = self.loss_semi(output_semi, target_semi)
+ losses.update(semi_losses)
+
+ return losses
+
+ def forward_test(self, input, metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ assert input.size(0) == len(metas)
+
+ results = {}
+
+ features = self.backbone(input)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head.inference_model(features)
+ keypoint_result = self.keypoint_head.decode(metas, output)
+ results.update(keypoint_result)
+
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_traj_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head.inference_model(traj_features)
+ results['traj_preds'] = traj_output
+
+ return results
+
+ def forward_dummy(self, input):
+ """Used for computing network FLOPs. See ``tools/get_flops.py``.
+
+ Args:
+ input (torch.Tensor): Input pose
+
+ Returns:
+ Tensor: Model output
+ """
+ output = self.backbone(input)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head(traj_features)
+ output = output + traj_output
+
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='PoseLifter')
+ def show_result(self,
+ result,
+ img=None,
+ skeleton=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=8,
+ thickness=2,
+ vis_height=400,
+ num_instances=-1,
+ win_name='',
+ show=False,
+ wait_time=0,
+ out_file=None):
+ """Visualize 3D pose estimation results.
+
+ Args:
+ result (list[dict]): The pose estimation results containing:
+
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
+ 2D inputs. If a sequence is given, only the last frame
+ will be used for visualization
+ - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
+ - "title" (str): title for the subplot
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
+ links, each is a pair of joint indices.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ win_name (str): The window name.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ if num_instances < 0:
+ assert len(result) > 0
+ result = sorted(result, key=lambda x: x.get('track_id', 1e4))
+
+ # draw image and input 2d poses
+ if img is not None:
+ img = mmcv.imread(img)
+
+ bbox_result = []
+ pose_input_2d = []
+ for res in result:
+ if 'bbox' in res:
+ bbox = np.array(res['bbox'])
+ if bbox.ndim != 1:
+ assert bbox.ndim == 2
+ bbox = bbox[-1] # Get bbox from the last frame
+ bbox_result.append(bbox)
+ if 'keypoints' in res:
+ kpts = np.array(res['keypoints'])
+ if kpts.ndim != 2:
+ assert kpts.ndim == 3
+ kpts = kpts[-1] # Get 2D keypoints from the last frame
+ pose_input_2d.append(kpts)
+
+ if len(bbox_result) > 0:
+ bboxes = np.vstack(bbox_result)
+ imshow_bboxes(
+ img,
+ bboxes,
+ colors='green',
+ thickness=thickness,
+ show=False)
+ if len(pose_input_2d) > 0:
+ imshow_keypoints(
+ img,
+ pose_input_2d,
+ skeleton,
+ kpt_score_thr=0.3,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ radius=radius,
+ thickness=thickness)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ img_vis = imshow_keypoints_3d(
+ result,
+ img,
+ skeleton,
+ pose_kpt_color,
+ pose_link_color,
+ vis_height,
+ num_instances=num_instances)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
diff --git a/mmpose/models/detectors/posewarper.py b/mmpose/models/detectors/posewarper.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa1d05f2a4f73728400ebe5205703bf96110c31a
--- /dev/null
+++ b/mmpose/models/detectors/posewarper.py
@@ -0,0 +1,244 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+import torch
+
+from ..builder import POSENETS
+from .top_down import TopDown
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class PoseWarper(TopDown):
+ """Top-down pose detectors for multi-frame settings for video inputs.
+
+ `"Learning temporal pose estimation from sparsely-labeled videos"
+ `_.
+
+ A child class of TopDown detector. The main difference between PoseWarper
+ and TopDown lies in that the former takes a list of tensors as input image
+ while the latter takes a single tensor as input image in forward method.
+
+ Args:
+ backbone (dict): Backbone modules to extract features.
+ neck (dict): intermediate modules to transform features.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated arguments. Please use
+ `loss_keypoint` for heads instead.
+        concat_tensors (bool): Whether to concatenate the per-frame tensors
+            on the batch dim, which can speed up inference. Default: True.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None,
+ concat_tensors=True):
+ super().__init__(
+ backbone=backbone,
+ neck=neck,
+ keypoint_head=keypoint_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained,
+ loss_pose=loss_pose)
+ self.concat_tensors = concat_tensors
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - number of frames: F
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ imgs (list[F,torch.Tensor[N,C,imgH,imgW]]): multiple input frames
+ target (torch.Tensor[N,K,H,W]): Target heatmaps for one frame.
+ target_weight (torch.Tensor[N,K,1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: paths to multiple video frames
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if `return loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, imgs, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ # imgs (list[Fxtorch.Tensor[NxCximgHximgW]]): multiple input frames
+ assert imgs[0].size(0) == len(img_metas)
+ num_frames = len(imgs)
+ frame_weight = img_metas[0]['frame_weight']
+
+ assert num_frames == len(frame_weight), f'The number of frames ' \
+ f'({num_frames}) and the length of weights for each frame ' \
+ f'({len(frame_weight)}) must match'
+
+ if self.concat_tensors:
+ features = [self.backbone(torch.cat(imgs, 0))]
+ else:
+ features = [self.backbone(img) for img in imgs]
+
+ if self.with_neck:
+ features = self.neck(features, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output = self.keypoint_head(features)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ losses.update(keypoint_losses)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight)
+ losses.update(keypoint_accuracy)
+
+ return losses
+
+ def forward_test(self, imgs, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ # imgs (list[Fxtorch.Tensor[NxCximgHximgW]]): multiple input frames
+ assert imgs[0].size(0) == len(img_metas)
+ num_frames = len(imgs)
+ frame_weight = img_metas[0]['frame_weight']
+
+ assert num_frames == len(frame_weight), f'The number of frames ' \
+ f'({num_frames}) and the length of weights for each frame ' \
+ f'({len(frame_weight)}) must match'
+
+ batch_size, _, img_height, img_width = imgs[0].shape
+
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+
+ if self.concat_tensors:
+ features = [self.backbone(torch.cat(imgs, 0))]
+ else:
+ features = [self.backbone(img) for img in imgs]
+
+ if self.with_neck:
+ features = self.neck(features, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ imgs_flipped = [img.flip(3) for img in imgs]
+
+ if self.concat_tensors:
+ features_flipped = [self.backbone(torch.cat(imgs_flipped, 0))]
+ else:
+ features_flipped = [
+ self.backbone(img_flipped) for img_flipped in imgs_flipped
+ ]
+
+ if self.with_neck:
+ features_flipped = self.neck(
+ features_flipped, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW], or list|tuple of tensors):
+ multiple input frames, N >= 2.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ # concat tensors if they are in a list
+ if isinstance(img, (list, tuple)):
+ img = torch.cat(img, 0)
+
+ batch_size = img.size(0)
+ assert batch_size > 1, 'Input batch size to PoseWarper ' \
+ 'should be larger than 1.'
+ if batch_size == 2:
+            warnings.warn('Current batch size: 2, used both for '
+                          'pytorch2onnx export and for computing FLOPs.')
+ else:
+ warnings.warn(
+ f'Current batch size: {batch_size}, for getting flops only.')
+
+ frame_weight = np.random.uniform(0, 1, batch_size)
+ output = [self.backbone(img)]
+
+ if self.with_neck:
+ output = self.neck(output, frame_weight=frame_weight)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
diff --git a/mmpose/models/detectors/top_down.py b/mmpose/models/detectors/top_down.py
new file mode 100644
index 0000000000000000000000000000000000000000..af0ab51c5b230f4bd39d2fdd082e0fb2daf4594f
--- /dev/null
+++ b/mmpose/models/detectors/top_down.py
@@ -0,0 +1,307 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core import imshow_bboxes, imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class TopDown(BasePose):
+ """Top-down pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated arguments. Please use
+ `loss_keypoint` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for TopDown is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if `return loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ losses.update(keypoint_losses)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight)
+ losses.update(keypoint_accuracy)
+
+ return losses
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+
+ features = self.backbone(img)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='TopDown')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ pose_kpt_color=None,
+ pose_link_color=None,
+ text_color='white',
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ bbox_thickness=1,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ skeleton (list[list]): The connection of keypoints.
+ skeleton is 0-based indexing.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+
+ bbox_result = []
+ bbox_labels = []
+ pose_result = []
+ for res in result:
+ if 'bbox' in res:
+ bbox_result.append(res['bbox'])
+ bbox_labels.append(res.get('label', None))
+ pose_result.append(res['keypoints'])
+
+ if bbox_result:
+ bboxes = np.vstack(bbox_result)
+ # draw bounding boxes
+ imshow_bboxes(
+ img,
+ bboxes,
+ labels=bbox_labels,
+ colors=bbox_color,
+ text_color=text_color,
+ thickness=bbox_thickness,
+ font_scale=font_scale,
+ show=False)
+
+ if pose_result:
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius,
+ thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
diff --git a/mmpose/models/detectors/top_down_moe.py b/mmpose/models/detectors/top_down_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d499b7ff2723b96104815b3f15fcfcb79489d7d
--- /dev/null
+++ b/mmpose/models/detectors/top_down_moe.py
@@ -0,0 +1,351 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import torch
+import torch.nn as nn
+
+import mmcv
+import numpy as np
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core import imshow_bboxes, imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class TopDownMoE(BasePose):
+ """Top-down pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+        keypoint_head (dict): Main keypoint head to process feature.
+        associate_keypoint_head (dict|list[dict]): Additional expert keypoint
+            heads for auxiliary datasets, routed by each sample's
+            'dataset_idx'. Default: None.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated arguments. Please use
+ `loss_keypoint` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ associate_keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for TopDown is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+
+ associate_keypoint_heads = []
+ keypoint_heads_cnt = 1
+
+ if associate_keypoint_head is not None:
+ if not isinstance(associate_keypoint_head, list):
+ associate_keypoint_head = [associate_keypoint_head]
+ for single_keypoint_head in associate_keypoint_head:
+ single_keypoint_head['train_cfg'] = train_cfg
+ single_keypoint_head['test_cfg'] = test_cfg
+ associate_keypoint_heads.append(builder.build_head(single_keypoint_head))
+ keypoint_heads_cnt += 1
+
+ self.associate_keypoint_heads = nn.ModuleList(associate_keypoint_heads)
+
+ self.keypoint_heads_cnt = keypoint_heads_cnt
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+ for item in self.associate_keypoint_heads:
+ item.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+            return_loss (bool): Option to return loss. `return_loss=True`
+                for training, `return_loss=False` for validation & test.
+            return_heatmap (bool): Option to return heatmap.
+
+        Returns:
+            dict|tuple: if `return_loss` is true, then return losses. \
+                Otherwise, return predicted poses, boxes, image paths \
+                and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+
+        img_sources = torch.from_numpy(
+            np.array([ele['dataset_idx'] for ele in img_metas])).to(img.device)
+
+ output = self.backbone(img, img_sources)
+ if self.with_neck:
+ output = self.neck(output)
+        # compute losses: the main stream (dataset_idx == 0) is supervised by
+        # `keypoint_head`; every other dataset is handled by its associate head.
+        losses = dict()
+
+        main_stream_select = (img_sources == 0)
+ output_select = self.keypoint_head(output)
+
+ target_select = target * main_stream_select.view(-1, 1, 1, 1)
+ target_weight_select = target_weight * main_stream_select.view(-1, 1, 1)
+
+ keypoint_losses = self.keypoint_head.get_loss(
+ output_select, target_select, target_weight_select)
+ losses['main_stream_loss'] = keypoint_losses['heatmap_loss']
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output_select, target_select, target_weight_select)
+ losses['main_stream_acc'] = keypoint_accuracy['acc_pose']
+
+ for idx in range(1, self.keypoint_heads_cnt):
+ idx_select = (img_sources == idx)
+ target_select = target * idx_select.view(-1, 1, 1, 1)
+ target_weight_select = target_weight * idx_select.view(-1, 1, 1)
+ output_select = self.associate_keypoint_heads[idx - 1](output)
+ keypoint_losses = self.associate_keypoint_heads[idx - 1].get_loss(
+ output_select, target_select, target_weight_select)
+ losses[f'{idx}_loss'] = keypoint_losses['heatmap_loss']
+ keypoint_accuracy = self.associate_keypoint_heads[idx - 1].get_accuracy(
+ output_select, target_select, target_weight_select)
+ losses[f'{idx}_acc'] = keypoint_accuracy['acc_pose']
+
+ return losses
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+        img_sources = torch.from_numpy(
+            np.array([ele['dataset_idx'] for ele in img_metas])).to(img.device)
+
+ features = self.backbone(img, img_sources)
+
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped, img_sources)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='TopDown')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ pose_kpt_color=None,
+ pose_link_color=None,
+ text_color='white',
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ bbox_thickness=1,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ skeleton (list[list]): The connection of keypoints.
+ skeleton is 0-based indexing.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+            font_scale (float): Font scales of texts.
+            bbox_thickness (int): Thickness of bbox lines.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+
+ bbox_result = []
+ bbox_labels = []
+ pose_result = []
+ for res in result:
+ if 'bbox' in res:
+ bbox_result.append(res['bbox'])
+ bbox_labels.append(res.get('label', None))
+ pose_result.append(res['keypoints'])
+
+ if bbox_result:
+ bboxes = np.vstack(bbox_result)
+ # draw bounding boxes
+ imshow_bboxes(
+ img,
+ bboxes,
+ labels=bbox_labels,
+ colors=bbox_color,
+ text_color=text_color,
+ thickness=bbox_thickness,
+ font_scale=font_scale,
+ show=False)
+
+ if pose_result:
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius,
+ thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
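+
+
+# Hedged usage note (added comment, not part of the original mmpose code):
+# `forward_train` expects every entry of `img_metas` to carry a `dataset_idx`
+# key. Each head runs on the whole batch, but targets and target weights are
+# masked so that only samples with dataset_idx == 0 contribute to the loss of
+# `keypoint_head`, and only samples with dataset_idx == i (i >= 1) contribute
+# to the loss of `associate_keypoint_heads[i - 1]`. A minimal, hypothetical
+# metas list therefore looks like:
+#
+#     img_metas = [
+#         dict(dataset_idx=0, image_file='a.jpg'),  # main-stream sample
+#         dict(dataset_idx=1, image_file='b.jpg'),  # scored by associate head 0
+#     ]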
diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a98e91140e7af574816787e9ace4ede24214c189
--- /dev/null
+++ b/mmpose/models/heads/__init__.py
@@ -0,0 +1,24 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .ae_higher_resolution_head import AEHigherResolutionHead
+from .ae_multi_stage_head import AEMultiStageHead
+from .ae_simple_head import AESimpleHead
+from .deconv_head import DeconvHead
+from .deeppose_regression_head import DeepposeRegressionHead
+from .hmr_head import HMRMeshHead
+from .interhand_3d_head import Interhand3DHead
+from .temporal_regression_head import TemporalRegressionHead
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+from .topdown_heatmap_multi_stage_head import (TopdownHeatmapMSMUHead,
+ TopdownHeatmapMultiStageHead)
+from .topdown_heatmap_simple_head import TopdownHeatmapSimpleHead
+from .vipnas_heatmap_simple_head import ViPNASHeatmapSimpleHead
+from .voxelpose_head import CuboidCenterHead, CuboidPoseHead
+
+__all__ = [
+ 'TopdownHeatmapSimpleHead', 'TopdownHeatmapMultiStageHead',
+ 'TopdownHeatmapMSMUHead', 'TopdownHeatmapBaseHead',
+ 'AEHigherResolutionHead', 'AESimpleHead', 'AEMultiStageHead',
+ 'DeepposeRegressionHead', 'TemporalRegressionHead', 'Interhand3DHead',
+ 'HMRMeshHead', 'DeconvHead', 'ViPNASHeatmapSimpleHead', 'CuboidCenterHead',
+ 'CuboidPoseHead'
+]
diff --git a/mmpose/models/heads/__pycache__/__init__.cpython-310.pyc b/mmpose/models/heads/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b34f9476ed1c510eb370f63673ceedd80ccc172
Binary files /dev/null and b/mmpose/models/heads/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/ae_higher_resolution_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/ae_higher_resolution_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c2b49b75da4ce666c2e9eb25ae15705425d80d56
Binary files /dev/null and b/mmpose/models/heads/__pycache__/ae_higher_resolution_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/ae_multi_stage_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/ae_multi_stage_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ab5c0d944b87ee5d43295139d4cf2aa909c4c3e
Binary files /dev/null and b/mmpose/models/heads/__pycache__/ae_multi_stage_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/ae_simple_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/ae_simple_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3d3a619dd45ae11c80b2b8735a2f87ce83a80a48
Binary files /dev/null and b/mmpose/models/heads/__pycache__/ae_simple_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/deconv_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/deconv_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..55501c01c6ac01cca5e95f8ecd42d8bf3e886f62
Binary files /dev/null and b/mmpose/models/heads/__pycache__/deconv_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/deeppose_regression_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/deeppose_regression_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a92d4ba017f8d3100a3b28f8242feb2f2ef76cd
Binary files /dev/null and b/mmpose/models/heads/__pycache__/deeppose_regression_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/hmr_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/hmr_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..744088e8b2ee982752a8f036a52bf57edaded5c8
Binary files /dev/null and b/mmpose/models/heads/__pycache__/hmr_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/interhand_3d_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/interhand_3d_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c0c34a06f6d0a200d0fc77aa884c7ddd5e90c301
Binary files /dev/null and b/mmpose/models/heads/__pycache__/interhand_3d_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/temporal_regression_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/temporal_regression_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d5e2601105e7050a184a31ed5a6b9bfe4cff1b1
Binary files /dev/null and b/mmpose/models/heads/__pycache__/temporal_regression_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/topdown_heatmap_base_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/topdown_heatmap_base_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7041ba40193cae3cd6d8d3cdc02e4b7a2fc2c5a5
Binary files /dev/null and b/mmpose/models/heads/__pycache__/topdown_heatmap_base_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/topdown_heatmap_multi_stage_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/topdown_heatmap_multi_stage_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dd160f855e6792bae196f0b27259446e8e0824b3
Binary files /dev/null and b/mmpose/models/heads/__pycache__/topdown_heatmap_multi_stage_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/topdown_heatmap_simple_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/topdown_heatmap_simple_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e8997399485ef763ee2665dbe93d9fd31cdf95e
Binary files /dev/null and b/mmpose/models/heads/__pycache__/topdown_heatmap_simple_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/vipnas_heatmap_simple_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/vipnas_heatmap_simple_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f531da4971b63221f475d233fd09c3843a029dc
Binary files /dev/null and b/mmpose/models/heads/__pycache__/vipnas_heatmap_simple_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/__pycache__/voxelpose_head.cpython-310.pyc b/mmpose/models/heads/__pycache__/voxelpose_head.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e19b63ddd68951408e73fa898e0778062f096a1
Binary files /dev/null and b/mmpose/models/heads/__pycache__/voxelpose_head.cpython-310.pyc differ
diff --git a/mmpose/models/heads/ae_higher_resolution_head.py b/mmpose/models/heads/ae_higher_resolution_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bf3399cb6facb232931ab9a763fadaf717b138b
--- /dev/null
+++ b/mmpose/models/heads/ae_higher_resolution_head.py
@@ -0,0 +1,249 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init,
+ normal_init)
+
+from mmpose.models.builder import build_loss
+from ..backbones.resnet import BasicBlock
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class AEHigherResolutionHead(nn.Module):
+ """Associative embedding with higher resolution head. paper ref: Bowen
+ Cheng et al. "HigherHRNet: Scale-Aware Representation Learning for Bottom-
+ Up Human Pose Estimation".
+
+ Args:
+ in_channels (int): Number of input channels.
+ num_joints (int): Number of joints
+ tag_per_joint (bool): If tag_per_joint is True,
+ the dimension of tags equals to num_joints,
+ else the dimension of tags is 1. Default: True
+ extra (dict): Configs for extra conv layers. Default: None
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters.
+            If num_deconv_layers > 0, the length of num_deconv_filters
+            should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ cat_output (list[bool]): Option to concat outputs.
+ with_ae_loss (list[bool]): Option to use ae loss.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ tag_per_joint=True,
+ extra=None,
+ num_deconv_layers=1,
+ num_deconv_filters=(32, ),
+ num_deconv_kernels=(4, ),
+ num_basic_blocks=4,
+ cat_output=None,
+ with_ae_loss=None,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.loss = build_loss(loss_keypoint)
+ dim_tag = num_joints if tag_per_joint else 1
+
+ self.num_deconvs = num_deconv_layers
+ self.cat_output = cat_output
+
+ final_layer_output_channels = []
+
+ if with_ae_loss[0]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+
+ final_layer_output_channels.append(out_channels)
+ for i in range(num_deconv_layers):
+ if with_ae_loss[i + 1]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+ final_layer_output_channels.append(out_channels)
+
+ deconv_layer_output_channels = []
+ for i in range(num_deconv_layers):
+ if with_ae_loss[i]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+ deconv_layer_output_channels.append(out_channels)
+
+ self.final_layers = self._make_final_layers(
+ in_channels, final_layer_output_channels, extra, num_deconv_layers,
+ num_deconv_filters)
+ self.deconv_layers = self._make_deconv_layers(
+ in_channels, deconv_layer_output_channels, num_deconv_layers,
+ num_deconv_filters, num_deconv_kernels, num_basic_blocks,
+ cat_output)
+
+ @staticmethod
+ def _make_final_layers(in_channels, final_layer_output_channels, extra,
+ num_deconv_layers, num_deconv_filters):
+ """Make final layers."""
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ else:
+ padding = 0
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ final_layers = []
+ final_layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=final_layer_output_channels[0],
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ for i in range(num_deconv_layers):
+ in_channels = num_deconv_filters[i]
+ final_layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=final_layer_output_channels[i + 1],
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ return nn.ModuleList(final_layers)
+
+ def _make_deconv_layers(self, in_channels, deconv_layer_output_channels,
+ num_deconv_layers, num_deconv_filters,
+ num_deconv_kernels, num_basic_blocks, cat_output):
+ """Make deconv layers."""
+ deconv_layers = []
+ for i in range(num_deconv_layers):
+ if cat_output[i]:
+ in_channels += deconv_layer_output_channels[i]
+
+ planes = num_deconv_filters[i]
+ deconv_kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_deconv_kernels[i])
+
+ layers = []
+ layers.append(
+ nn.Sequential(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=in_channels,
+ out_channels=planes,
+ kernel_size=deconv_kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False), nn.BatchNorm2d(planes, momentum=0.1),
+ nn.ReLU(inplace=True)))
+ for _ in range(num_basic_blocks):
+ layers.append(nn.Sequential(BasicBlock(planes, planes), ))
+ deconv_layers.append(nn.Sequential(*layers))
+ in_channels = planes
+
+ return nn.ModuleList(deconv_layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def get_loss(self, outputs, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ outputs (list(torch.Tensor[N,K,H,W])): Multi-scale output heatmaps.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ """
+
+ losses = dict()
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ outputs, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
+
+ def forward(self, x):
+ """Forward function."""
+ if isinstance(x, list):
+ x = x[0]
+
+ final_outputs = []
+ y = self.final_layers[0](x)
+ final_outputs.append(y)
+
+ for i in range(self.num_deconvs):
+ if self.cat_output[i]:
+ x = torch.cat((x, y), 1)
+
+ x = self.deconv_layers[i](x)
+ y = self.final_layers[i + 1](x)
+ final_outputs.append(y)
+
+ return final_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for _, m in self.final_layers.named_modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
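+
+
+# Hedged, self-contained sanity check appended for illustration (not part of the
+# original mmpose file): each supported deconv kernel size maps to a
+# (padding, output_padding) pair for which a stride-2 ConvTranspose2d exactly
+# doubles the spatial size: out = (in - 1) * 2 - 2 * padding + kernel + output_padding.
+if __name__ == '__main__':
+    for _k in (2, 3, 4):
+        _kernel, _pad, _out_pad = AEHigherResolutionHead._get_deconv_cfg(_k)
+        assert _kernel - 2 * _pad + _out_pad == 2, 'deconv should double the resolution'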
diff --git a/mmpose/models/heads/ae_multi_stage_head.py b/mmpose/models/heads/ae_multi_stage_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..195666b27ed50402a073c9eff7c5579c710a36f6
--- /dev/null
+++ b/mmpose/models/heads/ae_multi_stage_head.py
@@ -0,0 +1,222 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init,
+ normal_init)
+
+from mmpose.models.builder import build_loss
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class AEMultiStageHead(nn.Module):
+ """Associative embedding multi-stage head.
+ paper ref: Alejandro Newell et al. "Associative
+ Embedding: End-to-end Learning for Joint Detection
+ and Grouping"
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters.
+            If num_deconv_layers > 0, the length of num_deconv_filters
+            should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.in_channels = in_channels
+ self.num_stages = num_stages
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ # build multi-stage deconv layers
+ self.multi_deconv_layers = nn.ModuleList([])
+ for _ in range(self.num_stages):
+ if num_deconv_layers > 0:
+ deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+ self.multi_deconv_layers.append(deconv_layers)
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ # build multi-stage final layers
+ self.multi_final_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if identity_final_layer:
+ final_layer = nn.Identity()
+ else:
+ final_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=num_deconv_filters[-1]
+ if num_deconv_layers > 0 else in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+ self.multi_final_layers.append(final_layer)
+
+ def get_loss(self, output, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+ - heatmaps weight: W
+
+ Args:
+ output (List(torch.Tensor[NxKxHxW])): Output heatmaps.
+            targets (List(List(torch.Tensor[NxKxHxW]))):
+                Multi-stage and multi-scale target heatmaps.
+            masks (List(List(torch.Tensor[NxHxW]))):
+                Masks of multi-stage and multi-scale target heatmaps.
+            joints (List(List(torch.Tensor[NxMxKx2]))):
+                Joints of multi-stage and multi-scale target heatmaps for ae loss.
+ """
+
+ losses = dict()
+
+ # Flatten list:
+ # [stage_1_scale_1, stage_1_scale_2, ... , stage_1_scale_m,
+ # ...
+ # stage_n_scale_1, stage_n_scale_2, ... , stage_n_scale_m]
+ targets = [target for _targets in targets for target in _targets]
+ masks = [mask for _masks in masks for mask in _masks]
+ joints = [joint for _joints in joints for joint in _joints]
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ output, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages.
+ """
+ out = []
+ assert isinstance(x, list)
+ for i in range(self.num_stages):
+ y = self.multi_deconv_layers[i](x[i])
+ y = self.multi_final_layers[i](y)
+ out.append(y)
+ return out
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.multi_deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.multi_final_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
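+
+
+# Hedged usage note (added comment, not part of the original mmpose file):
+# `forward` expects `x` to be a list holding exactly `num_stages` feature maps,
+# e.g. for num_stages=2 a hypothetical input is
+# [Tensor(N, C, H, W), Tensor(N, C, H, W)], and it returns one heatmap tensor
+# per stage after that stage's deconv and final conv layers.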
diff --git a/mmpose/models/heads/ae_simple_head.py b/mmpose/models/heads/ae_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..9297f71fd319ab26700f90d797fdd7fea508cb7a
--- /dev/null
+++ b/mmpose/models/heads/ae_simple_head.py
@@ -0,0 +1,99 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ..builder import HEADS
+from .deconv_head import DeconvHead
+
+
+@HEADS.register_module()
+class AESimpleHead(DeconvHead):
+ """Associative embedding simple head.
+ paper ref: Alejandro Newell et al. "Associative
+ Embedding: End-to-end Learning for Joint Detection
+ and Grouping"
+
+ Args:
+ in_channels (int): Number of input channels.
+ num_joints (int): Number of joints.
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters.
+            If num_deconv_layers > 0, the length of num_deconv_filters
+            should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ tag_per_joint (bool): If tag_per_joint is True,
+ the dimension of tags equals to num_joints,
+ else the dimension of tags is 1. Default: True
+ with_ae_loss (list[bool]): Option to use ae loss or not.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ tag_per_joint=True,
+ with_ae_loss=None,
+ extra=None,
+ loss_keypoint=None):
+
+ dim_tag = num_joints if tag_per_joint else 1
+ if with_ae_loss[0]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+
+ super().__init__(
+ in_channels,
+ out_channels,
+ num_deconv_layers=num_deconv_layers,
+ num_deconv_filters=num_deconv_filters,
+ num_deconv_kernels=num_deconv_kernels,
+ extra=extra,
+ loss_keypoint=loss_keypoint)
+
+ def get_loss(self, outputs, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ outputs (list(torch.Tensor[N,K,H,W])): Multi-scale output heatmaps.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+            joints (List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ """
+
+ losses = dict()
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ outputs, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
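+
+
+# Hedged note on the output layout (added comment, not part of the original
+# mmpose file): with tag_per_joint=True the tag dimension equals num_joints, so
+# for num_joints=17 and with_ae_loss=[True] the head predicts a single map with
+# 17 + 17 = 34 channels (keypoint heatmaps plus associative-embedding tags);
+# with with_ae_loss=[False] it predicts only the 17 heatmap channels.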
diff --git a/mmpose/models/heads/deconv_head.py b/mmpose/models/heads/deconv_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..90846d27af46d65091f4ad7e0e6687377ebd86e1
--- /dev/null
+++ b/mmpose/models/heads/deconv_head.py
@@ -0,0 +1,295 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.models.builder import HEADS, build_loss
+from mmpose.models.utils.ops import resize
+
+
+@HEADS.register_module()
+class DeconvHead(nn.Module):
+ """Simple deconv head.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters.
+            If num_deconv_layers > 0, the length of num_deconv_filters
+            should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels=3,
+ out_channels=17,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def get_loss(self, outputs, targets, masks):
+ """Calculate bottom-up masked mse loss.
+
+ Note:
+ - batch_size: N
+ - num_channels: C
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ outputs (List(torch.Tensor[N,C,H,W])): Multi-scale outputs.
+ targets (List(torch.Tensor[N,C,H,W])): Multi-scale targets.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale targets.
+ """
+
+ losses = dict()
+
+ for idx in range(len(targets)):
+ if 'loss' not in losses:
+ losses['loss'] = self.loss(outputs[idx], targets[idx],
+ masks[idx])
+ else:
+ losses['loss'] += self.loss(outputs[idx], targets[idx],
+ masks[idx])
+
+ return losses
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ final_outputs = []
+ x = self.deconv_layers(x)
+ y = self.final_layer(x)
+ final_outputs.append(y)
+ return final_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
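+
+
+# Hedged usage sketch appended for illustration (not part of the original mmpose
+# file). It assumes the stock `JointsMSELoss` is available in the LOSSES
+# registry; the sizes below are arbitrary. Three stride-2 deconv layers upsample
+# 64x64 features to 512x512 heatmaps.
+if __name__ == '__main__':
+    _head = DeconvHead(
+        in_channels=32,
+        out_channels=17,
+        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+    _head.init_weights()
+    _out = _head(torch.randn(1, 32, 64, 64))
+    assert _out[0].shape == (1, 17, 512, 512)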
diff --git a/mmpose/models/heads/deeppose_regression_head.py b/mmpose/models/heads/deeppose_regression_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..f326e26fa624bd99e9603ad28ff71dccb29b5638
--- /dev/null
+++ b/mmpose/models/heads/deeppose_regression_head.py
@@ -0,0 +1,176 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmpose.core.evaluation import (keypoint_pck_accuracy,
+ keypoints_from_regression)
+from mmpose.core.post_processing import fliplr_regression
+from mmpose.models.builder import HEADS, build_loss
+
+
+@HEADS.register_module()
+class DeepposeRegressionHead(nn.Module):
+ """Deeppose regression head with fully connected layers.
+
+ "DeepPose: Human Pose Estimation via Deep Neural Networks".
+
+ Args:
+ in_channels (int): Number of input channels
+ num_joints (int): Number of joints
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_joints = num_joints
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+
+ self.fc = nn.Linear(self.in_channels, self.num_joints * 2)
+
+ def forward(self, x):
+ """Forward function."""
+ output = self.fc(x)
+ N, C = output.shape
+ return output.reshape([N, C // 2, 2])
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output keypoints.
+ target (torch.Tensor[N, K, 2]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 3 and target_weight.dim() == 3
+ losses['reg_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output keypoints.
+ target (torch.Tensor[N, K, 2]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ N = output.shape[0]
+
+ _, avg_acc, cnt = keypoint_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight[:, :, 0].detach().cpu().numpy() > 0,
+ thr=0.05,
+ normalize=np.ones((N, 2), dtype=np.float32))
+ accuracy['acc_pose'] = avg_acc
+
+ return accuracy
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_regression (np.ndarray): Output regression.
+
+ Args:
+            x (torch.Tensor[N, C]): Input features.
+            flip_pairs (None | list[tuple()]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_regression = fliplr_regression(
+ output.detach().cpu().numpy(), flip_pairs)
+ else:
+ output_regression = output.detach().cpu().numpy()
+ return output_regression
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode the keypoints from output regression.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ output (np.ndarray[N, K, 2]): predicted regression vector.
+ kwargs: dict contains 'img_size'.
+ img_size (tuple(img_width, img_height)): input image size.
+ """
+ batch_size = len(img_metas)
+
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ c = np.zeros((batch_size, 2), dtype=np.float32)
+ s = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size)
+ for i in range(batch_size):
+ c[i, :] = img_metas[i]['center']
+ s[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ preds, maxvals = keypoints_from_regression(output, c, s,
+ kwargs['img_size'])
+
+ all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_preds[:, :, 0:2] = preds[:, :, 0:2]
+ all_preds[:, :, 2:3] = maxvals
+ all_boxes[:, 0:2] = c[:, 0:2]
+ all_boxes[:, 2:4] = s[:, 0:2]
+ all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
+ all_boxes[:, 5] = score
+
+ result = {}
+
+ result['preds'] = all_preds
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ return result
+
+ def init_weights(self):
+ normal_init(self.fc, mean=0, std=0.01, bias=0)
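+
+
+# Hedged usage sketch appended for illustration (not part of the original mmpose
+# file). It assumes the stock `SmoothL1Loss` regression loss is available in the
+# LOSSES registry; the sizes below are arbitrary. The head maps a pooled feature
+# vector to K (x, y) coordinates.
+if __name__ == '__main__':
+    import torch  # local import: this module itself only imports torch.nn
+
+    _head = DeepposeRegressionHead(
+        in_channels=2048,
+        num_joints=17,
+        loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True))
+    _head.init_weights()
+    _pred = _head(torch.randn(4, 2048))
+    assert _pred.shape == (4, 17, 2)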
diff --git a/mmpose/models/heads/hmr_head.py b/mmpose/models/heads/hmr_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..015a3076bcba53d1590de226fab39444708cb3f9
--- /dev/null
+++ b/mmpose/models/heads/hmr_head.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import xavier_init
+
+from ..builder import HEADS
+from ..utils.geometry import rot6d_to_rotmat
+
+
+@HEADS.register_module()
+class HMRMeshHead(nn.Module):
+ """SMPL parameters regressor head of simple baseline. "End-to-end Recovery
+ of Human Shape and Pose", CVPR'2018.
+
+ Args:
+ in_channels (int): Number of input channels
+ smpl_mean_params (str): The file name of the mean SMPL parameters
+ n_iter (int): The iterations of estimating delta parameters
+ """
+
+ def __init__(self, in_channels, smpl_mean_params=None, n_iter=3):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.n_iter = n_iter
+
+ npose = 24 * 6
+ nbeta = 10
+ ncam = 3
+ hidden_dim = 1024
+
+ self.fc1 = nn.Linear(in_channels + npose + nbeta + ncam, hidden_dim)
+ self.drop1 = nn.Dropout()
+ self.fc2 = nn.Linear(hidden_dim, hidden_dim)
+ self.drop2 = nn.Dropout()
+ self.decpose = nn.Linear(hidden_dim, npose)
+ self.decshape = nn.Linear(hidden_dim, nbeta)
+ self.deccam = nn.Linear(hidden_dim, ncam)
+
+ # Load mean SMPL parameters
+ if smpl_mean_params is None:
+ init_pose = torch.zeros([1, npose])
+ init_shape = torch.zeros([1, nbeta])
+ init_cam = torch.FloatTensor([[1, 0, 0]])
+ else:
+ mean_params = np.load(smpl_mean_params)
+ init_pose = torch.from_numpy(
+ mean_params['pose'][:]).unsqueeze(0).float()
+ init_shape = torch.from_numpy(
+ mean_params['shape'][:]).unsqueeze(0).float()
+ init_cam = torch.from_numpy(
+ mean_params['cam']).unsqueeze(0).float()
+ self.register_buffer('init_pose', init_pose)
+ self.register_buffer('init_shape', init_shape)
+ self.register_buffer('init_cam', init_cam)
+
+ def forward(self, x):
+ """Forward function.
+
+ x is the image feature map and is expected to be in shape (batch size x
+ channel number x height x width)
+ """
+ batch_size = x.shape[0]
+ # extract the global feature vector by average along
+ # spatial dimension.
+ x = x.mean(dim=-1).mean(dim=-1)
+
+ init_pose = self.init_pose.expand(batch_size, -1)
+ init_shape = self.init_shape.expand(batch_size, -1)
+ init_cam = self.init_cam.expand(batch_size, -1)
+
+ pred_pose = init_pose
+ pred_shape = init_shape
+ pred_cam = init_cam
+ for _ in range(self.n_iter):
+ xc = torch.cat([x, pred_pose, pred_shape, pred_cam], 1)
+ xc = self.fc1(xc)
+ xc = self.drop1(xc)
+ xc = self.fc2(xc)
+ xc = self.drop2(xc)
+ pred_pose = self.decpose(xc) + pred_pose
+ pred_shape = self.decshape(xc) + pred_shape
+ pred_cam = self.deccam(xc) + pred_cam
+
+ pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3)
+ out = (pred_rotmat, pred_shape, pred_cam)
+ return out
+
+ def init_weights(self):
+ """Initialize model weights."""
+ xavier_init(self.decpose, gain=0.01)
+ xavier_init(self.decshape, gain=0.01)
+ xavier_init(self.deccam, gain=0.01)
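+
+
+# Hedged usage sketch appended for illustration (not part of the original mmpose
+# file). With smpl_mean_params=None the iterative regressor starts from a zero
+# pose/shape and a default camera, and each of the n_iter steps adds a residual
+# update to those estimates.
+if __name__ == '__main__':
+    _head = HMRMeshHead(in_channels=2048)
+    _head.init_weights()
+    _rotmat, _betas, _cam = _head(torch.randn(2, 2048, 7, 7))
+    assert _rotmat.shape == (2, 24, 3, 3)
+    assert _betas.shape == (2, 10) and _cam.shape == (2, 3)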
diff --git a/mmpose/models/heads/interhand_3d_head.py b/mmpose/models/heads/interhand_3d_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..aebe4a5f61e5fd1dcd5ecfb64962f88da94d5664
--- /dev/null
+++ b/mmpose/models/heads/interhand_3d_head.py
@@ -0,0 +1,521 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation.top_down_eval import (
+ keypoints_from_heatmaps3d, multilabel_classification_accuracy)
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.necks import GlobalAveragePooling
+from ..builder import HEADS
+
+
+class Heatmap3DHead(nn.Module):
+ """Heatmap3DHead is a sub-module of Interhand3DHead, and outputs 3D
+ heatmaps. Heatmap3DHead is composed of (>=0) number of deconv layers and a
+ simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+ depth_size (int): Number of depth discretization size
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ extra (dict): Configs for extra conv layers. Default: None
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ depth_size=64,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None):
+
+ super().__init__()
+
+ assert out_channels % depth_size == 0
+ self.depth_size = depth_size
+ self.in_channels = in_channels
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ N, C, H, W = x.shape
+ # reshape the 2D heatmap to 3D heatmap
+ x = x.reshape(N, C // self.depth_size, self.depth_size, H, W)
+ return x
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+
+
+class Heatmap1DHead(nn.Module):
+ """Heatmap1DHead is a sub-module of Interhand3DHead, and outputs 1D
+ heatmaps.
+
+ Args:
+ in_channels (int): Number of input channels
+ heatmap_size (int): Heatmap size
+ hidden_dims (list|tuple): Number of feature dimension of FC layers.
+ """
+
+ def __init__(self, in_channels=2048, heatmap_size=64, hidden_dims=(512, )):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.heatmap_size = heatmap_size
+
+ feature_dims = [in_channels, *hidden_dims, heatmap_size]
+ self.fc = self._make_linear_layers(feature_dims, relu_final=False)
+
+ def soft_argmax_1d(self, heatmap1d):
+ heatmap1d = F.softmax(heatmap1d, 1)
+ accu = heatmap1d * torch.arange(
+ self.heatmap_size, dtype=heatmap1d.dtype,
+ device=heatmap1d.device)[None, :]
+ coord = accu.sum(dim=1)
+ return coord
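+
+    # Hedged worked example (added comment, not part of the original mmpose
+    # file): with heatmap_size=3 and uniform logits [0, 0, 0], softmax gives
+    # [1/3, 1/3, 1/3] and the soft-argmax coordinate is
+    # 0 * 1/3 + 1 * 1/3 + 2 * 1/3 = 1.0, i.e. a differentiable expected index
+    # rather than a hard argmax.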
+
+ def _make_linear_layers(self, feat_dims, relu_final=False):
+ """Make linear layers."""
+ layers = []
+ for i in range(len(feat_dims) - 1):
+ layers.append(nn.Linear(feat_dims[i], feat_dims[i + 1]))
+ if i < len(feat_dims) - 2 or \
+ (i == len(feat_dims) - 2 and relu_final):
+ layers.append(nn.ReLU(inplace=True))
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Forward function."""
+ heatmap1d = self.fc(x)
+ value = self.soft_argmax_1d(heatmap1d).view(-1, 1)
+ return value
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.fc.modules():
+ if isinstance(m, nn.Linear):
+ normal_init(m, mean=0, std=0.01, bias=0)
+
+
+class MultilabelClassificationHead(nn.Module):
+ """MultilabelClassificationHead is a sub-module of Interhand3DHead, and
+ outputs hand type classification.
+
+ Args:
+ in_channels (int): Number of input channels
+ num_labels (int): Number of labels
+ hidden_dims (list|tuple): Number of hidden dimension of FC layers.
+ """
+
+ def __init__(self, in_channels=2048, num_labels=2, hidden_dims=(512, )):
+ super().__init__()
+
+ self.in_channels = in_channels
+        self.num_labels = num_labels
+
+ feature_dims = [in_channels, *hidden_dims, num_labels]
+ self.fc = self._make_linear_layers(feature_dims, relu_final=False)
+
+ def _make_linear_layers(self, feat_dims, relu_final=False):
+ """Make linear layers."""
+ layers = []
+ for i in range(len(feat_dims) - 1):
+ layers.append(nn.Linear(feat_dims[i], feat_dims[i + 1]))
+ if i < len(feat_dims) - 2 or \
+ (i == len(feat_dims) - 2 and relu_final):
+ layers.append(nn.ReLU(inplace=True))
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Forward function."""
+ labels = torch.sigmoid(self.fc(x))
+ return labels
+
+ def init_weights(self):
+ for m in self.fc.modules():
+ if isinstance(m, nn.Linear):
+ normal_init(m, mean=0, std=0.01, bias=0)
+
+
+@HEADS.register_module()
+class Interhand3DHead(nn.Module):
+ """Interhand 3D head of paper ref: Gyeongsik Moon. "InterHand2.6M: A
+ Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single
+ RGB Image".
+
+ Args:
+ keypoint_head_cfg (dict): Configs of Heatmap3DHead for hand
+ keypoint estimation.
+ root_head_cfg (dict): Configs of Heatmap1DHead for relative
+ hand root depth estimation.
+ hand_type_head_cfg (dict): Configs of MultilabelClassificationHead
+ for hand type classification.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ loss_root_depth (dict): Config for relative root depth loss.
+ Default: None.
+ loss_hand_type (dict): Config for hand type classification
+ loss. Default: None.
+ """
+
+ def __init__(self,
+ keypoint_head_cfg,
+ root_head_cfg,
+ hand_type_head_cfg,
+ loss_keypoint=None,
+ loss_root_depth=None,
+ loss_hand_type=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ # build sub-module heads
+ self.right_hand_head = Heatmap3DHead(**keypoint_head_cfg)
+ self.left_hand_head = Heatmap3DHead(**keypoint_head_cfg)
+ self.root_head = Heatmap1DHead(**root_head_cfg)
+ self.hand_type_head = MultilabelClassificationHead(
+ **hand_type_head_cfg)
+ self.neck = GlobalAveragePooling()
+
+ # build losses
+ self.keypoint_loss = build_loss(loss_keypoint)
+ self.root_depth_loss = build_loss(loss_root_depth)
+ self.hand_type_loss = build_loss(loss_hand_type)
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ def init_weights(self):
+ self.left_hand_head.init_weights()
+ self.right_hand_head.init_weights()
+ self.root_head.init_weights()
+ self.hand_type_head.init_weights()
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate loss for hand keypoint heatmaps, relative root depth and
+ hand type.
+
+ Args:
+ output (list[Tensor]): a list of outputs from multiple heads.
+ target (list[Tensor]): a list of targets for multiple heads.
+            target_weight (list[Tensor]): a list of target weights for
+ multiple heads.
+ """
+ losses = dict()
+
+ # hand keypoint loss
+ assert not isinstance(self.keypoint_loss, nn.Sequential)
+ out, tar, tar_weight = output[0], target[0], target_weight[0]
+ assert tar.dim() == 5 and tar_weight.dim() == 3
+ losses['hand_loss'] = self.keypoint_loss(out, tar, tar_weight)
+
+ # relative root depth loss
+ assert not isinstance(self.root_depth_loss, nn.Sequential)
+ out, tar, tar_weight = output[1], target[1], target_weight[1]
+ assert tar.dim() == 2 and tar_weight.dim() == 2
+ losses['rel_root_loss'] = self.root_depth_loss(out, tar, tar_weight)
+
+ # hand type loss
+ assert not isinstance(self.hand_type_loss, nn.Sequential)
+ out, tar, tar_weight = output[2], target[2], target_weight[2]
+ assert tar.dim() == 2 and tar_weight.dim() in [1, 2]
+ losses['hand_type_loss'] = self.hand_type_loss(out, tar, tar_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for hand type.
+
+ Args:
+ output (list[Tensor]): a list of outputs from multiple heads.
+ target (list[Tensor]): a list of targets for multiple heads.
+            target_weight (list[Tensor]): a list of target weights for
+ multiple heads.
+ """
+ accuracy = dict()
+ avg_acc = multilabel_classification_accuracy(
+ output[2].detach().cpu().numpy(),
+ target[2].detach().cpu().numpy(),
+ target_weight[2].detach().cpu().numpy(),
+ )
+ accuracy['acc_classification'] = float(avg_acc)
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ outputs = []
+ outputs.append(
+ torch.cat([self.right_hand_head(x),
+ self.left_hand_head(x)], dim=1))
+ x = self.neck(x)
+ outputs.append(self.root_head(x))
+ outputs.append(self.hand_type_head(x))
+ return outputs
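+
+    # Output structure of forward(), assuming each Heatmap3DHead emits volumes
+    # of shape [N, K, D, H, W]:
+    #   outputs[0]: right/left 3D heatmaps concatenated on dim 1 -> [N, 2K, D, H, W]
+    #   outputs[1]: relative root depth from Heatmap1DHead -> [N, 1]
+    #   outputs[2]: hand-type probabilities (after sigmoid) -> [N, 2]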
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output (list[np.ndarray]): list of output hand keypoint
+ heatmaps, relative root depth and hand type.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ # flip 3D heatmap
+ heatmap_3d = output[0]
+ N, K, D, H, W = heatmap_3d.shape
+ # reshape 3D heatmap to 2D heatmap
+ heatmap_3d = heatmap_3d.reshape(N, K * D, H, W)
+ # 2D heatmap flip
+ heatmap_3d_flipped_back = flip_back(
+ heatmap_3d.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # reshape back to 3D heatmap
+ heatmap_3d_flipped_back = heatmap_3d_flipped_back.reshape(
+ N, K, D, H, W)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ heatmap_3d_flipped_back[...,
+ 1:] = heatmap_3d_flipped_back[..., :-1]
+ output[0] = heatmap_3d_flipped_back
+
+ # flip relative hand root depth
+ output[1] = -output[1].detach().cpu().numpy()
+
+ # flip hand type
+ hand_type = output[2].detach().cpu().numpy()
+ hand_type_flipped_back = hand_type.copy()
+ hand_type_flipped_back[:, 0] = hand_type[:, 1]
+ hand_type_flipped_back[:, 1] = hand_type[:, 0]
+ output[2] = hand_type_flipped_back
+ else:
+ output = [out.detach().cpu().numpy() for out in output]
+
+ return output
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode hand keypoint, relative root depth and hand type.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ - "heatmap3d_depth_bound": depth bound of hand keypoint
+ 3D heatmap
+ - "root_depth_bound": depth bound of relative root depth
+ 1D heatmap
+ output (list[np.ndarray]): model predicted 3D heatmaps, relative
+ root depth and hand type.
+ """
+
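+        # The returned dict (built below) contains:
+        #   'preds'          [N, K, 4] keypoints (x, y, z, score), with z
+        #                    mapped into the per-sample heatmap3d_depth_bound
+        #   'rel_root_depth' [N, 1] relative hand root depth
+        #   'hand_type'      [N, 2] boolean hand-type flags (prob > 0.5)
+        #   'boxes'          [N, 6] center (2), scale (2), area, score
+        #   plus 'image_paths' and 'bbox_ids'.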
+ batch_size = len(img_metas)
+ result = {}
+
+ heatmap3d_depth_bound = np.ones(batch_size, dtype=np.float32)
+ root_depth_bound = np.ones(batch_size, dtype=np.float32)
+ center = np.zeros((batch_size, 2), dtype=np.float32)
+ scale = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size, dtype=np.float32)
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ for i in range(batch_size):
+ heatmap3d_depth_bound[i] = img_metas[i]['heatmap3d_depth_bound']
+ root_depth_bound[i] = img_metas[i]['root_depth_bound']
+ center[i, :] = img_metas[i]['center']
+ scale[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_boxes[:, 0:2] = center[:, 0:2]
+ all_boxes[:, 2:4] = scale[:, 0:2]
+ # scale is defined as: bbox_size / 200.0, so we
+ # need multiply 200.0 to get bbox size
+ all_boxes[:, 4] = np.prod(scale * 200.0, axis=1)
+ all_boxes[:, 5] = score
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ # decode 3D heatmaps of hand keypoints
+ heatmap3d = output[0]
+ preds, maxvals = keypoints_from_heatmaps3d(heatmap3d, center, scale)
+ keypoints_3d = np.zeros((batch_size, preds.shape[1], 4),
+ dtype=np.float32)
+ keypoints_3d[:, :, 0:3] = preds[:, :, 0:3]
+ keypoints_3d[:, :, 3:4] = maxvals
+ # transform keypoint depth to camera space
+ keypoints_3d[:, :, 2] = \
+ (keypoints_3d[:, :, 2] / self.right_hand_head.depth_size - 0.5) \
+ * heatmap3d_depth_bound[:, np.newaxis]
+
+ result['preds'] = keypoints_3d
+
+ # decode relative hand root depth
+ # transform relative root depth to camera space
+ result['rel_root_depth'] = (output[1] / self.root_head.heatmap_size -
+ 0.5) * root_depth_bound
+
+ # decode hand type
+ result['hand_type'] = output[2] > 0.5
+ return result
diff --git a/mmpose/models/heads/temporal_regression_head.py b/mmpose/models/heads/temporal_regression_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..97a07f9cf2c9ef0497380ca5c602142b206f3b52
--- /dev/null
+++ b/mmpose/models/heads/temporal_regression_head.py
@@ -0,0 +1,319 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import build_conv_layer, constant_init, kaiming_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.core import (WeightNormClipHook, compute_similarity_transform,
+ fliplr_regression)
+from mmpose.models.builder import HEADS, build_loss
+
+
+@HEADS.register_module()
+class TemporalRegressionHead(nn.Module):
+ """Regression head of VideoPose3D.
+
+ "3D human pose estimation in video with temporal convolutions and
+ semi-supervised training", CVPR'2019.
+
+ Args:
+ in_channels (int): Number of input channels
+ num_joints (int): Number of joints
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ max_norm (float|None): if not None, the weight of convolution layers
+ will be clipped to have a maximum norm of max_norm.
+ is_trajectory (bool): If the model only predicts root joint
+ position, then this arg should be set to True. In this case,
+ traj_loss will be calculated. Otherwise, it should be set to
+ False. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ max_norm=None,
+ loss_keypoint=None,
+ is_trajectory=False,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_joints = num_joints
+ self.max_norm = max_norm
+ self.loss = build_loss(loss_keypoint)
+ self.is_trajectory = is_trajectory
+ if self.is_trajectory:
+ assert self.num_joints == 1
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+
+ self.conv = build_conv_layer(
+ dict(type='Conv1d'), in_channels, num_joints * 3, 1)
+
+ if self.max_norm is not None:
+ # Apply weight norm clip to conv layers
+ weight_clip = WeightNormClipHook(self.max_norm)
+ for module in self.modules():
+ if isinstance(module, nn.modules.conv._ConvNd):
+ weight_clip.register(module)
+
+ @staticmethod
+ def _transform_inputs(x):
+ """Transform inputs for decoder.
+
+ Args:
+            x (tuple | list[Tensor] | Tensor): multi-level features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(x, (list, tuple)):
+ return x
+
+ assert len(x) > 0
+
+ # return the top-level feature of the 1D feature pyramid
+ return x[-1]
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+
+ assert x.ndim == 3 and x.shape[2] == 1, f'Invalid shape {x.shape}'
+ output = self.conv(x)
+ N = output.shape[0]
+ return output.reshape(N, self.num_joints, 3)
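+
+    # Usage sketch (illustrative values only; 'MPJPELoss' is one possible
+    # loss_keypoint config, not mandated by this head):
+    #   head = TemporalRegressionHead(in_channels=1024, num_joints=17,
+    #                                 loss_keypoint=dict(type='MPJPELoss'))
+    #   out = head(torch.randn(8, 1024, 1))   # -> torch.Size([8, 17, 3])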
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 3]): Output keypoints.
+ target (torch.Tensor[N, K, 3]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 3]):
+ Weights across different joint types.
+ If self.is_trajectory is True and target_weight is None,
+ target_weight will be set inversely proportional to joint
+ depth.
+ """
+ losses = dict()
+ assert not isinstance(self.loss, nn.Sequential)
+
+ # trajectory model
+ if self.is_trajectory:
+ if target.dim() == 2:
+ target.unsqueeze_(1)
+
+ if target_weight is None:
+ target_weight = (1 / target[:, :, 2:]).expand(target.shape)
+ assert target.dim() == 3 and target_weight.dim() == 3
+
+ losses['traj_loss'] = self.loss(output, target, target_weight)
+
+ # pose model
+ else:
+ if target_weight is None:
+ target_weight = target.new_ones(target.shape)
+ assert target.dim() == 3 and target_weight.dim() == 3
+ losses['reg_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight, metas):
+ """Calculate accuracy for keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 3]): Output keypoints.
+ target (torch.Tensor[N, K, 3]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 3]):
+ Weights across different joint types.
+ metas (list(dict)): Information about data augmentation including:
+
+ - target_image_path (str): Optional, path to the image file
+ - target_mean (float): Optional, normalization parameter of
+ the target pose.
+ - target_std (float): Optional, normalization parameter of the
+ target pose.
+ - root_position (np.ndarray[3,1]): Optional, global
+ position of the root joint.
+                - root_index (np.ndarray[1,]): Optional, original index of
+ the root joint before root-centering.
+ """
+
+ accuracy = dict()
+
+ N = output.shape[0]
+ output_ = output.detach().cpu().numpy()
+ target_ = target.detach().cpu().numpy()
+ # Denormalize the predicted pose
+ if 'target_mean' in metas[0] and 'target_std' in metas[0]:
+ target_mean = np.stack([m['target_mean'] for m in metas])
+ target_std = np.stack([m['target_std'] for m in metas])
+ output_ = self._denormalize_joints(output_, target_mean,
+ target_std)
+ target_ = self._denormalize_joints(target_, target_mean,
+ target_std)
+
+ # Restore global position
+ if self.test_cfg.get('restore_global_position', False):
+ root_pos = np.stack([m['root_position'] for m in metas])
+ root_idx = metas[0].get('root_position_index', None)
+ output_ = self._restore_global_position(output_, root_pos,
+ root_idx)
+ target_ = self._restore_global_position(target_, root_pos,
+ root_idx)
+ # Get target weight
+ if target_weight is None:
+ target_weight_ = np.ones_like(target_)
+ else:
+ target_weight_ = target_weight.detach().cpu().numpy()
+ if self.test_cfg.get('restore_global_position', False):
+ root_idx = metas[0].get('root_position_index', None)
+ root_weight = metas[0].get('root_joint_weight', 1.0)
+ target_weight_ = self._restore_root_target_weight(
+ target_weight_, root_weight, root_idx)
+
+ mpjpe = np.mean(
+ np.linalg.norm((output_ - target_) * target_weight_, axis=-1))
+
+ transformed_output = np.zeros_like(output_)
+ for i in range(N):
+ transformed_output[i, :, :] = compute_similarity_transform(
+ output_[i, :, :], target_[i, :, :])
+ p_mpjpe = np.mean(
+ np.linalg.norm(
+ (transformed_output - target_) * target_weight_, axis=-1))
+
+ accuracy['mpjpe'] = output.new_tensor(mpjpe)
+ accuracy['p_mpjpe'] = output.new_tensor(p_mpjpe)
+
+ return accuracy
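+
+    # 'mpjpe' above is the mean per-joint position error in the denormalized
+    # (and optionally globally restored) frame; 'p_mpjpe' is the same error
+    # after rigidly aligning each prediction to its target via
+    # compute_similarity_transform (Procrustes alignment).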
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_regression (np.ndarray): Output regression.
+
+ Args:
+ x (torch.Tensor[N, K, 2]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_regression = fliplr_regression(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ center_mode='static',
+ center_x=0)
+ else:
+ output_regression = output.detach().cpu().numpy()
+ return output_regression
+
+ def decode(self, metas, output):
+ """Decode the keypoints from output regression.
+
+ Args:
+            output (np.ndarray[N, K, 3]): predicted regression vector.
+            metas (list(dict)): Information about data augmentation including:
+
+ - target_image_path (str): Optional, path to the image file
+ - target_mean (float): Optional, normalization parameter of
+ the target pose.
+ - target_std (float): Optional, normalization parameter of the
+ target pose.
+ - root_position (np.ndarray[3,1]): Optional, global
+ position of the root joint.
+                - root_index (np.ndarray[1,]): Optional, original index of
+ the root joint before root-centering.
+ """
+
+ # Denormalize the predicted pose
+ if 'target_mean' in metas[0] and 'target_std' in metas[0]:
+ target_mean = np.stack([m['target_mean'] for m in metas])
+ target_std = np.stack([m['target_std'] for m in metas])
+ output = self._denormalize_joints(output, target_mean, target_std)
+
+ # Restore global position
+ if self.test_cfg.get('restore_global_position', False):
+ root_pos = np.stack([m['root_position'] for m in metas])
+ root_idx = metas[0].get('root_position_index', None)
+ output = self._restore_global_position(output, root_pos, root_idx)
+
+ target_image_paths = [m.get('target_image_path', None) for m in metas]
+ result = {'preds': output, 'target_image_paths': target_image_paths}
+
+ return result
+
+ @staticmethod
+ def _denormalize_joints(x, mean, std):
+ """Denormalize joint coordinates with given statistics mean and std.
+
+ Args:
+ x (np.ndarray[N, K, 3]): Normalized joint coordinates.
+            mean (np.ndarray[N, K, 3]): Mean values (same shape as x).
+            std (np.ndarray[N, K, 3]): Std values (same shape as x).
+ """
+ assert x.ndim == 3
+ assert x.shape == mean.shape == std.shape
+
+ return x * std + mean
+
+ @staticmethod
+ def _restore_global_position(x, root_pos, root_idx=None):
+ """Restore global position of the root-centered joints.
+
+ Args:
+ x (np.ndarray[N, K, 3]): root-centered joint coordinates
+ root_pos (np.ndarray[N,1,3]): The global position of the
+ root joint.
+ root_idx (int|None): If not none, the root joint will be inserted
+ back to the pose at the given index.
+ """
+ x = x + root_pos
+ if root_idx is not None:
+ x = np.insert(x, root_idx, root_pos.squeeze(1), axis=1)
+ return x
+
+ @staticmethod
+ def _restore_root_target_weight(target_weight, root_weight, root_idx=None):
+ """Restore the target weight of the root joint after the restoration of
+ the global position.
+
+ Args:
+ target_weight (np.ndarray[N, K, 1]): Target weight of relativized
+ joints.
+ root_weight (float): The target weight value of the root joint.
+ root_idx (int|None): If not none, the root joint weight will be
+ inserted back to the target weight at the given index.
+ """
+ if root_idx is not None:
+ root_weight = np.full(
+ target_weight.shape[0], root_weight, dtype=target_weight.dtype)
+ target_weight = np.insert(
+ target_weight, root_idx, root_weight[:, None], axis=1)
+ return target_weight
+
+ def init_weights(self):
+ """Initialize the weights."""
+ for m in self.modules():
+ if isinstance(m, nn.modules.conv._ConvNd):
+ kaiming_init(m, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
diff --git a/mmpose/models/heads/topdown_heatmap_base_head.py b/mmpose/models/heads/topdown_heatmap_base_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..09646ead353fb054f066b9fc6816748a43287e2c
--- /dev/null
+++ b/mmpose/models/heads/topdown_heatmap_base_head.py
@@ -0,0 +1,120 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import torch.nn as nn
+
+from mmpose.core.evaluation.top_down_eval import keypoints_from_heatmaps
+
+
+class TopdownHeatmapBaseHead(nn.Module):
+ """Base class for top-down heatmap heads.
+
+ All top-down heatmap heads should subclass it.
+    All subclasses should override the following methods:
+
+    - ``get_loss``: compute the loss.
+    - ``get_accuracy``: compute the accuracy.
+    - ``forward``: run the forward pass.
+    - ``inference_model``: run inference (e.g. with test-time flipping).
+ """
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def get_loss(self, **kwargs):
+ """Gets the loss."""
+
+ @abstractmethod
+ def get_accuracy(self, **kwargs):
+ """Gets the accuracy."""
+
+ @abstractmethod
+ def forward(self, **kwargs):
+ """Forward function."""
+
+ @abstractmethod
+ def inference_model(self, **kwargs):
+ """Inference function."""
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode keypoints from heatmaps.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ output (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ """
+ batch_size = len(img_metas)
+
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ c = np.zeros((batch_size, 2), dtype=np.float32)
+ s = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size)
+ for i in range(batch_size):
+ c[i, :] = img_metas[i]['center']
+ s[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ preds, maxvals = keypoints_from_heatmaps(
+ output,
+ c,
+ s,
+ unbiased=self.test_cfg.get('unbiased_decoding', False),
+ post_process=self.test_cfg.get('post_process', 'default'),
+ kernel=self.test_cfg.get('modulate_kernel', 11),
+ valid_radius_factor=self.test_cfg.get('valid_radius_factor',
+ 0.0546875),
+ use_udp=self.test_cfg.get('use_udp', False),
+ target_type=self.test_cfg.get('target_type', 'GaussianHeatmap'))
+
+ all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_preds[:, :, 0:2] = preds[:, :, 0:2]
+ all_preds[:, :, 2:3] = maxvals
+ all_boxes[:, 0:2] = c[:, 0:2]
+ all_boxes[:, 2:4] = s[:, 0:2]
+ all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
+ all_boxes[:, 5] = score
+
+ result = {}
+
+ result['preds'] = all_preds
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ return result
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+            raise ValueError(f'Unsupported deconv kernel size ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
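+
+
+# Note on _get_deconv_cfg: every supported (kernel, padding, output_padding)
+# combination exactly doubles the spatial size of a stride-2 ConvTranspose2d,
+# since out = (in - 1) * 2 - 2 * padding + kernel + output_padding = 2 * in
+# for kernel sizes 4, 3 and 2 with the paddings returned above.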
diff --git a/mmpose/models/heads/topdown_heatmap_multi_stage_head.py b/mmpose/models/heads/topdown_heatmap_multi_stage_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..c439f5b6332d72a66db75bf599035411c4e1e0d1
--- /dev/null
+++ b/mmpose/models/heads/topdown_heatmap_multi_stage_head.py
@@ -0,0 +1,572 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+
+import torch.nn as nn
+from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, Linear,
+ build_activation_layer, build_conv_layer,
+ build_norm_layer, build_upsample_layer, constant_init,
+ kaiming_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from ..builder import HEADS
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class TopdownHeatmapMultiStageHead(TopdownHeatmapBaseHead):
+ """Top-down heatmap multi-stage head.
+
+    TopdownHeatmapMultiStageHead consists of multiple branches, each of which
+    has num_deconv_layers (>=0) deconv layers and a simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_stages (int): Number of stages.
+        num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes of the deconv layers. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels=512,
+ out_channels=17,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_stages = num_stages
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ # build multi-stage deconv layers
+ self.multi_deconv_layers = nn.ModuleList([])
+ for _ in range(self.num_stages):
+ if num_deconv_layers > 0:
+ deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+ self.multi_deconv_layers.append(deconv_layers)
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ # build multi-stage final layers
+ self.multi_final_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if identity_final_layer:
+ final_layer = nn.Identity()
+ else:
+ final_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=num_deconv_filters[-1]
+ if num_deconv_layers > 0 else in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+ self.multi_final_layers.append(final_layer)
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]):
+ Output heatmaps.
+ target (torch.Tensor[N,K,H,W]):
+ Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert isinstance(output, list)
+ assert target.dim() == 4 and target_weight.dim() == 3
+
+ if isinstance(self.loss, nn.Sequential):
+ assert len(self.loss) == len(output)
+ for i in range(len(output)):
+ target_i = target
+ target_weight_i = target_weight
+ if isinstance(self.loss, nn.Sequential):
+ loss_func = self.loss[i]
+ else:
+ loss_func = self.loss
+ loss_i = loss_func(output[i], target_i, target_weight_i)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = loss_i
+ else:
+ losses['heatmap_loss'] += loss_i
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ _, avg_acc, _ = pose_pck_accuracy(
+ output[-1].detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages.
+ """
+ out = []
+ assert isinstance(x, list)
+ for i in range(self.num_stages):
+ y = self.multi_deconv_layers[i](x[i])
+ y = self.multi_final_layers[i](y)
+ out.append(y)
+ return out
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+            x (list[torch.Tensor[N,K,H,W]]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+ assert isinstance(output, list)
+ output = output[-1]
+
+ if flip_pairs is not None:
+ # perform flip
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+
+ return output_heatmap
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.multi_deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.multi_final_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+
+
+class PredictHeatmap(nn.Module):
+ """Predict the heat map for an input feature.
+
+ Args:
+ unit_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ out_shape (tuple): Shape of the output heatmap.
+ use_prm (bool): Whether to use pose refine machine. Default: False.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ unit_channels,
+ out_channels,
+ out_shape,
+ use_prm=False,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.out_channels = out_channels
+ self.out_shape = out_shape
+ self.use_prm = use_prm
+ if use_prm:
+ self.prm = PRM(out_channels, norm_cfg=norm_cfg)
+ self.conv_layers = nn.Sequential(
+ ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=norm_cfg,
+ inplace=False),
+ ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ inplace=False))
+
+ def forward(self, feature):
+ feature = self.conv_layers(feature)
+ output = nn.functional.interpolate(
+ feature, size=self.out_shape, mode='bilinear', align_corners=True)
+ if self.use_prm:
+ output = self.prm(output)
+ return output
+
+
+class PRM(nn.Module):
+ """Pose Refine Machine.
+
+ Please refer to "Learning Delicate Local Representations
+ for Multi-Person Pose Estimation" (ECCV 2020).
+
+ Args:
+ out_channels (int): Channel number of the output. Equals to
+ the number of key points.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self, out_channels, norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.out_channels = out_channels
+ self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
+ self.middle_path = nn.Sequential(
+ Linear(self.out_channels, self.out_channels),
+ build_norm_layer(dict(type='BN1d'), out_channels)[1],
+ build_activation_layer(dict(type='ReLU')),
+ Linear(self.out_channels, self.out_channels),
+ build_norm_layer(dict(type='BN1d'), out_channels)[1],
+ build_activation_layer(dict(type='ReLU')),
+ build_activation_layer(dict(type='Sigmoid')))
+
+ self.bottom_path = nn.Sequential(
+ ConvModule(
+ self.out_channels,
+ self.out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=norm_cfg,
+ inplace=False),
+ DepthwiseSeparableConvModule(
+ self.out_channels,
+ 1,
+ kernel_size=9,
+ stride=1,
+ padding=4,
+ norm_cfg=norm_cfg,
+ inplace=False), build_activation_layer(dict(type='Sigmoid')))
+ self.conv_bn_relu_prm_1 = ConvModule(
+ self.out_channels,
+ self.out_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ inplace=False)
+
+ def forward(self, x):
+ out = self.conv_bn_relu_prm_1(x)
+ out_1 = out
+
+ out_2 = self.global_pooling(out_1)
+ out_2 = out_2.view(out_2.size(0), -1)
+ out_2 = self.middle_path(out_2)
+ out_2 = out_2.unsqueeze(2)
+ out_2 = out_2.unsqueeze(3)
+
+ out_3 = self.bottom_path(out_1)
+ out = out_1 * (1 + out_2 * out_3)
+
+ return out
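+
+    # In the expression above, out_2 is a per-channel gate (global pooling +
+    # middle path) and out_3 is a single-channel spatial attention map (bottom
+    # path), so out = out_1 * (1 + out_2 * out_3) rescales the features with a
+    # combined channel/spatial gate while keeping an identity shortcut.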
+
+
+@HEADS.register_module()
+class TopdownHeatmapMSMUHead(TopdownHeatmapBaseHead):
+ """Heads for multi-stage multi-unit heads used in Multi-Stage Pose
+ estimation Network (MSPN), and Residual Steps Networks (RSN).
+
+ Args:
+ unit_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ out_shape (tuple): Shape of the output heatmap.
+ num_stages (int): Number of stages.
+ num_units (int): Number of units in each stage.
+ use_prm (bool): Whether to use pose refine machine (PRM).
+ Default: False.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ out_shape,
+ unit_channels=256,
+ out_channels=17,
+ num_stages=4,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self.out_shape = out_shape
+ self.unit_channels = unit_channels
+ self.out_channels = out_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.predict_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ for j in range(self.num_units):
+ self.predict_layers.append(
+ PredictHeatmap(
+ unit_channels,
+ out_channels,
+ out_shape,
+ use_prm,
+ norm_cfg=norm_cfg))
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,O,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,O,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,O,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert isinstance(output, list)
+ assert target.dim() == 5 and target_weight.dim() == 4
+ assert target.size(1) == len(output)
+
+ if isinstance(self.loss, nn.Sequential):
+ assert len(self.loss) == len(output)
+ for i in range(len(output)):
+ target_i = target[:, i, :, :, :]
+ target_weight_i = target_weight[:, i, :, :]
+
+ if isinstance(self.loss, nn.Sequential):
+ loss_func = self.loss[i]
+ else:
+ loss_func = self.loss
+
+ loss_i = loss_func(output[i], target_i, target_weight_i)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = loss_i
+ else:
+ losses['heatmap_loss'] += loss_i
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ assert isinstance(output, list)
+ assert target.dim() == 5 and target_weight.dim() == 4
+ _, avg_acc, _ = pose_pck_accuracy(
+ output[-1].detach().cpu().numpy(),
+ target[:, -1, ...].detach().cpu().numpy(),
+ target_weight[:, -1,
+ ...].detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages
+ and units.
+ """
+ out = []
+ assert isinstance(x, list)
+ assert len(x) == self.num_stages
+ assert isinstance(x[0], list)
+ assert len(x[0]) == self.num_units
+ assert x[0][0].shape[1] == self.unit_channels
+ for i in range(self.num_stages):
+ for j in range(self.num_units):
+ y = self.predict_layers[i * self.num_units + j](x[i][j])
+ out.append(y)
+
+ return out
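+
+    # The returned list is stage-major: out[i * self.num_units + j] is the
+    # heatmap predicted for unit j of stage i, matching the flat indexing used
+    # to build self.predict_layers in __init__.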
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (list[torch.Tensor[N,K,H,W]]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+ assert isinstance(output, list)
+ output = output[-1]
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.predict_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
diff --git a/mmpose/models/heads/topdown_heatmap_simple_head.py b/mmpose/models/heads/topdown_heatmap_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f3348b2ba06d43e6489e0235c4a883d567e5cd
--- /dev/null
+++ b/mmpose/models/heads/topdown_heatmap_simple_head.py
@@ -0,0 +1,350 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.utils.ops import resize
+from ..builder import HEADS
+import torch.nn.functional as F
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class TopdownHeatmapSimpleHead(TopdownHeatmapBaseHead):
+ """Top-down heatmap simple head. paper ref: Bin Xiao et al. ``Simple
+ Baselines for Human Pose Estimation and Tracking``.
+
+ TopdownHeatmapSimpleHead is consisted of (>=0) number of deconv layers
+ and a simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+        num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes of the deconv layers.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None,
+ upsample=0,):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+ self.upsample = upsample
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 4 and target_weight.dim() == 3
+ losses['heatmap_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ _, avg_acc, _ = pose_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ return x
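+
+    # Usage sketch (illustrative values only; 'JointsMSELoss' is one possible
+    # loss_keypoint config, not mandated by this head):
+    #   head = TopdownHeatmapSimpleHead(
+    #       in_channels=2048, out_channels=17,
+    #       loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+    #   heatmaps = head(torch.randn(2, 2048, 8, 6))
+    #   # the three default stride-2 deconvs upsample 8x6 -> 64x48, giving
+    #   # heatmaps of shape torch.Size([2, 17, 64, 48])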
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+        if not isinstance(inputs, list):
+            # single feature map: optionally upsample it before the deconv head
+            if self.upsample > 0:
+                inputs = resize(
+                    input=F.relu(inputs),
+                    scale_factor=self.upsample,
+                    mode='bilinear',
+                    align_corners=self.align_corners)
+            return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
diff --git a/mmpose/models/heads/vipnas_heatmap_simple_head.py b/mmpose/models/heads/vipnas_heatmap_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..41703128c45909733159a0869e091f61e9805756
--- /dev/null
+++ b/mmpose/models/heads/vipnas_heatmap_simple_head.py
@@ -0,0 +1,349 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.utils.ops import resize
+from ..builder import HEADS
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class ViPNASHeatmapSimpleHead(TopdownHeatmapBaseHead):
+ """ViPNAS heatmap simple head.
+
+ ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search.
+    More details can be found in the `paper
+    <https://arxiv.org/abs/2105.10154>`__ .
+
+    ViPNASHeatmapSimpleHead consists of (>=0) deconv layers and a simple
+    conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+        num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes of the deconv layers.
+        num_deconv_groups (list|tuple): Group numbers of the deconv layers.
+        in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_deconv_layers=3,
+ num_deconv_filters=(144, 144, 144),
+ num_deconv_kernels=(4, 4, 4),
+ num_deconv_groups=(16, 16, 16),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers, num_deconv_filters, num_deconv_kernels,
+ num_deconv_groups)
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 4 and target_weight.dim() == 3
+ losses['heatmap_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type.lower() == 'GaussianHeatmap'.lower():
+ _, avg_acc, _ = pose_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ return x
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels,
+ num_groups):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_groups):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_groups({len(num_groups)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ groups = num_groups[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ groups=groups,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
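+
+    # Unlike the plain simple head, each ConvTranspose2d here is grouped
+    # (groups=num_groups[i]); the per-layer group numbers come from the ViPNAS
+    # architecture search and reduce the parameter count of the upsampling
+    # layers.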
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
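
For orientation, a minimal standalone sketch of what the 'resize_concat' input transform in `_transform_inputs` amounts to, using dummy multi-scale features (illustrative names; only PyTorch is assumed, and the head's `resize` helper is approximated with `F.interpolate`): the selected maps are upsampled to the first map's resolution and concatenated along channels, which is why `in_channels` becomes the sum of the per-level channels.

import torch
import torch.nn.functional as F

# Dummy multi-scale feature maps, e.g. from an HRNet-style backbone.
feats = [torch.randn(1, 32, 64, 64),
         torch.randn(1, 64, 32, 32),
         torch.randn(1, 128, 16, 16)]
in_index = [0, 1, 2]

selected = [feats[i] for i in in_index]
upsampled = [F.interpolate(x, size=selected[0].shape[2:], mode='bilinear',
                           align_corners=False) for x in selected]
fused = torch.cat(upsampled, dim=1)
print(fused.shape)  # torch.Size([1, 224, 64, 64]) -> in_channels = 32 + 64 + 128
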
diff --git a/mmpose/models/heads/voxelpose_head.py b/mmpose/models/heads/voxelpose_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..8799bdc2c0a888973f6cf98f3da00c60a891e699
--- /dev/null
+++ b/mmpose/models/heads/voxelpose_head.py
@@ -0,0 +1,167 @@
+# ------------------------------------------------------------------------------
+# Copyright and License Information
+# https://github.com/microsoft/voxelpose-pytorch/blob/main/lib/models
+# Original Licence: MIT License
+# ------------------------------------------------------------------------------
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class CuboidCenterHead(nn.Module):
+ """Get results from the 3D human center heatmap. In this module, human 3D
+ centers are local maximums obtained from the 3D heatmap via NMS (max-
+ pooling).
+
+ Args:
+ space_size (list[3]): The size of the 3D space.
+ cube_size (list[3]): The size of the heatmap volume.
+ space_center (list[3]): The coordinate of space center.
+ max_num (int): Maximum of human center detections.
+ max_pool_kernel (int): Kernel size of the max-pool kernel in nms.
+ """
+
+ def __init__(self,
+ space_size,
+ space_center,
+ cube_size,
+ max_num=10,
+ max_pool_kernel=3):
+ super(CuboidCenterHead, self).__init__()
+ # use register_buffer
+ self.register_buffer('grid_size', torch.tensor(space_size))
+ self.register_buffer('cube_size', torch.tensor(cube_size))
+ self.register_buffer('grid_center', torch.tensor(space_center))
+
+ self.num_candidates = max_num
+ self.max_pool_kernel = max_pool_kernel
+ self.loss = nn.MSELoss()
+
+ def _get_real_locations(self, indices):
+ """
+ Args:
+ indices (torch.Tensor(NXP)): Indices of points in the 3D tensor
+
+ Returns:
+ real_locations (torch.Tensor(NXPx3)): Locations of points
+ in the world coordinate system
+ """
+ real_locations = indices.float() / (
+ self.cube_size - 1) * self.grid_size + \
+ self.grid_center - self.grid_size / 2.0
+ return real_locations
+
+ def _nms_by_max_pool(self, heatmap_volumes):
+ max_num = self.num_candidates
+ batch_size = heatmap_volumes.shape[0]
+ root_cubes_nms = self._max_pool(heatmap_volumes)
+ root_cubes_nms_reshape = root_cubes_nms.reshape(batch_size, -1)
+ topk_values, topk_index = root_cubes_nms_reshape.topk(max_num)
+ topk_unravel_index = self._get_3d_indices(topk_index,
+ heatmap_volumes[0].shape)
+
+ return topk_values, topk_unravel_index
+
+ def _max_pool(self, inputs):
+ kernel = self.max_pool_kernel
+ padding = (kernel - 1) // 2
+ max = F.max_pool3d(
+ inputs, kernel_size=kernel, stride=1, padding=padding)
+ keep = (inputs == max).float()
+ return keep * inputs
+
+ @staticmethod
+ def _get_3d_indices(indices, shape):
+ """Get indices in the 3-D tensor.
+
+ Args:
+ indices (torch.Tensor(NXp)): Indices of points in the 1D tensor
+ shape (torch.Size(3)): The shape of the original 3D tensor
+
+ Returns:
+ indices: Indices of points in the original 3D tensor
+ """
+ batch_size = indices.shape[0]
+ num_people = indices.shape[1]
+ indices_x = (indices //
+ (shape[1] * shape[2])).reshape(batch_size, num_people, -1)
+ indices_y = ((indices % (shape[1] * shape[2])) //
+ shape[2]).reshape(batch_size, num_people, -1)
+ indices_z = (indices % shape[2]).reshape(batch_size, num_people, -1)
+ indices = torch.cat([indices_x, indices_y, indices_z], dim=2)
+ return indices
+
+ def forward(self, heatmap_volumes):
+ """
+
+ Args:
+ heatmap_volumes (torch.Tensor(NXLXWXH)):
+ 3D human center heatmaps predicted by the network.
+ Returns:
+ human_centers (torch.Tensor(NXPX5)):
+ Coordinates of human centers.
+ """
+ batch_size = heatmap_volumes.shape[0]
+
+ topk_values, topk_unravel_index = self._nms_by_max_pool(
+ heatmap_volumes.detach())
+
+ topk_unravel_index = self._get_real_locations(topk_unravel_index)
+
+ human_centers = torch.zeros(
+ batch_size, self.num_candidates, 5, device=heatmap_volumes.device)
+ human_centers[:, :, 0:3] = topk_unravel_index
+ human_centers[:, :, 4] = topk_values
+
+ return human_centers
+
+ def get_loss(self, pred_cubes, gt):
+
+ return dict(loss_center=self.loss(pred_cubes, gt))
+
+
+@HEADS.register_module()
+class CuboidPoseHead(nn.Module):
+
+ def __init__(self, beta):
+ """Get results from the 3D human pose heatmap. Instead of obtaining
+ maximums on the heatmap, this module regresses the coordinates of
+        keypoints via integral pose regression. Refer to the paper for more
+        details.
+
+ Args:
+ beta: Constant to adjust the magnification of soft-maxed heatmap.
+ """
+ super(CuboidPoseHead, self).__init__()
+ self.beta = beta
+ self.loss = nn.L1Loss()
+
+ def forward(self, heatmap_volumes, grid_coordinates):
+ """
+
+ Args:
+ heatmap_volumes (torch.Tensor(NxKxLxWxH)):
+ 3D human pose heatmaps predicted by the network.
+ grid_coordinates (torch.Tensor(Nx(LxWxH)x3)):
+ Coordinates of the grids in the heatmap volumes.
+ Returns:
+ human_poses (torch.Tensor(NxKx3)): Coordinates of human poses.
+ """
+ batch_size = heatmap_volumes.size(0)
+ channel = heatmap_volumes.size(1)
+ x = heatmap_volumes.reshape(batch_size, channel, -1, 1)
+ x = F.softmax(self.beta * x, dim=2)
+ grid_coordinates = grid_coordinates.unsqueeze(1)
+ x = torch.mul(x, grid_coordinates)
+ human_poses = torch.sum(x, dim=2)
+
+ return human_poses
+
+ def get_loss(self, preds, targets, weights):
+
+ return dict(loss_pose=self.loss(preds * weights, targets * weights))
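
A standalone sketch of the integral (soft-argmax) regression that CuboidPoseHead.forward performs, with dummy tensors (shapes and names are illustrative; only PyTorch is assumed): each keypoint location is the softmax-weighted average of the voxel grid coordinates, so the result stays differentiable, unlike a hard argmax over the heatmap volume.

import torch
import torch.nn.functional as F

N, K = 2, 15                    # batch size, number of keypoints
L, W, H = 8, 8, 8               # heatmap volume resolution
beta = 100.0

heatmap_volumes = torch.randn(N, K, L, W, H)
grid_coordinates = torch.rand(N, L * W * H, 3)   # 3D position of every voxel

x = heatmap_volumes.reshape(N, K, -1, 1)
x = F.softmax(beta * x, dim=2)                   # per-keypoint weights over voxels
human_poses = torch.sum(x * grid_coordinates.unsqueeze(1), dim=2)
print(human_poses.shape)                         # torch.Size([2, 15, 3])
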
diff --git a/mmpose/models/losses/__init__.py b/mmpose/models/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d67973fc5cb53e85faa918719944d8c02f2190cd
--- /dev/null
+++ b/mmpose/models/losses/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .classfication_loss import BCELoss
+from .heatmap_loss import AdaptiveWingLoss
+from .mesh_loss import GANLoss, MeshLoss
+from .mse_loss import JointsMSELoss, JointsOHKMMSELoss
+from .multi_loss_factory import AELoss, HeatmapLoss, MultiLossFactory
+from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, MSELoss,
+ SemiSupervisionLoss, SmoothL1Loss, SoftWingLoss,
+ WingLoss)
+
+__all__ = [
+ 'JointsMSELoss', 'JointsOHKMMSELoss', 'HeatmapLoss', 'AELoss',
+ 'MultiLossFactory', 'MeshLoss', 'GANLoss', 'SmoothL1Loss', 'WingLoss',
+ 'MPJPELoss', 'MSELoss', 'L1Loss', 'BCELoss', 'BoneLoss',
+ 'SemiSupervisionLoss', 'SoftWingLoss', 'AdaptiveWingLoss'
+]
diff --git a/mmpose/models/losses/__pycache__/__init__.cpython-310.pyc b/mmpose/models/losses/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7cab714179e6ee0036444277aeef74b427632599
Binary files /dev/null and b/mmpose/models/losses/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/classfication_loss.cpython-310.pyc b/mmpose/models/losses/__pycache__/classfication_loss.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..256f42a0ac5918af0ad4c452f015d2fdb0d1e1df
Binary files /dev/null and b/mmpose/models/losses/__pycache__/classfication_loss.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/heatmap_loss.cpython-310.pyc b/mmpose/models/losses/__pycache__/heatmap_loss.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f3bb005731de7b631ea0f20bf551c9c7ad1ab459
Binary files /dev/null and b/mmpose/models/losses/__pycache__/heatmap_loss.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/mesh_loss.cpython-310.pyc b/mmpose/models/losses/__pycache__/mesh_loss.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..05ccf024213e003123e577273d8ddf4cf4052ac5
Binary files /dev/null and b/mmpose/models/losses/__pycache__/mesh_loss.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/mse_loss.cpython-310.pyc b/mmpose/models/losses/__pycache__/mse_loss.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30c3c00616f864eae9f9302193ce901b6087fefd
Binary files /dev/null and b/mmpose/models/losses/__pycache__/mse_loss.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/multi_loss_factory.cpython-310.pyc b/mmpose/models/losses/__pycache__/multi_loss_factory.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d12eaf1d11e6982e021b1ea6835597a672ce66e2
Binary files /dev/null and b/mmpose/models/losses/__pycache__/multi_loss_factory.cpython-310.pyc differ
diff --git a/mmpose/models/losses/__pycache__/regression_loss.cpython-310.pyc b/mmpose/models/losses/__pycache__/regression_loss.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a11f013e8240723c23fd43c1c030de1d76a242a0
Binary files /dev/null and b/mmpose/models/losses/__pycache__/regression_loss.cpython-310.pyc differ
diff --git a/mmpose/models/losses/classfication_loss.py b/mmpose/models/losses/classfication_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79b69d035611f75f10e8722aaea4362659509e2
--- /dev/null
+++ b/mmpose/models/losses/classfication_loss.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class BCELoss(nn.Module):
+ """Binary Cross Entropy loss."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.binary_cross_entropy
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_labels: K
+
+ Args:
+ output (torch.Tensor[N, K]): Output classification.
+ target (torch.Tensor[N, K]): Target classification.
+ target_weight (torch.Tensor[N, K] or torch.Tensor[N]):
+ Weights across different labels.
+ """
+
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output, target, reduction='none')
+ if target_weight.dim() == 1:
+ target_weight = target_weight[:, None]
+ loss = (loss * target_weight).mean()
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
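
A minimal sketch of what BCELoss computes when use_target_weight=True, with dummy tensors (illustrative names; only PyTorch is assumed): the unreduced binary cross entropy is gated per label by target_weight before averaging, so labels with weight 0 contribute nothing to the loss value.

import torch
import torch.nn.functional as F

N, K = 4, 17
output = torch.sigmoid(torch.randn(N, K))             # predicted probabilities
target = torch.randint(0, 2, (N, K)).float()
target_weight = torch.randint(0, 2, (N, K)).float()   # 0 masks a label out

loss = F.binary_cross_entropy(output, target, reduction='none')
loss = (loss * target_weight).mean()
print(loss)
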
diff --git a/mmpose/models/losses/heatmap_loss.py b/mmpose/models/losses/heatmap_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..9471457ca0da2d43441da1d394bc45b3e8ca3ee7
--- /dev/null
+++ b/mmpose/models/losses/heatmap_loss.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class AdaptiveWingLoss(nn.Module):
+ """Adaptive wing loss. paper ref: 'Adaptive Wing Loss for Robust Face
+ Alignment via Heatmap Regression' Wang et al. ICCV'2019.
+
+ Args:
+ alpha (float), omega (float), epsilon (float), theta (float)
+ are hyper-parameters.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ alpha=2.1,
+ omega=14,
+ epsilon=1,
+ theta=0.5,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.alpha = float(alpha)
+ self.omega = float(omega)
+ self.epsilon = float(epsilon)
+ self.theta = float(theta)
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+
+ Args:
+ pred (torch.Tensor[NxKxHxW]): Predicted heatmaps.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ """
+ H, W = pred.shape[2:4]
+ delta = (target - pred).abs()
+
+ A = self.omega * (
+ 1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - target))
+ ) * (self.alpha - target) * (torch.pow(
+ self.theta / self.epsilon,
+ self.alpha - target - 1)) * (1 / self.epsilon)
+ C = self.theta * A - self.omega * torch.log(
+ 1 + torch.pow(self.theta / self.epsilon, self.alpha - target))
+
+ losses = torch.where(
+ delta < self.theta,
+ self.omega *
+ torch.log(1 +
+ torch.pow(delta / self.epsilon, self.alpha - target)),
+ A * delta - C)
+
+ return torch.mean(losses)
+
+ def forward(self, output, target, target_weight):
+ """Forward function.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+
+ Args:
+ output (torch.Tensor[NxKxHxW]): Output heatmaps.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ loss = self.criterion(output * target_weight.unsqueeze(-1),
+ target * target_weight.unsqueeze(-1))
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
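
For reference, the same piecewise formula as AdaptiveWingLoss.criterion written as a standalone function with dummy heatmaps (a sketch only; the helper name and shapes are illustrative, and nothing beyond PyTorch is assumed). The loss switches from the log curve to the linear branch A*delta - C at |delta| = theta, with A and C chosen so the two pieces join smoothly.

import torch

alpha, omega, epsilon, theta = 2.1, 14.0, 1.0, 0.5

def adaptive_wing(pred, target):
    delta = (target - pred).abs()
    A = omega * (1 / (1 + (theta / epsilon) ** (alpha - target))) \
        * (alpha - target) * (theta / epsilon) ** (alpha - target - 1) / epsilon
    C = theta * A - omega * torch.log(1 + (theta / epsilon) ** (alpha - target))
    return torch.where(
        delta < theta,
        omega * torch.log(1 + (delta / epsilon) ** (alpha - target)),
        A * delta - C).mean()

pred = torch.rand(2, 17, 64, 48)     # N x K x H x W heatmaps in [0, 1]
target = torch.rand(2, 17, 64, 48)
print(adaptive_wing(pred, target))
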
diff --git a/mmpose/models/losses/mesh_loss.py b/mmpose/models/losses/mesh_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9d18bd7296a189ec2f24c422cc05a19035d3224
--- /dev/null
+++ b/mmpose/models/losses/mesh_loss.py
@@ -0,0 +1,340 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from ..utils.geometry import batch_rodrigues
+
+
+def perspective_projection(points, rotation, translation, focal_length,
+ camera_center):
+ """This function computes the perspective projection of a set of 3D points.
+
+ Note:
+ - batch size: B
+ - point number: N
+
+ Args:
+ points (Tensor([B, N, 3])): A set of 3D points
+ rotation (Tensor([B, 3, 3])): Camera rotation matrix
+ translation (Tensor([B, 3])): Camera translation
+ focal_length (Tensor([B,])): Focal length
+ camera_center (Tensor([B, 2])): Camera center
+
+ Returns:
+ projected_points (Tensor([B, N, 2])): Projected 2D
+ points in image space.
+ """
+
+ batch_size = points.shape[0]
+ K = torch.zeros([batch_size, 3, 3], device=points.device)
+ K[:, 0, 0] = focal_length
+ K[:, 1, 1] = focal_length
+ K[:, 2, 2] = 1.
+ K[:, :-1, -1] = camera_center
+
+ # Transform points
+ points = torch.einsum('bij,bkj->bki', rotation, points)
+ points = points + translation.unsqueeze(1)
+
+ # Apply perspective distortion
+ projected_points = points / points[:, :, -1].unsqueeze(-1)
+
+ # Apply camera intrinsics
+ projected_points = torch.einsum('bij,bkj->bki', K, projected_points)
+ projected_points = projected_points[:, :, :-1]
+ return projected_points
+
+
+@LOSSES.register_module()
+class MeshLoss(nn.Module):
+ """Mix loss for 3D human mesh. It is composed of loss on 2D joints, 3D
+ joints, mesh vertices and smpl parameters (if any).
+
+ Args:
+ joints_2d_loss_weight (float): Weight for loss on 2D joints.
+ joints_3d_loss_weight (float): Weight for loss on 3D joints.
+        vertex_loss_weight (float): Weight for loss on 3D vertices.
+ smpl_pose_loss_weight (float): Weight for loss on SMPL
+ pose parameters.
+ smpl_beta_loss_weight (float): Weight for loss on SMPL
+ shape parameters.
+ img_res (int): Input image resolution.
+ focal_length (float): Focal length of camera model. Default=5000.
+ """
+
+ def __init__(self,
+ joints_2d_loss_weight,
+ joints_3d_loss_weight,
+ vertex_loss_weight,
+ smpl_pose_loss_weight,
+ smpl_beta_loss_weight,
+ img_res,
+ focal_length=5000):
+
+ super().__init__()
+ # Per-vertex loss on the mesh
+ self.criterion_vertex = nn.L1Loss(reduction='none')
+
+ # Joints (2D and 3D) loss
+ self.criterion_joints_2d = nn.SmoothL1Loss(reduction='none')
+ self.criterion_joints_3d = nn.SmoothL1Loss(reduction='none')
+
+ # Loss for SMPL parameter regression
+ self.criterion_regr = nn.MSELoss(reduction='none')
+
+ self.joints_2d_loss_weight = joints_2d_loss_weight
+ self.joints_3d_loss_weight = joints_3d_loss_weight
+ self.vertex_loss_weight = vertex_loss_weight
+ self.smpl_pose_loss_weight = smpl_pose_loss_weight
+ self.smpl_beta_loss_weight = smpl_beta_loss_weight
+ self.focal_length = focal_length
+ self.img_res = img_res
+
+ def joints_2d_loss(self, pred_joints_2d, gt_joints_2d, joints_2d_visible):
+ """Compute 2D reprojection loss on the joints.
+
+ The loss is weighted by joints_2d_visible.
+ """
+ conf = joints_2d_visible.float()
+ loss = (conf *
+ self.criterion_joints_2d(pred_joints_2d, gt_joints_2d)).mean()
+ return loss
+
+ def joints_3d_loss(self, pred_joints_3d, gt_joints_3d, joints_3d_visible):
+ """Compute 3D joints loss for the examples that 3D joint annotations
+ are available.
+
+ The loss is weighted by joints_3d_visible.
+ """
+ conf = joints_3d_visible.float()
+ if len(gt_joints_3d) > 0:
+ gt_pelvis = (gt_joints_3d[:, 2, :] + gt_joints_3d[:, 3, :]) / 2
+ gt_joints_3d = gt_joints_3d - gt_pelvis[:, None, :]
+ pred_pelvis = (pred_joints_3d[:, 2, :] +
+ pred_joints_3d[:, 3, :]) / 2
+ pred_joints_3d = pred_joints_3d - pred_pelvis[:, None, :]
+ return (
+ conf *
+ self.criterion_joints_3d(pred_joints_3d, gt_joints_3d)).mean()
+ return pred_joints_3d.sum() * 0
+
+ def vertex_loss(self, pred_vertices, gt_vertices, has_smpl):
+ """Compute 3D vertex loss for the examples that 3D human mesh
+ annotations are available.
+
+ The loss is weighted by the has_smpl.
+ """
+ conf = has_smpl.float()
+ loss_vertex = self.criterion_vertex(pred_vertices, gt_vertices)
+ loss_vertex = (conf[:, None, None] * loss_vertex).mean()
+ return loss_vertex
+
+ def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas,
+ has_smpl):
+ """Compute SMPL parameters loss for the examples that SMPL parameter
+ annotations are available.
+
+ The loss is weighted by has_smpl.
+ """
+ conf = has_smpl.float()
+ gt_rotmat = batch_rodrigues(gt_pose.view(-1, 3)).view(-1, 24, 3, 3)
+ loss_regr_pose = self.criterion_regr(pred_rotmat, gt_rotmat)
+ loss_regr_betas = self.criterion_regr(pred_betas, gt_betas)
+ loss_regr_pose = (conf[:, None, None, None] * loss_regr_pose).mean()
+ loss_regr_betas = (conf[:, None] * loss_regr_betas).mean()
+ return loss_regr_pose, loss_regr_betas
+
+ def project_points(self, points_3d, camera):
+ """Perform orthographic projection of 3D points using the camera
+ parameters, return projected 2D points in image plane.
+
+ Note:
+ - batch size: B
+ - point number: N
+
+ Args:
+ points_3d (Tensor([B, N, 3])): 3D points.
+ camera (Tensor([B, 3])): camera parameters with the
+ 3 channel as (scale, translation_x, translation_y)
+
+ Returns:
+ Tensor([B, N, 2]): projected 2D points \
+ in image space.
+ """
+ batch_size = points_3d.shape[0]
+ device = points_3d.device
+ cam_t = torch.stack([
+ camera[:, 1], camera[:, 2], 2 * self.focal_length /
+ (self.img_res * camera[:, 0] + 1e-9)
+ ],
+ dim=-1)
+ camera_center = camera.new_zeros([batch_size, 2])
+ rot_t = torch.eye(
+ 3, device=device,
+ dtype=points_3d.dtype).unsqueeze(0).expand(batch_size, -1, -1)
+ joints_2d = perspective_projection(
+ points_3d,
+ rotation=rot_t,
+ translation=cam_t,
+ focal_length=self.focal_length,
+ camera_center=camera_center)
+ return joints_2d
+
+ def forward(self, output, target):
+ """Forward function.
+
+ Args:
+ output (dict): dict of network predicted results.
+ Keys: 'vertices', 'joints_3d', 'camera',
+ 'pose'(optional), 'beta'(optional)
+ target (dict): dict of ground-truth labels.
+ Keys: 'vertices', 'joints_3d', 'joints_3d_visible',
+ 'joints_2d', 'joints_2d_visible', 'pose', 'beta',
+ 'has_smpl'
+
+ Returns:
+ dict: dict of losses.
+ """
+ losses = {}
+
+ # Per-vertex loss for the shape
+ pred_vertices = output['vertices']
+
+ gt_vertices = target['vertices']
+ has_smpl = target['has_smpl']
+ loss_vertex = self.vertex_loss(pred_vertices, gt_vertices, has_smpl)
+ losses['vertex_loss'] = loss_vertex * self.vertex_loss_weight
+
+ # Compute loss on SMPL parameters, if available
+ if 'pose' in output.keys() and 'beta' in output.keys():
+ pred_rotmat = output['pose']
+ pred_betas = output['beta']
+ gt_pose = target['pose']
+ gt_betas = target['beta']
+ loss_regr_pose, loss_regr_betas = self.smpl_losses(
+ pred_rotmat, pred_betas, gt_pose, gt_betas, has_smpl)
+ losses['smpl_pose_loss'] = \
+ loss_regr_pose * self.smpl_pose_loss_weight
+ losses['smpl_beta_loss'] = \
+ loss_regr_betas * self.smpl_beta_loss_weight
+
+ # Compute 3D joints loss
+ pred_joints_3d = output['joints_3d']
+ gt_joints_3d = target['joints_3d']
+ joints_3d_visible = target['joints_3d_visible']
+ loss_joints_3d = self.joints_3d_loss(pred_joints_3d, gt_joints_3d,
+ joints_3d_visible)
+ losses['joints_3d_loss'] = loss_joints_3d * self.joints_3d_loss_weight
+
+ # Compute 2D reprojection loss for the 2D joints
+ pred_camera = output['camera']
+ gt_joints_2d = target['joints_2d']
+ joints_2d_visible = target['joints_2d_visible']
+ pred_joints_2d = self.project_points(pred_joints_3d, pred_camera)
+
+ # Normalize keypoints to [-1,1]
+ # The coordinate origin of pred_joints_2d is
+ # the center of the input image.
+ pred_joints_2d = 2 * pred_joints_2d / (self.img_res - 1)
+ # The coordinate origin of gt_joints_2d is
+ # the top left corner of the input image.
+ gt_joints_2d = 2 * gt_joints_2d / (self.img_res - 1) - 1
+ loss_joints_2d = self.joints_2d_loss(pred_joints_2d, gt_joints_2d,
+ joints_2d_visible)
+ losses['joints_2d_loss'] = loss_joints_2d * self.joints_2d_loss_weight
+
+ return losses
+
+
+@LOSSES.register_module()
+class GANLoss(nn.Module):
+ """Define GAN loss.
+
+ Args:
+ gan_type (str): Support 'vanilla', 'lsgan', 'wgan', 'hinge'.
+ real_label_val (float): The value for real label. Default: 1.0.
+ fake_label_val (float): The value for fake label. Default: 0.0.
+ loss_weight (float): Loss weight. Default: 1.0.
+            Note that loss_weight applies only to generators; it is always
+            1.0 for discriminators.
+ """
+
+ def __init__(self,
+ gan_type,
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=1.0):
+ super().__init__()
+ self.gan_type = gan_type
+ self.loss_weight = loss_weight
+ self.real_label_val = real_label_val
+ self.fake_label_val = fake_label_val
+
+ if self.gan_type == 'vanilla':
+ self.loss = nn.BCEWithLogitsLoss()
+ elif self.gan_type == 'lsgan':
+ self.loss = nn.MSELoss()
+ elif self.gan_type == 'wgan':
+ self.loss = self._wgan_loss
+ elif self.gan_type == 'hinge':
+ self.loss = nn.ReLU()
+ else:
+ raise NotImplementedError(
+ f'GAN type {self.gan_type} is not implemented.')
+
+ @staticmethod
+ def _wgan_loss(input, target):
+ """wgan loss.
+
+ Args:
+ input (Tensor): Input tensor.
+ target (bool): Target label.
+
+ Returns:
+ Tensor: wgan loss.
+ """
+ return -input.mean() if target else input.mean()
+
+ def get_target_label(self, input, target_is_real):
+ """Get target label.
+
+ Args:
+ input (Tensor): Input tensor.
+ target_is_real (bool): Whether the target is real or fake.
+
+ Returns:
+ (bool | Tensor): Target tensor. Return bool for wgan, \
+ otherwise, return Tensor.
+ """
+
+ if self.gan_type == 'wgan':
+ return target_is_real
+ target_val = (
+ self.real_label_val if target_is_real else self.fake_label_val)
+ return input.new_ones(input.size()) * target_val
+
+ def forward(self, input, target_is_real, is_disc=False):
+ """
+ Args:
+ input (Tensor): The input for the loss module, i.e., the network
+ prediction.
+            target_is_real (bool): Whether the target is real or fake.
+            is_disc (bool): Whether the loss is for discriminators or not.
+ Default: False.
+
+ Returns:
+ Tensor: GAN loss value.
+ """
+ target_label = self.get_target_label(input, target_is_real)
+ if self.gan_type == 'hinge':
+ if is_disc: # for discriminators in hinge-gan
+ input = -input if target_is_real else input
+ loss = self.loss(1 + input).mean()
+ else: # for generators in hinge-gan
+ loss = -input.mean()
+ else: # other gan types
+ loss = self.loss(input, target_label)
+
+ # loss_weight is always 1.0 for discriminators
+ return loss if is_disc else loss * self.loss_weight
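
A self-contained sketch of the camera model MeshLoss relies on, with dummy joints (illustrative names and values; only PyTorch is assumed): the weak-perspective camera (scale, tx, ty) predicted by the network is converted to a translation whose depth is 2*f/(img_res*scale), and the joints are then projected with the same full-perspective math as `perspective_projection` above.

import torch

def project(points, rotation, translation, focal_length, camera_center):
    # Rotate, translate, divide by depth, then apply the pinhole intrinsics.
    B = points.shape[0]
    K = torch.zeros(B, 3, 3)
    K[:, 0, 0] = focal_length
    K[:, 1, 1] = focal_length
    K[:, 2, 2] = 1.
    K[:, :-1, -1] = camera_center
    points = torch.einsum('bij,bkj->bki', rotation, points) + translation.unsqueeze(1)
    points = points / points[:, :, -1:]
    return torch.einsum('bij,bkj->bki', K, points)[:, :, :2]

B, N, img_res, focal = 2, 24, 224, 5000.0
points_3d = torch.randn(B, N, 3) * 0.2
camera = torch.tensor([[1.0, 0.0, 0.0]]).expand(B, -1)    # (scale, tx, ty)
cam_t = torch.stack([camera[:, 1], camera[:, 2],
                     2 * focal / (img_res * camera[:, 0] + 1e-9)], dim=-1)
rot = torch.eye(3).unsqueeze(0).expand(B, -1, -1)
uv = project(points_3d, rot, cam_t, focal, torch.zeros(B, 2))
print(uv.shape)   # torch.Size([2, 24, 2]), origin at the image centre
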
diff --git a/mmpose/models/losses/mse_loss.py b/mmpose/models/losses/mse_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f972efadfdfe0093c9ae1b308c6f82a9ccd72f73
--- /dev/null
+++ b/mmpose/models/losses/mse_loss.py
@@ -0,0 +1,153 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class JointsMSELoss(nn.Module):
+ """MSE loss for heatmaps.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = nn.MSELoss()
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight):
+ """Forward function."""
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ loss = 0.
+
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ loss += self.criterion(heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx])
+ else:
+ loss += self.criterion(heatmap_pred, heatmap_gt)
+
+ return loss / num_joints * self.loss_weight
+
+
+@LOSSES.register_module()
+class CombinedTargetMSELoss(nn.Module):
+ """MSE loss for combined target.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight, loss_weight=1.):
+ super().__init__()
+ self.criterion = nn.MSELoss(reduction='mean')
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight):
+ batch_size = output.size(0)
+ num_channels = output.size(1)
+ heatmaps_pred = output.reshape(
+ (batch_size, num_channels, -1)).split(1, 1)
+ heatmaps_gt = target.reshape(
+ (batch_size, num_channels, -1)).split(1, 1)
+ loss = 0.
+ num_joints = num_channels // 3
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx * 3].squeeze()
+ heatmap_gt = heatmaps_gt[idx * 3].squeeze()
+ offset_x_pred = heatmaps_pred[idx * 3 + 1].squeeze()
+ offset_x_gt = heatmaps_gt[idx * 3 + 1].squeeze()
+ offset_y_pred = heatmaps_pred[idx * 3 + 2].squeeze()
+ offset_y_gt = heatmaps_gt[idx * 3 + 2].squeeze()
+ if self.use_target_weight:
+ heatmap_pred = heatmap_pred * target_weight[:, idx]
+ heatmap_gt = heatmap_gt * target_weight[:, idx]
+ # classification loss
+ loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
+ # regression loss
+ loss += 0.5 * self.criterion(heatmap_gt * offset_x_pred,
+ heatmap_gt * offset_x_gt)
+ loss += 0.5 * self.criterion(heatmap_gt * offset_y_pred,
+ heatmap_gt * offset_y_gt)
+ return loss / num_joints * self.loss_weight
+
+
+@LOSSES.register_module()
+class JointsOHKMMSELoss(nn.Module):
+ """MSE loss with online hard keypoint mining.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ topk (int): Only top k joint losses are kept.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, topk=8, loss_weight=1.):
+ super().__init__()
+ assert topk > 0
+ self.criterion = nn.MSELoss(reduction='none')
+ self.use_target_weight = use_target_weight
+ self.topk = topk
+ self.loss_weight = loss_weight
+
+ def _ohkm(self, loss):
+ """Online hard keypoint mining."""
+ ohkm_loss = 0.
+ N = len(loss)
+ for i in range(N):
+ sub_loss = loss[i]
+ _, topk_idx = torch.topk(
+ sub_loss, k=self.topk, dim=0, sorted=False)
+ tmp_loss = torch.gather(sub_loss, 0, topk_idx)
+ ohkm_loss += torch.sum(tmp_loss) / self.topk
+ ohkm_loss /= N
+ return ohkm_loss
+
+ def forward(self, output, target, target_weight):
+ """Forward function."""
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+ if num_joints < self.topk:
+            raise ValueError(f'topk ({self.topk}) should not be '
+                             f'larger than num_joints ({num_joints}).')
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ losses = []
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ losses.append(
+ self.criterion(heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx]))
+ else:
+ losses.append(self.criterion(heatmap_pred, heatmap_gt))
+
+ losses = [loss.mean(dim=1).unsqueeze(dim=1) for loss in losses]
+ losses = torch.cat(losses, dim=1)
+
+ return self._ohkm(losses) * self.loss_weight
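
The online hard keypoint mining in JointsOHKMMSELoss keeps, per sample, only the k largest per-joint losses before averaging; a minimal equivalent with dummy numbers (illustrative names; only PyTorch is assumed):

import torch

N, K, topk = 2, 17, 8
per_joint_loss = torch.rand(N, K)        # e.g. mean unreduced MSE per joint

vals, _ = per_joint_loss.topk(topk, dim=1, sorted=False)
ohkm_loss = (vals.sum(dim=1) / topk).mean()
print(ohkm_loss)
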
diff --git a/mmpose/models/losses/multi_loss_factory.py b/mmpose/models/losses/multi_loss_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..65f90a761d0e5f94309023288f0d3ec848ec82dd
--- /dev/null
+++ b/mmpose/models/losses/multi_loss_factory.py
@@ -0,0 +1,281 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+def _make_input(t, requires_grad=False, device=torch.device('cpu')):
+ """Make zero inputs for AE loss.
+
+ Args:
+ t (torch.Tensor): input
+ requires_grad (bool): Option to use requires_grad.
+ device: torch device
+
+ Returns:
+ torch.Tensor: zero input.
+ """
+ inp = torch.autograd.Variable(t, requires_grad=requires_grad)
+ inp = inp.sum()
+ inp = inp.to(device)
+ return inp
+
+
+@LOSSES.register_module()
+class HeatmapLoss(nn.Module):
+ """Accumulate the heatmap loss for each image in the batch.
+
+ Args:
+ supervise_empty (bool): Whether to supervise empty channels.
+ """
+
+ def __init__(self, supervise_empty=True):
+ super().__init__()
+ self.supervise_empty = supervise_empty
+
+ def forward(self, pred, gt, mask):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ pred (torch.Tensor[N,K,H,W]):heatmap of output.
+ gt (torch.Tensor[N,K,H,W]): target heatmap.
+ mask (torch.Tensor[N,H,W]): mask of target.
+ """
+ assert pred.size() == gt.size(
+ ), f'pred.size() is {pred.size()}, gt.size() is {gt.size()}'
+
+ if not self.supervise_empty:
+ empty_mask = (gt.sum(dim=[2, 3], keepdim=True) > 0).float()
+ loss = ((pred - gt)**2) * empty_mask.expand_as(
+ pred) * mask[:, None, :, :].expand_as(pred)
+ else:
+ loss = ((pred - gt)**2) * mask[:, None, :, :].expand_as(pred)
+ loss = loss.mean(dim=3).mean(dim=2).mean(dim=1)
+ return loss
+
+
+@LOSSES.register_module()
+class AELoss(nn.Module):
+ """Associative Embedding loss.
+
+ `Associative Embedding: End-to-End Learning for Joint Detection and
+    Grouping`.
+ """
+
+ def __init__(self, loss_type):
+ super().__init__()
+ self.loss_type = loss_type
+
+ def singleTagLoss(self, pred_tag, joints):
+ """Associative embedding loss for one image.
+
+ Note:
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ pred_tag (torch.Tensor[KxHxW,1]): tag of output for one image.
+ joints (torch.Tensor[M,K,2]): joints information for one image.
+ """
+ tags = []
+ pull = 0
+ for joints_per_person in joints:
+ tmp = []
+ for joint in joints_per_person:
+ if joint[1] > 0:
+ tmp.append(pred_tag[joint[0]])
+ if len(tmp) == 0:
+ continue
+ tmp = torch.stack(tmp)
+ tags.append(torch.mean(tmp, dim=0))
+ pull = pull + torch.mean((tmp - tags[-1].expand_as(tmp))**2)
+
+ num_tags = len(tags)
+ if num_tags == 0:
+ return (
+ _make_input(torch.zeros(1).float(), device=pred_tag.device),
+ _make_input(torch.zeros(1).float(), device=pred_tag.device))
+ elif num_tags == 1:
+ return (_make_input(
+ torch.zeros(1).float(), device=pred_tag.device), pull)
+
+ tags = torch.stack(tags)
+
+ size = (num_tags, num_tags)
+ A = tags.expand(*size)
+ B = A.permute(1, 0)
+
+ diff = A - B
+
+ if self.loss_type == 'exp':
+ diff = torch.pow(diff, 2)
+ push = torch.exp(-diff)
+ push = torch.sum(push) - num_tags
+ elif self.loss_type == 'max':
+ diff = 1 - torch.abs(diff)
+ push = torch.clamp(diff, min=0).sum() - num_tags
+ else:
+ raise ValueError('Unknown ae loss type')
+
+ push_loss = push / ((num_tags - 1) * num_tags) * 0.5
+ pull_loss = pull / (num_tags)
+
+ return push_loss, pull_loss
+
+ def forward(self, tags, joints):
+ """Accumulate the tag loss for each image in the batch.
+
+ Note:
+ - batch_size: N
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ tags (torch.Tensor[N,KxHxW,1]): tag channels of output.
+ joints (torch.Tensor[N,M,K,2]): joints information.
+ """
+ pushes, pulls = [], []
+ joints = joints.cpu().data.numpy()
+ batch_size = tags.size(0)
+ for i in range(batch_size):
+ push, pull = self.singleTagLoss(tags[i], joints[i])
+ pushes.append(push)
+ pulls.append(pull)
+ return torch.stack(pushes), torch.stack(pulls)
+
+
+@LOSSES.register_module()
+class MultiLossFactory(nn.Module):
+ """Loss for bottom-up models.
+
+ Args:
+ num_joints (int): Number of keypoints.
+ num_stages (int): Number of stages.
+ ae_loss_type (str): Type of ae loss.
+ with_ae_loss (list[bool]): Use ae loss or not in multi-heatmap.
+ push_loss_factor (list[float]):
+ Parameter of push loss in multi-heatmap.
+ pull_loss_factor (list[float]):
+ Parameter of pull loss in multi-heatmap.
+ with_heatmap_loss (list[bool]):
+ Use heatmap loss or not in multi-heatmap.
+ heatmaps_loss_factor (list[float]):
+ Parameter of heatmap loss in multi-heatmap.
+ supervise_empty (bool): Whether to supervise empty channels.
+ """
+
+ def __init__(self,
+ num_joints,
+ num_stages,
+ ae_loss_type,
+ with_ae_loss,
+ push_loss_factor,
+ pull_loss_factor,
+ with_heatmaps_loss,
+ heatmaps_loss_factor,
+ supervise_empty=True):
+ super().__init__()
+
+ assert isinstance(with_heatmaps_loss, (list, tuple)), \
+ 'with_heatmaps_loss should be a list or tuple'
+ assert isinstance(heatmaps_loss_factor, (list, tuple)), \
+ 'heatmaps_loss_factor should be a list or tuple'
+ assert isinstance(with_ae_loss, (list, tuple)), \
+ 'with_ae_loss should be a list or tuple'
+ assert isinstance(push_loss_factor, (list, tuple)), \
+ 'push_loss_factor should be a list or tuple'
+ assert isinstance(pull_loss_factor, (list, tuple)), \
+ 'pull_loss_factor should be a list or tuple'
+
+ self.num_joints = num_joints
+ self.num_stages = num_stages
+ self.ae_loss_type = ae_loss_type
+ self.with_ae_loss = with_ae_loss
+ self.push_loss_factor = push_loss_factor
+ self.pull_loss_factor = pull_loss_factor
+ self.with_heatmaps_loss = with_heatmaps_loss
+ self.heatmaps_loss_factor = heatmaps_loss_factor
+
+ self.heatmaps_loss = \
+ nn.ModuleList(
+ [
+ HeatmapLoss(supervise_empty)
+ if with_heatmaps_loss else None
+ for with_heatmaps_loss in self.with_heatmaps_loss
+ ]
+ )
+
+ self.ae_loss = \
+ nn.ModuleList(
+ [
+ AELoss(self.ae_loss_type) if with_ae_loss else None
+ for with_ae_loss in self.with_ae_loss
+ ]
+ )
+
+ def forward(self, outputs, heatmaps, masks, joints):
+ """Forward function to calculate losses.
+
+ Note:
+ - batch_size: N
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+ - output_channel: C C=2K if use ae loss else K
+
+ Args:
+ outputs (list(torch.Tensor[N,C,H,W])): outputs of stages.
+ heatmaps (list(torch.Tensor[N,K,H,W])): target of heatmaps.
+ masks (list(torch.Tensor[N,H,W])): masks of heatmaps.
+ joints (list(torch.Tensor[N,M,K,2])): joints of ae loss.
+ """
+ heatmaps_losses = []
+ push_losses = []
+ pull_losses = []
+ for idx in range(len(outputs)):
+ offset_feat = 0
+ if self.heatmaps_loss[idx]:
+ heatmaps_pred = outputs[idx][:, :self.num_joints]
+ offset_feat = self.num_joints
+ heatmaps_loss = self.heatmaps_loss[idx](heatmaps_pred,
+ heatmaps[idx],
+ masks[idx])
+ heatmaps_loss = heatmaps_loss * self.heatmaps_loss_factor[idx]
+ heatmaps_losses.append(heatmaps_loss)
+ else:
+ heatmaps_losses.append(None)
+
+ if self.ae_loss[idx]:
+ tags_pred = outputs[idx][:, offset_feat:]
+ batch_size = tags_pred.size()[0]
+ tags_pred = tags_pred.contiguous().view(batch_size, -1, 1)
+
+ push_loss, pull_loss = self.ae_loss[idx](tags_pred,
+ joints[idx])
+ push_loss = push_loss * self.push_loss_factor[idx]
+ pull_loss = pull_loss * self.pull_loss_factor[idx]
+
+ push_losses.append(push_loss)
+ pull_losses.append(pull_loss)
+ else:
+ push_losses.append(None)
+ pull_losses.append(None)
+
+ return heatmaps_losses, push_losses, pull_losses
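
For intuition, a standalone sketch of the 'exp'-type push/pull objective that AELoss evaluates per image, with two hand-made people (dummy tag values and illustrative names; only PyTorch is assumed): the pull term drives each person's joint tags towards that person's mean tag, while the push term drives different people's mean tags apart.

import torch

# Tags sampled at each person's visible joints.
person_tags = [torch.tensor([0.10, 0.12, 0.09]), torch.tensor([0.90, 0.88, 0.93])]
means = torch.stack([t.mean() for t in person_tags])
num = len(means)

pull = sum(((t - m) ** 2).mean() for t, m in zip(person_tags, means)) / num

diff = means[:, None] - means[None, :]
push = (torch.exp(-diff ** 2).sum() - num) / ((num - 1) * num) * 0.5
print(pull.item(), push.item())
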
diff --git a/mmpose/models/losses/regression_loss.py b/mmpose/models/losses/regression_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..db4178355ed4d16978d487ed92120a4cf427bf83
--- /dev/null
+++ b/mmpose/models/losses/regression_loss.py
@@ -0,0 +1,448 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class SmoothL1Loss(nn.Module):
+ """SmoothL1Loss loss.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.smooth_l1_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K, D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class WingLoss(nn.Module):
+ """Wing Loss. paper ref: 'Wing Loss for Robust Facial Landmark Localisation
+ with Convolutional Neural Networks' Feng et al. CVPR'2018.
+
+ Args:
+ omega (float): Also referred to as width.
+ epsilon (float): Also referred to as curvature.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ omega=10.0,
+ epsilon=2.0,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.omega = omega
+ self.epsilon = epsilon
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ # constant that smoothly links the piecewise-defined linear
+ # and nonlinear parts
+ self.C = self.omega * (1.0 - math.log(1.0 + self.omega / self.epsilon))
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ pred (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ """
+ delta = (target - pred).abs()
+ losses = torch.where(
+ delta < self.omega,
+ self.omega * torch.log(1.0 + delta / self.epsilon), delta - self.C)
+ return torch.mean(torch.sum(losses, dim=[1, 2]), dim=0)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N,K,D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class SoftWingLoss(nn.Module):
+ """Soft Wing Loss 'Structure-Coherent Deep Feature Learning for Robust Face
+ Alignment' Lin et al. TIP'2021.
+
+ loss =
+ 1. |x| , if |x| < omega1
+ 2. omega2*ln(1+|x|/epsilon) + B, if |x| >= omega1
+
+ Args:
+ omega1 (float): The first threshold.
+ omega2 (float): The second threshold.
+ epsilon (float): Also referred to as curvature.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ omega1=2.0,
+ omega2=20.0,
+ epsilon=0.5,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.omega1 = omega1
+ self.omega2 = omega2
+ self.epsilon = epsilon
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ # constant that smoothly links the piecewise-defined linear
+ # and nonlinear parts
+ self.B = self.omega1 - self.omega2 * math.log(1.0 + self.omega1 /
+ self.epsilon)
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ pred (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ """
+ delta = (target - pred).abs()
+ losses = torch.where(
+ delta < self.omega1, delta,
+ self.omega2 * torch.log(1.0 + delta / self.epsilon) + self.B)
+ return torch.mean(torch.sum(losses, dim=[1, 2]), dim=0)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K, D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class MPJPELoss(nn.Module):
+ """MPJPE (Mean Per Joint Position Error) loss.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N,K,D]):
+ Weights across different joint types.
+ """
+
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = torch.mean(
+ torch.norm((output - target) * target_weight, dim=-1))
+ else:
+ loss = torch.mean(torch.norm(output - target, dim=-1))
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class L1Loss(nn.Module):
+    """L1Loss loss."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.l1_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output regression.
+ target (torch.Tensor[N, K, 2]): Target regression.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class MSELoss(nn.Module):
+ """MSE loss for coordinate regression."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.mse_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output regression.
+ target (torch.Tensor[N, K, 2]): Target regression.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class BoneLoss(nn.Module):
+ """Bone length loss.
+
+ Args:
+ joint_parents (list): Indices of each joint's parent joint.
+ use_target_weight (bool): Option to use weighted bone loss.
+ Different bone types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, joint_parents, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.joint_parents = joint_parents
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ self.non_root_indices = []
+ for i in range(len(self.joint_parents)):
+ if i != self.joint_parents[i]:
+ self.non_root_indices.append(i)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K-1]):
+ Weights across different bone types.
+ """
+ output_bone = torch.norm(
+ output - output[:, self.joint_parents, :],
+ dim=-1)[:, self.non_root_indices]
+ target_bone = torch.norm(
+ target - target[:, self.joint_parents, :],
+ dim=-1)[:, self.non_root_indices]
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = torch.mean(
+ torch.abs((output_bone * target_weight).mean(dim=0) -
+ (target_bone * target_weight).mean(dim=0)))
+ else:
+ loss = torch.mean(
+ torch.abs(output_bone.mean(dim=0) - target_bone.mean(dim=0)))
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class SemiSupervisionLoss(nn.Module):
+ """Semi-supervision loss for unlabeled data. It is composed of projection
+ loss and bone loss.
+
+ Paper ref: `3D human pose estimation in video with temporal convolutions
+ and semi-supervised training` Dario Pavllo et al. CVPR'2019.
+
+ Args:
+ joint_parents (list): Indices of each joint's parent joint.
+ projection_loss_weight (float): Weight for projection loss.
+ bone_loss_weight (float): Weight for bone loss.
+ warmup_iterations (int): Number of warmup iterations. In the first
+ `warmup_iterations` iterations, the model is trained only on
+ labeled data, and semi-supervision loss will be 0.
+ This is a workaround since currently we cannot access
+ epoch number in loss functions. Note that the iteration number in
+ an epoch can be changed due to different GPU numbers in multi-GPU
+ settings. So please set this parameter carefully.
+ warmup_iterations = dataset_size // samples_per_gpu // gpu_num
+ * warmup_epochs
+ """
+
+ def __init__(self,
+ joint_parents,
+ projection_loss_weight=1.,
+ bone_loss_weight=1.,
+ warmup_iterations=0):
+ super().__init__()
+ self.criterion_projection = MPJPELoss(
+ loss_weight=projection_loss_weight)
+ self.criterion_bone = BoneLoss(
+ joint_parents, loss_weight=bone_loss_weight)
+ self.warmup_iterations = warmup_iterations
+ self.num_iterations = 0
+
+ @staticmethod
+ def project_joints(x, intrinsics):
+ """Project 3D joint coordinates to 2D image plane using camera
+ intrinsic parameters.
+
+ Args:
+ x (torch.Tensor[N, K, 3]): 3D joint coordinates.
+ intrinsics (torch.Tensor[N, 4] | torch.Tensor[N, 9]): Camera
+ intrinsics: f (2), c (2), k (3), p (2).
+ """
+ while intrinsics.dim() < x.dim():
+ intrinsics.unsqueeze_(1)
+ f = intrinsics[..., :2]
+ c = intrinsics[..., 2:4]
+ _x = torch.clamp(x[:, :, :2] / x[:, :, 2:], -1, 1)
+ if intrinsics.shape[-1] == 9:
+ k = intrinsics[..., 4:7]
+ p = intrinsics[..., 7:9]
+
+ r2 = torch.sum(_x[:, :, :2]**2, dim=-1, keepdim=True)
+ radial = 1 + torch.sum(
+ k * torch.cat((r2, r2**2, r2**3), dim=-1),
+ dim=-1,
+ keepdim=True)
+ tan = torch.sum(p * _x, dim=-1, keepdim=True)
+ _x = _x * (radial + tan) + p * r2
+ _x = f * _x + c
+ return _x
+
+ def forward(self, output, target):
+ losses = dict()
+
+ self.num_iterations += 1
+ if self.num_iterations <= self.warmup_iterations:
+ return losses
+
+ labeled_pose = output['labeled_pose']
+ unlabeled_pose = output['unlabeled_pose']
+ unlabeled_traj = output['unlabeled_traj']
+ unlabeled_target_2d = target['unlabeled_target_2d']
+ intrinsics = target['intrinsics']
+
+ # projection loss
+ unlabeled_output = unlabeled_pose + unlabeled_traj
+ unlabeled_output_2d = self.project_joints(unlabeled_output, intrinsics)
+ loss_proj = self.criterion_projection(unlabeled_output_2d,
+ unlabeled_target_2d, None)
+ losses['proj_loss'] = loss_proj
+
+ # bone loss
+ loss_bone = self.criterion_bone(unlabeled_pose, labeled_pose, None)
+ losses['bone_loss'] = loss_bone
+
+ return losses
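
A minimal sketch of how BoneLoss turns a kinematic tree into bone lengths and compares their batch means, using a toy skeleton (illustrative names and parent indices; only PyTorch is assumed):

import torch

joint_parents = [0, 0, 1, 2]       # joint 0 is the root (its own parent)
non_root = [i for i, p in enumerate(joint_parents) if i != p]

pred = torch.randn(4, 4, 3)        # N=4 poses, K=4 joints, 3D coordinates
gt = torch.randn(4, 4, 3)

pred_bone = torch.norm(pred - pred[:, joint_parents, :], dim=-1)[:, non_root]
gt_bone = torch.norm(gt - gt[:, joint_parents, :], dim=-1)[:, non_root]
loss = torch.abs(pred_bone.mean(dim=0) - gt_bone.mean(dim=0)).mean()
print(loss)
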
diff --git a/mmpose/models/misc/__init__.py b/mmpose/models/misc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef101fec61e72abc0eb90266d453b5b22331378d
--- /dev/null
+++ b/mmpose/models/misc/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/mmpose/models/misc/__pycache__/__init__.cpython-310.pyc b/mmpose/models/misc/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13b10512911c8a438670f569b62ddc656e415e3e
Binary files /dev/null and b/mmpose/models/misc/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/misc/__pycache__/discriminator.cpython-310.pyc b/mmpose/models/misc/__pycache__/discriminator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ec94ac9650a0800c603b258244ca0f0eed8c5649
Binary files /dev/null and b/mmpose/models/misc/__pycache__/discriminator.cpython-310.pyc differ
diff --git a/mmpose/models/misc/discriminator.py b/mmpose/models/misc/discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..712f0a8b566e3dcbc0cd13206610d3c750b942ab
--- /dev/null
+++ b/mmpose/models/misc/discriminator.py
@@ -0,0 +1,307 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/akanazawa/hmr
+# Original licence: Copyright (c) 2018 akanazawa, under the MIT License.
+# ------------------------------------------------------------------------------
+
+from abc import abstractmethod
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+
+from mmpose.models.utils.geometry import batch_rodrigues
+
+
+class BaseDiscriminator(nn.Module):
+ """Base linear module for SMPL parameter discriminator.
+
+ Args:
+        fc_layers (Tuple): Tuple of neuron counts,
+            such as (9, 32, 32, 1)
+        use_dropout (Tuple): Tuple of bools defining whether to use dropout
+            for each layer, such as (True, True, False)
+        drop_prob (Tuple): Tuple of floats defining the dropout probability
+            for each layer, such as (0.5, 0.5, 0)
+        use_activation (Tuple): Tuple of bools defining whether to use an
+            activation function for each layer, such as (True, True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ super().__init__()
+ self.fc_layers = fc_layers
+ self.use_dropout = use_dropout
+ self.drop_prob = drop_prob
+ self.use_activation = use_activation
+ self._check()
+ self.create_layers()
+
+ def _check(self):
+ """Check input to avoid ValueError."""
+ if not isinstance(self.fc_layers, tuple):
+            raise TypeError(f'fc_layers requires a tuple, '
+                            f'got {type(self.fc_layers)}')
+
+ if not isinstance(self.use_dropout, tuple):
+            raise TypeError(f'use_dropout requires a tuple, '
+                            f'got {type(self.use_dropout)}')
+
+ if not isinstance(self.drop_prob, tuple):
+            raise TypeError(f'drop_prob requires a tuple, '
+                            f'got {type(self.drop_prob)}')
+
+ if not isinstance(self.use_activation, tuple):
+            raise TypeError(f'use_activation requires a tuple, '
+                            f'got {type(self.use_activation)}')
+
+ l_fc_layer = len(self.fc_layers)
+ l_use_drop = len(self.use_dropout)
+ l_drop_prob = len(self.drop_prob)
+ l_use_activation = len(self.use_activation)
+
+ pass_check = (
+ l_fc_layer >= 2 and l_use_drop < l_fc_layer
+ and l_drop_prob < l_fc_layer and l_use_activation < l_fc_layer
+ and l_drop_prob == l_use_drop)
+
+ if not pass_check:
+ msg = 'Wrong BaseDiscriminator parameters!'
+ raise ValueError(msg)
+
+ def create_layers(self):
+ """Create layers."""
+ l_fc_layer = len(self.fc_layers)
+ l_use_drop = len(self.use_dropout)
+ l_use_activation = len(self.use_activation)
+
+ self.fc_blocks = nn.Sequential()
+
+ for i in range(l_fc_layer - 1):
+ self.fc_blocks.add_module(
+ name=f'regressor_fc_{i}',
+ module=nn.Linear(
+ in_features=self.fc_layers[i],
+ out_features=self.fc_layers[i + 1]))
+
+ if i < l_use_activation and self.use_activation[i]:
+ self.fc_blocks.add_module(
+ name=f'regressor_af_{i}', module=nn.ReLU())
+
+ if i < l_use_drop and self.use_dropout[i]:
+ self.fc_blocks.add_module(
+ name=f'regressor_fc_dropout_{i}',
+ module=nn.Dropout(p=self.drop_prob[i]))
+
+ @abstractmethod
+ def forward(self, inputs):
+ """Forward function."""
+ msg = 'the base class [BaseDiscriminator] is not callable!'
+ raise NotImplementedError(msg)
+
+ def init_weights(self):
+ """Initialize model weights."""
+        # named_modules() yields (name, module) pairs; unpack to get the module.
+        for _, m in self.fc_blocks.named_modules():
+ if isinstance(m, nn.Linear):
+ xavier_init(m, gain=0.01)
+
+
+class ShapeDiscriminator(BaseDiscriminator):
+    """Discriminator for SMPL shape parameters. The input has shape
+    (batch_size x 10).
+
+    Args:
+        fc_layers (Tuple): Tuple of neuron counts, such as (10, 5, 1)
+        use_dropout (Tuple): Tuple of bools defining whether to use dropout
+            for each layer, such as (True, True, False)
+        drop_prob (Tuple): Tuple of floats defining the dropout probability
+            for each layer, such as (0.5, 0)
+        use_activation (Tuple): Tuple of bools defining whether to use an
+            activation function for each layer, such as (True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ if fc_layers[-1] != 1:
+ msg = f'the neuron count of the last layer ' \
+ f'must be 1, but got {fc_layers[-1]}'
+ raise ValueError(msg)
+
+ super().__init__(fc_layers, use_dropout, drop_prob, use_activation)
+
+ def forward(self, inputs):
+ """Forward function."""
+ return self.fc_blocks(inputs)
+
+
+class PoseDiscriminator(nn.Module):
+    """Discriminator for SMPL pose parameters of each joint. It is composed of
+    one discriminator per joint. The input has shape
+    (batch_size x joint_count x 9).
+
+    Args:
+        channels (Tuple): Tuple of channel numbers,
+            such as (9, 32, 32, 1)
+        joint_count (int): Number of joints, such as 23
+ """
+
+ def __init__(self, channels, joint_count):
+ super().__init__()
+ if channels[-1] != 1:
+ msg = f'the neuron count of the last layer ' \
+ f'must be 1, but got {channels[-1]}'
+ raise ValueError(msg)
+ self.joint_count = joint_count
+
+ self.conv_blocks = nn.Sequential()
+ len_channels = len(channels)
+ for idx in range(len_channels - 2):
+ self.conv_blocks.add_module(
+ name=f'conv_{idx}',
+ module=nn.Conv2d(
+ in_channels=channels[idx],
+ out_channels=channels[idx + 1],
+ kernel_size=1,
+ stride=1))
+
+ self.fc_layer = nn.ModuleList()
+ for idx in range(joint_count):
+ self.fc_layer.append(
+ nn.Linear(
+ in_features=channels[len_channels - 2], out_features=1))
+
+ def forward(self, inputs):
+ """Forward function.
+
+ The input is (batch_size x joint_count x 9).
+ """
+ # shape: batch_size x 9 x 1 x joint_count
+ inputs = inputs.transpose(1, 2).unsqueeze(2).contiguous()
+ # shape: batch_size x c x 1 x joint_count
+ internal_outputs = self.conv_blocks(inputs)
+ outputs = []
+ for idx in range(self.joint_count):
+ outputs.append(self.fc_layer[idx](internal_outputs[:, :, 0, idx]))
+
+ return torch.cat(outputs, 1), internal_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.conv_blocks:
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+        # `named_modules()` yields `(name, module)` pairs; unpack the tuple
+        # so the isinstance check can match the linear layers.
+        for _, m in self.fc_layer.named_modules():
+            if isinstance(m, nn.Linear):
+                xavier_init(m, gain=0.01)
+
+
+class FullPoseDiscriminator(BaseDiscriminator):
+ """Discriminator for SMPL pose parameters of all joints.
+
+ Args:
+        fc_layers (Tuple): Tuple of neuron counts,
+            such as (736, 1024, 1024, 1)
+        use_dropout (Tuple): Tuple of bools defining whether to use dropout
+            for each layer, such as (True, True, False)
+        drop_prob (Tuple): Tuple of floats defining the dropout probability
+            of each layer, such as (0.5, 0.5, 0)
+        use_activation (Tuple): Tuple of bools defining whether to use an
+            activation function for each layer, such as (True, True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ if fc_layers[-1] != 1:
+ msg = f'the neuron count of the last layer must be 1,' \
+ f' but got {fc_layers[-1]}'
+ raise ValueError(msg)
+
+ super().__init__(fc_layers, use_dropout, drop_prob, use_activation)
+
+ def forward(self, inputs):
+ """Forward function."""
+ return self.fc_blocks(inputs)
+
+
+class SMPLDiscriminator(nn.Module):
+ """Discriminator for SMPL pose and shape parameters. It is composed of a
+ discriminator for SMPL shape parameters, a discriminator for SMPL pose
+ parameters of all joints and a discriminator for SMPL pose parameters of
+ each joint.
+
+ Args:
+ beta_channel (tuple of int): Tuple of neuron count of the
+ discriminator of shape parameters. Defaults to (10, 5, 1)
+ per_joint_channel (tuple of int): Tuple of neuron count of the
+ discriminator of each joint. Defaults to (9, 32, 32, 1)
+ full_pose_channel (tuple of int): Tuple of neuron count of the
+ discriminator of full pose. Defaults to (23*32, 1024, 1024, 1)
+ """
+
+ def __init__(self,
+ beta_channel=(10, 5, 1),
+ per_joint_channel=(9, 32, 32, 1),
+ full_pose_channel=(23 * 32, 1024, 1024, 1)):
+ super().__init__()
+ self.joint_count = 23
+        # The number of SMPL shape parameters is 10.
+ assert beta_channel[0] == 10
+ # Use 3 x 3 rotation matrix as the pose parameters
+ # of each joint, so the input channel is 9.
+ assert per_joint_channel[0] == 9
+ assert self.joint_count * per_joint_channel[-2] \
+ == full_pose_channel[0]
+
+ self.beta_channel = beta_channel
+ self.per_joint_channel = per_joint_channel
+ self.full_pose_channel = full_pose_channel
+ self._create_sub_modules()
+
+ def _create_sub_modules(self):
+ """Create sub discriminators."""
+
+ # create theta discriminator for each joint
+ self.pose_discriminator = PoseDiscriminator(self.per_joint_channel,
+ self.joint_count)
+
+        # create full pose discriminator over all joints
+ fc_layers = self.full_pose_channel
+ use_dropout = tuple([False] * (len(fc_layers) - 1))
+ drop_prob = tuple([0.5] * (len(fc_layers) - 1))
+ use_activation = tuple([True] * (len(fc_layers) - 2) + [False])
+
+ self.full_pose_discriminator = FullPoseDiscriminator(
+ fc_layers, use_dropout, drop_prob, use_activation)
+
+ # create shape discriminator for betas
+ fc_layers = self.beta_channel
+ use_dropout = tuple([False] * (len(fc_layers) - 1))
+ drop_prob = tuple([0.5] * (len(fc_layers) - 1))
+ use_activation = tuple([True] * (len(fc_layers) - 2) + [False])
+ self.shape_discriminator = ShapeDiscriminator(fc_layers, use_dropout,
+ drop_prob,
+ use_activation)
+
+ def forward(self, thetas):
+ """Forward function."""
+ _, poses, shapes = thetas
+
+ batch_size = poses.shape[0]
+ shape_disc_value = self.shape_discriminator(shapes)
+
+        # The first rotation matrix is the global rotation
+        # and is NOT used in the discriminator.
+ if poses.dim() == 2:
+ rotate_matrixs = \
+ batch_rodrigues(poses.contiguous().view(-1, 3)
+ ).view(batch_size, 24, 9)[:, 1:, :]
+ else:
+ rotate_matrixs = poses.contiguous().view(batch_size, 24,
+ 9)[:, 1:, :].contiguous()
+ pose_disc_value, pose_inter_disc_value \
+ = self.pose_discriminator(rotate_matrixs)
+ full_pose_disc_value = self.full_pose_discriminator(
+ pose_inter_disc_value.contiguous().view(batch_size, -1))
+ return torch.cat(
+ (pose_disc_value, full_pose_disc_value, shape_disc_value), 1)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ self.full_pose_discriminator.init_weights()
+ self.pose_discriminator.init_weights()
+ self.shape_discriminator.init_weights()
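+
+
+# Example usage (an illustrative sketch, not part of the original module):
+# scoring a batch of SMPL parameters with the default channel settings above.
+# `cam`, `poses` and `shapes` are assumed to come from an SMPL regressor; the
+# random values below are used only to illustrate the expected shapes.
+#
+#   disc = SMPLDiscriminator()
+#   disc.init_weights()
+#   cam = torch.zeros(8, 3)              # ignored by the discriminator
+#   poses = torch.rand(8, 24, 3, 3)      # per-joint rotation matrices
+#   shapes = torch.rand(8, 10)           # SMPL betas
+#   scores = disc((cam, poses, shapes))  # shape (8, 25): 23 per-joint scores,
+#                                        # 1 full-pose score, 1 shape score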
diff --git a/mmpose/models/necks/__init__.py b/mmpose/models/necks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d3a5cc01a93604f3d9da9242ea2eac0fe60638c
--- /dev/null
+++ b/mmpose/models/necks/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .gap_neck import GlobalAveragePooling
+from .posewarper_neck import PoseWarperNeck
+
+__all__ = ['GlobalAveragePooling', 'PoseWarperNeck']
diff --git a/mmpose/models/necks/__pycache__/__init__.cpython-310.pyc b/mmpose/models/necks/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..67e4e427144891cbb952097da5e106cec998c575
Binary files /dev/null and b/mmpose/models/necks/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/necks/__pycache__/gap_neck.cpython-310.pyc b/mmpose/models/necks/__pycache__/gap_neck.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef6fa92d8e98d9eaa628de32afe9e727fc017f58
Binary files /dev/null and b/mmpose/models/necks/__pycache__/gap_neck.cpython-310.pyc differ
diff --git a/mmpose/models/necks/__pycache__/posewarper_neck.cpython-310.pyc b/mmpose/models/necks/__pycache__/posewarper_neck.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a410be852597bf1fd713a84957c31f9ee6baa480
Binary files /dev/null and b/mmpose/models/necks/__pycache__/posewarper_neck.cpython-310.pyc differ
diff --git a/mmpose/models/necks/gap_neck.py b/mmpose/models/necks/gap_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e6ad68ec11110daaad3a66e09d67efb355c4b93
--- /dev/null
+++ b/mmpose/models/necks/gap_neck.py
@@ -0,0 +1,37 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import NECKS
+
+
+@NECKS.register_module()
+class GlobalAveragePooling(nn.Module):
+ """Global Average Pooling neck.
+
+    Note that we use `view` to flatten the pooled features. We do not use
+    `squeeze`, as it would also remove the batch dimension when the batch
+    size is 1, which can lead to unexpected errors.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.gap = nn.AdaptiveAvgPool2d((1, 1))
+
+ def init_weights(self):
+ pass
+
+ def forward(self, inputs):
+ if isinstance(inputs, tuple):
+ outs = tuple([self.gap(x) for x in inputs])
+ outs = tuple(
+ [out.view(x.size(0), -1) for out, x in zip(outs, inputs)])
+ elif isinstance(inputs, list):
+ outs = [self.gap(x) for x in inputs]
+ outs = [out.view(x.size(0), -1) for out, x in zip(outs, inputs)]
+ elif isinstance(inputs, torch.Tensor):
+ outs = self.gap(inputs)
+ outs = outs.view(inputs.size(0), -1)
+ else:
+            raise TypeError('neck inputs should be tuple, list or torch.Tensor')
+ return outs
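+
+
+# Example (illustrative): the pooled output keeps the batch dimension even
+# for a batch size of 1, and lists/tuples of feature maps are handled
+# element-wise.
+#
+#   gap = GlobalAveragePooling()
+#   x = torch.rand(1, 256, 8, 8)
+#   gap(x).shape                      # torch.Size([1, 256])
+#   [o.shape for o in gap([x, torch.rand(1, 512, 4, 4)])]
+#   # -> [torch.Size([1, 256]), torch.Size([1, 512])]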
diff --git a/mmpose/models/necks/posewarper_neck.py b/mmpose/models/necks/posewarper_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd4ddfbf8984857a6110f19b0a7d703b53f1c433
--- /dev/null
+++ b/mmpose/models/necks/posewarper_neck.py
@@ -0,0 +1,329 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from mmcv.utils import digit_version
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.utils.ops import resize
+from ..backbones.resnet import BasicBlock, Bottleneck
+from ..builder import NECKS
+
+try:
+ from mmcv.ops import DeformConv2d
+ has_mmcv_full = True
+except (ImportError, ModuleNotFoundError):
+ has_mmcv_full = False
+
+
+@NECKS.register_module()
+class PoseWarperNeck(nn.Module):
+ """PoseWarper neck.
+
+    `"Learning Temporal Pose Estimation from Sparsely-Labeled Videos"
+    <https://arxiv.org/abs/1906.04016>`_.
+
+ Args:
+ in_channels (int): Number of input channels from backbone
+ out_channels (int): Number of output channels
+ inner_channels (int): Number of intermediate channels of the res block
+ deform_groups (int): Number of groups in the deformable conv
+        dilations (list|tuple): Different dilations of the offset conv layers
+        trans_conv_kernel (int): The kernel size of the trans conv layer,
+            which is used to get the heatmap from the backbone output.
+            Default: 1
+ res_blocks_cfg (dict|None): config of residual blocks. If None,
+ use the default values. If not None, it should contain the
+ following keys:
+
+ - block (str): the type of residual block, Default: 'BASIC'.
+ - num_blocks (int): the number of blocks, Default: 20.
+
+        offsets_kernel (int): The kernel size of the offset conv layers.
+        deform_conv_kernel (int): The kernel size of the deformable conv
+            layers.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to \
+                the same size as the first one and then concatenated \
+                together. Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into \
+                a list and passed into the decode head.
+            - None: Only a single selected feature map is allowed.
+
+ freeze_trans_layer (bool): Whether to freeze the transition layer
+ (stop grad and set eval mode). Default: True.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ im2col_step (int): the argument `im2col_step` in deformable conv,
+ Default: 80.
+ """
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+ minimum_mmcv_version = '1.3.17'
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ inner_channels,
+ deform_groups=17,
+ dilations=(3, 6, 12, 18, 24),
+ trans_conv_kernel=1,
+ res_blocks_cfg=None,
+ offsets_kernel=3,
+ deform_conv_kernel=3,
+ in_index=0,
+ input_transform=None,
+ freeze_trans_layer=True,
+ norm_eval=False,
+ im2col_step=80):
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.inner_channels = inner_channels
+ self.deform_groups = deform_groups
+ self.dilations = dilations
+ self.trans_conv_kernel = trans_conv_kernel
+ self.res_blocks_cfg = res_blocks_cfg
+ self.offsets_kernel = offsets_kernel
+ self.deform_conv_kernel = deform_conv_kernel
+ self.in_index = in_index
+ self.input_transform = input_transform
+ self.freeze_trans_layer = freeze_trans_layer
+ self.norm_eval = norm_eval
+ self.im2col_step = im2col_step
+
+ identity_trans_layer = False
+
+ assert trans_conv_kernel in [0, 1, 3]
+ kernel_size = trans_conv_kernel
+ if kernel_size == 3:
+ padding = 1
+ elif kernel_size == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_trans_layer = True
+
+ if identity_trans_layer:
+ self.trans_layer = nn.Identity()
+ else:
+ self.trans_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+
+ # build chain of residual blocks
+ if res_blocks_cfg is not None and not isinstance(res_blocks_cfg, dict):
+ raise TypeError('res_blocks_cfg should be dict or None.')
+
+ if res_blocks_cfg is None:
+ block_type = 'BASIC'
+ num_blocks = 20
+ else:
+ block_type = res_blocks_cfg.get('block', 'BASIC')
+ num_blocks = res_blocks_cfg.get('num_blocks', 20)
+
+ block = self.blocks_dict[block_type]
+
+ res_layers = []
+ downsample = nn.Sequential(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=out_channels,
+ out_channels=inner_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False),
+ build_norm_layer(dict(type='BN'), inner_channels)[1])
+ res_layers.append(
+ block(
+ in_channels=out_channels,
+ out_channels=inner_channels,
+ downsample=downsample))
+
+ for _ in range(1, num_blocks):
+ res_layers.append(block(inner_channels, inner_channels))
+ self.offset_feats = nn.Sequential(*res_layers)
+
+ # build offset layers
+ self.num_offset_layers = len(dilations)
+ assert self.num_offset_layers > 0, 'Number of offset layers ' \
+ 'should be larger than 0.'
+
+ target_offset_channels = 2 * offsets_kernel**2 * deform_groups
+
+ offset_layers = [
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=inner_channels,
+ out_channels=target_offset_channels,
+ kernel_size=offsets_kernel,
+ stride=1,
+ dilation=dilations[i],
+ padding=dilations[i],
+ bias=False,
+ ) for i in range(self.num_offset_layers)
+ ]
+ self.offset_layers = nn.ModuleList(offset_layers)
+
+ # build deformable conv layers
+ assert digit_version(mmcv.__version__) >= \
+ digit_version(self.minimum_mmcv_version), \
+ f'Current MMCV version: {mmcv.__version__}, ' \
+ f'but MMCV >= {self.minimum_mmcv_version} is required, see ' \
+            'https://github.com/open-mmlab/mmcv/issues/1440. ' \
+            'Please install the latest MMCV.'
+
+ if has_mmcv_full:
+ deform_conv_layers = [
+ DeformConv2d(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=deform_conv_kernel,
+ stride=1,
+ padding=int(deform_conv_kernel / 2) * dilations[i],
+ dilation=dilations[i],
+ deform_groups=deform_groups,
+ im2col_step=self.im2col_step,
+ ) for i in range(self.num_offset_layers)
+ ]
+ else:
+ raise ImportError('Please install the full version of mmcv '
+ 'to use `DeformConv2d`.')
+
+ self.deform_conv_layers = nn.ModuleList(deform_conv_layers)
+
+ self.freeze_layers()
+
+ def freeze_layers(self):
+ if self.freeze_trans_layer:
+ self.trans_layer.eval()
+
+ for param in self.trans_layer.parameters():
+ param.requires_grad = False
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ elif isinstance(m, DeformConv2d):
+ filler = torch.zeros([
+ m.weight.size(0),
+ m.weight.size(1),
+ m.weight.size(2),
+ m.weight.size(3)
+ ],
+ dtype=torch.float32,
+ device=m.weight.device)
+ for k in range(m.weight.size(0)):
+ filler[k, k,
+ int(m.weight.size(2) / 2),
+ int(m.weight.size(3) / 2)] = 1.0
+ m.weight = torch.nn.Parameter(filler)
+ m.weight.requires_grad = True
+
+ # posewarper offset layer weight initialization
+ for m in self.offset_layers.modules():
+ constant_init(m, 0)
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def forward(self, inputs, frame_weight):
+ assert isinstance(inputs, (list, tuple)), 'PoseWarperNeck inputs ' \
+ 'should be list or tuple, even though the length is 1, ' \
+ 'for unified processing.'
+
+ output_heatmap = 0
+ if len(inputs) > 1:
+ inputs = [self._transform_inputs(input) for input in inputs]
+ inputs = [self.trans_layer(input) for input in inputs]
+
+ # calculate difference features
+ diff_features = [
+ self.offset_feats(inputs[0] - input) for input in inputs
+ ]
+
+ for i in range(len(inputs)):
+ if frame_weight[i] == 0:
+ continue
+ warped_heatmap = 0
+ for j in range(self.num_offset_layers):
+ offset = (self.offset_layers[j](diff_features[i]))
+ warped_heatmap_tmp = self.deform_conv_layers[j](inputs[i],
+ offset)
+ warped_heatmap += warped_heatmap_tmp / \
+ self.num_offset_layers
+
+ output_heatmap += warped_heatmap * frame_weight[i]
+
+ else:
+ inputs = inputs[0]
+ inputs = self._transform_inputs(inputs)
+ inputs = self.trans_layer(inputs)
+
+ num_frames = len(frame_weight)
+ batch_size = inputs.size(0) // num_frames
+ ref_x = inputs[:batch_size]
+ ref_x_tiled = ref_x.repeat(num_frames, 1, 1, 1)
+
+ offset_features = self.offset_feats(ref_x_tiled - inputs)
+
+ warped_heatmap = 0
+ for j in range(self.num_offset_layers):
+ offset = self.offset_layers[j](offset_features)
+
+ warped_heatmap_tmp = self.deform_conv_layers[j](inputs, offset)
+ warped_heatmap += warped_heatmap_tmp / self.num_offset_layers
+
+ for i in range(num_frames):
+ if frame_weight[i] == 0:
+ continue
+ output_heatmap += warped_heatmap[i * batch_size:(i + 1) *
+ batch_size] * frame_weight[i]
+
+ return output_heatmap
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self.freeze_layers()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
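+
+
+# Example usage (an illustrative sketch; requires mmcv-full for DeformConv2d).
+# The channel numbers below assume an HRNet-w48 backbone and 17 keypoints and
+# are NOT taken from a released config.
+#
+#   neck = PoseWarperNeck(in_channels=48, out_channels=17, inner_channels=128)
+#   feats = [torch.rand(2, 48, 64, 48) for _ in range(3)]   # 3 frames
+#   heatmaps = neck(feats, frame_weight=[0.6, 0.2, 0.2])    # (2, 17, 64, 48)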
diff --git a/mmpose/models/registry.py b/mmpose/models/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..f354ae9e137262e2f375a64aef74c3af20baae63
--- /dev/null
+++ b/mmpose/models/registry.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .builder import BACKBONES, HEADS, LOSSES, NECKS, POSENETS
+
+__all__ = ['BACKBONES', 'HEADS', 'LOSSES', 'NECKS', 'POSENETS']
+
+warnings.simplefilter('once', DeprecationWarning)
+warnings.warn(
+ 'Registries (BACKBONES, NECKS, HEADS, LOSSES, POSENETS) have '
+ 'been moved to mmpose.models.builder. Importing from '
+ 'mmpose.models.registry will be deprecated in the future.',
+ DeprecationWarning)
diff --git a/mmpose/models/utils/__init__.py b/mmpose/models/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6871c66e50708f928ead8714aa83cb4ef6447e09
--- /dev/null
+++ b/mmpose/models/utils/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .smpl import SMPL
+
+__all__ = ['SMPL']
diff --git a/mmpose/models/utils/__pycache__/__init__.cpython-310.pyc b/mmpose/models/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..63fe9deca03ac58d5b133365d0e34b1a57a1eb84
Binary files /dev/null and b/mmpose/models/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/models/utils/__pycache__/geometry.cpython-310.pyc b/mmpose/models/utils/__pycache__/geometry.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f93be81e84481b2a94bce492350dce6f66c02081
Binary files /dev/null and b/mmpose/models/utils/__pycache__/geometry.cpython-310.pyc differ
diff --git a/mmpose/models/utils/__pycache__/ops.cpython-310.pyc b/mmpose/models/utils/__pycache__/ops.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e41dbfd1c0b622e5d6965f3388be98013578cba1
Binary files /dev/null and b/mmpose/models/utils/__pycache__/ops.cpython-310.pyc differ
diff --git a/mmpose/models/utils/__pycache__/smpl.cpython-310.pyc b/mmpose/models/utils/__pycache__/smpl.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f7d593d4f3aedc307c395e1e23f48c82f2fd9fc5
Binary files /dev/null and b/mmpose/models/utils/__pycache__/smpl.cpython-310.pyc differ
diff --git a/mmpose/models/utils/geometry.py b/mmpose/models/utils/geometry.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ceadaec30cd2c9bb3fbada132e1ea674f2e8754
--- /dev/null
+++ b/mmpose/models/utils/geometry.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.nn import functional as F
+
+
+def rot6d_to_rotmat(x):
+ """Convert 6D rotation representation to 3x3 rotation matrix.
+
+ Based on Zhou et al., "On the Continuity of Rotation
+ Representations in Neural Networks", CVPR 2019
+ Input:
+ (B,6) Batch of 6-D rotation representations
+ Output:
+ (B,3,3) Batch of corresponding rotation matrices
+ """
+ x = x.view(-1, 3, 2)
+ a1 = x[:, :, 0]
+ a2 = x[:, :, 1]
+ b1 = F.normalize(a1)
+ b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1)
+ b3 = torch.cross(b1, b2)
+ return torch.stack((b1, b2, b3), dim=-1)
+
+
+def batch_rodrigues(theta):
+ """Convert axis-angle representation to rotation matrix.
+ Args:
+ theta: size = [B, 3]
+ Returns:
+        Rotation matrix corresponding to the axis-angle input
+ -- size = [B, 3, 3]
+ """
+ l2norm = torch.norm(theta + 1e-8, p=2, dim=1)
+ angle = torch.unsqueeze(l2norm, -1)
+ normalized = torch.div(theta, angle)
+ angle = angle * 0.5
+ v_cos = torch.cos(angle)
+ v_sin = torch.sin(angle)
+ quat = torch.cat([v_cos, v_sin * normalized], dim=1)
+ return quat_to_rotmat(quat)
+
+
+def quat_to_rotmat(quat):
+ """Convert quaternion coefficients to rotation matrix.
+ Args:
+ quat: size = [B, 4] 4 <===>(w, x, y, z)
+ Returns:
+ Rotation matrix corresponding to the quaternion
+ -- size = [B, 3, 3]
+ """
+ norm_quat = quat
+ norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True)
+ w, x, y, z = norm_quat[:, 0], norm_quat[:, 1],\
+ norm_quat[:, 2], norm_quat[:, 3]
+
+ B = quat.size(0)
+
+ w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
+ wx, wy, wz = w * x, w * y, w * z
+ xy, xz, yz = x * y, x * z, y * z
+
+ rotMat = torch.stack([
+ w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy,
+ w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz,
+ w2 - x2 - y2 + z2
+ ],
+ dim=1).view(B, 3, 3)
+ return rotMat
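+
+
+# Example (illustrative): a (numerically) zero axis-angle vector maps to the
+# identity rotation, and the 6D vector [1, 0, 0, 1, 0, 0] (the interleaved
+# first two columns of the identity, as consumed by `x.view(-1, 3, 2)`) is
+# recovered as the identity matrix.
+#
+#   batch_rodrigues(torch.zeros(1, 3))                         # ~ identity
+#   rot6d_to_rotmat(torch.tensor([[1., 0., 0., 1., 0., 0.]]))  # identity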
diff --git a/mmpose/models/utils/ops.py b/mmpose/models/utils/ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..858d0a92148a591d235e58bfce8990207632fb39
--- /dev/null
+++ b/mmpose/models/utils/ops.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import torch
+import torch.nn.functional as F
+
+
+def resize(input,
+ size=None,
+ scale_factor=None,
+ mode='nearest',
+ align_corners=None,
+ warning=True):
+ if warning:
+ if size is not None and align_corners:
+ input_h, input_w = tuple(int(x) for x in input.shape[2:])
+ output_h, output_w = tuple(int(x) for x in size)
+            if output_h > input_h or output_w > input_w:
+ if ((output_h > 1 and output_w > 1 and input_h > 1
+ and input_w > 1) and (output_h - 1) % (input_h - 1)
+ and (output_w - 1) % (input_w - 1)):
+ warnings.warn(
+ f'When align_corners={align_corners}, '
+ 'the output would more aligned if '
+ f'input size {(input_h, input_w)} is `x+1` and '
+ f'out size {(output_h, output_w)} is `nx+1`')
+ if isinstance(size, torch.Size):
+ size = tuple(int(x) for x in size)
+ return F.interpolate(input, size, scale_factor, mode, align_corners)
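+
+
+# Example (illustrative): bilinear upsampling of a feature map. The warning
+# above only fires when `align_corners=True` is combined with sizes that do
+# not satisfy the `nx + 1` relation.
+#
+#   x = torch.rand(1, 3, 16, 16)
+#   resize(x, size=(32, 32), mode='bilinear', align_corners=False).shape
+#   # -> torch.Size([1, 3, 32, 32])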
diff --git a/mmpose/models/utils/smpl.py b/mmpose/models/utils/smpl.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe723d483aadb7ce7e0e9f50ef8da7b10e7529e5
--- /dev/null
+++ b/mmpose/models/utils/smpl.py
@@ -0,0 +1,184 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+
+from ..builder import MESH_MODELS
+
+try:
+ from smplx import SMPL as SMPL_
+ has_smpl = True
+except (ImportError, ModuleNotFoundError):
+ has_smpl = False
+
+
+@MESH_MODELS.register_module()
+class SMPL(nn.Module):
+    """SMPL 3D human mesh model from ``SMPL: A Skinned Multi-Person Linear
+    Model'' (Loper et al.). This module is based on the smplx project
+    (https://github.com/vchoutas/smplx).
+
+ Args:
+ smpl_path (str): The path to the folder where the model weights are
+ stored.
+        joints_regressor (str): The path to the file where the joints
+            regressor weights are stored.
+ """
+
+ def __init__(self, smpl_path, joints_regressor):
+ super().__init__()
+
+ assert has_smpl, 'Please install smplx to use SMPL.'
+
+ self.smpl_neutral = SMPL_(
+ model_path=smpl_path,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='neutral')
+
+ self.smpl_male = SMPL_(
+ model_path=smpl_path,
+ create_betas=False,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='male')
+
+ self.smpl_female = SMPL_(
+ model_path=smpl_path,
+ create_betas=False,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='female')
+
+ joints_regressor = torch.tensor(
+ np.load(joints_regressor), dtype=torch.float)[None, ...]
+ self.register_buffer('joints_regressor', joints_regressor)
+
+ self.num_verts = self.smpl_neutral.get_num_verts()
+ self.num_joints = self.joints_regressor.shape[1]
+
+ def smpl_forward(self, model, **kwargs):
+ """Apply a specific SMPL model with given model parameters.
+
+ Note:
+ B: batch size
+ V: number of vertices
+ K: number of joints
+
+ Returns:
+ outputs (dict): Dict with mesh vertices and joints.
+ - vertices: Tensor([B, V, 3]), mesh vertices
+ - joints: Tensor([B, K, 3]), 3d joints regressed
+ from mesh vertices.
+ """
+
+ betas = kwargs['betas']
+ batch_size = betas.shape[0]
+ device = betas.device
+ output = {}
+ if batch_size == 0:
+ output['vertices'] = betas.new_zeros([0, self.num_verts, 3])
+ output['joints'] = betas.new_zeros([0, self.num_joints, 3])
+ else:
+ smpl_out = model(**kwargs)
+ output['vertices'] = smpl_out.vertices
+ output['joints'] = torch.matmul(
+ self.joints_regressor.to(device), output['vertices'])
+ return output
+
+ def get_faces(self):
+ """Return mesh faces.
+
+ Note:
+ F: number of faces
+
+ Returns:
+ faces: np.ndarray([F, 3]), mesh faces
+ """
+ return self.smpl_neutral.faces
+
+ def forward(self,
+ betas,
+ body_pose,
+ global_orient,
+ transl=None,
+ gender=None):
+ """Forward function.
+
+ Note:
+ B: batch size
+ J: number of controllable joints of model, for smpl model J=23
+ K: number of joints
+
+ Args:
+ betas: Tensor([B, 10]), human body shape parameters of SMPL model.
+            body_pose: Tensor([B, J*3] or [B, J, 3, 3]), human body pose
+                parameters of SMPL model. It should be an axis-angle vector
+                ([B, J*3]) or rotation matrices ([B, J, 3, 3]).
+            global_orient: Tensor([B, 3] or [B, 1, 3, 3]), global orientation
+                of human body. It should be an axis-angle vector ([B, 3]) or
+                a rotation matrix ([B, 1, 3, 3]).
+ transl: Tensor([B, 3]), global translation of human body.
+ gender: Tensor([B]), gender parameters of human body. -1 for
+                neutral, 0 for male, 1 for female.
+
+ Returns:
+ outputs (dict): Dict with mesh vertices and joints.
+ - vertices: Tensor([B, V, 3]), mesh vertices
+ - joints: Tensor([B, K, 3]), 3d joints regressed from
+ mesh vertices.
+ """
+
+ batch_size = betas.shape[0]
+        pose2rot = body_pose.dim() == 2
+ if batch_size > 0 and gender is not None:
+ output = {
+ 'vertices': betas.new_zeros([batch_size, self.num_verts, 3]),
+ 'joints': betas.new_zeros([batch_size, self.num_joints, 3])
+ }
+
+ mask = gender < 0
+ _out = self.smpl_forward(
+ self.smpl_neutral,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+
+ mask = gender == 0
+ _out = self.smpl_forward(
+ self.smpl_male,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+
+ mask = gender == 1
+ _out = self.smpl_forward(
+                self.smpl_female,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+ else:
+ return self.smpl_forward(
+ self.smpl_neutral,
+ betas=betas,
+ body_pose=body_pose,
+ global_orient=global_orient,
+ transl=transl,
+ pose2rot=pose2rot)
+
+ return output
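+
+
+# Example usage (an illustrative sketch; the paths below are placeholders and
+# must point to real SMPL model files and a joints-regressor .npy file).
+#
+#   smpl = SMPL(
+#       smpl_path='path/to/smpl_models',
+#       joints_regressor='path/to/joints_regressor.npy')
+#   betas = torch.zeros(2, 10)
+#   body_pose = torch.zeros(2, 23 * 3)       # axis-angle, 23 body joints
+#   global_orient = torch.zeros(2, 3)
+#   out = smpl(betas, body_pose, global_orient)
+#   out['vertices'].shape                    # (2, 6890, 3)
+#   out['joints'].shape                      # (2, K, 3), K from the regressor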
diff --git a/mmpose/utils/__init__.py b/mmpose/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1293ca05aab2632e0d6df29734438bc38ed79c6c
--- /dev/null
+++ b/mmpose/utils/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .collect_env import collect_env
+from .logger import get_root_logger
+from .setup_env import setup_multi_processes
+from .timer import StopWatch
+
+__all__ = [
+ 'get_root_logger', 'collect_env', 'StopWatch', 'setup_multi_processes'
+]
diff --git a/mmpose/utils/__pycache__/__init__.cpython-310.pyc b/mmpose/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dbdb8c2d5af30708d81492f4a3b33d6c52dd9cf5
Binary files /dev/null and b/mmpose/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/mmpose/utils/__pycache__/collect_env.cpython-310.pyc b/mmpose/utils/__pycache__/collect_env.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e13a6b9ea48e0c1beca34e5b7e724d730cfb2bb8
Binary files /dev/null and b/mmpose/utils/__pycache__/collect_env.cpython-310.pyc differ
diff --git a/mmpose/utils/__pycache__/hooks.cpython-310.pyc b/mmpose/utils/__pycache__/hooks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..90996bdaba08e8157a00d56614d79ca796a8cbbb
Binary files /dev/null and b/mmpose/utils/__pycache__/hooks.cpython-310.pyc differ
diff --git a/mmpose/utils/__pycache__/logger.cpython-310.pyc b/mmpose/utils/__pycache__/logger.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b5b4129c3a089739f1211ae219d48eb64a9ae1bb
Binary files /dev/null and b/mmpose/utils/__pycache__/logger.cpython-310.pyc differ
diff --git a/mmpose/utils/__pycache__/setup_env.cpython-310.pyc b/mmpose/utils/__pycache__/setup_env.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a68f863ff010fe1d74045731024ae99ea6802b5f
Binary files /dev/null and b/mmpose/utils/__pycache__/setup_env.cpython-310.pyc differ
diff --git a/mmpose/utils/__pycache__/timer.cpython-310.pyc b/mmpose/utils/__pycache__/timer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3325b34786122a076dc9e661df0e2416896cbfd
Binary files /dev/null and b/mmpose/utils/__pycache__/timer.cpython-310.pyc differ
diff --git a/mmpose/utils/collect_env.py b/mmpose/utils/collect_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..f75c5ea73383ccef367632cf497227498ac50078
--- /dev/null
+++ b/mmpose/utils/collect_env.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.utils import collect_env as collect_basic_env
+from mmcv.utils import get_git_hash
+
+import mmpose
+
+
+def collect_env():
+ env_info = collect_basic_env()
+ env_info['MMPose'] = (mmpose.__version__ + '+' + get_git_hash(digits=7))
+ return env_info
+
+
+if __name__ == '__main__':
+ for name, val in collect_env().items():
+ print(f'{name}: {val}')
diff --git a/mmpose/utils/hooks.py b/mmpose/utils/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..b68940f2b7a8a618916ea5aab331e3ce45ba98e7
--- /dev/null
+++ b/mmpose/utils/hooks.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools
+
+
+class OutputHook:
+
+ def __init__(self, module, outputs=None, as_tensor=False):
+ self.outputs = outputs
+ self.as_tensor = as_tensor
+ self.layer_outputs = {}
+ self.register(module)
+
+ def register(self, module):
+
+ def hook_wrapper(name):
+
+ def hook(model, input, output):
+ if self.as_tensor:
+ self.layer_outputs[name] = output
+ else:
+ if isinstance(output, list):
+ self.layer_outputs[name] = [
+ out.detach().cpu().numpy() for out in output
+ ]
+                    else:
+                        self.layer_outputs[name] = \
+                            output.detach().cpu().numpy()
+
+ return hook
+
+ self.handles = []
+ if isinstance(self.outputs, (list, tuple)):
+ for name in self.outputs:
+ try:
+ layer = rgetattr(module, name)
+ h = layer.register_forward_hook(hook_wrapper(name))
+                except AttributeError as attr_error:
+                    # rgetattr raises AttributeError (not ModuleNotFoundError)
+                    # when a nested layer name cannot be resolved.
+                    raise ModuleNotFoundError(
+                        f'Module {name} not found') from attr_error
+ self.handles.append(h)
+
+ def remove(self):
+ for h in self.handles:
+ h.remove()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.remove()
+
+
+# using wonder's beautiful simplification:
+# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects
+def rgetattr(obj, attr, *args):
+
+ def _getattr(obj, attr):
+ return getattr(obj, attr, *args)
+
+ return functools.reduce(_getattr, [obj] + attr.split('.'))
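+
+
+# Example usage (an illustrative sketch; `model`, `img` and the layer name
+# 'backbone.layer4' are placeholders that depend on the wrapped network).
+#
+#   with OutputHook(model, outputs=['backbone.layer4'], as_tensor=True) as h:
+#       model(img)
+#       feat = h.layer_outputs['backbone.layer4']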
diff --git a/mmpose/utils/logger.py b/mmpose/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..294837fa6aec1e1896de8c8accf470f366f81296
--- /dev/null
+++ b/mmpose/utils/logger.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+
+from mmcv.utils import get_logger
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO):
+ """Use `get_logger` method in mmcv to get the root logger.
+
+ The logger will be initialized if it has not been initialized. By default a
+ StreamHandler will be added. If `log_file` is specified, a FileHandler will
+ also be added. The name of the root logger is the top-level package name,
+ e.g., "mmpose".
+
+ Args:
+ log_file (str | None): The log filename. If specified, a FileHandler
+ will be added to the root logger.
+ log_level (int): The root logger level. Note that only the process of
+ rank 0 is affected, while other processes will set the level to
+ "Error" and be silent most of the time.
+
+ Returns:
+ logging.Logger: The root logger.
+ """
+ return get_logger(__name__.split('.')[0], log_file, log_level)
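+
+
+# Example (illustrative):
+#
+#   logger = get_root_logger(log_file='work_dir/run.log',
+#                            log_level=logging.INFO)
+#   logger.info('Start training')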
diff --git a/mmpose/utils/setup_env.py b/mmpose/utils/setup_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..21def2f0809153a5f755af2431f7e702db625e5c
--- /dev/null
+++ b/mmpose/utils/setup_env.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import platform
+import warnings
+
+import cv2
+import torch.multiprocessing as mp
+
+
+def setup_multi_processes(cfg):
+ """Setup multi-processing environment variables."""
+ # set multi-process start method as `fork` to speed up the training
+ if platform.system() != 'Windows':
+ mp_start_method = cfg.get('mp_start_method', 'fork')
+ current_method = mp.get_start_method(allow_none=True)
+ if current_method is not None and current_method != mp_start_method:
+ warnings.warn(
+ f'Multi-processing start method `{mp_start_method}` is '
+                f'different from the previous setting `{current_method}`. '
+                f'It will be forcibly set to `{mp_start_method}`. You can '
+                f'change this behavior by changing `mp_start_method` in '
+                f'your config.')
+ mp.set_start_method(mp_start_method, force=True)
+
+ # disable opencv multithreading to avoid system being overloaded
+ opencv_num_threads = cfg.get('opencv_num_threads', 0)
+ cv2.setNumThreads(opencv_num_threads)
+
+ # setup OMP threads
+ # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa
+ if 'OMP_NUM_THREADS' not in os.environ and cfg.data.workers_per_gpu > 1:
+ omp_num_threads = 1
+ warnings.warn(
+ f'Setting OMP_NUM_THREADS environment variable for each process '
+            f'to be {omp_num_threads} by default, to avoid your system being '
+            f'overloaded. Please further tune the variable for optimal '
+ f'performance in your application as needed.')
+ os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)
+
+ # setup MKL threads
+ if 'MKL_NUM_THREADS' not in os.environ and cfg.data.workers_per_gpu > 1:
+ mkl_num_threads = 1
+ warnings.warn(
+ f'Setting MKL_NUM_THREADS environment variable for each process '
+            f'to be {mkl_num_threads} by default, to avoid your system being '
+            f'overloaded. Please further tune the variable for optimal '
+ f'performance in your application as needed.')
+ os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)
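+
+
+# Example (illustrative sketch; assumes an mmcv `Config` that exposes
+# `data.workers_per_gpu`, as mmpose-style training configs typically do):
+#
+#   from mmcv import Config
+#   cfg = Config(dict(data=dict(workers_per_gpu=2), opencv_num_threads=0))
+#   setup_multi_processes(cfg)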
diff --git a/mmpose/utils/timer.py b/mmpose/utils/timer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a3185c5e89ce73bd33591c22ce74fc73ef8e770
--- /dev/null
+++ b/mmpose/utils/timer.py
@@ -0,0 +1,117 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import defaultdict
+from contextlib import contextmanager
+from functools import partial
+
+import numpy as np
+from mmcv import Timer
+
+
+class RunningAverage():
+ r"""A helper class to calculate running average in a sliding window.
+
+ Args:
+ window (int): The size of the sliding window.
+ """
+
+ def __init__(self, window: int = 1):
+ self.window = window
+ self._data = []
+
+ def update(self, value):
+ """Update a new data sample."""
+ self._data.append(value)
+ self._data = self._data[-self.window:]
+
+ def average(self):
+ """Get the average value of current window."""
+ return np.mean(self._data)
+
+
+class StopWatch:
+    r"""A helper class to measure FPS and the detailed time consumption of
+    each phase in a video processing loop or similar scenarios.
+
+ Args:
+ window (int): The sliding window size to calculate the running average
+            of the time consumption.
+
+ Example:
+ >>> from mmpose.utils import StopWatch
+ >>> import time
+ >>> stop_watch = StopWatch(window=10)
+ >>> with stop_watch.timeit('total'):
+ >>> time.sleep(0.1)
+ >>> # 'timeit' support nested use
+ >>> with stop_watch.timeit('phase1'):
+ >>> time.sleep(0.1)
+ >>> with stop_watch.timeit('phase2'):
+ >>> time.sleep(0.2)
+ >>> time.sleep(0.2)
+ >>> report = stop_watch.report()
+ """
+
+ def __init__(self, window=1):
+ self.window = window
+ self._record = defaultdict(partial(RunningAverage, window=self.window))
+ self._timer_stack = []
+
+ @contextmanager
+ def timeit(self, timer_name='_FPS_'):
+ """Timing a code snippet with an assigned name.
+
+ Args:
+ timer_name (str): The unique name of the interested code snippet to
+ handle multiple timers and generate reports. Note that '_FPS_'
+ is a special key that the measurement will be in `fps` instead
+ of `millisecond`. Also see `report` and `report_strings`.
+ Default: '_FPS_'.
+ Note:
+ This function should always be used in a `with` statement, as shown
+ in the example.
+ """
+ self._timer_stack.append((timer_name, Timer()))
+ try:
+ yield
+ finally:
+ timer_name, timer = self._timer_stack.pop()
+ self._record[timer_name].update(timer.since_start())
+
+ def report(self, key=None):
+ """Report timing information.
+
+ Returns:
+            dict: The key is the timer name and the value is the \
+                corresponding average time consumption in milliseconds \
+                ('_FPS_' is converted to frames per second).
+ """
+ result = {
+ name: r.average() * 1000.
+ for name, r in self._record.items()
+ }
+
+ if '_FPS_' in result:
+ result['_FPS_'] = 1000. / result.pop('_FPS_')
+
+ if key is None:
+ return result
+ return result[key]
+
+ def report_strings(self):
+        """Report timing information as text strings.
+
+ Returns:
+ list(str): Each element is the information string of a timed \
+ event, in format of '{timer_name}: {time_in_ms}'. \
+ Specially, if timer_name is '_FPS_', the result will \
+ be converted to fps.
+ """
+ result = self.report()
+ strings = []
+        if '_FPS_' in result:
+            # Pop '_FPS_' so it is not reported twice in the per-timer list.
+            fps = result.pop('_FPS_')
+            strings.append(f'FPS: {fps:>5.1f}')
+        strings += [f'{name}: {val:>3.0f}' for name, val in result.items()]
+ return strings
+
+ def reset(self):
+        # Mirror the attributes created in `__init__` so timers keep working
+        # after a reset.
+        self._record = defaultdict(
+            partial(RunningAverage, window=self.window))
+        self._timer_stack = []
diff --git a/mmpose/version.py b/mmpose/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a10826ab75786cbc8aaaf2a6a87e0465be35801
--- /dev/null
+++ b/mmpose/version.py
@@ -0,0 +1,19 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+
+__version__ = '0.24.0'
+short_version = __version__
+
+
+def parse_version_info(version_str):
+ version_info = []
+ for x in version_str.split('.'):
+ if x.isdigit():
+ version_info.append(int(x))
+ elif x.find('rc') != -1:
+ patch_version = x.split('rc')
+ version_info.append(int(patch_version[0]))
+ version_info.append(f'rc{patch_version[1]}')
+ return tuple(version_info)
+
+
+version_info = parse_version_info(__version__)
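+
+
+# Example (illustrative):
+#
+#   parse_version_info('0.24.0')     # (0, 24, 0)
+#   parse_version_info('0.24.0rc1')  # (0, 24, 0, 'rc1')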
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c8a97e25767d9ef3045b86a981b0dfd74c83be3
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1,13 @@
+libglfw3-dev
+libgles2-mesa-dev
+libgl1
+freeglut3-dev
+unzip
+ffmpeg
+libsm6
+libxext6
+libgl1-mesa-dri
+libegl1-mesa
+libgbm1
+build-essential
+libturbojpeg
\ No newline at end of file
diff --git a/pyrender/.coveragerc b/pyrender/.coveragerc
new file mode 100644
index 0000000000000000000000000000000000000000..ee31cded3509cbd991a33dd27e2525b93a1a6558
--- /dev/null
+++ b/pyrender/.coveragerc
@@ -0,0 +1,5 @@
+[report]
+exclude_lines =
+ def __repr__
+ def __str__
+ @abc.abstractmethod
diff --git a/pyrender/.flake8 b/pyrender/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..fec4bcfc3ba774b53a866d839ea15bae6ebdb4a6
--- /dev/null
+++ b/pyrender/.flake8
@@ -0,0 +1,8 @@
+[flake8]
+ignore = E231,W504,F405,F403
+max-line-length = 79
+select = B,C,E,F,W,T4,B9
+exclude =
+ docs/source/conf.py,
+ __pycache__,
+ examples/*
diff --git a/pyrender/.gitignore b/pyrender/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ae59dec631f71a23d4255aaf9c0274a699f4ba25
--- /dev/null
+++ b/pyrender/.gitignore
@@ -0,0 +1,106 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+docs/**/generated/**
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/pyrender/.pre-commit-config.yaml b/pyrender/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1817eb39bf409aff80c7d2cc79a3bc3856c70dbd
--- /dev/null
+++ b/pyrender/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.7.1
+ hooks:
+ - id: flake8
+ exclude: ^setup.py
diff --git a/pyrender/.travis.yml b/pyrender/.travis.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ad289ae1513eaf8fda74f8d5ab7840be3ef56cb
--- /dev/null
+++ b/pyrender/.travis.yml
@@ -0,0 +1,43 @@
+language: python
+sudo: required
+dist: xenial
+
+python:
+- '3.6'
+- '3.7'
+
+before_install:
+ # Pre-install osmesa
+ - sudo apt update
+ - sudo wget https://github.com/mmatl/travis_debs/raw/master/xenial/mesa_18.3.3-0.deb
+ - sudo dpkg -i ./mesa_18.3.3-0.deb || true
+ - sudo apt install -f
+ - git clone https://github.com/mmatl/pyopengl.git
+ - cd pyopengl
+ - pip install .
+ - cd ..
+
+install:
+ - pip install .
+ # - pip install -q pytest pytest-cov coveralls
+ - pip install pytest pytest-cov coveralls
+ - pip install ./pyopengl
+
+script:
+ - PYOPENGL_PLATFORM=osmesa pytest --cov=pyrender tests
+
+after_success:
+- coveralls || true
+
+deploy:
+ provider: pypi
+ skip_existing: true
+ user: mmatl
+ on:
+ tags: true
+ branch: master
+ password:
+ secure: O4WWMbTYb2eVYIO4mMOVa6/xyhX7mPvJpd96cxfNvJdyuqho8VapOhzqsI5kahMB1hFjWWr61yR4+Ru5hoDYf3XA6BQVk8eCY9+0H7qRfvoxex71lahKAqfHLMoE1xNdiVTgl+QN9hYjOnopLod24rx8I8eXfpHu/mfCpuTYGyLlNcDP5St3bXpXLPB5wg8Jo1YRRv6W/7fKoXyuWjewk9cJAS0KrEgnDnSkdwm6Pb+80B2tcbgdGvpGaByw5frndwKiMUMgVUownepDU5POQq2p29wwn9lCvRucULxjEgO+63jdbZRj5fNutLarFa2nISfYnrd72LOyDfbJubwAzzAIsy2JbFORyeHvCgloiuE9oE7a9oOQt/1QHBoIV0seiawMWn55Yp70wQ7HlJs4xSGJWCGa5+9883QRNsvj420atkb3cgO8P+PXwiwTi78Dq7Z/xHqccsU0b8poqBneQoA+pUGgNnF6V7Z8e9RsCcse2gAWSZWuOK3ua+9xCgH7I7MeL3afykr2aJ+yFCoYJMFrUjJeodMX2RbL0q+3FzIPZeGW3WdhTEAL9TSKRcJBSQTskaQlZx/OcpobxS7t3d2S68CCLG9uMTqOTYws55WZ1etalA75sRk9K2MR7ZGjZW3jdtvMViISc/t6Rrjea1GE8ZHGJC6/IeLIWA2c7nc=
+ distributions: sdist bdist_wheel
+notifications:
+ email: false
diff --git a/pyrender/LICENSE b/pyrender/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..4276f7d204e4d85104246df637e0e36adbef14a7
--- /dev/null
+++ b/pyrender/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Matthew Matl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/pyrender/MANIFEST.in b/pyrender/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..097bcca3b4fccdc39ddd63c10f710ad524898e95
--- /dev/null
+++ b/pyrender/MANIFEST.in
@@ -0,0 +1,5 @@
+# Include the license
+include LICENSE
+include README.rst
+include pyrender/fonts/*
+include pyrender/shaders/*
diff --git a/pyrender/README.md b/pyrender/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae88ed1c5e78f247e38291ed83cf4c81230bf976
--- /dev/null
+++ b/pyrender/README.md
@@ -0,0 +1,92 @@
+# Pyrender
+
+[![Build Status](https://travis-ci.org/mmatl/pyrender.svg?branch=master)](https://travis-ci.org/mmatl/pyrender)
+[![Documentation Status](https://readthedocs.org/projects/pyrender/badge/?version=latest)](https://pyrender.readthedocs.io/en/latest/?badge=latest)
+[![Coverage Status](https://coveralls.io/repos/github/mmatl/pyrender/badge.svg?branch=master)](https://coveralls.io/github/mmatl/pyrender?branch=master)
+[![PyPI version](https://badge.fury.io/py/pyrender.svg)](https://badge.fury.io/py/pyrender)
+[![Downloads](https://pepy.tech/badge/pyrender)](https://pepy.tech/project/pyrender)
+
+Pyrender is a pure Python (2.7, 3.4, 3.5, 3.6) library for physically-based
+rendering and visualization.
+It is designed to meet the [glTF 2.0 specification from Khronos](https://www.khronos.org/gltf/).
+
+Pyrender is lightweight, easy to install, and simple to use.
+It comes packaged with both an intuitive scene viewer and a headache-free
+offscreen renderer with support for GPU-accelerated rendering on headless
+servers, which makes it perfect for machine learning applications.
+
+Extensive documentation, including a quickstart guide, is provided [here](https://pyrender.readthedocs.io/en/latest/).
+
+For a minimal working example of GPU-accelerated offscreen rendering using EGL,
+check out the [EGL Google CoLab Notebook](https://colab.research.google.com/drive/1pcndwqeY8vker3bLKQNJKr3B-7-SYenE?usp=sharing).
+
+
+
+
+
+
+
+## Installation
+You can install pyrender directly from pip.
+
+```bash
+pip install pyrender
+```
+
+## Features
+
+Despite being lightweight, pyrender has lots of features, including:
+
+* Simple interoperation with the amazing [trimesh](https://github.com/mikedh/trimesh) project,
+which enables out-of-the-box support for dozens of mesh types, including OBJ,
+STL, DAE, OFF, PLY, and GLB.
+* An easy-to-use scene viewer with support for animation, showing face and vertex
+normals, toggling lighting conditions, and saving images and GIFs.
+* An offscreen rendering module that supports OSMesa and EGL backends.
+* Shadow mapping for directional and spot lights.
+* Metallic-roughness materials for physically-based rendering, including several
+types of texture and normal mapping.
+* Transparency.
+* Depth and color image generation.
+
+## Sample Usage
+
+For sample usage, check out the [quickstart
+guide](https://pyrender.readthedocs.io/en/latest/examples/index.html) or one of
+the Google CoLab Notebooks:
+
+* [EGL Google CoLab Notebook](https://colab.research.google.com/drive/1pcndwqeY8vker3bLKQNJKr3B-7-SYenE?usp=sharing)
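+
+A minimal offscreen-rendering sketch (an illustrative example that assumes
+`trimesh` is installed alongside pyrender):
+
+```python
+import numpy as np
+import trimesh
+import pyrender
+
+# Build a scene containing a sphere mesh, a camera and a light.
+sphere = trimesh.creation.icosphere(radius=0.3)
+scene = pyrender.Scene()
+scene.add(pyrender.Mesh.from_trimesh(sphere))
+
+camera_pose = np.eye(4)
+camera_pose[2, 3] = 1.5  # move the camera back along +z so the sphere is visible
+scene.add(pyrender.PerspectiveCamera(yfov=np.pi / 3.0), pose=camera_pose)
+scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=3.0),
+          pose=camera_pose)
+
+# Render color and depth images without opening a window.
+renderer = pyrender.OffscreenRenderer(viewport_width=400, viewport_height=400)
+color, depth = renderer.render(scene)
+renderer.delete()
+```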
+
+## Viewer Keyboard and Mouse Controls
+
+When using the viewer, the basic controls for moving about the scene are as follows:
+
+* To rotate the camera about the center of the scene, hold the left mouse button and drag the cursor.
+* To rotate the camera about its viewing axis, hold `CTRL` and the left mouse button, then drag the cursor.
+* To pan the camera, do one of the following:
+ * Hold `SHIFT`, then hold the left mouse button and drag the cursor.
+ * Hold the middle mouse button and drag the cursor.
+* To zoom the camera in or out, do one of the following:
+ * Scroll the mouse wheel.
+ * Hold the right mouse button and drag the cursor.
+
+The available keyboard commands are as follows:
+
+* `a`: Toggles rotational animation mode.
+* `c`: Toggles backface culling.
+* `f`: Toggles fullscreen mode.
+* `h`: Toggles shadow rendering.
+* `i`: Toggles axis display mode (no axes, world axis, mesh axes, all axes).
+* `l`: Toggles lighting mode (scene lighting, Raymond lighting, or direct lighting).
+* `m`: Toggles face normal visualization.
+* `n`: Toggles vertex normal visualization.
+* `o`: Toggles orthographic camera mode.
+* `q`: Quits the viewer.
+* `r`: Starts recording a GIF, and pressing again stops recording and opens a file dialog.
+* `s`: Opens a file dialog to save the current view as an image.
+* `w`: Toggles wireframe mode (scene default, flip wireframes, all wireframe, or all solid).
+* `z`: Resets the camera to the default view.
+
+As a note, displaying shadows significantly slows down rendering, so if you're
+experiencing low framerates, just kill shadows or reduce the number of lights in
+your scene.
diff --git a/pyrender/docs/Makefile b/pyrender/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b1064a04362a0c4372fae351f99ed3bd9f82ff92
--- /dev/null
+++ b/pyrender/docs/Makefile
@@ -0,0 +1,23 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+clean:
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+ rm -rf ./source/generated/*
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/pyrender/docs/make.bat b/pyrender/docs/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..4d9eb83d9f9309029f4b14ff09024658bb0f5563
--- /dev/null
+++ b/pyrender/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/pyrender/docs/source/api/index.rst b/pyrender/docs/source/api/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b6e473149d8f132f176e242c93406fdb84ce0b04
--- /dev/null
+++ b/pyrender/docs/source/api/index.rst
@@ -0,0 +1,59 @@
+Pyrender API Documentation
+==========================
+
+Constants
+---------
+.. automodapi:: pyrender.constants
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
+
+Cameras
+-------
+.. automodapi:: pyrender.camera
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
+
+Lighting
+--------
+.. automodapi:: pyrender.light
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
+
+Objects
+-------
+.. automodapi:: pyrender
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
+ :skip: Camera, DirectionalLight, Light, OffscreenRenderer, Node
+ :skip: OrthographicCamera, PerspectiveCamera, PointLight, RenderFlags
+ :skip: Renderer, Scene, SpotLight, TextAlign, Viewer, GLTF
+
+Scenes
+------
+.. automodapi:: pyrender
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
+ :skip: Camera, DirectionalLight, Light, OffscreenRenderer
+ :skip: OrthographicCamera, PerspectiveCamera, PointLight, RenderFlags
+ :skip: Renderer, SpotLight, TextAlign, Viewer, Sampler, Texture, Material
+ :skip: MetallicRoughnessMaterial, Primitive, Mesh, GLTF
+
+On-Screen Viewer
+----------------
+.. automodapi:: pyrender.viewer
+ :no-inheritance-diagram:
+ :no-inherited-members:
+ :no-main-docstr:
+ :no-heading:
+
+Off-Screen Rendering
+--------------------
+.. automodapi:: pyrender.offscreen
+ :no-inheritance-diagram:
+ :no-main-docstr:
+ :no-heading:
diff --git a/pyrender/docs/source/conf.py b/pyrender/docs/source/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bf194c375e7e789b334a838953adfeaf2eb59b6
--- /dev/null
+++ b/pyrender/docs/source/conf.py
@@ -0,0 +1,352 @@
+# -*- coding: utf-8 -*-
+#
+# core documentation build configuration file, created by
+# sphinx-quickstart on Sun Oct 16 14:33:48 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+from pyrender import __version__
+from sphinx.domains.python import PythonDomain
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('../../'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.githubpages',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.napoleon',
+ 'sphinx.ext.viewcode',
+ 'sphinx_automodapi.automodapi',
+ 'sphinx_automodapi.smart_resolver'
+]
+numpydoc_class_members_toctree = False
+automodapi_toctreedirnm = 'generated'
+automodsumm_inherited_members = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pyrender'
+copyright = u'2018, Matthew Matl'
+author = u'Matthew Matl'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = __version__
+# The full version, including alpha/beta/rc tags.
+release = __version__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+import sphinx_rtd_theme
+html_theme = 'sphinx_rtd_theme'
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# " v documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'coredoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'pyrender.tex', u'pyrender Documentation',
+ u'Matthew Matl', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'pyrender', u'pyrender Documentation',
+ [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'pyrender', u'pyrender Documentation',
+ author, 'pyrender', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
+
+intersphinx_mapping = {
+ 'python' : ('https://docs.python.org/', None),
+ 'pyrender' : ('https://pyrender.readthedocs.io/en/latest/', None),
+}
+
+# Autosummary fix
+autosummary_generate = True
+
+# Try to suppress multiple-definition warnings by always taking the shorter
+# path when two or more paths have the same base module
+
+class MyPythonDomain(PythonDomain):
+
+ def find_obj(self, env, modname, classname, name, type, searchmode=0):
+ """Ensures an object always resolves to the desired module
+ if defined there."""
+ orig_matches = PythonDomain.find_obj(
+ self, env, modname, classname, name, type, searchmode
+ )
+
+ if len(orig_matches) <= 1:
+ return orig_matches
+
+ # If multiple matches, try to take the shortest if all the modules are
+ # the same
+ first_match_name_sp = orig_matches[0][0].split('.')
+ base_name = first_match_name_sp[0]
+ min_len = len(first_match_name_sp)
+ best_match = orig_matches[0]
+
+ for match in orig_matches[1:]:
+ match_name = match[0]
+ match_name_sp = match_name.split('.')
+ match_base = match_name_sp[0]
+
+ # If we have mismatched bases, return them all to trigger warnings
+ if match_base != base_name:
+ return orig_matches
+
+ # Otherwise, check and see if it's shorter
+ if len(match_name_sp) < min_len:
+ min_len = len(match_name_sp)
+ best_match = match
+
+ return (best_match,)
+
+
+def setup(sphinx):
+ """Use MyPythonDomain in place of PythonDomain"""
+ sphinx.override_domain(MyPythonDomain)
+
diff --git a/pyrender/docs/source/examples/cameras.rst b/pyrender/docs/source/examples/cameras.rst
new file mode 100644
index 0000000000000000000000000000000000000000..39186b75b16584d11fd1606b92291c104e0452bd
--- /dev/null
+++ b/pyrender/docs/source/examples/cameras.rst
@@ -0,0 +1,26 @@
+.. _camera_guide:
+
+Creating Cameras
+================
+
+Pyrender supports three camera types -- :class:`.PerspectiveCamera` and
+:class:`.IntrinsicsCamera` types,
+which render scenes as a human would see them, and
+:class:`.OrthographicCamera` types, which preserve distances between points.
+
+Creating cameras is easy -- just specify their basic attributes:
+
+>>> pc = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.414)
+>>> oc = pyrender.OrthographicCamera(xmag=1.0, ymag=1.0)
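+
+If you have calibrated pinhole intrinsics for a real camera, you can also
+build an :class:`.IntrinsicsCamera` directly from them. The parameter names
+follow the class constructor (``fx``, ``fy``, ``cx``, ``cy``, in pixels); the
+values below are purely illustrative:
+
+>>> ic = pyrender.IntrinsicsCamera(fx=525.0, fy=525.0, cx=320.0, cy=240.0)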
+
+For more information, see the Khronos group's documentation here_:
+
+.. _here: https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#projection-matrices
+
+When you add cameras to the scene, make sure that you're using OpenGL camera
+coordinates to specify their pose. See the illustration below for details.
+Basically, the camera z-axis points away from the scene, the x-axis points
+right in image space, and the y-axis points up in image space.
+
+.. image:: /_static/camera_coords.png
+
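+As a minimal sketch of those conventions, the pose below places a camera two
+units above the world origin with identity rotation, so it looks back toward
+the origin along the negative world z-axis (assuming a ``scene`` built as in
+:ref:`scene_guide`):
+
+>>> cam_pose = np.eye(4)
+>>> cam_pose[2, 3] = 2.0
+>>> scene.add(pc, pose=cam_pose)
+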
diff --git a/pyrender/docs/source/examples/index.rst b/pyrender/docs/source/examples/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4be536cd62c1cca112228f4e114e783be77a0ab8
--- /dev/null
+++ b/pyrender/docs/source/examples/index.rst
@@ -0,0 +1,20 @@
+.. _guide:
+
+User Guide
+==========
+
+This section contains guides on how to use Pyrender to quickly visualize
+your 3D data, including a quickstart guide and more detailed descriptions
+of each part of the rendering pipeline.
+
+
+.. toctree::
+ :maxdepth: 2
+
+ quickstart.rst
+ models.rst
+ lighting.rst
+ cameras.rst
+ scenes.rst
+ offscreen.rst
+ viewer.rst
diff --git a/pyrender/docs/source/examples/lighting.rst b/pyrender/docs/source/examples/lighting.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f89bee7d15027a0f52711622b053b49cc6e1b410
--- /dev/null
+++ b/pyrender/docs/source/examples/lighting.rst
@@ -0,0 +1,21 @@
+.. _lighting_guide:
+
+Creating Lights
+===============
+
+Pyrender supports three types of punctual light:
+
+- :class:`.PointLight`: Point-based light sources, such as light bulbs.
+- :class:`.SpotLight`: A conical light source, like a flashlight.
+- :class:`.DirectionalLight`: A general light that does not attenuate with
+ distance.
+
+Creating lights is easy -- just specify their basic attributes:
+
+>>> pl = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=2.0)
+>>> sl = pyrender.SpotLight(color=[1.0, 1.0, 1.0], intensity=2.0,
+... innerConeAngle=0.05, outerConeAngle=0.5)
+>>> dl = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=2.0)
+
+For more information about how these lighting models are implemented,
+see their class documentation.
diff --git a/pyrender/docs/source/examples/models.rst b/pyrender/docs/source/examples/models.rst
new file mode 100644
index 0000000000000000000000000000000000000000..84e71c4ff41a8d2e0eb2dc48434caedb757ff954
--- /dev/null
+++ b/pyrender/docs/source/examples/models.rst
@@ -0,0 +1,143 @@
+.. _model_guide:
+
+Loading and Configuring Models
+==============================
+The first step to any rendering application is loading your models.
+Pyrender implements the GLTF 2.0 specification, which means that all
+models are composed of a hierarchy of objects.
+
+At the top level, we have a :class:`.Mesh`. The :class:`.Mesh` is
+basically a wrapper of any number of :class:`.Primitive` types,
+which actually represent geometry that can be drawn to the screen.
+
+Primitives are specified by a variety of parameters, including
+vertex positions, vertex normals, color and texture information,
+and triangle indices if smooth rendering is desired.
+They can implement point clouds, triangular meshes, or lines
+depending on how you configure their data and set their
+:attr:`.Primitive.mode` parameter.
+
+Although you can create primitives yourself if you want to,
+it's probably easier to just use the utility functions provided
+in the :class:`.Mesh` class.
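+
+As a rough sketch of the low-level route (the helper constructors below are
+usually what you want; the exact keyword names and dtypes here are assumptions),
+a line-segment primitive could be assembled by hand and wrapped in a
+:class:`.Mesh`:
+
+>>> import numpy as np
+>>> import pyrender
+>>> positions = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 1.0],
+...                       [0.0, 0.0, 1.0], [0.0, 1.0, 1.0]], dtype=np.float32)
+>>> prim = pyrender.Primitive(positions=positions, mode=pyrender.GLTF.LINES)
+>>> m = pyrender.Mesh(primitives=[prim])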
+
+Creating Triangular Meshes
+--------------------------
+
+Simple Construction
+~~~~~~~~~~~~~~~~~~~
+Pyrender allows you to create a :class:`.Mesh` containing a
+triangular mesh model directly from a :class:`~trimesh.base.Trimesh` object
+using the :meth:`.Mesh.from_trimesh` static method.
+
+>>> import trimesh
+>>> import pyrender
+>>> import numpy as np
+>>> tm = trimesh.load('examples/models/fuze.obj')
+>>> m = pyrender.Mesh.from_trimesh(tm)
+>>> m.primitives
+[<pyrender.primitive.Primitive object at 0x...>]
+
+You can also create a single :class:`.Mesh` from a list of
+:class:`~trimesh.base.Trimesh` objects:
+
+>>> tms = [trimesh.creation.icosahedron(), trimesh.creation.cylinder()]
+>>> m = pyrender.Mesh.from_trimesh(tms)
+>>> m.primitives
+[<pyrender.primitive.Primitive object at 0x...>,
+ <pyrender.primitive.Primitive object at 0x...>]
+
+Vertex Smoothing
+~~~~~~~~~~~~~~~~
+
+The :meth:`.Mesh.from_trimesh` method has a few additional optional parameters.
+If you want to render the mesh without interpolating face normals, which can
+be useful for meshes that are supposed to be angular (e.g. a cube), you
+can specify ``smooth=False``.
+
+>>> m = pyrender.Mesh.from_trimesh(tm, smooth=False)
+
+Per-Face or Per-Vertex Coloration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you have an untextured trimesh, you can color it in with per-face or
+per-vertex colors:
+
+>>> tm.visual.vertex_colors = np.random.uniform(size=tm.vertices.shape)
+>>> tm.visual.face_colors = np.random.uniform(size=tm.faces.shape)
+>>> m = pyrender.Mesh.from_trimesh(tm)
+
+Instancing
+~~~~~~~~~~
+
+If you want to render many copies of the same mesh at different poses,
+you can instance them efficiently within a single :class:`.Mesh`.
+Simply specify the ``poses`` parameter as a list of ``N`` 4x4 homogeneous
+transformation matrices that position the meshes relative to their common
+base frame:
+
+>>> tfs = np.tile(np.eye(4), (3,1,1))
+>>> tfs[1,:3,3] = [0.1, 0.0, 0.0]
+>>> tfs[2,:3,3] = [0.2, 0.0, 0.0]
+>>> tfs
+array([[[1. , 0. , 0. , 0. ],
+ [0. , 1. , 0. , 0. ],
+ [0. , 0. , 1. , 0. ],
+ [0. , 0. , 0. , 1. ]],
+ [[1. , 0. , 0. , 0.1],
+ [0. , 1. , 0. , 0. ],
+ [0. , 0. , 1. , 0. ],
+ [0. , 0. , 0. , 1. ]],
+ [[1. , 0. , 0. , 0.2],
+ [0. , 1. , 0. , 0. ],
+ [0. , 0. , 1. , 0. ],
+ [0. , 0. , 0. , 1. ]]])
+
+>>> m = pyrender.Mesh.from_trimesh(tm, poses=tfs)
+
+Custom Materials
+~~~~~~~~~~~~~~~~
+
+You can also specify a custom material for any triangular mesh you create
+in the ``material`` parameter of :meth:`.Mesh.from_trimesh`.
+The main material supported by Pyrender is the
+:class:`.MetallicRoughnessMaterial`.
+The metallic-roughness model supports rendering highly-realistic objects across
+a wide gamut of materials.
+
+For more information, see the documentation of the
+:class:`.MetallicRoughnessMaterial` constructor or look at the Khronos_
+documentation for more information.
+
+.. _Khronos: https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#materials
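+
+As a quick sketch (the factor values here are illustrative, not canonical), a
+shiny red material could be created and applied like this:
+
+>>> mat = pyrender.MetallicRoughnessMaterial(
+...     baseColorFactor=[1.0, 0.0, 0.0, 1.0],
+...     metallicFactor=0.8,
+...     roughnessFactor=0.3)
+>>> m = pyrender.Mesh.from_trimesh(tm, material=mat)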
+
+Creating Point Clouds
+---------------------
+
+Point Sprites
+~~~~~~~~~~~~~
+Pyrender also allows you to create a :class:`.Mesh` containing a
+point cloud directly from :class:`numpy.ndarray` instances
+using the :meth:`.Mesh.from_points` static method.
+
+Simply provide a list of points and optional per-point colors and normals.
+
+>>> pts = tm.vertices.copy()
+>>> colors = np.random.uniform(size=pts.shape)
+>>> m = pyrender.Mesh.from_points(pts, colors=colors)
+
+Point clouds created in this way will be rendered as square point sprites.
+
+.. image:: /_static/points.png
+
+Point Spheres
+~~~~~~~~~~~~~
+If you have a monochromatic point cloud and would like to render it with
+spheres, you can render it by instancing a spherical trimesh:
+
+>>> sm = trimesh.creation.uv_sphere(radius=0.1)
+>>> sm.visual.vertex_colors = [1.0, 0.0, 0.0]
+>>> tfs = np.tile(np.eye(4), (len(pts), 1, 1))
+>>> tfs[:,:3,3] = pts
+>>> m = pyrender.Mesh.from_trimesh(sm, poses=tfs)
+
+.. image:: /_static/points2.png
diff --git a/pyrender/docs/source/examples/offscreen.rst b/pyrender/docs/source/examples/offscreen.rst
new file mode 100644
index 0000000000000000000000000000000000000000..291532b6e0c0e512df35a97e3c826cc83015aeca
--- /dev/null
+++ b/pyrender/docs/source/examples/offscreen.rst
@@ -0,0 +1,87 @@
+.. _offscreen_guide:
+
+Offscreen Rendering
+===================
+
+.. note::
+ If you're using a headless server, you'll need to use either EGL (for
+ GPU-accelerated rendering) or OSMesa (for CPU-only software rendering).
+ If you're using OSMesa, be sure that you've installed it properly. See
+ :ref:`osmesa` for details.
+
+Choosing a Backend
+------------------
+
+Once you have a scene set up with its geometry, cameras, and lights,
+you can render it using the :class:`.OffscreenRenderer`. Pyrender supports
+three backends for offscreen rendering:
+
+- Pyglet, the same engine that runs the viewer. This requires an active
+ display manager, so you can't run it on a headless server. This is the
+ default option.
+- OSMesa, a software renderer.
+- EGL, which allows for GPU-accelerated rendering without a display manager.
+
+If you want to use OSMesa or EGL, you need to set the ``PYOPENGL_PLATFORM``
+environment variable before importing pyrender or any other OpenGL library.
+You can do this at the command line:
+
+.. code-block:: bash
+
+ PYOPENGL_PLATFORM=osmesa python render.py
+
+or at the top of your Python script:
+
+.. code-block:: python
+
+ # Top of main python script
+ import os
+ os.environ['PYOPENGL_PLATFORM'] = 'egl'
+
+The handle for EGL is ``egl``, and the handle for OSMesa is ``osmesa``.
+
+Running the Renderer
+--------------------
+
+Once you've set your environment variable appropriately, create your scene and
+then configure the :class:`.OffscreenRenderer` object with a window width,
+a window height, and a size for point-cloud points:
+
+>>> r = pyrender.OffscreenRenderer(viewport_width=640,
+... viewport_height=480,
+... point_size=1.0)
+
+Then, just call the :meth:`.OffscreenRenderer.render` function:
+
+>>> color, depth = r.render(scene)
+
+.. image:: /_static/scene.png
+
+This will return a ``(w,h,3)`` channel floating-point color image and
+a ``(w,h)`` floating-point depth image rendered from the scene's main camera.
+
+You can customize the rendering process by using flag options from
+:class:`.RenderFlags` and bitwise or-ing them together. For example,
+the following code renders a color image with an alpha channel
+and enables shadow mapping for all directional lights:
+
+>>> flags = RenderFlags.RGBA | RenderFlags.SHADOWS_DIRECTIONAL
+>>> color, depth = r.render(scene, flags=flags)
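+
+Similarly, a depth-only pass (see :attr:`.RenderFlags.DEPTH_ONLY`) skips the
+color buffer entirely and, as a sketch, returns just the depth image:
+
+>>> depth = r.render(scene, flags=RenderFlags.DEPTH_ONLY)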
+
+Once you're done with the offscreen renderer, you need to close it before you
+can run a different renderer or open the viewer for the same scene:
+
+>>> r.delete()
+
+Google CoLab Examples
+---------------------
+
+For a minimal working example of offscreen rendering using OSMesa,
+see the `OSMesa Google CoLab notebook`_.
+
+.. _OSMesa Google CoLab notebook: https://colab.research.google.com/drive/1Z71mHIc-Sqval92nK290vAsHZRUkCjUx
+
+For a minimal working example of offscreen rendering using EGL,
+see the `EGL Google CoLab notebook`_.
+
+.. _EGL Google CoLab notebook: https://colab.research.google.com/drive/1rTLHk0qxh4dn8KNe-mCnN8HAWdd2_BEh
diff --git a/pyrender/docs/source/examples/quickstart.rst b/pyrender/docs/source/examples/quickstart.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ac556419e5206c2ccd4bc985feb1a8c7347310af
--- /dev/null
+++ b/pyrender/docs/source/examples/quickstart.rst
@@ -0,0 +1,71 @@
+.. _quickstart_guide:
+
+Quickstart
+==========
+
+
+Minimal Example for 3D Viewer
+-----------------------------
+Here is a minimal example of loading and viewing a triangular mesh model
+in pyrender.
+
+>>> import trimesh
+>>> import pyrender
+>>> fuze_trimesh = trimesh.load('examples/models/fuze.obj')
+>>> mesh = pyrender.Mesh.from_trimesh(fuze_trimesh)
+>>> scene = pyrender.Scene()
+>>> scene.add(mesh)
+>>> pyrender.Viewer(scene, use_raymond_lighting=True)
+
+.. image:: /_static/fuze.png
+
+
+Minimal Example for Offscreen Rendering
+---------------------------------------
+.. note::
+ If you're using a headless server, make sure that you followed the guide
+ for installing OSMesa. See :ref:`osmesa`.
+
+Here is a minimal example of rendering a mesh model offscreen in pyrender.
+The only additional necessities are that you need to add lighting and a camera.
+
+>>> import numpy as np
+>>> import trimesh
+>>> import pyrender
+>>> import matplotlib.pyplot as plt
+
+>>> fuze_trimesh = trimesh.load('examples/models/fuze.obj')
+>>> mesh = pyrender.Mesh.from_trimesh(fuze_trimesh)
+>>> scene = pyrender.Scene()
+>>> scene.add(mesh)
+>>> camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
+>>> s = np.sqrt(2)/2
+>>> camera_pose = np.array([
+... [0.0, -s, s, 0.3],
+... [1.0, 0.0, 0.0, 0.0],
+... [0.0, s, s, 0.35],
+... [0.0, 0.0, 0.0, 1.0],
+... ])
+>>> scene.add(camera, pose=camera_pose)
+>>> light = pyrender.SpotLight(color=np.ones(3), intensity=3.0,
+... innerConeAngle=np.pi/16.0,
+... outerConeAngle=np.pi/6.0)
+>>> scene.add(light, pose=camera_pose)
+>>> r = pyrender.OffscreenRenderer(400, 400)
+>>> color, depth = r.render(scene)
+>>> plt.figure()
+>>> plt.subplot(1,2,1)
+>>> plt.axis('off')
+>>> plt.imshow(color)
+>>> plt.subplot(1,2,2)
+>>> plt.axis('off')
+>>> plt.imshow(depth, cmap=plt.cm.gray_r)
+>>> plt.show()
+
+.. image:: /_static/minexcolor.png
+ :width: 45%
+ :align: left
+.. image:: /_static/minexdepth.png
+ :width: 45%
+ :align: right
+
diff --git a/pyrender/docs/source/examples/scenes.rst b/pyrender/docs/source/examples/scenes.rst
new file mode 100644
index 0000000000000000000000000000000000000000..94c243f8b860b9669ac26105fd2b9906054f4568
--- /dev/null
+++ b/pyrender/docs/source/examples/scenes.rst
@@ -0,0 +1,78 @@
+.. _scene_guide:
+
+Creating Scenes
+===============
+
+Before you render anything, you need to put all of your lights, cameras,
+and meshes into a scene. The :class:`.Scene` object keeps track of the relative
+poses of these primitives by inserting them into :class:`.Node` objects and
+keeping them in a directed acyclic graph.
+
+Adding Objects
+--------------
+
+To create a :class:`.Scene`, simply call the constructor. You can optionally
+specify an ambient light color and a background color:
+
+>>> scene = pyrender.Scene(ambient_light=[0.02, 0.02, 0.02],
+... bg_color=[1.0, 1.0, 1.0])
+
+You can add objects to a scene by first creating a :class:`.Node` object
+and adding the object and its pose to the :class:`.Node`. Poses are specified
+as 4x4 homogenous transformation matrices that are stored in the node's
+:attr:`.Node.matrix` attribute. Note that the :class:`.Node`
+constructor requires you to specify whether you're adding a mesh, light,
+or camera.
+
+>>> mesh = pyrender.Mesh.from_trimesh(tm)
+>>> light = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=2.0)
+>>> cam = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.414)
+>>> nm = pyrender.Node(mesh=mesh, matrix=np.eye(4))
+>>> nl = pyrender.Node(light=light, matrix=np.eye(4))
+>>> nc = pyrender.Node(camera=cam, matrix=np.eye(4))
+>>> scene.add_node(nm)
+>>> scene.add_node(nl)
+>>> scene.add_node(nc)
+
+You can also add objects directly to a scene with the :meth:`.Scene.add` function,
+which takes care of creating a :class:`.Node` for you.
+
+>>> scene.add(mesh, pose=np.eye(4))
+>>> scene.add(light, pose=np.eye(4))
+>>> scene.add(cam, pose=np.eye(4))
+
+Nodes can be hierarchical, in which case the node's :attr:`.Node.matrix`
+specifies that node's pose relative to its parent frame. You can add nodes to
+a scene hierarchically by specifying a parent node in your calls to
+:meth:`.Scene.add` or :meth:`.Scene.add_node`:
+
+>>> scene.add_node(nl, parent_node=nc)
+>>> scene.add(cam, parent_node=nm)
+
+If you add multiple cameras to a scene, you can specify which one to render from
+by setting the :attr:`.Scene.main_camera_node` attribute.
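+
+For example, you could select the camera node ``nc`` created above:
+
+>>> scene.main_camera_node = nc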
+
+Updating Objects
+----------------
+
+You can update the poses of existing nodes with the :meth:`.Scene.set_pose`
+function. Simply call it with a :class:`.Node` that is already in the scene
+and the new pose of that node with respect to its parent as a 4x4 homogenous
+transformation matrix:
+
+>>> scene.set_pose(nl, pose=np.eye(4))
+
+If you want to get the local pose of a node, you can just access its
+:attr:`.Node.matrix` attribute. However, if you want to the get
+the pose of a node *with respect to the world frame*, you can call the
+:meth:`.Scene.get_pose` method.
+
+>>> tf = scene.get_pose(nl)
+
+Removing Objects
+----------------
+
+Finally, you can remove a :class:`.Node` and all of its children from the
+scene with the :meth:`.Scene.remove_node` function:
+
+>>> scene.remove_node(nl)
diff --git a/pyrender/docs/source/examples/viewer.rst b/pyrender/docs/source/examples/viewer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..00a7973b46ec7da33b51b65581af6f25c1b1652f
--- /dev/null
+++ b/pyrender/docs/source/examples/viewer.rst
@@ -0,0 +1,61 @@
+.. _viewer_guide:
+
+Live Scene Viewer
+=================
+
+Standard Usage
+--------------
+In addition to the offscreen renderer, Pyrender comes with a live scene viewer.
+In its standard invocation, calling the :class:`.Viewer`'s constructor will
+immediately pop a viewing window that you can navigate around in.
+
+>>> pyrender.Viewer(scene)
+
+By default, the viewer uses your scene's lighting. If you'd like to start with
+some additional lighting that moves around with the camera, you can specify that
+with:
+
+>>> pyrender.Viewer(scene, use_raymond_lighting=True)
+
+For a full list of the many options that the :class:`.Viewer` supports, check out its
+documentation.
+
+.. image:: /_static/rotation.gif
+
+Running the Viewer in a Separate Thread
+---------------------------------------
+If you'd like to animate your models, you'll want to run the viewer in a
+separate thread so that you can update the scene while the viewer is running.
+To do this, first pop the viewer in a separate thread by calling its constructor
+with the ``run_in_thread`` option set:
+
+>>> v = pyrender.Viewer(scene, run_in_thread=True)
+
+Then, you can manipulate the :class:`.Scene` while the viewer is running to
+animate things. However, be careful to acquire the viewer's
+:attr:`.Viewer.render_lock` before editing the scene to prevent data corruption:
+
+>>> i = 0
+>>> while True:
+... pose = np.eye(4)
+... pose[:3,3] = [i, 0, 0]
+... v.render_lock.acquire()
+... scene.set_pose(mesh_node, pose)
+... v.render_lock.release()
+... i += 0.01
+
+.. image:: /_static/scissors.gif
+
+You can wait on the viewer to be closed manually:
+
+>>> while v.is_active:
+... pass
+
+Or you can close it forcibly from the main thread.
+Just make sure to keep looping until the viewer has actually exited before
+using the scene object again.
+
+>>> v.close_external()
+>>> while v.is_active:
+... pass
+
diff --git a/pyrender/docs/source/index.rst b/pyrender/docs/source/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..baf189ede6bb3435cad5b8795e1937ef1a3c2c56
--- /dev/null
+++ b/pyrender/docs/source/index.rst
@@ -0,0 +1,41 @@
+.. core documentation master file, created by
+ sphinx-quickstart on Sun Oct 16 14:33:48 2016.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Pyrender Documentation
+========================
+Pyrender is a pure Python (2.7, 3.4, 3.5, 3.6) library for physically-based
+rendering and visualization.
+It is designed to meet the glTF 2.0 specification_ from Khronos.
+
+.. _specification: https://www.khronos.org/gltf/
+
+Pyrender is lightweight, easy to install, and simple to use.
+It comes packaged with both an intuitive scene viewer and a headache-free
+offscreen renderer with support for GPU-accelerated rendering on headless
+servers, which makes it perfect for machine learning applications.
+Check out the :ref:`guide` for a full tutorial, or fork me on
+Github_.
+
+.. _Github: https://github.com/mmatl/pyrender
+
+.. image:: _static/rotation.gif
+
+.. image:: _static/damaged_helmet.png
+
+.. toctree::
+ :maxdepth: 2
+
+ install/index.rst
+ examples/index.rst
+ api/index.rst
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/pyrender/docs/source/install/index.rst b/pyrender/docs/source/install/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c785f202d877f8bbaf286c21eddca1925973f75e
--- /dev/null
+++ b/pyrender/docs/source/install/index.rst
@@ -0,0 +1,172 @@
+Installation Guide
+==================
+
+Python Installation
+-------------------
+
+This package is available via ``pip``.
+
+.. code-block:: bash
+
+ pip install pyrender
+
+If you're on MacOS, you'll need
+to pre-install my fork of ``pyglet``, as the version on PyPI hasn't yet included
+my change that enables OpenGL contexts on MacOS.
+
+.. code-block:: bash
+
+ git clone https://github.com/mmatl/pyglet.git
+ cd pyglet
+ pip install .
+
+.. _osmesa:
+
+Getting Pyrender Working with OSMesa
+------------------------------------
+If you want to render scenes offscreen but don't want to have to
+install a display manager or deal with the pains of trying to get
+OpenGL to work over SSH, you have two options.
+
+The first (and preferred) option is using EGL, which enables you to perform
+GPU-accelerated rendering on headless servers.
+However, you'll need EGL 1.5 to get modern OpenGL contexts.
+This comes packaged with NVIDIA's current drivers, but if you are having issues
+getting EGL to work with your hardware, you can try using OSMesa,
+a software-based offscreen renderer that is included with any Mesa
+install.
+
+If you want to use OSMesa with pyrender, you'll have to perform two additional
+installation steps:
+
+- :ref:`installmesa`
+- :ref:`installpyopengl`
+
+Then, read the offscreen rendering tutorial. See :ref:`offscreen_guide`.
+
+.. _installmesa:
+
+Installing OSMesa
+*****************
+
+As a first step, you'll need to rebuild and re-install Mesa with support
+for fast offscreen rendering and OpenGL 3+ contexts.
+I'd recommend installing from source, but you can also try my ``.deb``
+for Ubuntu 16.04 and up.
+
+Installing from a Debian Package
+********************************
+
+If you're running Ubuntu 16.04 or newer, you should be able to install the
+required version of Mesa from my ``.deb`` file.
+
+.. code-block:: bash
+
+ sudo apt update
+ sudo wget https://github.com/mmatl/travis_debs/raw/master/xenial/mesa_18.3.3-0.deb
+ sudo dpkg -i ./mesa_18.3.3-0.deb || true
+ sudo apt install -f
+
+If this doesn't work, try building from source.
+
+Building From Source
+********************
+
+First, install build dependencies via ``apt`` or your system's package manager.
+
+.. code-block:: bash
+
+ sudo apt-get install llvm-6.0 freeglut3 freeglut3-dev
+
+Then, download the current release of Mesa from here_.
+Unpack the source and go to the source folder:
+
+.. _here: https://archive.mesa3d.org/mesa-18.3.3.tar.gz
+
+.. code-block:: bash
+
+ tar xfv mesa-18.3.3.tar.gz
+ cd mesa-18.3.3
+
+Replace ``PREFIX`` with the path you want to install Mesa at.
+If you're not worried about overwriting your default Mesa install,
+a good place is at ``/usr/local``.
+
+Now, configure the installation by running the following command:
+
+.. code-block:: bash
+
+ ./configure --prefix=PREFIX \
+ --enable-opengl --disable-gles1 --disable-gles2 \
+ --disable-va --disable-xvmc --disable-vdpau \
+ --enable-shared-glapi \
+ --disable-texture-float \
+ --enable-gallium-llvm --enable-llvm-shared-libs \
+ --with-gallium-drivers=swrast,swr \
+ --disable-dri --with-dri-drivers= \
+ --disable-egl --with-egl-platforms= --disable-gbm \
+ --disable-glx \
+ --disable-osmesa --enable-gallium-osmesa \
+ ac_cv_path_LLVM_CONFIG=llvm-config-6.0
+
+Finally, build and install Mesa.
+
+.. code-block:: bash
+
+ make -j8
+ make install
+
+Finally, if you didn't install Mesa in the system path,
+add the following lines to your ``~/.bashrc`` file after
+changing ``MESA_HOME`` to your mesa installation path (i.e. what you used as
+``PREFIX`` during the configure command).
+
+.. code-block:: bash
+
+ MESA_HOME=/path/to/your/mesa/installation
+ export LIBRARY_PATH=$LIBRARY_PATH:$MESA_HOME/lib
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MESA_HOME/lib
+ export C_INCLUDE_PATH=$C_INCLUDE_PATH:$MESA_HOME/include/
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$MESA_HOME/include/
+
+.. _installpyopengl:
+
+Installing a Compatible Fork of PyOpenGL
+****************************************
+
+Next, install and use my fork of ``PyOpenGL``.
+This fork enables getting modern OpenGL contexts with OSMesa.
+My patch has been included in ``PyOpenGL``, but it has not yet been released
+on PyPI.
+
+.. code-block:: bash
+
+ git clone https://github.com/mmatl/pyopengl.git
+ pip install ./pyopengl
+
+
+Building Documentation
+----------------------
+
+The online documentation for ``pyrender`` is automatically built by Read The Docs.
+Building ``pyrender``'s documentation locally requires a few extra dependencies --
+specifically, `sphinx`_ and a few plugins.
+
+.. _sphinx: http://www.sphinx-doc.org/en/master/
+
+To install the dependencies required, simply change directories into the ``pyrender`` source and run
+
+.. code-block:: bash
+
+ $ pip install .[docs]
+
+Then, go to the ``docs`` directory and run ``make`` with the appropriate target.
+For example,
+
+.. code-block:: bash
+
+ $ cd docs/
+ $ make html
+
+will generate a set of web pages. Any documentation files
+generated in this manner can be found in ``docs/build``.
diff --git a/pyrender/examples/duck.py b/pyrender/examples/duck.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a94bad5bfb30493f7364f2e52cbb4badbccb2c7
--- /dev/null
+++ b/pyrender/examples/duck.py
@@ -0,0 +1,13 @@
+from pyrender import Mesh, Scene, Viewer
+from io import BytesIO
+import numpy as np
+import trimesh
+import requests
+
+duck_source = "https://github.com/KhronosGroup/glTF-Sample-Models/raw/master/2.0/Duck/glTF-Binary/Duck.glb"
+
+duck = trimesh.load(BytesIO(requests.get(duck_source).content), file_type='glb')
+duckmesh = Mesh.from_trimesh(list(duck.geometry.values())[0])
+scene = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]))
+scene.add(duckmesh)
+Viewer(scene)
diff --git a/pyrender/examples/example.py b/pyrender/examples/example.py
new file mode 100644
index 0000000000000000000000000000000000000000..599a4850a5899cdeb1a76db1c5cf1c91c263cd41
--- /dev/null
+++ b/pyrender/examples/example.py
@@ -0,0 +1,157 @@
+"""Examples of using pyrender for viewing and offscreen rendering.
+"""
+import pyglet
+pyglet.options['shadow_window'] = False
+import os
+import numpy as np
+import trimesh
+
+from pyrender import PerspectiveCamera,\
+ DirectionalLight, SpotLight, PointLight,\
+ MetallicRoughnessMaterial,\
+ Primitive, Mesh, Node, Scene,\
+ Viewer, OffscreenRenderer, RenderFlags
+
+#==============================================================================
+# Mesh creation
+#==============================================================================
+
+#------------------------------------------------------------------------------
+# Creating textured meshes from trimeshes
+#------------------------------------------------------------------------------
+
+# Fuze trimesh
+fuze_trimesh = trimesh.load('./models/fuze.obj')
+fuze_mesh = Mesh.from_trimesh(fuze_trimesh)
+
+# Drill trimesh
+drill_trimesh = trimesh.load('./models/drill.obj')
+drill_mesh = Mesh.from_trimesh(drill_trimesh)
+drill_pose = np.eye(4)
+drill_pose[0,3] = 0.1
+drill_pose[2,3] = -np.min(drill_trimesh.vertices[:,2])
+
+# Wood trimesh
+wood_trimesh = trimesh.load('./models/wood.obj')
+wood_mesh = Mesh.from_trimesh(wood_trimesh)
+
+# Water bottle trimesh
+bottle_gltf = trimesh.load('./models/WaterBottle.glb')
+bottle_trimesh = bottle_gltf.geometry[list(bottle_gltf.geometry.keys())[0]]
+bottle_mesh = Mesh.from_trimesh(bottle_trimesh)
+bottle_pose = np.array([
+ [1.0, 0.0, 0.0, 0.1],
+ [0.0, 0.0, -1.0, -0.16],
+ [0.0, 1.0, 0.0, 0.13],
+ [0.0, 0.0, 0.0, 1.0],
+])
+
+#------------------------------------------------------------------------------
+# Creating meshes with per-vertex colors
+#------------------------------------------------------------------------------
+boxv_trimesh = trimesh.creation.box(extents=0.1*np.ones(3))
+boxv_vertex_colors = np.random.uniform(size=(boxv_trimesh.vertices.shape))
+boxv_trimesh.visual.vertex_colors = boxv_vertex_colors
+boxv_mesh = Mesh.from_trimesh(boxv_trimesh, smooth=False)
+
+#------------------------------------------------------------------------------
+# Creating meshes with per-face colors
+#------------------------------------------------------------------------------
+boxf_trimesh = trimesh.creation.box(extents=0.1*np.ones(3))
+boxf_face_colors = np.random.uniform(size=boxf_trimesh.faces.shape)
+boxf_trimesh.visual.face_colors = boxf_face_colors
+boxf_mesh = Mesh.from_trimesh(boxf_trimesh, smooth=False)
+
+#------------------------------------------------------------------------------
+# Creating meshes from point clouds
+#------------------------------------------------------------------------------
+points = trimesh.creation.icosphere(radius=0.05).vertices
+point_colors = np.random.uniform(size=points.shape)
+points_mesh = Mesh.from_points(points, colors=point_colors)
+
+#==============================================================================
+# Light creation
+#==============================================================================
+
+direc_l = DirectionalLight(color=np.ones(3), intensity=1.0)
+spot_l = SpotLight(color=np.ones(3), intensity=10.0,
+ innerConeAngle=np.pi/16, outerConeAngle=np.pi/6)
+point_l = PointLight(color=np.ones(3), intensity=10.0)
+
+#==============================================================================
+# Camera creation
+#==============================================================================
+
+cam = PerspectiveCamera(yfov=(np.pi / 3.0))
+cam_pose = np.array([
+ [0.0, -np.sqrt(2)/2, np.sqrt(2)/2, 0.5],
+ [1.0, 0.0, 0.0, 0.0],
+ [0.0, np.sqrt(2)/2, np.sqrt(2)/2, 0.4],
+ [0.0, 0.0, 0.0, 1.0]
+])
+
+#==============================================================================
+# Scene creation
+#==============================================================================
+
+scene = Scene(ambient_light=np.array([0.02, 0.02, 0.02, 1.0]))
+
+#==============================================================================
+# Adding objects to the scene
+#==============================================================================
+
+#------------------------------------------------------------------------------
+# By manually creating nodes
+#------------------------------------------------------------------------------
+fuze_node = Node(mesh=fuze_mesh, translation=np.array([0.1, 0.15, -np.min(fuze_trimesh.vertices[:,2])]))
+scene.add_node(fuze_node)
+boxv_node = Node(mesh=boxv_mesh, translation=np.array([-0.1, 0.10, 0.05]))
+scene.add_node(boxv_node)
+boxf_node = Node(mesh=boxf_mesh, translation=np.array([-0.1, -0.10, 0.05]))
+scene.add_node(boxf_node)
+
+#------------------------------------------------------------------------------
+# By using the add() utility function
+#------------------------------------------------------------------------------
+drill_node = scene.add(drill_mesh, pose=drill_pose)
+bottle_node = scene.add(bottle_mesh, pose=bottle_pose)
+wood_node = scene.add(wood_mesh)
+direc_l_node = scene.add(direc_l, pose=cam_pose)
+spot_l_node = scene.add(spot_l, pose=cam_pose)
+
+#==============================================================================
+# Using the viewer with a default camera
+#==============================================================================
+
+v = Viewer(scene, shadows=True)
+
+#==============================================================================
+# Using the viewer with a pre-specified camera
+#==============================================================================
+cam_node = scene.add(cam, pose=cam_pose)
+v = Viewer(scene, central_node=drill_node)
+
+#==============================================================================
+# Rendering offscreen from that camera
+#==============================================================================
+
+r = OffscreenRenderer(viewport_width=640*2, viewport_height=480*2)
+color, depth = r.render(scene)
+
+import matplotlib.pyplot as plt
+plt.figure()
+plt.imshow(color)
+plt.show()
+
+#==============================================================================
+# Segmask rendering
+#==============================================================================
+
+nm = {node: 20*(i + 1) for i, node in enumerate(scene.mesh_nodes)}
+seg = r.render(scene, RenderFlags.SEG, nm)[0]
+plt.figure()
+plt.imshow(seg)
+plt.show()
+
+r.delete()
+
diff --git a/pyrender/pyrender/__init__.py b/pyrender/pyrender/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee3709846823b7c4b71b22da0e24d63d805528a8
--- /dev/null
+++ b/pyrender/pyrender/__init__.py
@@ -0,0 +1,24 @@
+from .camera import (Camera, PerspectiveCamera, OrthographicCamera,
+ IntrinsicsCamera)
+from .light import Light, PointLight, DirectionalLight, SpotLight
+from .sampler import Sampler
+from .texture import Texture
+from .material import Material, MetallicRoughnessMaterial
+from .primitive import Primitive
+from .mesh import Mesh
+from .node import Node
+from .scene import Scene
+from .renderer import Renderer
+from .viewer import Viewer
+from .offscreen import OffscreenRenderer
+from .version import __version__
+from .constants import RenderFlags, TextAlign, GLTF
+
+__all__ = [
+ 'Camera', 'PerspectiveCamera', 'OrthographicCamera', 'IntrinsicsCamera',
+ 'Light', 'PointLight', 'DirectionalLight', 'SpotLight',
+ 'Sampler', 'Texture', 'Material', 'MetallicRoughnessMaterial',
+ 'Primitive', 'Mesh', 'Node', 'Scene', 'Renderer', 'Viewer',
+ 'OffscreenRenderer', '__version__', 'RenderFlags', 'TextAlign',
+ 'GLTF'
+]
diff --git a/pyrender/pyrender/camera.py b/pyrender/pyrender/camera.py
new file mode 100644
index 0000000000000000000000000000000000000000..e019358039033c3a372c990ebad3151258c3651d
--- /dev/null
+++ b/pyrender/pyrender/camera.py
@@ -0,0 +1,437 @@
+"""Virtual cameras compliant with the glTF 2.0 specification as described at
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-camera
+
+Author: Matthew Matl
+"""
+import abc
+import numpy as np
+import six
+import sys
+
+from .constants import DEFAULT_Z_NEAR, DEFAULT_Z_FAR
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Camera(object):
+ """Abstract base class for all cameras.
+
+ Note
+ ----
+ Camera poses are specified in the OpenGL format,
+ where the z axis points away from the view direction and the
+ x and y axes point to the right and up in the image plane, respectively.
+
+ Parameters
+ ----------
+ znear : float
+ The floating-point distance to the near clipping plane.
+ zfar : float
+ The floating-point distance to the far clipping plane.
+ ``zfar`` must be greater than ``znear``.
+ name : str, optional
+ The user-defined name of this object.
+ """
+
+ def __init__(self,
+ znear=DEFAULT_Z_NEAR,
+ zfar=DEFAULT_Z_FAR,
+ name=None):
+ self.name = name
+ self.znear = znear
+ self.zfar = zfar
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def znear(self):
+ """float : The distance to the near clipping plane.
+ """
+ return self._znear
+
+ @znear.setter
+ def znear(self, value):
+ value = float(value)
+ if value < 0:
+ raise ValueError('z-near must be >= 0.0')
+ self._znear = value
+
+ @property
+ def zfar(self):
+ """float : The distance to the far clipping plane.
+ """
+ return self._zfar
+
+ @zfar.setter
+ def zfar(self, value):
+ value = float(value)
+ if value <= 0 or value <= self.znear:
+ raise ValueError('zfar must be >0 and >znear')
+ self._zfar = value
+
+ @abc.abstractmethod
+ def get_projection_matrix(self, width=None, height=None):
+ """Return the OpenGL projection matrix for this camera.
+
+ Parameters
+ ----------
+ width : int
+ Width of the current viewport, in pixels.
+ height : int
+ Height of the current viewport, in pixels.
+ """
+ pass
+
+
+class PerspectiveCamera(Camera):
+
+ """A perspective camera for perspective projection.
+
+ Parameters
+ ----------
+ yfov : float
+ The floating-point vertical field of view in radians.
+ znear : float
+ The floating-point distance to the near clipping plane.
+ If not specified, defaults to 0.05.
+ zfar : float, optional
+ The floating-point distance to the far clipping plane.
+ ``zfar`` must be greater than ``znear``.
+ If None, the camera uses an infinite projection matrix.
+ aspectRatio : float, optional
+ The floating-point aspect ratio of the field of view.
+ If not specified, the camera uses the viewport's aspect ratio.
+ name : str, optional
+ The user-defined name of this object.
+ """
+
+ def __init__(self,
+ yfov,
+ znear=DEFAULT_Z_NEAR,
+ zfar=None,
+ aspectRatio=None,
+ name=None):
+ super(PerspectiveCamera, self).__init__(
+ znear=znear,
+ zfar=zfar,
+ name=name,
+ )
+
+ self.yfov = yfov
+ self.aspectRatio = aspectRatio
+
+ @property
+ def yfov(self):
+ """float : The vertical field of view in radians.
+ """
+ return self._yfov
+
+ @yfov.setter
+ def yfov(self, value):
+ value = float(value)
+ if value <= 0.0:
+ raise ValueError('Field of view must be positive')
+ self._yfov = value
+
+ @property
+ def zfar(self):
+ """float : The distance to the far clipping plane.
+ """
+ return self._zfar
+
+ @zfar.setter
+ def zfar(self, value):
+ if value is not None:
+ value = float(value)
+ if value <= 0 or value <= self.znear:
+ raise ValueError('zfar must be >0 and >znear')
+ self._zfar = value
+
+ @property
+ def aspectRatio(self):
+ """float : The ratio of the width to the height of the field of view.
+ """
+ return self._aspectRatio
+
+ @aspectRatio.setter
+ def aspectRatio(self, value):
+ if value is not None:
+ value = float(value)
+ if value <= 0.0:
+ raise ValueError('Aspect ratio must be positive')
+ self._aspectRatio = value
+
+ def get_projection_matrix(self, width=None, height=None):
+ """Return the OpenGL projection matrix for this camera.
+
+ Parameters
+ ----------
+ width : int
+ Width of the current viewport, in pixels.
+ height : int
+ Height of the current viewport, in pixels.
+ """
+ aspect_ratio = self.aspectRatio
+ if aspect_ratio is None:
+ if width is None or height is None:
+ raise ValueError('Aspect ratio of camera must be defined')
+ aspect_ratio = float(width) / float(height)
+
+ a = aspect_ratio
+ t = np.tan(self.yfov / 2.0)
+ n = self.znear
+ f = self.zfar
+
+ P = np.zeros((4,4))
+ P[0][0] = 1.0 / (a * t)
+ P[1][1] = 1.0 / t
+ P[3][2] = -1.0
+
+ if f is None:
+ P[2][2] = -1.0
+ P[2][3] = -2.0 * n
+ else:
+ P[2][2] = (f + n) / (n - f)
+ P[2][3] = (2 * f * n) / (n - f)
+
+ return P
+
+
+class OrthographicCamera(Camera):
+ """An orthographic camera for orthographic projection.
+
+ Parameters
+ ----------
+ xmag : float
+ The floating-point horizontal magnification of the view.
+ ymag : float
+ The floating-point vertical magnification of the view.
+ znear : float
+ The floating-point distance to the near clipping plane.
+ If not specified, defaults to 0.05.
+ zfar : float
+ The floating-point distance to the far clipping plane.
+ ``zfar`` must be greater than ``znear``.
+ If not specified, defaults to 100.0.
+ name : str, optional
+ The user-defined name of this object.
+ """
+
+ def __init__(self,
+ xmag,
+ ymag,
+ znear=DEFAULT_Z_NEAR,
+ zfar=DEFAULT_Z_FAR,
+ name=None):
+ super(OrthographicCamera, self).__init__(
+ znear=znear,
+ zfar=zfar,
+ name=name,
+ )
+
+ self.xmag = xmag
+ self.ymag = ymag
+
+ @property
+ def xmag(self):
+ """float : The horizontal magnification of the view.
+ """
+ return self._xmag
+
+ @xmag.setter
+ def xmag(self, value):
+ value = float(value)
+ if value <= 0.0:
+ raise ValueError('X magnification must be positive')
+ self._xmag = value
+
+ @property
+ def ymag(self):
+ """float : The vertical magnification of the view.
+ """
+ return self._ymag
+
+ @ymag.setter
+ def ymag(self, value):
+ value = float(value)
+ if value <= 0.0:
+ raise ValueError('Y magnification must be positive')
+ self._ymag = value
+
+ @property
+ def znear(self):
+ """float : The distance to the near clipping plane.
+ """
+ return self._znear
+
+ @znear.setter
+ def znear(self, value):
+ value = float(value)
+ if value <= 0:
+ raise ValueError('z-near must be > 0.0')
+ self._znear = value
+
+ def get_projection_matrix(self, width=None, height=None):
+ """Return the OpenGL projection matrix for this camera.
+
+ Parameters
+ ----------
+ width : int
+ Width of the current viewport, in pixels.
+ Unused in this function.
+ height : int
+ Height of the current viewport, in pixels.
+ Unused in this function.
+ """
+ xmag = self.xmag
+ ymag = self.ymag
+
+ # If screen width/height defined, rescale xmag
+ if width is not None and height is not None:
+ xmag = width / height * ymag
+
+ n = self.znear
+ f = self.zfar
+ P = np.zeros((4,4))
+ P[0][0] = 1.0 / xmag
+ P[1][1] = 1.0 / ymag
+ P[2][2] = 2.0 / (n - f)
+ P[2][3] = (f + n) / (n - f)
+ P[3][3] = 1.0
+ return P
+
+
+class IntrinsicsCamera(Camera):
+ """A perspective camera with custom intrinsics.
+
+ Parameters
+ ----------
+ fx : float
+ X-axis focal length in pixels.
+ fy : float
+ Y-axis focal length in pixels.
+ cx : float
+ X-axis optical center in pixels.
+ cy : float
+ Y-axis optical center in pixels.
+ znear : float
+ The floating-point distance to the near clipping plane.
+ If not specified, defaults to 0.05.
+ zfar : float
+ The floating-point distance to the far clipping plane.
+ ``zfar`` must be greater than ``znear``.
+ If not specified, defaults to 100.0.
+ name : str, optional
+ The user-defined name of this object.
+ """
+
+ def __init__(self,
+ fx,
+ fy,
+ cx,
+ cy,
+ znear=DEFAULT_Z_NEAR,
+ zfar=DEFAULT_Z_FAR,
+ name=None):
+ super(IntrinsicsCamera, self).__init__(
+ znear=znear,
+ zfar=zfar,
+ name=name,
+ )
+
+ self.fx = fx
+ self.fy = fy
+ self.cx = cx
+ self.cy = cy
+
+ @property
+ def fx(self):
+ """float : X-axis focal length in meters.
+ """
+ return self._fx
+
+ @fx.setter
+ def fx(self, value):
+ self._fx = float(value)
+
+ @property
+ def fy(self):
+ """float : Y-axis focal length in meters.
+ """
+ return self._fy
+
+ @fy.setter
+ def fy(self, value):
+ self._fy = float(value)
+
+ @property
+ def cx(self):
+ """float : X-axis optical center in pixels.
+ """
+ return self._cx
+
+ @cx.setter
+ def cx(self, value):
+ self._cx = float(value)
+
+ @property
+ def cy(self):
+ """float : Y-axis optical center in pixels.
+ """
+ return self._cy
+
+ @cy.setter
+ def cy(self, value):
+ self._cy = float(value)
+
+ def get_projection_matrix(self, width, height):
+ """Return the OpenGL projection matrix for this camera.
+
+ Parameters
+ ----------
+ width : int
+ Width of the current viewport, in pixels.
+ height : int
+ Height of the current viewport, in pixels.
+ """
+ width = float(width)
+ height = float(height)
+
+ cx, cy = self.cx, self.cy
+ fx, fy = self.fx, self.fy
+ if sys.platform == 'darwin':
+ cx = self.cx * 2.0
+ cy = self.cy * 2.0
+ fx = self.fx * 2.0
+ fy = self.fy * 2.0
+
+ P = np.zeros((4,4))
+ P[0][0] = 2.0 * fx / width
+ P[1][1] = 2.0 * fy / height
+ P[0][2] = 1.0 - 2.0 * cx / width
+ P[1][2] = 2.0 * cy / height - 1.0
+ P[3][2] = -1.0
+
+ n = self.znear
+ f = self.zfar
+ if f is None:
+ P[2][2] = -1.0
+ P[2][3] = -2.0 * n
+ else:
+ P[2][2] = (f + n) / (n - f)
+ P[2][3] = (2 * f * n) / (n - f)
+
+ return P
+
+
+__all__ = ['Camera', 'PerspectiveCamera', 'OrthographicCamera',
+ 'IntrinsicsCamera']
diff --git a/pyrender/pyrender/constants.py b/pyrender/pyrender/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a5785b6fdb21910a174252c5af2f05b40ece4a5
--- /dev/null
+++ b/pyrender/pyrender/constants.py
@@ -0,0 +1,149 @@
+DEFAULT_Z_NEAR = 0.05 # Near clipping plane, in meters
+DEFAULT_Z_FAR = 100.0 # Far clipping plane, in meters
+DEFAULT_SCENE_SCALE = 2.0 # Default scene scale
+MAX_N_LIGHTS = 4 # Maximum number of lights of each type allowed
+TARGET_OPEN_GL_MAJOR = 4 # Target OpenGL Major Version
+TARGET_OPEN_GL_MINOR = 1 # Target OpenGL Minor Version
+MIN_OPEN_GL_MAJOR = 3 # Minimum OpenGL Major Version
+MIN_OPEN_GL_MINOR = 3 # Minimum OpenGL Minor Version
+FLOAT_SZ = 4 # Byte size of GL float32
+UINT_SZ = 4 # Byte size of GL uint32
+SHADOW_TEX_SZ = 2048 # Width and Height of Shadow Textures
+TEXT_PADDING = 20 # Width of padding for rendering text (px)
+
+
+# Flags for render type
+class RenderFlags(object):
+ """Flags for rendering in the scene.
+
+ Combine them with the bitwise or. For example,
+
+ >>> flags = OFFSCREEN | SHADOWS_DIRECTIONAL | VERTEX_NORMALS
+
+ would result in an offscreen render with directional shadows and
+ vertex normals enabled.
+ """
+ NONE = 0
+ """Normal PBR Render."""
+ DEPTH_ONLY = 1
+ """Only render the depth buffer."""
+ OFFSCREEN = 2
+ """Render offscreen and return the depth and (optionally) color buffers."""
+ FLIP_WIREFRAME = 4
+ """Invert the status of wireframe rendering for each mesh."""
+ ALL_WIREFRAME = 8
+ """Render all meshes as wireframes."""
+ ALL_SOLID = 16
+ """Render all meshes as solids."""
+ SHADOWS_DIRECTIONAL = 32
+ """Render shadows for directional lights."""
+ SHADOWS_POINT = 64
+ """Render shadows for point lights."""
+ SHADOWS_SPOT = 128
+ """Render shadows for spot lights."""
+ SHADOWS_ALL = 32 | 64 | 128
+ """Render shadows for all lights."""
+ VERTEX_NORMALS = 256
+ """Render vertex normals."""
+ FACE_NORMALS = 512
+ """Render face normals."""
+ SKIP_CULL_FACES = 1024
+ """Do not cull back faces."""
+ RGBA = 2048
+ """Render the color buffer with the alpha channel enabled."""
+ FLAT = 4096
+ """Render the color buffer flat, with no lighting computations."""
+ SEG = 8192
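+ """Render a segmentation image, coloring each mesh by the value supplied for its node in the renderer's node-to-color map."""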
+
+
+class TextAlign:
+ """Text alignment options for captions.
+
+ Only use one at a time.
+ """
+ CENTER = 0
+ """Center the text by width and height."""
+ CENTER_LEFT = 1
+ """Center the text by height and left-align it."""
+ CENTER_RIGHT = 2
+ """Center the text by height and right-align it."""
+ BOTTOM_LEFT = 3
+ """Put the text in the bottom-left corner."""
+ BOTTOM_RIGHT = 4
+ """Put the text in the bottom-right corner."""
+ BOTTOM_CENTER = 5
+ """Center the text by width and fix it to the bottom."""
+ TOP_LEFT = 6
+ """Put the text in the top-left corner."""
+ TOP_RIGHT = 7
+ """Put the text in the top-right corner."""
+ TOP_CENTER = 8
+ """Center the text by width and fix it to the top."""
+
+
+class GLTF(object):
+ """Options for GL objects."""
+ NEAREST = 9728
+ """Nearest neighbor interpolation."""
+ LINEAR = 9729
+ """Linear interpolation."""
+ NEAREST_MIPMAP_NEAREST = 9984
+ """Nearest mipmapping."""
+ LINEAR_MIPMAP_NEAREST = 9985
+ """Linear mipmapping."""
+ NEAREST_MIPMAP_LINEAR = 9986
+ """Nearest mipmapping."""
+ LINEAR_MIPMAP_LINEAR = 9987
+ """Linear mipmapping."""
+ CLAMP_TO_EDGE = 33071
+ """Clamp to the edge of the texture."""
+ MIRRORED_REPEAT = 33648
+ """Mirror the texture."""
+ REPEAT = 10497
+ """Repeat the texture."""
+ POINTS = 0
+ """Render as points."""
+ LINES = 1
+ """Render as lines."""
+ LINE_LOOP = 2
+ """Render as a line loop."""
+ LINE_STRIP = 3
+ """Render as a line strip."""
+ TRIANGLES = 4
+ """Render as triangles."""
+ TRIANGLE_STRIP = 5
+ """Render as a triangle strip."""
+ TRIANGLE_FAN = 6
+ """Render as a triangle fan."""
+
+
+class BufFlags(object):
+ POSITION = 0
+ NORMAL = 1
+ TANGENT = 2
+ TEXCOORD_0 = 4
+ TEXCOORD_1 = 8
+ COLOR_0 = 16
+ JOINTS_0 = 32
+ WEIGHTS_0 = 64
+
+
+class TexFlags(object):
+ NONE = 0
+ NORMAL = 1
+ OCCLUSION = 2
+ EMISSIVE = 4
+ BASE_COLOR = 8
+ METALLIC_ROUGHNESS = 16
+ DIFFUSE = 32
+ SPECULAR_GLOSSINESS = 64
+
+
+class ProgramFlags:
+ NONE = 0
+ USE_MATERIAL = 1
+ VERTEX_NORMALS = 2
+ FACE_NORMALS = 4
+
+
+__all__ = ['RenderFlags', 'TextAlign', 'GLTF']
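+
+
+# --- Usage sketch (added for documentation; not part of upstream pyrender) ---
+# RenderFlags values are single-bit masks: build a configuration with bitwise
+# OR and test it with bitwise AND. GLTF simply holds the raw glTF/OpenGL enum
+# values used elsewhere (e.g. ``mode=GLTF.TRIANGLES`` in mesh.py). A minimal,
+# self-contained check:
+if __name__ == '__main__':
+    flags = RenderFlags.RGBA | RenderFlags.SHADOWS_DIRECTIONAL
+    assert flags & RenderFlags.SHADOWS_DIRECTIONAL
+    assert not flags & RenderFlags.ALL_WIREFRAME
+    print('flags={:b}  GLTF.TRIANGLES={}'.format(flags, GLTF.TRIANGLES))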
diff --git a/pyrender/pyrender/font.py b/pyrender/pyrender/font.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ac530d7b949f50314a0d9cf5d744bedcace0571
--- /dev/null
+++ b/pyrender/pyrender/font.py
@@ -0,0 +1,272 @@
+"""Font texture loader and processor.
+
+Author: Matthew Matl
+"""
+import freetype
+import numpy as np
+import os
+
+import OpenGL
+from OpenGL.GL import *
+
+from .constants import TextAlign, FLOAT_SZ
+from .texture import Texture
+from .sampler import Sampler
+
+
+class FontCache(object):
+ """A cache for fonts.
+ """
+
+ def __init__(self, font_dir=None):
+ self._font_cache = {}
+ self.font_dir = font_dir
+ if self.font_dir is None:
+ base_dir, _ = os.path.split(os.path.realpath(__file__))
+ self.font_dir = os.path.join(base_dir, 'fonts')
+
+ def get_font(self, font_name, font_pt):
+ # If it's a file, load it directly, else, try to load from font dir.
+ if os.path.isfile(font_name):
+ font_filename = font_name
+ _, font_name = os.path.split(font_name)
+ font_name, _ = os.path.splitext(font_name)
+ else:
+ font_filename = os.path.join(self.font_dir, font_name) + '.ttf'
+
+ cid = OpenGL.contextdata.getContext()
+ key = (cid, font_name, int(font_pt))
+
+ if key not in self._font_cache:
+ self._font_cache[key] = Font(font_filename, font_pt)
+ return self._font_cache[key]
+
+ def clear(self):
+ for key in self._font_cache:
+ self._font_cache[key].delete()
+ self._font_cache = {}
+
+
+class Character(object):
+ """A single character, with its texture and attributes.
+ """
+
+ def __init__(self, texture, size, bearing, advance):
+ self.texture = texture
+ self.size = size
+ self.bearing = bearing
+ self.advance = advance
+
+
+class Font(object):
+ """A font object.
+
+ Parameters
+ ----------
+ font_file : str
+ The file to load the font from.
+ font_pt : int
+ The height of the font in pixels.
+ """
+
+ def __init__(self, font_file, font_pt=40):
+ self.font_file = font_file
+ self.font_pt = int(font_pt)
+ self._face = freetype.Face(font_file)
+ self._face.set_pixel_sizes(0, font_pt)
+ self._character_map = {}
+
+ for i in range(0, 128):
+
+ # Generate texture
+ face = self._face
+ face.load_char(chr(i))
+ buf = face.glyph.bitmap.buffer
+ src = (np.array(buf) / 255.0).astype(np.float32)
+ src = src.reshape((face.glyph.bitmap.rows,
+ face.glyph.bitmap.width))
+ tex = Texture(
+ sampler=Sampler(
+ magFilter=GL_LINEAR,
+ minFilter=GL_LINEAR,
+ wrapS=GL_CLAMP_TO_EDGE,
+ wrapT=GL_CLAMP_TO_EDGE
+ ),
+ source=src,
+ source_channels='R',
+ )
+ character = Character(
+ texture=tex,
+ size=np.array([face.glyph.bitmap.width,
+ face.glyph.bitmap.rows]),
+ bearing=np.array([face.glyph.bitmap_left,
+ face.glyph.bitmap_top]),
+ advance=face.glyph.advance.x
+ )
+ self._character_map[chr(i)] = character
+
+ self._vbo = None
+ self._vao = None
+
+ @property
+ def font_file(self):
+ """str : The file the font was loaded from.
+ """
+ return self._font_file
+
+ @font_file.setter
+ def font_file(self, value):
+ self._font_file = value
+
+ @property
+ def font_pt(self):
+ """int : The height of the font in pixels.
+ """
+ return self._font_pt
+
+ @font_pt.setter
+ def font_pt(self, value):
+ self._font_pt = int(value)
+
+ def _add_to_context(self):
+
+ self._vao = glGenVertexArrays(1)
+ glBindVertexArray(self._vao)
+ self._vbo = glGenBuffers(1)
+ glBindBuffer(GL_ARRAY_BUFFER, self._vbo)
+ glBufferData(GL_ARRAY_BUFFER, FLOAT_SZ * 6 * 4, None, GL_DYNAMIC_DRAW)
+ glEnableVertexAttribArray(0)
+ glVertexAttribPointer(
+ 0, 4, GL_FLOAT, GL_FALSE, 4 * FLOAT_SZ, ctypes.c_void_p(0)
+ )
+ glBindVertexArray(0)
+
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
+ for c in self._character_map:
+ ch = self._character_map[c]
+ if not ch.texture._in_context():
+ ch.texture._add_to_context()
+
+ def _remove_from_context(self):
+ for c in self._character_map:
+ ch = self._character_map[c]
+ ch.texture.delete()
+ if self._vao is not None:
+ glDeleteVertexArrays(1, [self._vao])
+ glDeleteBuffers(1, [self._vbo])
+ self._vao = None
+ self._vbo = None
+
+ def _in_context(self):
+ return self._vao is not None
+
+ def _bind(self):
+ glBindVertexArray(self._vao)
+
+ def _unbind(self):
+ glBindVertexArray(0)
+
+ def delete(self):
+ self._unbind()
+ self._remove_from_context()
+
+ def render_string(self, text, x, y, scale=1.0,
+ align=TextAlign.BOTTOM_LEFT):
+ """Render a string to the current view buffer.
+
+ Note
+ ----
+ Assumes correct shader program already bound w/ uniforms set.
+
+ Parameters
+ ----------
+ text : str
+ The text to render.
+ x : int
+ Horizontal pixel location of text.
+ y : int
+ Vertical pixel location of text.
+ scale : float
+ Scaling factor for text.
+ align : int
+ One of the TextAlign options which specifies where the ``x``
+ and ``y`` parameters lie on the text. For example,
+ :attr:`.TextAlign.BOTTOM_LEFT` means that ``x`` and ``y`` indicate
+ the position of the bottom-left corner of the textbox.
+ """
+ glActiveTexture(GL_TEXTURE0)
+ glEnable(GL_BLEND)
+ glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
+ glDisable(GL_DEPTH_TEST)
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL)
+ self._bind()
+
+ # Determine width and height of text relative to x, y
+ width = 0.0
+ height = 0.0
+ for c in text:
+ ch = self._character_map[c]
+ height = max(height, ch.bearing[1] * scale)
+ width += (ch.advance >> 6) * scale
+
+ # Determine offsets based on alignments
+ xoff = 0
+ yoff = 0
+ if align == TextAlign.BOTTOM_RIGHT:
+ xoff = -width
+ elif align == TextAlign.BOTTOM_CENTER:
+ xoff = -width / 2.0
+ elif align == TextAlign.TOP_LEFT:
+ yoff = -height
+ elif align == TextAlign.TOP_RIGHT:
+ yoff = -height
+ xoff = -width
+ elif align == TextAlign.TOP_CENTER:
+ yoff = -height
+ xoff = -width / 2.0
+ elif align == TextAlign.CENTER:
+ xoff = -width / 2.0
+ yoff = -height / 2.0
+ elif align == TextAlign.CENTER_LEFT:
+ yoff = -height / 2.0
+ elif align == TextAlign.CENTER_RIGHT:
+ xoff = -width
+ yoff = -height / 2.0
+
+ x += xoff
+ y += yoff
+
+ ch = None
+ for c in text:
+ ch = self._character_map[c]
+ xpos = x + ch.bearing[0] * scale
+ ypos = y - (ch.size[1] - ch.bearing[1]) * scale
+ w = ch.size[0] * scale
+ h = ch.size[1] * scale
+
+ vertices = np.array([
+ [xpos, ypos, 0.0, 0.0],
+ [xpos + w, ypos, 1.0, 0.0],
+ [xpos + w, ypos + h, 1.0, 1.0],
+ [xpos + w, ypos + h, 1.0, 1.0],
+ [xpos, ypos + h, 0.0, 1.0],
+ [xpos, ypos, 0.0, 0.0],
+ ], dtype=np.float32)
+
+ ch.texture._bind()
+
+ glBindBuffer(GL_ARRAY_BUFFER, self._vbo)
+ glBufferData(
+ GL_ARRAY_BUFFER, FLOAT_SZ * 6 * 4, vertices, GL_DYNAMIC_DRAW
+ )
+ # TODO MAKE THIS MORE EFFICIENT, glBufferSubData is broken
+ # glBufferSubData(
+ # GL_ARRAY_BUFFER, 0, 6 * 4 * FLOAT_SZ,
+ # np.ascontiguousarray(vertices.flatten)
+ # )
+ glDrawArrays(GL_TRIANGLES, 0, 6)
+ x += (ch.advance >> 6) * scale
+
+ self._unbind()
+ if ch:
+ ch.texture._unbind()
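+
+
+# --- Usage sketch (added for documentation; not part of upstream pyrender) ---
+# Rendering text requires a current OpenGL context and a bound text shader
+# whose uniforms are already set (see the note in ``render_string``), so the
+# example is left as a comment rather than executable code:
+#
+#     cache = FontCache()
+#     font = cache.get_font('OpenSans-Regular', font_pt=40)  # resolved in fonts/
+#     font._add_to_context()                  # once per GL context
+#     font.render_string('hello', x=20, y=20, scale=1.0,
+#                        align=TextAlign.BOTTOM_LEFT)
+#     font.delete()                           # release the VAO/VBO and glyph textures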
diff --git a/pyrender/pyrender/fonts/OpenSans-Bold.ttf b/pyrender/pyrender/fonts/OpenSans-Bold.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..fd79d43bea0293ac1b20e8aca1142627983d2c07
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-Bold.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-BoldItalic.ttf b/pyrender/pyrender/fonts/OpenSans-BoldItalic.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..9bc800958a421d937fc392e00beaef4eea76dc71
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-BoldItalic.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-ExtraBold.ttf b/pyrender/pyrender/fonts/OpenSans-ExtraBold.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..21f6f84a0799946fc4ae02c52b27e61c3762c745
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-ExtraBold.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-ExtraBoldItalic.ttf b/pyrender/pyrender/fonts/OpenSans-ExtraBoldItalic.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..31cb688340eff462dddf47efbb4dfef66cb7fbed
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-ExtraBoldItalic.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-Italic.ttf b/pyrender/pyrender/fonts/OpenSans-Italic.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..c90da48ff3b8ad6167236d70c48df4d7b5de3bbb
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-Italic.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-Light.ttf b/pyrender/pyrender/fonts/OpenSans-Light.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..0d381897da20345fa63112f19042561f44ee3aa0
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-Light.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-LightItalic.ttf b/pyrender/pyrender/fonts/OpenSans-LightItalic.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..68299c4bc6b5b7adfff2c9aee4aed7c1547100ef
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-LightItalic.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-Regular.ttf b/pyrender/pyrender/fonts/OpenSans-Regular.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..db433349b7047f72f40072630c1bc110620bf09e
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-Regular.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-Semibold.ttf b/pyrender/pyrender/fonts/OpenSans-Semibold.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..1a7679e3949fb045f152f456bc4adad31e8b9f55
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-Semibold.ttf differ
diff --git a/pyrender/pyrender/fonts/OpenSans-SemiboldItalic.ttf b/pyrender/pyrender/fonts/OpenSans-SemiboldItalic.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..59b6d16b065f6baa6f70ddbd4322a4f44bb9636a
Binary files /dev/null and b/pyrender/pyrender/fonts/OpenSans-SemiboldItalic.ttf differ
diff --git a/pyrender/pyrender/light.py b/pyrender/pyrender/light.py
new file mode 100644
index 0000000000000000000000000000000000000000..333d9e4e553a245c259251a89b69cb46b73b1278
--- /dev/null
+++ b/pyrender/pyrender/light.py
@@ -0,0 +1,385 @@
+"""Punctual light sources as defined by the glTF 2.0 KHR extension at
+https://github.com/KhronosGroup/glTF/tree/master/extensions/2.0/Khronos/KHR_lights_punctual
+
+Author: Matthew Matl
+"""
+import abc
+import numpy as np
+import six
+
+from OpenGL.GL import *
+
+from .utils import format_color_vector
+from .texture import Texture
+from .constants import SHADOW_TEX_SZ
+from .camera import OrthographicCamera, PerspectiveCamera
+
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Light(object):
+ """Base class for all light objects.
+
+ Parameters
+ ----------
+ color : (3,) float
+ RGB value for the light's color in linear space.
+ intensity : float
+ Brightness of light. The units that this is defined in depend on the
+ type of light. Point and spot lights use luminous intensity in candela
+ (lm/sr), while directional lights use illuminance in lux (lm/m2).
+ name : str, optional
+ Name of the light.
+ """
+ def __init__(self,
+ color=None,
+ intensity=None,
+ name=None):
+
+ if color is None:
+ color = np.ones(3)
+ if intensity is None:
+ intensity = 1.0
+
+ self.name = name
+ self.color = color
+ self.intensity = intensity
+ self._shadow_camera = None
+ self._shadow_texture = None
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def color(self):
+ """(3,) float : The light's color.
+ """
+ return self._color
+
+ @color.setter
+ def color(self, value):
+ self._color = format_color_vector(value, 3)
+
+ @property
+ def intensity(self):
+ """float : The light's intensity in candela or lux.
+ """
+ return self._intensity
+
+ @intensity.setter
+ def intensity(self, value):
+ self._intensity = float(value)
+
+ @property
+ def shadow_texture(self):
+ """:class:`.Texture` : A texture used to hold shadow maps for this light.
+ """
+ return self._shadow_texture
+
+ @shadow_texture.setter
+ def shadow_texture(self, value):
+ if self._shadow_texture is not None:
+ if self._shadow_texture._in_context():
+ self._shadow_texture.delete()
+ self._shadow_texture = value
+
+ @abc.abstractmethod
+ def _generate_shadow_texture(self, size=None):
+ """Generate a shadow texture for this light.
+
+ Parameters
+ ----------
+ size : int, optional
+ Size of texture map. Must be a positive power of two.
+ """
+ pass
+
+ @abc.abstractmethod
+ def _get_shadow_camera(self, scene_scale):
+ """Generate and return a shadow mapping camera for this light.
+
+ Parameters
+ ----------
+ scene_scale : float
+ Length of scene's bounding box diagonal.
+
+ Returns
+ -------
+ camera : :class:`.Camera`
+ The camera used to render shadowmaps for this light.
+ """
+ pass
+
+
+class DirectionalLight(Light):
+ """Directional lights are light sources that act as though they are
+ infinitely far away and emit light in the direction of the local -z axis.
+ This light type inherits the orientation of the node that it belongs to;
+ position and scale are ignored except for their effect on the inherited
+ node orientation. Because it is at an infinite distance, the light is
+ not attenuated. Its intensity is defined in lumens per metre squared,
+ or lux (lm/m2).
+
+ Parameters
+ ----------
+ color : (3,) float, optional
+ RGB value for the light's color in linear space. Defaults to white
+ (i.e. [1.0, 1.0, 1.0]).
+ intensity : float, optional
+ Brightness of light, in lux (lm/m^2). Defaults to 1.0
+ name : str, optional
+ Name of the light.
+ """
+
+ def __init__(self,
+ color=None,
+ intensity=None,
+ name=None):
+ super(DirectionalLight, self).__init__(
+ color=color,
+ intensity=intensity,
+ name=name,
+ )
+
+ def _generate_shadow_texture(self, size=None):
+ """Generate a shadow texture for this light.
+
+ Parameters
+ ----------
+ size : int, optional
+ Size of texture map. Must be a positive power of two.
+ """
+ if size is None:
+ size = SHADOW_TEX_SZ
+ self.shadow_texture = Texture(width=size, height=size,
+ source_channels='D', data_format=GL_FLOAT)
+
+ def _get_shadow_camera(self, scene_scale):
+ """Generate and return a shadow mapping camera for this light.
+
+ Parameters
+ ----------
+ scene_scale : float
+ Length of scene's bounding box diagonal.
+
+ Returns
+ -------
+ camera : :class:`.Camera`
+ The camera used to render shadowmaps for this light.
+ """
+ return OrthographicCamera(
+ znear=0.01 * scene_scale,
+ zfar=10 * scene_scale,
+ xmag=scene_scale,
+ ymag=scene_scale
+ )
+
+
+class PointLight(Light):
+ """Point lights emit light in all directions from their position in space;
+ rotation and scale are ignored except for their effect on the inherited
+ node position. The brightness of the light attenuates in a physically
+ correct manner as distance increases from the light's position (i.e.
+ brightness goes like the inverse square of the distance). Point light
+ intensity is defined in candela, which is lumens per square radian (lm/sr).
+
+ Parameters
+ ----------
+ color : (3,) float
+ RGB value for the light's color in linear space.
+ intensity : float
+ Brightness of light in candela (lm/sr).
+ range : float
+ Cutoff distance at which light's intensity may be considered to
+ have reached zero. If None, the range is assumed to be infinite.
+ name : str, optional
+ Name of the light.
+ """
+
+ def __init__(self,
+ color=None,
+ intensity=None,
+ range=None,
+ name=None):
+ super(PointLight, self).__init__(
+ color=color,
+ intensity=intensity,
+ name=name,
+ )
+ self.range = range
+
+ @property
+ def range(self):
+ """float : The cutoff distance for the light.
+ """
+ return self._range
+
+ @range.setter
+ def range(self, value):
+ if value is not None:
+ value = float(value)
+ if value <= 0:
+ raise ValueError('Range must be > 0')
+ self._range = value
+
+ def _generate_shadow_texture(self, size=None):
+ """Generate a shadow texture for this light.
+
+ Parameters
+ ----------
+ size : int, optional
+ Size of texture map. Must be a positive power of two.
+ """
+ raise NotImplementedError('Shadows not implemented for point lights')
+
+ def _get_shadow_camera(self, scene_scale):
+ """Generate and return a shadow mapping camera for this light.
+
+ Parameters
+ ----------
+ scene_scale : float
+ Length of scene's bounding box diagonal.
+
+ Returns
+ -------
+ camera : :class:`.Camera`
+ The camera used to render shadowmaps for this light.
+ """
+ raise NotImplementedError('Shadows not implemented for point lights')
+
+
+class SpotLight(Light):
+ """Spot lights emit light in a cone in the direction of the local -z axis.
+ The angle and falloff of the cone is defined using two numbers, the
+ ``innerConeAngle`` and ``outerConeAngle``.
+ As with point lights, the brightness
+ also attenuates in a physically correct manner as distance increases from
+ the light's position (i.e. brightness goes like the inverse square of the
+ distance). Spot light intensity refers to the brightness inside the
+ ``innerConeAngle`` (and at the location of the light) and is defined in
+ candela, which is lumens per square radian (lm/sr). A spot light's position
+ and orientation are inherited from its node transform. Inherited scale does
+ not affect cone shape, and is ignored except for its effect on position
+ and orientation.
+
+ Parameters
+ ----------
+ color : (3,) float
+ RGB value for the light's color in linear space.
+ intensity : float
+ Brightness of light in candela (lm/sr).
+ range : float
+ Cutoff distance at which light's intensity may be considered to
+ have reached zero. If None, the range is assumed to be infinite.
+ innerConeAngle : float
+ Angle, in radians, from centre of spotlight where falloff begins.
+ Must be greater than or equal to ``0`` and less
+ than ``outerConeAngle``. Defaults to ``0``.
+ outerConeAngle : float
+ Angle, in radians, from centre of spotlight where falloff ends.
+ Must be greater than ``innerConeAngle`` and less than or equal to
+ ``PI / 2.0``. Defaults to ``PI / 4.0``.
+ name : str, optional
+ Name of the light.
+ """
+
+ def __init__(self,
+ color=None,
+ intensity=None,
+ range=None,
+ innerConeAngle=0.0,
+ outerConeAngle=(np.pi / 4.0),
+ name=None):
+ super(SpotLight, self).__init__(
+ name=name,
+ color=color,
+ intensity=intensity,
+ )
+ self.outerConeAngle = outerConeAngle
+ self.innerConeAngle = innerConeAngle
+ self.range = range
+
+ @property
+ def innerConeAngle(self):
+ """float : The inner cone angle in radians.
+ """
+ return self._innerConeAngle
+
+ @innerConeAngle.setter
+ def innerConeAngle(self, value):
+ if value < 0.0 or value > self.outerConeAngle:
+ raise ValueError('Invalid value for inner cone angle')
+ self._innerConeAngle = float(value)
+
+ @property
+ def outerConeAngle(self):
+ """float : The outer cone angle in radians.
+ """
+ return self._outerConeAngle
+
+ @outerConeAngle.setter
+ def outerConeAngle(self, value):
+ if value < 0.0 or value > np.pi / 2.0 + 1e-9:
+ raise ValueError('Invalid value for outer cone angle')
+ self._outerConeAngle = float(value)
+
+ @property
+ def range(self):
+ """float : The cutoff distance for the light.
+ """
+ return self._range
+
+ @range.setter
+ def range(self, value):
+ if value is not None:
+ value = float(value)
+ if value <= 0:
+ raise ValueError('Range must be > 0')
+ self._range = value
+
+ def _generate_shadow_texture(self, size=None):
+ """Generate a shadow texture for this light.
+
+ Parameters
+ ----------
+ size : int, optional
+ Size of texture map. Must be a positive power of two.
+ """
+ if size is None:
+ size = SHADOW_TEX_SZ
+ self.shadow_texture = Texture(width=size, height=size,
+ source_channels='D', data_format=GL_FLOAT)
+
+ def _get_shadow_camera(self, scene_scale):
+ """Generate and return a shadow mapping camera for this light.
+
+ Parameters
+ ----------
+ scene_scale : float
+ Length of scene's bounding box diagonal.
+
+ Returns
+ -------
+ camera : :class:`.Camera`
+ The camera used to render shadowmaps for this light.
+ """
+ return PerspectiveCamera(
+ znear=0.01 * scene_scale,
+ zfar=10 * scene_scale,
+ yfov=np.clip(2 * self.outerConeAngle + np.pi / 16.0, 0.0, np.pi),
+ aspectRatio=1.0
+ )
+
+
+__all__ = ['Light', 'DirectionalLight', 'SpotLight', 'PointLight']
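+
+
+# --- Usage sketch (added for documentation; not part of upstream pyrender) ---
+# Lights are plain data holders until a renderer consumes them, so they can be
+# constructed without an OpenGL context. Intensity units follow the docstrings
+# above: lux for directional lights, candela for point and spot lights.
+if __name__ == '__main__':
+    sun = DirectionalLight(color=np.ones(3), intensity=3.0)
+    bulb = PointLight(color=[1.0, 0.9, 0.8], intensity=10.0, range=5.0)
+    spot = SpotLight(intensity=8.0,
+                     innerConeAngle=np.pi / 16.0,
+                     outerConeAngle=np.pi / 6.0)
+    print(sun.intensity, bulb.range, spot.outerConeAngle)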
diff --git a/pyrender/pyrender/material.py b/pyrender/pyrender/material.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ce9c2d184ed213c84b015e36bea558cd1efc6b7
--- /dev/null
+++ b/pyrender/pyrender/material.py
@@ -0,0 +1,707 @@
+"""Material properties, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-material
+and
+https://github.com/KhronosGroup/glTF/tree/master/extensions/2.0/Khronos/KHR_materials_pbrSpecularGlossiness
+
+Author: Matthew Matl
+"""
+import abc
+import numpy as np
+import six
+
+from .constants import TexFlags
+from .utils import format_color_vector, format_texture_source
+from .texture import Texture
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Material(object):
+ """Base for standard glTF 2.0 materials.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ normalTexture : (n,n,3) float or :class:`Texture`, optional
+ A tangent space normal map. The texture contains RGB components in
+ linear space. Each texel represents the XYZ components of a normal
+ vector in tangent space. Red [0 to 255] maps to X [-1 to 1]. Green
+ [0 to 255] maps to Y [-1 to 1]. Blue [128 to 255] maps to Z
+ [1/255 to 1]. The normal vectors use OpenGL conventions where +X is
+ right and +Y is up. +Z points toward the viewer.
+ occlusionTexture : (n,n,1) float or :class:`Texture`, optional
+ The occlusion map texture. The occlusion values are sampled from the R
+ channel. Higher values indicate areas that should receive full indirect
+ lighting and lower values indicate no indirect lighting. These values
+ are linear. If other channels are present (GBA), they are ignored for
+ occlusion calculations.
+ emissiveTexture : (n,n,3) float or :class:`Texture`, optional
+ The emissive map controls the color and intensity of the light being
+ emitted by the material. This texture contains RGB components in sRGB
+ color space. If a fourth component (A) is present, it is ignored.
+ emissiveFactor : (3,) float, optional
+ The RGB components of the emissive color of the material. These values
+ are linear. If an emissiveTexture is specified, this value is
+ multiplied with the texel values.
+ alphaMode : str, optional
+ The material's alpha rendering mode enumeration specifying the
+ interpretation of the alpha value of the main factor and texture.
+ Allowed Values:
+
+ - `"OPAQUE"` The alpha value is ignored and the rendered output is
+ fully opaque.
+ - `"MASK"` The rendered output is either fully opaque or fully
+ transparent depending on the alpha value and the specified alpha
+ cutoff value.
+ - `"BLEND"` The alpha value is used to composite the source and
+ destination areas. The rendered output is combined with the
+ background using the normal painting operation (i.e. the Porter
+ and Duff over operator).
+
+ alphaCutoff : float, optional
+ Specifies the cutoff threshold when in MASK mode. If the alpha value is
+ greater than or equal to this value then it is rendered as fully
+ opaque, otherwise, it is rendered as fully transparent.
+ A value greater than 1.0 will render the entire material as fully
+ transparent. This value is ignored for other modes.
+ doubleSided : bool, optional
+ Specifies whether the material is double sided. When this value is
+ false, back-face culling is enabled. When this value is true,
+ back-face culling is disabled and double sided lighting is enabled.
+ smooth : bool, optional
+ If True, the material is rendered smoothly by using only one normal
+ per vertex and face indexing.
+ wireframe : bool, optional
+ If True, the material is rendered in wireframe mode.
+ """
+
+ def __init__(self,
+ name=None,
+ normalTexture=None,
+ occlusionTexture=None,
+ emissiveTexture=None,
+ emissiveFactor=None,
+ alphaMode=None,
+ alphaCutoff=None,
+ doubleSided=False,
+ smooth=True,
+ wireframe=False):
+
+ # Set defaults
+ if alphaMode is None:
+ alphaMode = 'OPAQUE'
+
+ if alphaCutoff is None:
+ alphaCutoff = 0.5
+
+ if emissiveFactor is None:
+ emissiveFactor = np.zeros(3).astype(np.float32)
+
+ self.name = name
+ self.normalTexture = normalTexture
+ self.occlusionTexture = occlusionTexture
+ self.emissiveTexture = emissiveTexture
+ self.emissiveFactor = emissiveFactor
+ self.alphaMode = alphaMode
+ self.alphaCutoff = alphaCutoff
+ self.doubleSided = doubleSided
+ self.smooth = smooth
+ self.wireframe = wireframe
+
+ self._tex_flags = None
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def normalTexture(self):
+ """(n,n,3) float or :class:`Texture` : The tangent-space normal map.
+ """
+ return self._normalTexture
+
+ @normalTexture.setter
+ def normalTexture(self, value):
+ # TODO TMP
+ self._normalTexture = self._format_texture(value, 'RGB')
+ self._tex_flags = None
+
+ @property
+ def occlusionTexture(self):
+ """(n,n,1) float or :class:`Texture` : The ambient occlusion map.
+ """
+ return self._occlusionTexture
+
+ @occlusionTexture.setter
+ def occlusionTexture(self, value):
+ self._occlusionTexture = self._format_texture(value, 'R')
+ self._tex_flags = None
+
+ @property
+ def emissiveTexture(self):
+ """(n,n,3) float or :class:`Texture` : The emission map.
+ """
+ return self._emissiveTexture
+
+ @emissiveTexture.setter
+ def emissiveTexture(self, value):
+ self._emissiveTexture = self._format_texture(value, 'RGB')
+ self._tex_flags = None
+
+ @property
+ def emissiveFactor(self):
+ """(3,) float : Base multiplier for emission colors.
+ """
+ return self._emissiveFactor
+
+ @emissiveFactor.setter
+ def emissiveFactor(self, value):
+ if value is None:
+ value = np.zeros(3)
+ self._emissiveFactor = format_color_vector(value, 3)
+
+ @property
+ def alphaMode(self):
+ """str : The mode for blending.
+ """
+ return self._alphaMode
+
+ @alphaMode.setter
+ def alphaMode(self, value):
+ if value not in set(['OPAQUE', 'MASK', 'BLEND']):
+ raise ValueError('Invalid alpha mode {}'.format(value))
+ self._alphaMode = value
+
+ @property
+ def alphaCutoff(self):
+ """float : The cutoff threshold in MASK mode.
+ """
+ return self._alphaCutoff
+
+ @alphaCutoff.setter
+ def alphaCutoff(self, value):
+ if value < 0 or value > 1:
+ raise ValueError('Alpha cutoff must be in range [0,1]')
+ self._alphaCutoff = float(value)
+
+ @property
+ def doubleSided(self):
+ """bool : Whether the material is double-sided.
+ """
+ return self._doubleSided
+
+ @doubleSided.setter
+ def doubleSided(self, value):
+ if not isinstance(value, bool):
+ raise TypeError('Double sided must be a boolean value')
+ self._doubleSided = value
+
+ @property
+ def smooth(self):
+ """bool : Whether to render the mesh smoothly by
+ interpolating vertex normals.
+ """
+ return self._smooth
+
+ @smooth.setter
+ def smooth(self, value):
+ if not isinstance(value, bool):
+ raise TypeError('Smooth must be a boolean value')
+ self._smooth = value
+
+ @property
+ def wireframe(self):
+ """bool : Whether to render the mesh in wireframe mode.
+ """
+ return self._wireframe
+
+ @wireframe.setter
+ def wireframe(self, value):
+ if not isinstance(value, bool):
+ raise TypeError('Wireframe must be a boolean value')
+ self._wireframe = value
+
+ @property
+ def is_transparent(self):
+ """bool : If True, the object is partially transparent.
+ """
+ return self._compute_transparency()
+
+ @property
+ def tex_flags(self):
+ """int : Texture availability flags.
+ """
+ if self._tex_flags is None:
+ self._tex_flags = self._compute_tex_flags()
+ return self._tex_flags
+
+ @property
+ def textures(self):
+ """list of :class:`Texture` : The textures associated with this
+ material.
+ """
+ return self._compute_textures()
+
+ def _compute_transparency(self):
+ return False
+
+ def _compute_tex_flags(self):
+ tex_flags = TexFlags.NONE
+ if self.normalTexture is not None:
+ tex_flags |= TexFlags.NORMAL
+ if self.occlusionTexture is not None:
+ tex_flags |= TexFlags.OCCLUSION
+ if self.emissiveTexture is not None:
+ tex_flags |= TexFlags.EMISSIVE
+ return tex_flags
+
+ def _compute_textures(self):
+ all_textures = [
+ self.normalTexture, self.occlusionTexture, self.emissiveTexture
+ ]
+ textures = set([t for t in all_textures if t is not None])
+ return textures
+
+ def _format_texture(self, texture, target_channels='RGB'):
+ """Format a texture as a float32 np array.
+ """
+ if isinstance(texture, Texture) or texture is None:
+ return texture
+ else:
+ source = format_texture_source(texture, target_channels)
+ return Texture(source=source, source_channels=target_channels)
+
+
+class MetallicRoughnessMaterial(Material):
+ """A material based on the metallic-roughness material model from
+ Physically-Based Rendering (PBR) methodology.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ normalTexture : (n,n,3) float or :class:`Texture`, optional
+ A tangent space normal map. The texture contains RGB components in
+ linear space. Each texel represents the XYZ components of a normal
+ vector in tangent space. Red [0 to 255] maps to X [-1 to 1]. Green
+ [0 to 255] maps to Y [-1 to 1]. Blue [128 to 255] maps to Z
+ [1/255 to 1]. The normal vectors use OpenGL conventions where +X is
+ right and +Y is up. +Z points toward the viewer.
+ occlusionTexture : (n,n,1) float or :class:`Texture`, optional
+ The occlusion map texture. The occlusion values are sampled from the R
+ channel. Higher values indicate areas that should receive full indirect
+ lighting and lower values indicate no indirect lighting. These values
+ are linear. If other channels are present (GBA), they are ignored for
+ occlusion calculations.
+ emissiveTexture : (n,n,3) float or :class:`Texture`, optional
+ The emissive map controls the color and intensity of the light being
+ emitted by the material. This texture contains RGB components in sRGB
+ color space. If a fourth component (A) is present, it is ignored.
+ emissiveFactor : (3,) float, optional
+ The RGB components of the emissive color of the material. These values
+ are linear. If an emissiveTexture is specified, this value is
+ multiplied with the texel values.
+ alphaMode : str, optional
+ The material's alpha rendering mode enumeration specifying the
+ interpretation of the alpha value of the main factor and texture.
+ Allowed Values:
+
+ - `"OPAQUE"` The alpha value is ignored and the rendered output is
+ fully opaque.
+ - `"MASK"` The rendered output is either fully opaque or fully
+ transparent depending on the alpha value and the specified alpha
+ cutoff value.
+ - `"BLEND"` The alpha value is used to composite the source and
+ destination areas. The rendered output is combined with the
+ background using the normal painting operation (i.e. the Porter
+ and Duff over operator).
+
+ alphaCutoff : float, optional
+ Specifies the cutoff threshold when in MASK mode. If the alpha value is
+ greater than or equal to this value then it is rendered as fully
+ opaque, otherwise, it is rendered as fully transparent.
+ A value greater than 1.0 will render the entire material as fully
+ transparent. This value is ignored for other modes.
+ doubleSided : bool, optional
+ Specifies whether the material is double sided. When this value is
+ false, back-face culling is enabled. When this value is true,
+ back-face culling is disabled and double sided lighting is enabled.
+ smooth : bool, optional
+ If True, the material is rendered smoothly by using only one normal
+ per vertex and face indexing.
+ wireframe : bool, optional
+ If True, the material is rendered in wireframe mode.
+ baseColorFactor : (4,) float, optional
+ The RGBA components of the base color of the material. The fourth
+ component (A) is the alpha coverage of the material. The alphaMode
+ property specifies how alpha is interpreted. These values are linear.
+ If a baseColorTexture is specified, this value is multiplied with the
+ texel values.
+ baseColorTexture : (n,n,4) float or :class:`Texture`, optional
+ The base color texture. This texture contains RGB(A) components in sRGB
+ color space. The first three components (RGB) specify the base color of
+ the material. If the fourth component (A) is present, it represents the
+ alpha coverage of the material. Otherwise, an alpha of 1.0 is assumed.
+ The alphaMode property specifies how alpha is interpreted.
+ The stored texels must not be premultiplied.
+ metallicFactor : float
+ The metalness of the material. A value of 1.0 means the material is a
+ metal. A value of 0.0 means the material is a dielectric. Values in
+ between are for blending between metals and dielectrics such as dirty
+ metallic surfaces. This value is linear. If a metallicRoughnessTexture
+ is specified, this value is multiplied with the metallic texel values.
+ roughnessFactor : float
+ The roughness of the material. A value of 1.0 means the material is
+ completely rough. A value of 0.0 means the material is completely
+ smooth. This value is linear. If a metallicRoughnessTexture is
+ specified, this value is multiplied with the roughness texel values.
+ metallicRoughnessTexture : (n,n,2) float or :class:`Texture`, optional
+ The metallic-roughness texture. The metalness values are sampled from
+ the B channel. The roughness values are sampled from the G channel.
+ These values are linear. If other channels are present (R or A), they
+ are ignored for metallic-roughness calculations.
+ """
+
+ def __init__(self,
+ name=None,
+ normalTexture=None,
+ occlusionTexture=None,
+ emissiveTexture=None,
+ emissiveFactor=None,
+ alphaMode=None,
+ alphaCutoff=None,
+ doubleSided=False,
+ smooth=True,
+ wireframe=False,
+ baseColorFactor=None,
+ baseColorTexture=None,
+ metallicFactor=1.0,
+ roughnessFactor=1.0,
+ metallicRoughnessTexture=None):
+ super(MetallicRoughnessMaterial, self).__init__(
+ name=name,
+ normalTexture=normalTexture,
+ occlusionTexture=occlusionTexture,
+ emissiveTexture=emissiveTexture,
+ emissiveFactor=emissiveFactor,
+ alphaMode=alphaMode,
+ alphaCutoff=alphaCutoff,
+ doubleSided=doubleSided,
+ smooth=smooth,
+ wireframe=wireframe
+ )
+
+ # Set defaults
+ if baseColorFactor is None:
+ baseColorFactor = np.ones(4).astype(np.float32)
+
+ self.baseColorFactor = baseColorFactor
+ self.baseColorTexture = baseColorTexture
+ self.metallicFactor = metallicFactor
+ self.roughnessFactor = roughnessFactor
+ self.metallicRoughnessTexture = metallicRoughnessTexture
+
+ @property
+ def baseColorFactor(self):
+ """(4,) float or :class:`Texture` : The RGBA base color multiplier.
+ """
+ return self._baseColorFactor
+
+ @baseColorFactor.setter
+ def baseColorFactor(self, value):
+ if value is None:
+ value = np.ones(4)
+ self._baseColorFactor = format_color_vector(value, 4)
+
+ @property
+ def baseColorTexture(self):
+ """(n,n,4) float or :class:`Texture` : The diffuse texture.
+ """
+ return self._baseColorTexture
+
+ @baseColorTexture.setter
+ def baseColorTexture(self, value):
+ self._baseColorTexture = self._format_texture(value, 'RGBA')
+ self._tex_flags = None
+
+ @property
+ def metallicFactor(self):
+ """float : The metalness of the material.
+ """
+ return self._metallicFactor
+
+ @metallicFactor.setter
+ def metallicFactor(self, value):
+ if value is None:
+ value = 1.0
+ if value < 0 or value > 1:
+ raise ValueError('Metallic factor must be in range [0,1]')
+ self._metallicFactor = float(value)
+
+ @property
+ def roughnessFactor(self):
+ """float : The roughness of the material.
+ """
+ return self._roughnessFactor
+
+ @roughnessFactor.setter
+ def roughnessFactor(self, value):
+ if value is None:
+ value = 1.0
+ if value < 0 or value > 1:
+ raise ValueError('Roughness factor must be in range [0,1]')
+ self._roughnessFactor = float(value)
+
+ @property
+ def metallicRoughnessTexture(self):
+ """(n,n,2) float or :class:`Texture` : The metallic-roughness texture.
+ """
+ return self._metallicRoughnessTexture
+
+ @metallicRoughnessTexture.setter
+ def metallicRoughnessTexture(self, value):
+ self._metallicRoughnessTexture = self._format_texture(value, 'GB')
+ self._tex_flags = None
+
+ def _compute_tex_flags(self):
+ tex_flags = super(MetallicRoughnessMaterial, self)._compute_tex_flags()
+ if self.baseColorTexture is not None:
+ tex_flags |= TexFlags.BASE_COLOR
+ if self.metallicRoughnessTexture is not None:
+ tex_flags |= TexFlags.METALLIC_ROUGHNESS
+ return tex_flags
+
+ def _compute_transparency(self):
+ if self.alphaMode == 'OPAQUE':
+ return False
+ cutoff = self.alphaCutoff
+ if self.alphaMode == 'BLEND':
+ cutoff = 1.0
+ if self.baseColorFactor[3] < cutoff:
+ return True
+ if (self.baseColorTexture is not None and
+ self.baseColorTexture.is_transparent(cutoff)):
+ return True
+ return False
+
+ def _compute_textures(self):
+ textures = super(MetallicRoughnessMaterial, self)._compute_textures()
+ all_textures = [self.baseColorTexture, self.metallicRoughnessTexture]
+ all_textures = {t for t in all_textures if t is not None}
+ textures |= all_textures
+ return textures
+
+
+class SpecularGlossinessMaterial(Material):
+ """A material based on the specular-glossiness material model from
+ Physically-Based Rendering (PBR) methodology.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ normalTexture : (n,n,3) float or :class:`Texture`, optional
+ A tangent space normal map. The texture contains RGB components in
+ linear space. Each texel represents the XYZ components of a normal
+ vector in tangent space. Red [0 to 255] maps to X [-1 to 1]. Green
+ [0 to 255] maps to Y [-1 to 1]. Blue [128 to 255] maps to Z
+ [1/255 to 1]. The normal vectors use OpenGL conventions where +X is
+ right and +Y is up. +Z points toward the viewer.
+ occlusionTexture : (n,n,1) float or :class:`Texture`, optional
+ The occlusion map texture. The occlusion values are sampled from the R
+ channel. Higher values indicate areas that should receive full indirect
+ lighting and lower values indicate no indirect lighting. These values
+ are linear. If other channels are present (GBA), they are ignored for
+ occlusion calculations.
+ emissiveTexture : (n,n,3) float or :class:`Texture`, optional
+ The emissive map controls the color and intensity of the light being
+ emitted by the material. This texture contains RGB components in sRGB
+ color space. If a fourth component (A) is present, it is ignored.
+ emissiveFactor : (3,) float, optional
+ The RGB components of the emissive color of the material. These values
+ are linear. If an emissiveTexture is specified, this value is
+ multiplied with the texel values.
+ alphaMode : str, optional
+ The material's alpha rendering mode enumeration specifying the
+ interpretation of the alpha value of the main factor and texture.
+ Allowed Values:
+
+ - `"OPAQUE"` The alpha value is ignored and the rendered output is
+ fully opaque.
+ - `"MASK"` The rendered output is either fully opaque or fully
+ transparent depending on the alpha value and the specified alpha
+ cutoff value.
+ - `"BLEND"` The alpha value is used to composite the source and
+ destination areas. The rendered output is combined with the
+ background using the normal painting operation (i.e. the Porter
+ and Duff over operator).
+
+ alphaCutoff : float, optional
+ Specifies the cutoff threshold when in MASK mode. If the alpha value is
+ greater than or equal to this value then it is rendered as fully
+ opaque, otherwise, it is rendered as fully transparent.
+ A value greater than 1.0 will render the entire material as fully
+ transparent. This value is ignored for other modes.
+ doubleSided : bool, optional
+ Specifies whether the material is double sided. When this value is
+ false, back-face culling is enabled. When this value is true,
+ back-face culling is disabled and double sided lighting is enabled.
+ smooth : bool, optional
+ If True, the material is rendered smoothly by using only one normal
+ per vertex and face indexing.
+ wireframe : bool, optional
+ If True, the material is rendered in wireframe mode.
+ diffuseFactor : (4,) float
+ The RGBA components of the reflected diffuse color of the material.
+ Metals have a diffuse value of [0.0, 0.0, 0.0]. The fourth component
+ (A) is the opacity of the material. The values are linear.
+ diffuseTexture : (n,n,4) float or :class:`Texture`, optional
+ The diffuse texture. This texture contains RGB(A) components of the
+ reflected diffuse color of the material in sRGB color space. If the
+ fourth component (A) is present, it represents the alpha coverage of
+ the material. Otherwise, an alpha of 1.0 is assumed.
+ The alphaMode property specifies how alpha is interpreted.
+ The stored texels must not be premultiplied.
+ specularFactor : (3,) float
+ The specular RGB color of the material. This value is linear.
+ glossinessFactor : float
+ The glossiness or smoothness of the material. A value of 1.0 means the
+ material has full glossiness or is perfectly smooth. A value of 0.0
+ means the material has no glossiness or is perfectly rough. This value
+ is linear.
+ specularGlossinessTexture : (n,n,4) or :class:`Texture`, optional
+ The specular-glossiness texture is a RGBA texture, containing the
+ specular color (RGB) in sRGB space and the glossiness value (A) in
+ linear space.
+ """
+
+ def __init__(self,
+ name=None,
+ normalTexture=None,
+ occlusionTexture=None,
+ emissiveTexture=None,
+ emissiveFactor=None,
+ alphaMode=None,
+ alphaCutoff=None,
+ doubleSided=False,
+ smooth=True,
+ wireframe=False,
+ diffuseFactor=None,
+ diffuseTexture=None,
+ specularFactor=None,
+ glossinessFactor=1.0,
+ specularGlossinessTexture=None):
+ super(SpecularGlossinessMaterial, self).__init__(
+ name=name,
+ normalTexture=normalTexture,
+ occlusionTexture=occlusionTexture,
+ emissiveTexture=emissiveTexture,
+ emissiveFactor=emissiveFactor,
+ alphaMode=alphaMode,
+ alphaCutoff=alphaCutoff,
+ doubleSided=doubleSided,
+ smooth=smooth,
+ wireframe=wireframe
+ )
+
+ # Set defaults
+ if diffuseFactor is None:
+ diffuseFactor = np.ones(4).astype(np.float32)
+ if specularFactor is None:
+ specularFactor = np.ones(3).astype(np.float32)
+
+ self.diffuseFactor = diffuseFactor
+ self.diffuseTexture = diffuseTexture
+ self.specularFactor = specularFactor
+ self.glossinessFactor = glossinessFactor
+ self.specularGlossinessTexture = specularGlossinessTexture
+
+ @property
+ def diffuseFactor(self):
+ """(4,) float : The diffuse base color.
+ """
+ return self._diffuseFactor
+
+ @diffuseFactor.setter
+ def diffuseFactor(self, value):
+ self._diffuseFactor = format_color_vector(value, 4)
+
+ @property
+ def diffuseTexture(self):
+ """(n,n,4) float or :class:`Texture` : The diffuse map.
+ """
+ return self._diffuseTexture
+
+ @diffuseTexture.setter
+ def diffuseTexture(self, value):
+ self._diffuseTexture = self._format_texture(value, 'RGBA')
+ self._tex_flags = None
+
+ @property
+ def specularFactor(self):
+ """(3,) float : The specular color of the material.
+ """
+ return self._specularFactor
+
+ @specularFactor.setter
+ def specularFactor(self, value):
+ self._specularFactor = format_color_vector(value, 3)
+
+ @property
+ def glossinessFactor(self):
+ """float : The glossiness of the material.
+ """
+ return self._glossinessFactor
+
+ @glossinessFactor.setter
+ def glossinessFactor(self, value):
+ if value < 0 or value > 1:
+ raise ValueError('glossiness factor must be in range [0,1]')
+ self._glossinessFactor = float(value)
+
+ @property
+ def specularGlossinessTexture(self):
+ """(n,n,4) or :class:`Texture` : The specular-glossiness texture.
+ """
+ return self._specularGlossinessTexture
+
+ @specularGlossinessTexture.setter
+ def specularGlossinessTexture(self, value):
+ self._specularGlossinessTexture = self._format_texture(value, 'GB')
+ self._tex_flags = None
+
+ def _compute_tex_flags(self):
+ flags = super(SpecularGlossinessMaterial, self)._compute_tex_flags()
+ if self.diffuseTexture is not None:
+ flags |= TexFlags.DIFFUSE
+ if self.specularGlossinessTexture is not None:
+ flags |= TexFlags.SPECULAR_GLOSSINESS
+ return flags
+
+ def _compute_transparency(self):
+ if self.alphaMode == 'OPAQUE':
+ return False
+ cutoff = self.alphaCutoff
+ if self.alphaMode == 'BLEND':
+ cutoff = 1.0
+ if self.diffuseFactor[3] < cutoff:
+ return True
+ if (self.diffuseTexture is not None and
+ self.diffuseTexture.is_transparent(cutoff)):
+ return True
+ return False
+
+ def _compute_textures(self):
+ textures = super(SpecularGlossinessMaterial, self)._compute_textures()
+ all_textures = [self.diffuseTexture, self.specularGlossinessTexture]
+ all_textures = {t for t in all_textures if t is not None}
+ textures |= all_textures
+ return textures
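+
+
+# --- Usage sketch (added for documentation; not part of upstream pyrender) ---
+# A semi-transparent metallic-roughness material: with ``alphaMode='BLEND'``,
+# any base-color alpha below 1.0 makes ``is_transparent`` report True (see
+# ``_compute_transparency`` above). No textures or GL context are needed.
+if __name__ == '__main__':
+    mat = MetallicRoughnessMaterial(
+        baseColorFactor=[0.2, 0.5, 0.8, 0.5],
+        metallicFactor=0.0,
+        roughnessFactor=0.6,
+        alphaMode='BLEND',
+        doubleSided=True,
+    )
+    print(mat.is_transparent, mat.roughnessFactor)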
diff --git a/pyrender/pyrender/mesh.py b/pyrender/pyrender/mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..36833ea3dfa6c095a18fc745ff34cf106e83c95d
--- /dev/null
+++ b/pyrender/pyrender/mesh.py
@@ -0,0 +1,328 @@
+"""Meshes, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-mesh
+
+Author: Matthew Matl
+"""
+import copy
+
+import numpy as np
+import trimesh
+
+from .primitive import Primitive
+from .constants import GLTF
+from .material import MetallicRoughnessMaterial
+
+
+class Mesh(object):
+ """A set of primitives to be rendered.
+
+ Parameters
+ ----------
+ name : str
+ The user-defined name of this object.
+ primitives : list of :class:`Primitive`
+ The primitives associated with this mesh.
+ weights : (k,) float
+ Array of weights to be applied to the Morph Targets.
+ is_visible : bool
+ If False, the mesh will not be rendered.
+ """
+
+ def __init__(self, primitives, name=None, weights=None, is_visible=True):
+ self.primitives = primitives
+ self.name = name
+ self.weights = weights
+ self.is_visible = is_visible
+
+ self._bounds = None
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def primitives(self):
+ """list of :class:`Primitive` : The primitives associated
+ with this mesh.
+ """
+ return self._primitives
+
+ @primitives.setter
+ def primitives(self, value):
+ self._primitives = value
+
+ @property
+ def weights(self):
+ """(k,) float : Weights to be applied to morph targets.
+ """
+ return self._weights
+
+ @weights.setter
+ def weights(self, value):
+ self._weights = value
+
+ @property
+ def is_visible(self):
+ """bool : Whether the mesh is visible.
+ """
+ return self._is_visible
+
+ @is_visible.setter
+ def is_visible(self, value):
+ self._is_visible = value
+
+ @property
+ def bounds(self):
+ """(2,3) float : The axis-aligned bounds of the mesh.
+ """
+ if self._bounds is None:
+ bounds = np.array([[np.infty, np.infty, np.infty],
+ [-np.infty, -np.infty, -np.infty]])
+ for p in self.primitives:
+ bounds[0] = np.minimum(bounds[0], p.bounds[0])
+ bounds[1] = np.maximum(bounds[1], p.bounds[1])
+ self._bounds = bounds
+ return self._bounds
+
+ @property
+ def centroid(self):
+ """(3,) float : The centroid of the mesh's axis-aligned bounding box
+ (AABB).
+ """
+ return np.mean(self.bounds, axis=0)
+
+ @property
+ def extents(self):
+ """(3,) float : The lengths of the axes of the mesh's AABB.
+ """
+ return np.diff(self.bounds, axis=0).reshape(-1)
+
+ @property
+ def scale(self):
+ """(3,) float : The length of the diagonal of the mesh's AABB.
+ """
+ return np.linalg.norm(self.extents)
+
+ @property
+ def is_transparent(self):
+ """bool : If True, the mesh is partially-transparent.
+ """
+ for p in self.primitives:
+ if p.is_transparent:
+ return True
+ return False
+
+ @staticmethod
+ def from_points(points, colors=None, normals=None,
+ is_visible=True, poses=None):
+ """Create a Mesh from a set of points.
+
+ Parameters
+ ----------
+ points : (n,3) float
+ The point positions.
+ colors : (n,3) or (n,4) float, optional
+ RGB or RGBA colors for each point.
+ normals : (n,3) float, optional
+ The normal vectors for each point.
+ is_visible : bool
+ If False, the points will not be rendered.
+ poses : (n,4,4) float, optional
+ Array of 4x4 transformation matrices for instancing this object.
+
+ Returns
+ -------
+ mesh : :class:`Mesh`
+ The created mesh.
+ """
+ primitive = Primitive(
+ positions=points,
+ normals=normals,
+ color_0=colors,
+ mode=GLTF.POINTS,
+ poses=poses
+ )
+ mesh = Mesh(primitives=[primitive], is_visible=is_visible)
+ return mesh
+
+ @staticmethod
+ def from_trimesh(mesh, material=None, is_visible=True,
+ poses=None, wireframe=False, smooth=True):
+ """Create a Mesh from a :class:`~trimesh.base.Trimesh`.
+
+ Parameters
+ ----------
+ mesh : :class:`~trimesh.base.Trimesh` or list of them
+ A triangular mesh or a list of meshes.
+ material : :class:`Material`
+ The material of the object. Overrides any mesh material.
+ If not specified and the mesh has no material, a default material
+ will be used.
+ is_visible : bool
+ If False, the mesh will not be rendered.
+ poses : (n,4,4) float
+ Array of 4x4 transformation matrices for instancing this object.
+ wireframe : bool
+ If `True`, the mesh will be rendered as a wireframe object.
+ smooth : bool
+ If `True`, the mesh will be rendered with interpolated vertex
+ normals. Otherwise, the mesh edges will stay sharp.
+
+ Returns
+ -------
+ mesh : :class:`Mesh`
+ The created mesh.
+ """
+
+ if isinstance(mesh, (list, tuple, set, np.ndarray)):
+ meshes = list(mesh)
+ elif isinstance(mesh, trimesh.Trimesh):
+ meshes = [mesh]
+ else:
+ raise TypeError('Expected a Trimesh or a list, got a {}'
+ .format(type(mesh)))
+
+ primitives = []
+ for m in meshes:
+ positions = None
+ normals = None
+ indices = None
+
+ # Compute positions, normals, and indices
+ if smooth:
+ positions = m.vertices.copy()
+ normals = m.vertex_normals.copy()
+ indices = m.faces.copy()
+ else:
+ positions = m.vertices[m.faces].reshape((3 * len(m.faces), 3))
+ normals = np.repeat(m.face_normals, 3, axis=0)
+
+ # Compute colors, texture coords, and material properties
+ color_0, texcoord_0, primitive_material = Mesh._get_trimesh_props(m, smooth=smooth, material=material)
+
+ # Override if material is given.
+ if material is not None:
+ # Deep-copy so per-primitive tweaks (e.g. the wireframe flag below)
+ # do not mutate the caller's material.
+ primitive_material = copy.deepcopy(material)
+
+ if primitive_material is None:
+ # Replace material with default if needed
+ primitive_material = MetallicRoughnessMaterial(
+ alphaMode='BLEND',
+ baseColorFactor=[0.3, 0.3, 0.3, 1.0],
+ metallicFactor=0.2,
+ roughnessFactor=0.8
+ )
+
+ primitive_material.wireframe = wireframe
+
+ # Create the primitive
+ primitives.append(Primitive(
+ positions=positions,
+ normals=normals,
+ texcoord_0=texcoord_0,
+ color_0=color_0,
+ indices=indices,
+ material=primitive_material,
+ mode=GLTF.TRIANGLES,
+ poses=poses
+ ))
+
+ return Mesh(primitives=primitives, is_visible=is_visible)
+
+ @staticmethod
+ def _get_trimesh_props(mesh, smooth=False, material=None):
+ """Gets the vertex colors, texture coordinates, and material properties
+ from a :class:`~trimesh.base.Trimesh`.
+ """
+ colors = None
+ texcoords = None
+
+ # If the trimesh visual is undefined, return none for both
+ if not mesh.visual.defined:
+ return colors, texcoords, material
+
+ # Process vertex colors
+ if material is None:
+ if mesh.visual.kind == 'vertex':
+ vc = mesh.visual.vertex_colors.copy()
+ if smooth:
+ colors = vc
+ else:
+ colors = vc[mesh.faces].reshape(
+ (3 * len(mesh.faces), vc.shape[1])
+ )
+ material = MetallicRoughnessMaterial(
+ alphaMode='BLEND',
+ baseColorFactor=[1.0, 1.0, 1.0, 1.0],
+ metallicFactor=0.2,
+ roughnessFactor=0.8
+ )
+ # Process face colors
+ elif mesh.visual.kind == 'face':
+ if smooth:
+ raise ValueError('Cannot use face colors with a smooth mesh')
+ else:
+ colors = np.repeat(mesh.visual.face_colors, 3, axis=0)
+
+ material = MetallicRoughnessMaterial(
+ alphaMode='BLEND',
+ baseColorFactor=[1.0, 1.0, 1.0, 1.0],
+ metallicFactor=0.2,
+ roughnessFactor=0.8
+ )
+
+ # Process texture colors
+ if mesh.visual.kind == 'texture':
+ # Configure UV coordinates
+ if mesh.visual.uv is not None and len(mesh.visual.uv) != 0:
+ uv = mesh.visual.uv.copy()
+ if smooth:
+ texcoords = uv
+ else:
+ texcoords = uv[mesh.faces].reshape(
+ (3 * len(mesh.faces), uv.shape[1])
+ )
+
+ if material is None:
+ # Configure mesh material
+ mat = mesh.visual.material
+
+ if isinstance(mat, trimesh.visual.texture.PBRMaterial):
+ material = MetallicRoughnessMaterial(
+ normalTexture=mat.normalTexture,
+ occlusionTexture=mat.occlusionTexture,
+ emissiveTexture=mat.emissiveTexture,
+ emissiveFactor=mat.emissiveFactor,
+ alphaMode='BLEND',
+ baseColorFactor=mat.baseColorFactor,
+ baseColorTexture=mat.baseColorTexture,
+ metallicFactor=mat.metallicFactor,
+ roughnessFactor=mat.roughnessFactor,
+ metallicRoughnessTexture=mat.metallicRoughnessTexture,
+ doubleSided=mat.doubleSided,
+ alphaCutoff=mat.alphaCutoff
+ )
+ elif isinstance(mat, trimesh.visual.texture.SimpleMaterial):
+ glossiness = mat.kwargs.get('Ns', 1.0)
+ if isinstance(glossiness, list):
+ glossiness = float(glossiness[0])
+ roughness = (2 / (glossiness + 2)) ** (1.0 / 4.0)
+ material = MetallicRoughnessMaterial(
+ alphaMode='BLEND',
+ roughnessFactor=roughness,
+ baseColorFactor=mat.diffuse,
+ baseColorTexture=mat.image,
+ )
+ elif isinstance(mat, MetallicRoughnessMaterial):
+ material = mat
+
+ return colors, texcoords, material
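+
+
+# --- Usage sketch (added for documentation; not part of upstream pyrender) ---
+# Meshes are assembled on the CPU and need no OpenGL context until they are
+# handed to a renderer. Requires trimesh (already imported above).
+if __name__ == '__main__':
+    sphere_tm = trimesh.creation.icosphere(subdivisions=3, radius=0.1)
+    sphere = Mesh.from_trimesh(sphere_tm, smooth=True)
+    points = np.random.uniform(-1.0, 1.0, size=(500, 3))
+    cloud = Mesh.from_points(points,
+                             colors=np.tile([0.0, 1.0, 0.0, 1.0], (500, 1)))
+    print(sphere.bounds, cloud.centroid, sphere.is_transparent)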
diff --git a/pyrender/pyrender/node.py b/pyrender/pyrender/node.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f37f7856cc732a37dc58253022a7c331489493e
--- /dev/null
+++ b/pyrender/pyrender/node.py
@@ -0,0 +1,263 @@
+"""Nodes, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-node
+
+Author: Matthew Matl
+"""
+import numpy as np
+
+import trimesh.transformations as transformations
+
+from .camera import Camera
+from .mesh import Mesh
+from .light import Light
+
+
+class Node(object):
+ """A node in the node hierarchy.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ camera : :class:`Camera`, optional
+ The camera in this node.
+ children : list of :class:`Node`
+ The children of this node.
+ skin : int, optional
+ The index of the skin referenced by this node.
+ matrix : (4,4) float, optional
+ A floating-point 4x4 transformation matrix.
+ mesh : :class:`Mesh`, optional
+ The mesh in this node.
+ rotation : (4,) float, optional
+ The node's unit quaternion in the order (x, y, z, w), where
+ w is the scalar.
+ scale : (3,) float, optional
+ The node's non-uniform scale, given as the scaling factors along the x,
+ y, and z axes.
+ translation : (3,) float, optional
+ The node's translation along the x, y, and z axes.
+ weights : (n,) float
+ The weights of the instantiated Morph Target. Number of elements must
+ match number of Morph Targets of used mesh.
+ light : :class:`Light`, optional
+ The light in this node.
+ """
+
+ def __init__(self,
+ name=None,
+ camera=None,
+ children=None,
+ skin=None,
+ matrix=None,
+ mesh=None,
+ rotation=None,
+ scale=None,
+ translation=None,
+ weights=None,
+ light=None):
+ # Set defaults
+ if children is None:
+ children = []
+
+ self._matrix = None
+ self._scale = None
+ self._rotation = None
+ self._translation = None
+ if matrix is None:
+ if rotation is None:
+ rotation = np.array([0.0, 0.0, 0.0, 1.0])
+ if translation is None:
+ translation = np.zeros(3)
+ if scale is None:
+ scale = np.ones(3)
+ self.rotation = rotation
+ self.translation = translation
+ self.scale = scale
+ else:
+ self.matrix = matrix
+
+ self.name = name
+ self.camera = camera
+ self.children = children
+ self.skin = skin
+ self.mesh = mesh
+ self.weights = weights
+ self.light = light
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def camera(self):
+ """:class:`Camera` : The camera in this node.
+ """
+ return self._camera
+
+ @camera.setter
+ def camera(self, value):
+ if value is not None and not isinstance(value, Camera):
+ raise TypeError('Value must be a camera')
+ self._camera = value
+
+ @property
+ def children(self):
+ """list of :class:`Node` : The children of this node.
+ """
+ return self._children
+
+ @children.setter
+ def children(self, value):
+ self._children = value
+
+ @property
+ def skin(self):
+ """int : The skin index for this node.
+ """
+ return self._skin
+
+ @skin.setter
+ def skin(self, value):
+ self._skin = value
+
+ @property
+ def mesh(self):
+ """:class:`Mesh` : The mesh in this node.
+ """
+ return self._mesh
+
+ @mesh.setter
+ def mesh(self, value):
+ if value is not None and not isinstance(value, Mesh):
+ raise TypeError('Value must be a mesh')
+ self._mesh = value
+
+ @property
+ def light(self):
+ """:class:`Light` : The light in this node.
+ """
+ return self._light
+
+ @light.setter
+ def light(self, value):
+ if value is not None and not isinstance(value, Light):
+ raise TypeError('Value must be a light')
+ self._light = value
+
+ @property
+ def rotation(self):
+ """(4,) float : The xyzw quaternion for this node.
+ """
+ return self._rotation
+
+ @rotation.setter
+ def rotation(self, value):
+ value = np.asanyarray(value)
+ if value.shape != (4,):
+ raise ValueError('Quaternion must be a (4,) vector')
+ if np.abs(np.linalg.norm(value) - 1.0) > 1e-3:
+ raise ValueError('Quaternion must have norm == 1.0')
+ self._rotation = value
+ self._matrix = None
+
+ @property
+ def translation(self):
+ """(3,) float : The translation for this node.
+ """
+ return self._translation
+
+ @translation.setter
+ def translation(self, value):
+ value = np.asanyarray(value)
+ if value.shape != (3,):
+ raise ValueError('Translation must be a (3,) vector')
+ self._translation = value
+ self._matrix = None
+
+ @property
+ def scale(self):
+ """(3,) float : The scale for this node.
+ """
+ return self._scale
+
+ @scale.setter
+ def scale(self, value):
+ value = np.asanyarray(value)
+ if value.shape != (3,):
+ raise ValueError('Scale must be a (3,) vector')
+ self._scale = value
+ self._matrix = None
+
+ @property
+ def matrix(self):
+ """(4,4) float : The homogenous transform matrix for this node.
+
+ Note that the returned matrix is a copy of the internal matrix,
+ so its individual elements are not settable: you can assign a whole
+ new matrix, but you cannot modify one element in place.
+ """
+ if self._matrix is None:
+ self._matrix = self._m_from_tqs(
+ self.translation, self.rotation, self.scale
+ )
+ return self._matrix.copy()
+
+ @matrix.setter
+ def matrix(self, value):
+ value = np.asanyarray(value)
+ if value.shape != (4,4):
+ raise ValueError('Matrix must be a 4x4 numpy ndarray')
+ if not np.allclose(value[3,:], np.array([0.0, 0.0, 0.0, 1.0])):
+ raise ValueError('Bottom row of matrix must be [0,0,0,1]')
+ self.rotation = Node._q_from_m(value)
+ self.scale = Node._s_from_m(value)
+ self.translation = Node._t_from_m(value)
+ self._matrix = value
+
+ @staticmethod
+ def _t_from_m(m):
+ return m[:3,3]
+
+ @staticmethod
+ def _r_from_m(m):
+ U = m[:3,:3]
+ norms = np.linalg.norm(U.T, axis=1)
+ return U / norms
+
+ @staticmethod
+ def _q_from_m(m):
+ M = np.eye(4)
+ M[:3,:3] = Node._r_from_m(m)
+ q_wxyz = transformations.quaternion_from_matrix(M)
+ return np.roll(q_wxyz, -1)
+
+ @staticmethod
+ def _s_from_m(m):
+ return np.linalg.norm(m[:3,:3].T, axis=1)
+
+ @staticmethod
+ def _r_from_q(q):
+ q_wxyz = np.roll(q, 1)
+ return transformations.quaternion_matrix(q_wxyz)[:3,:3]
+
+ @staticmethod
+ def _m_from_tqs(t, q, s):
+ S = np.eye(4)
+ S[:3,:3] = np.diag(s)
+
+ R = np.eye(4)
+ R[:3,:3] = Node._r_from_q(q)
+
+ T = np.eye(4)
+ T[:3,3] = t
+
+ return T.dot(R.dot(S))
diff --git a/pyrender/pyrender/offscreen.py b/pyrender/pyrender/offscreen.py
new file mode 100644
index 0000000000000000000000000000000000000000..340142983006cdc6f51b6d114e9b2b294aa4a919
--- /dev/null
+++ b/pyrender/pyrender/offscreen.py
@@ -0,0 +1,160 @@
+"""Wrapper for offscreen rendering.
+
+Author: Matthew Matl
+"""
+import os
+
+from .renderer import Renderer
+from .constants import RenderFlags
+
+
+class OffscreenRenderer(object):
+ """A wrapper for offscreen rendering.
+
+ Parameters
+ ----------
+ viewport_width : int
+ The width of the main viewport, in pixels.
+ viewport_height : int
+ The height of the main viewport, in pixels.
+ point_size : float
+ The size of screen-space points in pixels.
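+
+ Examples
+ --------
+ An illustrative sketch only; ``scene`` is assumed to be a populated
+ :class:`.Scene`, and on a headless machine ``PYOPENGL_PLATFORM`` should
+ be set to ``egl`` or ``osmesa`` before pyrender is imported.
+
+ >>> renderer = OffscreenRenderer(viewport_width=640, viewport_height=480)
+ >>> color, depth = renderer.render(scene)
+ >>> renderer.delete()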
+ """
+
+ def __init__(self, viewport_width, viewport_height, point_size=1.0):
+ self.viewport_width = viewport_width
+ self.viewport_height = viewport_height
+ self.point_size = point_size
+
+ self._platform = None
+ self._renderer = None
+ self._create()
+
+ @property
+ def viewport_width(self):
+ """int : The width of the main viewport, in pixels.
+ """
+ return self._viewport_width
+
+ @viewport_width.setter
+ def viewport_width(self, value):
+ self._viewport_width = int(value)
+
+ @property
+ def viewport_height(self):
+ """int : The height of the main viewport, in pixels.
+ """
+ return self._viewport_height
+
+ @viewport_height.setter
+ def viewport_height(self, value):
+ self._viewport_height = int(value)
+
+ @property
+ def point_size(self):
+ """float : The pixel size of points in point clouds.
+ """
+ return self._point_size
+
+ @point_size.setter
+ def point_size(self, value):
+ self._point_size = float(value)
+
+ def render(self, scene, flags=RenderFlags.NONE, seg_node_map=None):
+ """Render a scene with the given set of flags.
+
+ Parameters
+ ----------
+ scene : :class:`Scene`
+ A scene to render.
+ flags : int
+ A bitwise or of one or more flags from :class:`.RenderFlags`.
+ seg_node_map : dict
+ A map from :class:`.Node` objects to (3,) colors for each.
+ If specified along with flags set to :attr:`.RenderFlags.SEG`,
+ the color image will be a segmentation image.
+
+ Returns
+ -------
+ color_im : (h, w, 3) uint8 or (h, w, 4) uint8
+ The color buffer in RGB format, or in RGBA format if
+ :attr:`.RenderFlags.RGBA` is set.
+ Not returned if flags includes :attr:`.RenderFlags.DEPTH_ONLY`.
+ depth_im : (h, w) float32
+ The depth buffer in linear units.
+ """
+ self._platform.make_current()
+ # If platform does not support dynamically-resizing framebuffers,
+ # destroy it and restart it
+ if (self._platform.viewport_height != self.viewport_height or
+ self._platform.viewport_width != self.viewport_width):
+ if not self._platform.supports_framebuffers():
+ self.delete()
+ self._create()
+
+ self._platform.make_current()
+ self._renderer.viewport_width = self.viewport_width
+ self._renderer.viewport_height = self.viewport_height
+ self._renderer.point_size = self.point_size
+
+ if self._platform.supports_framebuffers():
+ flags |= RenderFlags.OFFSCREEN
+ retval = self._renderer.render(scene, flags, seg_node_map)
+ else:
+ self._renderer.render(scene, flags, seg_node_map)
+ depth = self._renderer.read_depth_buf()
+ if flags & RenderFlags.DEPTH_ONLY:
+ retval = depth
+ else:
+ color = self._renderer.read_color_buf()
+ retval = color, depth
+
+ # Make the platform not current
+ self._platform.make_uncurrent()
+ return retval
+
+ def delete(self):
+ """Free all OpenGL resources.
+ """
+ self._platform.make_current()
+ self._renderer.delete()
+ self._platform.delete_context()
+ del self._renderer
+ del self._platform
+ self._renderer = None
+ self._platform = None
+ import gc
+ gc.collect()
+
+ def _create(self):
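+ # Pick the offscreen platform from the PYOPENGL_PLATFORM environment
+ # variable: unset uses a hidden pyglet window, 'egl' uses a headless
+ # GPU context, and 'osmesa' uses software rendering.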
+ if 'PYOPENGL_PLATFORM' not in os.environ:
+ from pyrender.platforms.pyglet_platform import PygletPlatform
+ self._platform = PygletPlatform(self.viewport_width,
+ self.viewport_height)
+ elif os.environ['PYOPENGL_PLATFORM'] == 'egl':
+ from pyrender.platforms import egl
+ device_id = int(os.environ.get('EGL_DEVICE_ID', '0'))
+ egl_device = egl.get_device_by_index(device_id)
+ self._platform = egl.EGLPlatform(self.viewport_width,
+ self.viewport_height,
+ device=egl_device)
+ elif os.environ['PYOPENGL_PLATFORM'] == 'osmesa':
+ from pyrender.platforms.osmesa import OSMesaPlatform
+ self._platform = OSMesaPlatform(self.viewport_width,
+ self.viewport_height)
+ else:
+ raise ValueError('Unsupported PyOpenGL platform: {}'.format(
+ os.environ['PYOPENGL_PLATFORM']
+ ))
+ self._platform.init_context()
+ self._platform.make_current()
+ self._renderer = Renderer(self.viewport_width, self.viewport_height)
+
+ def __del__(self):
+ try:
+ self.delete()
+ except Exception:
+ pass
+
+
+__all__ = ['OffscreenRenderer']
diff --git a/pyrender/pyrender/platforms/__init__.py b/pyrender/pyrender/platforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7837fd5fdeccab5e48c85e41d20b238ea7396599
--- /dev/null
+++ b/pyrender/pyrender/platforms/__init__.py
@@ -0,0 +1,6 @@
+"""Platforms for generating offscreen OpenGL contexts for rendering.
+
+Author: Matthew Matl
+"""
+
+from .base import Platform
diff --git a/pyrender/pyrender/platforms/base.py b/pyrender/pyrender/platforms/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9ecda906145e239737901809aa59db8d3e231c6
--- /dev/null
+++ b/pyrender/pyrender/platforms/base.py
@@ -0,0 +1,76 @@
+import abc
+
+import six
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Platform(object):
+ """Base class for all OpenGL platforms.
+
+ Parameters
+ ----------
+ viewport_width : int
+ The width of the main viewport, in pixels.
+ viewport_height : int
+ The height of the main viewport, in pixels.
+ """
+
+ def __init__(self, viewport_width, viewport_height):
+ self.viewport_width = viewport_width
+ self.viewport_height = viewport_height
+
+ @property
+ def viewport_width(self):
+ """int : The width of the main viewport, in pixels.
+ """
+ return self._viewport_width
+
+ @viewport_width.setter
+ def viewport_width(self, value):
+ self._viewport_width = value
+
+ @property
+ def viewport_height(self):
+ """int : The height of the main viewport, in pixels.
+ """
+ return self._viewport_height
+
+ @viewport_height.setter
+ def viewport_height(self, value):
+ self._viewport_height = value
+
+ @abc.abstractmethod
+ def init_context(self):
+ """Create an OpenGL context.
+ """
+ pass
+
+ @abc.abstractmethod
+ def make_current(self):
+ """Make the OpenGL context current.
+ """
+ pass
+
+ @abc.abstractmethod
+ def make_uncurrent(self):
+ """Make the OpenGL context uncurrent.
+ """
+ pass
+
+ @abc.abstractmethod
+ def delete_context(self):
+ """Delete the OpenGL context.
+ """
+ pass
+
+ @abc.abstractmethod
+ def supports_framebuffers(self):
+ """Returns True if the method supports framebuffer rendering.
+ """
+ pass
+
+ def __del__(self):
+ try:
+ self.delete_context()
+ except Exception:
+ pass
diff --git a/pyrender/pyrender/platforms/egl.py b/pyrender/pyrender/platforms/egl.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae2478d29c9a538c53ad83fa31f8e2277cd897c8
--- /dev/null
+++ b/pyrender/pyrender/platforms/egl.py
@@ -0,0 +1,219 @@
+import ctypes
+import os
+
+import OpenGL.platform
+
+from .base import Platform
+
+EGL_PLATFORM_DEVICE_EXT = 0x313F
+EGL_DRM_DEVICE_FILE_EXT = 0x3233
+
+
+def _ensure_egl_loaded():
+ plugin = OpenGL.platform.PlatformPlugin.by_name('egl')
+ if plugin is None:
+ raise RuntimeError("EGL platform plugin is not available.")
+
+ plugin_class = plugin.load()
+ plugin.loaded = True
+ # create instance of this platform implementation
+ plugin = plugin_class()
+
+ plugin.install(vars(OpenGL.platform))
+
+
+_ensure_egl_loaded()
+from OpenGL import EGL as egl
+
+
+def _get_egl_func(func_name, res_type, *arg_types):
+ address = egl.eglGetProcAddress(func_name)
+ if address is None:
+ return None
+
+ proto = ctypes.CFUNCTYPE(res_type)
+ proto.argtypes = arg_types
+ func = proto(address)
+ return func
+
+
+def _get_egl_struct(struct_name):
+ from OpenGL._opaque import opaque_pointer_cls
+ return opaque_pointer_cls(struct_name)
+
+
+# These are not defined in PyOpenGL by default.
+_EGLDeviceEXT = _get_egl_struct('EGLDeviceEXT')
+_eglGetPlatformDisplayEXT = _get_egl_func('eglGetPlatformDisplayEXT', egl.EGLDisplay)
+_eglQueryDevicesEXT = _get_egl_func('eglQueryDevicesEXT', egl.EGLBoolean)
+_eglQueryDeviceStringEXT = _get_egl_func('eglQueryDeviceStringEXT', ctypes.c_char_p)
+
+
+def query_devices():
+ if _eglQueryDevicesEXT is None:
+ raise RuntimeError("EGL query extension is not loaded or is not supported.")
+
+ num_devices = egl.EGLint()
+ success = _eglQueryDevicesEXT(0, None, ctypes.pointer(num_devices))
+ if not success or num_devices.value < 1:
+ return []
+
+ devices = (_EGLDeviceEXT * num_devices.value)() # array of size num_devices
+ success = _eglQueryDevicesEXT(num_devices.value, devices, ctypes.pointer(num_devices))
+ if not success or num_devices.value < 1:
+ return []
+
+ return [EGLDevice(devices[i]) for i in range(num_devices.value)]
+
+
+def get_default_device():
+ # Fall back to not using query extension.
+ if _eglQueryDevicesEXT is None:
+ return EGLDevice(None)
+
+ return query_devices()[0]
+
+
+def get_device_by_index(device_id):
+ if _eglQueryDevicesEXT is None and device_id == 0:
+ return get_default_device()
+
+ devices = query_devices()
+ if device_id >= len(devices):
+ raise ValueError('Invalid device ID ({}); only {} devices '
+ 'available'.format(device_id, len(devices)))
+ return devices[device_id]
+
+
+class EGLDevice:
+
+ def __init__(self, display=None):
+ self._display = display
+
+ def get_display(self):
+ if self._display is None:
+ return egl.eglGetDisplay(egl.EGL_DEFAULT_DISPLAY)
+
+ return _eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, self._display, None)
+
+ @property
+ def name(self):
+ if self._display is None:
+ return 'default'
+
+ name = _eglQueryDeviceStringEXT(self._display, EGL_DRM_DEVICE_FILE_EXT)
+ if name is None:
+ return None
+
+ return name.decode('ascii')
+
+ def __repr__(self):
+ return "".format(self.name)
+
+
+class EGLPlatform(Platform):
+ """Renders using EGL.
+ """
+
+ def __init__(self, viewport_width, viewport_height, device: EGLDevice = None):
+ super(EGLPlatform, self).__init__(viewport_width, viewport_height)
+ if device is None:
+ device = get_default_device()
+
+ self._egl_device = device
+ self._egl_display = None
+ self._egl_context = None
+
+ def init_context(self):
+ _ensure_egl_loaded()
+
+ from OpenGL.EGL import (
+ EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_BLUE_SIZE,
+ EGL_RED_SIZE, EGL_GREEN_SIZE, EGL_DEPTH_SIZE,
+ EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER,
+ EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, EGL_CONFORMANT,
+ EGL_NONE, EGL_DEFAULT_DISPLAY, EGL_NO_CONTEXT,
+ EGL_OPENGL_API, EGL_CONTEXT_MAJOR_VERSION,
+ EGL_CONTEXT_MINOR_VERSION,
+ EGL_CONTEXT_OPENGL_PROFILE_MASK,
+ EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
+ eglGetDisplay, eglInitialize, eglChooseConfig,
+ eglBindAPI, eglCreateContext, EGLConfig
+ )
+ from OpenGL import arrays
+
+ config_attributes = arrays.GLintArray.asArray([
+ EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
+ EGL_BLUE_SIZE, 8,
+ EGL_RED_SIZE, 8,
+ EGL_GREEN_SIZE, 8,
+ EGL_DEPTH_SIZE, 24,
+ EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER,
+ EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
+ EGL_CONFORMANT, EGL_OPENGL_BIT,
+ EGL_NONE
+ ])
+ context_attributes = arrays.GLintArray.asArray([
+ EGL_CONTEXT_MAJOR_VERSION, 4,
+ EGL_CONTEXT_MINOR_VERSION, 1,
+ EGL_CONTEXT_OPENGL_PROFILE_MASK,
+ EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
+ EGL_NONE
+ ])
+ major, minor = ctypes.c_long(), ctypes.c_long()
+ num_configs = ctypes.c_long()
+ configs = (EGLConfig * 1)()
+
+ # Cache DISPLAY if necessary and get an off-screen EGL display
+ orig_dpy = None
+ if 'DISPLAY' in os.environ:
+ orig_dpy = os.environ['DISPLAY']
+ del os.environ['DISPLAY']
+
+ self._egl_display = self._egl_device.get_display()
+ if orig_dpy is not None:
+ os.environ['DISPLAY'] = orig_dpy
+
+ # Initialize EGL
+ assert eglInitialize(self._egl_display, major, minor)
+ assert eglChooseConfig(
+ self._egl_display, config_attributes, configs, 1, num_configs
+ )
+
+ # Bind EGL to the OpenGL API
+ assert eglBindAPI(EGL_OPENGL_API)
+
+ # Create an EGL context
+ self._egl_context = eglCreateContext(
+ self._egl_display, configs[0],
+ EGL_NO_CONTEXT, context_attributes
+ )
+
+ # Make it current
+ self.make_current()
+
+ def make_current(self):
+ from OpenGL.EGL import eglMakeCurrent, EGL_NO_SURFACE
+ assert eglMakeCurrent(
+ self._egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+ self._egl_context
+ )
+
+ def make_uncurrent(self):
+ """Make the OpenGL context uncurrent.
+ """
+ pass
+
+ def delete_context(self):
+ from OpenGL.EGL import eglDestroyContext, eglTerminate
+ if self._egl_display is not None:
+ if self._egl_context is not None:
+ eglDestroyContext(self._egl_display, self._egl_context)
+ self._egl_context = None
+ eglTerminate(self._egl_display)
+ self._egl_display = None
+
+ def supports_framebuffers(self):
+ return True
+
+
+__all__ = ['EGLPlatform']
diff --git a/pyrender/pyrender/platforms/osmesa.py b/pyrender/pyrender/platforms/osmesa.py
new file mode 100644
index 0000000000000000000000000000000000000000..deaa5ff44031a107883913ae9a18fc425d650f3d
--- /dev/null
+++ b/pyrender/pyrender/platforms/osmesa.py
@@ -0,0 +1,59 @@
+from .base import Platform
+
+
+__all__ = ['OSMesaPlatform']
+
+
+class OSMesaPlatform(Platform):
+ """Renders into a software buffer using OSMesa. Requires special versions
+ of OSMesa to be installed, plus PyOpenGL upgrade.
+ """
+
+ def __init__(self, viewport_width, viewport_height):
+ super(OSMesaPlatform, self).__init__(viewport_width, viewport_height)
+ self._context = None
+ self._buffer = None
+
+ def init_context(self):
+ from OpenGL import arrays
+ from OpenGL.osmesa import (
+ OSMesaCreateContextAttribs, OSMESA_FORMAT,
+ OSMESA_RGBA, OSMESA_PROFILE, OSMESA_CORE_PROFILE,
+ OSMESA_CONTEXT_MAJOR_VERSION, OSMESA_CONTEXT_MINOR_VERSION,
+ OSMESA_DEPTH_BITS
+ )
+
+ attrs = arrays.GLintArray.asArray([
+ OSMESA_FORMAT, OSMESA_RGBA,
+ OSMESA_DEPTH_BITS, 24,
+ OSMESA_PROFILE, OSMESA_CORE_PROFILE,
+ OSMESA_CONTEXT_MAJOR_VERSION, 3,
+ OSMESA_CONTEXT_MINOR_VERSION, 3,
+ 0
+ ])
+ self._context = OSMesaCreateContextAttribs(attrs, None)
+ self._buffer = arrays.GLubyteArray.zeros(
+ (self.viewport_height, self.viewport_width, 4)
+ )
+
+ def make_current(self):
+ from OpenGL import GL as gl
+ from OpenGL.osmesa import OSMesaMakeCurrent
+ assert(OSMesaMakeCurrent(
+ self._context, self._buffer, gl.GL_UNSIGNED_BYTE,
+ self.viewport_width, self.viewport_height
+ ))
+
+ def make_uncurrent(self):
+ """Make the OpenGL context uncurrent.
+ """
+ pass
+
+ def delete_context(self):
+ from OpenGL.osmesa import OSMesaDestroyContext
+ OSMesaDestroyContext(self._context)
+ self._context = None
+ self._buffer = None
+
+ def supports_framebuffers(self):
+ return False
diff --git a/pyrender/pyrender/platforms/pyglet_platform.py b/pyrender/pyrender/platforms/pyglet_platform.py
new file mode 100644
index 0000000000000000000000000000000000000000..a70cf7b659bc85a92f6c9c8ebcc360662a068507
--- /dev/null
+++ b/pyrender/pyrender/platforms/pyglet_platform.py
@@ -0,0 +1,90 @@
+from pyrender.constants import (TARGET_OPEN_GL_MAJOR, TARGET_OPEN_GL_MINOR,
+ MIN_OPEN_GL_MAJOR, MIN_OPEN_GL_MINOR)
+from .base import Platform
+
+import OpenGL
+
+
+__all__ = ['PygletPlatform']
+
+
+class PygletPlatform(Platform):
+ """Renders on-screen using a 1x1 hidden Pyglet window for getting
+ an OpenGL context.
+ """
+
+ def __init__(self, viewport_width, viewport_height):
+ super(PygletPlatform, self).__init__(viewport_width, viewport_height)
+ self._window = None
+
+ def init_context(self):
+ import pyglet
+ pyglet.options['shadow_window'] = False
+
+ try:
+ pyglet.lib.x11.xlib.XInitThreads()
+ except Exception:
+ pass
+
+ self._window = None
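+ # Try progressively less demanding GL configs: multisampled and plain
+ # configs at the target GL version, then the same pair at the minimum
+ # supported GL version.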
+ confs = [pyglet.gl.Config(sample_buffers=1, samples=4,
+ depth_size=24,
+ double_buffer=True,
+ major_version=TARGET_OPEN_GL_MAJOR,
+ minor_version=TARGET_OPEN_GL_MINOR),
+ pyglet.gl.Config(depth_size=24,
+ double_buffer=True,
+ major_version=TARGET_OPEN_GL_MAJOR,
+ minor_version=TARGET_OPEN_GL_MINOR),
+ pyglet.gl.Config(sample_buffers=1, samples=4,
+ depth_size=24,
+ double_buffer=True,
+ major_version=MIN_OPEN_GL_MAJOR,
+ minor_version=MIN_OPEN_GL_MINOR),
+ pyglet.gl.Config(depth_size=24,
+ double_buffer=True,
+ major_version=MIN_OPEN_GL_MAJOR,
+ minor_version=MIN_OPEN_GL_MINOR)]
+ last_exc = None
+ for conf in confs:
+ try:
+ self._window = pyglet.window.Window(config=conf, visible=False,
+ resizable=False,
+ width=1, height=1)
+ break
+ except pyglet.window.NoSuchConfigException as e:
+ # Keep a reference; 'e' itself is unbound once the except
+ # block exits.
+ last_exc = e
+
+ if not self._window:
+ raise ValueError(
+ 'Failed to initialize Pyglet window with an OpenGL >= 3+ '
+ 'context. If you\'re logged in via SSH, ensure that you\'re '
+ 'running your script with vglrun (i.e. VirtualGL). The '
+ 'internal error message was "{}"'.format(last_exc)
+ )
+
+ def make_current(self):
+ if self._window:
+ self._window.switch_to()
+
+ def make_uncurrent(self):
+ try:
+ import pyglet
+ pyglet.gl.xlib.glx.glXMakeContextCurrent(self._window.context.x_display, 0, 0, None)
+ except Exception:
+ pass
+
+ def delete_context(self):
+ if self._window is not None:
+ self.make_current()
+ cid = OpenGL.contextdata.getContext()
+ try:
+ self._window.context.destroy()
+ self._window.close()
+ except Exception:
+ pass
+ self._window = None
+ OpenGL.contextdata.cleanupContext(cid)
+ del cid
+
+ def supports_framebuffers(self):
+ return True
diff --git a/pyrender/pyrender/primitive.py b/pyrender/pyrender/primitive.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f83f46f532b126a4573e715dd03d079fef755ca
--- /dev/null
+++ b/pyrender/pyrender/primitive.py
@@ -0,0 +1,489 @@
+"""Primitives, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-primitive
+
+Author: Matthew Matl
+"""
+import ctypes
+
+import numpy as np
+
+from OpenGL.GL import *
+
+from .material import Material, MetallicRoughnessMaterial
+from .constants import FLOAT_SZ, UINT_SZ, BufFlags, GLTF
+from .utils import format_color_array
+
+
+class Primitive(object):
+ """A primitive object which can be rendered.
+
+ Parameters
+ ----------
+ positions : (n, 3) float
+ XYZ vertex positions.
+ normals : (n, 3) float
+ Normalized XYZ vertex normals.
+ tangents : (n, 4) float
+ XYZW vertex tangents where the w component is a sign value
+ (either +1 or -1) indicating the handedness of the tangent basis.
+ texcoord_0 : (n, 2) float
+ The first set of UV texture coordinates.
+ texcoord_1 : (n, 2) float
+ The second set of UV texture coordinates.
+ color_0 : (n, 4) float
+ RGBA vertex colors.
+ joints_0 : (n, 4) float
+ Joint information.
+ weights_0 : (n, 4) float
+ Weight information for morphing.
+ indices : (m, 3) int
+ Face indices for triangle meshes or fans.
+ material : :class:`Material`
+ The material to apply to this primitive when rendering.
+ mode : int
+ The type of primitives to render, one of the following:
+
+ - ``0``: POINTS
+ - ``1``: LINES
+ - ``2``: LINE_LOOP
+ - ``3``: LINE_STRIP
+ - ``4``: TRIANGLES
+ - ``5``: TRIANGLE_STRIP
+ - ``6``: TRIANGLE_FAN
+ targets : (k,) int
+ Morph target indices.
+ poses : (x,4,4), float
+ Array of 4x4 transformation matrices for instancing this object.
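+
+ Examples
+ --------
+ A minimal point-cloud sketch with made-up positions; ``GLTF.POINTS``
+ is the mode constant imported from :mod:`.constants`.
+
+ >>> pts = np.random.uniform(size=(100, 3))
+ >>> prim = Primitive(positions=pts, mode=GLTF.POINTS)
+ >>> prim.bounds.shape
+ (2, 3)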
+ """
+
+ def __init__(self,
+ positions,
+ normals=None,
+ tangents=None,
+ texcoord_0=None,
+ texcoord_1=None,
+ color_0=None,
+ joints_0=None,
+ weights_0=None,
+ indices=None,
+ material=None,
+ mode=None,
+ targets=None,
+ poses=None):
+
+ if mode is None:
+ mode = GLTF.TRIANGLES
+
+ self.positions = positions
+ self.normals = normals
+ self.tangents = tangents
+ self.texcoord_0 = texcoord_0
+ self.texcoord_1 = texcoord_1
+ self.color_0 = color_0
+ self.joints_0 = joints_0
+ self.weights_0 = weights_0
+ self.indices = indices
+ self.material = material
+ self.mode = mode
+ self.targets = targets
+ self.poses = poses
+
+ self._bounds = None
+ self._vaid = None
+ self._buffers = []
+ self._is_transparent = None
+ self._buf_flags = None
+
+ @property
+ def positions(self):
+ """(n,3) float : XYZ vertex positions.
+ """
+ return self._positions
+
+ @positions.setter
+ def positions(self, value):
+ value = np.asanyarray(value, dtype=np.float32)
+ self._positions = np.ascontiguousarray(value)
+ self._bounds = None
+
+ @property
+ def normals(self):
+ """(n,3) float : Normalized XYZ vertex normals.
+ """
+ return self._normals
+
+ @normals.setter
+ def normals(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ if value.shape != self.positions.shape:
+ raise ValueError('Incorrect normals shape')
+ self._normals = value
+
+ @property
+ def tangents(self):
+ """(n,4) float : XYZW vertex tangents.
+ """
+ return self._tangents
+
+ @tangents.setter
+ def tangents(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ if value.shape != (self.positions.shape[0], 4):
+ raise ValueError('Incorrect tangent shape')
+ self._tangents = value
+
+ @property
+ def texcoord_0(self):
+ """(n,2) float : The first set of UV texture coordinates.
+ """
+ return self._texcoord_0
+
+ @texcoord_0.setter
+ def texcoord_0(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ if (value.ndim != 2 or value.shape[0] != self.positions.shape[0] or
+ value.shape[1] < 2):
+ raise ValueError('Incorrect texture coordinate shape')
+ if value.shape[1] > 2:
+ value = value[:,:2]
+ self._texcoord_0 = value
+
+ @property
+ def texcoord_1(self):
+ """(n,2) float : The second set of UV texture coordinates.
+ """
+ return self._texcoord_1
+
+ @texcoord_1.setter
+ def texcoord_1(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ if (value.ndim != 2 or value.shape[0] != self.positions.shape[0] or
+ value.shape[1] != 2):
+ raise ValueError('Incorrect texture coordinate shape')
+ self._texcoord_1 = value
+
+ @property
+ def color_0(self):
+ """(n,4) float : RGBA vertex colors.
+ """
+ return self._color_0
+
+ @color_0.setter
+ def color_0(self, value):
+ if value is not None:
+ value = np.ascontiguousarray(
+ format_color_array(value, shape=(len(self.positions), 4))
+ )
+ self._is_transparent = None
+ self._color_0 = value
+
+ @property
+ def joints_0(self):
+ """(n,4) float : Joint information.
+ """
+ return self._joints_0
+
+ @joints_0.setter
+ def joints_0(self, value):
+ self._joints_0 = value
+
+ @property
+ def weights_0(self):
+ """(n,4) float : Weight information for morphing.
+ """
+ return self._weights_0
+
+ @weights_0.setter
+ def weights_0(self, value):
+ self._weights_0 = value
+
+ @property
+ def indices(self):
+ """(m,3) int : Face indices for triangle meshes or fans.
+ """
+ return self._indices
+
+ @indices.setter
+ def indices(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ self._indices = value
+
+ @property
+ def material(self):
+ """:class:`Material` : The material for this primitive.
+ """
+ return self._material
+
+ @material.setter
+ def material(self, value):
+ # Create default material
+ if value is None:
+ value = MetallicRoughnessMaterial()
+ else:
+ if not isinstance(value, Material):
+ raise TypeError('Object material must be of type Material')
+ self._material = value
+
+ @property
+ def mode(self):
+ """int : The type of primitive to render.
+ """
+ return self._mode
+
+ @mode.setter
+ def mode(self, value):
+ value = int(value)
+ if value < GLTF.POINTS or value > GLTF.TRIANGLE_FAN:
+ raise ValueError('Invalid mode')
+ self._mode = value
+
+ @property
+ def targets(self):
+ """(k,) int : Morph target indices.
+ """
+ return self._targets
+
+ @targets.setter
+ def targets(self, value):
+ self._targets = value
+
+ @property
+ def poses(self):
+ """(x,4,4) float : Homogenous transforms for instancing this primitive.
+ """
+ return self._poses
+
+ @poses.setter
+ def poses(self, value):
+ if value is not None:
+ value = np.asanyarray(value, dtype=np.float32)
+ value = np.ascontiguousarray(value)
+ if value.ndim == 2:
+ value = value[np.newaxis,:,:]
+ if value.shape[1] != 4 or value.shape[2] != 4:
+ raise ValueError('Pose matrices must be of shape (n,4,4), '
+ 'got {}'.format(value.shape))
+ self._poses = value
+ self._bounds = None
+
+ @property
+ def bounds(self):
+ if self._bounds is None:
+ self._bounds = self._compute_bounds()
+ return self._bounds
+
+ @property
+ def centroid(self):
+ """(3,) float : The centroid of the primitive's AABB.
+ """
+ return np.mean(self.bounds, axis=0)
+
+ @property
+ def extents(self):
+ """(3,) float : The lengths of the axes of the primitive's AABB.
+ """
+ return np.diff(self.bounds, axis=0).reshape(-1)
+
+ @property
+ def scale(self):
+ """(3,) float : The length of the diagonal of the primitive's AABB.
+ """
+ return np.linalg.norm(self.extents)
+
+ @property
+ def buf_flags(self):
+ """int : The flags for the render buffer.
+ """
+ if self._buf_flags is None:
+ self._buf_flags = self._compute_buf_flags()
+ return self._buf_flags
+
+ def delete(self):
+ self._unbind()
+ self._remove_from_context()
+
+ @property
+ def is_transparent(self):
+ """bool : If True, the mesh is partially-transparent.
+ """
+ return self._compute_transparency()
+
+ def _add_to_context(self):
+ if self._vaid is not None:
+ raise ValueError('Mesh is already bound to a context')
+
+ # Generate and bind VAO
+ self._vaid = glGenVertexArrays(1)
+ glBindVertexArray(self._vaid)
+
+ #######################################################################
+ # Fill vertex buffer
+ #######################################################################
+
+ # Generate and bind vertex buffer
+ vertexbuffer = glGenBuffers(1)
+ self._buffers.append(vertexbuffer)
+ glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer)
+
+ # positions
+ vertex_data = self.positions
+ attr_sizes = [3]
+
+ # Normals
+ if self.normals is not None:
+ vertex_data = np.hstack((vertex_data, self.normals))
+ attr_sizes.append(3)
+
+ # Tangents
+ if self.tangents is not None:
+ vertex_data = np.hstack((vertex_data, self.tangents))
+ attr_sizes.append(4)
+
+ # Texture Coordinates
+ if self.texcoord_0 is not None:
+ vertex_data = np.hstack((vertex_data, self.texcoord_0))
+ attr_sizes.append(2)
+ if self.texcoord_1 is not None:
+ vertex_data = np.hstack((vertex_data, self.texcoord_1))
+ attr_sizes.append(2)
+
+ # Color
+ if self.color_0 is not None:
+ vertex_data = np.hstack((vertex_data, self.color_0))
+ attr_sizes.append(4)
+
+ # TODO JOINTS AND WEIGHTS
+ # PASS
+
+ # Copy data to buffer
+ vertex_data = np.ascontiguousarray(
+ vertex_data.flatten().astype(np.float32)
+ )
+ glBufferData(
+ GL_ARRAY_BUFFER, FLOAT_SZ * len(vertex_data),
+ vertex_data, GL_STATIC_DRAW
+ )
+ total_sz = sum(attr_sizes)
+ offset = 0
+ for i, sz in enumerate(attr_sizes):
+ glVertexAttribPointer(
+ i, sz, GL_FLOAT, GL_FALSE, FLOAT_SZ * total_sz,
+ ctypes.c_void_p(FLOAT_SZ * offset)
+ )
+ glEnableVertexAttribArray(i)
+ offset += sz
+
+ #######################################################################
+ # Fill model matrix buffer
+ #######################################################################
+
+ if self.poses is not None:
+ pose_data = np.ascontiguousarray(
+ np.transpose(self.poses, [0,2,1]).flatten().astype(np.float32)
+ )
+ else:
+ pose_data = np.ascontiguousarray(
+ np.eye(4).flatten().astype(np.float32)
+ )
+
+ modelbuffer = glGenBuffers(1)
+ self._buffers.append(modelbuffer)
+ glBindBuffer(GL_ARRAY_BUFFER, modelbuffer)
+ glBufferData(
+ GL_ARRAY_BUFFER, FLOAT_SZ * len(pose_data),
+ pose_data, GL_STATIC_DRAW
+ )
+
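+ # A mat4 vertex attribute occupies four consecutive vec4 slots; the
+ # attribute divisor of 1 advances the matrix once per instance rather
+ # than once per vertex.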
+ for i in range(0, 4):
+ idx = i + len(attr_sizes)
+ glEnableVertexAttribArray(idx)
+ glVertexAttribPointer(
+ idx, 4, GL_FLOAT, GL_FALSE, FLOAT_SZ * 4 * 4,
+ ctypes.c_void_p(4 * FLOAT_SZ * i)
+ )
+ glVertexAttribDivisor(idx, 1)
+
+ #######################################################################
+ # Fill element buffer
+ #######################################################################
+ if self.indices is not None:
+ elementbuffer = glGenBuffers(1)
+ self._buffers.append(elementbuffer)
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer)
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, UINT_SZ * self.indices.size,
+ self.indices.flatten().astype(np.uint32),
+ GL_STATIC_DRAW)
+
+ glBindVertexArray(0)
+
+ def _remove_from_context(self):
+ if self._vaid is not None:
+ glDeleteVertexArrays(1, [self._vaid])
+ glDeleteBuffers(len(self._buffers), self._buffers)
+ self._vaid = None
+ self._buffers = []
+
+ def _in_context(self):
+ return self._vaid is not None
+
+ def _bind(self):
+ if self._vaid is None:
+ raise ValueError('Cannot bind a Mesh that has not been added '
+ 'to a context')
+ glBindVertexArray(self._vaid)
+
+ def _unbind(self):
+ glBindVertexArray(0)
+
+ def _compute_bounds(self):
+ """Compute the bounds of this object.
+ """
+ # Compute bounds of this object
+ bounds = np.array([np.min(self.positions, axis=0),
+ np.max(self.positions, axis=0)])
+
+ # If instanced, compute translations for approximate bounds
+ if self.poses is not None:
+ bounds += np.array([np.min(self.poses[:,:3,3], axis=0),
+ np.max(self.poses[:,:3,3], axis=0)])
+ return bounds
+
+ def _compute_transparency(self):
+ """Compute whether or not this object is transparent.
+ """
+ if self.material.is_transparent:
+ return True
+ if self._is_transparent is None:
+ self._is_transparent = False
+ if self.color_0 is not None:
+ if np.any(self._color_0[:,3] != 1.0):
+ self._is_transparent = True
+ return self._is_transparent
+
+ def _compute_buf_flags(self):
+ buf_flags = BufFlags.POSITION
+
+ if self.normals is not None:
+ buf_flags |= BufFlags.NORMAL
+ if self.tangents is not None:
+ buf_flags |= BufFlags.TANGENT
+ if self.texcoord_0 is not None:
+ buf_flags |= BufFlags.TEXCOORD_0
+ if self.texcoord_1 is not None:
+ buf_flags |= BufFlags.TEXCOORD_1
+ if self.color_0 is not None:
+ buf_flags |= BufFlags.COLOR_0
+ if self.joints_0 is not None:
+ buf_flags |= BufFlags.JOINTS_0
+ if self.weights_0 is not None:
+ buf_flags |= BufFlags.WEIGHTS_0
+
+ return buf_flags
diff --git a/pyrender/pyrender/renderer.py b/pyrender/pyrender/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ae14c5cdb1785226a52ae6b71b08f01de069962
--- /dev/null
+++ b/pyrender/pyrender/renderer.py
@@ -0,0 +1,1339 @@
+"""PBR renderer for Python.
+
+Author: Matthew Matl
+"""
+import ctypes
+import sys
+
+import numpy as np
+import PIL
+
+from .constants import (RenderFlags, TextAlign, GLTF, BufFlags, TexFlags,
+ ProgramFlags, DEFAULT_Z_FAR, DEFAULT_Z_NEAR,
+ SHADOW_TEX_SZ, MAX_N_LIGHTS)
+from .shader_program import ShaderProgramCache
+from .material import MetallicRoughnessMaterial, SpecularGlossinessMaterial
+from .light import PointLight, SpotLight, DirectionalLight
+from .font import FontCache
+from .utils import format_color_vector
+
+from OpenGL.GL import *
+
+
+class Renderer(object):
+ """Class for handling all rendering operations on a scene.
+
+ Note
+ ----
+ This renderer relies on the existence of an OpenGL context and
+ does not create one on its own.
+
+ Parameters
+ ----------
+ viewport_width : int
+ Width of the viewport in pixels.
+ viewport_height : int
+ Height of the viewport in pixels.
+ point_size : float, optional
+ Size of points in pixels. Defaults to 1.0.
+ """
+
+ def __init__(self, viewport_width, viewport_height, point_size=1.0):
+ self.dpscale = 1
+ # Scaling needed on retina displays
+ if sys.platform == 'darwin':
+ self.dpscale = 2
+
+ self.viewport_width = viewport_width
+ self.viewport_height = viewport_height
+ self.point_size = point_size
+
+ # Optional framebuffer for offscreen renders
+ self._main_fb = None
+ self._main_cb = None
+ self._main_db = None
+ self._main_fb_ms = None
+ self._main_cb_ms = None
+ self._main_db_ms = None
+ self._main_fb_dims = (None, None)
+ self._shadow_fb = None
+ self._latest_znear = DEFAULT_Z_NEAR
+ self._latest_zfar = DEFAULT_Z_FAR
+
+ # Shader Program Cache
+ self._program_cache = ShaderProgramCache()
+ self._font_cache = FontCache()
+ self._meshes = set()
+ self._mesh_textures = set()
+ self._shadow_textures = set()
+ self._texture_alloc_idx = 0
+
+ @property
+ def viewport_width(self):
+ """int : The width of the main viewport, in pixels.
+ """
+ return self._viewport_width
+
+ @viewport_width.setter
+ def viewport_width(self, value):
+ self._viewport_width = self.dpscale * value
+
+ @property
+ def viewport_height(self):
+ """int : The height of the main viewport, in pixels.
+ """
+ return self._viewport_height
+
+ @viewport_height.setter
+ def viewport_height(self, value):
+ self._viewport_height = self.dpscale * value
+
+ @property
+ def point_size(self):
+ """float : The size of screen-space points, in pixels.
+ """
+ return self._point_size
+
+ @point_size.setter
+ def point_size(self, value):
+ self._point_size = float(value)
+
+ def render(self, scene, flags, seg_node_map=None):
+ """Render a scene with the given set of flags.
+
+ Parameters
+ ----------
+ scene : :class:`Scene`
+ A scene to render.
+ flags : int
+ A specification from :class:`.RenderFlags`.
+ seg_node_map : dict
+ A map from :class:`.Node` objects to (3,) colors for each.
+ If specified along with flags set to :attr:`.RenderFlags.SEG`,
+ the color image will be a segmentation image.
+
+ Returns
+ -------
+ color_im : (h, w, 3) uint8 or (h, w, 4) uint8
+ If :attr:`RenderFlags.OFFSCREEN` is set, the color buffer. This is
+ normally an RGB buffer, but if :attr:`.RenderFlags.RGBA` is set,
+ the buffer will be a full RGBA buffer.
+ depth_im : (h, w) float32
+ If :attr:`RenderFlags.OFFSCREEN` is set, the depth buffer
+ in linear units.
+ """
+ # Update context with meshes and textures
+ self._update_context(scene, flags)
+
+ # Render necessary shadow maps
+ if not bool(flags & RenderFlags.DEPTH_ONLY or flags & RenderFlags.SEG):
+ for ln in scene.light_nodes:
+ take_pass = False
+ if (isinstance(ln.light, DirectionalLight) and
+ bool(flags & RenderFlags.SHADOWS_DIRECTIONAL)):
+ take_pass = True
+ elif (isinstance(ln.light, SpotLight) and
+ bool(flags & RenderFlags.SHADOWS_SPOT)):
+ take_pass = True
+ elif (isinstance(ln.light, PointLight) and
+ bool(flags & RenderFlags.SHADOWS_POINT)):
+ take_pass = True
+ if take_pass:
+ self._shadow_mapping_pass(scene, ln, flags)
+
+ # Make forward pass
+ retval = self._forward_pass(scene, flags, seg_node_map=seg_node_map)
+
+ # If necessary, make normals pass
+ if flags & (RenderFlags.VERTEX_NORMALS | RenderFlags.FACE_NORMALS):
+ self._normals_pass(scene, flags)
+
+ # Update camera settings for retrieving depth buffers
+ self._latest_znear = scene.main_camera_node.camera.znear
+ self._latest_zfar = scene.main_camera_node.camera.zfar
+
+ return retval
+
+ def render_text(self, text, x, y, font_name='OpenSans-Regular',
+ font_pt=40, color=None, scale=1.0,
+ align=TextAlign.BOTTOM_LEFT):
+ """Render text into the current viewport.
+
+ Note
+ ----
+ This cannot be done into an offscreen buffer.
+
+ Parameters
+ ----------
+ text : str
+ The text to render.
+ x : int
+ Horizontal pixel location of text.
+ y : int
+ Vertical pixel location of text.
+ font_name : str
+ Name of font, from the ``pyrender/fonts`` folder, or
+ a path to a ``.ttf`` file.
+ font_pt : int
+ Height of the text, in font points.
+ color : (4,) float
+ The color of the text. Default is black.
+ scale : int
+ Scaling factor for text.
+ align : int
+ One of the :class:`TextAlign` options which specifies where the
+ ``x`` and ``y`` parameters lie on the text. For example,
+ :attr:`TextAlign.BOTTOM_LEFT` means that ``x`` and ``y`` indicate
+ the position of the bottom-left corner of the textbox.
+ """
+ x *= self.dpscale
+ y *= self.dpscale
+ font_pt *= self.dpscale
+
+ if color is None:
+ color = np.array([0.0, 0.0, 0.0, 1.0])
+ else:
+ color = format_color_vector(color, 4)
+
+ # Set up viewport for render
+ self._configure_forward_pass_viewport(0)
+
+ # Load font
+ font = self._font_cache.get_font(font_name, font_pt)
+ if not font._in_context():
+ font._add_to_context()
+
+ # Load program
+ program = self._get_text_program()
+ program._bind()
+
+ # Set uniforms
+ p = np.eye(4)
+ p[0,0] = 2.0 / self.viewport_width
+ p[0,3] = -1.0
+ p[1,1] = 2.0 / self.viewport_height
+ p[1,3] = -1.0
+ program.set_uniform('projection', p)
+ program.set_uniform('text_color', color)
+
+ # Draw text
+ font.render_string(text, x, y, scale, align)
+
+ def read_color_buf(self):
+ """Read and return the current viewport's color buffer.
+
+ Alpha cannot be computed for an on-screen buffer.
+
+ Returns
+ -------
+ color_im : (h, w, 3) uint8
+ The color buffer in RGB byte format.
+ """
+ # Extract color image from frame buffer
+ width, height = self.viewport_width, self.viewport_height
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, 0)
+ glReadBuffer(GL_FRONT)
+ color_buf = glReadPixels(0, 0, width, height, GL_RGB, GL_UNSIGNED_BYTE)
+
+ # Re-format them into numpy arrays
+ color_im = np.frombuffer(color_buf, dtype=np.uint8)
+ color_im = color_im.reshape((height, width, 3))
+ color_im = np.flip(color_im, axis=0)
+
+ # Resize for macos if needed
+ if sys.platform == 'darwin':
+ color_im = self._resize_image(color_im, True)
+
+ return color_im
+
+ def read_depth_buf(self):
+ """Read and return the current viewport's color buffer.
+
+ Returns
+ -------
+ depth_im : (h, w) float32
+ The depth buffer in linear units.
+ """
+ width, height = self.viewport_width, self.viewport_height
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, 0)
+ glReadBuffer(GL_FRONT)
+ depth_buf = glReadPixels(
+ 0, 0, width, height, GL_DEPTH_COMPONENT, GL_FLOAT
+ )
+
+ depth_im = np.frombuffer(depth_buf, dtype=np.float32)
+ depth_im = depth_im.reshape((height, width))
+ depth_im = np.flip(depth_im, axis=0)
+
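+ # Convert the non-linear depth values in [0, 1] back to linear
+ # eye-space depth via the inverse of the perspective projection;
+ # pixels at the far plane (value 1.0) are treated as empty and set to 0.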
+ inf_inds = (depth_im == 1.0)
+ depth_im = 2.0 * depth_im - 1.0
+ z_near, z_far = self._latest_znear, self._latest_zfar
+ noninf = np.logical_not(inf_inds)
+ if z_far is None:
+ depth_im[noninf] = 2 * z_near / (1.0 - depth_im[noninf])
+ else:
+ depth_im[noninf] = ((2.0 * z_near * z_far) /
+ (z_far + z_near - depth_im[noninf] *
+ (z_far - z_near)))
+ depth_im[inf_inds] = 0.0
+
+ # Resize for macos if needed
+ if sys.platform == 'darwin':
+ depth_im = self._resize_image(depth_im)
+
+ return depth_im
+
+ def delete(self):
+ """Free all allocated OpenGL resources.
+ """
+ # Free shaders
+ self._program_cache.clear()
+
+ # Free fonts
+ self._font_cache.clear()
+
+ # Free meshes
+ for mesh in self._meshes:
+ for p in mesh.primitives:
+ p.delete()
+
+ # Free textures
+ for mesh_texture in self._mesh_textures:
+ mesh_texture.delete()
+
+ for shadow_texture in self._shadow_textures:
+ shadow_texture.delete()
+
+ self._meshes = set()
+ self._mesh_textures = set()
+ self._shadow_textures = set()
+ self._texture_alloc_idx = 0
+
+ self._delete_main_framebuffer()
+ self._delete_shadow_framebuffer()
+
+ def __del__(self):
+ try:
+ self.delete()
+ except Exception:
+ pass
+
+ ###########################################################################
+ # Rendering passes
+ ###########################################################################
+
+ def _forward_pass(self, scene, flags, seg_node_map=None):
+ # Set up viewport for render
+ self._configure_forward_pass_viewport(flags)
+
+ # Clear it
+ if bool(flags & RenderFlags.SEG):
+ glClearColor(0.0, 0.0, 0.0, 1.0)
+ if seg_node_map is None:
+ seg_node_map = {}
+ else:
+ glClearColor(*scene.bg_color)
+
+ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
+
+ if not bool(flags & RenderFlags.SEG):
+ glEnable(GL_MULTISAMPLE)
+ else:
+ glDisable(GL_MULTISAMPLE)
+
+ # Set up camera matrices
+ V, P = self._get_camera_matrices(scene)
+
+ program = None
+ # Now, render each object in sorted order
+ for node in self._sorted_mesh_nodes(scene):
+ mesh = node.mesh
+
+ # Skip the mesh if it's not visible
+ if not mesh.is_visible:
+ continue
+
+ # If SEG, set color
+ if bool(flags & RenderFlags.SEG):
+ if node not in seg_node_map:
+ continue
+ color = seg_node_map[node]
+ if not isinstance(color, (list, tuple, np.ndarray)):
+ color = np.repeat(color, 3)
+ else:
+ color = np.asanyarray(color)
+ color = color / 255.0
+
+ for primitive in mesh.primitives:
+
+ # First, get and bind the appropriate program
+ program = self._get_primitive_program(
+ primitive, flags, ProgramFlags.USE_MATERIAL
+ )
+ program._bind()
+
+ # Set the camera uniforms
+ program.set_uniform('V', V)
+ program.set_uniform('P', P)
+ program.set_uniform(
+ 'cam_pos', scene.get_pose(scene.main_camera_node)[:3,3]
+ )
+ if bool(flags & RenderFlags.SEG):
+ program.set_uniform('color', color)
+
+ # Next, bind the lighting
+ if not (flags & RenderFlags.DEPTH_ONLY or flags & RenderFlags.FLAT or
+ flags & RenderFlags.SEG):
+ self._bind_lighting(scene, program, node, flags)
+
+ # Finally, bind and draw the primitive
+ self._bind_and_draw_primitive(
+ primitive=primitive,
+ pose=scene.get_pose(node),
+ program=program,
+ flags=flags
+ )
+ self._reset_active_textures()
+
+ # Unbind the shader and flush the output
+ if program is not None:
+ program._unbind()
+ glFlush()
+
+ # If doing offscreen render, copy result from framebuffer and return
+ if flags & RenderFlags.OFFSCREEN:
+ return self._read_main_framebuffer(scene, flags)
+ else:
+ return
+
+ def _shadow_mapping_pass(self, scene, light_node, flags):
+ light = light_node.light
+
+ # Set up viewport for render
+ self._configure_shadow_mapping_viewport(light, flags)
+
+ # Set up camera matrices
+ V, P = self._get_light_cam_matrices(scene, light_node, flags)
+
+ # Now, render each object in sorted order
+ for node in self._sorted_mesh_nodes(scene):
+ mesh = node.mesh
+
+ # Skip the mesh if it's not visible
+ if not mesh.is_visible:
+ continue
+
+ for primitive in mesh.primitives:
+
+ # First, get and bind the appropriate program
+ program = self._get_primitive_program(
+ primitive, flags, ProgramFlags.NONE
+ )
+ program._bind()
+
+ # Set the camera uniforms
+ program.set_uniform('V', V)
+ program.set_uniform('P', P)
+ program.set_uniform(
+ 'cam_pos', scene.get_pose(scene.main_camera_node)[:3,3]
+ )
+
+ # Finally, bind and draw the primitive
+ self._bind_and_draw_primitive(
+ primitive=primitive,
+ pose=scene.get_pose(node),
+ program=program,
+ flags=RenderFlags.DEPTH_ONLY
+ )
+ self._reset_active_textures()
+
+ # Unbind the shader and flush the output
+ if program is not None:
+ program._unbind()
+ glFlush()
+
+ def _normals_pass(self, scene, flags):
+ # Set up viewport for render
+ self._configure_forward_pass_viewport(flags)
+ program = None
+
+ # Set up camera matrices
+ V, P = self._get_camera_matrices(scene)
+
+ # Now, render each object in sorted order
+ for node in self._sorted_mesh_nodes(scene):
+ mesh = node.mesh
+
+ # Skip the mesh if it's not visible
+ if not mesh.is_visible:
+ continue
+
+ for primitive in mesh.primitives:
+
+ # Skip objects that don't have normals
+ if not primitive.buf_flags & BufFlags.NORMAL:
+ continue
+
+ # First, get and bind the appropriate program
+ pf = ProgramFlags.NONE
+ if flags & RenderFlags.VERTEX_NORMALS:
+ pf = pf | ProgramFlags.VERTEX_NORMALS
+ if flags & RenderFlags.FACE_NORMALS:
+ pf = pf | ProgramFlags.FACE_NORMALS
+ program = self._get_primitive_program(primitive, flags, pf)
+ program._bind()
+
+ # Set the camera uniforms
+ program.set_uniform('V', V)
+ program.set_uniform('P', P)
+ program.set_uniform('normal_magnitude', 0.05 * primitive.scale)
+ program.set_uniform(
+ 'normal_color', np.array([0.1, 0.1, 1.0, 1.0])
+ )
+
+ # Finally, bind and draw the primitive
+ self._bind_and_draw_primitive(
+ primitive=primitive,
+ pose=scene.get_pose(node),
+ program=program,
+ flags=RenderFlags.DEPTH_ONLY
+ )
+ self._reset_active_textures()
+
+ # Unbind the shader and flush the output
+ if program is not None:
+ program._unbind()
+ glFlush()
+
+ ###########################################################################
+ # Handlers for binding uniforms and drawing primitives
+ ###########################################################################
+
+ def _bind_and_draw_primitive(self, primitive, pose, program, flags):
+ # Set model pose matrix
+ program.set_uniform('M', pose)
+
+ # Bind mesh buffers
+ primitive._bind()
+
+ # Bind mesh material
+ if not (flags & RenderFlags.DEPTH_ONLY or flags & RenderFlags.SEG):
+ material = primitive.material
+
+ # Bind textures
+ tf = material.tex_flags
+ if tf & TexFlags.NORMAL:
+ self._bind_texture(material.normalTexture,
+ 'material.normal_texture', program)
+ if tf & TexFlags.OCCLUSION:
+ self._bind_texture(material.occlusionTexture,
+ 'material.occlusion_texture', program)
+ if tf & TexFlags.EMISSIVE:
+ self._bind_texture(material.emissiveTexture,
+ 'material.emissive_texture', program)
+ if tf & TexFlags.BASE_COLOR:
+ self._bind_texture(material.baseColorTexture,
+ 'material.base_color_texture', program)
+ if tf & TexFlags.METALLIC_ROUGHNESS:
+ self._bind_texture(material.metallicRoughnessTexture,
+ 'material.metallic_roughness_texture',
+ program)
+ if tf & TexFlags.DIFFUSE:
+ self._bind_texture(material.diffuseTexture,
+ 'material.diffuse_texture', program)
+ if tf & TexFlags.SPECULAR_GLOSSINESS:
+ self._bind_texture(material.specularGlossinessTexture,
+ 'material.specular_glossiness_texture',
+ program)
+
+ # Bind other uniforms
+ b = 'material.{}'
+ program.set_uniform(b.format('emissive_factor'),
+ material.emissiveFactor)
+ if isinstance(material, MetallicRoughnessMaterial):
+ program.set_uniform(b.format('base_color_factor'),
+ material.baseColorFactor)
+ program.set_uniform(b.format('metallic_factor'),
+ material.metallicFactor)
+ program.set_uniform(b.format('roughness_factor'),
+ material.roughnessFactor)
+ elif isinstance(material, SpecularGlossinessMaterial):
+ program.set_uniform(b.format('diffuse_factor'),
+ material.diffuseFactor)
+ program.set_uniform(b.format('specular_factor'),
+ material.specularFactor)
+ program.set_uniform(b.format('glossiness_factor'),
+ material.glossinessFactor)
+
+ # Set blending options
+ if material.alphaMode == 'BLEND':
+ glEnable(GL_BLEND)
+ glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
+ else:
+ glEnable(GL_BLEND)
+ glBlendFunc(GL_ONE, GL_ZERO)
+
+ # Set wireframe mode
+ wf = material.wireframe
+ if flags & RenderFlags.FLIP_WIREFRAME:
+ wf = not wf
+ if (flags & RenderFlags.ALL_WIREFRAME) or wf:
+ glPolygonMode(GL_FRONT_AND_BACK, GL_LINE)
+ else:
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL)
+
+ # Set culling mode
+ if material.doubleSided or flags & RenderFlags.SKIP_CULL_FACES:
+ glDisable(GL_CULL_FACE)
+ else:
+ glEnable(GL_CULL_FACE)
+ glCullFace(GL_BACK)
+ else:
+ glEnable(GL_CULL_FACE)
+ glEnable(GL_BLEND)
+ glCullFace(GL_BACK)
+ glBlendFunc(GL_ONE, GL_ZERO)
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL)
+
+ # Set point size if needed
+ glDisable(GL_PROGRAM_POINT_SIZE)
+ if primitive.mode == GLTF.POINTS:
+ glEnable(GL_PROGRAM_POINT_SIZE)
+ glPointSize(self.point_size)
+
+ # Render mesh
+ n_instances = 1
+ if primitive.poses is not None:
+ n_instances = len(primitive.poses)
+
+ if primitive.indices is not None:
+ glDrawElementsInstanced(
+ primitive.mode, primitive.indices.size, GL_UNSIGNED_INT,
+ ctypes.c_void_p(0), n_instances
+ )
+ else:
+ glDrawArraysInstanced(
+ primitive.mode, 0, len(primitive.positions), n_instances
+ )
+
+ # Unbind mesh buffers
+ primitive._unbind()
+
+ def _bind_lighting(self, scene, program, node, flags):
+ """Bind all lighting uniform values for a scene.
+ """
+ max_n_lights = self._compute_max_n_lights(flags)
+
+ n_d = min(len(scene.directional_light_nodes), max_n_lights[0])
+ n_s = min(len(scene.spot_light_nodes), max_n_lights[1])
+ n_p = min(len(scene.point_light_nodes), max_n_lights[2])
+ program.set_uniform('ambient_light', scene.ambient_light)
+ program.set_uniform('n_directional_lights', n_d)
+ program.set_uniform('n_spot_lights', n_s)
+ program.set_uniform('n_point_lights', n_p)
+ plc = 0
+ slc = 0
+ dlc = 0
+
+ light_nodes = scene.light_nodes
+ if (len(scene.directional_light_nodes) > max_n_lights[0] or
+ len(scene.spot_light_nodes) > max_n_lights[1] or
+ len(scene.point_light_nodes) > max_n_lights[2]):
+ light_nodes = self._sorted_nodes_by_distance(
+ scene, scene.light_nodes, node
+ )
+
+ for n in light_nodes:
+ light = n.light
+ pose = scene.get_pose(n)
+ position = pose[:3,3]
+ direction = -pose[:3,2]
+
+ if isinstance(light, PointLight):
+ if plc == max_n_lights[2]:
+ continue
+ b = 'point_lights[{}].'.format(plc)
+ plc += 1
+ shadow = bool(flags & RenderFlags.SHADOWS_POINT)
+ program.set_uniform(b + 'position', position)
+ elif isinstance(light, SpotLight):
+ if slc == max_n_lights[1]:
+ continue
+ b = 'spot_lights[{}].'.format(slc)
+ slc += 1
+ shadow = bool(flags & RenderFlags.SHADOWS_SPOT)
+ las = 1.0 / max(0.001, np.cos(light.innerConeAngle) -
+ np.cos(light.outerConeAngle))
+ lao = -np.cos(light.outerConeAngle) * las
+ program.set_uniform(b + 'direction', direction)
+ program.set_uniform(b + 'position', position)
+ program.set_uniform(b + 'light_angle_scale', las)
+ program.set_uniform(b + 'light_angle_offset', lao)
+ else:
+ if dlc == max_n_lights[0]:
+ continue
+ b = 'directional_lights[{}].'.format(dlc)
+ dlc += 1
+ shadow = bool(flags & RenderFlags.SHADOWS_DIRECTIONAL)
+ program.set_uniform(b + 'direction', direction)
+
+ program.set_uniform(b + 'color', light.color)
+ program.set_uniform(b + 'intensity', light.intensity)
+ # if light.range is not None:
+ # program.set_uniform(b + 'range', light.range)
+ # else:
+ # program.set_uniform(b + 'range', 0)
+
+ if shadow:
+ self._bind_texture(light.shadow_texture,
+ b + 'shadow_map', program)
+ if not isinstance(light, PointLight):
+ V, P = self._get_light_cam_matrices(scene, n, flags)
+ program.set_uniform(b + 'light_matrix', P.dot(V))
+ else:
+ raise NotImplementedError(
+ 'Point light shadows not implemented'
+ )
+
+ def _sorted_mesh_nodes(self, scene):
+ cam_loc = scene.get_pose(scene.main_camera_node)[:3,3]
+ solid_nodes = []
+ trans_nodes = []
+ for node in scene.mesh_nodes:
+ mesh = node.mesh
+ if mesh.is_transparent:
+ trans_nodes.append(node)
+ else:
+ solid_nodes.append(node)
+
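+ # Draw opaque geometry first, then transparent geometry sorted
+ # back-to-front so that alpha blending composites correctly.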
+ # TODO BETTER SORTING METHOD
+ trans_nodes.sort(
+ key=lambda n: -np.linalg.norm(scene.get_pose(n)[:3,3] - cam_loc)
+ )
+ solid_nodes.sort(
+ key=lambda n: -np.linalg.norm(scene.get_pose(n)[:3,3] - cam_loc)
+ )
+
+ return solid_nodes + trans_nodes
+
+ def _sorted_nodes_by_distance(self, scene, nodes, compare_node):
+ nodes = list(nodes)
+ compare_posn = scene.get_pose(compare_node)[:3,3]
+ nodes.sort(key=lambda n: np.linalg.norm(
+ scene.get_pose(n)[:3,3] - compare_posn)
+ )
+ return nodes
+
+ ###########################################################################
+ # Context Management
+ ###########################################################################
+
+ def _update_context(self, scene, flags):
+
+ # Update meshes
+ scene_meshes = scene.meshes
+
+ # Add new meshes to context
+ for mesh in scene_meshes - self._meshes:
+ for p in mesh.primitives:
+ p._add_to_context()
+
+ # Remove old meshes from context
+ for mesh in self._meshes - scene_meshes:
+ for p in mesh.primitives:
+ p.delete()
+
+ self._meshes = scene_meshes.copy()
+
+ # Update mesh textures
+ mesh_textures = set()
+ for m in scene_meshes:
+ for p in m.primitives:
+ mesh_textures |= p.material.textures
+
+ # Add new textures to context
+ for texture in mesh_textures - self._mesh_textures:
+ texture._add_to_context()
+
+ # Remove old textures from context
+ for texture in self._mesh_textures - mesh_textures:
+ texture.delete()
+
+ self._mesh_textures = mesh_textures.copy()
+
+ shadow_textures = set()
+ for l in scene.lights:
+ # Create if needed
+ active = False
+ if (isinstance(l, DirectionalLight) and
+ flags & RenderFlags.SHADOWS_DIRECTIONAL):
+ active = True
+ elif (isinstance(l, PointLight) and
+ flags & RenderFlags.SHADOWS_POINT):
+ active = True
+ elif isinstance(l, SpotLight) and flags & RenderFlags.SHADOWS_SPOT:
+ active = True
+
+ if active and l.shadow_texture is None:
+ l._generate_shadow_texture()
+ if l.shadow_texture is not None:
+ shadow_textures.add(l.shadow_texture)
+
+ # Add new textures to context
+ for texture in shadow_textures - self._shadow_textures:
+ texture._add_to_context()
+
+ # Remove old textures from context
+ for texture in self._shadow_textures - shadow_textures:
+ texture.delete()
+
+ self._shadow_textures = shadow_textures.copy()
+
+ ###########################################################################
+ # Texture Management
+ ###########################################################################
+
+ def _bind_texture(self, texture, uniform_name, program):
+        """Bind a texture to the next available texture unit and set the
+        named sampler uniform to that unit's index.
+        """
+ tex_id = self._get_next_active_texture()
+ glActiveTexture(GL_TEXTURE0 + tex_id)
+ texture._bind()
+ program.set_uniform(uniform_name, tex_id)
+
+ def _get_next_active_texture(self):
+ val = self._texture_alloc_idx
+ self._texture_alloc_idx += 1
+ return val
+
+ def _reset_active_textures(self):
+ self._texture_alloc_idx = 0
+
+ ###########################################################################
+ # Camera Matrix Management
+ ###########################################################################
+
+ def _get_camera_matrices(self, scene):
+ main_camera_node = scene.main_camera_node
+ if main_camera_node is None:
+ raise ValueError('Cannot render scene without a camera')
+ P = main_camera_node.camera.get_projection_matrix(
+ width=self.viewport_width, height=self.viewport_height
+ )
+ pose = scene.get_pose(main_camera_node)
+ V = np.linalg.inv(pose) # V maps from world to camera
+ return V, P
+
+ def _get_light_cam_matrices(self, scene, light_node, flags):
+ light = light_node.light
+ pose = scene.get_pose(light_node).copy()
+ s = scene.scale
+ camera = light._get_shadow_camera(s)
+ P = camera.get_projection_matrix()
+ if isinstance(light, DirectionalLight):
+ direction = -pose[:3,2]
+ c = scene.centroid
+ loc = c - direction * s
+ pose[:3,3] = loc
+ V = np.linalg.inv(pose) # V maps from world to camera
+ return V, P
+
+ ###########################################################################
+ # Shader Program Management
+ ###########################################################################
+
+ def _get_text_program(self):
+ program = self._program_cache.get_program(
+ vertex_shader='text.vert',
+ fragment_shader='text.frag'
+ )
+
+ if not program._in_context():
+ program._add_to_context()
+
+ return program
+
+ def _compute_max_n_lights(self, flags):
+ max_n_lights = [MAX_N_LIGHTS, MAX_N_LIGHTS, MAX_N_LIGHTS]
+ n_tex_units = glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS)
+
+ # Reserved texture units: 6
+ # Normal Map
+ # Occlusion Map
+ # Emissive Map
+ # Base Color or Diffuse Map
+ # MR or SG Map
+ # Environment cubemap
+
+ n_reserved_textures = 6
+ n_available_textures = n_tex_units - n_reserved_textures
+
+ # Distribute textures evenly among lights with shadows, with
+ # a preference for directional lights
+ n_shadow_types = 0
+ if flags & RenderFlags.SHADOWS_DIRECTIONAL:
+ n_shadow_types += 1
+ if flags & RenderFlags.SHADOWS_SPOT:
+ n_shadow_types += 1
+ if flags & RenderFlags.SHADOWS_POINT:
+ n_shadow_types += 1
+
+ if n_shadow_types > 0:
+ tex_per_light = n_available_textures // n_shadow_types
+
+ if flags & RenderFlags.SHADOWS_DIRECTIONAL:
+ max_n_lights[0] = (
+ tex_per_light +
+ (n_available_textures - tex_per_light * n_shadow_types)
+ )
+ if flags & RenderFlags.SHADOWS_SPOT:
+ max_n_lights[1] = tex_per_light
+ if flags & RenderFlags.SHADOWS_POINT:
+ max_n_lights[2] = tex_per_light
+
+ return max_n_lights
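For reference, a minimal standalone sketch (not part of the renderer) of how `_compute_max_n_lights` splits the texture-unit budget between shadow types; the unit count below is an assumed example, whereas the renderer queries `glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS)` at run time.

```python
# Hypothetical GPU reporting 17 texture image units, with directional and
# spot shadows enabled (2 shadow types); 6 units are reserved for material
# and environment maps.
n_tex_units = 17                                  # assumed example value
n_available = n_tex_units - 6                     # 11
n_shadow_types = 2
tex_per_light = n_available // n_shadow_types     # 5
# Directional lights also absorb the remainder of the integer division.
max_directional = tex_per_light + (n_available - tex_per_light * n_shadow_types)
max_spot = tex_per_light
print(max_directional, max_spot)                  # -> 6 5
```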
+
+ def _get_primitive_program(self, primitive, flags, program_flags):
+ vertex_shader = None
+ fragment_shader = None
+ geometry_shader = None
+ defines = {}
+
+ if (bool(program_flags & ProgramFlags.USE_MATERIAL) and
+ not flags & RenderFlags.DEPTH_ONLY and
+ not flags & RenderFlags.FLAT and
+ not flags & RenderFlags.SEG):
+ vertex_shader = 'mesh.vert'
+ fragment_shader = 'mesh.frag'
+ elif bool(program_flags & (ProgramFlags.VERTEX_NORMALS |
+ ProgramFlags.FACE_NORMALS)):
+ vertex_shader = 'vertex_normals.vert'
+ if primitive.mode == GLTF.POINTS:
+ geometry_shader = 'vertex_normals_pc.geom'
+ else:
+ geometry_shader = 'vertex_normals.geom'
+ fragment_shader = 'vertex_normals.frag'
+ elif flags & RenderFlags.FLAT:
+ vertex_shader = 'flat.vert'
+ fragment_shader = 'flat.frag'
+ elif flags & RenderFlags.SEG:
+ vertex_shader = 'segmentation.vert'
+ fragment_shader = 'segmentation.frag'
+ else:
+ vertex_shader = 'mesh_depth.vert'
+ fragment_shader = 'mesh_depth.frag'
+
+ # Set up vertex buffer DEFINES
+ bf = primitive.buf_flags
+ buf_idx = 1
+ if bf & BufFlags.NORMAL:
+ defines['NORMAL_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.TANGENT:
+ defines['TANGENT_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.TEXCOORD_0:
+ defines['TEXCOORD_0_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.TEXCOORD_1:
+ defines['TEXCOORD_1_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.COLOR_0:
+ defines['COLOR_0_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.JOINTS_0:
+ defines['JOINTS_0_LOC'] = buf_idx
+ buf_idx += 1
+ if bf & BufFlags.WEIGHTS_0:
+ defines['WEIGHTS_0_LOC'] = buf_idx
+ buf_idx += 1
+ defines['INST_M_LOC'] = buf_idx
+
+ # Set up shadow mapping defines
+ if flags & RenderFlags.SHADOWS_DIRECTIONAL:
+ defines['DIRECTIONAL_LIGHT_SHADOWS'] = 1
+ if flags & RenderFlags.SHADOWS_SPOT:
+ defines['SPOT_LIGHT_SHADOWS'] = 1
+ if flags & RenderFlags.SHADOWS_POINT:
+ defines['POINT_LIGHT_SHADOWS'] = 1
+ max_n_lights = self._compute_max_n_lights(flags)
+ defines['MAX_DIRECTIONAL_LIGHTS'] = max_n_lights[0]
+ defines['MAX_SPOT_LIGHTS'] = max_n_lights[1]
+ defines['MAX_POINT_LIGHTS'] = max_n_lights[2]
+
+ # Set up vertex normal defines
+ if program_flags & ProgramFlags.VERTEX_NORMALS:
+ defines['VERTEX_NORMALS'] = 1
+ if program_flags & ProgramFlags.FACE_NORMALS:
+ defines['FACE_NORMALS'] = 1
+
+ # Set up material texture defines
+ if bool(program_flags & ProgramFlags.USE_MATERIAL):
+ tf = primitive.material.tex_flags
+ if tf & TexFlags.NORMAL:
+ defines['HAS_NORMAL_TEX'] = 1
+ if tf & TexFlags.OCCLUSION:
+ defines['HAS_OCCLUSION_TEX'] = 1
+ if tf & TexFlags.EMISSIVE:
+ defines['HAS_EMISSIVE_TEX'] = 1
+ if tf & TexFlags.BASE_COLOR:
+ defines['HAS_BASE_COLOR_TEX'] = 1
+ if tf & TexFlags.METALLIC_ROUGHNESS:
+ defines['HAS_METALLIC_ROUGHNESS_TEX'] = 1
+ if tf & TexFlags.DIFFUSE:
+ defines['HAS_DIFFUSE_TEX'] = 1
+ if tf & TexFlags.SPECULAR_GLOSSINESS:
+ defines['HAS_SPECULAR_GLOSSINESS_TEX'] = 1
+ if isinstance(primitive.material, MetallicRoughnessMaterial):
+ defines['USE_METALLIC_MATERIAL'] = 1
+ elif isinstance(primitive.material, SpecularGlossinessMaterial):
+ defines['USE_GLOSSY_MATERIAL'] = 1
+
+ program = self._program_cache.get_program(
+ vertex_shader=vertex_shader,
+ fragment_shader=fragment_shader,
+ geometry_shader=geometry_shader,
+ defines=defines
+ )
+
+ if not program._in_context():
+ program._add_to_context()
+
+ return program
+
+ ###########################################################################
+ # Viewport Management
+ ###########################################################################
+
+ def _configure_forward_pass_viewport(self, flags):
+
+ # If using offscreen render, bind main framebuffer
+ if flags & RenderFlags.OFFSCREEN:
+ self._configure_main_framebuffer()
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self._main_fb_ms)
+ else:
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0)
+
+ glViewport(0, 0, self.viewport_width, self.viewport_height)
+ glEnable(GL_DEPTH_TEST)
+ glDepthMask(GL_TRUE)
+ glDepthFunc(GL_LESS)
+ glDepthRange(0.0, 1.0)
+
+ def _configure_shadow_mapping_viewport(self, light, flags):
+ self._configure_shadow_framebuffer()
+ glBindFramebuffer(GL_FRAMEBUFFER, self._shadow_fb)
+ light.shadow_texture._bind()
+ light.shadow_texture._bind_as_depth_attachment()
+ glActiveTexture(GL_TEXTURE0)
+ light.shadow_texture._bind()
+ glDrawBuffer(GL_NONE)
+ glReadBuffer(GL_NONE)
+
+ glClear(GL_DEPTH_BUFFER_BIT)
+ glViewport(0, 0, SHADOW_TEX_SZ, SHADOW_TEX_SZ)
+ glEnable(GL_DEPTH_TEST)
+ glDepthMask(GL_TRUE)
+ glDepthFunc(GL_LESS)
+ glDepthRange(0.0, 1.0)
+ glDisable(GL_CULL_FACE)
+ glDisable(GL_BLEND)
+
+ ###########################################################################
+ # Framebuffer Management
+ ###########################################################################
+
+ def _configure_shadow_framebuffer(self):
+ if self._shadow_fb is None:
+ self._shadow_fb = glGenFramebuffers(1)
+
+ def _delete_shadow_framebuffer(self):
+ if self._shadow_fb is not None:
+ glDeleteFramebuffers(1, [self._shadow_fb])
+
+ def _configure_main_framebuffer(self):
+ # If mismatch with prior framebuffer, delete it
+        if (self._main_fb is not None and
+                (self.viewport_width != self._main_fb_dims[0] or
+                 self.viewport_height != self._main_fb_dims[1])):
+ self._delete_main_framebuffer()
+
+ # If framebuffer doesn't exist, create it
+ if self._main_fb is None:
+ # Generate standard buffer
+ self._main_cb, self._main_db = glGenRenderbuffers(2)
+
+ glBindRenderbuffer(GL_RENDERBUFFER, self._main_cb)
+ glRenderbufferStorage(
+ GL_RENDERBUFFER, GL_RGBA,
+ self.viewport_width, self.viewport_height
+ )
+
+ glBindRenderbuffer(GL_RENDERBUFFER, self._main_db)
+ glRenderbufferStorage(
+ GL_RENDERBUFFER, GL_DEPTH_COMPONENT24,
+ self.viewport_width, self.viewport_height
+ )
+
+ self._main_fb = glGenFramebuffers(1)
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self._main_fb)
+ glFramebufferRenderbuffer(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_RENDERBUFFER, self._main_cb
+ )
+ glFramebufferRenderbuffer(
+ GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
+ GL_RENDERBUFFER, self._main_db
+ )
+
+ # Generate multisample buffer
+ self._main_cb_ms, self._main_db_ms = glGenRenderbuffers(2)
+ glBindRenderbuffer(GL_RENDERBUFFER, self._main_cb_ms)
+            # Use at most 4 MSAA samples, but never more than the driver's
+            # GL_MAX_SAMPLES (the upstream code hard-coded 4 samples here).
+            num_samples = min(glGetIntegerv(GL_MAX_SAMPLES), 4)
+
+            glRenderbufferStorageMultisample(
+                GL_RENDERBUFFER, num_samples, GL_RGBA,
+                self.viewport_width, self.viewport_height
+            )
+
+            glBindRenderbuffer(GL_RENDERBUFFER, self._main_db_ms)
+            glRenderbufferStorageMultisample(
+                GL_RENDERBUFFER, num_samples, GL_DEPTH_COMPONENT24,
+                self.viewport_width, self.viewport_height
+            )
+
+ self._main_fb_ms = glGenFramebuffers(1)
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self._main_fb_ms)
+ glFramebufferRenderbuffer(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_RENDERBUFFER, self._main_cb_ms
+ )
+ glFramebufferRenderbuffer(
+ GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
+ GL_RENDERBUFFER, self._main_db_ms
+ )
+
+ self._main_fb_dims = (self.viewport_width, self.viewport_height)
+
+ def _delete_main_framebuffer(self):
+ if self._main_fb is not None:
+ glDeleteFramebuffers(2, [self._main_fb, self._main_fb_ms])
+ if self._main_cb is not None:
+ glDeleteRenderbuffers(2, [self._main_cb, self._main_cb_ms])
+ if self._main_db is not None:
+ glDeleteRenderbuffers(2, [self._main_db, self._main_db_ms])
+
+ self._main_fb = None
+ self._main_cb = None
+ self._main_db = None
+ self._main_fb_ms = None
+ self._main_cb_ms = None
+ self._main_db_ms = None
+ self._main_fb_dims = (None, None)
+
+ def _read_main_framebuffer(self, scene, flags):
+ width, height = self._main_fb_dims[0], self._main_fb_dims[1]
+
+ # Bind framebuffer and blit buffers
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, self._main_fb_ms)
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self._main_fb)
+ glBlitFramebuffer(
+ 0, 0, width, height, 0, 0, width, height,
+ GL_COLOR_BUFFER_BIT, GL_LINEAR
+ )
+ glBlitFramebuffer(
+ 0, 0, width, height, 0, 0, width, height,
+ GL_DEPTH_BUFFER_BIT, GL_NEAREST
+ )
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, self._main_fb)
+
+ # Read depth
+ depth_buf = glReadPixels(
+ 0, 0, width, height, GL_DEPTH_COMPONENT, GL_FLOAT
+ )
+ depth_im = np.frombuffer(depth_buf, dtype=np.float32)
+ depth_im = depth_im.reshape((height, width))
+ depth_im = np.flip(depth_im, axis=0)
+ inf_inds = (depth_im == 1.0)
+ depth_im = 2.0 * depth_im - 1.0
+ z_near = scene.main_camera_node.camera.znear
+ z_far = scene.main_camera_node.camera.zfar
+ noninf = np.logical_not(inf_inds)
+ if z_far is None:
+ depth_im[noninf] = 2 * z_near / (1.0 - depth_im[noninf])
+ else:
+ depth_im[noninf] = ((2.0 * z_near * z_far) /
+ (z_far + z_near - depth_im[noninf] *
+ (z_far - z_near)))
+ depth_im[inf_inds] = 0.0
+
+ # Resize for macos if needed
+ if sys.platform == 'darwin':
+ depth_im = self._resize_image(depth_im)
+
+ if flags & RenderFlags.DEPTH_ONLY:
+ return depth_im
+
+ # Read color
+ if flags & RenderFlags.RGBA:
+ color_buf = glReadPixels(
+ 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE
+ )
+ color_im = np.frombuffer(color_buf, dtype=np.uint8)
+ color_im = color_im.reshape((height, width, 4))
+ else:
+ color_buf = glReadPixels(
+ 0, 0, width, height, GL_RGB, GL_UNSIGNED_BYTE
+ )
+ color_im = np.frombuffer(color_buf, dtype=np.uint8)
+ color_im = color_im.reshape((height, width, 3))
+ color_im = np.flip(color_im, axis=0)
+
+ # Resize for macos if needed
+ if sys.platform == 'darwin':
+ color_im = self._resize_image(color_im, True)
+
+ return color_im, depth_im
+
+ def _resize_image(self, value, antialias=False):
+ """If needed, rescale the render for MacOS."""
+ img = PIL.Image.fromarray(value)
+ resample = PIL.Image.NEAREST
+ if antialias:
+ resample = PIL.Image.BILINEAR
+ size = (self.viewport_width // self.dpscale,
+ self.viewport_height // self.dpscale)
+ img = img.resize(size, resample=resample)
+ return np.array(img)
+
+ ###########################################################################
+ # Shadowmap Debugging
+ ###########################################################################
+
+ def _forward_pass_no_reset(self, scene, flags):
+ # Set up camera matrices
+ V, P = self._get_camera_matrices(scene)
+
+ # Now, render each object in sorted order
+ for node in self._sorted_mesh_nodes(scene):
+ mesh = node.mesh
+
+ # Skip the mesh if it's not visible
+ if not mesh.is_visible:
+ continue
+
+ for primitive in mesh.primitives:
+
+ # First, get and bind the appropriate program
+ program = self._get_primitive_program(
+ primitive, flags, ProgramFlags.USE_MATERIAL
+ )
+ program._bind()
+
+ # Set the camera uniforms
+ program.set_uniform('V', V)
+ program.set_uniform('P', P)
+ program.set_uniform(
+ 'cam_pos', scene.get_pose(scene.main_camera_node)[:3,3]
+ )
+
+ # Next, bind the lighting
+ if not flags & RenderFlags.DEPTH_ONLY and not flags & RenderFlags.FLAT:
+ self._bind_lighting(scene, program, node, flags)
+
+ # Finally, bind and draw the primitive
+ self._bind_and_draw_primitive(
+ primitive=primitive,
+ pose=scene.get_pose(node),
+ program=program,
+ flags=flags
+ )
+ self._reset_active_textures()
+
+ # Unbind the shader and flush the output
+ if program is not None:
+ program._unbind()
+ glFlush()
+
+ def _render_light_shadowmaps(self, scene, light_nodes, flags, tile=False):
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0)
+ glClearColor(*scene.bg_color)
+ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
+ glEnable(GL_DEPTH_TEST)
+ glDepthMask(GL_TRUE)
+ glDepthFunc(GL_LESS)
+ glDepthRange(0.0, 1.0)
+
+ w = self.viewport_width
+ h = self.viewport_height
+
+ num_nodes = len(light_nodes)
+ viewport_dims = {
+ (0, 2): [0, h // 2, w // 2, h],
+ (1, 2): [w // 2, h // 2, w, h],
+ (0, 3): [0, h // 2, w // 2, h],
+ (1, 3): [w // 2, h // 2, w, h],
+ (2, 3): [0, 0, w // 2, h // 2],
+ (0, 4): [0, h // 2, w // 2, h],
+ (1, 4): [w // 2, h // 2, w, h],
+ (2, 4): [0, 0, w // 2, h // 2],
+ (3, 4): [w // 2, 0, w, h // 2]
+ }
+
+ if tile:
+ for i, ln in enumerate(light_nodes):
+ light = ln.light
+
+ if light.shadow_texture is None:
+ raise ValueError('Light does not have a shadow texture')
+
+ glViewport(*viewport_dims[(i, num_nodes + 1)])
+
+ program = self._get_debug_quad_program()
+ program._bind()
+ self._bind_texture(light.shadow_texture, 'depthMap', program)
+ self._render_debug_quad()
+ self._reset_active_textures()
+ glFlush()
+ i += 1
+ glViewport(*viewport_dims[(i, num_nodes + 1)])
+ self._forward_pass_no_reset(scene, flags)
+ else:
+ for i, ln in enumerate(light_nodes):
+ light = ln.light
+
+ if light.shadow_texture is None:
+ raise ValueError('Light does not have a shadow texture')
+
+ glViewport(0, 0, self.viewport_width, self.viewport_height)
+
+ program = self._get_debug_quad_program()
+ program._bind()
+ self._bind_texture(light.shadow_texture, 'depthMap', program)
+ self._render_debug_quad()
+ self._reset_active_textures()
+ glFlush()
+ return
+
+ def _get_debug_quad_program(self):
+ program = self._program_cache.get_program(
+ vertex_shader='debug_quad.vert',
+ fragment_shader='debug_quad.frag'
+ )
+ if not program._in_context():
+ program._add_to_context()
+ return program
+
+ def _render_debug_quad(self):
+ x = glGenVertexArrays(1)
+ glBindVertexArray(x)
+ glDrawArrays(GL_TRIANGLES, 0, 6)
+ glBindVertexArray(0)
+ glDeleteVertexArrays(1, [x])
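As a note on `_read_main_framebuffer` above: the depth buffer stores non-linear values in [0, 1], which the method converts back to metric depth by inverting the projection. A minimal NumPy sketch of the same conversion (the `z_near`/`z_far` values below are arbitrary example numbers):

```python
import numpy as np

def linearize_depth(depth_buf, z_near, z_far=None):
    """Mirror of the depth recovery in _read_main_framebuffer: maps [0, 1]
    depth-buffer values to metric depth, treating 1.0 as 'no hit' (-> 0)."""
    depth = np.asarray(depth_buf, dtype=np.float32)
    no_hit = depth == 1.0
    ndc = 2.0 * depth - 1.0                       # [0, 1] -> NDC [-1, 1]
    out = np.zeros_like(ndc)
    if z_far is None:                             # infinite far plane
        out[~no_hit] = 2.0 * z_near / (1.0 - ndc[~no_hit])
    else:
        out[~no_hit] = (2.0 * z_near * z_far /
                        (z_far + z_near - ndc[~no_hit] * (z_far - z_near)))
    return out

print(linearize_depth([0.0, 0.5, 1.0], z_near=0.1, z_far=100.0))
# -> approximately [0.1, 0.2, 0.0]  (near plane, mid-range value, no hit)
```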
diff --git a/pyrender/pyrender/sampler.py b/pyrender/pyrender/sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4784d068f808a40a56c8e748d83175f7f4e6233
--- /dev/null
+++ b/pyrender/pyrender/sampler.py
@@ -0,0 +1,102 @@
+"""Samplers, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-sampler
+
+Author: Matthew Matl
+"""
+from .constants import GLTF
+
+
+class Sampler(object):
+ """Texture sampler properties for filtering and wrapping modes.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ magFilter : int, optional
+ Magnification filter. Valid values:
+ - :attr:`.GLTF.NEAREST`
+ - :attr:`.GLTF.LINEAR`
+ minFilter : int, optional
+ Minification filter. Valid values:
+ - :attr:`.GLTF.NEAREST`
+ - :attr:`.GLTF.LINEAR`
+ - :attr:`.GLTF.NEAREST_MIPMAP_NEAREST`
+ - :attr:`.GLTF.LINEAR_MIPMAP_NEAREST`
+ - :attr:`.GLTF.NEAREST_MIPMAP_LINEAR`
+ - :attr:`.GLTF.LINEAR_MIPMAP_LINEAR`
+ wrapS : int, optional
+ S (U) wrapping mode. Valid values:
+ - :attr:`.GLTF.CLAMP_TO_EDGE`
+ - :attr:`.GLTF.MIRRORED_REPEAT`
+ - :attr:`.GLTF.REPEAT`
+ wrapT : int, optional
+ T (V) wrapping mode. Valid values:
+ - :attr:`.GLTF.CLAMP_TO_EDGE`
+ - :attr:`.GLTF.MIRRORED_REPEAT`
+ - :attr:`.GLTF.REPEAT`
+ """
+
+ def __init__(self,
+ name=None,
+ magFilter=None,
+ minFilter=None,
+ wrapS=GLTF.REPEAT,
+ wrapT=GLTF.REPEAT):
+ self.name = name
+ self.magFilter = magFilter
+ self.minFilter = minFilter
+ self.wrapS = wrapS
+ self.wrapT = wrapT
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def magFilter(self):
+ """int : Magnification filter type.
+ """
+ return self._magFilter
+
+ @magFilter.setter
+ def magFilter(self, value):
+ self._magFilter = value
+
+ @property
+ def minFilter(self):
+ """int : Minification filter type.
+ """
+ return self._minFilter
+
+ @minFilter.setter
+ def minFilter(self, value):
+ self._minFilter = value
+
+ @property
+ def wrapS(self):
+ """int : S (U) wrapping mode.
+ """
+ return self._wrapS
+
+ @wrapS.setter
+ def wrapS(self, value):
+ self._wrapS = value
+
+ @property
+ def wrapT(self):
+ """int : T (V) wrapping mode.
+ """
+ return self._wrapT
+
+ @wrapT.setter
+ def wrapT(self, value):
+ self._wrapT = value
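A minimal usage sketch for the `Sampler` class above, assuming the `GLTF` filter and wrap constants listed in its docstring (the sampler name is an arbitrary example):

```python
from pyrender.constants import GLTF
from pyrender.sampler import Sampler

# Trilinear filtering with repeat wrapping in both texture directions.
sampler = Sampler(
    name='trilinear_repeat',              # arbitrary example name
    magFilter=GLTF.LINEAR,
    minFilter=GLTF.LINEAR_MIPMAP_LINEAR,
    wrapS=GLTF.REPEAT,
    wrapT=GLTF.REPEAT,
)
```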
diff --git a/pyrender/pyrender/scene.py b/pyrender/pyrender/scene.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fe057ec66f52f2dd9c1363aacf72a7c6cec4e6c
--- /dev/null
+++ b/pyrender/pyrender/scene.py
@@ -0,0 +1,585 @@
+"""Scenes, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-scene
+
+Author: Matthew Matl
+"""
+import numpy as np
+import networkx as nx
+import trimesh
+
+from .mesh import Mesh
+from .camera import Camera
+from .light import Light, PointLight, DirectionalLight, SpotLight
+from .node import Node
+from .utils import format_color_vector
+
+
+class Scene(object):
+ """A hierarchical scene graph.
+
+ Parameters
+ ----------
+ nodes : list of :class:`Node`
+ The set of all nodes in the scene.
+ bg_color : (4,) float, optional
+ Background color of scene.
+ ambient_light : (3,) float, optional
+ Color of ambient light. Defaults to no ambient light.
+ name : str, optional
+ The user-defined name of this object.
+ """
+
+ def __init__(self,
+ nodes=None,
+ bg_color=None,
+ ambient_light=None,
+ name=None):
+
+ if bg_color is None:
+ bg_color = np.ones(4)
+ else:
+ bg_color = format_color_vector(bg_color, 4)
+
+ if ambient_light is None:
+ ambient_light = np.zeros(3)
+
+ if nodes is None:
+ nodes = set()
+ self._nodes = set() # Will be added at the end of this function
+
+ self.bg_color = bg_color
+ self.ambient_light = ambient_light
+ self.name = name
+
+ self._name_to_nodes = {}
+ self._obj_to_nodes = {}
+ self._obj_name_to_nodes = {}
+ self._mesh_nodes = set()
+ self._point_light_nodes = set()
+ self._spot_light_nodes = set()
+ self._directional_light_nodes = set()
+ self._camera_nodes = set()
+ self._main_camera_node = None
+ self._bounds = None
+
+ # Transform tree
+ self._digraph = nx.DiGraph()
+ self._digraph.add_node('world')
+ self._path_cache = {}
+
+ # Find root nodes and add them
+ if len(nodes) > 0:
+ node_parent_map = {n: None for n in nodes}
+ for node in nodes:
+ for child in node.children:
+ if node_parent_map[child] is not None:
+ raise ValueError('Nodes may not have more than '
+ 'one parent')
+ node_parent_map[child] = node
+ for node in node_parent_map:
+ if node_parent_map[node] is None:
+ self.add_node(node)
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def nodes(self):
+ """set of :class:`Node` : Set of nodes in the scene.
+ """
+ return self._nodes
+
+ @property
+ def bg_color(self):
+        """(4,) float : The scene background color (RGBA).
+ """
+ return self._bg_color
+
+ @bg_color.setter
+ def bg_color(self, value):
+ if value is None:
+ value = np.ones(4)
+ else:
+ value = format_color_vector(value, 4)
+ self._bg_color = value
+
+ @property
+ def ambient_light(self):
+ """(3,) float : The ambient light in the scene.
+ """
+ return self._ambient_light
+
+ @ambient_light.setter
+ def ambient_light(self, value):
+ if value is None:
+ value = np.zeros(3)
+ else:
+ value = format_color_vector(value, 3)
+ self._ambient_light = value
+
+ @property
+ def meshes(self):
+ """set of :class:`Mesh` : The meshes in the scene.
+ """
+ return set([n.mesh for n in self.mesh_nodes])
+
+ @property
+ def mesh_nodes(self):
+ """set of :class:`Node` : The nodes containing meshes.
+ """
+ return self._mesh_nodes
+
+ @property
+ def lights(self):
+ """set of :class:`Light` : The lights in the scene.
+ """
+ return self.point_lights | self.spot_lights | self.directional_lights
+
+ @property
+ def light_nodes(self):
+ """set of :class:`Node` : The nodes containing lights.
+ """
+ return (self.point_light_nodes | self.spot_light_nodes |
+ self.directional_light_nodes)
+
+ @property
+ def point_lights(self):
+ """set of :class:`PointLight` : The point lights in the scene.
+ """
+ return set([n.light for n in self.point_light_nodes])
+
+ @property
+ def point_light_nodes(self):
+ """set of :class:`Node` : The nodes containing point lights.
+ """
+ return self._point_light_nodes
+
+ @property
+ def spot_lights(self):
+ """set of :class:`SpotLight` : The spot lights in the scene.
+ """
+ return set([n.light for n in self.spot_light_nodes])
+
+ @property
+ def spot_light_nodes(self):
+ """set of :class:`Node` : The nodes containing spot lights.
+ """
+ return self._spot_light_nodes
+
+ @property
+ def directional_lights(self):
+ """set of :class:`DirectionalLight` : The directional lights in
+ the scene.
+ """
+ return set([n.light for n in self.directional_light_nodes])
+
+ @property
+ def directional_light_nodes(self):
+ """set of :class:`Node` : The nodes containing directional lights.
+ """
+ return self._directional_light_nodes
+
+ @property
+ def cameras(self):
+ """set of :class:`Camera` : The cameras in the scene.
+ """
+ return set([n.camera for n in self.camera_nodes])
+
+ @property
+ def camera_nodes(self):
+ """set of :class:`Node` : The nodes containing cameras in the scene.
+ """
+ return self._camera_nodes
+
+ @property
+ def main_camera_node(self):
+        """:class:`Node` : The node containing the main camera in the
+ scene.
+ """
+ return self._main_camera_node
+
+ @main_camera_node.setter
+ def main_camera_node(self, value):
+ if value not in self.nodes:
+ raise ValueError('New main camera node must already be in scene')
+ self._main_camera_node = value
+
+ @property
+ def bounds(self):
+ """(2,3) float : The axis-aligned bounds of the scene.
+ """
+ if self._bounds is None:
+ # Compute corners
+ corners = []
+ for mesh_node in self.mesh_nodes:
+ mesh = mesh_node.mesh
+ pose = self.get_pose(mesh_node)
+ corners_local = trimesh.bounds.corners(mesh.bounds)
+ corners_world = pose[:3,:3].dot(corners_local.T).T + pose[:3,3]
+ corners.append(corners_world)
+ if len(corners) == 0:
+ self._bounds = np.zeros((2,3))
+ else:
+ corners = np.vstack(corners)
+ self._bounds = np.array([np.min(corners, axis=0),
+ np.max(corners, axis=0)])
+ return self._bounds
+
+ @property
+ def centroid(self):
+ """(3,) float : The centroid of the scene's axis-aligned bounding box
+ (AABB).
+ """
+ return np.mean(self.bounds, axis=0)
+
+ @property
+ def extents(self):
+ """(3,) float : The lengths of the axes of the scene's AABB.
+ """
+ return np.diff(self.bounds, axis=0).reshape(-1)
+
+ @property
+ def scale(self):
+        """float : The length of the diagonal of the scene's AABB.
+ """
+ return np.linalg.norm(self.extents)
+
+ def add(self, obj, name=None, pose=None,
+ parent_node=None, parent_name=None):
+ """Add an object (mesh, light, or camera) to the scene.
+
+ Parameters
+ ----------
+ obj : :class:`Mesh`, :class:`Light`, or :class:`Camera`
+ The object to add to the scene.
+ name : str
+ A name for the new node to be created.
+ pose : (4,4) float
+ The local pose of this node relative to its parent node.
+ parent_node : :class:`Node`
+ The parent of this Node. If None, the new node is a root node.
+ parent_name : str
+ The name of the parent node, can be specified instead of
+ `parent_node`.
+
+ Returns
+ -------
+ node : :class:`Node`
+ The newly-created and inserted node.
+ """
+ if isinstance(obj, Mesh):
+ node = Node(name=name, matrix=pose, mesh=obj)
+ elif isinstance(obj, Light):
+ node = Node(name=name, matrix=pose, light=obj)
+ elif isinstance(obj, Camera):
+ node = Node(name=name, matrix=pose, camera=obj)
+ else:
+ raise TypeError('Unrecognized object type')
+
+ if parent_node is None and parent_name is not None:
+ parent_nodes = self.get_nodes(name=parent_name)
+ if len(parent_nodes) == 0:
+ raise ValueError('No parent node with name {} found'
+ .format(parent_name))
+ elif len(parent_nodes) > 1:
+ raise ValueError('More than one parent node with name {} found'
+ .format(parent_name))
+ parent_node = list(parent_nodes)[0]
+
+ self.add_node(node, parent_node=parent_node)
+
+ return node
+
+ def get_nodes(self, node=None, name=None, obj=None, obj_name=None):
+        """Search for existing nodes. Only nodes matching all of the
+        specified parameters are returned; the set is empty if none match.
+
+ Parameters
+ ----------
+ node : :class:`Node`, optional
+ If present, returns this node if it is in the scene.
+ name : str
+ A name for the Node.
+ obj : :class:`Mesh`, :class:`Light`, or :class:`Camera`
+ An object that is attached to the node.
+ obj_name : str
+ The name of an object that is attached to the node.
+
+ Returns
+ -------
+ nodes : set of :class:`.Node`
+ The nodes that match all query terms.
+ """
+ if node is not None:
+ if node in self.nodes:
+ return set([node])
+ else:
+ return set()
+ nodes = set(self.nodes)
+ if name is not None:
+ matches = set()
+ if name in self._name_to_nodes:
+ matches = self._name_to_nodes[name]
+ nodes = nodes & matches
+ if obj is not None:
+ matches = set()
+ if obj in self._obj_to_nodes:
+ matches = self._obj_to_nodes[obj]
+ nodes = nodes & matches
+ if obj_name is not None:
+ matches = set()
+ if obj_name in self._obj_name_to_nodes:
+ matches = self._obj_name_to_nodes[obj_name]
+ nodes = nodes & matches
+
+ return nodes
+
+ def add_node(self, node, parent_node=None):
+ """Add a Node to the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to be added.
+ parent_node : :class:`Node`
+ The parent of this Node. If None, the new node is a root node.
+ """
+ if node in self.nodes:
+ raise ValueError('Node already in scene')
+ self.nodes.add(node)
+
+ # Add node to sets
+ if node.name is not None:
+ if node.name not in self._name_to_nodes:
+ self._name_to_nodes[node.name] = set()
+ self._name_to_nodes[node.name].add(node)
+ for obj in [node.mesh, node.camera, node.light]:
+ if obj is not None:
+ if obj not in self._obj_to_nodes:
+ self._obj_to_nodes[obj] = set()
+ self._obj_to_nodes[obj].add(node)
+ if obj.name is not None:
+ if obj.name not in self._obj_name_to_nodes:
+ self._obj_name_to_nodes[obj.name] = set()
+ self._obj_name_to_nodes[obj.name].add(node)
+ if node.mesh is not None:
+ self._mesh_nodes.add(node)
+ if node.light is not None:
+ if isinstance(node.light, PointLight):
+ self._point_light_nodes.add(node)
+ if isinstance(node.light, SpotLight):
+ self._spot_light_nodes.add(node)
+ if isinstance(node.light, DirectionalLight):
+ self._directional_light_nodes.add(node)
+ if node.camera is not None:
+ self._camera_nodes.add(node)
+ if self._main_camera_node is None:
+ self._main_camera_node = node
+
+ if parent_node is None:
+ parent_node = 'world'
+ elif parent_node not in self.nodes:
+ raise ValueError('Parent node must already be in scene')
+ elif node not in parent_node.children:
+ parent_node.children.append(node)
+
+ # Create node in graph
+ self._digraph.add_node(node)
+ self._digraph.add_edge(node, parent_node)
+
+ # Iterate over children
+ for child in node.children:
+ self.add_node(child, node)
+
+ self._path_cache = {}
+ self._bounds = None
+
+ def has_node(self, node):
+ """Check if a node is already in the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to be checked.
+
+ Returns
+ -------
+ has_node : bool
+ True if the node is already in the scene and false otherwise.
+ """
+ return node in self.nodes
+
+ def remove_node(self, node):
+ """Remove a node and all its children from the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to be removed.
+ """
+ # Disconnect self from parent who is staying in the graph
+ parent = list(self._digraph.neighbors(node))[0]
+ self._remove_node(node)
+ if isinstance(parent, Node):
+ parent.children.remove(node)
+ self._path_cache = {}
+ self._bounds = None
+
+ def get_pose(self, node):
+ """Get the world-frame pose of a node in the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to find the pose of.
+
+ Returns
+ -------
+ pose : (4,4) float
+ The transform matrix for this node.
+ """
+ if node not in self.nodes:
+ raise ValueError('Node must already be in scene')
+ if node in self._path_cache:
+ path = self._path_cache[node]
+ else:
+            # Get the path from this node up to the world frame
+ path = nx.shortest_path(self._digraph, node, 'world')
+ self._path_cache[node] = path
+
+        # Compose local transforms along the path from the node to the world frame
+ pose = np.eye(4)
+ for n in path[:-1]:
+ pose = np.dot(n.matrix, pose)
+
+ return pose
+
+ def set_pose(self, node, pose):
+ """Set the local-frame pose of a node in the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to set the pose of.
+ pose : (4,4) float
+ The pose to set the node to.
+ """
+ if node not in self.nodes:
+ raise ValueError('Node must already be in scene')
+ node._matrix = pose
+ if node.mesh is not None:
+ self._bounds = None
+
+ def clear(self):
+ """Clear out all nodes to form an empty scene.
+ """
+ self._nodes = set()
+
+ self._name_to_nodes = {}
+ self._obj_to_nodes = {}
+ self._obj_name_to_nodes = {}
+ self._mesh_nodes = set()
+ self._point_light_nodes = set()
+ self._spot_light_nodes = set()
+ self._directional_light_nodes = set()
+ self._camera_nodes = set()
+ self._main_camera_node = None
+ self._bounds = None
+
+ # Transform tree
+ self._digraph = nx.DiGraph()
+ self._digraph.add_node('world')
+ self._path_cache = {}
+
+ def _remove_node(self, node):
+ """Remove a node and all its children from the scene.
+
+ Parameters
+ ----------
+ node : :class:`Node`
+ The node to be removed.
+ """
+
+ # Remove self from nodes
+ self.nodes.remove(node)
+
+ # Remove children
+ for child in node.children:
+ self._remove_node(child)
+
+ # Remove self from the graph
+ self._digraph.remove_node(node)
+
+ # Remove from maps
+ if node.name in self._name_to_nodes:
+ self._name_to_nodes[node.name].remove(node)
+ if len(self._name_to_nodes[node.name]) == 0:
+ self._name_to_nodes.pop(node.name)
+ for obj in [node.mesh, node.camera, node.light]:
+ if obj is None:
+ continue
+ self._obj_to_nodes[obj].remove(node)
+ if len(self._obj_to_nodes[obj]) == 0:
+ self._obj_to_nodes.pop(obj)
+ if obj.name is not None:
+ self._obj_name_to_nodes[obj.name].remove(node)
+ if len(self._obj_name_to_nodes[obj.name]) == 0:
+ self._obj_name_to_nodes.pop(obj.name)
+ if node.mesh is not None:
+ self._mesh_nodes.remove(node)
+ if node.light is not None:
+ if isinstance(node.light, PointLight):
+ self._point_light_nodes.remove(node)
+ if isinstance(node.light, SpotLight):
+ self._spot_light_nodes.remove(node)
+ if isinstance(node.light, DirectionalLight):
+ self._directional_light_nodes.remove(node)
+ if node.camera is not None:
+ self._camera_nodes.remove(node)
+ if self._main_camera_node == node:
+ if len(self._camera_nodes) > 0:
+ self._main_camera_node = next(iter(self._camera_nodes))
+ else:
+ self._main_camera_node = None
+
+ @staticmethod
+ def from_trimesh_scene(trimesh_scene,
+ bg_color=None, ambient_light=None):
+ """Create a :class:`.Scene` from a :class:`trimesh.scene.scene.Scene`.
+
+ Parameters
+ ----------
+ trimesh_scene : :class:`trimesh.scene.scene.Scene`
+        Scene with :class:`~trimesh.base.Trimesh` objects.
+ bg_color : (4,) float
+ Background color for the created scene.
+ ambient_light : (3,) float or None
+ Ambient light in the scene.
+
+ Returns
+ -------
+ scene_pr : :class:`Scene`
+ A scene containing the same geometry as the trimesh scene.
+ """
+ # convert trimesh geometries to pyrender geometries
+ geometries = {name: Mesh.from_trimesh(geom)
+ for name, geom in trimesh_scene.geometry.items()}
+
+ # create the pyrender scene object
+ scene_pr = Scene(bg_color=bg_color, ambient_light=ambient_light)
+
+ # add every node with geometry to the pyrender scene
+ for node in trimesh_scene.graph.nodes_geometry:
+ pose, geom_name = trimesh_scene.graph[node]
+ scene_pr.add(geometries[geom_name], pose=pose)
+
+ return scene_pr
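A minimal usage sketch for the `Scene` API above; the trimesh geometry and light parameters are arbitrary example values:

```python
import numpy as np
import trimesh
from pyrender.scene import Scene
from pyrender.mesh import Mesh
from pyrender.light import DirectionalLight

# Build a scene, add a mesh and a directional light, then query the graph.
sphere = trimesh.creation.icosphere(radius=0.2)
scene = Scene(bg_color=[0.0, 0.0, 0.0, 1.0], ambient_light=[0.02, 0.02, 0.02])

mesh_node = scene.add(Mesh.from_trimesh(sphere), name='sphere', pose=np.eye(4))
scene.add(DirectionalLight(color=np.ones(3), intensity=3.0))

print(scene.get_nodes(name='sphere'))    # set containing the sphere's node
print(scene.get_pose(mesh_node))         # 4x4 world-frame transform
print(scene.centroid, scene.scale)       # AABB centroid and diagonal length
```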
diff --git a/pyrender/pyrender/shader_program.py b/pyrender/pyrender/shader_program.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1803f280c98033abe0769771a9ad8ecfec942e3
--- /dev/null
+++ b/pyrender/pyrender/shader_program.py
@@ -0,0 +1,283 @@
+"""OpenGL shader program wrapper.
+"""
+import numpy as np
+import os
+import re
+
+import OpenGL
+from OpenGL.GL import *
+from OpenGL.GL import shaders as gl_shader_utils
+
+
+class ShaderProgramCache(object):
+ """A cache for shader programs.
+ """
+
+ def __init__(self, shader_dir=None):
+ self._program_cache = {}
+ self.shader_dir = shader_dir
+ if self.shader_dir is None:
+ base_dir, _ = os.path.split(os.path.realpath(__file__))
+ self.shader_dir = os.path.join(base_dir, 'shaders')
+
+ def get_program(self, vertex_shader, fragment_shader,
+ geometry_shader=None, defines=None):
+ """Get a program via a list of shader files to include in the program.
+
+ Parameters
+ ----------
+ vertex_shader : str
+ The vertex shader filename.
+ fragment_shader : str
+ The fragment shader filename.
+ geometry_shader : str
+ The geometry shader filename.
+ defines : dict
+ Defines and their values for the shader.
+
+ Returns
+ -------
+ program : :class:`.ShaderProgram`
+ The program.
+ """
+ shader_names = []
+ if defines is None:
+ defines = {}
+ shader_filenames = [
+ x for x in [vertex_shader, fragment_shader, geometry_shader]
+ if x is not None
+ ]
+ for fn in shader_filenames:
+ if fn is None:
+ continue
+ _, name = os.path.split(fn)
+ shader_names.append(name)
+ cid = OpenGL.contextdata.getContext()
+ key = tuple([cid] + sorted(
+ [(s,1) for s in shader_names] + [(d, defines[d]) for d in defines]
+ ))
+
+ if key not in self._program_cache:
+ shader_filenames = [
+ os.path.join(self.shader_dir, fn) for fn in shader_filenames
+ ]
+ if len(shader_filenames) == 2:
+ shader_filenames.append(None)
+ vs, fs, gs = shader_filenames
+ self._program_cache[key] = ShaderProgram(
+ vertex_shader=vs, fragment_shader=fs,
+ geometry_shader=gs, defines=defines
+ )
+ return self._program_cache[key]
+
+ def clear(self):
+ for key in self._program_cache:
+ self._program_cache[key].delete()
+ self._program_cache = {}
+
+
+class ShaderProgram(object):
+    """A thin wrapper around OpenGL shader programs that supports easy creation,
+ binding, and uniform-setting.
+
+ Parameters
+ ----------
+ vertex_shader : str
+ The vertex shader filename.
+ fragment_shader : str
+ The fragment shader filename.
+ geometry_shader : str
+ The geometry shader filename.
+ defines : dict
+ Defines and their values for the shader.
+ """
+
+ def __init__(self, vertex_shader, fragment_shader,
+ geometry_shader=None, defines=None):
+
+ self.vertex_shader = vertex_shader
+ self.fragment_shader = fragment_shader
+ self.geometry_shader = geometry_shader
+
+ self.defines = defines
+ if self.defines is None:
+ self.defines = {}
+
+ self._program_id = None
+ self._vao_id = None # PYOPENGL BUG
+
+ # DEBUG
+ # self._unif_map = {}
+
+ def _add_to_context(self):
+ if self._program_id is not None:
+ raise ValueError('Shader program already in context')
+ shader_ids = []
+
+ # Load vert shader
+ shader_ids.append(gl_shader_utils.compileShader(
+ self._load(self.vertex_shader), GL_VERTEX_SHADER)
+ )
+ # Load frag shader
+ shader_ids.append(gl_shader_utils.compileShader(
+ self._load(self.fragment_shader), GL_FRAGMENT_SHADER)
+ )
+ # Load geometry shader
+ if self.geometry_shader is not None:
+ shader_ids.append(gl_shader_utils.compileShader(
+ self._load(self.geometry_shader), GL_GEOMETRY_SHADER)
+ )
+
+ # Bind empty VAO PYOPENGL BUG
+ if self._vao_id is None:
+ self._vao_id = glGenVertexArrays(1)
+ glBindVertexArray(self._vao_id)
+
+ # Compile program
+ self._program_id = gl_shader_utils.compileProgram(*shader_ids)
+
+ # Unbind empty VAO PYOPENGL BUG
+ glBindVertexArray(0)
+
+ def _in_context(self):
+ return self._program_id is not None
+
+ def _remove_from_context(self):
+ if self._program_id is not None:
+ glDeleteProgram(self._program_id)
+ glDeleteVertexArrays(1, [self._vao_id])
+ self._program_id = None
+ self._vao_id = None
+
+ def _load(self, shader_filename):
+ path, _ = os.path.split(shader_filename)
+
+ with open(shader_filename) as f:
+ text = f.read()
+
+ def ifdef(matchobj):
+ if matchobj.group(1) in self.defines:
+ return '#if 1'
+ else:
+ return '#if 0'
+
+ def ifndef(matchobj):
+ if matchobj.group(1) in self.defines:
+ return '#if 0'
+ else:
+ return '#if 1'
+
+ ifdef_regex = re.compile(
+ '#ifdef\\s+([a-zA-Z_][a-zA-Z_0-9]*)\\s*$', re.MULTILINE
+ )
+ ifndef_regex = re.compile(
+ '#ifndef\\s+([a-zA-Z_][a-zA-Z_0-9]*)\\s*$', re.MULTILINE
+ )
+ text = re.sub(ifdef_regex, ifdef, text)
+ text = re.sub(ifndef_regex, ifndef, text)
+
+ for define in self.defines:
+ value = str(self.defines[define])
+ text = text.replace(define, value)
+
+ return text
+
+ def _bind(self):
+ """Bind this shader program to the current OpenGL context.
+ """
+ if self._program_id is None:
+ raise ValueError('Cannot bind program that is not in context')
+ # glBindVertexArray(self._vao_id)
+ glUseProgram(self._program_id)
+
+ def _unbind(self):
+ """Unbind this shader program from the current OpenGL context.
+ """
+ glUseProgram(0)
+
+ def delete(self):
+ """Delete this shader program from the current OpenGL context.
+ """
+ self._remove_from_context()
+
+ def set_uniform(self, name, value, unsigned=False):
+ """Set a uniform value in the current shader program.
+
+ Parameters
+ ----------
+ name : str
+ Name of the uniform to set.
+ value : int, float, or ndarray
+ Value to set the uniform to.
+ unsigned : bool
+ If True, ints will be treated as unsigned values.
+ """
+ try:
+ # DEBUG
+ # self._unif_map[name] = 1, (1,)
+ loc = glGetUniformLocation(self._program_id, name)
+
+ if loc == -1:
+ raise ValueError('Invalid shader variable: {}'.format(name))
+
+ if isinstance(value, np.ndarray):
+ # DEBUG
+ # self._unif_map[name] = value.size, value.shape
+ if value.ndim == 1:
+ if (np.issubdtype(value.dtype, np.unsignedinteger) or
+ unsigned):
+ dtype = 'u'
+ value = value.astype(np.uint32)
+ elif np.issubdtype(value.dtype, np.integer):
+ dtype = 'i'
+ value = value.astype(np.int32)
+ else:
+ dtype = 'f'
+ value = value.astype(np.float32)
+ self._FUNC_MAP[(value.shape[0], dtype)](loc, 1, value)
+ else:
+ self._FUNC_MAP[(value.shape[0], value.shape[1])](
+ loc, 1, GL_TRUE, value
+ )
+
+            # Call correct uniform function (bool is checked before int
+            # because bool is a subclass of int in Python)
+            elif isinstance(value, bool):
+                if unsigned:
+                    glUniform1ui(loc, int(value))
+                else:
+                    glUniform1i(loc, int(value))
+            elif isinstance(value, float):
+                glUniform1f(loc, value)
+            elif isinstance(value, int):
+                if unsigned:
+                    glUniform1ui(loc, value)
+                else:
+                    glUniform1i(loc, value)
+ else:
+ raise ValueError('Invalid data type')
+ except Exception:
+ pass
+
+ _FUNC_MAP = {
+ (1,'u'): glUniform1uiv,
+ (2,'u'): glUniform2uiv,
+ (3,'u'): glUniform3uiv,
+ (4,'u'): glUniform4uiv,
+ (1,'i'): glUniform1iv,
+ (2,'i'): glUniform2iv,
+ (3,'i'): glUniform3iv,
+ (4,'i'): glUniform4iv,
+ (1,'f'): glUniform1fv,
+ (2,'f'): glUniform2fv,
+ (3,'f'): glUniform3fv,
+ (4,'f'): glUniform4fv,
+ (2,2): glUniformMatrix2fv,
+ (2,3): glUniformMatrix2x3fv,
+ (2,4): glUniformMatrix2x4fv,
+ (3,2): glUniformMatrix3x2fv,
+ (3,3): glUniformMatrix3fv,
+ (3,4): glUniformMatrix3x4fv,
+ (4,2): glUniformMatrix4x2fv,
+ (4,3): glUniformMatrix4x3fv,
+ (4,4): glUniformMatrix4fv,
+ }
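A minimal sketch of the `defines`-based preprocessing that `ShaderProgram._load` applies before compilation, replayed on an inline shader string (the define names and values are arbitrary examples):

```python
import re

defines = {'NORMAL_LOC': 1, 'MAX_DIRECTIONAL_LIGHTS': 4}
src = """#version 330 core
#ifdef NORMAL_LOC
layout(location = NORMAL_LOC) in vec3 normal;
#endif
#ifdef TANGENT_LOC
layout(location = TANGENT_LOC) in vec4 tangent;
#endif
uniform DirectionalLight directional_lights[MAX_DIRECTIONAL_LIGHTS];
"""

ifdef_re = re.compile(r'#ifdef\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*$', re.MULTILINE)
ifndef_re = re.compile(r'#ifndef\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*$', re.MULTILINE)
src = ifdef_re.sub(lambda m: '#if 1' if m.group(1) in defines else '#if 0', src)
src = ifndef_re.sub(lambda m: '#if 0' if m.group(1) in defines else '#if 1', src)
for name in defines:
    src = src.replace(name, str(defines[name]))

print(src)
# The NORMAL_LOC block becomes '#if 1' with 'location = 1', the TANGENT_LOC
# block becomes '#if 0', and the light array expands to directional_lights[4].
```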
diff --git a/pyrender/pyrender/shaders/debug_quad.frag b/pyrender/pyrender/shaders/debug_quad.frag
new file mode 100644
index 0000000000000000000000000000000000000000..4647bb50dfa1e4510e2d4afb37959c7f57532eca
--- /dev/null
+++ b/pyrender/pyrender/shaders/debug_quad.frag
@@ -0,0 +1,23 @@
+#version 330 core
+out vec4 FragColor;
+
+in vec2 TexCoords;
+
+uniform sampler2D depthMap;
+//uniform float near_plane;
+//uniform float far_plane;
+//
+//// required when using a perspective projection matrix
+//float LinearizeDepth(float depth)
+//{
+// float z = depth * 2.0 - 1.0; // Back to NDC
+// return (2.0 * near_plane * far_plane) / (far_plane + near_plane - z * (far_plane - near_plane));
+//}
+
+void main()
+{
+ float depthValue = texture(depthMap, TexCoords).r;
+ // FragColor = vec4(vec3(LinearizeDepth(depthValue) / far_plane), 1.0); // perspective
+ FragColor = vec4(vec3(depthValue), 1.0); // orthographic
+ //FragColor = vec4(1.0, 1.0, 0.0, 1.0);
+}
diff --git a/pyrender/pyrender/shaders/debug_quad.vert b/pyrender/pyrender/shaders/debug_quad.vert
new file mode 100644
index 0000000000000000000000000000000000000000..d2f2fcb7626f6c22e0d52bf4d6c91251cbdb9f52
--- /dev/null
+++ b/pyrender/pyrender/shaders/debug_quad.vert
@@ -0,0 +1,25 @@
+#version 330 core
+//layout (location = 0) in vec3 aPos;
+//layout (location = 1) in vec2 aTexCoords;
+//
+//out vec2 TexCoords;
+//
+//void main()
+//{
+// TexCoords = aTexCoords;
+// gl_Position = vec4(aPos, 1.0);
+//}
+//
+//
+//layout(location = 0) out vec2 uv;
+
+out vec2 TexCoords;
+
+void main()
+{
+ float x = float(((uint(gl_VertexID) + 2u) / 3u)%2u);
+ float y = float(((uint(gl_VertexID) + 1u) / 3u)%2u);
+
+ gl_Position = vec4(-1.0f + x*2.0f, -1.0f+y*2.0f, 0.0f, 1.0f);
+ TexCoords = vec2(x, y);
+}
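The vertex shader above generates a full-screen quad purely from `gl_VertexID`, which is why `_render_debug_quad` can issue `glDrawArrays(GL_TRIANGLES, 0, 6)` with an empty VAO and no vertex buffers. A small sketch that replays the same integer arithmetic for the six vertex IDs:

```python
# Replays the gl_VertexID arithmetic from debug_quad.vert for IDs 0..5.
for vid in range(6):
    x = ((vid + 2) // 3) % 2
    y = ((vid + 1) // 3) % 2
    pos = (-1.0 + 2.0 * x, -1.0 + 2.0 * y)
    print(vid, 'pos =', pos, 'uv =', (x, y))
# Produces the triangles (-1,-1) (1,-1) (1,1) and (1,1) (-1,1) (-1,-1),
# which together cover the full screen in normalized device coordinates.
```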
diff --git a/pyrender/pyrender/shaders/flat.frag b/pyrender/pyrender/shaders/flat.frag
new file mode 100644
index 0000000000000000000000000000000000000000..7ec01c6d095ec5dacc693accd3ad507ced61a79a
--- /dev/null
+++ b/pyrender/pyrender/shaders/flat.frag
@@ -0,0 +1,126 @@
+#version 330 core
+///////////////////////////////////////////////////////////////////////////////
+// Structs
+///////////////////////////////////////////////////////////////////////////////
+
+struct Material {
+ vec3 emissive_factor;
+
+#ifdef USE_METALLIC_MATERIAL
+ vec4 base_color_factor;
+ float metallic_factor;
+ float roughness_factor;
+#endif
+
+#ifdef USE_GLOSSY_MATERIAL
+ vec4 diffuse_factor;
+ vec3 specular_factor;
+ float glossiness_factor;
+#endif
+
+#ifdef HAS_NORMAL_TEX
+ sampler2D normal_texture;
+#endif
+#ifdef HAS_OCCLUSION_TEX
+ sampler2D occlusion_texture;
+#endif
+#ifdef HAS_EMISSIVE_TEX
+ sampler2D emissive_texture;
+#endif
+#ifdef HAS_BASE_COLOR_TEX
+ sampler2D base_color_texture;
+#endif
+#ifdef HAS_METALLIC_ROUGHNESS_TEX
+ sampler2D metallic_roughness_texture;
+#endif
+#ifdef HAS_DIFFUSE_TEX
+ sampler2D diffuse_texture;
+#endif
+#ifdef HAS_SPECULAR_GLOSSINESS_TEX
+ sampler2D specular_glossiness;
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Uniforms
+///////////////////////////////////////////////////////////////////////////////
+uniform Material material;
+uniform vec3 cam_pos;
+
+#ifdef USE_IBL
+uniform samplerCube diffuse_env;
+uniform samplerCube specular_env;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// Inputs
+///////////////////////////////////////////////////////////////////////////////
+
+in vec3 frag_position;
+#ifdef NORMAL_LOC
+in vec3 frag_normal;
+#endif
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+in mat3 tbn;
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+in vec2 uv_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+in vec2 uv_1;
+#endif
+#ifdef COLOR_0_LOC
+in vec4 color_multiplier;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// OUTPUTS
+///////////////////////////////////////////////////////////////////////////////
+
+out vec4 frag_color;
+
+///////////////////////////////////////////////////////////////////////////////
+// Constants
+///////////////////////////////////////////////////////////////////////////////
+const float PI = 3.141592653589793;
+const float min_roughness = 0.04;
+
+///////////////////////////////////////////////////////////////////////////////
+// Utility Functions
+///////////////////////////////////////////////////////////////////////////////
+vec4 srgb_to_linear(vec4 srgb)
+{
+#ifndef SRGB_CORRECTED
+ // Fast Approximation
+ //vec3 linOut = pow(srgbIn.xyz,vec3(2.2));
+ //
+ vec3 b_less = step(vec3(0.04045),srgb.xyz);
+ vec3 lin_out = mix( srgb.xyz/vec3(12.92), pow((srgb.xyz+vec3(0.055))/vec3(1.055),vec3(2.4)), b_less );
+ return vec4(lin_out, srgb.w);
+#else
+ return srgb;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// MAIN
+///////////////////////////////////////////////////////////////////////////////
+void main()
+{
+
+ // Compute albedo
+ vec4 base_color = material.base_color_factor;
+#ifdef HAS_BASE_COLOR_TEX
+ base_color = base_color * texture(material.base_color_texture, uv_0);
+#endif
+
+#ifdef COLOR_0_LOC
+ base_color *= color_multiplier;
+#endif
+
+ frag_color = clamp(base_color, 0.0, 1.0);
+}
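For reference, a NumPy mirror of the `srgb_to_linear()` helper defined above: the same piecewise sRGB decode applied to the RGB channels, with alpha passed through unchanged.

```python
import numpy as np

def srgb_to_linear(srgb):
    """NumPy mirror of the shaders' srgb_to_linear(): piecewise sRGB decode."""
    srgb = np.asarray(srgb, dtype=np.float32)
    rgb, alpha = srgb[..., :3], srgb[..., 3:]
    low = rgb / 12.92
    high = ((rgb + 0.055) / 1.055) ** 2.4
    lin = np.where(rgb <= 0.04045, low, high)
    return np.concatenate([lin, alpha], axis=-1)

print(srgb_to_linear([0.5, 0.5, 0.5, 1.0]))   # ~[0.214, 0.214, 0.214, 1.0]
```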
diff --git a/pyrender/pyrender/shaders/flat.vert b/pyrender/pyrender/shaders/flat.vert
new file mode 100644
index 0000000000000000000000000000000000000000..cfd241c3544718a261f961c3aa3c03aa13c97761
--- /dev/null
+++ b/pyrender/pyrender/shaders/flat.vert
@@ -0,0 +1,86 @@
+#version 330 core
+
+// Vertex Attributes
+layout(location = 0) in vec3 position;
+#ifdef NORMAL_LOC
+layout(location = NORMAL_LOC) in vec3 normal;
+#endif
+#ifdef TANGENT_LOC
+layout(location = TANGENT_LOC) in vec4 tangent;
+#endif
+#ifdef TEXCOORD_0_LOC
+layout(location = TEXCOORD_0_LOC) in vec2 texcoord_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+layout(location = TEXCOORD_1_LOC) in vec2 texcoord_1;
+#endif
+#ifdef COLOR_0_LOC
+layout(location = COLOR_0_LOC) in vec4 color_0;
+#endif
+#ifdef JOINTS_0_LOC
+layout(location = JOINTS_0_LOC) in vec4 joints_0;
+#endif
+#ifdef WEIGHTS_0_LOC
+layout(location = WEIGHTS_0_LOC) in vec4 weights_0;
+#endif
+layout(location = INST_M_LOC) in mat4 inst_m;
+
+// Uniforms
+uniform mat4 M;
+uniform mat4 V;
+uniform mat4 P;
+
+// Outputs
+out vec3 frag_position;
+#ifdef NORMAL_LOC
+out vec3 frag_normal;
+#endif
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+out mat3 tbn;
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+out vec2 uv_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+out vec2 uv_1;
+#endif
+#ifdef COLOR_0_LOC
+out vec4 color_multiplier;
+#endif
+
+
+void main()
+{
+ gl_Position = P * V * M * inst_m * vec4(position, 1);
+ frag_position = vec3(M * inst_m * vec4(position, 1.0));
+
+ mat4 N = transpose(inverse(M * inst_m));
+
+#ifdef NORMAL_LOC
+ frag_normal = normalize(vec3(N * vec4(normal, 0.0)));
+#endif
+
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+ vec3 normal_w = normalize(vec3(N * vec4(normal, 0.0)));
+ vec3 tangent_w = normalize(vec3(N * vec4(tangent.xyz, 0.0)));
+ vec3 bitangent_w = cross(normal_w, tangent_w) * tangent.w;
+ tbn = mat3(tangent_w, bitangent_w, normal_w);
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+ uv_0 = texcoord_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+ uv_1 = texcoord_1;
+#endif
+#ifdef COLOR_0_LOC
+ color_multiplier = color_0;
+#endif
+}
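The `N = transpose(inverse(M * inst_m))` term above is the standard normal matrix: under non-uniform scaling, transforming a normal by the model matrix itself breaks perpendicularity, while the inverse-transpose preserves it. A small NumPy check (the scale factors are arbitrary example values):

```python
import numpy as np

M = np.diag([2.0, 1.0, 1.0, 1.0])            # non-uniform scale along x
N = np.linalg.inv(M).T                       # normal matrix used by the shader

tangent = np.array([1.0, 1.0, 0.0, 0.0])     # direction lying in the surface
normal = np.array([-1.0, 1.0, 0.0, 0.0])     # perpendicular to the tangent

t_world = (M @ tangent)[:3]
print(np.dot((M @ normal)[:3], t_world))     # -3.0: naive transform is skewed
print(np.dot((N @ normal)[:3], t_world))     #  0.0: perpendicularity preserved
```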
diff --git a/pyrender/pyrender/shaders/mesh.frag b/pyrender/pyrender/shaders/mesh.frag
new file mode 100644
index 0000000000000000000000000000000000000000..43187621b4388b18badf4e562a7ad300e59b029d
--- /dev/null
+++ b/pyrender/pyrender/shaders/mesh.frag
@@ -0,0 +1,456 @@
+#version 330 core
+///////////////////////////////////////////////////////////////////////////////
+// Structs
+///////////////////////////////////////////////////////////////////////////////
+
+struct SpotLight {
+ vec3 color;
+ float intensity;
+ float range;
+ vec3 position;
+ vec3 direction;
+ float light_angle_scale;
+ float light_angle_offset;
+
+ #ifdef SPOT_LIGHT_SHADOWS
+ sampler2D shadow_map;
+ mat4 light_matrix;
+ #endif
+};
+
+struct DirectionalLight {
+ vec3 color;
+ float intensity;
+ vec3 direction;
+
+ #ifdef DIRECTIONAL_LIGHT_SHADOWS
+ sampler2D shadow_map;
+ mat4 light_matrix;
+ #endif
+};
+
+struct PointLight {
+ vec3 color;
+ float intensity;
+ float range;
+ vec3 position;
+
+ #ifdef POINT_LIGHT_SHADOWS
+ samplerCube shadow_map;
+ #endif
+};
+
+struct Material {
+ vec3 emissive_factor;
+
+#ifdef USE_METALLIC_MATERIAL
+ vec4 base_color_factor;
+ float metallic_factor;
+ float roughness_factor;
+#endif
+
+#ifdef USE_GLOSSY_MATERIAL
+ vec4 diffuse_factor;
+ vec3 specular_factor;
+ float glossiness_factor;
+#endif
+
+#ifdef HAS_NORMAL_TEX
+ sampler2D normal_texture;
+#endif
+#ifdef HAS_OCCLUSION_TEX
+ sampler2D occlusion_texture;
+#endif
+#ifdef HAS_EMISSIVE_TEX
+ sampler2D emissive_texture;
+#endif
+#ifdef HAS_BASE_COLOR_TEX
+ sampler2D base_color_texture;
+#endif
+#ifdef HAS_METALLIC_ROUGHNESS_TEX
+ sampler2D metallic_roughness_texture;
+#endif
+#ifdef HAS_DIFFUSE_TEX
+ sampler2D diffuse_texture;
+#endif
+#ifdef HAS_SPECULAR_GLOSSINESS_TEX
+ sampler2D specular_glossiness;
+#endif
+};
+
+struct PBRInfo {
+ float nl;
+ float nv;
+ float nh;
+ float lh;
+ float vh;
+ float roughness;
+ float metallic;
+ vec3 f0;
+ vec3 c_diff;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Uniforms
+///////////////////////////////////////////////////////////////////////////////
+uniform Material material;
+uniform PointLight point_lights[MAX_POINT_LIGHTS];
+uniform int n_point_lights;
+uniform DirectionalLight directional_lights[MAX_DIRECTIONAL_LIGHTS];
+uniform int n_directional_lights;
+uniform SpotLight spot_lights[MAX_SPOT_LIGHTS];
+uniform int n_spot_lights;
+uniform vec3 cam_pos;
+uniform vec3 ambient_light;
+
+#ifdef USE_IBL
+uniform samplerCube diffuse_env;
+uniform samplerCube specular_env;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// Inputs
+///////////////////////////////////////////////////////////////////////////////
+
+in vec3 frag_position;
+#ifdef NORMAL_LOC
+in vec3 frag_normal;
+#endif
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+in mat3 tbn;
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+in vec2 uv_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+in vec2 uv_1;
+#endif
+#ifdef COLOR_0_LOC
+in vec4 color_multiplier;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// OUTPUTS
+///////////////////////////////////////////////////////////////////////////////
+
+out vec4 frag_color;
+
+///////////////////////////////////////////////////////////////////////////////
+// Constants
+///////////////////////////////////////////////////////////////////////////////
+const float PI = 3.141592653589793;
+const float min_roughness = 0.04;
+
+///////////////////////////////////////////////////////////////////////////////
+// Utility Functions
+///////////////////////////////////////////////////////////////////////////////
+vec4 srgb_to_linear(vec4 srgb)
+{
+#ifndef SRGB_CORRECTED
+ // Fast Approximation
+ //vec3 linOut = pow(srgbIn.xyz,vec3(2.2));
+ //
+ vec3 b_less = step(vec3(0.04045),srgb.xyz);
+ vec3 lin_out = mix( srgb.xyz/vec3(12.92), pow((srgb.xyz+vec3(0.055))/vec3(1.055),vec3(2.4)), b_less );
+ return vec4(lin_out, srgb.w);
+#else
+ return srgb;
+#endif
+}
+
+// Normal computation
+vec3 get_normal()
+{
+#ifdef HAS_NORMAL_TEX
+
+#ifndef HAS_TANGENTS
+ vec3 pos_dx = dFdx(frag_position);
+ vec3 pos_dy = dFdy(frag_position);
+ vec3 tex_dx = dFdx(vec3(uv_0, 0.0));
+ vec3 tex_dy = dFdy(vec3(uv_0, 0.0));
+ vec3 t = (tex_dy.t * pos_dx - tex_dx.t * pos_dy) / (tex_dx.s * tex_dy.t - tex_dy.s * tex_dx.t);
+
+#ifdef NORMAL_LOC
+ vec3 ng = normalize(frag_normal);
+#else
+    vec3 ng = cross(pos_dx, pos_dy);
+#endif
+
+ t = normalize(t - ng * dot(ng, t));
+ vec3 b = normalize(cross(ng, t));
+ mat3 tbn_n = mat3(t, b, ng);
+
+#else
+
+ mat3 tbn_n = tbn;
+
+#endif
+
+ vec3 n = texture(material.normal_texture, uv_0).rgb;
+ n = normalize(tbn_n * ((2.0 * n - 1.0) * vec3(1.0, 1.0, 1.0)));
+ return n; // TODO NORMAL MAPPING
+
+#else
+
+#ifdef NORMAL_LOC
+ return frag_normal;
+#else
+ return normalize(cam_pos - frag_position);
+#endif
+
+#endif
+}
+
+// Fresnel
+vec3 specular_reflection(PBRInfo info)
+{
+ vec3 res = info.f0 + (1.0 - info.f0) * pow(clamp(1.0 - info.vh, 0.0, 1.0), 5.0);
+ return res;
+}
+
+// Smith
+float geometric_occlusion(PBRInfo info)
+{
+ float r = info.roughness + 1.0;
+ float k = r * r / 8.0;
+ float g1 = info.nv / (info.nv * (1.0 - k) + k);
+ float g2 = info.nl / (info.nl * (1.0 - k) + k);
+ //float k = info.roughness * sqrt(2.0 / PI);
+ //float g1 = info.lh / (info.lh * (1.0 - k) + k);
+ //float g2 = info.nh / (info.nh * (1.0 - k) + k);
+ return g1 * g2;
+}
+
+float microfacet_distribution(PBRInfo info)
+{
+ float a = info.roughness * info.roughness;
+ float a2 = a * a;
+ float nh2 = info.nh * info.nh;
+
+ float denom = (nh2 * (a2 - 1.0) + 1.0);
+ return a2 / (PI * denom * denom);
+}
+
+vec3 compute_brdf(vec3 n, vec3 v, vec3 l,
+ float roughness, float metalness,
+ vec3 f0, vec3 c_diff, vec3 albedo,
+ vec3 radiance)
+{
+ vec3 h = normalize(l+v);
+ float nl = clamp(dot(n, l), 0.001, 1.0);
+ float nv = clamp(abs(dot(n, v)), 0.001, 1.0);
+ float nh = clamp(dot(n, h), 0.0, 1.0);
+ float lh = clamp(dot(l, h), 0.0, 1.0);
+ float vh = clamp(dot(v, h), 0.0, 1.0);
+
+ PBRInfo info = PBRInfo(nl, nv, nh, lh, vh, roughness, metalness, f0, c_diff);
+
+ // Compute PBR terms
+ vec3 F = specular_reflection(info);
+ float G = geometric_occlusion(info);
+ float D = microfacet_distribution(info);
+
+ // Compute BRDF
+ vec3 diffuse_contrib = (1.0 - F) * c_diff / PI;
+ vec3 spec_contrib = F * G * D / (4.0 * nl * nv + 0.001);
+
+ vec3 color = nl * radiance * (diffuse_contrib + spec_contrib);
+ return color;
+}
+
+float texture2DCompare(sampler2D depths, vec2 uv, float compare) {
+ return compare > texture(depths, uv.xy).r ? 1.0 : 0.0;
+}
+
+float texture2DShadowLerp(sampler2D depths, vec2 size, vec2 uv, float compare) {
+ vec2 texelSize = vec2(1.0)/size;
+ vec2 f = fract(uv*size+0.5);
+ vec2 centroidUV = floor(uv*size+0.5)/size;
+
+ float lb = texture2DCompare(depths, centroidUV+texelSize*vec2(0.0, 0.0), compare);
+ float lt = texture2DCompare(depths, centroidUV+texelSize*vec2(0.0, 1.0), compare);
+ float rb = texture2DCompare(depths, centroidUV+texelSize*vec2(1.0, 0.0), compare);
+ float rt = texture2DCompare(depths, centroidUV+texelSize*vec2(1.0, 1.0), compare);
+ float a = mix(lb, lt, f.y);
+ float b = mix(rb, rt, f.y);
+ float c = mix(a, b, f.x);
+ return c;
+}
+
+float PCF(sampler2D depths, vec2 size, vec2 uv, float compare){
+ float result = 0.0;
+ for(int x=-1; x<=1; x++){
+ for(int y=-1; y<=1; y++){
+ vec2 off = vec2(x,y)/size;
+ result += texture2DShadowLerp(depths, size, uv+off, compare);
+ }
+ }
+ return result/9.0;
+}
+
+float shadow_calc(mat4 light_matrix, sampler2D shadow_map, float nl)
+{
+ // Compute light texture UV coords
+ vec4 proj_coords = vec4(light_matrix * vec4(frag_position.xyz, 1.0));
+ vec3 light_coords = proj_coords.xyz / proj_coords.w;
+ light_coords = light_coords * 0.5 + 0.5;
+ float current_depth = light_coords.z;
+ float bias = max(0.001 * (1.0 - nl), 0.0001) / proj_coords.w;
+ float compare = (current_depth - bias);
+ float shadow = PCF(shadow_map, textureSize(shadow_map, 0), light_coords.xy, compare);
+ if (light_coords.z > 1.0) {
+ shadow = 0.0;
+ }
+ return shadow;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// MAIN
+///////////////////////////////////////////////////////////////////////////////
+void main()
+{
+
+ vec4 color = vec4(vec3(0.0), 1.0);
+///////////////////////////////////////////////////////////////////////////////
+// Handle Metallic Materials
+///////////////////////////////////////////////////////////////////////////////
+#ifdef USE_METALLIC_MATERIAL
+
+ // Compute metallic/roughness factors
+ float roughness = material.roughness_factor;
+ float metallic = material.metallic_factor;
+#ifdef HAS_METALLIC_ROUGHNESS_TEX
+ vec2 mr = texture(material.metallic_roughness_texture, uv_0).rg;
+ roughness = roughness * mr.r;
+ metallic = metallic * mr.g;
+#endif
+ roughness = clamp(roughness, min_roughness, 1.0);
+ metallic = clamp(metallic, 0.0, 1.0);
+ // By convention, alpha roughness is the perceptual roughness squared
+ float alpha_roughness = roughness * roughness;
+
+ // Compute albedo
+ vec4 base_color = material.base_color_factor;
+#ifdef HAS_BASE_COLOR_TEX
+ base_color = base_color * srgb_to_linear(texture(material.base_color_texture, uv_0));
+#endif
+
+ // Compute specular and diffuse colors
+ vec3 dielectric_spec = vec3(min_roughness);
+ vec3 c_diff = mix(vec3(0.0), base_color.rgb * (1 - min_roughness), 1.0 - metallic);
+ vec3 f0 = mix(dielectric_spec, base_color.rgb, metallic);
+
+ // Compute normal
+ vec3 n = normalize(get_normal());
+
+ // Loop over lights
+ for (int i = 0; i < n_directional_lights; i++) {
+ vec3 direction = directional_lights[i].direction;
+ vec3 v = normalize(cam_pos - frag_position); // Vector towards camera
+ vec3 l = normalize(-1.0 * direction); // Vector towards light
+
+ // Compute attenuation and radiance
+ float attenuation = directional_lights[i].intensity;
+ vec3 radiance = attenuation * directional_lights[i].color;
+
+ // Compute outbound color
+ vec3 res = compute_brdf(n, v, l, roughness, metallic,
+ f0, c_diff, base_color.rgb, radiance);
+
+ // Compute shadow
+#ifdef DIRECTIONAL_LIGHT_SHADOWS
+ float nl = clamp(dot(n,l), 0.0, 1.0);
+ float shadow = shadow_calc(
+ directional_lights[i].light_matrix,
+ directional_lights[i].shadow_map,
+ nl
+ );
+ res = res * (1.0 - shadow);
+#endif
+ color.xyz += res;
+ }
+
+ for (int i = 0; i < n_point_lights; i++) {
+ vec3 position = point_lights[i].position;
+ vec3 v = normalize(cam_pos - frag_position); // Vector towards camera
+ vec3 l = normalize(position - frag_position); // Vector towards light
+
+ // Compute attenuation and radiance
+ float dist = length(position - frag_position);
+ float attenuation = point_lights[i].intensity / (dist * dist);
+ vec3 radiance = attenuation * point_lights[i].color;
+
+ // Compute outbound color
+ vec3 res = compute_brdf(n, v, l, roughness, metallic,
+ f0, c_diff, base_color.rgb, radiance);
+ color.xyz += res;
+ }
+ for (int i = 0; i < n_spot_lights; i++) {
+ vec3 position = spot_lights[i].position;
+ vec3 v = normalize(cam_pos - frag_position); // Vector towards camera
+ vec3 l = normalize(position - frag_position); // Vector towards light
+
+ // Compute attenuation and radiance
+ vec3 direction = spot_lights[i].direction;
+ float las = spot_lights[i].light_angle_scale;
+ float lao = spot_lights[i].light_angle_offset;
+ float dist = length(position - frag_position);
+ float cd = clamp(dot(direction, -l), 0.0, 1.0);
+ float attenuation = clamp(cd * las + lao, 0.0, 1.0);
+ attenuation = attenuation * attenuation * spot_lights[i].intensity;
+ attenuation = attenuation / (dist * dist);
+ vec3 radiance = attenuation * spot_lights[i].color;
+
+ // Compute outbound color
+ vec3 res = compute_brdf(n, v, l, roughness, metallic,
+ f0, c_diff, base_color.rgb, radiance);
+#ifdef SPOT_LIGHT_SHADOWS
+ float nl = clamp(dot(n,l), 0.0, 1.0);
+ float shadow = shadow_calc(
+ spot_lights[i].light_matrix,
+ spot_lights[i].shadow_map,
+ nl
+ );
+ res = res * (1.0 - shadow);
+#endif
+ color.xyz += res;
+ }
+ color.xyz += base_color.xyz * ambient_light;
+
+ // Calculate lighting from environment
+#ifdef USE_IBL
+ // TODO
+#endif
+
+ // Apply occlusion
+#ifdef HAS_OCCLUSION_TEX
+ float ao = texture(material.occlusion_texture, uv_0).r;
+ color.xyz *= ao;
+#endif
+
+ // Apply emissive map
+ vec3 emissive = material.emissive_factor;
+#ifdef HAS_EMISSIVE_TEX
+ emissive *= srgb_to_linear(texture(material.emissive_texture, uv_0)).rgb;
+#endif
+ color.xyz += emissive;
+
+#ifdef COLOR_0_LOC
+ color *= color_multiplier;
+#endif
+
+ frag_color = clamp(vec4(pow(color.xyz, vec3(1.0/2.2)), color.a * base_color.a), 0.0, 1.0);
+
+#else
+ // TODO GLOSSY MATERIAL BRDF
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// Handle Glossy Materials
+///////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/pyrender/pyrender/shaders/mesh.vert b/pyrender/pyrender/shaders/mesh.vert
new file mode 100644
index 0000000000000000000000000000000000000000..cfd241c3544718a261f961c3aa3c03aa13c97761
--- /dev/null
+++ b/pyrender/pyrender/shaders/mesh.vert
@@ -0,0 +1,86 @@
+#version 330 core
+
+// Vertex Attributes
+layout(location = 0) in vec3 position;
+#ifdef NORMAL_LOC
+layout(location = NORMAL_LOC) in vec3 normal;
+#endif
+#ifdef TANGENT_LOC
+layout(location = TANGENT_LOC) in vec4 tangent;
+#endif
+#ifdef TEXCOORD_0_LOC
+layout(location = TEXCOORD_0_LOC) in vec2 texcoord_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+layout(location = TEXCOORD_1_LOC) in vec2 texcoord_1;
+#endif
+#ifdef COLOR_0_LOC
+layout(location = COLOR_0_LOC) in vec4 color_0;
+#endif
+#ifdef JOINTS_0_LOC
+layout(location = JOINTS_0_LOC) in vec4 joints_0;
+#endif
+#ifdef WEIGHTS_0_LOC
+layout(location = WEIGHTS_0_LOC) in vec4 weights_0;
+#endif
+layout(location = INST_M_LOC) in mat4 inst_m;
+
+// Uniforms
+uniform mat4 M;
+uniform mat4 V;
+uniform mat4 P;
+
+// Outputs
+out vec3 frag_position;
+#ifdef NORMAL_LOC
+out vec3 frag_normal;
+#endif
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+out mat3 tbn;
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+out vec2 uv_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+out vec2 uv_1;
+#endif
+#ifdef COLOR_0_LOC
+out vec4 color_multiplier;
+#endif
+
+
+void main()
+{
+ gl_Position = P * V * M * inst_m * vec4(position, 1);
+ frag_position = vec3(M * inst_m * vec4(position, 1.0));
+
+ mat4 N = transpose(inverse(M * inst_m));
+
+#ifdef NORMAL_LOC
+ frag_normal = normalize(vec3(N * vec4(normal, 0.0)));
+#endif
+
+#ifdef HAS_NORMAL_TEX
+#ifdef TANGENT_LOC
+#ifdef NORMAL_LOC
+ vec3 normal_w = normalize(vec3(N * vec4(normal, 0.0)));
+ vec3 tangent_w = normalize(vec3(N * vec4(tangent.xyz, 0.0)));
+ vec3 bitangent_w = cross(normal_w, tangent_w) * tangent.w;
+ tbn = mat3(tangent_w, bitangent_w, normal_w);
+#endif
+#endif
+#endif
+#ifdef TEXCOORD_0_LOC
+ uv_0 = texcoord_0;
+#endif
+#ifdef TEXCOORD_1_LOC
+ uv_1 = texcoord_1;
+#endif
+#ifdef COLOR_0_LOC
+ color_multiplier = color_0;
+#endif
+}
diff --git a/pyrender/pyrender/shaders/mesh_depth.frag b/pyrender/pyrender/shaders/mesh_depth.frag
new file mode 100644
index 0000000000000000000000000000000000000000..d8b1fac6091cfa457ba835ae0758e955f06d8754
--- /dev/null
+++ b/pyrender/pyrender/shaders/mesh_depth.frag
@@ -0,0 +1,8 @@
+#version 330 core
+
+out vec4 frag_color;
+
+void main()
+{
+ frag_color = vec4(1.0);
+}
diff --git a/pyrender/pyrender/shaders/mesh_depth.vert b/pyrender/pyrender/shaders/mesh_depth.vert
new file mode 100644
index 0000000000000000000000000000000000000000..e534c058fb3e7b0efbec090513d55982db68ccaf
--- /dev/null
+++ b/pyrender/pyrender/shaders/mesh_depth.vert
@@ -0,0 +1,13 @@
+#version 330 core
+layout(location = 0) in vec3 position;
+layout(location = INST_M_LOC) in mat4 inst_m;
+
+uniform mat4 P;
+uniform mat4 V;
+uniform mat4 M;
+
+void main()
+{
+ mat4 light_matrix = P * V;
+ gl_Position = light_matrix * M * inst_m * vec4(position, 1.0);
+}
diff --git a/pyrender/pyrender/shaders/segmentation.frag b/pyrender/pyrender/shaders/segmentation.frag
new file mode 100644
index 0000000000000000000000000000000000000000..40deb92cbdef3ec9fd952632624cd5f4b5ce0c84
--- /dev/null
+++ b/pyrender/pyrender/shaders/segmentation.frag
@@ -0,0 +1,13 @@
+#version 330 core
+
+uniform vec3 color;
+out vec4 frag_color;
+
+///////////////////////////////////////////////////////////////////////////////
+// MAIN
+///////////////////////////////////////////////////////////////////////////////
+void main()
+{
+ frag_color = vec4(color, 1.0);
+ //frag_color = vec4(1.0, 0.5, 0.5, 1.0);
+}
diff --git a/pyrender/pyrender/shaders/segmentation.vert b/pyrender/pyrender/shaders/segmentation.vert
new file mode 100644
index 0000000000000000000000000000000000000000..503382599dae3c9415845f35b99d6678cfc7f716
--- /dev/null
+++ b/pyrender/pyrender/shaders/segmentation.vert
@@ -0,0 +1,14 @@
+#version 330 core
+layout(location = 0) in vec3 position;
+layout(location = INST_M_LOC) in mat4 inst_m;
+
+uniform mat4 P;
+uniform mat4 V;
+uniform mat4 M;
+
+void main()
+{
+ mat4 light_matrix = P * V;
+ gl_Position = light_matrix * M * inst_m * vec4(position, 1.0);
+}
+
diff --git a/pyrender/pyrender/shaders/text.frag b/pyrender/pyrender/shaders/text.frag
new file mode 100644
index 0000000000000000000000000000000000000000..486c97dc94ed5e9083ae348bc1e85c5cb26c44dc
--- /dev/null
+++ b/pyrender/pyrender/shaders/text.frag
@@ -0,0 +1,12 @@
+#version 330 core
+in vec2 uv;
+out vec4 color;
+
+uniform sampler2D text;
+uniform vec4 text_color;
+
+void main()
+{
+ vec4 sampled = vec4(1.0, 1.0, 1.0, texture(text, uv).r);
+ color = text_color * sampled;
+}
diff --git a/pyrender/pyrender/shaders/text.vert b/pyrender/pyrender/shaders/text.vert
new file mode 100644
index 0000000000000000000000000000000000000000..005bc439b3d63522df99e5db2088953eb8defcf4
--- /dev/null
+++ b/pyrender/pyrender/shaders/text.vert
@@ -0,0 +1,12 @@
+#version 330 core
+layout (location = 0) in vec4 vertex;
+
+out vec2 uv;
+
+uniform mat4 projection;
+
+void main()
+{
+ gl_Position = projection * vec4(vertex.xy, 0.0, 1.0);
+ uv = vertex.zw;
+}
diff --git a/pyrender/pyrender/shaders/vertex_normals.frag b/pyrender/pyrender/shaders/vertex_normals.frag
new file mode 100644
index 0000000000000000000000000000000000000000..edf5beb7f283dd67e1710bff922555539966cee4
--- /dev/null
+++ b/pyrender/pyrender/shaders/vertex_normals.frag
@@ -0,0 +1,10 @@
+#version 330 core
+
+out vec4 frag_color;
+
+uniform vec4 normal_color;
+
+void main()
+{
+ frag_color = normal_color;
+}
diff --git a/pyrender/pyrender/shaders/vertex_normals.geom b/pyrender/pyrender/shaders/vertex_normals.geom
new file mode 100644
index 0000000000000000000000000000000000000000..57f0b0e645e72d41116f5767d66fc37d01ed2714
--- /dev/null
+++ b/pyrender/pyrender/shaders/vertex_normals.geom
@@ -0,0 +1,74 @@
+#version 330 core
+
+layout (triangles) in;
+
+#ifdef FACE_NORMALS
+
+#ifdef VERTEX_NORMALS
+ layout (line_strip, max_vertices = 8) out;
+#else
+ layout (line_strip, max_vertices = 2) out;
+#endif
+
+#else
+
+ layout (line_strip, max_vertices = 6) out;
+
+#endif
+
+in VS_OUT {
+ vec3 position;
+ vec3 normal;
+ mat4 mvp;
+} gs_in[];
+
+uniform float normal_magnitude;
+
+void GenerateVertNormal(int index)
+{
+
+ vec4 p0 = gs_in[index].mvp * vec4(gs_in[index].position, 1.0);
+ vec4 p1 = gs_in[index].mvp * vec4(normal_magnitude * normalize(gs_in[index].normal) + gs_in[index].position, 1.0);
+ gl_Position = p0;
+ EmitVertex();
+ gl_Position = p1;
+ EmitVertex();
+ EndPrimitive();
+}
+
+void GenerateFaceNormal()
+{
+ vec3 p0 = gs_in[0].position.xyz;
+ vec3 p1 = gs_in[1].position.xyz;
+ vec3 p2 = gs_in[2].position.xyz;
+
+ vec3 v0 = p0 - p1;
+ vec3 v1 = p2 - p1;
+
+ vec3 N = normalize(cross(v1, v0));
+ vec3 P = (p0 + p1 + p2) / 3.0;
+
+ vec4 np0 = gs_in[0].mvp * vec4(P, 1.0);
+ vec4 np1 = gs_in[0].mvp * vec4(normal_magnitude * N + P, 1.0);
+
+ gl_Position = np0;
+ EmitVertex();
+ gl_Position = np1;
+ EmitVertex();
+ EndPrimitive();
+}
+
+void main()
+{
+
+#ifdef FACE_NORMALS
+ GenerateFaceNormal();
+#endif
+
+#ifdef VERTEX_NORMALS
+ GenerateVertNormal(0);
+ GenerateVertNormal(1);
+ GenerateVertNormal(2);
+#endif
+
+}
diff --git a/pyrender/pyrender/shaders/vertex_normals.vert b/pyrender/pyrender/shaders/vertex_normals.vert
new file mode 100644
index 0000000000000000000000000000000000000000..be22eed2a0e904bcaf1ac5a4721558e574cddc62
--- /dev/null
+++ b/pyrender/pyrender/shaders/vertex_normals.vert
@@ -0,0 +1,27 @@
+#version 330 core
+
+// Inputs
+layout(location = 0) in vec3 position;
+layout(location = NORMAL_LOC) in vec3 normal;
+layout(location = INST_M_LOC) in mat4 inst_m;
+
+// Output data
+out VS_OUT {
+ vec3 position;
+ vec3 normal;
+ mat4 mvp;
+} vs_out;
+
+// Uniform data
+uniform mat4 M;
+uniform mat4 V;
+uniform mat4 P;
+
+// Render loop
+void main() {
+ vs_out.mvp = P * V * M * inst_m;
+ vs_out.position = position;
+ vs_out.normal = normal;
+
+ gl_Position = vec4(position, 1.0);
+}
diff --git a/pyrender/pyrender/shaders/vertex_normals_pc.geom b/pyrender/pyrender/shaders/vertex_normals_pc.geom
new file mode 100644
index 0000000000000000000000000000000000000000..4ea4e7b8542703f64b8d28fd187e425137861fe4
--- /dev/null
+++ b/pyrender/pyrender/shaders/vertex_normals_pc.geom
@@ -0,0 +1,29 @@
+#version 330 core
+
+layout (points) in;
+
+layout (line_strip, max_vertices = 2) out;
+
+in VS_OUT {
+ vec3 position;
+ vec3 normal;
+ mat4 mvp;
+} gs_in[];
+
+uniform float normal_magnitude;
+
+void GenerateVertNormal(int index)
+{
+ vec4 p0 = gs_in[index].mvp * vec4(gs_in[index].position, 1.0);
+ vec4 p1 = gs_in[index].mvp * vec4(normal_magnitude * normalize(gs_in[index].normal) + gs_in[index].position, 1.0);
+ gl_Position = p0;
+ EmitVertex();
+ gl_Position = p1;
+ EmitVertex();
+ EndPrimitive();
+}
+
+void main()
+{
+ GenerateVertNormal(0);
+}
diff --git a/pyrender/pyrender/texture.py b/pyrender/pyrender/texture.py
new file mode 100644
index 0000000000000000000000000000000000000000..477759729d7b995a4f276e81d649617d045a066e
--- /dev/null
+++ b/pyrender/pyrender/texture.py
@@ -0,0 +1,259 @@
+"""Textures, conforming to the glTF 2.0 standards as specified in
+https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#reference-texture
+
+Author: Matthew Matl
+"""
+import numpy as np
+
+from OpenGL.GL import *
+
+from .utils import format_texture_source
+from .sampler import Sampler
+
+
+class Texture(object):
+ """A texture and its sampler.
+
+ Parameters
+ ----------
+ name : str, optional
+ The user-defined name of this object.
+ sampler : :class:`Sampler`
+ The sampler used by this texture.
+ source : (h,w,c) uint8 or (h,w,c) float or :class:`PIL.Image.Image`
+ The image used by this texture. If None, the texture is created
+ empty and width and height must be specified.
+ source_channels : str
+ Either `D`, `R`, `RG`, `GB`, `RGB`, or `RGBA`. Indicates the
+ channels to extract from `source`. Any missing channels will be filled
+ with `1.0`.
+ width : int, optional
+ For empty textures, the width of the texture buffer.
+ height : int, optional
+ For empty textures, the height of the texture buffer.
+ tex_type : int
+ Either GL_TEXTURE_2D or GL_TEXTURE_CUBE_MAP.
+ data_format : int
+ The OpenGL data type of the source data, e.g. GL_UNSIGNED_BYTE
+ (the default) or GL_FLOAT.
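+
+ Note
+ ----
+ A minimal construction sketch (``img`` here is an assumed ``(h, w, 3)``
+ ``uint8`` image array): ``Texture(source=img, source_channels='RGB')``.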
+ """
+
+ def __init__(self,
+ name=None,
+ sampler=None,
+ source=None,
+ source_channels=None,
+ width=None,
+ height=None,
+ tex_type=GL_TEXTURE_2D,
+ data_format=GL_UNSIGNED_BYTE):
+ self.source_channels = source_channels
+ self.name = name
+ self.sampler = sampler
+ self.source = source
+ self.width = width
+ self.height = height
+ self.tex_type = tex_type
+ self.data_format = data_format
+
+ self._texid = None
+ self._is_transparent = False
+
+ @property
+ def name(self):
+ """str : The user-defined name of this object.
+ """
+ return self._name
+
+ @name.setter
+ def name(self, value):
+ if value is not None:
+ value = str(value)
+ self._name = value
+
+ @property
+ def sampler(self):
+ """:class:`Sampler` : The sampler used by this texture.
+ """
+ return self._sampler
+
+ @sampler.setter
+ def sampler(self, value):
+ if value is None:
+ value = Sampler()
+ self._sampler = value
+
+ @property
+ def source(self):
+ """(h,w,c) uint8 or float or :class:`PIL.Image.Image` : The image
+ used in this texture.
+ """
+ return self._source
+
+ @source.setter
+ def source(self, value):
+ if value is None:
+ self._source = None
+ else:
+ self._source = format_texture_source(value, self.source_channels)
+ self._is_transparent = False
+
+ @property
+ def source_channels(self):
+ """str : The channels that were extracted from the original source.
+ """
+ return self._source_channels
+
+ @source_channels.setter
+ def source_channels(self, value):
+ self._source_channels = value
+
+ @property
+ def width(self):
+ """int : The width of the texture buffer.
+ """
+ return self._width
+
+ @width.setter
+ def width(self, value):
+ self._width = value
+
+ @property
+ def height(self):
+ """int : The height of the texture buffer.
+ """
+ return self._height
+
+ @height.setter
+ def height(self, value):
+ self._height = value
+
+ @property
+ def tex_type(self):
+ """int : The type of the texture.
+ """
+ return self._tex_type
+
+ @tex_type.setter
+ def tex_type(self, value):
+ self._tex_type = value
+
+ @property
+ def data_format(self):
+ """int : The format of the texture data.
+ """
+ return self._data_format
+
+ @data_format.setter
+ def data_format(self, value):
+ self._data_format = value
+
+ def is_transparent(self, cutoff=1.0):
+ """bool : If True, the texture is partially transparent.
+ """
+ if self._is_transparent is None:
+ self._is_transparent = False
+ if self.source_channels == 'RGBA' and self.source is not None:
+ if np.any(self.source[:,:,3] < cutoff):
+ self._is_transparent = True
+ return self._is_transparent
+
+ def delete(self):
+ """Remove this texture from the OpenGL context.
+ """
+ self._unbind()
+ self._remove_from_context()
+
+ ##################
+ # OpenGL code
+ ##################
+ def _add_to_context(self):
+ if self._texid is not None:
+ raise ValueError('Texture already loaded into OpenGL context')
+
+ fmt = GL_DEPTH_COMPONENT
+ if self.source_channels == 'R':
+ fmt = GL_RED
+ elif self.source_channels == 'RG' or self.source_channels == 'GB':
+ fmt = GL_RG
+ elif self.source_channels == 'RGB':
+ fmt = GL_RGB
+ elif self.source_channels == 'RGBA':
+ fmt = GL_RGBA
+
+ # Generate the OpenGL texture
+ self._texid = glGenTextures(1)
+ glBindTexture(self.tex_type, self._texid)
+
+ # Flip data for OpenGL buffer
+ data = None
+ width = self.width
+ height = self.height
+ if self.source is not None:
+ data = np.ascontiguousarray(np.flip(self.source, axis=0).flatten())
+ width = self.source.shape[1]
+ height = self.source.shape[0]
+
+ # Bind texture and generate mipmaps
+ glTexImage2D(
+ self.tex_type, 0, fmt, width, height, 0, fmt,
+ self.data_format, data
+ )
+ if self.source is not None:
+ glGenerateMipmap(self.tex_type)
+
+ if self.sampler.magFilter is not None:
+ glTexParameteri(
+ self.tex_type, GL_TEXTURE_MAG_FILTER, self.sampler.magFilter
+ )
+ else:
+ if self.source is not None:
+ glTexParameteri(self.tex_type, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
+ else:
+ glTexParameteri(self.tex_type, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
+ if self.sampler.minFilter is not None:
+ glTexParameteri(
+ self.tex_type, GL_TEXTURE_MIN_FILTER, self.sampler.minFilter
+ )
+ else:
+ if self.source is not None:
+ glTexParameteri(self.tex_type, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR)
+ else:
+ glTexParameteri(self.tex_type, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
+
+ glTexParameteri(self.tex_type, GL_TEXTURE_WRAP_S, self.sampler.wrapS)
+ glTexParameteri(self.tex_type, GL_TEXTURE_WRAP_T, self.sampler.wrapT)
+ border_color = 255 * np.ones(4).astype(np.uint8)
+ if self.data_format == GL_FLOAT:
+ border_color = np.ones(4).astype(np.float32)
+ glTexParameterfv(
+ self.tex_type, GL_TEXTURE_BORDER_COLOR,
+ border_color
+ )
+
+ # Unbind texture
+ glBindTexture(self.tex_type, 0)
+
+ def _remove_from_context(self):
+ if self._texid is not None:
+ # TODO OPENGL BUG?
+ # glDeleteTextures(1, [self._texid])
+ glDeleteTextures([self._texid])
+ self._texid = None
+
+ def _in_context(self):
+ return self._texid is not None
+
+ def _bind(self):
+ # TODO HANDLE INDEXING INTO OTHER UV's
+ glBindTexture(self.tex_type, self._texid)
+
+ def _unbind(self):
+ glBindTexture(self.tex_type, 0)
+
+ def _bind_as_depth_attachment(self):
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
+ self.tex_type, self._texid, 0)
+
+ def _bind_as_color_attachment(self):
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ self.tex_type, self._texid, 0)
diff --git a/pyrender/pyrender/trackball.py b/pyrender/pyrender/trackball.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e57a0e82d3f07b80754f575c28a0e05cb73fc50
--- /dev/null
+++ b/pyrender/pyrender/trackball.py
@@ -0,0 +1,216 @@
+"""Trackball class for 3D manipulation of viewpoints.
+"""
+import numpy as np
+
+import trimesh.transformations as transformations
+
+
+class Trackball(object):
+ """A trackball class for creating camera transforms from mouse movements.
+ """
+ STATE_ROTATE = 0
+ STATE_PAN = 1
+ STATE_ROLL = 2
+ STATE_ZOOM = 3
+
+ def __init__(self, pose, size, scale,
+ target=np.array([0.0, 0.0, 0.0])):
+ """Initialize a trackball with an initial camera-to-world pose
+ and the given parameters.
+
+ Parameters
+ ----------
+ pose : [4,4]
+ An initial camera-to-world pose for the trackball.
+
+ size : (float, float)
+ The width and height of the camera image in pixels.
+
+ scale : float
+ The diagonal of the scene's bounding box --
+ used for ensuring translation motions are sufficiently
+ fast for differently-sized scenes.
+
+ target : (3,) float
+ The center of the scene in world coordinates.
+ The trackball will revolve around this point.
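+
+ A minimal usage sketch (the pixel coordinates are illustrative):
+
+ >>> tb = Trackball(np.eye(4), size=(640, 480), scale=1.0)
+ >>> tb.down((320, 240))   # initial mouse press
+ >>> tb.drag((330, 250))   # drag updates the internal pose
+ >>> new_pose = tb.pose    # (4,4) camera-to-world matrix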
+ """
+ self._size = np.array(size)
+ self._scale = float(scale)
+
+ self._pose = pose
+ self._n_pose = pose
+
+ self._target = target
+ self._n_target = target
+
+ self._state = Trackball.STATE_ROTATE
+
+ @property
+ def pose(self):
+ """autolab_core.RigidTransform : The current camera-to-world pose.
+ """
+ return self._n_pose
+
+ def set_state(self, state):
+ """Set the state of the trackball in order to change the effect of
+ dragging motions.
+
+ Parameters
+ ----------
+ state : int
+ One of Trackball.STATE_ROTATE, Trackball.STATE_PAN,
+ Trackball.STATE_ROLL, and Trackball.STATE_ZOOM.
+ """
+ self._state = state
+
+ def resize(self, size):
+ """Resize the window.
+
+ Parameters
+ ----------
+ size : (float, float)
+ The new width and height of the camera image in pixels.
+ """
+ self._size = np.array(size)
+
+ def down(self, point):
+ """Record an initial mouse press at a given point.
+
+ Parameters
+ ----------
+ point : (2,) int
+ The x and y pixel coordinates of the mouse press.
+ """
+ self._pdown = np.array(point, dtype=np.float32)
+ self._pose = self._n_pose
+ self._target = self._n_target
+
+ def drag(self, point):
+ """Update the tracball during a drag.
+
+ Parameters
+ ----------
+ point : (2,) int
+ The current x and y pixel coordinates of the mouse during a drag.
+ This will compute a movement for the trackball with the relative
+ motion between this point and the one marked by down().
+ """
+ point = np.array(point, dtype=np.float32)
+ dx, dy = point - self._pdown
+ mindim = 0.3 * np.min(self._size)
+
+ target = self._target
+ x_axis = self._pose[:3,0].flatten()
+ y_axis = self._pose[:3,1].flatten()
+ z_axis = self._pose[:3,2].flatten()
+ eye = self._pose[:3,3].flatten()
+
+ # Interpret drag as a rotation
+ if self._state == Trackball.STATE_ROTATE:
+ x_angle = -dx / mindim
+ x_rot_mat = transformations.rotation_matrix(
+ x_angle, y_axis, target
+ )
+
+ y_angle = dy / mindim
+ y_rot_mat = transformations.rotation_matrix(
+ y_angle, x_axis, target
+ )
+
+ self._n_pose = y_rot_mat.dot(x_rot_mat.dot(self._pose))
+
+ # Interpret drag as a roll about the camera axis
+ elif self._state == Trackball.STATE_ROLL:
+ center = self._size / 2.0
+ v_init = self._pdown - center
+ v_curr = point - center
+ v_init = v_init / np.linalg.norm(v_init)
+ v_curr = v_curr / np.linalg.norm(v_curr)
+
+ theta = (-np.arctan2(v_curr[1], v_curr[0]) +
+ np.arctan2(v_init[1], v_init[0]))
+
+ rot_mat = transformations.rotation_matrix(theta, z_axis, target)
+
+ self._n_pose = rot_mat.dot(self._pose)
+
+ # Interpret drag as a camera pan in view plane
+ elif self._state == Trackball.STATE_PAN:
+ dx = -dx / (5.0 * mindim) * self._scale
+ dy = -dy / (5.0 * mindim) * self._scale
+
+ translation = dx * x_axis + dy * y_axis
+ self._n_target = self._target + translation
+ t_tf = np.eye(4)
+ t_tf[:3,3] = translation
+ self._n_pose = t_tf.dot(self._pose)
+
+ # Interpret drag as a zoom motion
+ elif self._state == Trackball.STATE_ZOOM:
+ radius = np.linalg.norm(eye - target)
+ ratio = 0.0
+ if dy > 0:
+ ratio = np.exp(abs(dy) / (0.5 * self._size[1])) - 1.0
+ elif dy < 0:
+ ratio = 1.0 - np.exp(dy / (0.5 * (self._size[1])))
+ translation = -np.sign(dy) * ratio * radius * z_axis
+ t_tf = np.eye(4)
+ t_tf[:3,3] = translation
+ self._n_pose = t_tf.dot(self._pose)
+
+ def scroll(self, clicks):
+ """Zoom using a mouse scroll wheel motion.
+
+ Parameters
+ ----------
+ clicks : int
+ The number of clicks. Positive numbers indicate forward wheel
+ movement.
+ """
+ target = self._target
+ ratio = 0.90
+
+ mult = 1.0
+ if clicks > 0:
+ mult = ratio**clicks
+ elif clicks < 0:
+ mult = (1.0 / ratio)**abs(clicks)
+
+ z_axis = self._n_pose[:3,2].flatten()
+ eye = self._n_pose[:3,3].flatten()
+ radius = np.linalg.norm(eye - target)
+ translation = (mult * radius - radius) * z_axis
+ t_tf = np.eye(4)
+ t_tf[:3,3] = translation
+ self._n_pose = t_tf.dot(self._n_pose)
+
+ z_axis = self._pose[:3,2].flatten()
+ eye = self._pose[:3,3].flatten()
+ radius = np.linalg.norm(eye - target)
+ translation = (mult * radius - radius) * z_axis
+ t_tf = np.eye(4)
+ t_tf[:3,3] = translation
+ self._pose = t_tf.dot(self._pose)
+
+ def rotate(self, azimuth, axis=None):
+ """Rotate the trackball about the "Up" axis by azimuth radians.
+
+ Parameters
+ ----------
+ azimuth : float
+ The number of radians to rotate.
+ axis : (3,) float, optional
+ The axis to rotate about. If None, the trackball's current up
+ (y) axis is used.
+ """
+ target = self._target
+
+ y_axis = self._n_pose[:3,1].flatten()
+ if axis is not None:
+ y_axis = axis
+ x_rot_mat = transformations.rotation_matrix(azimuth, y_axis, target)
+ self._n_pose = x_rot_mat.dot(self._n_pose)
+
+ y_axis = self._pose[:3,1].flatten()
+ if axis is not None:
+ y_axis = axis
+ x_rot_mat = transformations.rotation_matrix(azimuth, y_axis, target)
+ self._pose = x_rot_mat.dot(self._pose)
diff --git a/pyrender/pyrender/utils.py b/pyrender/pyrender/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..48a11faf991606ad7fb0691582f0bc6f06101a45
--- /dev/null
+++ b/pyrender/pyrender/utils.py
@@ -0,0 +1,115 @@
+import numpy as np
+from PIL import Image
+
+
+def format_color_vector(value, length):
+ """Format a color vector.
+ """
+ if isinstance(value, int):
+ value = value / 255.0
+ if isinstance(value, float):
+ value = np.repeat(value, length)
+ if isinstance(value, list) or isinstance(value, tuple):
+ value = np.array(value)
+ if isinstance(value, np.ndarray):
+ value = value.squeeze()
+ if np.issubdtype(value.dtype, np.integer):
+ value = (value / 255.0).astype(np.float32)
+ if value.ndim != 1:
+ raise ValueError('Format vector takes only 1-D vectors')
+ if length > value.shape[0]:
+ value = np.hstack((value, np.ones(length - value.shape[0])))
+ elif length < value.shape[0]:
+ value = value[:length]
+ else:
+ raise ValueError('Invalid vector data type')
+
+ return value.squeeze().astype(np.float32)
+
+
+def format_color_array(value, shape):
+ """Format an array of colors.
+ """
+ # Convert uint8 to floating
+ value = np.asanyarray(value)
+ if np.issubdtype(value.dtype, np.integer):
+ value = (value / 255.0).astype(np.float32)
+
+ # Match up shapes
+ if value.ndim == 1:
+ value = np.tile(value, (shape[0],1))
+ if value.shape[1] < shape[1]:
+ nc = shape[1] - value.shape[1]
+ value = np.column_stack((value, np.ones((value.shape[0], nc))))
+ elif value.shape[1] > shape[1]:
+ value = value[:,:shape[1]]
+ return value.astype(np.float32)
+
+
+def format_texture_source(texture, target_channels='RGB'):
+ """Format a texture as a float32 np array.
+ """
+
+ # Pass through None
+ if texture is None:
+ return None
+
+ # Convert PIL images into numpy arrays
+ if isinstance(texture, Image.Image):
+ if texture.mode == 'P' and target_channels in ('RGB', 'RGBA'):
+ texture = np.array(texture.convert(target_channels))
+ else:
+ texture = np.array(texture)
+
+ # Format numpy arrays
+ if isinstance(texture, np.ndarray):
+ if np.issubdtype(texture.dtype, np.floating):
+ texture = np.array(texture * 255.0, dtype=np.uint8)
+ elif np.issubdtype(texture.dtype, np.integer):
+ texture = texture.astype(np.uint8)
+ else:
+ raise TypeError('Invalid type {} for texture'.format(
+ type(texture)
+ ))
+
+ # Format array by picking out correct texture channels or padding
+ if texture.ndim == 2:
+ texture = texture[:,:,np.newaxis]
+ if target_channels == 'R':
+ texture = texture[:,:,0]
+ texture = texture.squeeze()
+ elif target_channels == 'RG':
+ if texture.shape[2] == 1:
+ texture = np.repeat(texture, 2, axis=2)
+ else:
+ texture = texture[:,:,(0,1)]
+ elif target_channels == 'GB':
+ if texture.shape[2] == 1:
+ texture = np.repeat(texture, 2, axis=2)
+ elif texture.shape[2] > 2:
+ texture = texture[:,:,(1,2)]
+ elif target_channels == 'RGB':
+ if texture.shape[2] == 1:
+ texture = np.repeat(texture, 3, axis=2)
+ elif texture.shape[2] == 2:
+ raise ValueError('Cannot reformat 2-channel texture into RGB')
+ else:
+ texture = texture[:,:,(0,1,2)]
+ elif target_channels == 'RGBA':
+ if texture.shape[2] == 1:
+ texture = np.repeat(texture, 4, axis=2)
+ texture[:,:,3] = 255
+ elif texture.shape[2] == 2:
+ raise ValueError('Cannot reformat 2-channel texture into RGBA')
+ elif texture.shape[2] == 3:
+ tx = np.empty((texture.shape[0], texture.shape[1], 4), dtype=np.uint8)
+ tx[:,:,:3] = texture
+ tx[:,:,3] = 255
+ texture = tx
+ else:
+ raise ValueError('Invalid texture channel specification: {}'
+ .format(target_channels))
+ else:
+ raise TypeError('Invalid type {} for texture'.format(type(texture)))
+
+ return texture
diff --git a/pyrender/pyrender/version.py b/pyrender/pyrender/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..a33fc87f61f528780e3319a5160769cc84512b1b
--- /dev/null
+++ b/pyrender/pyrender/version.py
@@ -0,0 +1 @@
+__version__ = '0.1.45'
diff --git a/pyrender/pyrender/viewer.py b/pyrender/pyrender/viewer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2326c38205c6eaddb4f567e3b088329187af258
--- /dev/null
+++ b/pyrender/pyrender/viewer.py
@@ -0,0 +1,1160 @@
+"""A pyglet-based interactive 3D scene viewer.
+"""
+import copy
+import os
+import sys
+from threading import Thread, RLock
+import time
+
+import imageio
+import numpy as np
+import OpenGL
+import trimesh
+
+try:
+ from Tkinter import Tk, tkFileDialog as filedialog
+except Exception:
+ try:
+ from tkinter import Tk, filedialog as filedialog
+ except Exception:
+ pass
+
+from .constants import (TARGET_OPEN_GL_MAJOR, TARGET_OPEN_GL_MINOR,
+ MIN_OPEN_GL_MAJOR, MIN_OPEN_GL_MINOR,
+ TEXT_PADDING, DEFAULT_SCENE_SCALE,
+ DEFAULT_Z_FAR, DEFAULT_Z_NEAR, RenderFlags, TextAlign)
+from .light import DirectionalLight
+from .node import Node
+from .camera import PerspectiveCamera, OrthographicCamera, IntrinsicsCamera
+from .trackball import Trackball
+from .renderer import Renderer
+from .mesh import Mesh
+
+import pyglet
+from pyglet import clock
+pyglet.options['shadow_window'] = False
+
+
+class Viewer(pyglet.window.Window):
+ """An interactive viewer for 3D scenes.
+
+ The viewer's camera is separate from the scene's, but will take on
+ the parameters of the scene's main view camera and start in the same pose.
+ If the scene does not have a camera, a suitable default will be provided.
+
+ Parameters
+ ----------
+ scene : :class:`Scene`
+ The scene to visualize.
+ viewport_size : (2,) int
+ The width and height of the initial viewing window.
+ render_flags : dict
+ A set of flags for rendering the scene. Described in the note below.
+ viewer_flags : dict
+ A set of flags for controlling the viewer's behavior.
+ Described in the note below.
+ registered_keys : dict
+ A map from ASCII key characters to tuples containing:
+
+ - A function to be called whenever the key is pressed,
+ whose first argument will be the viewer itself.
+ - (Optionally) A list of additional positional arguments
+ to be passed to the function.
+ - (Optionally) A dict of keyword arguments to be passed
+ to the function.
+
+ kwargs : dict
+ Any keyword arguments left over will be interpreted as belonging to
+ either the :attr:`.Viewer.render_flags` or :attr:`.Viewer.viewer_flags`
+ dictionaries. Those flag sets will be updated appropriately.
+
+ Note
+ ----
+ The basic commands for moving about the scene are given as follows:
+
+ - **Rotating about the scene**: Hold the left mouse button and
+ drag the cursor.
+ - **Rotating about the view axis**: Hold ``CTRL`` and the left mouse
+ button and drag the cursor.
+ - **Panning**:
+
+ - Hold SHIFT, then hold the left mouse button and drag the cursor, or
+ - Hold the middle mouse button and drag the cursor.
+
+ - **Zooming**:
+
+ - Scroll the mouse wheel, or
+ - Hold the right mouse button and drag the cursor.
+
+ Other keyboard commands are as follows:
+
+ - ``a``: Toggles rotational animation mode.
+ - ``c``: Toggles backface culling.
+ - ``f``: Toggles fullscreen mode.
+ - ``h``: Toggles shadow rendering.
+ - ``i``: Toggles axis display mode
+ (no axes, world axis, mesh axes, all axes).
+ - ``l``: Toggles lighting mode
+ (scene lighting, Raymond lighting, or direct lighting).
+ - ``m``: Toggles face normal visualization.
+ - ``n``: Toggles vertex normal visualization.
+ - ``o``: Toggles orthographic mode.
+ - ``q``: Quits the viewer.
+ - ``r``: Starts recording a GIF, and pressing again stops recording
+ and opens a file dialog.
+ - ``s``: Opens a file dialog to save the current view as an image.
+ - ``w``: Toggles wireframe mode
+ (scene default, flip wireframes, all wireframe, or all solid).
+ - ``z``: Resets the camera to the initial view.
+
+ Note
+ ----
+ The valid keys for ``render_flags`` are as follows:
+
+ - ``flip_wireframe``: `bool`, If `True`, all objects will have their
+ wireframe modes flipped from what their material indicates.
+ Defaults to `False`.
+ - ``all_wireframe``: `bool`, If `True`, all objects will be rendered
+ in wireframe mode. Defaults to `False`.
+ - ``all_solid``: `bool`, If `True`, all objects will be rendered in
+ solid mode. Defaults to `False`.
+ - ``shadows``: `bool`, If `True`, shadows will be rendered.
+ Defaults to `False`.
+ - ``vertex_normals``: `bool`, If `True`, vertex normals will be
+ rendered as blue lines. Defaults to `False`.
+ - ``face_normals``: `bool`, If `True`, face normals will be rendered as
+ blue lines. Defaults to `False`.
+ - ``cull_faces``: `bool`, If `True`, backfaces will be culled.
+ Defaults to `True`.
+ - ``point_size`` : float, The point size in pixels. Defaults to 1px.
+
+ Note
+ ----
+ The valid keys for ``viewer_flags`` are as follows:
+
+ - ``rotate``: `bool`, If `True`, the scene's camera will rotate
+ about an axis. Defaults to `False`.
+ - ``rotate_rate``: `float`, The rate of rotation in radians per second.
+ Defaults to `PI / 3.0`.
+ - ``rotate_axis``: `(3,) float`, The axis in world coordinates to rotate
+ about. Defaults to ``[0,0,1]``.
+ - ``view_center``: `(3,) float`, The position to rotate the scene about.
+ Defaults to the scene's centroid.
+ - ``use_raymond_lighting``: `bool`, If `True`, an additional set of three
+ directional lights that move with the camera will be added to the scene.
+ Defaults to `False`.
+ - ``use_direct_lighting``: `bool`, If `True`, an additional directional
+ light that moves with the camera and points out of it will be added to
+ the scene. Defaults to `False`.
+ - ``lighting_intensity``: `float`, The overall intensity of the
+ viewer's additional lights (when they're in use). Defaults to 3.0.
+ - ``use_perspective_cam``: `bool`, If `True`, a perspective camera will
+ be used. Otherwise, an orthographic camera is used. Defaults to `True`.
+ - ``save_directory``: `str`, A directory to open the file dialogs in.
+ Defaults to `None`.
+ - ``window_title``: `str`, A title for the viewer's application window.
+ Defaults to `"Scene Viewer"`.
+ - ``refresh_rate``: `float`, A refresh rate for rendering, in Hertz.
+ Defaults to `30.0`.
+ - ``fullscreen``: `bool`, Whether to make viewer fullscreen.
+ Defaults to `False`.
+ - ``show_world_axis``: `bool`, Whether to show the world axis.
+ Defaults to `False`.
+ - ``show_mesh_axes``: `bool`, Whether to show the individual mesh axes.
+ Defaults to `False`.
+ - ``caption``: `list of dict`, Text caption(s) to display on the viewer.
+ Defaults to `None`.
+
+ Note
+ ----
+ Animation can be accomplished by running the viewer with ``run_in_thread``
+ enabled. Then, just run a loop in your main thread, updating the scene as
+ needed. Before updating the scene, be sure to acquire the
+ :attr:`.Viewer.render_lock`, and release it when your update is done.
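+
+ A minimal sketch of that pattern (assuming ``scene`` is a ``Scene``
+ containing a node ``mesh_node``, and ``next_pose()`` is a hypothetical
+ helper returning a (4,4) pose matrix):
+
+ >>> viewer = Viewer(scene, run_in_thread=True)
+ >>> while viewer.is_active:
+ ...     viewer.render_lock.acquire()
+ ...     scene.set_pose(mesh_node, next_pose())
+ ...     viewer.render_lock.release()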
+ """
+
+ def __init__(self, scene, viewport_size=None,
+ render_flags=None, viewer_flags=None,
+ registered_keys=None, run_in_thread=False,
+ auto_start=True,
+ **kwargs):
+
+ #######################################################################
+ # Save attributes and flags
+ #######################################################################
+ if viewport_size is None:
+ viewport_size = (640, 480)
+ self._scene = scene
+ self._viewport_size = viewport_size
+ self._render_lock = RLock()
+ self._is_active = False
+ self._should_close = False
+ self._run_in_thread = run_in_thread
+ self._auto_start = auto_start
+
+ self._default_render_flags = {
+ 'flip_wireframe': False,
+ 'all_wireframe': False,
+ 'all_solid': False,
+ 'shadows': False,
+ 'vertex_normals': False,
+ 'face_normals': False,
+ 'cull_faces': True,
+ 'point_size': 1.0,
+ }
+ self._default_viewer_flags = {
+ 'mouse_pressed': False,
+ 'rotate': False,
+ 'rotate_rate': np.pi / 3.0,
+ 'rotate_axis': np.array([0.0, 0.0, 1.0]),
+ 'view_center': None,
+ 'record': False,
+ 'use_raymond_lighting': False,
+ 'use_direct_lighting': False,
+ 'lighting_intensity': 3.0,
+ 'use_perspective_cam': True,
+ 'save_directory': None,
+ 'window_title': 'Scene Viewer',
+ 'refresh_rate': 30.0,
+ 'fullscreen': False,
+ 'show_world_axis': False,
+ 'show_mesh_axes': False,
+ 'caption': None
+ }
+ self._render_flags = self._default_render_flags.copy()
+ self._viewer_flags = self._default_viewer_flags.copy()
+ self._viewer_flags['rotate_axis'] = (
+ self._default_viewer_flags['rotate_axis'].copy()
+ )
+
+ if render_flags is not None:
+ self._render_flags.update(render_flags)
+ if viewer_flags is not None:
+ self._viewer_flags.update(viewer_flags)
+
+ for key in kwargs:
+ if key in self.render_flags:
+ self._render_flags[key] = kwargs[key]
+ elif key in self.viewer_flags:
+ self._viewer_flags[key] = kwargs[key]
+
+ # TODO MAC OS BUG FOR SHADOWS
+ if sys.platform == 'darwin':
+ self._render_flags['shadows'] = False
+
+ self._registered_keys = {}
+ if registered_keys is not None:
+ self._registered_keys = {
+ ord(k.lower()): registered_keys[k] for k in registered_keys
+ }
+
+ #######################################################################
+ # Save internal settings
+ #######################################################################
+
+ # Set up caption stuff
+ self._message_text = None
+ self._ticks_till_fade = 2.0 / 3.0 * self.viewer_flags['refresh_rate']
+ self._message_opac = 1.0 + self._ticks_till_fade
+
+ # Set up raymond lights and direct lights
+ self._raymond_lights = self._create_raymond_lights()
+ self._direct_light = self._create_direct_light()
+
+ # Set up axes
+ self._axes = {}
+ self._axis_mesh = Mesh.from_trimesh(
+ trimesh.creation.axis(origin_size=0.1, axis_radius=0.05,
+ axis_length=1.0), smooth=False)
+ if self.viewer_flags['show_world_axis']:
+ self._set_axes(world=self.viewer_flags['show_world_axis'],
+ mesh=self.viewer_flags['show_mesh_axes'])
+
+ #######################################################################
+ # Set up camera node
+ #######################################################################
+ self._camera_node = None
+ self._prior_main_camera_node = None
+ self._default_camera_pose = None
+ self._default_persp_cam = None
+ self._default_orth_cam = None
+ self._trackball = None
+ self._saved_frames = []
+
+ # Extract main camera from scene and set up our mirrored copy
+ znear = None
+ zfar = None
+ if scene.main_camera_node is not None:
+ n = scene.main_camera_node
+ camera = copy.copy(n.camera)
+ if isinstance(camera, (PerspectiveCamera, IntrinsicsCamera)):
+ self._default_persp_cam = camera
+ znear = camera.znear
+ zfar = camera.zfar
+ elif isinstance(camera, OrthographicCamera):
+ self._default_orth_cam = camera
+ znear = camera.znear
+ zfar = camera.zfar
+ self._default_camera_pose = scene.get_pose(scene.main_camera_node)
+ self._prior_main_camera_node = n
+
+ # Set defaults as needed
+ if zfar is None:
+ zfar = max(scene.scale * 10.0, DEFAULT_Z_FAR)
+ if znear is None or znear == 0:
+ if scene.scale == 0:
+ znear = DEFAULT_Z_NEAR
+ else:
+ znear = min(scene.scale / 10.0, DEFAULT_Z_NEAR)
+
+ if self._default_persp_cam is None:
+ self._default_persp_cam = PerspectiveCamera(
+ yfov=np.pi / 3.0, znear=znear, zfar=zfar
+ )
+ if self._default_orth_cam is None:
+ xmag = ymag = scene.scale
+ if scene.scale == 0:
+ xmag = ymag = 1.0
+ self._default_orth_cam = OrthographicCamera(
+ xmag=xmag, ymag=ymag,
+ znear=znear,
+ zfar=zfar
+ )
+ if self._default_camera_pose is None:
+ self._default_camera_pose = self._compute_initial_camera_pose()
+
+ # Pick camera
+ if self.viewer_flags['use_perspective_cam']:
+ camera = self._default_persp_cam
+ else:
+ camera = self._default_orth_cam
+
+ self._camera_node = Node(
+ matrix=self._default_camera_pose, camera=camera
+ )
+ scene.add_node(self._camera_node)
+ scene.main_camera_node = self._camera_node
+ self._reset_view()
+
+ #######################################################################
+ # Initialize OpenGL context and renderer
+ #######################################################################
+ self._renderer = Renderer(
+ self._viewport_size[0], self._viewport_size[1],
+ self.render_flags['point_size']
+ )
+ self._is_active = True
+
+ if self.run_in_thread:
+ self._thread = Thread(target=self._init_and_start_app)
+ self._thread.start()
+ else:
+ if auto_start:
+ self._init_and_start_app()
+
+ def start(self):
+ self._init_and_start_app()
+
+ @property
+ def scene(self):
+ """:class:`.Scene` : The scene being visualized.
+ """
+ return self._scene
+
+ @property
+ def viewport_size(self):
+ """(2,) int : The width and height of the viewing window.
+ """
+ return self._viewport_size
+
+ @property
+ def render_lock(self):
+ """:class:`threading.RLock` : If acquired, prevents the viewer from
+ rendering until released.
+
+ Run :meth:`.Viewer.render_lock.acquire` before making updates to
+ the scene in a different thread, and run
+ :meth:`.Viewer.render_lock.release` once you're done to let the viewer
+ continue.
+ """
+ return self._render_lock
+
+ @property
+ def is_active(self):
+ """bool : `True` if the viewer is active, or `False` if it has
+ been closed.
+ """
+ return self._is_active
+
+ @property
+ def run_in_thread(self):
+ """bool : Whether the viewer was run in a separate thread.
+ """
+ return self._run_in_thread
+
+ @property
+ def render_flags(self):
+ """dict : Flags for controlling the renderer's behavior.
+
+ - ``flip_wireframe``: `bool`, If `True`, all objects will have their
+ wireframe modes flipped from what their material indicates.
+ Defaults to `False`.
+ - ``all_wireframe``: `bool`, If `True`, all objects will be rendered
+ in wireframe mode. Defaults to `False`.
+ - ``all_solid``: `bool`, If `True`, all objects will be rendered in
+ solid mode. Defaults to `False`.
+ - ``shadows``: `bool`, If `True`, shadows will be rendered.
+ Defaults to `False`.
+ - ``vertex_normals``: `bool`, If `True`, vertex normals will be
+ rendered as blue lines. Defaults to `False`.
+ - ``face_normals``: `bool`, If `True`, face normals will be rendered as
+ blue lines. Defaults to `False`.
+ - ``cull_faces``: `bool`, If `True`, backfaces will be culled.
+ Defaults to `True`.
+ - ``point_size`` : float, The point size in pixels. Defaults to 1px.
+
+ """
+ return self._render_flags
+
+ @render_flags.setter
+ def render_flags(self, value):
+ self._render_flags = value
+
+ @property
+ def viewer_flags(self):
+ """dict : Flags for controlling the viewer's behavior.
+
+ The valid keys for ``viewer_flags`` are as follows:
+
+ - ``rotate``: `bool`, If `True`, the scene's camera will rotate
+ about an axis. Defaults to `False`.
+ - ``rotate_rate``: `float`, The rate of rotation in radians per second.
+ Defaults to `PI / 3.0`.
+ - ``rotate_axis``: `(3,) float`, The axis in world coordinates to
+ rotate about. Defaults to ``[0,0,1]``.
+ - ``view_center``: `(3,) float`, The position to rotate the scene
+ about. Defaults to the scene's centroid.
+ - ``use_raymond_lighting``: `bool`, If `True`, an additional set of
+ three directional lights that move with the camera will be added to
+ the scene. Defaults to `False`.
+ - ``use_direct_lighting``: `bool`, If `True`, an additional directional
+ light that moves with the camera and points out of it will be
+ added to the scene. Defaults to `False`.
+ - ``lighting_intensity``: `float`, The overall intensity of the
+ viewer's additional lights (when they're in use). Defaults to 3.0.
+ - ``use_perspective_cam``: `bool`, If `True`, a perspective camera will
+ be used. Otherwise, an orthographic camera is used. Defaults to
+ `True`.
+ - ``save_directory``: `str`, A directory to open the file dialogs in.
+ Defaults to `None`.
+ - ``window_title``: `str`, A title for the viewer's application window.
+ Defaults to `"Scene Viewer"`.
+ - ``refresh_rate``: `float`, A refresh rate for rendering, in Hertz.
+ Defaults to `30.0`.
+ - ``fullscreen``: `bool`, Whether to make viewer fullscreen.
+ Defaults to `False`.
+ - ``show_world_axis``: `bool`, Whether to show the world axis.
+ Defaults to `False`.
+ - ``show_mesh_axes``: `bool`, Whether to show the individual mesh axes.
+ Defaults to `False`.
+ - ``caption``: `list of dict`, Text caption(s) to display on
+ the viewer. Defaults to `None`.
+
+ """
+ return self._viewer_flags
+
+ @viewer_flags.setter
+ def viewer_flags(self, value):
+ self._viewer_flags = value
+
+ @property
+ def registered_keys(self):
+ """dict : Map from ASCII key character to a handler function.
+
+ This is a map from ASCII key characters to tuples containing:
+
+ - A function to be called whenever the key is pressed,
+ whose first argument will be the viewer itself.
+ - (Optionally) A list of additional positional arguments
+ to be passed to the function.
+ - (Optionally) A dict of keyword arguments to be passed
+ to the function.
+
+ """
+ return self._registered_keys
+
+ @registered_keys.setter
+ def registered_keys(self, value):
+ self._registered_keys = value
+
+ def close_external(self):
+ """Close the viewer from another thread.
+
+ This function will wait for the actual close, so you can safely
+ manipulate the scene as soon as it returns.
+ """
+ self._should_close = True
+ while self.is_active:
+ time.sleep(1.0 / self.viewer_flags['refresh_rate'])
+
+ def save_gif(self, filename=None):
+ """Save the stored GIF frames to a file.
+
+ To use this asynchronously, run the viewer with the ``record``
+ flag and the ``run_in_thread`` flags set.
+ Kill the viewer after your desired time with
+ :meth:`.Viewer.close_external`, and then call :meth:`.Viewer.save_gif`.
+
+ Parameters
+ ----------
+ filename : str
+ The file to save the GIF to. If not specified,
+ a file dialog will be opened to ask the user where
+ to save the GIF file.
+ """
+ if filename is None:
+ filename = self._get_save_filename(['gif', 'all'])
+ if filename is not None:
+ self.viewer_flags['save_directory'] = os.path.dirname(filename)
+ imageio.mimwrite(filename, self._saved_frames,
+ fps=self.viewer_flags['refresh_rate'],
+ palettesize=128, subrectangles=True)
+ self._saved_frames = []
+
+ def on_close(self):
+ """Exit the event loop when the window is closed.
+ """
+ # Remove our camera and restore the prior one
+ if self._camera_node is not None:
+ self.scene.remove_node(self._camera_node)
+ if self._prior_main_camera_node is not None:
+ self.scene.main_camera_node = self._prior_main_camera_node
+
+ # Delete any lighting nodes that we've attached
+ if self.viewer_flags['use_raymond_lighting']:
+ for n in self._raymond_lights:
+ if self.scene.has_node(n):
+ self.scene.remove_node(n)
+ if self.viewer_flags['use_direct_lighting']:
+ if self.scene.has_node(self._direct_light):
+ self.scene.remove_node(self._direct_light)
+
+ # Delete any axis nodes that we've attached
+ self._remove_axes()
+
+ # Delete renderer
+ if self._renderer is not None:
+ self._renderer.delete()
+ self._renderer = None
+
+ # Force clean-up of OpenGL context data
+ try:
+ OpenGL.contextdata.cleanupContext()
+ self.close()
+ except Exception:
+ pass
+ finally:
+ self._is_active = False
+ super(Viewer, self).on_close()
+ pyglet.app.exit()
+
+ def on_draw(self):
+ """Redraw the scene into the viewing window.
+ """
+ if self._renderer is None:
+ return
+
+ if self.run_in_thread or not self._auto_start:
+ self.render_lock.acquire()
+
+ # Make OpenGL context current
+ self.switch_to()
+
+ # Render the scene
+ self.clear()
+ self._render()
+
+ if self._message_text is not None:
+ self._renderer.render_text(
+ self._message_text,
+ self.viewport_size[0] - TEXT_PADDING,
+ TEXT_PADDING,
+ font_pt=20,
+ color=np.array([0.1, 0.7, 0.2,
+ np.clip(self._message_opac, 0.0, 1.0)]),
+ align=TextAlign.BOTTOM_RIGHT
+ )
+
+ if self.viewer_flags['caption'] is not None:
+ for caption in self.viewer_flags['caption']:
+ xpos, ypos = self._location_to_x_y(caption['location'])
+ self._renderer.render_text(
+ caption['text'],
+ xpos,
+ ypos,
+ font_name=caption['font_name'],
+ font_pt=caption['font_pt'],
+ color=caption['color'],
+ scale=caption['scale'],
+ align=caption['location']
+ )
+
+ if self.run_in_thread or not self._auto_start:
+ self.render_lock.release()
+
+ def on_resize(self, width, height):
+ """Resize the camera and trackball when the window is resized.
+ """
+ if self._renderer is None:
+ return
+
+ self._viewport_size = (width, height)
+ self._trackball.resize(self._viewport_size)
+ self._renderer.viewport_width = self._viewport_size[0]
+ self._renderer.viewport_height = self._viewport_size[1]
+ self.on_draw()
+
+ def on_mouse_press(self, x, y, buttons, modifiers):
+ """Record an initial mouse press.
+ """
+ self._trackball.set_state(Trackball.STATE_ROTATE)
+ if (buttons == pyglet.window.mouse.LEFT):
+ ctrl = (modifiers & pyglet.window.key.MOD_CTRL)
+ shift = (modifiers & pyglet.window.key.MOD_SHIFT)
+ if (ctrl and shift):
+ self._trackball.set_state(Trackball.STATE_ZOOM)
+ elif ctrl:
+ self._trackball.set_state(Trackball.STATE_ROLL)
+ elif shift:
+ self._trackball.set_state(Trackball.STATE_PAN)
+ elif (buttons == pyglet.window.mouse.MIDDLE):
+ self._trackball.set_state(Trackball.STATE_PAN)
+ elif (buttons == pyglet.window.mouse.RIGHT):
+ self._trackball.set_state(Trackball.STATE_ZOOM)
+
+ self._trackball.down(np.array([x, y]))
+
+ # Stop animating while using the mouse
+ self.viewer_flags['mouse_pressed'] = True
+
+ def on_mouse_drag(self, x, y, dx, dy, buttons, modifiers):
+ """Record a mouse drag.
+ """
+ self._trackball.drag(np.array([x, y]))
+
+ def on_mouse_release(self, x, y, button, modifiers):
+ """Record a mouse release.
+ """
+ self.viewer_flags['mouse_pressed'] = False
+
+ def on_mouse_scroll(self, x, y, dx, dy):
+ """Record a mouse scroll.
+ """
+ if self.viewer_flags['use_perspective_cam']:
+ self._trackball.scroll(dy)
+ else:
+ spfc = 0.95
+ spbc = 1.0 / 0.95
+ sf = 1.0
+ if dy > 0:
+ sf = spfc * dy
+ elif dy < 0:
+ sf = - spbc * dy
+
+ c = self._camera_node.camera
+ xmag = max(c.xmag * sf, 1e-8)
+ ymag = max(c.ymag * sf, 1e-8 * c.ymag / c.xmag)
+ c.xmag = xmag
+ c.ymag = ymag
+
+ def on_key_press(self, symbol, modifiers):
+ """Record a key press.
+ """
+ # First, check for registered key callbacks
+ if symbol in self.registered_keys:
+ tup = self.registered_keys[symbol]
+ callback = None
+ args = []
+ kwargs = {}
+ if not isinstance(tup, (list, tuple, np.ndarray)):
+ callback = tup
+ else:
+ callback = tup[0]
+ if len(tup) == 2:
+ args = tup[1]
+ if len(tup) == 3:
+ kwargs = tup[2]
+ callback(self, *args, **kwargs)
+ return
+
+ # Otherwise, use default key functions
+
+ # A toggles rotational animation
+ self._message_text = None
+ if symbol == pyglet.window.key.A:
+ self.viewer_flags['rotate'] = not self.viewer_flags['rotate']
+ if self.viewer_flags['rotate']:
+ self._message_text = 'Rotation On'
+ else:
+ self._message_text = 'Rotation Off'
+
+ # C toggles backface culling
+ elif symbol == pyglet.window.key.C:
+ self.render_flags['cull_faces'] = (
+ not self.render_flags['cull_faces']
+ )
+ if self.render_flags['cull_faces']:
+ self._message_text = 'Cull Faces On'
+ else:
+ self._message_text = 'Cull Faces Off'
+
+ # F toggles fullscreen mode
+ elif symbol == pyglet.window.key.F:
+ self.viewer_flags['fullscreen'] = (
+ not self.viewer_flags['fullscreen']
+ )
+ self.set_fullscreen(self.viewer_flags['fullscreen'])
+ self.activate()
+ if self.viewer_flags['fullscreen']:
+ self._message_text = 'Fullscreen On'
+ else:
+ self._message_text = 'Fullscreen Off'
+
+ # H toggles shadows
+ elif symbol == pyglet.window.key.H and sys.platform != 'darwin':
+ self.render_flags['shadows'] = not self.render_flags['shadows']
+ if self.render_flags['shadows']:
+ self._message_text = 'Shadows On'
+ else:
+ self._message_text = 'Shadows Off'
+
+ elif symbol == pyglet.window.key.I:
+ if (self.viewer_flags['show_world_axis'] and not
+ self.viewer_flags['show_mesh_axes']):
+ self.viewer_flags['show_world_axis'] = False
+ self.viewer_flags['show_mesh_axes'] = True
+ self._set_axes(False, True)
+ self._message_text = 'Mesh Axes On'
+ elif (not self.viewer_flags['show_world_axis'] and
+ self.viewer_flags['show_mesh_axes']):
+ self.viewer_flags['show_world_axis'] = True
+ self.viewer_flags['show_mesh_axes'] = True
+ self._set_axes(True, True)
+ self._message_text = 'All Axes On'
+ elif (self.viewer_flags['show_world_axis'] and
+ self.viewer_flags['show_mesh_axes']):
+ self.viewer_flags['show_world_axis'] = False
+ self.viewer_flags['show_mesh_axes'] = False
+ self._set_axes(False, False)
+ self._message_text = 'All Axes Off'
+ else:
+ self.viewer_flags['show_world_axis'] = True
+ self.viewer_flags['show_mesh_axes'] = False
+ self._set_axes(True, False)
+ self._message_text = 'World Axis On'
+
+ # L toggles the lighting mode
+ elif symbol == pyglet.window.key.L:
+ if self.viewer_flags['use_raymond_lighting']:
+ self.viewer_flags['use_raymond_lighting'] = False
+ self.viewer_flags['use_direct_lighting'] = True
+ self._message_text = 'Direct Lighting'
+ elif self.viewer_flags['use_direct_lighting']:
+ self.viewer_flags['use_raymond_lighting'] = False
+ self.viewer_flags['use_direct_lighting'] = False
+ self._message_text = 'Default Lighting'
+ else:
+ self.viewer_flags['use_raymond_lighting'] = True
+ self.viewer_flags['use_direct_lighting'] = False
+ self._message_text = 'Raymond Lighting'
+
+ # M toggles face normals
+ elif symbol == pyglet.window.key.M:
+ self.render_flags['face_normals'] = (
+ not self.render_flags['face_normals']
+ )
+ if self.render_flags['face_normals']:
+ self._message_text = 'Face Normals On'
+ else:
+ self._message_text = 'Face Normals Off'
+
+ # N toggles vertex normals
+ elif symbol == pyglet.window.key.N:
+ self.render_flags['vertex_normals'] = (
+ not self.render_flags['vertex_normals']
+ )
+ if self.render_flags['vertex_normals']:
+ self._message_text = 'Vert Normals On'
+ else:
+ self._message_text = 'Vert Normals Off'
+
+ # O toggles orthographic camera mode
+ elif symbol == pyglet.window.key.O:
+ self.viewer_flags['use_perspective_cam'] = (
+ not self.viewer_flags['use_perspective_cam']
+ )
+ if self.viewer_flags['use_perspective_cam']:
+ camera = self._default_persp_cam
+ self._message_text = 'Perspective View'
+ else:
+ camera = self._default_orth_cam
+ self._message_text = 'Orthographic View'
+
+ cam_pose = self._camera_node.matrix.copy()
+ cam_node = Node(matrix=cam_pose, camera=camera)
+ self.scene.remove_node(self._camera_node)
+ self.scene.add_node(cam_node)
+ self.scene.main_camera_node = cam_node
+ self._camera_node = cam_node
+
+ # Q quits the viewer
+ elif symbol == pyglet.window.key.Q:
+ self.on_close()
+
+ # R toggles recording; stopping saves the captured frames as a GIF
+ elif symbol == pyglet.window.key.R:
+ if self.viewer_flags['record']:
+ self.save_gif()
+ self.set_caption(self.viewer_flags['window_title'])
+ else:
+ self.set_caption(
+ '{} (RECORDING)'.format(self.viewer_flags['window_title'])
+ )
+ self.viewer_flags['record'] = not self.viewer_flags['record']
+
+ # S saves the current frame as an image
+ elif symbol == pyglet.window.key.S:
+ self._save_image()
+
+ # W toggles through wireframe modes
+ elif symbol == pyglet.window.key.W:
+ if self.render_flags['flip_wireframe']:
+ self.render_flags['flip_wireframe'] = False
+ self.render_flags['all_wireframe'] = True
+ self.render_flags['all_solid'] = False
+ self._message_text = 'All Wireframe'
+ elif self.render_flags['all_wireframe']:
+ self.render_flags['flip_wireframe'] = False
+ self.render_flags['all_wireframe'] = False
+ self.render_flags['all_solid'] = True
+ self._message_text = 'All Solid'
+ elif self.render_flags['all_solid']:
+ self.render_flags['flip_wireframe'] = False
+ self.render_flags['all_wireframe'] = False
+ self.render_flags['all_solid'] = False
+ self._message_text = 'Default Wireframe'
+ else:
+ self.render_flags['flip_wireframe'] = True
+ self.render_flags['all_wireframe'] = False
+ self.render_flags['all_solid'] = False
+ self._message_text = 'Flip Wireframe'
+
+ # Z resets the camera viewpoint
+ elif symbol == pyglet.window.key.Z:
+ self._reset_view()
+
+ if self._message_text is not None:
+ self._message_opac = 1.0 + self._ticks_till_fade
+
+ @staticmethod
+ def _time_event(dt, self):
+ """The timer callback.
+ """
+ # Don't run old dead events after we've already closed
+ if not self._is_active:
+ return
+
+ if self.viewer_flags['record']:
+ self._record()
+ if (self.viewer_flags['rotate'] and not
+ self.viewer_flags['mouse_pressed']):
+ self._rotate()
+
+ # Manage message opacity
+ if self._message_text is not None:
+ if self._message_opac > 1.0:
+ self._message_opac -= 1.0
+ else:
+ self._message_opac *= 0.90
+ if self._message_opac < 0.05:
+ self._message_opac = 1.0 + self._ticks_till_fade
+ self._message_text = None
+
+ if self._should_close:
+ self.on_close()
+ else:
+ self.on_draw()
+
+ def _reset_view(self):
+ """Reset the view to a good initial state.
+
+ The view is initially along the positive x-axis at a
+ sufficient distance from the scene.
+ """
+ scale = self.scene.scale
+ if scale == 0.0:
+ scale = DEFAULT_SCENE_SCALE
+ centroid = self.scene.centroid
+
+ if self.viewer_flags['view_center'] is not None:
+ centroid = self.viewer_flags['view_center']
+
+ self._camera_node.matrix = self._default_camera_pose
+ self._trackball = Trackball(
+ self._default_camera_pose, self.viewport_size, scale, centroid
+ )
+
+ def _get_save_filename(self, file_exts):
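+ # Open a Tk save-file dialog in the last-used directory (or the CWD);
+ # returns None if Tk is unavailable or the dialog is cancelled.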
+ file_types = {
+ 'png': ('png files', '*.png'),
+ 'jpg': ('jpeg files', '*.jpg'),
+ 'gif': ('gif files', '*.gif'),
+ 'all': ('all files', '*'),
+ }
+ filetypes = [file_types[x] for x in file_exts]
+ try:
+ root = Tk()
+ save_dir = self.viewer_flags['save_directory']
+ if save_dir is None:
+ save_dir = os.getcwd()
+ filename = filedialog.asksaveasfilename(
+ initialdir=save_dir, title='Select file save location',
+ filetypes=filetypes
+ )
+ except Exception:
+ return None
+
+ root.destroy()
+ if filename == ():
+ return None
+ return filename
+
+ def _save_image(self):
+ filename = self._get_save_filename(['png', 'jpg', 'gif', 'all'])
+ if filename is not None:
+ self.viewer_flags['save_directory'] = os.path.dirname(filename)
+ imageio.imwrite(filename, self._renderer.read_color_buf())
+
+ def _record(self):
+ """Save another frame for the GIF.
+ """
+ data = self._renderer.read_color_buf()
+ if not np.all(data == 0.0):
+ self._saved_frames.append(data)
+
+ def _rotate(self):
+ """Animate the scene by rotating the camera.
+ """
+ az = (self.viewer_flags['rotate_rate'] /
+ self.viewer_flags['refresh_rate'])
+ self._trackball.rotate(az, self.viewer_flags['rotate_axis'])
+
+ def _render(self):
+ """Render the scene into the framebuffer and flip.
+ """
+ scene = self.scene
+ self._camera_node.matrix = self._trackball.pose.copy()
+
+ # Set lighting
+ vli = self.viewer_flags['lighting_intensity']
+ if self.viewer_flags['use_raymond_lighting']:
+ for n in self._raymond_lights:
+ n.light.intensity = vli / 3.0
+ if not self.scene.has_node(n):
+ scene.add_node(n, parent_node=self._camera_node)
+ else:
+ self._direct_light.light.intensity = vli
+ for n in self._raymond_lights:
+ if self.scene.has_node(n):
+ self.scene.remove_node(n)
+
+ if self.viewer_flags['use_direct_lighting']:
+ if not self.scene.has_node(self._direct_light):
+ scene.add_node(
+ self._direct_light, parent_node=self._camera_node
+ )
+ elif self.scene.has_node(self._direct_light):
+ self.scene.remove_node(self._direct_light)
+
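+ # Translate the boolean render_flags into the RenderFlags bitmask for this frame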
+ flags = RenderFlags.NONE
+ if self.render_flags['flip_wireframe']:
+ flags |= RenderFlags.FLIP_WIREFRAME
+ elif self.render_flags['all_wireframe']:
+ flags |= RenderFlags.ALL_WIREFRAME
+ elif self.render_flags['all_solid']:
+ flags |= RenderFlags.ALL_SOLID
+
+ if self.render_flags['shadows']:
+ flags |= RenderFlags.SHADOWS_DIRECTIONAL | RenderFlags.SHADOWS_SPOT
+ if self.render_flags['vertex_normals']:
+ flags |= RenderFlags.VERTEX_NORMALS
+ if self.render_flags['face_normals']:
+ flags |= RenderFlags.FACE_NORMALS
+ if not self.render_flags['cull_faces']:
+ flags |= RenderFlags.SKIP_CULL_FACES
+
+ self._renderer.render(self.scene, flags)
+
+ def _init_and_start_app(self):
+ # Try multiple GL configs, starting with the target OpenGL version plus
+ # multisampling, and fall back to progressively simpler configs if window
+ # creation fails. Note: multisampling is not available on all hardware.
+ from pyglet.gl import Config
+ confs = [Config(sample_buffers=1, samples=4,
+ depth_size=24,
+ double_buffer=True,
+ major_version=TARGET_OPEN_GL_MAJOR,
+ minor_version=TARGET_OPEN_GL_MINOR),
+ Config(depth_size=24,
+ double_buffer=True,
+ major_version=TARGET_OPEN_GL_MAJOR,
+ minor_version=TARGET_OPEN_GL_MINOR),
+ Config(sample_buffers=1, samples=4,
+ depth_size=24,
+ double_buffer=True,
+ major_version=MIN_OPEN_GL_MAJOR,
+ minor_version=MIN_OPEN_GL_MINOR),
+ Config(depth_size=24,
+ double_buffer=True,
+ major_version=MIN_OPEN_GL_MAJOR,
+ minor_version=MIN_OPEN_GL_MINOR)]
+ for conf in confs:
+ try:
+ super(Viewer, self).__init__(config=conf, resizable=True,
+ width=self._viewport_size[0],
+ height=self._viewport_size[1])
+ break
+ except pyglet.window.NoSuchConfigException:
+ pass
+
+ if not self.context:
+ raise ValueError('Unable to initialize an OpenGL 3+ context')
+ clock.schedule_interval(
+ Viewer._time_event, 1.0 / self.viewer_flags['refresh_rate'], self
+ )
+ self.switch_to()
+ self.set_caption(self.viewer_flags['window_title'])
+ pyglet.app.run()
+
+ def _compute_initial_camera_pose(self):
+ centroid = self.scene.centroid
+ if self.viewer_flags['view_center'] is not None:
+ centroid = self.viewer_flags['view_center']
+ scale = self.scene.scale
+ if scale == 0.0:
+ scale = DEFAULT_SCENE_SCALE
+
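+ # Place the camera on the scene's +x/+z diagonal, oriented toward the centroid,
+ # at a distance derived from the scene scale and a 30-degree half field of view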
+ s2 = 1.0 / np.sqrt(2.0)
+ cp = np.eye(4)
+ cp[:3,:3] = np.array([
+ [0.0, -s2, s2],
+ [1.0, 0.0, 0.0],
+ [0.0, s2, s2]
+ ])
+ hfov = np.pi / 6.0
+ dist = scale / (2.0 * np.tan(hfov))
+ cp[:3,3] = dist * np.array([1.0, 0.0, 1.0]) + centroid
+
+ return cp
+
+ def _create_raymond_lights(self):
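+ # Classic three-light 'Raymond' rig: directional lights tilted 30 degrees
+ # from the vertical axis and spaced 120 degrees apart in azimuth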
+ thetas = np.pi * np.array([1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
+ phis = np.pi * np.array([0.0, 2.0 / 3.0, 4.0 / 3.0])
+
+ nodes = []
+
+ for phi, theta in zip(phis, thetas):
+ xp = np.sin(theta) * np.cos(phi)
+ yp = np.sin(theta) * np.sin(phi)
+ zp = np.cos(theta)
+
+ z = np.array([xp, yp, zp])
+ z = z / np.linalg.norm(z)
+ x = np.array([-z[1], z[0], 0.0])
+ if np.linalg.norm(x) == 0:
+ x = np.array([1.0, 0.0, 0.0])
+ x = x / np.linalg.norm(x)
+ y = np.cross(z, x)
+
+ matrix = np.eye(4)
+ matrix[:3,:3] = np.c_[x,y,z]
+ nodes.append(Node(
+ light=DirectionalLight(color=np.ones(3), intensity=1.0),
+ matrix=matrix
+ ))
+
+ return nodes
+
+ def _create_direct_light(self):
+ light = DirectionalLight(color=np.ones(3), intensity=1.0)
+ n = Node(light=light, matrix=np.eye(4))
+ return n
+
+ def _set_axes(self, world, mesh):
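+ # Show or hide the world-frame axis and the per-mesh axes based on the flags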
+ scale = self.scene.scale
+ if world:
+ if 'scene' not in self._axes:
+ n = Node(mesh=self._axis_mesh, scale=np.ones(3) * scale * 0.3)
+ self.scene.add_node(n)
+ self._axes['scene'] = n
+ else:
+ if 'scene' in self._axes:
+ self.scene.remove_node(self._axes['scene'])
+ self._axes.pop('scene')
+
+ if mesh:
+ old_nodes = []
+ existing_axes = set([self._axes[k] for k in self._axes])
+ for node in self.scene.mesh_nodes:
+ if node not in existing_axes:
+ old_nodes.append(node)
+
+ for node in old_nodes:
+ if node in self._axes:
+ continue
+ n = Node(
+ mesh=self._axis_mesh,
+ scale=np.ones(3) * node.mesh.scale * 0.5
+ )
+ self.scene.add_node(n, parent_node=node)
+ self._axes[node] = n
+ else:
+ to_remove = set()
+ for main_node in self._axes:
+ if main_node in self.scene.mesh_nodes:
+ self.scene.remove_node(self._axes[main_node])
+ to_remove.add(main_node)
+ for main_node in to_remove:
+ self._axes.pop(main_node)
+
+ def _remove_axes(self):
+ for main_node in self._axes:
+ axis_node = self._axes[main_node]
+ self.scene.remove_node(axis_node)
+ self._axes = {}
+
+ def _location_to_x_y(self, location):
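+ # Map a TextAlign anchor to (x, y) pixel coordinates in the viewport,
+ # padded away from the window edges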
+ if location == TextAlign.CENTER:
+ return (self.viewport_size[0] / 2.0, self.viewport_size[1] / 2.0)
+ elif location == TextAlign.CENTER_LEFT:
+ return (TEXT_PADDING, self.viewport_size[1] / 2.0)
+ elif location == TextAlign.CENTER_RIGHT:
+ return (self.viewport_size[0] - TEXT_PADDING,
+ self.viewport_size[1] / 2.0)
+ elif location == TextAlign.BOTTOM_LEFT:
+ return (TEXT_PADDING, TEXT_PADDING)
+ elif location == TextAlign.BOTTOM_RIGHT:
+ return (self.viewport_size[0] - TEXT_PADDING, TEXT_PADDING)
+ elif location == TextAlign.BOTTOM_CENTER:
+ return (self.viewport_size[0] / 2.0, TEXT_PADDING)
+ elif location == TextAlign.TOP_LEFT:
+ return (TEXT_PADDING, self.viewport_size[1] - TEXT_PADDING)
+ elif location == TextAlign.TOP_RIGHT:
+ return (self.viewport_size[0] - TEXT_PADDING,
+ self.viewport_size[1] - TEXT_PADDING)
+ elif location == TextAlign.TOP_CENTER:
+ return (self.viewport_size[0] / 2.0,
+ self.viewport_size[1] - TEXT_PADDING)
+
+
+__all__ = ['Viewer']
diff --git a/pyrender/requirements.txt b/pyrender/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c40b74256f0dc6697754bb8609f69a39d51beba
--- /dev/null
+++ b/pyrender/requirements.txt
@@ -0,0 +1,14 @@
+freetype-py
+imageio
+networkx
+numpy
+Pillow
+pyglet==1.4.0a1
+PyOpenGL
+PyOpenGL_accelerate
+six
+trimesh
+sphinx
+sphinx_rtd_theme
+sphinx-automodapi
+
diff --git a/pyrender/setup.py b/pyrender/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3b5ba0da2b0f17b759e5556597981096a80bda8
--- /dev/null
+++ b/pyrender/setup.py
@@ -0,0 +1,76 @@
+"""
+Setup of pyrender Python codebase.
+
+Author: Matthew Matl
+"""
+import sys
+from setuptools import setup
+
+# load __version__
+exec(open('pyrender/version.py').read())
+
+def get_imageio_dep():
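+ # imageio dropped Python 2 support after 2.6.1, so pin it on Python 2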
+ if sys.version[0] == "2":
+ return 'imageio<=2.6.1'
+ return 'imageio'
+
+requirements = [
+ 'freetype-py', # For font loading
+ get_imageio_dep(), # For Image I/O
+ 'networkx', # For the scene graph
+ 'numpy', # Numpy
+ 'Pillow', # For Trimesh texture conversions
+ 'pyglet>=1.4.10', # For the pyglet viewer
+ 'PyOpenGL~=3.1.0', # For OpenGL
+# 'PyOpenGL_accelerate~=3.1.0', # For OpenGL
+ 'scipy', # Because of trimesh missing dep
+ 'six', # For Python 2/3 interop
+ 'trimesh', # For meshes
+]
+
+dev_requirements = [
+ 'flake8', # Code formatting checker
+ 'pre-commit', # Pre-commit hooks
+ 'pytest', # Code testing
+ 'pytest-cov', # Coverage testing
+ 'tox', # Automatic virtualenv testing
+]
+
+docs_requirements = [
+ 'sphinx', # General doc library
+ 'sphinx_rtd_theme', # RTD theme for sphinx
+ 'sphinx-automodapi' # For generating nice tables
+]
+
+
+setup(
+ name = 'pyrender',
+ version=__version__,
+ description='Easy-to-use Python renderer for 3D visualization',
+ long_description='A simple implementation of Physically-Based Rendering '
+ '(PBR) in Python. Compliant with the glTF 2.0 standard.',
+ author='Matthew Matl',
+ author_email='matthewcmatl@gmail.com',
+ license='MIT License',
+ url = 'https://github.com/mmatl/pyrender',
+ classifiers = [
+ 'Development Status :: 4 - Beta',
+ 'License :: OSI Approved :: MIT License',
+ 'Operating System :: POSIX :: Linux',
+ 'Operating System :: MacOS :: MacOS X',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Natural Language :: English',
+ 'Topic :: Scientific/Engineering'
+ ],
+ keywords = 'rendering graphics opengl 3d visualization pbr gltf',
+ packages = ['pyrender', 'pyrender.platforms'],
+ setup_requires = requirements,
+ install_requires = requirements,
+ extras_require={
+ 'dev': dev_requirements,
+ 'docs': docs_requirements,
+ },
+ include_package_data=True
+)
diff --git a/pyrender/tests/__init__.py b/pyrender/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pyrender/tests/conftest.py b/pyrender/tests/conftest.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pyrender/tests/pytest.ini b/pyrender/tests/pytest.ini
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pyrender/tests/unit/__init__.py b/pyrender/tests/unit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pyrender/tests/unit/test_cameras.py b/pyrender/tests/unit/test_cameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..7544ad8f8e3ee55236fd2e32dbc12065153cbe5b
--- /dev/null
+++ b/pyrender/tests/unit/test_cameras.py
@@ -0,0 +1,164 @@
+import numpy as np
+import pytest
+
+from pyrender import PerspectiveCamera, OrthographicCamera
+
+
+def test_perspective_camera():
+
+ # Set up constants
+ znear = 0.05
+ zfar = 100
+ yfov = np.pi / 3.0
+ width = 1000.0
+ height = 500.0
+ aspectRatio = 640.0 / 480.0
+
+ # Test basics
+ with pytest.raises(TypeError):
+ p = PerspectiveCamera()
+
+ p = PerspectiveCamera(yfov=yfov)
+ assert p.yfov == yfov
+ assert p.znear == 0.05
+ assert p.zfar is None
+ assert p.aspectRatio is None
+ p.name = 'asdf'
+ p.name = None
+
+ with pytest.raises(ValueError):
+ p.yfov = 0.0
+
+ with pytest.raises(ValueError):
+ p.yfov = -1.0
+
+ with pytest.raises(ValueError):
+ p.znear = -1.0
+
+ p.znear = 0.0
+ p.znear = 0.05
+ p.zfar = 100.0
+ assert p.zfar == 100.0
+
+ with pytest.raises(ValueError):
+ p.zfar = 0.03
+
+ with pytest.raises(ValueError):
+ p.zfar = 0.05
+
+ p.aspectRatio = 10.0
+ assert p.aspectRatio == 10.0
+
+ with pytest.raises(ValueError):
+ p.aspectRatio = 0.0
+
+ with pytest.raises(ValueError):
+ p.aspectRatio = -1.0
+
+ # Test matrix getting/setting
+
+ # NF
+ p.znear = 0.05
+ p.zfar = 100
+ p.aspectRatio = None
+
+ with pytest.raises(ValueError):
+ p.get_projection_matrix()
+
+ assert np.allclose(
+ p.get_projection_matrix(width, height),
+ np.array([
+ [1.0 / (width / height * np.tan(yfov / 2.0)), 0.0, 0.0, 0.0],
+ [0.0, 1.0 / np.tan(yfov / 2.0), 0.0, 0.0],
+ [0.0, 0.0, (zfar + znear) / (znear - zfar),
+ (2 * zfar * znear) / (znear - zfar)],
+ [0.0, 0.0, -1.0, 0.0]
+ ])
+ )
+
+ # NFA
+ p.aspectRatio = aspectRatio
+ assert np.allclose(
+ p.get_projection_matrix(width, height),
+ np.array([
+ [1.0 / (aspectRatio * np.tan(yfov / 2.0)), 0.0, 0.0, 0.0],
+ [0.0, 1.0 / np.tan(yfov / 2.0), 0.0, 0.0],
+ [0.0, 0.0, (zfar + znear) / (znear - zfar),
+ (2 * zfar * znear) / (znear - zfar)],
+ [0.0, 0.0, -1.0, 0.0]
+ ])
+ )
+ assert np.allclose(
+ p.get_projection_matrix(), p.get_projection_matrix(width, height)
+ )
+
+ # N
+ p.zfar = None
+ p.aspectRatio = None
+ assert np.allclose(
+ p.get_projection_matrix(width, height),
+ np.array([
+ [1.0 / (width / height * np.tan(yfov / 2.0)), 0.0, 0.0, 0.0],
+ [0.0, 1.0 / np.tan(yfov / 2.0), 0.0, 0.0],
+ [0.0, 0.0, -1.0, -2.0 * znear],
+ [0.0, 0.0, -1.0, 0.0]
+ ])
+ )
+
+
+def test_orthographic_camera():
+ xm = 1.0
+ ym = 2.0
+ n = 0.05
+ f = 100.0
+
+ with pytest.raises(TypeError):
+ c = OrthographicCamera()
+
+ c = OrthographicCamera(xmag=xm, ymag=ym)
+
+ assert c.xmag == xm
+ assert c.ymag == ym
+ assert c.znear == 0.05
+ assert c.zfar == 100.0
+ assert c.name is None
+
+ with pytest.raises(TypeError):
+ c.ymag = None
+
+ with pytest.raises(ValueError):
+ c.ymag = 0.0
+
+ with pytest.raises(ValueError):
+ c.ymag = -1.0
+
+ with pytest.raises(TypeError):
+ c.xmag = None
+
+ with pytest.raises(ValueError):
+ c.xmag = 0.0
+
+ with pytest.raises(ValueError):
+ c.xmag = -1.0
+
+ with pytest.raises(TypeError):
+ c.znear = None
+
+ with pytest.raises(ValueError):
+ c.znear = 0.0
+
+ with pytest.raises(ValueError):
+ c.znear = -1.0
+
+ with pytest.raises(ValueError):
+ c.zfar = 0.01
+
+ assert np.allclose(
+ c.get_projection_matrix(),
+ np.array([
+ [1.0 / xm, 0, 0, 0],
+ [0, 1.0 / ym, 0, 0],
+ [0, 0, 2.0 / (n - f), (f + n) / (n - f)],
+ [0, 0, 0, 1.0]
+ ])
+ )
diff --git a/pyrender/tests/unit/test_egl.py b/pyrender/tests/unit/test_egl.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2f4bef39e33c2794e6837b5a1bb127d8d4dba06
--- /dev/null
+++ b/pyrender/tests/unit/test_egl.py
@@ -0,0 +1,16 @@
+# from pyrender.platforms import egl
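+# Note: the EGL import above is intentionally commented out and the functions
+# below use a tmp_ prefix, so pytest does not collect or run them by default.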
+
+
+def tmp_test_default_device():
+ egl.get_default_device()
+
+
+def tmp_test_query_device():
+ devices = egl.query_devices()
+ assert len(devices) > 0
+
+
+def tmp_test_init_context():
+ device = egl.query_devices()[0]
+ platform = egl.EGLPlatform(128, 128, device=device)
+ platform.init_context()
diff --git a/pyrender/tests/unit/test_lights.py b/pyrender/tests/unit/test_lights.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffde856b21e8cce9532f0308fcd1c7eb2d1eba90
--- /dev/null
+++ b/pyrender/tests/unit/test_lights.py
@@ -0,0 +1,104 @@
+import numpy as np
+import pytest
+
+from pyrender import (DirectionalLight, SpotLight, PointLight, Texture,
+ PerspectiveCamera, OrthographicCamera)
+from pyrender.constants import SHADOW_TEX_SZ
+
+
+def test_directional_light():
+
+ d = DirectionalLight()
+ assert d.name is None
+ assert np.all(d.color == 1.0)
+ assert d.intensity == 1.0
+
+ d.name = 'direc'
+ with pytest.raises(ValueError):
+ d.color = None
+ with pytest.raises(TypeError):
+ d.intensity = None
+
+ d = DirectionalLight(color=[0.0, 0.0, 0.0])
+ assert np.all(d.color == 0.0)
+
+ d._generate_shadow_texture()
+ st = d.shadow_texture
+ assert isinstance(st, Texture)
+ assert st.width == st.height == SHADOW_TEX_SZ
+
+ sc = d._get_shadow_camera(scene_scale=5.0)
+ assert isinstance(sc, OrthographicCamera)
+ assert sc.xmag == sc.ymag == 5.0
+ assert sc.znear == 0.01 * 5.0
+ assert sc.zfar == 10 * 5.0
+
+
+def test_spot_light():
+
+ s = SpotLight()
+ assert s.name is None
+ assert np.all(s.color == 1.0)
+ assert s.intensity == 1.0
+ assert s.innerConeAngle == 0.0
+ assert s.outerConeAngle == np.pi / 4.0
+ assert s.range is None
+
+ with pytest.raises(ValueError):
+ s.range = -1.0
+
+ with pytest.raises(ValueError):
+ s.range = 0.0
+
+ with pytest.raises(ValueError):
+ s.innerConeAngle = -1.0
+
+ with pytest.raises(ValueError):
+ s.innerConeAngle = np.pi / 3.0
+
+ with pytest.raises(ValueError):
+ s.outerConeAngle = -1.0
+
+ with pytest.raises(ValueError):
+ s.outerConeAngle = np.pi
+
+ s.range = 5.0
+ s.outerConeAngle = np.pi / 2 - 0.05
+ s.innerConeAngle = np.pi / 3
+ s.innerConeAngle = 0.0
+ s.outerConeAngle = np.pi / 4.0
+
+ s._generate_shadow_texture()
+ st = s.shadow_texture
+ assert isinstance(st, Texture)
+ assert st.width == st.height == SHADOW_TEX_SZ
+
+ sc = s._get_shadow_camera(scene_scale=5.0)
+ assert isinstance(sc, PerspectiveCamera)
+ assert sc.znear == 0.01 * 5.0
+ assert sc.zfar == 10 * 5.0
+ assert sc.aspectRatio == 1.0
+ assert np.allclose(sc.yfov, np.pi / 16.0 * 9.0) # Plus pi / 16
+
+
+def test_point_light():
+
+ s = PointLight()
+ assert s.name is None
+ assert np.all(s.color == 1.0)
+ assert s.intensity == 1.0
+ assert s.range is None
+
+ with pytest.raises(ValueError):
+ s.range = -1.0
+
+ with pytest.raises(ValueError):
+ s.range = 0.0
+
+ s.range = 5.0
+
+ with pytest.raises(NotImplementedError):
+ s._generate_shadow_texture()
+
+ with pytest.raises(NotImplementedError):
+ s._get_shadow_camera(scene_scale=5.0)
diff --git a/pyrender/tests/unit/test_meshes.py b/pyrender/tests/unit/test_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..7070b01171c97069fa013c6eba8eee217017f08e
--- /dev/null
+++ b/pyrender/tests/unit/test_meshes.py
@@ -0,0 +1,133 @@
+import numpy as np
+import pytest
+import trimesh
+
+from pyrender import (Mesh, Primitive)
+
+
+def test_meshes():
+
+ with pytest.raises(TypeError):
+ x = Mesh()
+ with pytest.raises(TypeError):
+ x = Primitive()
+ with pytest.raises(ValueError):
+ x = Primitive([], mode=10)
+
+ # Basics
+ x = Mesh([])
+ assert x.name is None
+ assert x.is_visible
+ assert x.weights is None
+
+ x.name = 'str'
+
+ # From Trimesh
+ x = Mesh.from_trimesh(trimesh.creation.box())
+ assert isinstance(x, Mesh)
+ assert len(x.primitives) == 1
+ assert x.is_visible
+ assert np.allclose(x.bounds, np.array([
+ [-0.5, -0.5, -0.5],
+ [0.5, 0.5, 0.5]
+ ]))
+ assert np.allclose(x.centroid, np.zeros(3))
+ assert np.allclose(x.extents, np.ones(3))
+ assert np.allclose(x.scale, np.sqrt(3))
+ assert not x.is_transparent
+
+ # Test some primitive functions
+ x = x.primitives[0]
+ with pytest.raises(ValueError):
+ x.normals = np.zeros(10)
+ with pytest.raises(ValueError):
+ x.tangents = np.zeros(10)
+ with pytest.raises(ValueError):
+ x.texcoord_0 = np.zeros(10)
+ with pytest.raises(ValueError):
+ x.texcoord_1 = np.zeros(10)
+ with pytest.raises(TypeError):
+ x.material = np.zeros(10)
+ assert x.targets is None
+ assert np.allclose(x.bounds, np.array([
+ [-0.5, -0.5, -0.5],
+ [0.5, 0.5, 0.5]
+ ]))
+ assert np.allclose(x.centroid, np.zeros(3))
+ assert np.allclose(x.extents, np.ones(3))
+ assert np.allclose(x.scale, np.sqrt(3))
+ x.material.baseColorFactor = np.array([0.0, 0.0, 0.0, 0.0])
+ assert x.is_transparent
+
+ # From two trimeshes
+ x = Mesh.from_trimesh([trimesh.creation.box(),
+ trimesh.creation.cylinder(radius=0.1, height=2.0)],
+ smooth=False)
+ assert isinstance(x, Mesh)
+ assert len(x.primitives) == 2
+ assert x.is_visible
+ assert np.allclose(x.bounds, np.array([
+ [-0.5, -0.5, -1.0],
+ [0.5, 0.5, 1.0]
+ ]))
+ assert np.allclose(x.centroid, np.zeros(3))
+ assert np.allclose(x.extents, [1.0, 1.0, 2.0])
+ assert np.allclose(x.scale, np.sqrt(6))
+ assert not x.is_transparent
+
+ # From bad data
+ with pytest.raises(TypeError):
+ x = Mesh.from_trimesh(None)
+
+ # With instancing
+ poses = np.tile(np.eye(4), (5,1,1))
+ poses[:,0,3] = np.array([0,1,2,3,4])
+ x = Mesh.from_trimesh(trimesh.creation.box(), poses=poses)
+ assert np.allclose(x.bounds, np.array([
+ [-0.5, -0.5, -0.5],
+ [4.5, 0.5, 0.5]
+ ]))
+ poses = np.eye(4)
+ x = Mesh.from_trimesh(trimesh.creation.box(), poses=poses)
+ poses = np.eye(3)
+ with pytest.raises(ValueError):
+ x = Mesh.from_trimesh(trimesh.creation.box(), poses=poses)
+
+ # From textured meshes
+ fm = trimesh.load('tests/data/fuze.obj')
+ x = Mesh.from_trimesh(fm)
+ assert isinstance(x, Mesh)
+ assert len(x.primitives) == 1
+ assert x.is_visible
+ assert not x.is_transparent
+ assert x.primitives[0].material.baseColorTexture is not None
+
+ x = Mesh.from_trimesh(fm, smooth=False)
+ fm.visual = fm.visual.to_color()
+ fm.visual.face_colors = np.array([1.0, 0.0, 0.0, 1.0])
+ x = Mesh.from_trimesh(fm, smooth=False)
+ with pytest.raises(ValueError):
+ x = Mesh.from_trimesh(fm, smooth=True)
+
+ fm.visual.vertex_colors = np.array([1.0, 0.0, 0.0, 0.5])
+ x = Mesh.from_trimesh(fm, smooth=False)
+ x = Mesh.from_trimesh(fm, smooth=True)
+ assert x.primitives[0].color_0 is not None
+ assert x.is_transparent
+
+ bm = trimesh.load('tests/data/WaterBottle.glb').dump()[0]
+ x = Mesh.from_trimesh(bm)
+ assert x.primitives[0].material.baseColorTexture is not None
+ assert x.primitives[0].material.emissiveTexture is not None
+ assert x.primitives[0].material.metallicRoughnessTexture is not None
+
+ # From point cloud
+ x = Mesh.from_points(fm.vertices)
+
+# def test_duck():
+# bm = trimesh.load('tests/data/Duck.glb').dump()[0]
+# x = Mesh.from_trimesh(bm)
+# assert x.primitives[0].material.baseColorTexture is not None
+# pixel = x.primitives[0].material.baseColorTexture.source[100, 100]
+# yellowish = np.array([1.0, 0.7411765, 0.0, 1.0])
+# assert np.allclose(pixel, yellowish)
diff --git a/pyrender/tests/unit/test_nodes.py b/pyrender/tests/unit/test_nodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..9857c8221b7f6fb8530699bdf5593f8f0b74e152
--- /dev/null
+++ b/pyrender/tests/unit/test_nodes.py
@@ -0,0 +1,124 @@
+import numpy as np
+import pytest
+from trimesh import transformations
+
+from pyrender import (DirectionalLight, PerspectiveCamera, Mesh, Node)
+
+
+def test_nodes():
+
+ x = Node()
+ assert x.name is None
+ assert x.camera is None
+ assert x.children == []
+ assert x.skin is None
+ assert np.allclose(x.matrix, np.eye(4))
+ assert x.mesh is None
+ assert np.allclose(x.rotation, [0,0,0,1])
+ assert np.allclose(x.scale, np.ones(3))
+ assert np.allclose(x.translation, np.zeros(3))
+ assert x.weights is None
+ assert x.light is None
+
+ x.name = 'node'
+
+ # Test node light/camera/mesh tests
+ c = PerspectiveCamera(yfov=2.0)
+ m = Mesh([])
+ d = DirectionalLight()
+ x.camera = c
+ assert x.camera == c
+ with pytest.raises(TypeError):
+ x.camera = m
+ x.camera = d
+ x.camera = None
+ x.mesh = m
+ assert x.mesh == m
+ with pytest.raises(TypeError):
+ x.mesh = c
+ x.mesh = d
+ x.light = d
+ assert x.light == d
+ with pytest.raises(TypeError):
+ x.light = m
+ x.light = c
+
+ # Test transformations getters/setters/etc...
+ # Set up test values
+ x = np.array([1.0, 0.0, 0.0])
+ y = np.array([0.0, 1.0, 0.0])
+ t = np.array([1.0, 2.0, 3.0])
+ s = np.array([0.5, 2.0, 1.0])
+
+ Mx = transformations.rotation_matrix(np.pi / 2.0, x)
+ qx = np.roll(transformations.quaternion_about_axis(np.pi / 2.0, x), -1)
+ Mxt = Mx.copy()
+ Mxt[:3,3] = t
+ S = np.eye(4)
+ S[:3,:3] = np.diag(s)
+ Mxts = Mxt.dot(S)
+
+ My = transformations.rotation_matrix(np.pi / 2.0, y)
+ qy = np.roll(transformations.quaternion_about_axis(np.pi / 2.0, y), -1)
+ Myt = My.copy()
+ Myt[:3,3] = t
+
+ x = Node(matrix=Mx)
+ assert np.allclose(x.matrix, Mx)
+ assert np.allclose(x.rotation, qx)
+ assert np.allclose(x.translation, np.zeros(3))
+ assert np.allclose(x.scale, np.ones(3))
+
+ x.matrix = My
+ assert np.allclose(x.matrix, My)
+ assert np.allclose(x.rotation, qy)
+ assert np.allclose(x.translation, np.zeros(3))
+ assert np.allclose(x.scale, np.ones(3))
+ x.translation = t
+ assert np.allclose(x.matrix, Myt)
+ assert np.allclose(x.rotation, qy)
+ x.rotation = qx
+ assert np.allclose(x.matrix, Mxt)
+ x.scale = s
+ assert np.allclose(x.matrix, Mxts)
+
+ x = Node(matrix=Mxt)
+ assert np.allclose(x.matrix, Mxt)
+ assert np.allclose(x.rotation, qx)
+ assert np.allclose(x.translation, t)
+ assert np.allclose(x.scale, np.ones(3))
+
+ x = Node(matrix=Mxts)
+ assert np.allclose(x.matrix, Mxts)
+ assert np.allclose(x.rotation, qx)
+ assert np.allclose(x.translation, t)
+ assert np.allclose(x.scale, s)
+
+ # Individual element getters
+ x.scale[0] = 0
+ assert np.allclose(x.scale[0], 0)
+
+ x.translation[0] = 0
+ assert np.allclose(x.translation[0], 0)
+
+ x.matrix = np.eye(4)
+ x.matrix[0,0] = 500
+ assert x.matrix[0,0] == 1.0
+
+ # Failures
+ with pytest.raises(ValueError):
+ x.matrix = 5 * np.eye(4)
+ with pytest.raises(ValueError):
+ x.matrix = np.eye(5)
+ with pytest.raises(ValueError):
+ x.matrix = np.eye(4).dot([5,1,1,1])
+ with pytest.raises(ValueError):
+ x.rotation = np.array([1,2])
+ with pytest.raises(ValueError):
+ x.rotation = np.array([1,2,3])
+ with pytest.raises(ValueError):
+ x.rotation = np.array([1,2,3,4])
+ with pytest.raises(ValueError):
+ x.translation = np.array([1,2,3,4])
+ with pytest.raises(ValueError):
+ x.scale = np.array([1,2,3,4])
diff --git a/pyrender/tests/unit/test_offscreen.py b/pyrender/tests/unit/test_offscreen.py
new file mode 100644
index 0000000000000000000000000000000000000000..88983b0ff4e2ab6f5ef252c51f2ac669c3a0e0ca
--- /dev/null
+++ b/pyrender/tests/unit/test_offscreen.py
@@ -0,0 +1,92 @@
+import numpy as np
+import trimesh
+
+from pyrender import (OffscreenRenderer, PerspectiveCamera, DirectionalLight,
+ SpotLight, Mesh, Node, Scene)
+
+
+def test_offscreen_renderer(tmpdir):
+
+ # Fuze trimesh
+ fuze_trimesh = trimesh.load('examples/models/fuze.obj')
+ fuze_mesh = Mesh.from_trimesh(fuze_trimesh)
+
+ # Drill trimesh
+ drill_trimesh = trimesh.load('examples/models/drill.obj')
+ drill_mesh = Mesh.from_trimesh(drill_trimesh)
+ drill_pose = np.eye(4)
+ drill_pose[0,3] = 0.1
+ drill_pose[2,3] = -np.min(drill_trimesh.vertices[:,2])
+
+ # Wood trimesh
+ wood_trimesh = trimesh.load('examples/models/wood.obj')
+ wood_mesh = Mesh.from_trimesh(wood_trimesh)
+
+ # Water bottle trimesh
+ bottle_gltf = trimesh.load('examples/models/WaterBottle.glb')
+ bottle_trimesh = bottle_gltf.geometry[list(bottle_gltf.geometry.keys())[0]]
+ bottle_mesh = Mesh.from_trimesh(bottle_trimesh)
+ bottle_pose = np.array([
+ [1.0, 0.0, 0.0, 0.1],
+ [0.0, 0.0, -1.0, -0.16],
+ [0.0, 1.0, 0.0, 0.13],
+ [0.0, 0.0, 0.0, 1.0],
+ ])
+
+ boxv_trimesh = trimesh.creation.box(extents=0.1 * np.ones(3))
+ boxv_vertex_colors = np.random.uniform(size=(boxv_trimesh.vertices.shape))
+ boxv_trimesh.visual.vertex_colors = boxv_vertex_colors
+ boxv_mesh = Mesh.from_trimesh(boxv_trimesh, smooth=False)
+ boxf_trimesh = trimesh.creation.box(extents=0.1 * np.ones(3))
+ boxf_face_colors = np.random.uniform(size=boxf_trimesh.faces.shape)
+ boxf_trimesh.visual.face_colors = boxf_face_colors
+ # Instanced
+ poses = np.tile(np.eye(4), (2,1,1))
+ poses[0,:3,3] = np.array([-0.1, -0.10, 0.05])
+ poses[1,:3,3] = np.array([-0.15, -0.10, 0.05])
+ boxf_mesh = Mesh.from_trimesh(boxf_trimesh, poses=poses, smooth=False)
+
+ points = trimesh.creation.icosphere(radius=0.05).vertices
+ point_colors = np.random.uniform(size=points.shape)
+ points_mesh = Mesh.from_points(points, colors=point_colors)
+
+ direc_l = DirectionalLight(color=np.ones(3), intensity=1.0)
+ spot_l = SpotLight(color=np.ones(3), intensity=10.0,
+ innerConeAngle=np.pi / 16, outerConeAngle=np.pi / 6)
+
+ cam = PerspectiveCamera(yfov=(np.pi / 3.0))
+ cam_pose = np.array([
+ [0.0, -np.sqrt(2) / 2, np.sqrt(2) / 2, 0.5],
+ [1.0, 0.0, 0.0, 0.0],
+ [0.0, np.sqrt(2) / 2, np.sqrt(2) / 2, 0.4],
+ [0.0, 0.0, 0.0, 1.0]
+ ])
+
+ scene = Scene(ambient_light=np.array([0.02, 0.02, 0.02]))
+
+ fuze_node = Node(mesh=fuze_mesh, translation=np.array([
+ 0.1, 0.15, -np.min(fuze_trimesh.vertices[:,2])
+ ]))
+ scene.add_node(fuze_node)
+ boxv_node = Node(mesh=boxv_mesh, translation=np.array([-0.1, 0.10, 0.05]))
+ scene.add_node(boxv_node)
+ boxf_node = Node(mesh=boxf_mesh)
+ scene.add_node(boxf_node)
+
+ _ = scene.add(drill_mesh, pose=drill_pose)
+ _ = scene.add(bottle_mesh, pose=bottle_pose)
+ _ = scene.add(wood_mesh)
+ _ = scene.add(direc_l, pose=cam_pose)
+ _ = scene.add(spot_l, pose=cam_pose)
+ _ = scene.add(points_mesh)
+
+ _ = scene.add(cam, pose=cam_pose)
+
+ r = OffscreenRenderer(viewport_width=640, viewport_height=480)
+ color, depth = r.render(scene)
+
+ assert color.shape == (480, 640, 3)
+ assert depth.shape == (480, 640)
+ assert np.max(depth.data) > 0.05
+ assert np.count_nonzero(depth.data) > (0.2 * depth.size)
+ r.delete()
diff --git a/pyrender/tests/unit/test_scenes.py b/pyrender/tests/unit/test_scenes.py
new file mode 100644
index 0000000000000000000000000000000000000000..d85dd714cb5d842ea12dee4140adfd7db55c9c01
--- /dev/null
+++ b/pyrender/tests/unit/test_scenes.py
@@ -0,0 +1,235 @@
+import numpy as np
+import pytest
+import trimesh
+
+from pyrender import (Mesh, PerspectiveCamera, DirectionalLight,
+ SpotLight, PointLight, Scene, Node, OrthographicCamera)
+
+
+def test_scenes():
+
+ # Basics
+ s = Scene()
+ assert np.allclose(s.bg_color, np.ones(4))
+ assert np.allclose(s.ambient_light, np.zeros(3))
+ assert len(s.nodes) == 0
+ assert s.name is None
+ s.name = 'asdf'
+ s.bg_color = None
+ s.ambient_light = None
+ assert np.allclose(s.bg_color, np.ones(4))
+ assert np.allclose(s.ambient_light, np.zeros(3))
+
+ assert s.nodes == set()
+ assert s.cameras == set()
+ assert s.lights == set()
+ assert s.point_lights == set()
+ assert s.spot_lights == set()
+ assert s.directional_lights == set()
+ assert s.meshes == set()
+ assert s.camera_nodes == set()
+ assert s.light_nodes == set()
+ assert s.point_light_nodes == set()
+ assert s.spot_light_nodes == set()
+ assert s.directional_light_nodes == set()
+ assert s.mesh_nodes == set()
+ assert s.main_camera_node is None
+ assert np.all(s.bounds == 0)
+ assert np.all(s.centroid == 0)
+ assert np.all(s.extents == 0)
+ assert np.all(s.scale == 0)
+
+ # From trimesh scene
+ tms = trimesh.load('tests/data/WaterBottle.glb')
+ s = Scene.from_trimesh_scene(tms)
+ assert len(s.meshes) == 1
+ assert len(s.mesh_nodes) == 1
+
+ # Test bg color formatting
+ s = Scene(bg_color=[0, 1.0, 0])
+ assert np.allclose(s.bg_color, np.array([0.0, 1.0, 0.0, 1.0]))
+
+ # Test constructor for nodes
+ n1 = Node()
+ n2 = Node()
+ n3 = Node()
+ nodes = [n1, n2, n3]
+ s = Scene(nodes=nodes)
+ n1.children.append(n2)
+ s = Scene(nodes=nodes)
+ n3.children.append(n2)
+ with pytest.raises(ValueError):
+ s = Scene(nodes=nodes)
+ n3.children = []
+ n2.children.append(n3)
+ n3.children.append(n2)
+ with pytest.raises(ValueError):
+ s = Scene(nodes=nodes)
+
+ # Test node accessors
+ n1 = Node()
+ n2 = Node()
+ n3 = Node()
+ nodes = [n1, n2]
+ s = Scene(nodes=nodes)
+ assert s.has_node(n1)
+ assert s.has_node(n2)
+ assert not s.has_node(n3)
+
+ # Test node poses
+ for n in nodes:
+ assert np.allclose(s.get_pose(n), np.eye(4))
+ with pytest.raises(ValueError):
+ s.get_pose(n3)
+ with pytest.raises(ValueError):
+ s.set_pose(n3, np.eye(4))
+ tf = np.eye(4)
+ tf[:3,3] = np.ones(3)
+ s.set_pose(n1, tf)
+ assert np.allclose(s.get_pose(n1), tf)
+ assert np.allclose(s.get_pose(n2), np.eye(4))
+
+ nodes = [n1, n2, n3]
+ tf2 = np.eye(4)
+ tf2[:3,:3] = np.diag([-1,-1,1])
+ n1.children.append(n2)
+ n1.matrix = tf
+ n2.matrix = tf2
+ s = Scene(nodes=nodes)
+ assert np.allclose(s.get_pose(n1), tf)
+ assert np.allclose(s.get_pose(n2), tf.dot(tf2))
+ assert np.allclose(s.get_pose(n3), np.eye(4))
+
+ n1 = Node()
+ n2 = Node()
+ n3 = Node()
+ n1.children.append(n2)
+ s = Scene()
+ s.add_node(n1)
+ with pytest.raises(ValueError):
+ s.add_node(n2)
+ s.set_pose(n1, tf)
+ assert np.allclose(s.get_pose(n1), tf)
+ assert np.allclose(s.get_pose(n2), tf)
+ s.set_pose(n2, tf2)
+ assert np.allclose(s.get_pose(n2), tf.dot(tf2))
+
+ # Test node removal
+ n1 = Node()
+ n2 = Node()
+ n3 = Node()
+ n1.children.append(n2)
+ n2.children.append(n3)
+ s = Scene(nodes=[n1, n2, n3])
+ s.remove_node(n2)
+ assert len(s.nodes) == 1
+ assert n1 in s.nodes
+ assert len(n1.children) == 0
+ assert len(n2.children) == 1
+ s.add_node(n2, parent_node=n1)
+ assert len(n1.children) == 1
+ n1.matrix = tf
+ n3.matrix = tf2
+ assert np.allclose(s.get_pose(n3), tf.dot(tf2))
+
+ # Now test ADD function
+ s = Scene()
+ m = Mesh([], name='m')
+ cp = PerspectiveCamera(yfov=2.0)
+ co = OrthographicCamera(xmag=1.0, ymag=1.0)
+ dl = DirectionalLight()
+ pl = PointLight()
+ sl = SpotLight()
+
+ n1 = s.add(m, name='mn')
+ assert n1.mesh == m
+ assert len(s.nodes) == 1
+ assert len(s.mesh_nodes) == 1
+ assert n1 in s.mesh_nodes
+ assert len(s.meshes) == 1
+ assert m in s.meshes
+ assert len(s.get_nodes(node=n2)) == 0
+ n2 = s.add(m, pose=tf)
+ assert len(s.nodes) == len(s.mesh_nodes) == 2
+ assert len(s.meshes) == 1
+ assert len(s.get_nodes(node=n1)) == 1
+ assert len(s.get_nodes(node=n1, name='mn')) == 1
+ assert len(s.get_nodes(name='mn')) == 1
+ assert len(s.get_nodes(obj=m)) == 2
+ assert len(s.get_nodes(obj=m, obj_name='m')) == 2
+ assert len(s.get_nodes(obj=co)) == 0
+ nsl = s.add(sl, name='sln')
+ npl = s.add(pl, parent_name='sln')
+ assert nsl.children[0] == npl
+ ndl = s.add(dl, parent_node=npl)
+ assert npl.children[0] == ndl
+ nco = s.add(co)
+ ncp = s.add(cp)
+
+ assert len(s.light_nodes) == len(s.lights) == 3
+ assert len(s.point_light_nodes) == len(s.point_lights) == 1
+ assert npl in s.point_light_nodes
+ assert len(s.spot_light_nodes) == len(s.spot_lights) == 1
+ assert nsl in s.spot_light_nodes
+ assert len(s.directional_light_nodes) == len(s.directional_lights) == 1
+ assert ndl in s.directional_light_nodes
+ assert len(s.cameras) == len(s.camera_nodes) == 2
+ assert s.main_camera_node == nco
+ s.main_camera_node = ncp
+ s.remove_node(ncp)
+ assert len(s.cameras) == len(s.camera_nodes) == 1
+ assert s.main_camera_node == nco
+ s.remove_node(n2)
+ assert len(s.meshes) == 1
+ s.remove_node(n1)
+ assert len(s.meshes) == 0
+ s.remove_node(nsl)
+ assert len(s.lights) == 0
+ s.remove_node(nco)
+ assert s.main_camera_node is None
+
+ s.add_node(n1)
+ s.clear()
+ assert len(s.nodes) == 0
+
+ # Trigger final errors
+ with pytest.raises(ValueError):
+ s.main_camera_node = None
+ with pytest.raises(ValueError):
+ s.main_camera_node = ncp
+ with pytest.raises(ValueError):
+ s.add(m, parent_node=n1)
+ with pytest.raises(ValueError):
+ s.add(m, name='asdf')
+ s.add(m, name='asdf')
+ s.add(m, parent_name='asdf')
+ with pytest.raises(ValueError):
+ s.add(m, parent_name='asfd')
+ with pytest.raises(TypeError):
+ s.add(None)
+
+ s.clear()
+ # Test bounds
+ m1 = Mesh.from_trimesh(trimesh.creation.box())
+ m2 = Mesh.from_trimesh(trimesh.creation.box())
+ m3 = Mesh.from_trimesh(trimesh.creation.box())
+ n1 = Node(mesh=m1)
+ n2 = Node(mesh=m2, translation=[1.0, 0.0, 0.0])
+ n3 = Node(mesh=m3, translation=[0.5, 0.0, 1.0])
+ s.add_node(n1)
+ s.add_node(n2)
+ s.add_node(n3)
+ assert np.allclose(s.bounds, [[-0.5, -0.5, -0.5], [1.5, 0.5, 1.5]])
+ s.clear()
+ s.add_node(n1)
+ s.add_node(n2, parent_node=n1)
+ s.add_node(n3, parent_node=n2)
+ assert np.allclose(s.bounds, [[-0.5, -0.5, -0.5], [2.0, 0.5, 1.5]])
+ tf = np.eye(4)
+ tf[:3,3] = np.ones(3)
+ s.set_pose(n3, tf)
+ assert np.allclose(s.bounds, [[-0.5, -0.5, -0.5], [2.5, 1.5, 1.5]])
+ s.remove_node(n2)
+ assert np.allclose(s.bounds, [[-0.5, -0.5, -0.5], [0.5, 0.5, 0.5]])
+ s.clear()
+ assert np.allclose(s.bounds, 0.0)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..24296c20c5c1fcfb89efde6d032a0f3270c8258c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,24 @@
+--extra-index-url https://download.pytorch.org/whl/cu116
+torch==1.13.1+cu116
+torchvision==0.14.1+cu116
+
+pytorch-lightning
+smplx==0.1.28
+opencv-python
+yacs
+scikit-image
+einops
+timm
+OmegaConf
+trimesh
+pyglet==1.4.0a1
+PyOpenGL==3.1.4
+PyOpenGL_accelerate
+shapely
+xtcocotools
+pandas
+mmcv-full==1.3.9
+numpy==1.23.3
+json_tricks
+munkres
+chumpy
\ No newline at end of file
diff --git a/vendor/.DS_Store b/vendor/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/vendor/.DS_Store differ
diff --git a/vendor/ViTPose/.gitignore b/vendor/ViTPose/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b102be2dbb3ba920e5d22f8714915503952cc509
--- /dev/null
+++ b/vendor/ViTPose/.gitignore
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+imgs/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/vendor/ViTPose/CITATION.cff b/vendor/ViTPose/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..62b75a422a45a29bf1e4c6d18bbd1d773dfbf8e1
--- /dev/null
+++ b/vendor/ViTPose/CITATION.cff
@@ -0,0 +1,8 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+ - name: "MMPose Contributors"
+title: "OpenMMLab Pose Estimation Toolbox and Benchmark"
+date-released: 2020-08-31
+url: "https://github.com/open-mmlab/mmpose"
+license: Apache-2.0
diff --git a/vendor/ViTPose/LICENSE b/vendor/ViTPose/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..b712427afe4978c6084580f113cdc87f77564fd9
--- /dev/null
+++ b/vendor/ViTPose/LICENSE
@@ -0,0 +1,203 @@
+Copyright 2018-2020 Open-MMLab. All rights reserved.
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2018-2020 Open-MMLab.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/ViTPose/MANIFEST.in b/vendor/ViTPose/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..8a93c252bd38bafddc390bc9ae9b7278e3479246
--- /dev/null
+++ b/vendor/ViTPose/MANIFEST.in
@@ -0,0 +1,5 @@
+include requirements/*.txt
+include mmpose/.mim/model-index.yml
+recursive-include mmpose/.mim/configs *.py *.yml
+recursive-include mmpose/.mim/tools *.py *.sh
+recursive-include mmpose/.mim/demo *.py
diff --git a/vendor/ViTPose/README.md b/vendor/ViTPose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d56759c8bf5a83043d01e1454fd33b989e958183
--- /dev/null
+++ b/vendor/ViTPose/README.md
@@ -0,0 +1,293 @@
+# ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation
+
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/vitpose-simple-vision-transformer-baselines/pose-estimation-on-coco-test-dev)](https://paperswithcode.com/sota/pose-estimation-on-coco-test-dev?p=vitpose-simple-vision-transformer-baselines)
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/vitpose-simple-vision-transformer-baselines/pose-estimation-on-aic)](https://paperswithcode.com/sota/pose-estimation-on-aic?p=vitpose-simple-vision-transformer-baselines)
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/vitpose-simple-vision-transformer-baselines/pose-estimation-on-crowdpose)](https://paperswithcode.com/sota/pose-estimation-on-crowdpose?p=vitpose-simple-vision-transformer-baselines)
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/vitpose-simple-vision-transformer-baselines/pose-estimation-on-ochuman)](https://paperswithcode.com/sota/pose-estimation-on-ochuman?p=vitpose-simple-vision-transformer-baselines)
+
+
+ Results | Updates | Usage | Todo | Acknowledge
+
+This branch contains the PyTorch implementation of ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation and ViTPose+: Vision Transformer Foundation Model for Generic Body Pose Estimation. It obtains 81.1 AP on the MS COCO Keypoint test-dev set.
+
+
+
+## Web Demo
+
+- Integrated into [Huggingface Spaces 🤗](https://huggingface.co/spaces) using [Gradio](https://github.com/gradio-app/gradio). Try out the Web Demo for video: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/hysts/ViTPose_video) and images [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Gradio-Blocks/ViTPose)
+
+## MAE Pre-trained model
+
+- The small size MAE pre-trained model can be found in [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccZeiFjh4DJ7gjYyg?e=iTMdMq).
+- The base, large, and huge pre-trained models using MAE can be found in the [MAE official repo](https://github.com/facebookresearch/mae).
+
+## Results from this repo on MS COCO val set (single-task training)
+
+Results are obtained using detection results from a human detector that achieves 56 AP on the person class. The configs here are used for both training and testing.
+
+> With classic decoder
+
+| Model | Pretrain | Resolution | AP | AR | config | log | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-S | MAE | 256x192 | 73.8 | 79.2 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_small_coco_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcchdNXBAh7ClS14pA?e=dKXmJ6) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccifT1XlGRatxg3vw?e=9wz7BY) |
+| ViTPose-B | MAE | 256x192 | 75.8 | 81.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py) | [log](logs/vitpose-b.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSMjp1_NrV3VRSmK?e=Q1uZKs) |
+| ViTPose-L | MAE | 256x192 | 78.3 | 83.5 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py) | [log](logs/vitpose-l.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSd9k_kuktPtiP4F?e=K7DGYT) |
+| ViTPose-H | MAE | 256x192 | 79.1 | 84.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py) | [log](logs/vitpose-h.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgShLMI-kkmvNfF_h?e=dEhGHe) |
+
+> With simple decoder
+
+| Model | Pretrain | Resolution | AP | AR | config | log | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-S | MAE | 256x192 | 73.5 | 78.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_small_simple_coco_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccfkqELJqE67kpRtw?e=InSjJP) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccgb_50jIgiYkHvdw?e=D7RbH2) |
+| ViTPose-B | MAE | 256x192 | 75.5 | 80.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_simple_coco_256x192.py) | [log](logs/vitpose-b-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSRPKrD5PmDRiv0R?e=jifvOe) |
+| ViTPose-L | MAE | 256x192 | 78.2 | 83.4 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_simple_coco_256x192.py) | [log](logs/vitpose-l-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSVS6DP2LmKwZ3sm?e=MmCvDT) |
+| ViTPose-H | MAE | 256x192 | 78.9 | 84.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_simple_coco_256x192.py) | [log](logs/vitpose-h-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSbHyN2mjh2n2LyG?e=y0FgMK) |
+
+
+## Results with multi-task training
+
+**Note** \* There may be duplicate images between the CrowdPose training set and the validation sets of the other datasets, as discussed in [issue #24](https://github.com/ViTAE-Transformer/ViTPose/issues/24). Please be careful when using these models for evaluation. We also provide results without the CrowdPose dataset for reference.
+
+### Human datasets (MS COCO, AIC, MPII, CrowdPose)
+> Results on MS COCO val set
+
+Results are obtained using detection results from a human detector that achieves 56 AP on the person class. Note that the configs here are only for evaluation.
+
+| Model | Dataset | Resolution | AP | AR | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-B | COCO+AIC+MPII | 256x192 | 77.1 | 82.2 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcccwaTZ8xCFFM3Sjg?e=chmiK5) |
+| ViTPose-L | COCO+AIC+MPII | 256x192 | 78.7 | 83.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccdOLQqSo6E87GfMw?e=TEurgW) |
+| ViTPose-H | COCO+AIC+MPII | 256x192 | 79.5 | 84.5 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccmHofkmfJDQDukVw?e=gRK224) |
+| ViTPose-G | COCO+AIC+MPII | 576x432 | 81.0 | 85.6 | | |
+| ViTPose-B* | COCO+AIC+MPII+CrowdPose | 256x192 | 77.5 | 82.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSrlMB093JzJtqq-?e=Jr5S3R) |
+| ViTPose-L* | COCO+AIC+MPII+CrowdPose | 256x192 | 79.1 | 84.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTBm3dCVmBUbHYT6?e=fHUrTq) |
+| ViTPose-H* | COCO+AIC+MPII+CrowdPose | 256x192 | 79.8 | 84.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS5rLeRAJiWobCdh?e=41GsDd) |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 75.8 | 82.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_small_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 77.0 | 82.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_base_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 78.6 | 84.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_large_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 79.4 | 84.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_huge_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+
+> Results on OCHuman test set
+
+Using groundtruth bounding boxes. Note the configs here are only for evaluation.
+
+| Model | Dataset | Resolution | AP | AR | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-B | COCO+AIC+MPII | 256x192 | 88.0 | 89.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcccwaTZ8xCFFM3Sjg?e=chmiK5) |
+| ViTPose-L | COCO+AIC+MPII | 256x192 | 90.9 | 92.2 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccdOLQqSo6E87GfMw?e=TEurgW) |
+| ViTPose-H | COCO+AIC+MPII | 256x192 | 90.9 | 92.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccmHofkmfJDQDukVw?e=gRK224) |
+| ViTPose-G | COCO+AIC+MPII | 576x432 | 93.3 | 94.3 | | |
+| ViTPose-B* | COCO+AIC+MPII+CrowdPose | 256x192 | 88.2 | 90.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSrlMB093JzJtqq-?e=Jr5S3R) |
+| ViTPose-L* | COCO+AIC+MPII+CrowdPose | 256x192 | 91.5 | 92.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTBm3dCVmBUbHYT6?e=fHUrTq) |
+| ViTPose-H* | COCO+AIC+MPII+CrowdPose | 256x192 | 91.6 | 92.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS5rLeRAJiWobCdh?e=41GsDd) |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 78.4 | 80.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_small_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 82.6 | 84.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 85.7 | 87.5 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 85.7 | 87.4 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+> Results on MPII val set
+
+Using groundtruth bounding boxes. Note the configs here are only for evaluation. The metric is PCKh.
+
+| Model | Dataset | Resolution | Mean | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-B | COCO+AIC+MPII | 256x192 | 93.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcccwaTZ8xCFFM3Sjg?e=chmiK5) |
+| ViTPose-L | COCO+AIC+MPII | 256x192 | 94.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccdOLQqSo6E87GfMw?e=TEurgW) |
+| ViTPose-H | COCO+AIC+MPII | 256x192 | 94.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccmHofkmfJDQDukVw?e=gRK224) |
+| ViTPose-G | COCO+AIC+MPII | 576x432 | 94.3 | | |
+| ViTPose-B* | COCO+AIC+MPII+CrowdPose | 256x192 | 93.4 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSy_OSEm906wd2LB?e=GOSg14) |
+| ViTPose-L* | COCO+AIC+MPII+CrowdPose | 256x192 | 93.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTM32I6Kpjr-esl6?e=qvh0Yl) |
+| ViTPose-H* | COCO+AIC+MPII+CrowdPose | 256x192 | 94.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTT90XEQBKy-scIH?e=D2WhTS) |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 92.7 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_small_mpii_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 92.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 94.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 94.2 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+
+> Results on AI Challenger test set
+
+Using groundtruth bounding boxes. Note the configs here are only for evaluation.
+
+| Model | Dataset | Resolution | AP | AR | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-B | COCO+AIC+MPII | 256x192 | 32.0 | 36.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcccwaTZ8xCFFM3Sjg?e=chmiK5) |
+| ViTPose-L | COCO+AIC+MPII | 256x192 | 34.5 | 39.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccdOLQqSo6E87GfMw?e=TEurgW) |
+| ViTPose-H | COCO+AIC+MPII | 256x192 | 35.4 | 39.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccmHofkmfJDQDukVw?e=gRK224) |
+| ViTPose-G | COCO+AIC+MPII | 576x432 | 43.2 | 47.1 | | |
+| ViTPose-B* | COCO+AIC+MPII+CrowdPose | 256x192 | 31.9 | 36.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSlvdVaXTC92SHYH?e=j7iqcp) |
+| ViTPose-L* | COCO+AIC+MPII+CrowdPose | 256x192 | 34.6 | 39.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTF06FX3FSAm0MOH?e=rYts9F) |
+| ViTPose-H* | COCO+AIC+MPII+CrowdPose | 256x192 | 35.3 | 39.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS1MRmb2mcow_K04?e=q9jPab) |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 29.7 | 34.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_small_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 31.8 | 36.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 34.3 | 38.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 34.8 | 39.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+> Results on CrowdPose test set
+
+Using YOLOv3 human detector. Note the configs here are only for evaluation.
+
+| Model | Dataset | Resolution | AP | AP(H) | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
+| ViTPose-B* | COCO+AIC+MPII+CrowdPose | 256x192 | 74.7 | 63.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_base_crowdpose_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgStrrCb91cPlaxJx?e=6Xobo6) |
+| ViTPose-L* | COCO+AIC+MPII+CrowdPose | 256x192 | 76.6 | 65.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_large_crowdpose_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTK3dug-r7c6GFyu?e=1ZBpEG) |
+| ViTPose-H* | COCO+AIC+MPII+CrowdPose | 256x192 | 76.3 | 65.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_huge_crowdpose_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS-oAvEV4MTD--Xr?e=EeW2Fu) |
+
+### Animal datasets (AP10K, APT36K)
+
+> Results on AP-10K test set
+
+| Model | Dataset | Resolution | AP | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 71.4 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_small_ap10k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 74.5 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_base_ap10k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 80.4 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_large_ap10k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 82.4 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_huge_ap10k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+> Results on APT-36K val set
+
+| Model | Dataset | Resolution | AP | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 74.2 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_small_apt36k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 75.9 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_base_apt36k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 80.8 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_large_apt36k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 82.3 | [config](configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_huge_apt36k_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+### WholeBody dataset
+
+| Model | Dataset | Resolution | AP | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| **ViTPose+-S** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 54.4 | [config](configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_small_wholebody_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccqO1JBHtBjNaeCbQ?e=ZN5NSz) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccrwORr61gT9E4n8g?e=kz9sz5) |
+| **ViTPose+-B** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 57.4 | [config](configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_base_wholebody_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccjj9lgPTlkGT1HTw?e=OlS5zv) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgcckRZk1bIAuRa_E1w?e=ylDB2G) |
+| **ViTPose+-L** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 60.6 | [config](configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_large_wholebody_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgccp7HJf4QMeQQpeyA?e=JagPNt) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccs1SNFUGSTsmRJ8w?e=a9zKwZ) |
+| **ViTPose+-H** | COCO+AIC+MPII+AP10K+APT36K+WholeBody | 256x192 | 61.2 | [config](configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_huge_wholebody_256x192.py) | [log](https://1drv.ms/u/s!AimBgYV7JjTlgcclxZOlwRJdqpIIjA?e=nFQgVC) \| [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgccoXv8rCUgVe7oD9Q?e=ZBw6gR) |
+
+### Transfer results on the hand dataset (InterHand2.6M)
+
+| Model | Dataset | Resolution | AUC | config | weight |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| **ViTPose+-S** | COCO+AIC+MPII+WholeBody | 256x192 | 86.5 | [config](configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_small_interhand2d_all_256x192.py) | Coming Soon |
+| **ViTPose+-B** | COCO+AIC+MPII+WholeBody | 256x192 | 87.0 | [config](configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_base_interhand2d_all_256x192.py) | Coming Soon |
+| **ViTPose+-L** | COCO+AIC+MPII+WholeBody | 256x192 | 87.5 | [config](configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_large_interhand2d_all_256x192.py) | Coming Soon |
+| **ViTPose+-H** | COCO+AIC+MPII+WholeBody | 256x192 | 87.6 | [config](configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_huge_interhand2d_all_256x192.py) | Coming Soon |
+
+## Updates
+
+> [2023-01-10] Update ViTPose+! It uses MoE strategies to jointly deal with human, animal, and wholebody pose estimation tasks.
+
+> [2022-05-24] Upload the single-task training code, single-task pre-trained models, and multi-task pretrained models.
+
+> [2022-05-06] Upload the logs for the base, large, and huge models!
+
+> [2022-04-27] Our ViTPose with ViTAE-G obtains 81.1 AP on COCO test-dev set!
+
+> Applications of ViTAE Transformer include: [image classification](https://github.com/ViTAE-Transformer/ViTAE-Transformer/tree/main/Image-Classification) | [object detection](https://github.com/ViTAE-Transformer/ViTAE-Transformer/tree/main/Object-Detection) | [semantic segmentation](https://github.com/ViTAE-Transformer/ViTAE-Transformer/tree/main/Semantic-Segmentation) | [animal pose estimation](https://github.com/ViTAE-Transformer/ViTAE-Transformer/tree/main/Animal-Pose-Estimation) | [remote sensing](https://github.com/ViTAE-Transformer/ViTAE-Transformer-Remote-Sensing) | [matting](https://github.com/ViTAE-Transformer/ViTAE-Transformer-Matting) | [VSA](https://github.com/ViTAE-Transformer/ViTAE-VSA) | [ViTDet](https://github.com/ViTAE-Transformer/ViTDet)
+
+## Usage
+
+We use PyTorch 1.9.0 (or the NGC docker image 21.06) and mmcv 1.3.9 for the experiments.
+```bash
+git clone https://github.com/open-mmlab/mmcv.git
+cd mmcv
+git checkout v1.3.9
+MMCV_WITH_OPS=1 pip install -e .
+cd ..
+git clone https://github.com/ViTAE-Transformer/ViTPose.git
+cd ViTPose
+pip install -v -e .
+```
+
+After installing the two repos, install timm and einops:
+```bash
+pip install timm==0.4.9 einops
+```
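+
+As an optional sanity check, you can verify that the editable installs import cleanly and print their versions (this is just a quick check of the environment set up above):
+
+```bash
+# should print the installed versions of torch, mmcv, mmpose (ViTPose), and timm
+python -c "import torch, mmcv, mmpose, timm, einops; print(torch.__version__, mmcv.__version__, mmpose.__version__, timm.__version__)"
+```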
+
+After downloading the pretrained models, please run the experiments with the commands below, replacing the `<...>` placeholders with your config path, number of GPUs, and pre-trained weight path.
+
+```bash
+# for single machine
+bash tools/dist_train.sh <CONFIG_PATH> <NUM_GPUS> --cfg-options model.pretrained=<PRETRAINED_PATH> --seed 0
+
+# for multiple machines
+python -m torch.distributed.launch --nnodes <NUM_MACHINES> --node_rank <MACHINE_RANK> --nproc_per_node <GPUS_PER_MACHINE> --master_addr <MASTER_ADDR> --master_port <MASTER_PORT> tools/train.py <CONFIG_PATH> --cfg-options model.pretrained=<PRETRAINED_PATH> --launcher pytorch --seed 0
+```
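+
+For example, a single-machine run for ViTPose-B on COCO might look like the following sketch; the config path is taken from the tables above, while the GPU count and the MAE pre-trained weight path are placeholders to adapt to your setup:
+
+```bash
+# example: train ViTPose-B on COCO with 8 GPUs from MAE pre-trained weights (paths are placeholders)
+bash tools/dist_train.sh \
+    configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py 8 \
+    --cfg-options model.pretrained=/path/to/mae_pretrain_vit_base.pth --seed 0
+```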
+
+To test the performance of the pre-trained models, please run
+
+```bash
+bash tools/dist_test.sh <CONFIG_PATH> <CHECKPOINT_PATH> <NUM_GPUS>
+```
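+
+For example, evaluating a downloaded ViTPose-B checkpoint on COCO val with 8 GPUs could look like this; the checkpoint path is a placeholder for the file obtained from the links above:
+
+```bash
+# example: evaluate a downloaded ViTPose-B checkpoint (placeholder path) on 8 GPUs
+bash tools/dist_test.sh \
+    configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py \
+    /path/to/vitpose-b.pth 8
+```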
+
+For ViTPose+ pre-trained models, please first re-organize the pre-trained weights using
+
+```bash
+python tools/model_split.py --source <PRETRAINED_PATH>
+```
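+
+For example, with a placeholder path to a downloaded ViTPose+ checkpoint:
+
+```bash
+# example: re-organize a downloaded ViTPose+ checkpoint (placeholder path) before evaluation/fine-tuning
+python tools/model_split.py --source /path/to/vitpose+_base.pth
+```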
+
+## Todo
+
+This repo currently contains the following modifications:
+
+- [x] Upload configs and pretrained models
+
+- [x] More models with SOTA results
+
+- [x] Upload multi-task training config
+
+## Acknowledge
+We acknowledge the excellent implementation from [mmpose](https://github.com/open-mmlab/mmpose) and [MAE](https://github.com/facebookresearch/mae).
+
+## Citing ViTPose
+
+For ViTPose
+
+```
+@inproceedings{
+ xu2022vitpose,
+ title={Vi{TP}ose: Simple Vision Transformer Baselines for Human Pose Estimation},
+ author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao},
+ booktitle={Advances in Neural Information Processing Systems},
+ year={2022},
+}
+```
+
+For ViTPose+
+
+```
+@article{xu2022vitpose+,
+ title={ViTPose+: Vision Transformer Foundation Model for Generic Body Pose Estimation},
+ author={Xu, Yufei and Zhang, Jing and Zhang, Qiming and Tao, Dacheng},
+ journal={arXiv preprint arXiv:2212.04246},
+ year={2022}
+}
+```
+
+For ViTAE and ViTAEv2, please refer to:
+```
+@article{xu2021vitae,
+ title={Vitae: Vision transformer advanced by exploring intrinsic inductive bias},
+ author={Xu, Yufei and Zhang, Qiming and Zhang, Jing and Tao, Dacheng},
+ journal={Advances in Neural Information Processing Systems},
+ volume={34},
+ year={2021}
+}
+
+@article{zhang2022vitaev2,
+ title={ViTAEv2: Vision Transformer Advanced by Exploring Inductive Bias for Image Recognition and Beyond},
+ author={Zhang, Qiming and Xu, Yufei and Zhang, Jing and Tao, Dacheng},
+ journal={arXiv preprint arXiv:2202.10108},
+ year={2022}
+}
+```
diff --git a/vendor/ViTPose/configs/_base_/datasets/300w.py b/vendor/ViTPose/configs/_base_/datasets/300w.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c343a2adf84947159f2651b3e918d1fc32ea90
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/300w.py
@@ -0,0 +1,384 @@
+dataset_info = dict(
+ dataset_name='300w',
+ paper_info=dict(
+ author='Sagonas, Christos and Antonakos, Epameinondas '
+ 'and Tzimiropoulos, Georgios and Zafeiriou, Stefanos '
+ 'and Pantic, Maja',
+ title='300 faces in-the-wild challenge: '
+ 'Database and results',
+ container='Image and vision computing',
+ year='2016',
+ homepage='https://ibug.doc.ic.ac.uk/resources/300-W/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-16'),
+ 1:
+ dict(
+ name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-15'),
+ 2:
+ dict(
+ name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-14'),
+ 3:
+ dict(
+ name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-13'),
+ 4:
+ dict(
+ name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-12'),
+ 5:
+ dict(
+ name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-11'),
+ 6:
+ dict(
+ name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-10'),
+ 7:
+ dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-9'),
+ 8:
+ dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap=''),
+ 9:
+ dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-7'),
+ 10:
+ dict(
+ name='kpt-10', id=10, color=[255, 255, 255], type='',
+ swap='kpt-6'),
+ 11:
+ dict(
+ name='kpt-11', id=11, color=[255, 255, 255], type='',
+ swap='kpt-5'),
+ 12:
+ dict(
+ name='kpt-12', id=12, color=[255, 255, 255], type='',
+ swap='kpt-4'),
+ 13:
+ dict(
+ name='kpt-13', id=13, color=[255, 255, 255], type='',
+ swap='kpt-3'),
+ 14:
+ dict(
+ name='kpt-14', id=14, color=[255, 255, 255], type='',
+ swap='kpt-2'),
+ 15:
+ dict(
+ name='kpt-15', id=15, color=[255, 255, 255], type='',
+ swap='kpt-1'),
+ 16:
+ dict(
+ name='kpt-16', id=16, color=[255, 255, 255], type='',
+ swap='kpt-0'),
+ 17:
+ dict(
+ name='kpt-17',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-26'),
+ 18:
+ dict(
+ name='kpt-18',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-25'),
+ 19:
+ dict(
+ name='kpt-19',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-24'),
+ 20:
+ dict(
+ name='kpt-20',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-23'),
+ 21:
+ dict(
+ name='kpt-21',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-22'),
+ 22:
+ dict(
+ name='kpt-22',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-21'),
+ 23:
+ dict(
+ name='kpt-23',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-20'),
+ 24:
+ dict(
+ name='kpt-24',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-19'),
+ 25:
+ dict(
+ name='kpt-25',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-18'),
+ 26:
+ dict(
+ name='kpt-26',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-17'),
+ 27:
+ dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''),
+ 28:
+ dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap=''),
+ 29:
+ dict(name='kpt-29', id=29, color=[255, 255, 255], type='', swap=''),
+ 30:
+ dict(name='kpt-30', id=30, color=[255, 255, 255], type='', swap=''),
+ 31:
+ dict(
+ name='kpt-31',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-35'),
+ 32:
+ dict(
+ name='kpt-32',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-34'),
+ 33:
+ dict(name='kpt-33', id=33, color=[255, 255, 255], type='', swap=''),
+ 34:
+ dict(
+ name='kpt-34',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-32'),
+ 35:
+ dict(
+ name='kpt-35',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-31'),
+ 36:
+ dict(
+ name='kpt-36',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-45'),
+ 37:
+ dict(
+ name='kpt-37',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-44'),
+ 38:
+ dict(
+ name='kpt-38',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-43'),
+ 39:
+ dict(
+ name='kpt-39',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-42'),
+ 40:
+ dict(
+ name='kpt-40',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-47'),
+ 41:
+ dict(
+ name='kpt-41',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-46'),
+ 42:
+ dict(
+ name='kpt-42',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-39'),
+ 43:
+ dict(
+ name='kpt-43',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-38'),
+ 44:
+ dict(
+ name='kpt-44',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-37'),
+ 45:
+ dict(
+ name='kpt-45',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-36'),
+ 46:
+ dict(
+ name='kpt-46',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-41'),
+ 47:
+ dict(
+ name='kpt-47',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-40'),
+ 48:
+ dict(
+ name='kpt-48',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-54'),
+ 49:
+ dict(
+ name='kpt-49',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-53'),
+ 50:
+ dict(
+ name='kpt-50',
+ id=50,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-52'),
+ 51:
+ dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(
+ name='kpt-52',
+ id=52,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-50'),
+ 53:
+ dict(
+ name='kpt-53',
+ id=53,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-49'),
+ 54:
+ dict(
+ name='kpt-54',
+ id=54,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-48'),
+ 55:
+ dict(
+ name='kpt-55',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-59'),
+ 56:
+ dict(
+ name='kpt-56',
+ id=56,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-58'),
+ 57:
+ dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''),
+ 58:
+ dict(
+ name='kpt-58',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-56'),
+ 59:
+ dict(
+ name='kpt-59',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-55'),
+ 60:
+ dict(
+ name='kpt-60',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-64'),
+ 61:
+ dict(
+ name='kpt-61',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-63'),
+ 62:
+ dict(name='kpt-62', id=62, color=[255, 255, 255], type='', swap=''),
+ 63:
+ dict(
+ name='kpt-63',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-61'),
+ 64:
+ dict(
+ name='kpt-64',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-60'),
+ 65:
+ dict(
+ name='kpt-65',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-67'),
+ 66:
+ dict(name='kpt-66', id=66, color=[255, 255, 255], type='', swap=''),
+ 67:
+ dict(
+ name='kpt-67',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-65'),
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 68,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/aflw.py b/vendor/ViTPose/configs/_base_/datasets/aflw.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf534cbb756e8c514c2f5e2a7fceedd55afb637e
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/aflw.py
@@ -0,0 +1,83 @@
+dataset_info = dict(
+ dataset_name='aflw',
+ paper_info=dict(
+ author='Koestinger, Martin and Wohlhart, Paul and '
+ 'Roth, Peter M and Bischof, Horst',
+ title='Annotated facial landmarks in the wild: '
+ 'A large-scale, real-world database for facial '
+ 'landmark localization',
+ container='2011 IEEE international conference on computer '
+ 'vision workshops (ICCV workshops)',
+ year='2011',
+ homepage='https://www.tugraz.at/institute/icg/research/'
+ 'team-bischof/lrs/downloads/aflw/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-5'),
+ 1:
+ dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-4'),
+ 2:
+ dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'),
+ 3:
+ dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'),
+ 4:
+ dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-1'),
+ 5:
+ dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-0'),
+ 6:
+ dict(
+ name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-11'),
+ 7:
+ dict(
+ name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-10'),
+ 8:
+ dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'),
+ 9:
+ dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'),
+ 10:
+ dict(
+ name='kpt-10', id=10, color=[255, 255, 255], type='',
+ swap='kpt-7'),
+ 11:
+ dict(
+ name='kpt-11', id=11, color=[255, 255, 255], type='',
+ swap='kpt-6'),
+ 12:
+ dict(
+ name='kpt-12',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-14'),
+ 13:
+ dict(name='kpt-13', id=13, color=[255, 255, 255], type='', swap=''),
+ 14:
+ dict(
+ name='kpt-14',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-12'),
+ 15:
+ dict(
+ name='kpt-15',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-17'),
+ 16:
+ dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''),
+ 17:
+ dict(
+ name='kpt-17',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-15'),
+ 18:
+ dict(name='kpt-18', id=18, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 19,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/aic.py b/vendor/ViTPose/configs/_base_/datasets/aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ecdbe3f0afeb19dbb7aed42653ce5efd85cfda3
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/aic.py
@@ -0,0 +1,140 @@
+dataset_info = dict(
+ dataset_name='aic',
+ paper_info=dict(
+ author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+ 'Li, Yixin and Yan, Baoming and Liang, Rui and '
+ 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+ 'Fu, Yanwei and others',
+ title='Ai challenger: A large-scale dataset for going '
+ 'deeper in image understanding',
+ container='arXiv',
+ year='2017',
+ homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_shoulder',
+ id=0,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 1:
+ dict(
+ name='right_elbow',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 2:
+ dict(
+ name='right_wrist',
+ id=2,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 3:
+ dict(
+ name='left_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 4:
+ dict(
+ name='left_elbow',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 5:
+ dict(
+ name='left_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 6:
+ dict(
+ name='right_hip',
+ id=6,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 7:
+ dict(
+ name='right_knee',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 8:
+ dict(
+ name='right_ankle',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 9:
+ dict(
+ name='left_hip',
+ id=9,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 10:
+ dict(
+ name='left_knee',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 11:
+ dict(
+ name='left_ankle',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 12:
+ dict(
+ name='head_top',
+ id=12,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 13:
+ dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+ 1: dict(
+ link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+ 2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+ 3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+ 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+ 8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+ 9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+ 12: dict(
+ link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+ ],
+
+ # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+ # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+ # delta = 2 x sigma
+ sigmas=[
+ 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+ 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+ 0.01291456, 0.01236173
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/aic_info.py b/vendor/ViTPose/configs/_base_/datasets/aic_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..f143fd8c4be5e9cd24988e03f6a1c3ab2d1ceb19
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/aic_info.py
@@ -0,0 +1,140 @@
+aic_info = dict(
+ dataset_name='aic',
+ paper_info=dict(
+ author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+ 'Li, Yixin and Yan, Baoming and Liang, Rui and '
+ 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+ 'Fu, Yanwei and others',
+ title='Ai challenger: A large-scale dataset for going '
+ 'deeper in image understanding',
+ container='arXiv',
+ year='2017',
+ homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_shoulder',
+ id=0,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 1:
+ dict(
+ name='right_elbow',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 2:
+ dict(
+ name='right_wrist',
+ id=2,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 3:
+ dict(
+ name='left_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 4:
+ dict(
+ name='left_elbow',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 5:
+ dict(
+ name='left_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 6:
+ dict(
+ name='right_hip',
+ id=6,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 7:
+ dict(
+ name='right_knee',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 8:
+ dict(
+ name='right_ankle',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 9:
+ dict(
+ name='left_hip',
+ id=9,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 10:
+ dict(
+ name='left_knee',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 11:
+ dict(
+ name='left_ankle',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 12:
+ dict(
+ name='head_top',
+ id=12,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 13:
+ dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+ 1: dict(
+ link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+ 2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+ 3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+ 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+ 8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+ 9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+ 12: dict(
+ link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+ ],
+
+ # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+ # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+ # delta = 2 x sigma
+ sigmas=[
+ 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+ 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+ 0.01291456, 0.01236173
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/animalpose.py b/vendor/ViTPose/configs/_base_/datasets/animalpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5bb62d951b71da25e679bd755fe566216dc3f6f
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/animalpose.py
@@ -0,0 +1,166 @@
+dataset_info = dict(
+ dataset_name='animalpose',
+ paper_info=dict(
+ author='Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and '
+ 'Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing',
+ title='Cross-Domain Adaptation for Animal Pose Estimation',
+ container='The IEEE International Conference on '
+ 'Computer Vision (ICCV)',
+ year='2019',
+ homepage='https://sites.google.com/view/animal-pose/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+ 1:
+ dict(
+ name='R_Eye',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Eye'),
+ 2:
+ dict(
+ name='L_EarBase',
+ id=2,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_EarBase'),
+ 3:
+ dict(
+ name='R_EarBase',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_EarBase'),
+ 4:
+ dict(name='Nose', id=4, color=[51, 153, 255], type='upper', swap=''),
+ 5:
+ dict(name='Throat', id=5, color=[51, 153, 255], type='upper', swap=''),
+ 6:
+ dict(
+ name='TailBase', id=6, color=[51, 153, 255], type='lower',
+ swap=''),
+ 7:
+ dict(
+ name='Withers', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='L_F_Elbow',
+ id=8,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Elbow'),
+ 9:
+ dict(
+ name='R_F_Elbow',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Elbow'),
+ 10:
+ dict(
+ name='L_B_Elbow',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Elbow'),
+ 11:
+ dict(
+ name='R_B_Elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Elbow'),
+ 12:
+ dict(
+ name='L_F_Knee',
+ id=12,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Knee'),
+ 13:
+ dict(
+ name='R_F_Knee',
+ id=13,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Knee'),
+ 14:
+ dict(
+ name='L_B_Knee',
+ id=14,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Knee'),
+ 15:
+ dict(
+ name='R_B_Knee',
+ id=15,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Knee'),
+ 16:
+ dict(
+ name='L_F_Paw',
+ id=16,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Paw'),
+ 17:
+ dict(
+ name='R_F_Paw',
+ id=17,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Paw'),
+ 18:
+ dict(
+ name='L_B_Paw',
+ id=18,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Paw'),
+ 19:
+ dict(
+ name='R_B_Paw',
+ id=19,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Paw')
+ },
+ skeleton_info={
+ 0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[51, 153, 255]),
+ 1: dict(link=('L_Eye', 'L_EarBase'), id=1, color=[0, 255, 0]),
+ 2: dict(link=('R_Eye', 'R_EarBase'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('L_Eye', 'Nose'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('R_Eye', 'Nose'), id=4, color=[255, 128, 0]),
+ 5: dict(link=('Nose', 'Throat'), id=5, color=[51, 153, 255]),
+ 6: dict(link=('Throat', 'Withers'), id=6, color=[51, 153, 255]),
+ 7: dict(link=('TailBase', 'Withers'), id=7, color=[51, 153, 255]),
+ 8: dict(link=('Throat', 'L_F_Elbow'), id=8, color=[0, 255, 0]),
+ 9: dict(link=('L_F_Elbow', 'L_F_Knee'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('L_F_Knee', 'L_F_Paw'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('Throat', 'R_F_Elbow'), id=11, color=[255, 128, 0]),
+ 12: dict(link=('R_F_Elbow', 'R_F_Knee'), id=12, color=[255, 128, 0]),
+ 13: dict(link=('R_F_Knee', 'R_F_Paw'), id=13, color=[255, 128, 0]),
+ 14: dict(link=('TailBase', 'L_B_Elbow'), id=14, color=[0, 255, 0]),
+ 15: dict(link=('L_B_Elbow', 'L_B_Knee'), id=15, color=[0, 255, 0]),
+ 16: dict(link=('L_B_Knee', 'L_B_Paw'), id=16, color=[0, 255, 0]),
+ 17: dict(link=('TailBase', 'R_B_Elbow'), id=17, color=[255, 128, 0]),
+ 18: dict(link=('R_B_Elbow', 'R_B_Knee'), id=18, color=[255, 128, 0]),
+ 19: dict(link=('R_B_Knee', 'R_B_Paw'), id=19, color=[255, 128, 0])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2,
+ 1.5, 1.5, 1.5, 1.5
+ ],
+
+ # Note: The original paper did not provide enough information about
+ # the sigmas. We modified from 'https://github.com/cocodataset/'
+ # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
+ sigmas=[
+ 0.025, 0.025, 0.026, 0.035, 0.035, 0.10, 0.10, 0.10, 0.107, 0.107,
+ 0.107, 0.107, 0.087, 0.087, 0.087, 0.087, 0.089, 0.089, 0.089, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/ap10k.py b/vendor/ViTPose/configs/_base_/datasets/ap10k.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0df579acbb8cf0de1ef62412ba865ee8710f0aa
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/ap10k.py
@@ -0,0 +1,142 @@
+dataset_info = dict(
+ dataset_name='ap10k',
+ paper_info=dict(
+ author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+ 'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+ title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+ container='35th Conference on Neural Information Processing Systems '
+ '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+ year='2021',
+ homepage='https://github.com/AlexTheBad/AP-10K',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+ 1:
+ dict(
+ name='R_Eye',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Eye'),
+ 2:
+ dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+ 3:
+ dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+ 4:
+ dict(
+ name='Root of tail',
+ id=4,
+ color=[51, 153, 255],
+ type='lower',
+ swap=''),
+ 5:
+ dict(
+ name='L_Shoulder',
+ id=5,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Shoulder'),
+ 6:
+ dict(
+ name='L_Elbow',
+ id=6,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Elbow'),
+ 7:
+ dict(
+ name='L_F_Paw',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Paw'),
+ 8:
+ dict(
+ name='R_Shoulder',
+ id=8,
+ color=[0, 255, 0],
+ type='upper',
+ swap='L_Shoulder'),
+ 9:
+ dict(
+ name='R_Elbow',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Elbow'),
+ 10:
+ dict(
+ name='R_F_Paw',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_F_Paw'),
+ 11:
+ dict(
+ name='L_Hip',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Hip'),
+ 12:
+ dict(
+ name='L_Knee',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Knee'),
+ 13:
+ dict(
+ name='L_B_Paw',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Paw'),
+ 14:
+ dict(
+ name='R_Hip', id=14, color=[0, 255, 0], type='lower',
+ swap='L_Hip'),
+ 15:
+ dict(
+ name='R_Knee',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_Knee'),
+ 16:
+ dict(
+ name='R_B_Paw',
+ id=16,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_B_Paw'),
+ },
+ skeleton_info={
+ 0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+ 1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+ 2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+ 3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+ 6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+ 7: dict(link=('L_Elbow', 'L_F_Paw'), id=7, color=[0, 255, 255]),
+ 8: dict(link=('Neck', 'R_Shoulder'), id=8, color=[6, 156, 250]),
+ 9: dict(link=('R_Shoulder', 'R_Elbow'), id=9, color=[6, 156, 250]),
+ 10: dict(link=('R_Elbow', 'R_F_Paw'), id=10, color=[6, 156, 250]),
+ 11: dict(link=('Root of tail', 'L_Hip'), id=11, color=[0, 255, 255]),
+ 12: dict(link=('L_Hip', 'L_Knee'), id=12, color=[0, 255, 255]),
+ 13: dict(link=('L_Knee', 'L_B_Paw'), id=13, color=[0, 255, 255]),
+ 14: dict(link=('Root of tail', 'R_Hip'), id=14, color=[6, 156, 250]),
+ 15: dict(link=('R_Hip', 'R_Knee'), id=15, color=[6, 156, 250]),
+ 16: dict(link=('R_Knee', 'R_B_Paw'), id=16, color=[6, 156, 250]),
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+ 0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/ap10k_info.py b/vendor/ViTPose/configs/_base_/datasets/ap10k_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..af2461c75450818e821894cb1152d59a06443a26
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/ap10k_info.py
@@ -0,0 +1,142 @@
+ap10k_info = dict(
+ dataset_name='ap10k',
+ paper_info=dict(
+ author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+ 'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+ title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+ container='35th Conference on Neural Information Processing Systems '
+ '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+ year='2021',
+ homepage='https://github.com/AlexTheBad/AP-10K',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+ 1:
+ dict(
+ name='R_Eye',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Eye'),
+ 2:
+ dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+ 3:
+ dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+ 4:
+ dict(
+ name='Root of tail',
+ id=4,
+ color=[51, 153, 255],
+ type='lower',
+ swap=''),
+ 5:
+ dict(
+ name='L_Shoulder',
+ id=5,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Shoulder'),
+ 6:
+ dict(
+ name='L_Elbow',
+ id=6,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Elbow'),
+ 7:
+ dict(
+ name='L_F_Paw',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Paw'),
+ 8:
+ dict(
+ name='R_Shoulder',
+ id=8,
+ color=[0, 255, 0],
+ type='upper',
+ swap='L_Shoulder'),
+ 9:
+ dict(
+ name='R_Elbow',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Elbow'),
+ 10:
+ dict(
+ name='R_F_Paw',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_F_Paw'),
+ 11:
+ dict(
+ name='L_Hip',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Hip'),
+ 12:
+ dict(
+ name='L_Knee',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Knee'),
+ 13:
+ dict(
+ name='L_B_Paw',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Paw'),
+ 14:
+ dict(
+ name='R_Hip', id=14, color=[0, 255, 0], type='lower',
+ swap='L_Hip'),
+ 15:
+ dict(
+ name='R_Knee',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_Knee'),
+ 16:
+ dict(
+ name='R_B_Paw',
+ id=16,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_B_Paw'),
+ },
+ skeleton_info={
+ 0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+ 1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+ 2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+ 3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+ 6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+ 7: dict(link=('L_Elbow', 'L_F_Paw'), id=7, color=[0, 255, 255]),
+ 8: dict(link=('Neck', 'R_Shoulder'), id=8, color=[6, 156, 250]),
+ 9: dict(link=('R_Shoulder', 'R_Elbow'), id=9, color=[6, 156, 250]),
+ 10: dict(link=('R_Elbow', 'R_F_Paw'), id=10, color=[6, 156, 250]),
+ 11: dict(link=('Root of tail', 'L_Hip'), id=11, color=[0, 255, 255]),
+ 12: dict(link=('L_Hip', 'L_Knee'), id=12, color=[0, 255, 255]),
+ 13: dict(link=('L_Knee', 'L_B_Paw'), id=13, color=[0, 255, 255]),
+ 14: dict(link=('Root of tail', 'R_Hip'), id=14, color=[6, 156, 250]),
+ 15: dict(link=('R_Hip', 'R_Knee'), id=15, color=[6, 156, 250]),
+ 16: dict(link=('R_Knee', 'R_B_Paw'), id=16, color=[6, 156, 250]),
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+ 0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/atrw.py b/vendor/ViTPose/configs/_base_/datasets/atrw.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ec71c8c508a0340139371a651ca2dd56eeae3cf
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/atrw.py
@@ -0,0 +1,144 @@
+dataset_info = dict(
+ dataset_name='atrw',
+ paper_info=dict(
+ author='Li, Shuyuan and Li, Jianguo and Tang, Hanlin '
+ 'and Qian, Rui and Lin, Weiyao',
+ title='ATRW: A Benchmark for Amur Tiger '
+ 'Re-identification in the Wild',
+ container='Proceedings of the 28th ACM '
+ 'International Conference on Multimedia',
+ year='2020',
+ homepage='https://cvwc2019.github.io/challenge.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left_ear',
+ id=0,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 1:
+ dict(
+ name='right_ear',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 2:
+ dict(name='nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+ 3:
+ dict(
+ name='right_shoulder',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 4:
+ dict(
+ name='right_front_paw',
+ id=4,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_front_paw'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='left_front_paw',
+ id=6,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_front_paw'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='right_knee',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 9:
+ dict(
+ name='right_back_paw',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_back_paw'),
+ 10:
+ dict(
+ name='left_hip',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 11:
+ dict(
+ name='left_knee',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 12:
+ dict(
+ name='left_back_paw',
+ id=12,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_back_paw'),
+ 13:
+ dict(name='tail', id=13, color=[51, 153, 255], type='lower', swap=''),
+ 14:
+ dict(
+ name='center', id=14, color=[51, 153, 255], type='lower', swap=''),
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ear', 'nose'), id=0, color=[51, 153, 255]),
+ 1:
+ dict(link=('right_ear', 'nose'), id=1, color=[51, 153, 255]),
+ 2:
+ dict(link=('nose', 'center'), id=2, color=[51, 153, 255]),
+ 3:
+ dict(
+ link=('left_shoulder', 'left_front_paw'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_shoulder', 'center'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(
+ link=('right_shoulder', 'right_front_paw'),
+ id=5,
+ color=[255, 128, 0]),
+ 6:
+ dict(link=('right_shoulder', 'center'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('tail', 'center'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('right_back_paw', 'right_knee'), id=8, color=[255, 128, 0]),
+ 9:
+ dict(link=('right_knee', 'right_hip'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('right_hip', 'tail'), id=10, color=[255, 128, 0]),
+ 11:
+ dict(link=('left_back_paw', 'left_knee'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_knee', 'left_hip'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_hip', 'tail'), id=13, color=[0, 255, 0]),
+ },
+ joint_weights=[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
+ sigmas=[
+ 0.0277, 0.0823, 0.0831, 0.0202, 0.0716, 0.0263, 0.0646, 0.0302, 0.0440,
+ 0.0316, 0.0333, 0.0547, 0.0263, 0.0683, 0.0539
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/coco.py b/vendor/ViTPose/configs/_base_/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..865a95bc02fedd318f32d2e7aa8397147d78fdb5
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/coco.py
@@ -0,0 +1,181 @@
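+# Standard COCO 17-keypoint metadata: keypoint names and swap pairs, skeleton
+# links with draw colors, per-joint loss weights, and OKS evaluation sigmas.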
+dataset_info = dict(
+ dataset_name='coco',
+ paper_info=dict(
+ author='Lin, Tsung-Yi and Maire, Michael and '
+ 'Belongie, Serge and Hays, James and '
+ 'Perona, Pietro and Ramanan, Deva and '
+ r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+ title='Microsoft coco: Common objects in context',
+ container='European conference on computer vision',
+ year='2014',
+ homepage='http://cocodataset.org/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/coco_wholebody.py b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef9b707017a24a1a133bb28566d212c618fee694
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody.py
@@ -0,0 +1,1154 @@
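+# COCO-WholeBody metadata for all 133 keypoints: body (0-16), feet (17-22),
+# face (23-90), and left/right hands (91-132), plus skeleton links and sigmas.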
+dataset_info = dict(
+ dataset_name='coco_wholebody',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(
+ name='left_big_toe',
+ id=17,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_big_toe'),
+ 18:
+ dict(
+ name='left_small_toe',
+ id=18,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_small_toe'),
+ 19:
+ dict(
+ name='left_heel',
+ id=19,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_heel'),
+ 20:
+ dict(
+ name='right_big_toe',
+ id=20,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_big_toe'),
+ 21:
+ dict(
+ name='right_small_toe',
+ id=21,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_small_toe'),
+ 22:
+ dict(
+ name='right_heel',
+ id=22,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_heel'),
+ 23:
+ dict(
+ name='face-0',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 24:
+ dict(
+ name='face-1',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 25:
+ dict(
+ name='face-2',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 26:
+ dict(
+ name='face-3',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 27:
+ dict(
+ name='face-4',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 28:
+ dict(
+ name='face-5',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 29:
+ dict(
+ name='face-6',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 30:
+ dict(
+ name='face-7',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='face-9'),
+ 31:
+ dict(name='face-8', id=31, color=[255, 255, 255], type='', swap=''),
+ 32:
+ dict(
+ name='face-9',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-7'),
+ 33:
+ dict(
+ name='face-10',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 34:
+ dict(
+ name='face-11',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 35:
+ dict(
+ name='face-12',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 36:
+ dict(
+ name='face-13',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 37:
+ dict(
+ name='face-14',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 38:
+ dict(
+ name='face-15',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 39:
+ dict(
+ name='face-16',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 40:
+ dict(
+ name='face-17',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 41:
+ dict(
+ name='face-18',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 42:
+ dict(
+ name='face-19',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 43:
+ dict(
+ name='face-20',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 44:
+ dict(
+ name='face-21',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 45:
+ dict(
+ name='face-22',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 46:
+ dict(
+ name='face-23',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 47:
+ dict(
+ name='face-24',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 48:
+ dict(
+ name='face-25',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 49:
+ dict(
+ name='face-26',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 50:
+ dict(name='face-27', id=50, color=[255, 255, 255], type='', swap=''),
+ 51:
+ dict(name='face-28', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(name='face-29', id=52, color=[255, 255, 255], type='', swap=''),
+ 53:
+ dict(name='face-30', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(
+ name='face-31',
+ id=54,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 55:
+ dict(
+ name='face-32',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 56:
+ dict(name='face-33', id=56, color=[255, 255, 255], type='', swap=''),
+ 57:
+ dict(
+ name='face-34',
+ id=57,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 58:
+ dict(
+ name='face-35',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 59:
+ dict(
+ name='face-36',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 60:
+ dict(
+ name='face-37',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 61:
+ dict(
+ name='face-38',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 62:
+ dict(
+ name='face-39',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 63:
+ dict(
+ name='face-40',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 64:
+ dict(
+ name='face-41',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 65:
+ dict(
+ name='face-42',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 66:
+ dict(
+ name='face-43',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 67:
+ dict(
+ name='face-44',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 68:
+ dict(
+ name='face-45',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 69:
+ dict(
+ name='face-46',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 70:
+ dict(
+ name='face-47',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 71:
+ dict(
+ name='face-48',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 72:
+ dict(
+ name='face-49',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 73:
+ dict(
+ name='face-50',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 74:
+ dict(name='face-51', id=74, color=[255, 255, 255], type='', swap=''),
+ 75:
+ dict(
+ name='face-52',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 76:
+ dict(
+ name='face-53',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 77:
+ dict(
+ name='face-54',
+ id=77,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 78:
+ dict(
+ name='face-55',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 79:
+ dict(
+ name='face-56',
+ id=79,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 80:
+ dict(name='face-57', id=80, color=[255, 255, 255], type='', swap=''),
+ 81:
+ dict(
+ name='face-58',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 82:
+ dict(
+ name='face-59',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 83:
+ dict(
+ name='face-60',
+ id=83,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 84:
+ dict(
+ name='face-61',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 85:
+ dict(name='face-62', id=85, color=[255, 255, 255], type='', swap=''),
+ 86:
+ dict(
+ name='face-63',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 87:
+ dict(
+ name='face-64',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 88:
+ dict(
+ name='face-65',
+ id=88,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 89:
+ dict(name='face-66', id=89, color=[255, 255, 255], type='', swap=''),
+ 90:
+ dict(
+ name='face-67',
+ id=90,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65'),
+ 91:
+ dict(
+ name='left_hand_root',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='right_hand_root'),
+ 92:
+ dict(
+ name='left_thumb1',
+ id=92,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 93:
+ dict(
+ name='left_thumb2',
+ id=93,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 94:
+ dict(
+ name='left_thumb3',
+ id=94,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 95:
+ dict(
+ name='left_thumb4',
+ id=95,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 96:
+ dict(
+ name='left_forefinger1',
+ id=96,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 97:
+ dict(
+ name='left_forefinger2',
+ id=97,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 98:
+ dict(
+ name='left_forefinger3',
+ id=98,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 99:
+ dict(
+ name='left_forefinger4',
+ id=99,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 100:
+ dict(
+ name='left_middle_finger1',
+ id=100,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 101:
+ dict(
+ name='left_middle_finger2',
+ id=101,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 102:
+ dict(
+ name='left_middle_finger3',
+ id=102,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 103:
+ dict(
+ name='left_middle_finger4',
+ id=103,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 104:
+ dict(
+ name='left_ring_finger1',
+ id=104,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 105:
+ dict(
+ name='left_ring_finger2',
+ id=105,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 106:
+ dict(
+ name='left_ring_finger3',
+ id=106,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 107:
+ dict(
+ name='left_ring_finger4',
+ id=107,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 108:
+ dict(
+ name='left_pinky_finger1',
+ id=108,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 109:
+ dict(
+ name='left_pinky_finger2',
+ id=109,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 110:
+ dict(
+ name='left_pinky_finger3',
+ id=110,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 111:
+ dict(
+ name='left_pinky_finger4',
+ id=111,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 112:
+ dict(
+ name='right_hand_root',
+ id=112,
+ color=[255, 255, 255],
+ type='',
+ swap='left_hand_root'),
+ 113:
+ dict(
+ name='right_thumb1',
+ id=113,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 114:
+ dict(
+ name='right_thumb2',
+ id=114,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 115:
+ dict(
+ name='right_thumb3',
+ id=115,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 116:
+ dict(
+ name='right_thumb4',
+ id=116,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 117:
+ dict(
+ name='right_forefinger1',
+ id=117,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 118:
+ dict(
+ name='right_forefinger2',
+ id=118,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 119:
+ dict(
+ name='right_forefinger3',
+ id=119,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 120:
+ dict(
+ name='right_forefinger4',
+ id=120,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 121:
+ dict(
+ name='right_middle_finger1',
+ id=121,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 122:
+ dict(
+ name='right_middle_finger2',
+ id=122,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 123:
+ dict(
+ name='right_middle_finger3',
+ id=123,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 124:
+ dict(
+ name='right_middle_finger4',
+ id=124,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 125:
+ dict(
+ name='right_ring_finger1',
+ id=125,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 126:
+ dict(
+ name='right_ring_finger2',
+ id=126,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 127:
+ dict(
+ name='right_ring_finger3',
+ id=127,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 128:
+ dict(
+ name='right_ring_finger4',
+ id=128,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 129:
+ dict(
+ name='right_pinky_finger1',
+ id=129,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 130:
+ dict(
+ name='right_pinky_finger2',
+ id=130,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 131:
+ dict(
+ name='right_pinky_finger3',
+ id=131,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 132:
+ dict(
+ name='right_pinky_finger4',
+ id=132,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('left_ankle', 'left_big_toe'), id=19, color=[0, 255, 0]),
+ 20:
+ dict(link=('left_ankle', 'left_small_toe'), id=20, color=[0, 255, 0]),
+ 21:
+ dict(link=('left_ankle', 'left_heel'), id=21, color=[0, 255, 0]),
+ 22:
+ dict(
+ link=('right_ankle', 'right_big_toe'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(
+ link=('right_ankle', 'right_small_toe'),
+ id=23,
+ color=[255, 128, 0]),
+ 24:
+ dict(link=('right_ankle', 'right_heel'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(
+ link=('left_hand_root', 'left_thumb1'), id=25, color=[255, 128,
+ 0]),
+ 26:
+ dict(link=('left_thumb1', 'left_thumb2'), id=26, color=[255, 128, 0]),
+ 27:
+ dict(link=('left_thumb2', 'left_thumb3'), id=27, color=[255, 128, 0]),
+ 28:
+ dict(link=('left_thumb3', 'left_thumb4'), id=28, color=[255, 128, 0]),
+ 29:
+ dict(
+ link=('left_hand_root', 'left_forefinger1'),
+ id=29,
+ color=[255, 153, 255]),
+ 30:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=30,
+ color=[255, 153, 255]),
+ 31:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=31,
+ color=[255, 153, 255]),
+ 32:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=32,
+ color=[255, 153, 255]),
+ 33:
+ dict(
+ link=('left_hand_root', 'left_middle_finger1'),
+ id=33,
+ color=[102, 178, 255]),
+ 34:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=34,
+ color=[102, 178, 255]),
+ 35:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=35,
+ color=[102, 178, 255]),
+ 36:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=36,
+ color=[102, 178, 255]),
+ 37:
+ dict(
+ link=('left_hand_root', 'left_ring_finger1'),
+ id=37,
+ color=[255, 51, 51]),
+ 38:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=38,
+ color=[255, 51, 51]),
+ 39:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=39,
+ color=[255, 51, 51]),
+ 40:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=40,
+ color=[255, 51, 51]),
+ 41:
+ dict(
+ link=('left_hand_root', 'left_pinky_finger1'),
+ id=41,
+ color=[0, 255, 0]),
+ 42:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=42,
+ color=[0, 255, 0]),
+ 43:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=43,
+ color=[0, 255, 0]),
+ 44:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=44,
+ color=[0, 255, 0]),
+ 45:
+ dict(
+ link=('right_hand_root', 'right_thumb1'),
+ id=45,
+ color=[255, 128, 0]),
+ 46:
+ dict(
+ link=('right_thumb1', 'right_thumb2'), id=46, color=[255, 128, 0]),
+ 47:
+ dict(
+ link=('right_thumb2', 'right_thumb3'), id=47, color=[255, 128, 0]),
+ 48:
+ dict(
+ link=('right_thumb3', 'right_thumb4'), id=48, color=[255, 128, 0]),
+ 49:
+ dict(
+ link=('right_hand_root', 'right_forefinger1'),
+ id=49,
+ color=[255, 153, 255]),
+ 50:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=50,
+ color=[255, 153, 255]),
+ 51:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=51,
+ color=[255, 153, 255]),
+ 52:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=52,
+ color=[255, 153, 255]),
+ 53:
+ dict(
+ link=('right_hand_root', 'right_middle_finger1'),
+ id=53,
+ color=[102, 178, 255]),
+ 54:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=54,
+ color=[102, 178, 255]),
+ 55:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=55,
+ color=[102, 178, 255]),
+ 56:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=56,
+ color=[102, 178, 255]),
+ 57:
+ dict(
+ link=('right_hand_root', 'right_ring_finger1'),
+ id=57,
+ color=[255, 51, 51]),
+ 58:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=58,
+ color=[255, 51, 51]),
+ 59:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=59,
+ color=[255, 51, 51]),
+ 60:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=60,
+ color=[255, 51, 51]),
+ 61:
+ dict(
+ link=('right_hand_root', 'right_pinky_finger1'),
+ id=61,
+ color=[0, 255, 0]),
+ 62:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=62,
+ color=[0, 255, 0]),
+ 63:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=63,
+ color=[0, 255, 0]),
+ 64:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=64,
+ color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 133,
+ # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+ # 'evaluation/myeval_wholebody.py#L175'
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.068, 0.066, 0.066,
+ 0.092, 0.094, 0.094, 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031,
+ 0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045,
+ 0.013, 0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
+ 0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017,
+ 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010,
+ 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009,
+ 0.009, 0.009, 0.007, 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01,
+ 0.008, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
+ 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019,
+ 0.022, 0.031, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024,
+ 0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
+ 0.019, 0.022, 0.031
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_face.py b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_face.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c9ee3350e3bd67ab1825344849487834c71c82b
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_face.py
@@ -0,0 +1,448 @@
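+# Face-only subset of COCO-WholeBody: 68 facial landmarks ('face-0'..'face-67')
+# with swap pairs; no skeleton links are defined for the face keypoints.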
+dataset_info = dict(
+ dataset_name='coco_wholebody_face',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='face-0',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 1:
+ dict(
+ name='face-1',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 2:
+ dict(
+ name='face-2',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 3:
+ dict(
+ name='face-3',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 4:
+ dict(
+ name='face-4',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 5:
+ dict(
+ name='face-5',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 6:
+ dict(
+ name='face-6',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 7:
+ dict(
+ name='face-7', id=7, color=[255, 255, 255], type='',
+ swap='face-9'),
+ 8:
+ dict(name='face-8', id=8, color=[255, 255, 255], type='', swap=''),
+ 9:
+ dict(
+ name='face-9', id=9, color=[255, 255, 255], type='',
+ swap='face-7'),
+ 10:
+ dict(
+ name='face-10',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 11:
+ dict(
+ name='face-11',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 12:
+ dict(
+ name='face-12',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 13:
+ dict(
+ name='face-13',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 14:
+ dict(
+ name='face-14',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 15:
+ dict(
+ name='face-15',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 16:
+ dict(
+ name='face-16',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 17:
+ dict(
+ name='face-17',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 18:
+ dict(
+ name='face-18',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 19:
+ dict(
+ name='face-19',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 20:
+ dict(
+ name='face-20',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 21:
+ dict(
+ name='face-21',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 22:
+ dict(
+ name='face-22',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 23:
+ dict(
+ name='face-23',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 24:
+ dict(
+ name='face-24',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 25:
+ dict(
+ name='face-25',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 26:
+ dict(
+ name='face-26',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 27:
+ dict(name='face-27', id=27, color=[255, 255, 255], type='', swap=''),
+ 28:
+ dict(name='face-28', id=28, color=[255, 255, 255], type='', swap=''),
+ 29:
+ dict(name='face-29', id=29, color=[255, 255, 255], type='', swap=''),
+ 30:
+ dict(name='face-30', id=30, color=[255, 255, 255], type='', swap=''),
+ 31:
+ dict(
+ name='face-31',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 32:
+ dict(
+ name='face-32',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 33:
+ dict(name='face-33', id=33, color=[255, 255, 255], type='', swap=''),
+ 34:
+ dict(
+ name='face-34',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 35:
+ dict(
+ name='face-35',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 36:
+ dict(
+ name='face-36',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 37:
+ dict(
+ name='face-37',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 38:
+ dict(
+ name='face-38',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 39:
+ dict(
+ name='face-39',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 40:
+ dict(
+ name='face-40',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 41:
+ dict(
+ name='face-41',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 42:
+ dict(
+ name='face-42',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 43:
+ dict(
+ name='face-43',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 44:
+ dict(
+ name='face-44',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 45:
+ dict(
+ name='face-45',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 46:
+ dict(
+ name='face-46',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 47:
+ dict(
+ name='face-47',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 48:
+ dict(
+ name='face-48',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 49:
+ dict(
+ name='face-49',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 50:
+ dict(
+ name='face-50',
+ id=50,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 51:
+ dict(name='face-51', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(
+ name='face-52',
+ id=52,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 53:
+ dict(
+ name='face-53',
+ id=53,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 54:
+ dict(
+ name='face-54',
+ id=54,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 55:
+ dict(
+ name='face-55',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 56:
+ dict(
+ name='face-56',
+ id=56,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 57:
+ dict(name='face-57', id=57, color=[255, 255, 255], type='', swap=''),
+ 58:
+ dict(
+ name='face-58',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 59:
+ dict(
+ name='face-59',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 60:
+ dict(
+ name='face-60',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 61:
+ dict(
+ name='face-61',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 62:
+ dict(name='face-62', id=62, color=[255, 255, 255], type='', swap=''),
+ 63:
+ dict(
+ name='face-63',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 64:
+ dict(
+ name='face-64',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 65:
+ dict(
+ name='face-65',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 66:
+ dict(name='face-66', id=66, color=[255, 255, 255], type='', swap=''),
+ 67:
+ dict(
+ name='face-67',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 68,
+
+ # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+ # 'evaluation/myeval_wholebody.py#L177'
+ sigmas=[
+ 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023,
+ 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011,
+ 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007,
+ 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017, 0.011, 0.009, 0.011,
+ 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010, 0.034, 0.008, 0.008,
+ 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009, 0.009, 0.009, 0.007,
+ 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_hand.py b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_hand.py
new file mode 100644
index 0000000000000000000000000000000000000000..1910b2ced5a8b31cd6f83911e41cae9f1a580222
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_hand.py
@@ -0,0 +1,147 @@
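+# Hand-only subset of COCO-WholeBody: 21 keypoints per hand (wrist, thumb1-4,
+# forefinger1-4, middle_finger1-4, ring_finger1-4, pinky_finger1-4).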
+dataset_info = dict(
+ dataset_name='coco_wholebody_hand',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[
+ 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018,
+ 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022,
+ 0.031
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_info.py b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..50ac8fe8cc726711bbcf98dadf003b6e1bc76c33
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/coco_wholebody_info.py
@@ -0,0 +1,1154 @@
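+# Same 133-keypoint COCO-WholeBody metadata as coco_wholebody.py, exposed here
+# under the variable name `cocowholebody_info` instead of `dataset_info`.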
+cocowholebody_info = dict(
+ dataset_name='coco_wholebody',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(
+ name='left_big_toe',
+ id=17,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_big_toe'),
+ 18:
+ dict(
+ name='left_small_toe',
+ id=18,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_small_toe'),
+ 19:
+ dict(
+ name='left_heel',
+ id=19,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_heel'),
+ 20:
+ dict(
+ name='right_big_toe',
+ id=20,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_big_toe'),
+ 21:
+ dict(
+ name='right_small_toe',
+ id=21,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_small_toe'),
+ 22:
+ dict(
+ name='right_heel',
+ id=22,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_heel'),
+ 23:
+ dict(
+ name='face-0',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 24:
+ dict(
+ name='face-1',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 25:
+ dict(
+ name='face-2',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 26:
+ dict(
+ name='face-3',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 27:
+ dict(
+ name='face-4',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 28:
+ dict(
+ name='face-5',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 29:
+ dict(
+ name='face-6',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 30:
+ dict(
+ name='face-7',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='face-9'),
+ 31:
+ dict(name='face-8', id=31, color=[255, 255, 255], type='', swap=''),
+ 32:
+ dict(
+ name='face-9',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-7'),
+ 33:
+ dict(
+ name='face-10',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 34:
+ dict(
+ name='face-11',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 35:
+ dict(
+ name='face-12',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 36:
+ dict(
+ name='face-13',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 37:
+ dict(
+ name='face-14',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 38:
+ dict(
+ name='face-15',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 39:
+ dict(
+ name='face-16',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 40:
+ dict(
+ name='face-17',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 41:
+ dict(
+ name='face-18',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 42:
+ dict(
+ name='face-19',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 43:
+ dict(
+ name='face-20',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 44:
+ dict(
+ name='face-21',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 45:
+ dict(
+ name='face-22',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 46:
+ dict(
+ name='face-23',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 47:
+ dict(
+ name='face-24',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 48:
+ dict(
+ name='face-25',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 49:
+ dict(
+ name='face-26',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 50:
+ dict(name='face-27', id=50, color=[255, 255, 255], type='', swap=''),
+ 51:
+ dict(name='face-28', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(name='face-29', id=52, color=[255, 255, 255], type='', swap=''),
+ 53:
+ dict(name='face-30', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(
+ name='face-31',
+ id=54,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 55:
+ dict(
+ name='face-32',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 56:
+ dict(name='face-33', id=56, color=[255, 255, 255], type='', swap=''),
+ 57:
+ dict(
+ name='face-34',
+ id=57,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 58:
+ dict(
+ name='face-35',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 59:
+ dict(
+ name='face-36',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 60:
+ dict(
+ name='face-37',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 61:
+ dict(
+ name='face-38',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 62:
+ dict(
+ name='face-39',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 63:
+ dict(
+ name='face-40',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 64:
+ dict(
+ name='face-41',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 65:
+ dict(
+ name='face-42',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 66:
+ dict(
+ name='face-43',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 67:
+ dict(
+ name='face-44',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 68:
+ dict(
+ name='face-45',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 69:
+ dict(
+ name='face-46',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 70:
+ dict(
+ name='face-47',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 71:
+ dict(
+ name='face-48',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 72:
+ dict(
+ name='face-49',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 73:
+ dict(
+ name='face-50',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 74:
+ dict(name='face-51', id=74, color=[255, 255, 255], type='', swap=''),
+ 75:
+ dict(
+ name='face-52',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 76:
+ dict(
+ name='face-53',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 77:
+ dict(
+ name='face-54',
+ id=77,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 78:
+ dict(
+ name='face-55',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 79:
+ dict(
+ name='face-56',
+ id=79,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 80:
+ dict(name='face-57', id=80, color=[255, 255, 255], type='', swap=''),
+ 81:
+ dict(
+ name='face-58',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 82:
+ dict(
+ name='face-59',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 83:
+ dict(
+ name='face-60',
+ id=83,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 84:
+ dict(
+ name='face-61',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 85:
+ dict(name='face-62', id=85, color=[255, 255, 255], type='', swap=''),
+ 86:
+ dict(
+ name='face-63',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 87:
+ dict(
+ name='face-64',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 88:
+ dict(
+ name='face-65',
+ id=88,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 89:
+ dict(name='face-66', id=89, color=[255, 255, 255], type='', swap=''),
+ 90:
+ dict(
+ name='face-67',
+ id=90,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65'),
+ 91:
+ dict(
+ name='left_hand_root',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='right_hand_root'),
+ 92:
+ dict(
+ name='left_thumb1',
+ id=92,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 93:
+ dict(
+ name='left_thumb2',
+ id=93,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 94:
+ dict(
+ name='left_thumb3',
+ id=94,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 95:
+ dict(
+ name='left_thumb4',
+ id=95,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 96:
+ dict(
+ name='left_forefinger1',
+ id=96,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 97:
+ dict(
+ name='left_forefinger2',
+ id=97,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 98:
+ dict(
+ name='left_forefinger3',
+ id=98,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 99:
+ dict(
+ name='left_forefinger4',
+ id=99,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 100:
+ dict(
+ name='left_middle_finger1',
+ id=100,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 101:
+ dict(
+ name='left_middle_finger2',
+ id=101,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 102:
+ dict(
+ name='left_middle_finger3',
+ id=102,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 103:
+ dict(
+ name='left_middle_finger4',
+ id=103,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 104:
+ dict(
+ name='left_ring_finger1',
+ id=104,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 105:
+ dict(
+ name='left_ring_finger2',
+ id=105,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 106:
+ dict(
+ name='left_ring_finger3',
+ id=106,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 107:
+ dict(
+ name='left_ring_finger4',
+ id=107,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 108:
+ dict(
+ name='left_pinky_finger1',
+ id=108,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 109:
+ dict(
+ name='left_pinky_finger2',
+ id=109,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 110:
+ dict(
+ name='left_pinky_finger3',
+ id=110,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 111:
+ dict(
+ name='left_pinky_finger4',
+ id=111,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 112:
+ dict(
+ name='right_hand_root',
+ id=112,
+ color=[255, 255, 255],
+ type='',
+ swap='left_hand_root'),
+ 113:
+ dict(
+ name='right_thumb1',
+ id=113,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 114:
+ dict(
+ name='right_thumb2',
+ id=114,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 115:
+ dict(
+ name='right_thumb3',
+ id=115,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 116:
+ dict(
+ name='right_thumb4',
+ id=116,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 117:
+ dict(
+ name='right_forefinger1',
+ id=117,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 118:
+ dict(
+ name='right_forefinger2',
+ id=118,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 119:
+ dict(
+ name='right_forefinger3',
+ id=119,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 120:
+ dict(
+ name='right_forefinger4',
+ id=120,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 121:
+ dict(
+ name='right_middle_finger1',
+ id=121,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 122:
+ dict(
+ name='right_middle_finger2',
+ id=122,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 123:
+ dict(
+ name='right_middle_finger3',
+ id=123,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 124:
+ dict(
+ name='right_middle_finger4',
+ id=124,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 125:
+ dict(
+ name='right_ring_finger1',
+ id=125,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 126:
+ dict(
+ name='right_ring_finger2',
+ id=126,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 127:
+ dict(
+ name='right_ring_finger3',
+ id=127,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 128:
+ dict(
+ name='right_ring_finger4',
+ id=128,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 129:
+ dict(
+ name='right_pinky_finger1',
+ id=129,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 130:
+ dict(
+ name='right_pinky_finger2',
+ id=130,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 131:
+ dict(
+ name='right_pinky_finger3',
+ id=131,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 132:
+ dict(
+ name='right_pinky_finger4',
+ id=132,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('left_ankle', 'left_big_toe'), id=19, color=[0, 255, 0]),
+ 20:
+ dict(link=('left_ankle', 'left_small_toe'), id=20, color=[0, 255, 0]),
+ 21:
+ dict(link=('left_ankle', 'left_heel'), id=21, color=[0, 255, 0]),
+ 22:
+ dict(
+ link=('right_ankle', 'right_big_toe'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(
+ link=('right_ankle', 'right_small_toe'),
+ id=23,
+ color=[255, 128, 0]),
+ 24:
+ dict(link=('right_ankle', 'right_heel'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(
+ link=('left_hand_root', 'left_thumb1'), id=25, color=[255, 128,
+ 0]),
+ 26:
+ dict(link=('left_thumb1', 'left_thumb2'), id=26, color=[255, 128, 0]),
+ 27:
+ dict(link=('left_thumb2', 'left_thumb3'), id=27, color=[255, 128, 0]),
+ 28:
+ dict(link=('left_thumb3', 'left_thumb4'), id=28, color=[255, 128, 0]),
+ 29:
+ dict(
+ link=('left_hand_root', 'left_forefinger1'),
+ id=29,
+ color=[255, 153, 255]),
+ 30:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=30,
+ color=[255, 153, 255]),
+ 31:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=31,
+ color=[255, 153, 255]),
+ 32:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=32,
+ color=[255, 153, 255]),
+ 33:
+ dict(
+ link=('left_hand_root', 'left_middle_finger1'),
+ id=33,
+ color=[102, 178, 255]),
+ 34:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=34,
+ color=[102, 178, 255]),
+ 35:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=35,
+ color=[102, 178, 255]),
+ 36:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=36,
+ color=[102, 178, 255]),
+ 37:
+ dict(
+ link=('left_hand_root', 'left_ring_finger1'),
+ id=37,
+ color=[255, 51, 51]),
+ 38:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=38,
+ color=[255, 51, 51]),
+ 39:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=39,
+ color=[255, 51, 51]),
+ 40:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=40,
+ color=[255, 51, 51]),
+ 41:
+ dict(
+ link=('left_hand_root', 'left_pinky_finger1'),
+ id=41,
+ color=[0, 255, 0]),
+ 42:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=42,
+ color=[0, 255, 0]),
+ 43:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=43,
+ color=[0, 255, 0]),
+ 44:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=44,
+ color=[0, 255, 0]),
+ 45:
+ dict(
+ link=('right_hand_root', 'right_thumb1'),
+ id=45,
+ color=[255, 128, 0]),
+ 46:
+ dict(
+ link=('right_thumb1', 'right_thumb2'), id=46, color=[255, 128, 0]),
+ 47:
+ dict(
+ link=('right_thumb2', 'right_thumb3'), id=47, color=[255, 128, 0]),
+ 48:
+ dict(
+ link=('right_thumb3', 'right_thumb4'), id=48, color=[255, 128, 0]),
+ 49:
+ dict(
+ link=('right_hand_root', 'right_forefinger1'),
+ id=49,
+ color=[255, 153, 255]),
+ 50:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=50,
+ color=[255, 153, 255]),
+ 51:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=51,
+ color=[255, 153, 255]),
+ 52:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=52,
+ color=[255, 153, 255]),
+ 53:
+ dict(
+ link=('right_hand_root', 'right_middle_finger1'),
+ id=53,
+ color=[102, 178, 255]),
+ 54:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=54,
+ color=[102, 178, 255]),
+ 55:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=55,
+ color=[102, 178, 255]),
+ 56:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=56,
+ color=[102, 178, 255]),
+ 57:
+ dict(
+ link=('right_hand_root', 'right_ring_finger1'),
+ id=57,
+ color=[255, 51, 51]),
+ 58:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=58,
+ color=[255, 51, 51]),
+ 59:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=59,
+ color=[255, 51, 51]),
+ 60:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=60,
+ color=[255, 51, 51]),
+ 61:
+ dict(
+ link=('right_hand_root', 'right_pinky_finger1'),
+ id=61,
+ color=[0, 255, 0]),
+ 62:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=62,
+ color=[0, 255, 0]),
+ 63:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=63,
+ color=[0, 255, 0]),
+ 64:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=64,
+ color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 133,
+ # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+ # 'evaluation/myeval_wholebody.py#L175'
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.068, 0.066, 0.066,
+ 0.092, 0.094, 0.094, 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031,
+ 0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045,
+ 0.013, 0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
+ 0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017,
+ 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010,
+ 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009,
+ 0.009, 0.009, 0.007, 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01,
+ 0.008, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
+ 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019,
+ 0.022, 0.031, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024,
+ 0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
+ 0.019, 0.022, 0.031
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/cofw.py b/vendor/ViTPose/configs/_base_/datasets/cofw.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fb7ad2f8d1fdbe868b3691858a370e26b59a105
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/cofw.py
@@ -0,0 +1,134 @@
+dataset_info = dict(
+ dataset_name='cofw',
+ paper_info=dict(
+ author='Burgos-Artizzu, Xavier P and Perona, '
+ r'Pietro and Doll{\'a}r, Piotr',
+ title='Robust face landmark estimation under occlusion',
+ container='Proceedings of the IEEE international '
+ 'conference on computer vision',
+ year='2013',
+ homepage='http://www.vision.caltech.edu/xpburgos/ICCV13/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-1'),
+ 1:
+ dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-0'),
+ 2:
+ dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'),
+ 3:
+ dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'),
+ 4:
+ dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-6'),
+ 5:
+ dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-7'),
+ 6:
+ dict(name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-4'),
+ 7:
+ dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-5'),
+ 8:
+ dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'),
+ 9:
+ dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'),
+ 10:
+ dict(
+ name='kpt-10',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-11'),
+ 11:
+ dict(
+ name='kpt-11',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-10'),
+ 12:
+ dict(
+ name='kpt-12',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-14'),
+ 13:
+ dict(
+ name='kpt-13',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-15'),
+ 14:
+ dict(
+ name='kpt-14',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-12'),
+ 15:
+ dict(
+ name='kpt-15',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-13'),
+ 16:
+ dict(
+ name='kpt-16',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-17'),
+ 17:
+ dict(
+ name='kpt-17',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-16'),
+ 18:
+ dict(
+ name='kpt-18',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-19'),
+ 19:
+ dict(
+ name='kpt-19',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-18'),
+ 20:
+ dict(name='kpt-20', id=20, color=[255, 255, 255], type='', swap=''),
+ 21:
+ dict(name='kpt-21', id=21, color=[255, 255, 255], type='', swap=''),
+ 22:
+ dict(
+ name='kpt-22',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-23'),
+ 23:
+ dict(
+ name='kpt-23',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-22'),
+ 24:
+ dict(name='kpt-24', id=24, color=[255, 255, 255], type='', swap=''),
+ 25:
+ dict(name='kpt-25', id=25, color=[255, 255, 255], type='', swap=''),
+ 26:
+ dict(name='kpt-26', id=26, color=[255, 255, 255], type='', swap=''),
+ 27:
+ dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''),
+ 28:
+ dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 29,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/crowdpose.py b/vendor/ViTPose/configs/_base_/datasets/crowdpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..45086531a601870716eed15a32c5413c0e24b7ae
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/crowdpose.py
@@ -0,0 +1,147 @@
+dataset_info = dict(
+ dataset_name='crowdpose',
+ paper_info=dict(
+ author='Li, Jiefeng and Wang, Can and Zhu, Hao and '
+ 'Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu',
+ title='CrowdPose: Efficient Crowded Scenes Pose Estimation '
+ 'and A New Benchmark',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2019',
+ homepage='https://github.com/Jeff-sjtu/CrowdPose',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left_shoulder',
+ id=0,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_shoulder'),
+ 1:
+ dict(
+ name='right_shoulder',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_shoulder'),
+ 2:
+ dict(
+ name='left_elbow',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_elbow'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='left_wrist',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_wrist'),
+ 5:
+ dict(
+ name='right_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='left_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='left_knee',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_knee'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_ankle',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_ankle'),
+ 11:
+ dict(
+ name='right_ankle',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_ankle'),
+ 12:
+ dict(
+ name='top_head', id=12, color=[255, 128, 0], type='upper',
+ swap=''),
+ 13:
+ dict(name='neck', id=13, color=[0, 255, 0], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('top_head', 'neck'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('right_shoulder', 'neck'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('left_shoulder', 'neck'), id=14, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2, 0.2, 0.5
+ ],
+ sigmas=[
+ 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087,
+ 0.089, 0.089, 0.079, 0.079
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/deepfashion_full.py b/vendor/ViTPose/configs/_base_/datasets/deepfashion_full.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d989069ee7253d3a5b5f01c81135b1a472cd4b2
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/deepfashion_full.py
@@ -0,0 +1,74 @@
+dataset_info = dict(
+ dataset_name='deepfashion_full',
+ paper_info=dict(
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left collar',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right collar'),
+ 1:
+ dict(
+ name='right collar',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left collar'),
+ 2:
+ dict(
+ name='left sleeve',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right sleeve'),
+ 3:
+ dict(
+ name='right sleeve',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left sleeve'),
+ 4:
+ dict(
+ name='left waistline',
+            id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='right waistline'),
+ 5:
+ dict(
+ name='right waistline',
+            id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='left waistline'),
+ 6:
+ dict(
+ name='left hem',
+            id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 7:
+ dict(
+ name='right hem',
+            id=7,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 8,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/deepfashion_lower.py b/vendor/ViTPose/configs/_base_/datasets/deepfashion_lower.py
new file mode 100644
index 0000000000000000000000000000000000000000..db014a1747ca618f93a7d092d29027015b48ae3c
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/deepfashion_lower.py
@@ -0,0 +1,46 @@
+dataset_info = dict(
+ dataset_name='deepfashion_lower',
+ paper_info=dict(
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left waistline',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right waistline'),
+ 1:
+ dict(
+ name='right waistline',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left waistline'),
+ 2:
+ dict(
+ name='left hem',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 3:
+ dict(
+ name='right hem',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 4,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/deepfashion_upper.py b/vendor/ViTPose/configs/_base_/datasets/deepfashion_upper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0b012fd37bee1ba5ed956a7a5465a8623bf0894
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/deepfashion_upper.py
@@ -0,0 +1,60 @@
+dataset_info = dict(
+ dataset_name='deepfashion_upper',
+ paper_info=dict(
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left collar',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right collar'),
+ 1:
+ dict(
+ name='right collar',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left collar'),
+ 2:
+ dict(
+ name='left sleeve',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right sleeve'),
+ 3:
+ dict(
+ name='right sleeve',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left sleeve'),
+ 4:
+ dict(
+ name='left hem',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 5:
+ dict(
+ name='right hem',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 6,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/fly.py b/vendor/ViTPose/configs/_base_/datasets/fly.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f94ff57ca93d8f562b6a61b9a67198abdcde217
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/fly.py
@@ -0,0 +1,237 @@
+dataset_info = dict(
+ dataset_name='fly',
+ paper_info=dict(
+ author='Pereira, Talmo D and Aldarondo, Diego E and '
+ 'Willmore, Lindsay and Kislin, Mikhail and '
+ 'Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W',
+ title='Fast animal pose estimation using deep neural networks',
+ container='Nature methods',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='eyeL', id=1, color=[255, 255, 255], type='', swap='eyeR'),
+ 2:
+ dict(name='eyeR', id=2, color=[255, 255, 255], type='', swap='eyeL'),
+ 3:
+ dict(name='neck', id=3, color=[255, 255, 255], type='', swap=''),
+ 4:
+ dict(name='thorax', id=4, color=[255, 255, 255], type='', swap=''),
+ 5:
+ dict(name='abdomen', id=5, color=[255, 255, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forelegR1',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 7:
+ dict(
+ name='forelegR2',
+ id=7,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL2'),
+ 8:
+ dict(
+ name='forelegR3',
+ id=8,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL3'),
+ 9:
+ dict(
+ name='forelegR4',
+ id=9,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL4'),
+ 10:
+ dict(
+ name='midlegR1',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL1'),
+ 11:
+ dict(
+ name='midlegR2',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL2'),
+ 12:
+ dict(
+ name='midlegR3',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL3'),
+ 13:
+ dict(
+ name='midlegR4',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL4'),
+ 14:
+ dict(
+ name='hindlegR1',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 15:
+ dict(
+ name='hindlegR2',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL2'),
+ 16:
+ dict(
+ name='hindlegR3',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL3'),
+ 17:
+ dict(
+ name='hindlegR4',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL4'),
+ 18:
+ dict(
+ name='forelegL1',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 19:
+ dict(
+ name='forelegL2',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR2'),
+ 20:
+ dict(
+ name='forelegL3',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR3'),
+ 21:
+ dict(
+ name='forelegL4',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR4'),
+ 22:
+ dict(
+ name='midlegL1',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR1'),
+ 23:
+ dict(
+ name='midlegL2',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR2'),
+ 24:
+ dict(
+ name='midlegL3',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR3'),
+ 25:
+ dict(
+ name='midlegL4',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR4'),
+ 26:
+ dict(
+ name='hindlegL1',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 27:
+ dict(
+ name='hindlegL2',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR2'),
+ 28:
+ dict(
+ name='hindlegL3',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR3'),
+ 29:
+ dict(
+ name='hindlegL4',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR4'),
+ 30:
+ dict(
+ name='wingL', id=30, color=[255, 255, 255], type='', swap='wingR'),
+ 31:
+ dict(
+ name='wingR', id=31, color=[255, 255, 255], type='', swap='wingL'),
+ },
+ skeleton_info={
+ 0: dict(link=('eyeL', 'head'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('eyeR', 'head'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('neck', 'head'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('thorax', 'neck'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('abdomen', 'thorax'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('forelegR2', 'forelegR1'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('forelegR3', 'forelegR2'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('forelegR4', 'forelegR3'), id=7, color=[255, 255, 255]),
+ 8: dict(link=('midlegR2', 'midlegR1'), id=8, color=[255, 255, 255]),
+ 9: dict(link=('midlegR3', 'midlegR2'), id=9, color=[255, 255, 255]),
+ 10: dict(link=('midlegR4', 'midlegR3'), id=10, color=[255, 255, 255]),
+ 11:
+ dict(link=('hindlegR2', 'hindlegR1'), id=11, color=[255, 255, 255]),
+ 12:
+ dict(link=('hindlegR3', 'hindlegR2'), id=12, color=[255, 255, 255]),
+ 13:
+ dict(link=('hindlegR4', 'hindlegR3'), id=13, color=[255, 255, 255]),
+ 14:
+ dict(link=('forelegL2', 'forelegL1'), id=14, color=[255, 255, 255]),
+ 15:
+ dict(link=('forelegL3', 'forelegL2'), id=15, color=[255, 255, 255]),
+ 16:
+ dict(link=('forelegL4', 'forelegL3'), id=16, color=[255, 255, 255]),
+ 17: dict(link=('midlegL2', 'midlegL1'), id=17, color=[255, 255, 255]),
+ 18: dict(link=('midlegL3', 'midlegL2'), id=18, color=[255, 255, 255]),
+ 19: dict(link=('midlegL4', 'midlegL3'), id=19, color=[255, 255, 255]),
+ 20:
+ dict(link=('hindlegL2', 'hindlegL1'), id=20, color=[255, 255, 255]),
+ 21:
+ dict(link=('hindlegL3', 'hindlegL2'), id=21, color=[255, 255, 255]),
+ 22:
+ dict(link=('hindlegL4', 'hindlegL3'), id=22, color=[255, 255, 255]),
+ 23: dict(link=('wingL', 'neck'), id=23, color=[255, 255, 255]),
+ 24: dict(link=('wingR', 'neck'), id=24, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 32,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/freihand2d.py b/vendor/ViTPose/configs/_base_/datasets/freihand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b960d10f3538801531dbccdd67aeac6e73ac572
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/freihand2d.py
@@ -0,0 +1,144 @@
+dataset_info = dict(
+ dataset_name='freihand',
+ paper_info=dict(
+ author='Zimmermann, Christian and Ceylan, Duygu and '
+ 'Yang, Jimei and Russell, Bryan and '
+ 'Argus, Max and Brox, Thomas',
+ title='Freihand: A dataset for markerless capture of hand pose '
+ 'and shape from single rgb images',
+ container='Proceedings of the IEEE International '
+ 'Conference on Computer Vision',
+ year='2019',
+ homepage='https://lmb.informatik.uni-freiburg.de/projects/freihand/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/h36m.py b/vendor/ViTPose/configs/_base_/datasets/h36m.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a719d8b19f9ff3c5ef98476d73216055bf9186
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/h36m.py
@@ -0,0 +1,152 @@
+dataset_info = dict(
+ dataset_name='h36m',
+ paper_info=dict(
+ author='Ionescu, Catalin and Papava, Dragos and '
+ 'Olaru, Vlad and Sminchisescu, Cristian',
+ title='Human3.6M: Large Scale Datasets and Predictive '
+ 'Methods for 3D Human Sensing in Natural Environments',
+ container='IEEE Transactions on Pattern Analysis and '
+ 'Machine Intelligence',
+ year='2014',
+ homepage='http://vision.imar.ro/human3.6m/description.php',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='root', id=0, color=[51, 153, 255], type='lower', swap=''),
+ 1:
+ dict(
+ name='right_hip',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 2:
+ dict(
+ name='right_knee',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 3:
+ dict(
+ name='right_foot',
+ id=3,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_foot'),
+ 4:
+ dict(
+ name='left_hip',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 5:
+ dict(
+ name='left_knee',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 6:
+ dict(
+ name='left_foot',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_foot'),
+ 7:
+ dict(name='spine', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(name='thorax', id=8, color=[51, 153, 255], type='upper', swap=''),
+ 9:
+ dict(
+ name='neck_base',
+ id=9,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 10:
+ dict(name='head', id=10, color=[51, 153, 255], type='upper', swap=''),
+ 11:
+ dict(
+ name='left_shoulder',
+ id=11,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 12:
+ dict(
+ name='left_elbow',
+ id=12,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 13:
+ dict(
+ name='left_wrist',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 14:
+ dict(
+ name='right_shoulder',
+ id=14,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 15:
+ dict(
+ name='right_elbow',
+ id=15,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 16:
+ dict(
+ name='right_wrist',
+ id=16,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('root', 'left_hip'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_hip', 'left_knee'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('left_knee', 'left_foot'), id=2, color=[0, 255, 0]),
+ 3:
+ dict(link=('root', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('right_hip', 'right_knee'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('right_knee', 'right_foot'), id=5, color=[255, 128, 0]),
+ 6:
+ dict(link=('root', 'spine'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('spine', 'thorax'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('thorax', 'neck_base'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('neck_base', 'head'), id=9, color=[51, 153, 255]),
+ 10:
+ dict(link=('thorax', 'left_shoulder'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('left_shoulder', 'left_elbow'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_elbow', 'left_wrist'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('thorax', 'right_shoulder'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=14, color=[255, 128,
+ 0]),
+ 15:
+ dict(link=('right_elbow', 'right_wrist'), id=15, color=[255, 128, 0])
+ },
+ joint_weights=[1.] * 17,
+ sigmas=[],
+ stats_info=dict(bbox_center=(528., 427.), bbox_scale=400.))
diff --git a/vendor/ViTPose/configs/_base_/datasets/halpe.py b/vendor/ViTPose/configs/_base_/datasets/halpe.py
new file mode 100644
index 0000000000000000000000000000000000000000..1385fe81dc2190684f2142449c0f288f2cb74c1a
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/halpe.py
@@ -0,0 +1,1157 @@
+dataset_info = dict(
+ dataset_name='halpe',
+ paper_info=dict(
+ author='Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie'
+ ' and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu'
+ ' and Ma, Ze and Chen, Mingyang and Lu, Cewu',
+ title='PaStaNet: Toward Human Activity Knowledge Engine',
+ container='CVPR',
+ year='2020',
+ homepage='https://github.com/Fang-Haoshu/Halpe-FullBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(name='head', id=17, color=[255, 128, 0], type='upper', swap=''),
+ 18:
+ dict(name='neck', id=18, color=[255, 128, 0], type='upper', swap=''),
+ 19:
+ dict(name='hip', id=19, color=[255, 128, 0], type='lower', swap=''),
+ 20:
+ dict(
+ name='left_big_toe',
+ id=20,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_big_toe'),
+ 21:
+ dict(
+ name='right_big_toe',
+ id=21,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_big_toe'),
+ 22:
+ dict(
+ name='left_small_toe',
+ id=22,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_small_toe'),
+ 23:
+ dict(
+ name='right_small_toe',
+ id=23,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_small_toe'),
+ 24:
+ dict(
+ name='left_heel',
+ id=24,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_heel'),
+ 25:
+ dict(
+ name='right_heel',
+ id=25,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_heel'),
+ 26:
+ dict(
+ name='face-0',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 27:
+ dict(
+ name='face-1',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 28:
+ dict(
+ name='face-2',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 29:
+ dict(
+ name='face-3',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 30:
+ dict(
+ name='face-4',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 31:
+ dict(
+ name='face-5',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 32:
+ dict(
+ name='face-6',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 33:
+ dict(
+ name='face-7',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='face-9'),
+ 34:
+ dict(name='face-8', id=34, color=[255, 255, 255], type='', swap=''),
+ 35:
+ dict(
+ name='face-9',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-7'),
+ 36:
+ dict(
+ name='face-10',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 37:
+ dict(
+ name='face-11',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 38:
+ dict(
+ name='face-12',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 39:
+ dict(
+ name='face-13',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 40:
+ dict(
+ name='face-14',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 41:
+ dict(
+ name='face-15',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 42:
+ dict(
+ name='face-16',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 43:
+ dict(
+ name='face-17',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 44:
+ dict(
+ name='face-18',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 45:
+ dict(
+ name='face-19',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 46:
+ dict(
+ name='face-20',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 47:
+ dict(
+ name='face-21',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 48:
+ dict(
+ name='face-22',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 49:
+ dict(
+ name='face-23',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 50:
+ dict(
+ name='face-24',
+ id=50,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 51:
+ dict(
+ name='face-25',
+ id=51,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 52:
+ dict(
+ name='face-26',
+ id=52,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 53:
+ dict(name='face-27', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(name='face-28', id=54, color=[255, 255, 255], type='', swap=''),
+ 55:
+ dict(name='face-29', id=55, color=[255, 255, 255], type='', swap=''),
+ 56:
+ dict(name='face-30', id=56, color=[255, 255, 255], type='', swap=''),
+ 57:
+ dict(
+ name='face-31',
+ id=57,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 58:
+ dict(
+ name='face-32',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 59:
+ dict(name='face-33', id=59, color=[255, 255, 255], type='', swap=''),
+ 60:
+ dict(
+ name='face-34',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 61:
+ dict(
+ name='face-35',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 62:
+ dict(
+ name='face-36',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 63:
+ dict(
+ name='face-37',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 64:
+ dict(
+ name='face-38',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 65:
+ dict(
+ name='face-39',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 66:
+ dict(
+ name='face-40',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 67:
+ dict(
+ name='face-41',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 68:
+ dict(
+ name='face-42',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 69:
+ dict(
+ name='face-43',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 70:
+ dict(
+ name='face-44',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 71:
+ dict(
+ name='face-45',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 72:
+ dict(
+ name='face-46',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 73:
+ dict(
+ name='face-47',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 74:
+ dict(
+ name='face-48',
+ id=74,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 75:
+ dict(
+ name='face-49',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 76:
+ dict(
+ name='face-50',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 77:
+ dict(name='face-51', id=77, color=[255, 255, 255], type='', swap=''),
+ 78:
+ dict(
+ name='face-52',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 79:
+ dict(
+ name='face-53',
+ id=79,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 80:
+ dict(
+ name='face-54',
+ id=80,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 81:
+ dict(
+ name='face-55',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 82:
+ dict(
+ name='face-56',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 83:
+ dict(name='face-57', id=83, color=[255, 255, 255], type='', swap=''),
+ 84:
+ dict(
+ name='face-58',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 85:
+ dict(
+ name='face-59',
+ id=85,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 86:
+ dict(
+ name='face-60',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 87:
+ dict(
+ name='face-61',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 88:
+ dict(name='face-62', id=88, color=[255, 255, 255], type='', swap=''),
+ 89:
+ dict(
+ name='face-63',
+ id=89,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 90:
+ dict(
+ name='face-64',
+ id=90,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 91:
+ dict(
+ name='face-65',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 92:
+ dict(name='face-66', id=92, color=[255, 255, 255], type='', swap=''),
+ 93:
+ dict(
+ name='face-67',
+ id=93,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65'),
+ 94:
+ dict(
+ name='left_hand_root',
+ id=94,
+ color=[255, 255, 255],
+ type='',
+ swap='right_hand_root'),
+ 95:
+ dict(
+ name='left_thumb1',
+ id=95,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 96:
+ dict(
+ name='left_thumb2',
+ id=96,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 97:
+ dict(
+ name='left_thumb3',
+ id=97,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 98:
+ dict(
+ name='left_thumb4',
+ id=98,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 99:
+ dict(
+ name='left_forefinger1',
+ id=99,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 100:
+ dict(
+ name='left_forefinger2',
+ id=100,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 101:
+ dict(
+ name='left_forefinger3',
+ id=101,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 102:
+ dict(
+ name='left_forefinger4',
+ id=102,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 103:
+ dict(
+ name='left_middle_finger1',
+ id=103,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 104:
+ dict(
+ name='left_middle_finger2',
+ id=104,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 105:
+ dict(
+ name='left_middle_finger3',
+ id=105,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 106:
+ dict(
+ name='left_middle_finger4',
+ id=106,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 107:
+ dict(
+ name='left_ring_finger1',
+ id=107,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 108:
+ dict(
+ name='left_ring_finger2',
+ id=108,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 109:
+ dict(
+ name='left_ring_finger3',
+ id=109,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 110:
+ dict(
+ name='left_ring_finger4',
+ id=110,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 111:
+ dict(
+ name='left_pinky_finger1',
+ id=111,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 112:
+ dict(
+ name='left_pinky_finger2',
+ id=112,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 113:
+ dict(
+ name='left_pinky_finger3',
+ id=113,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 114:
+ dict(
+ name='left_pinky_finger4',
+ id=114,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 115:
+ dict(
+ name='right_hand_root',
+ id=115,
+ color=[255, 255, 255],
+ type='',
+ swap='left_hand_root'),
+ 116:
+ dict(
+ name='right_thumb1',
+ id=116,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 117:
+ dict(
+ name='right_thumb2',
+ id=117,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 118:
+ dict(
+ name='right_thumb3',
+ id=118,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 119:
+ dict(
+ name='right_thumb4',
+ id=119,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 120:
+ dict(
+ name='right_forefinger1',
+ id=120,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 121:
+ dict(
+ name='right_forefinger2',
+ id=121,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 122:
+ dict(
+ name='right_forefinger3',
+ id=122,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 123:
+ dict(
+ name='right_forefinger4',
+ id=123,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 124:
+ dict(
+ name='right_middle_finger1',
+ id=124,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 125:
+ dict(
+ name='right_middle_finger2',
+ id=125,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 126:
+ dict(
+ name='right_middle_finger3',
+ id=126,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 127:
+ dict(
+ name='right_middle_finger4',
+ id=127,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 128:
+ dict(
+ name='right_ring_finger1',
+ id=128,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 129:
+ dict(
+ name='right_ring_finger2',
+ id=129,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 130:
+ dict(
+ name='right_ring_finger3',
+ id=130,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 131:
+ dict(
+ name='right_ring_finger4',
+ id=131,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 132:
+ dict(
+ name='right_pinky_finger1',
+ id=132,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 133:
+ dict(
+ name='right_pinky_finger2',
+ id=133,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 134:
+ dict(
+ name='right_pinky_finger3',
+ id=134,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 135:
+ dict(
+ name='right_pinky_finger4',
+ id=135,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('left_hip', 'hip'), id=2, color=[0, 255, 0]),
+ 3:
+ dict(link=('right_ankle', 'right_knee'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('right_knee', 'right_hip'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('right_hip', 'hip'), id=5, color=[255, 128, 0]),
+ 6:
+ dict(link=('head', 'neck'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('neck', 'hip'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('neck', 'left_shoulder'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(link=('left_shoulder', 'left_elbow'), id=9, color=[0, 255, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('neck', 'right_shoulder'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=12, color=[255, 128,
+ 0]),
+ 13:
+ dict(link=('right_elbow', 'right_wrist'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(link=('left_eye', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('nose', 'left_eye'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('nose', 'right_eye'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_eye', 'left_ear'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(link=('right_eye', 'right_ear'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('left_ear', 'left_shoulder'), id=19, color=[51, 153, 255]),
+ 20:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=20, color=[51, 153, 255]),
+ 21:
+ dict(link=('left_ankle', 'left_big_toe'), id=21, color=[0, 255, 0]),
+ 22:
+ dict(link=('left_ankle', 'left_small_toe'), id=22, color=[0, 255, 0]),
+ 23:
+ dict(link=('left_ankle', 'left_heel'), id=23, color=[0, 255, 0]),
+ 24:
+ dict(
+ link=('right_ankle', 'right_big_toe'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(
+ link=('right_ankle', 'right_small_toe'),
+ id=25,
+ color=[255, 128, 0]),
+ 26:
+ dict(link=('right_ankle', 'right_heel'), id=26, color=[255, 128, 0]),
+ 27:
+ dict(link=('left_wrist', 'left_thumb1'), id=27, color=[255, 128, 0]),
+ 28:
+ dict(link=('left_thumb1', 'left_thumb2'), id=28, color=[255, 128, 0]),
+ 29:
+ dict(link=('left_thumb2', 'left_thumb3'), id=29, color=[255, 128, 0]),
+ 30:
+ dict(link=('left_thumb3', 'left_thumb4'), id=30, color=[255, 128, 0]),
+ 31:
+ dict(
+ link=('left_wrist', 'left_forefinger1'),
+ id=31,
+ color=[255, 153, 255]),
+ 32:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=32,
+ color=[255, 153, 255]),
+ 33:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=33,
+ color=[255, 153, 255]),
+ 34:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=34,
+ color=[255, 153, 255]),
+ 35:
+ dict(
+ link=('left_wrist', 'left_middle_finger1'),
+ id=35,
+ color=[102, 178, 255]),
+ 36:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=36,
+ color=[102, 178, 255]),
+ 37:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=37,
+ color=[102, 178, 255]),
+ 38:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=38,
+ color=[102, 178, 255]),
+ 39:
+ dict(
+ link=('left_wrist', 'left_ring_finger1'),
+ id=39,
+ color=[255, 51, 51]),
+ 40:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=40,
+ color=[255, 51, 51]),
+ 41:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=41,
+ color=[255, 51, 51]),
+ 42:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=42,
+ color=[255, 51, 51]),
+ 43:
+ dict(
+ link=('left_wrist', 'left_pinky_finger1'),
+ id=43,
+ color=[0, 255, 0]),
+ 44:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=44,
+ color=[0, 255, 0]),
+ 45:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=45,
+ color=[0, 255, 0]),
+ 46:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=46,
+ color=[0, 255, 0]),
+ 47:
+ dict(link=('right_wrist', 'right_thumb1'), id=47, color=[255, 128, 0]),
+ 48:
+ dict(
+ link=('right_thumb1', 'right_thumb2'), id=48, color=[255, 128, 0]),
+ 49:
+ dict(
+ link=('right_thumb2', 'right_thumb3'), id=49, color=[255, 128, 0]),
+ 50:
+ dict(
+ link=('right_thumb3', 'right_thumb4'), id=50, color=[255, 128, 0]),
+ 51:
+ dict(
+ link=('right_wrist', 'right_forefinger1'),
+ id=51,
+ color=[255, 153, 255]),
+ 52:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=52,
+ color=[255, 153, 255]),
+ 53:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=53,
+ color=[255, 153, 255]),
+ 54:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=54,
+ color=[255, 153, 255]),
+ 55:
+ dict(
+ link=('right_wrist', 'right_middle_finger1'),
+ id=55,
+ color=[102, 178, 255]),
+ 56:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=56,
+ color=[102, 178, 255]),
+ 57:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=57,
+ color=[102, 178, 255]),
+ 58:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=58,
+ color=[102, 178, 255]),
+ 59:
+ dict(
+ link=('right_wrist', 'right_ring_finger1'),
+ id=59,
+ color=[255, 51, 51]),
+ 60:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=60,
+ color=[255, 51, 51]),
+ 61:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=61,
+ color=[255, 51, 51]),
+ 62:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=62,
+ color=[255, 51, 51]),
+ 63:
+ dict(
+ link=('right_wrist', 'right_pinky_finger1'),
+ id=63,
+ color=[0, 255, 0]),
+ 64:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=64,
+ color=[0, 255, 0]),
+ 65:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=65,
+ color=[0, 255, 0]),
+ 66:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=66,
+ color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 136,
+
+ # 'https://github.com/Fang-Haoshu/Halpe-FullBody/blob/master/'
+ # 'HalpeCOCOAPI/PythonAPI/halpecocotools/cocoeval.py#L245'
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.08, 0.08, 0.08,
+ 0.089, 0.089, 0.089, 0.089, 0.089, 0.089, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/horse10.py b/vendor/ViTPose/configs/_base_/datasets/horse10.py
new file mode 100644
index 0000000000000000000000000000000000000000..a485bf191bc151b0d76e48f3e55eb8e2dda6c506
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/horse10.py
@@ -0,0 +1,201 @@
+dataset_info = dict(
+ dataset_name='horse10',
+ paper_info=dict(
+ author='Mathis, Alexander and Biasi, Thomas and '
+ 'Schneider, Steffen and '
+ 'Yuksekgonul, Mert and Rogers, Byron and '
+ 'Bethge, Matthias and '
+ 'Mathis, Mackenzie W',
+ title='Pretraining boosts out-of-domain robustness '
+ 'for pose estimation',
+ container='Proceedings of the IEEE/CVF Winter Conference on '
+ 'Applications of Computer Vision',
+ year='2021',
+ homepage='http://www.mackenziemathislab.org/horse10',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='Nose', id=0, color=[255, 153, 255], type='upper', swap=''),
+ 1:
+ dict(name='Eye', id=1, color=[255, 153, 255], type='upper', swap=''),
+ 2:
+ dict(
+ name='Nearknee',
+ id=2,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 3:
+ dict(
+ name='Nearfrontfetlock',
+ id=3,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 4:
+ dict(
+ name='Nearfrontfoot',
+ id=4,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 5:
+ dict(
+ name='Offknee', id=5, color=[255, 102, 255], type='upper',
+ swap=''),
+ 6:
+ dict(
+ name='Offfrontfetlock',
+ id=6,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 7:
+ dict(
+ name='Offfrontfoot',
+ id=7,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 8:
+ dict(
+ name='Shoulder',
+ id=8,
+ color=[255, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='Midshoulder',
+ id=9,
+ color=[255, 153, 255],
+ type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='Elbow', id=10, color=[255, 153, 255], type='upper', swap=''),
+ 11:
+ dict(
+ name='Girth', id=11, color=[255, 153, 255], type='upper', swap=''),
+ 12:
+ dict(
+ name='Wither', id=12, color=[255, 153, 255], type='upper',
+ swap=''),
+ 13:
+ dict(
+ name='Nearhindhock',
+ id=13,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 14:
+ dict(
+ name='Nearhindfetlock',
+ id=14,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 15:
+ dict(
+ name='Nearhindfoot',
+ id=15,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 16:
+ dict(name='Hip', id=16, color=[255, 153, 255], type='lower', swap=''),
+ 17:
+ dict(
+ name='Stifle', id=17, color=[255, 153, 255], type='lower',
+ swap=''),
+ 18:
+ dict(
+ name='Offhindhock',
+ id=18,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 19:
+ dict(
+ name='Offhindfetlock',
+ id=19,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 20:
+ dict(
+ name='Offhindfoot',
+ id=20,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 21:
+ dict(
+ name='Ischium',
+ id=21,
+ color=[255, 153, 255],
+ type='lower',
+ swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('Nose', 'Eye'), id=0, color=[255, 153, 255]),
+ 1:
+ dict(link=('Eye', 'Wither'), id=1, color=[255, 153, 255]),
+ 2:
+ dict(link=('Wither', 'Hip'), id=2, color=[255, 153, 255]),
+ 3:
+ dict(link=('Hip', 'Ischium'), id=3, color=[255, 153, 255]),
+ 4:
+ dict(link=('Ischium', 'Stifle'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('Stifle', 'Girth'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('Girth', 'Elbow'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('Elbow', 'Shoulder'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('Shoulder', 'Midshoulder'), id=8, color=[255, 153, 255]),
+ 9:
+ dict(link=('Midshoulder', 'Wither'), id=9, color=[255, 153, 255]),
+ 10:
+ dict(
+ link=('Nearknee', 'Nearfrontfetlock'),
+ id=10,
+ color=[255, 102, 255]),
+ 11:
+ dict(
+ link=('Nearfrontfetlock', 'Nearfrontfoot'),
+ id=11,
+ color=[255, 102, 255]),
+ 12:
+ dict(
+ link=('Offknee', 'Offfrontfetlock'), id=12, color=[255, 102, 255]),
+ 13:
+ dict(
+ link=('Offfrontfetlock', 'Offfrontfoot'),
+ id=13,
+ color=[255, 102, 255]),
+ 14:
+ dict(
+ link=('Nearhindhock', 'Nearhindfetlock'),
+ id=14,
+ color=[255, 51, 255]),
+ 15:
+ dict(
+ link=('Nearhindfetlock', 'Nearhindfoot'),
+ id=15,
+ color=[255, 51, 255]),
+ 16:
+ dict(
+ link=('Offhindhock', 'Offhindfetlock'),
+ id=16,
+ color=[255, 51, 255]),
+ 17:
+ dict(
+ link=('Offhindfetlock', 'Offhindfoot'),
+ id=17,
+ color=[255, 51, 255])
+ },
+ joint_weights=[1.] * 22,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/interhand2d.py b/vendor/ViTPose/configs/_base_/datasets/interhand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..0134f07de5bf536eaffbf71155a7e6eb33b24f0a
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/interhand2d.py
@@ -0,0 +1,142 @@
+dataset_info = dict(
+ dataset_name='interhand2d',
+ paper_info=dict(
+ author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
+ 'Shiratori, Takaaki and Lee, Kyoung Mu',
+ title='InterHand2.6M: A dataset and baseline for 3D '
+ 'interacting hand pose estimation from a single RGB image',
+ container='arXiv',
+ year='2020',
+ homepage='https://mks0601.github.io/InterHand2.6M/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='thumb4', id=0, color=[255, 128, 0], type='', swap=''),
+ 1:
+ dict(name='thumb3', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb1', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(
+ name='forefinger4', id=4, color=[255, 153, 255], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger3', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger1', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='middle_finger4',
+ id=8,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 9:
+ dict(
+ name='middle_finger3',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger1',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='ring_finger4', id=12, color=[255, 51, 51], type='', swap=''),
+ 13:
+ dict(
+ name='ring_finger3', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger1', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(name='pinky_finger4', id=16, color=[0, 255, 0], type='', swap=''),
+ 17:
+ dict(name='pinky_finger3', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger1', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='wrist', id=20, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
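These `_base_` dataset files are plain Python modules that each define a single `dataset_info` dict (`keypoint_info`, `skeleton_info`, `joint_weights`, `sigmas`). As a standalone sketch only (the loader path and checks below are illustrative and not part of this repo or the ViTPose config machinery), such a file can be sanity-checked like this:

```python
# Illustrative sanity check for a dataset config such as interhand2d.py.
# runpy executes the file and returns its module globals as a dict.
import runpy

cfg = runpy.run_path('vendor/ViTPose/configs/_base_/datasets/interhand2d.py')
info = cfg['dataset_info']

names = {v['name'] for v in info['keypoint_info'].values()}

# Every skeleton link must reference keypoints that are actually defined.
for edge in info['skeleton_info'].values():
    a, b = edge['link']
    assert a in names and b in names, edge

# joint_weights must provide one weight per keypoint (21 for this dataset).
assert len(info['joint_weights']) == len(names)
print(info['dataset_name'], len(names), 'keypoints,',
      len(info['skeleton_info']), 'links')
```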
diff --git a/vendor/ViTPose/configs/_base_/datasets/interhand3d.py b/vendor/ViTPose/configs/_base_/datasets/interhand3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2bd8121c281c741ec9b980c7570ebef8a632993
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/interhand3d.py
@@ -0,0 +1,487 @@
+dataset_info = dict(
+ dataset_name='interhand3d',
+ paper_info=dict(
+ author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
+ 'Shiratori, Takaaki and Lee, Kyoung Mu',
+ title='InterHand2.6M: A dataset and baseline for 3D '
+ 'interacting hand pose estimation from a single RGB image',
+ container='arXiv',
+ year='2020',
+ homepage='https://mks0601.github.io/InterHand2.6M/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_thumb4',
+ id=0,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 1:
+ dict(
+ name='right_thumb3',
+ id=1,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 2:
+ dict(
+ name='right_thumb2',
+ id=2,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 3:
+ dict(
+ name='right_thumb1',
+ id=3,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 4:
+ dict(
+ name='right_forefinger4',
+ id=4,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 5:
+ dict(
+ name='right_forefinger3',
+ id=5,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 6:
+ dict(
+ name='right_forefinger2',
+ id=6,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 7:
+ dict(
+ name='right_forefinger1',
+ id=7,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 8:
+ dict(
+ name='right_middle_finger4',
+ id=8,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 9:
+ dict(
+ name='right_middle_finger3',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 10:
+ dict(
+ name='right_middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 11:
+ dict(
+ name='right_middle_finger1',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 12:
+ dict(
+ name='right_ring_finger4',
+ id=12,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 13:
+ dict(
+ name='right_ring_finger3',
+ id=13,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 14:
+ dict(
+ name='right_ring_finger2',
+ id=14,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 15:
+ dict(
+ name='right_ring_finger1',
+ id=15,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 16:
+ dict(
+ name='right_pinky_finger4',
+ id=16,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4'),
+ 17:
+ dict(
+ name='right_pinky_finger3',
+ id=17,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 18:
+ dict(
+ name='right_pinky_finger2',
+ id=18,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 19:
+ dict(
+ name='right_pinky_finger1',
+ id=19,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 20:
+ dict(
+ name='right_wrist',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='left_wrist'),
+ 21:
+ dict(
+ name='left_thumb4',
+ id=21,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 22:
+ dict(
+ name='left_thumb3',
+ id=22,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 23:
+ dict(
+ name='left_thumb2',
+ id=23,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 24:
+ dict(
+ name='left_thumb1',
+ id=24,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 25:
+ dict(
+ name='left_forefinger4',
+ id=25,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 26:
+ dict(
+ name='left_forefinger3',
+ id=26,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 27:
+ dict(
+ name='left_forefinger2',
+ id=27,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 28:
+ dict(
+ name='left_forefinger1',
+ id=28,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 29:
+ dict(
+ name='left_middle_finger4',
+ id=29,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 30:
+ dict(
+ name='left_middle_finger3',
+ id=30,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 31:
+ dict(
+ name='left_middle_finger2',
+ id=31,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 32:
+ dict(
+ name='left_middle_finger1',
+ id=32,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 33:
+ dict(
+ name='left_ring_finger4',
+ id=33,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 34:
+ dict(
+ name='left_ring_finger3',
+ id=34,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 35:
+ dict(
+ name='left_ring_finger2',
+ id=35,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 36:
+ dict(
+ name='left_ring_finger1',
+ id=36,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 37:
+ dict(
+ name='left_pinky_finger4',
+ id=37,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 38:
+ dict(
+ name='left_pinky_finger3',
+ id=38,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 39:
+ dict(
+ name='left_pinky_finger2',
+ id=39,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 40:
+ dict(
+ name='left_pinky_finger1',
+ id=40,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 41:
+ dict(
+ name='left_wrist',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='right_wrist'),
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_wrist', 'right_thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_thumb1', 'right_thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_thumb2', 'right_thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_thumb3', 'right_thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(
+ link=('right_wrist', 'right_forefinger1'),
+ id=4,
+ color=[255, 153, 255]),
+ 5:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=5,
+ color=[255, 153, 255]),
+ 6:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=6,
+ color=[255, 153, 255]),
+ 7:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=7,
+ color=[255, 153, 255]),
+ 8:
+ dict(
+ link=('right_wrist', 'right_middle_finger1'),
+ id=8,
+ color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(
+ link=('right_wrist', 'right_ring_finger1'),
+ id=12,
+ color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=13,
+ color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=14,
+ color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=15,
+ color=[255, 51, 51]),
+ 16:
+ dict(
+ link=('right_wrist', 'right_pinky_finger1'),
+ id=16,
+ color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=17,
+ color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=18,
+ color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=19,
+ color=[0, 255, 0]),
+ 20:
+ dict(link=('left_wrist', 'left_thumb1'), id=20, color=[255, 128, 0]),
+ 21:
+ dict(link=('left_thumb1', 'left_thumb2'), id=21, color=[255, 128, 0]),
+ 22:
+ dict(link=('left_thumb2', 'left_thumb3'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(link=('left_thumb3', 'left_thumb4'), id=23, color=[255, 128, 0]),
+ 24:
+ dict(
+ link=('left_wrist', 'left_forefinger1'),
+ id=24,
+ color=[255, 153, 255]),
+ 25:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=25,
+ color=[255, 153, 255]),
+ 26:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=26,
+ color=[255, 153, 255]),
+ 27:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=27,
+ color=[255, 153, 255]),
+ 28:
+ dict(
+ link=('left_wrist', 'left_middle_finger1'),
+ id=28,
+ color=[102, 178, 255]),
+ 29:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=29,
+ color=[102, 178, 255]),
+ 30:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=30,
+ color=[102, 178, 255]),
+ 31:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=31,
+ color=[102, 178, 255]),
+ 32:
+ dict(
+ link=('left_wrist', 'left_ring_finger1'),
+ id=32,
+ color=[255, 51, 51]),
+ 33:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=33,
+ color=[255, 51, 51]),
+ 34:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=34,
+ color=[255, 51, 51]),
+ 35:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=35,
+ color=[255, 51, 51]),
+ 36:
+ dict(
+ link=('left_wrist', 'left_pinky_finger1'),
+ id=36,
+ color=[0, 255, 0]),
+ 37:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=37,
+ color=[0, 255, 0]),
+ 38:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=38,
+ color=[0, 255, 0]),
+ 39:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=39,
+ color=[0, 255, 0]),
+ },
+ joint_weights=[1.] * 42,
+ sigmas=[])
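`interhand3d` doubles the single-hand layout to 42 keypoints and relies on the `swap` fields for left/right flip handling. A small symmetry check (again a hypothetical helper, reusing the loading idea above) makes such mirrored maps easy to verify:

```python
# Hypothetical check: every non-empty swap must be mutual (A <-> B).
import runpy

info = runpy.run_path(
    'vendor/ViTPose/configs/_base_/datasets/interhand3d.py')['dataset_info']
swap_of = {v['name']: v['swap'] for v in info['keypoint_info'].values()}

for name, swap in swap_of.items():
    if swap:  # '' marks keypoints without a mirrored counterpart
        assert swap_of.get(swap) == name, f'{name} -> {swap} not symmetric'
print('swap map OK for', info['dataset_name'])
```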
diff --git a/vendor/ViTPose/configs/_base_/datasets/jhmdb.py b/vendor/ViTPose/configs/_base_/datasets/jhmdb.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b37488498a2bade1fa6f2ff6532fcd219071803
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/jhmdb.py
@@ -0,0 +1,129 @@
+dataset_info = dict(
+ dataset_name='jhmdb',
+ paper_info=dict(
+ author='H. Jhuang and J. Gall and S. Zuffi and '
+ 'C. Schmid and M. J. Black',
+ title='Towards understanding action recognition',
+ container='International Conf. on Computer Vision (ICCV)',
+ year='2013',
+ homepage='http://jhmdb.is.tue.mpg.de/dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='neck', id=0, color=[255, 128, 0], type='upper', swap=''),
+ 1:
+ dict(name='belly', id=1, color=[255, 128, 0], type='upper', swap=''),
+ 2:
+ dict(name='head', id=2, color=[255, 128, 0], type='upper', swap=''),
+ 3:
+ dict(
+ name='right_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 4:
+ dict(
+ name='left_shoulder',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 5:
+ dict(
+ name='right_hip',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_hip'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[51, 153, 255],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_elbow',
+ id=7,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_elbow'),
+ 8:
+ dict(
+ name='left_elbow',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_elbow'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[51, 153, 255],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_knee',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_knee'),
+ 11:
+ dict(
+ name='right_wrist',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 12:
+ dict(
+ name='left_wrist',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='right_wrist'),
+ 13:
+ dict(
+ name='right_ankle',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_ankle'),
+ 14:
+ dict(
+ name='left_ankle',
+ id=14,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle')
+ },
+ skeleton_info={
+ 0: dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1: dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2: dict(link=('right_hip', 'belly'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('belly', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('belly', 'neck'), id=6, color=[51, 153, 255]),
+ 7: dict(link=('neck', 'head'), id=7, color=[51, 153, 255]),
+ 8: dict(link=('neck', 'right_shoulder'), id=8, color=[255, 128, 0]),
+ 9: dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('right_elbow', 'right_wrist'), id=10, color=[255, 128, 0]),
+ 11: dict(link=('neck', 'left_shoulder'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_shoulder', 'left_elbow'), id=12, color=[0, 255, 0]),
+ 13: dict(link=('left_elbow', 'left_wrist'), id=13, color=[0, 255, 0])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5, 1.5, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[
+ 0.025, 0.107, 0.025, 0.079, 0.079, 0.107, 0.107, 0.072, 0.072, 0.087,
+ 0.087, 0.062, 0.062, 0.089, 0.089
+ ])
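The `sigmas` lists (noted here as "Adapted from COCO dataset") are per-keypoint OKS constants: for keypoint i with prediction-to-ground-truth distance d_i, object scale s (s² is the annotated area) and κ_i = 2·σ_i, COCO-style evaluation scores exp(−d_i² / (2 s² κ_i²)) and averages over labelled keypoints. The snippet below is a minimal NumPy rendering of that formula, offered only to show what the numbers mean, not code from this repository:

```python
# Minimal sketch of COCO-style OKS, which the per-keypoint `sigmas` above
# parameterise (kappa_i = 2 * sigma_i, area plays the role of s**2).
import numpy as np

def oks(pred, gt, visible, area, sigmas):
    """pred, gt: (K, 2); visible: (K,) bool mask; area: object scale s**2."""
    d2 = np.sum((np.asarray(pred) - np.asarray(gt)) ** 2, axis=1)
    kappa2 = (2.0 * np.asarray(sigmas)) ** 2
    e = d2 / (2.0 * area * kappa2 + np.spacing(1))
    v = np.asarray(visible, dtype=bool)
    return float(np.exp(-e)[v].mean()) if v.any() else 0.0
```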
diff --git a/vendor/ViTPose/configs/_base_/datasets/locust.py b/vendor/ViTPose/configs/_base_/datasets/locust.py
new file mode 100644
index 0000000000000000000000000000000000000000..db3fa15aa060b5806faae7a21f65460f77be2745
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/locust.py
@@ -0,0 +1,263 @@
+dataset_info = dict(
+ dataset_name='locust',
+ paper_info=dict(
+ author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
+ 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
+ 'Couzin, Iain D',
+ title='DeepPoseKit, a software toolkit for fast and robust '
+ 'animal pose estimation using deep learning',
+ container='Elife',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='neck', id=1, color=[255, 255, 255], type='', swap=''),
+ 2:
+ dict(name='thorax', id=2, color=[255, 255, 255], type='', swap=''),
+ 3:
+ dict(name='abdomen1', id=3, color=[255, 255, 255], type='', swap=''),
+ 4:
+ dict(name='abdomen2', id=4, color=[255, 255, 255], type='', swap=''),
+ 5:
+ dict(
+ name='anttipL',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='anttipR'),
+ 6:
+ dict(
+ name='antbaseL',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='antbaseR'),
+ 7:
+ dict(name='eyeL', id=7, color=[255, 255, 255], type='', swap='eyeR'),
+ 8:
+ dict(
+ name='forelegL1',
+ id=8,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 9:
+ dict(
+ name='forelegL2',
+ id=9,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR2'),
+ 10:
+ dict(
+ name='forelegL3',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR3'),
+ 11:
+ dict(
+ name='forelegL4',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR4'),
+ 12:
+ dict(
+ name='midlegL1',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR1'),
+ 13:
+ dict(
+ name='midlegL2',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR2'),
+ 14:
+ dict(
+ name='midlegL3',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR3'),
+ 15:
+ dict(
+ name='midlegL4',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR4'),
+ 16:
+ dict(
+ name='hindlegL1',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 17:
+ dict(
+ name='hindlegL2',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR2'),
+ 18:
+ dict(
+ name='hindlegL3',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR3'),
+ 19:
+ dict(
+ name='hindlegL4',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR4'),
+ 20:
+ dict(
+ name='anttipR',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='anttipL'),
+ 21:
+ dict(
+ name='antbaseR',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='antbaseL'),
+ 22:
+ dict(name='eyeR', id=22, color=[255, 255, 255], type='', swap='eyeL'),
+ 23:
+ dict(
+ name='forelegR1',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 24:
+ dict(
+ name='forelegR2',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL2'),
+ 25:
+ dict(
+ name='forelegR3',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL3'),
+ 26:
+ dict(
+ name='forelegR4',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL4'),
+ 27:
+ dict(
+ name='midlegR1',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL1'),
+ 28:
+ dict(
+ name='midlegR2',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL2'),
+ 29:
+ dict(
+ name='midlegR3',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL3'),
+ 30:
+ dict(
+ name='midlegR4',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL4'),
+ 31:
+ dict(
+ name='hindlegR1',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 32:
+ dict(
+ name='hindlegR2',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL2'),
+ 33:
+ dict(
+ name='hindlegR3',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL3'),
+ 34:
+ dict(
+ name='hindlegR4',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL4')
+ },
+ skeleton_info={
+ 0: dict(link=('neck', 'head'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('thorax', 'neck'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('abdomen1', 'thorax'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('abdomen2', 'abdomen1'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('antbaseL', 'anttipL'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('eyeL', 'antbaseL'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('forelegL2', 'forelegL1'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('forelegL3', 'forelegL2'), id=7, color=[255, 255, 255]),
+ 8: dict(link=('forelegL4', 'forelegL3'), id=8, color=[255, 255, 255]),
+ 9: dict(link=('midlegL2', 'midlegL1'), id=9, color=[255, 255, 255]),
+ 10: dict(link=('midlegL3', 'midlegL2'), id=10, color=[255, 255, 255]),
+ 11: dict(link=('midlegL4', 'midlegL3'), id=11, color=[255, 255, 255]),
+ 12:
+ dict(link=('hindlegL2', 'hindlegL1'), id=12, color=[255, 255, 255]),
+ 13:
+ dict(link=('hindlegL3', 'hindlegL2'), id=13, color=[255, 255, 255]),
+ 14:
+ dict(link=('hindlegL4', 'hindlegL3'), id=14, color=[255, 255, 255]),
+ 15: dict(link=('antbaseR', 'anttipR'), id=15, color=[255, 255, 255]),
+ 16: dict(link=('eyeR', 'antbaseR'), id=16, color=[255, 255, 255]),
+ 17:
+ dict(link=('forelegR2', 'forelegR1'), id=17, color=[255, 255, 255]),
+ 18:
+ dict(link=('forelegR3', 'forelegR2'), id=18, color=[255, 255, 255]),
+ 19:
+ dict(link=('forelegR4', 'forelegR3'), id=19, color=[255, 255, 255]),
+ 20: dict(link=('midlegR2', 'midlegR1'), id=20, color=[255, 255, 255]),
+ 21: dict(link=('midlegR3', 'midlegR2'), id=21, color=[255, 255, 255]),
+ 22: dict(link=('midlegR4', 'midlegR3'), id=22, color=[255, 255, 255]),
+ 23:
+ dict(link=('hindlegR2', 'hindlegR1'), id=23, color=[255, 255, 255]),
+ 24:
+ dict(link=('hindlegR3', 'hindlegR2'), id=24, color=[255, 255, 255]),
+ 25:
+ dict(link=('hindlegR4', 'hindlegR3'), id=25, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 35,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/macaque.py b/vendor/ViTPose/configs/_base_/datasets/macaque.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea8dac297ea2f0e36dabccccc021d953216a6ac8
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/macaque.py
@@ -0,0 +1,183 @@
+dataset_info = dict(
+ dataset_name='macaque',
+ paper_info=dict(
+ author='Labuguen, Rollyn and Matsumoto, Jumpei and '
+ 'Negrete, Salvador and Nishimaru, Hiroshi and '
+ 'Nishijo, Hisao and Takada, Masahiko and '
+ 'Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro',
+ title='MacaquePose: A novel "in the wild" macaque monkey pose dataset '
+ 'for markerless motion capture',
+ container='bioRxiv',
+ year='2020',
+ homepage='http://www.pri.kyoto-u.ac.jp/datasets/'
+ 'macaquepose/index.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/mhp.py b/vendor/ViTPose/configs/_base_/datasets/mhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..e16e37c79cb63c4352c48bb4e45602b8408f534b
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/mhp.py
@@ -0,0 +1,156 @@
+dataset_info = dict(
+ dataset_name='mhp',
+ paper_info=dict(
+ author='Zhao, Jian and Li, Jianshu and Cheng, Yu and '
+ 'Sim, Terence and Yan, Shuicheng and Feng, Jiashi',
+ title='Understanding humans in crowded scenes: '
+ 'Deep nested adversarial learning and a '
+ 'new benchmark for multi-human parsing',
+ container='Proceedings of the 26th ACM '
+ 'international conference on Multimedia',
+ year='2018',
+ homepage='https://lv-mhp.github.io/dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
+ 7:
+ dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='upper_neck',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='head_top', id=9, color=[51, 153, 255], type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='right_elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 12:
+ dict(
+ name='right_shoulder',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 13:
+ dict(
+ name='left_shoulder',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 14:
+ dict(
+ name='left_elbow',
+ id=14,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 15:
+ dict(
+ name='left_wrist',
+ id=15,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
+ 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
+ 14:
+ dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
+ },
+ joint_weights=[
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[
+ 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
+ 0.062, 0.072, 0.179, 0.179, 0.072, 0.062
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/mpi_inf_3dhp.py b/vendor/ViTPose/configs/_base_/datasets/mpi_inf_3dhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd0a70297b24456ea38566ac205bb585aa47e5d
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/mpi_inf_3dhp.py
@@ -0,0 +1,132 @@
+dataset_info = dict(
+ dataset_name='mpi_inf_3dhp',
+ paper_info=dict(
+ author='Mehta, Dushyant and Rhodin, Helge and Casas, Dan and '
+ 'Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and '
+ 'Theobalt, Christian',
+ title='Monocular 3D Human Pose Estimation In The Wild Using Improved '
+ 'CNN Supervision',
+ container='2017 international conference on 3D vision (3DV)',
+ year='2017',
+ homepage='http://gvv.mpi-inf.mpg.de/3dhp-dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='head_top', id=0, color=[51, 153, 255], type='upper',
+ swap=''),
+ 1:
+ dict(name='neck', id=1, color=[51, 153, 255], type='upper', swap=''),
+ 2:
+ dict(
+ name='right_shoulder',
+ id=2,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='right_wrist',
+ id=4,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='left_elbow',
+ id=6,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 7:
+ dict(
+ name='left_wrist',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 8:
+ dict(
+ name='right_hip',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='right_ankle',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='left_knee',
+ id=12,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 13:
+ dict(
+ name='left_ankle',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 14:
+ dict(name='root', id=14, color=[51, 153, 255], type='lower', swap=''),
+ 15:
+ dict(name='spine', id=15, color=[51, 153, 255], type='upper', swap=''),
+ 16:
+ dict(name='head', id=16, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 128, 0]),
+ 1: dict(
+ link=('right_shoulder', 'right_elbow'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_elbow', 'right_wrist'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('neck', 'left_shoulder'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('root', 'right_hip'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('right_hip', 'right_knee'), id=7, color=[255, 128, 0]),
+ 8: dict(link=('right_knee', 'right_ankle'), id=8, color=[255, 128, 0]),
+ 9: dict(link=('root', 'left_hip'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 255, 0]),
+ 12: dict(link=('head_top', 'head'), id=12, color=[51, 153, 255]),
+ 13: dict(link=('head', 'neck'), id=13, color=[51, 153, 255]),
+ 14: dict(link=('neck', 'spine'), id=14, color=[51, 153, 255]),
+ 15: dict(link=('spine', 'root'), id=15, color=[51, 153, 255])
+ },
+ joint_weights=[1.] * 17,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/mpii.py b/vendor/ViTPose/configs/_base_/datasets/mpii.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c2a491c7b58bc3eaa5c0056d3d7184bdd1d1cc7
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/mpii.py
@@ -0,0 +1,155 @@
+dataset_info = dict(
+ dataset_name='mpii',
+ paper_info=dict(
+ author='Mykhaylo Andriluka and Leonid Pishchulin and '
+ 'Peter Gehler and Schiele, Bernt',
+ title='2D Human Pose Estimation: New Benchmark and '
+ 'State of the Art Analysis',
+ container='IEEE Conference on Computer Vision and '
+ 'Pattern Recognition (CVPR)',
+ year='2014',
+ homepage='http://human-pose.mpi-inf.mpg.de/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
+ 7:
+ dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='upper_neck',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='head_top', id=9, color=[51, 153, 255], type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='right_elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 12:
+ dict(
+ name='right_shoulder',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 13:
+ dict(
+ name='left_shoulder',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 14:
+ dict(
+ name='left_elbow',
+ id=14,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 15:
+ dict(
+ name='left_wrist',
+ id=15,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
+ 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
+ 14:
+ dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
+ },
+ joint_weights=[
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[
+ 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
+ 0.062, 0.072, 0.179, 0.179, 0.072, 0.062
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/mpii_info.py b/vendor/ViTPose/configs/_base_/datasets/mpii_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..8090992a672af4aa13a321369f382e33a4e3b1a4
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/mpii_info.py
@@ -0,0 +1,155 @@
+mpii_info = dict(
+ dataset_name='mpii',
+ paper_info=dict(
+ author='Mykhaylo Andriluka and Leonid Pishchulin and '
+ 'Peter Gehler and Schiele, Bernt',
+ title='2D Human Pose Estimation: New Benchmark and '
+ 'State of the Art Analysis',
+ container='IEEE Conference on Computer Vision and '
+ 'Pattern Recognition (CVPR)',
+ year='2014',
+ homepage='http://human-pose.mpi-inf.mpg.de/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
+ 7:
+ dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='upper_neck',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='head_top', id=9, color=[51, 153, 255], type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='right_elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 12:
+ dict(
+ name='right_shoulder',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 13:
+ dict(
+ name='left_shoulder',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 14:
+ dict(
+ name='left_elbow',
+ id=14,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 15:
+ dict(
+ name='left_wrist',
+ id=15,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
+ 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
+ 14:
+ dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
+ },
+ joint_weights=[
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[
+ 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
+ 0.062, 0.072, 0.179, 0.179, 0.072, 0.062
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/mpii_trb.py b/vendor/ViTPose/configs/_base_/datasets/mpii_trb.py
new file mode 100644
index 0000000000000000000000000000000000000000..73940d4b4827f8e08343c3b517360db788e4820d
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/mpii_trb.py
@@ -0,0 +1,380 @@
+dataset_info = dict(
+ dataset_name='mpii_trb',
+ paper_info=dict(
+ author='Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and '
+ 'Liu, Wentao and Qian, Chen and Ouyang, Wanli',
+ title='TRB: A Novel Triplet Representation for '
+ 'Understanding 2D Human Body',
+ container='Proceedings of the IEEE International '
+ 'Conference on Computer Vision',
+ year='2019',
+ homepage='https://github.com/kennymckormick/'
+ 'Triplet-Representation-of-human-Body',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left_shoulder',
+ id=0,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 1:
+ dict(
+ name='right_shoulder',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 2:
+ dict(
+ name='left_elbow',
+ id=2,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='left_wrist',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 5:
+ dict(
+ name='right_wrist',
+ id=5,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='left_knee',
+ id=8,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_ankle',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 11:
+ dict(
+ name='right_ankle',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 12:
+ dict(name='head', id=12, color=[51, 153, 255], type='upper', swap=''),
+ 13:
+ dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap=''),
+ 14:
+ dict(
+ name='right_neck',
+ id=14,
+ color=[255, 255, 255],
+ type='upper',
+ swap='left_neck'),
+ 15:
+ dict(
+ name='left_neck',
+ id=15,
+ color=[255, 255, 255],
+ type='upper',
+ swap='right_neck'),
+ 16:
+ dict(
+ name='medial_right_shoulder',
+ id=16,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_shoulder'),
+ 17:
+ dict(
+ name='lateral_right_shoulder',
+ id=17,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_shoulder'),
+ 18:
+ dict(
+ name='medial_right_bow',
+ id=18,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_bow'),
+ 19:
+ dict(
+ name='lateral_right_bow',
+ id=19,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_bow'),
+ 20:
+ dict(
+ name='medial_right_wrist',
+ id=20,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_wrist'),
+ 21:
+ dict(
+ name='lateral_right_wrist',
+ id=21,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_wrist'),
+ 22:
+ dict(
+ name='medial_left_shoulder',
+ id=22,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_shoulder'),
+ 23:
+ dict(
+ name='lateral_left_shoulder',
+ id=23,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_shoulder'),
+ 24:
+ dict(
+ name='medial_left_bow',
+ id=24,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_bow'),
+ 25:
+ dict(
+ name='lateral_left_bow',
+ id=25,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_bow'),
+ 26:
+ dict(
+ name='medial_left_wrist',
+ id=26,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_wrist'),
+ 27:
+ dict(
+ name='lateral_left_wrist',
+ id=27,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_wrist'),
+ 28:
+ dict(
+ name='medial_right_hip',
+ id=28,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_hip'),
+ 29:
+ dict(
+ name='lateral_right_hip',
+ id=29,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_hip'),
+ 30:
+ dict(
+ name='medial_right_knee',
+ id=30,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_knee'),
+ 31:
+ dict(
+ name='lateral_right_knee',
+ id=31,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_knee'),
+ 32:
+ dict(
+ name='medial_right_ankle',
+ id=32,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_ankle'),
+ 33:
+ dict(
+ name='lateral_right_ankle',
+ id=33,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_ankle'),
+ 34:
+ dict(
+ name='medial_left_hip',
+ id=34,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_hip'),
+ 35:
+ dict(
+ name='lateral_left_hip',
+ id=35,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_hip'),
+ 36:
+ dict(
+ name='medial_left_knee',
+ id=36,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_knee'),
+ 37:
+ dict(
+ name='lateral_left_knee',
+ id=37,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_knee'),
+ 38:
+ dict(
+ name='medial_left_ankle',
+ id=38,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_ankle'),
+ 39:
+ dict(
+ name='lateral_left_ankle',
+ id=39,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_ankle'),
+ },
+ skeleton_info={
+ 0:
+ dict(link=('head', 'neck'), id=0, color=[51, 153, 255]),
+ 1:
+ dict(link=('neck', 'left_shoulder'), id=1, color=[51, 153, 255]),
+ 2:
+ dict(link=('neck', 'right_shoulder'), id=2, color=[51, 153, 255]),
+ 3:
+ dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('left_shoulder', 'left_hip'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('right_shoulder', 'right_hip'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('left_hip', 'right_hip'), id=9, color=[51, 153, 255]),
+ 10:
+ dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_hip', 'right_knee'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_knee', 'left_ankle'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('right_knee', 'right_ankle'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(link=('right_neck', 'left_neck'), id=14, color=[255, 255, 255]),
+ 15:
+ dict(
+ link=('medial_right_shoulder', 'lateral_right_shoulder'),
+ id=15,
+ color=[255, 255, 255]),
+ 16:
+ dict(
+ link=('medial_right_bow', 'lateral_right_bow'),
+ id=16,
+ color=[255, 255, 255]),
+ 17:
+ dict(
+ link=('medial_right_wrist', 'lateral_right_wrist'),
+ id=17,
+ color=[255, 255, 255]),
+ 18:
+ dict(
+ link=('medial_left_shoulder', 'lateral_left_shoulder'),
+ id=18,
+ color=[255, 255, 255]),
+ 19:
+ dict(
+ link=('medial_left_bow', 'lateral_left_bow'),
+ id=19,
+ color=[255, 255, 255]),
+ 20:
+ dict(
+ link=('medial_left_wrist', 'lateral_left_wrist'),
+ id=20,
+ color=[255, 255, 255]),
+ 21:
+ dict(
+ link=('medial_right_hip', 'lateral_right_hip'),
+ id=21,
+ color=[255, 255, 255]),
+ 22:
+ dict(
+ link=('medial_right_knee', 'lateral_right_knee'),
+ id=22,
+ color=[255, 255, 255]),
+ 23:
+ dict(
+ link=('medial_right_ankle', 'lateral_right_ankle'),
+ id=23,
+ color=[255, 255, 255]),
+ 24:
+ dict(
+ link=('medial_left_hip', 'lateral_left_hip'),
+ id=24,
+ color=[255, 255, 255]),
+ 25:
+ dict(
+ link=('medial_left_knee', 'lateral_left_knee'),
+ id=25,
+ color=[255, 255, 255]),
+ 26:
+ dict(
+ link=('medial_left_ankle', 'lateral_left_ankle'),
+ id=26,
+ color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 40,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/ochuman.py b/vendor/ViTPose/configs/_base_/datasets/ochuman.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ef20838fe583fde133a97e688d30e91ae562746
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/ochuman.py
@@ -0,0 +1,181 @@
+dataset_info = dict(
+ dataset_name='ochuman',
+ paper_info=dict(
+ author='Zhang, Song-Hai and Li, Ruilong and Dong, Xin and '
+ 'Rosin, Paul and Cai, Zixi and Han, Xi and '
+ 'Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min',
+ title='Pose2seg: Detection free human instance segmentation',
+ container='Proceedings of the IEEE conference on computer '
+ 'vision and pattern recognition',
+ year='2019',
+ homepage='https://github.com/liruilong940607/OCHumanApi',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/onehand10k.py b/vendor/ViTPose/configs/_base_/datasets/onehand10k.py
new file mode 100644
index 0000000000000000000000000000000000000000..016770f14f3075dfa7d59389524a0c11a4feb802
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/onehand10k.py
@@ -0,0 +1,142 @@
+dataset_info = dict(
+ dataset_name='onehand10k',
+ paper_info=dict(
+ author='Wang, Yangang and Peng, Cong and Liu, Yebin',
+ title='Mask-pose cascaded cnn for 2d hand pose estimation '
+ 'from single color image',
+ container='IEEE Transactions on Circuits and Systems '
+ 'for Video Technology',
+ year='2018',
+ homepage='https://www.yangangwang.com/papers/WANG-MCC-2018-10.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/panoptic_body3d.py b/vendor/ViTPose/configs/_base_/datasets/panoptic_body3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3b19ac462415a840ca2e0b9e214bdb35d91b5e4
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/panoptic_body3d.py
@@ -0,0 +1,160 @@
+dataset_info = dict(
+ dataset_name='panoptic_pose_3d',
+ paper_info=dict(
+ author='Joo, Hanbyul and Simon, Tomas and Li, Xulong '
+ 'and Liu, Hao and Tan, Lei and Gui, Lin and Banerjee, Sean '
+ 'and Godisart, Timothy and Nabbe, Bart and Matthews, Iain '
+ 'and Kanade, Takeo and Nobuhara, Shohei and Sheikh, Yaser',
+ title='Panoptic Studio: A Massively Multiview System '
+ 'for Interaction Motion Capture',
+ container='IEEE Transactions on Pattern Analysis'
+ ' and Machine Intelligence',
+ year='2017',
+ homepage='http://domedb.perception.cs.cmu.edu',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='neck', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(name='nose', id=1, color=[51, 153, 255], type='upper', swap=''),
+ 2:
+ dict(name='mid_hip', id=2, color=[0, 255, 0], type='lower', swap=''),
+ 3:
+ dict(
+ name='left_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 4:
+ dict(
+ name='left_elbow',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 5:
+ dict(
+ name='left_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='left_knee',
+ id=7,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 8:
+ dict(
+ name='left_ankle',
+ id=8,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 9:
+ dict(
+ name='right_shoulder',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 10:
+ dict(
+ name='right_elbow',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 11:
+ dict(
+ name='right_wrist',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='right_knee',
+ id=13,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 14:
+ dict(
+ name='right_ankle',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 15:
+ dict(
+ name='left_eye',
+ id=15,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 16:
+ dict(
+ name='left_ear',
+ id=16,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 17:
+ dict(
+ name='right_eye',
+ id=17,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 18:
+ dict(
+ name='right_ear',
+ id=18,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear')
+ },
+ skeleton_info={
+ 0: dict(link=('nose', 'neck'), id=0, color=[51, 153, 255]),
+ 1: dict(link=('neck', 'left_shoulder'), id=1, color=[0, 255, 0]),
+ 2: dict(link=('neck', 'right_shoulder'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
+ 4: dict(
+ link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('left_ankle', 'left_knee'), id=7, color=[0, 255, 0]),
+ 8: dict(link=('left_knee', 'left_hip'), id=8, color=[0, 255, 0]),
+ 9: dict(link=('right_ankle', 'right_knee'), id=9, color=[255, 128, 0]),
+ 10: dict(link=('right_knee', 'right_hip'), id=10, color=[255, 128, 0]),
+ 11: dict(link=('mid_hip', 'left_hip'), id=11, color=[0, 255, 0]),
+ 12: dict(link=('mid_hip', 'right_hip'), id=12, color=[255, 128, 0]),
+ 13: dict(link=('mid_hip', 'neck'), id=13, color=[51, 153, 255]),
+ },
+ joint_weights=[
+ 1.0, 1.0, 1.0, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2,
+ 1.5, 1.0, 1.0, 1.0, 1.0
+ ],
+ sigmas=[
+ 0.026, 0.026, 0.107, 0.079, 0.072, 0.062, 0.107, 0.087, 0.089, 0.079,
+ 0.072, 0.062, 0.107, 0.087, 0.089, 0.025, 0.035, 0.025, 0.035
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/panoptic_hand2d.py b/vendor/ViTPose/configs/_base_/datasets/panoptic_hand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a65731ba87b155beb1b40591fd9acb232c2afc6
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/panoptic_hand2d.py
@@ -0,0 +1,143 @@
+dataset_info = dict(
+ dataset_name='panoptic_hand2d',
+ paper_info=dict(
+ author='Simon, Tomas and Joo, Hanbyul and '
+ 'Matthews, Iain and Sheikh, Yaser',
+ title='Hand keypoint detection in single images using '
+ 'multiview bootstrapping',
+ container='Proceedings of the IEEE conference on '
+ 'Computer Vision and Pattern Recognition',
+ year='2017',
+ homepage='http://domedb.perception.cs.cmu.edu/handdb.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/posetrack18.py b/vendor/ViTPose/configs/_base_/datasets/posetrack18.py
new file mode 100644
index 0000000000000000000000000000000000000000..5aefd1c97fe083df35ee88bebab4f99134c27971
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/posetrack18.py
@@ -0,0 +1,176 @@
+dataset_info = dict(
+ dataset_name='posetrack18',
+ paper_info=dict(
+ author='Andriluka, Mykhaylo and Iqbal, Umar and '
+ 'Insafutdinov, Eldar and Pishchulin, Leonid and '
+ 'Milan, Anton and Gall, Juergen and Schiele, Bernt',
+ title='Posetrack: A benchmark for human pose estimation and tracking',
+ container='Proceedings of the IEEE Conference on '
+ 'Computer Vision and Pattern Recognition',
+ year='2018',
+ homepage='https://posetrack.net/users/download.php',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='head_bottom',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 2:
+ dict(
+ name='head_top', id=2, color=[51, 153, 255], type='upper',
+ swap=''),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('nose', 'head_bottom'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'head_top'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(
+ link=('head_bottom', 'left_shoulder'), id=14, color=[51, 153,
+ 255]),
+ 15:
+ dict(
+ link=('head_bottom', 'right_shoulder'),
+ id=15,
+ color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/vendor/ViTPose/configs/_base_/datasets/rhd2d.py b/vendor/ViTPose/configs/_base_/datasets/rhd2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..f48e63702635e140276543d372138de57ae4634e
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/rhd2d.py
@@ -0,0 +1,141 @@
+dataset_info = dict(
+ dataset_name='rhd2d',
+ paper_info=dict(
+ author='Christian Zimmermann and Thomas Brox',
+ title='Learning to Estimate 3D Hand Pose from Single RGB Images',
+ container='arXiv',
+ year='2017',
+ homepage='https://lmb.informatik.uni-freiburg.de/resources/'
+ 'datasets/RenderedHandposeDataset.en.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/wflw.py b/vendor/ViTPose/configs/_base_/datasets/wflw.py
new file mode 100644
index 0000000000000000000000000000000000000000..bed6f56f30f7a2f093e44c5726212e2a0d4659d2
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/wflw.py
@@ -0,0 +1,582 @@
+dataset_info = dict(
+ dataset_name='wflw',
+ paper_info=dict(
+ author='Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, '
+ 'Quan and Cai, Yici and Zhou, Qiang',
+ title='Look at boundary: A boundary-aware face alignment algorithm',
+ container='Proceedings of the IEEE conference on computer '
+ 'vision and pattern recognition',
+ year='2018',
+ homepage='https://wywu.github.io/projects/LAB/WFLW.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-32'),
+ 1:
+ dict(
+ name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-31'),
+ 2:
+ dict(
+ name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-30'),
+ 3:
+ dict(
+ name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-29'),
+ 4:
+ dict(
+ name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-28'),
+ 5:
+ dict(
+ name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-27'),
+ 6:
+ dict(
+ name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-26'),
+ 7:
+ dict(
+ name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-25'),
+ 8:
+ dict(
+ name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-24'),
+ 9:
+ dict(
+ name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-23'),
+ 10:
+ dict(
+ name='kpt-10',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-22'),
+ 11:
+ dict(
+ name='kpt-11',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-21'),
+ 12:
+ dict(
+ name='kpt-12',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-20'),
+ 13:
+ dict(
+ name='kpt-13',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-19'),
+ 14:
+ dict(
+ name='kpt-14',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-18'),
+ 15:
+ dict(
+ name='kpt-15',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-17'),
+ 16:
+ dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''),
+ 17:
+ dict(
+ name='kpt-17',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-15'),
+ 18:
+ dict(
+ name='kpt-18',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-14'),
+ 19:
+ dict(
+ name='kpt-19',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-13'),
+ 20:
+ dict(
+ name='kpt-20',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-12'),
+ 21:
+ dict(
+ name='kpt-21',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-11'),
+ 22:
+ dict(
+ name='kpt-22',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-10'),
+ 23:
+ dict(
+ name='kpt-23', id=23, color=[255, 255, 255], type='',
+ swap='kpt-9'),
+ 24:
+ dict(
+ name='kpt-24', id=24, color=[255, 255, 255], type='',
+ swap='kpt-8'),
+ 25:
+ dict(
+ name='kpt-25', id=25, color=[255, 255, 255], type='',
+ swap='kpt-7'),
+ 26:
+ dict(
+ name='kpt-26', id=26, color=[255, 255, 255], type='',
+ swap='kpt-6'),
+ 27:
+ dict(
+ name='kpt-27', id=27, color=[255, 255, 255], type='',
+ swap='kpt-5'),
+ 28:
+ dict(
+ name='kpt-28', id=28, color=[255, 255, 255], type='',
+ swap='kpt-4'),
+ 29:
+ dict(
+ name='kpt-29', id=29, color=[255, 255, 255], type='',
+ swap='kpt-3'),
+ 30:
+ dict(
+ name='kpt-30', id=30, color=[255, 255, 255], type='',
+ swap='kpt-2'),
+ 31:
+ dict(
+ name='kpt-31', id=31, color=[255, 255, 255], type='',
+ swap='kpt-1'),
+ 32:
+ dict(
+ name='kpt-32', id=32, color=[255, 255, 255], type='',
+ swap='kpt-0'),
+ 33:
+ dict(
+ name='kpt-33',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-46'),
+ 34:
+ dict(
+ name='kpt-34',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-45'),
+ 35:
+ dict(
+ name='kpt-35',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-44'),
+ 36:
+ dict(
+ name='kpt-36',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-43'),
+ 37:
+ dict(
+ name='kpt-37',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-42'),
+ 38:
+ dict(
+ name='kpt-38',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-50'),
+ 39:
+ dict(
+ name='kpt-39',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-49'),
+ 40:
+ dict(
+ name='kpt-40',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-48'),
+ 41:
+ dict(
+ name='kpt-41',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-47'),
+ 42:
+ dict(
+ name='kpt-42',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-37'),
+ 43:
+ dict(
+ name='kpt-43',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-36'),
+ 44:
+ dict(
+ name='kpt-44',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-35'),
+ 45:
+ dict(
+ name='kpt-45',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-34'),
+ 46:
+ dict(
+ name='kpt-46',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-33'),
+ 47:
+ dict(
+ name='kpt-47',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-41'),
+ 48:
+ dict(
+ name='kpt-48',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-40'),
+ 49:
+ dict(
+ name='kpt-49',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-39'),
+ 50:
+ dict(
+ name='kpt-50',
+ id=50,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-38'),
+ 51:
+ dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(name='kpt-52', id=52, color=[255, 255, 255], type='', swap=''),
+ 53:
+ dict(name='kpt-53', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(name='kpt-54', id=54, color=[255, 255, 255], type='', swap=''),
+ 55:
+ dict(
+ name='kpt-55',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-59'),
+ 56:
+ dict(
+ name='kpt-56',
+ id=56,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-58'),
+ 57:
+ dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''),
+ 58:
+ dict(
+ name='kpt-58',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-56'),
+ 59:
+ dict(
+ name='kpt-59',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-55'),
+ 60:
+ dict(
+ name='kpt-60',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-72'),
+ 61:
+ dict(
+ name='kpt-61',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-71'),
+ 62:
+ dict(
+ name='kpt-62',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-70'),
+ 63:
+ dict(
+ name='kpt-63',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-69'),
+ 64:
+ dict(
+ name='kpt-64',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-68'),
+ 65:
+ dict(
+ name='kpt-65',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-75'),
+ 66:
+ dict(
+ name='kpt-66',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-74'),
+ 67:
+ dict(
+ name='kpt-67',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-73'),
+ 68:
+ dict(
+ name='kpt-68',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-64'),
+ 69:
+ dict(
+ name='kpt-69',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-63'),
+ 70:
+ dict(
+ name='kpt-70',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-62'),
+ 71:
+ dict(
+ name='kpt-71',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-61'),
+ 72:
+ dict(
+ name='kpt-72',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-60'),
+ 73:
+ dict(
+ name='kpt-73',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-67'),
+ 74:
+ dict(
+ name='kpt-74',
+ id=74,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-66'),
+ 75:
+ dict(
+ name='kpt-75',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-65'),
+ 76:
+ dict(
+ name='kpt-76',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-82'),
+ 77:
+ dict(
+ name='kpt-77',
+ id=77,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-81'),
+ 78:
+ dict(
+ name='kpt-78',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-80'),
+ 79:
+ dict(name='kpt-79', id=79, color=[255, 255, 255], type='', swap=''),
+ 80:
+ dict(
+ name='kpt-80',
+ id=80,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-78'),
+ 81:
+ dict(
+ name='kpt-81',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-77'),
+ 82:
+ dict(
+ name='kpt-82',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-76'),
+ 83:
+ dict(
+ name='kpt-83',
+ id=83,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-87'),
+ 84:
+ dict(
+ name='kpt-84',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-86'),
+ 85:
+ dict(name='kpt-85', id=85, color=[255, 255, 255], type='', swap=''),
+ 86:
+ dict(
+ name='kpt-86',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-84'),
+ 87:
+ dict(
+ name='kpt-87',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-83'),
+ 88:
+ dict(
+ name='kpt-88',
+ id=88,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-92'),
+ 89:
+ dict(
+ name='kpt-89',
+ id=89,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-91'),
+ 90:
+ dict(name='kpt-90', id=90, color=[255, 255, 255], type='', swap=''),
+ 91:
+ dict(
+ name='kpt-91',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-89'),
+ 92:
+ dict(
+ name='kpt-92',
+ id=92,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-88'),
+ 93:
+ dict(
+ name='kpt-93',
+ id=93,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-95'),
+ 94:
+ dict(name='kpt-94', id=94, color=[255, 255, 255], type='', swap=''),
+ 95:
+ dict(
+ name='kpt-95',
+ id=95,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-93'),
+ 96:
+ dict(
+ name='kpt-96',
+ id=96,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-97'),
+ 97:
+ dict(
+ name='kpt-97',
+ id=97,
+ color=[255, 255, 255],
+ type='',
+ swap='kpt-96')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 98,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/datasets/zebra.py b/vendor/ViTPose/configs/_base_/datasets/zebra.py
new file mode 100644
index 0000000000000000000000000000000000000000..eac71f796a761bbf87b123f8b7b8b4585df0c525
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/datasets/zebra.py
@@ -0,0 +1,64 @@
+dataset_info = dict(
+ dataset_name='zebra',
+ paper_info=dict(
+ author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
+ 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
+ 'Couzin, Iain D',
+ title='DeepPoseKit, a software toolkit for fast and robust '
+ 'animal pose estimation using deep learning',
+ container='Elife',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='snout', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='head', id=1, color=[255, 255, 255], type='', swap=''),
+ 2:
+ dict(name='neck', id=2, color=[255, 255, 255], type='', swap=''),
+ 3:
+ dict(
+ name='forelegL1',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 4:
+ dict(
+ name='forelegR1',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 5:
+ dict(
+ name='hindlegL1',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 6:
+ dict(
+ name='hindlegR1',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 7:
+ dict(name='tailbase', id=7, color=[255, 255, 255], type='', swap=''),
+ 8:
+ dict(name='tailtip', id=8, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={
+ 0: dict(link=('head', 'snout'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('neck', 'head'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('forelegL1', 'neck'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('forelegR1', 'neck'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('hindlegL1', 'tailbase'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('hindlegR1', 'tailbase'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('tailbase', 'neck'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('tailtip', 'tailbase'), id=7, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 9,
+ sigmas=[])
diff --git a/vendor/ViTPose/configs/_base_/default_runtime.py b/vendor/ViTPose/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..d78da5a1a91e8625d1b8b1d72c4c3bb56956dd67
--- /dev/null
+++ b/vendor/ViTPose/configs/_base_/default_runtime.py
@@ -0,0 +1,19 @@
+checkpoint_config = dict(interval=10)
+
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+
+# disable OpenCV multithreading to avoid overloading the system
+opencv_num_threads = 0
+# set the multi-process start method to `fork` to speed up training
+mp_start_method = 'fork'
diff --git a/vendor/ViTPose/configs/_base_/filters/gausian_filter.py b/vendor/ViTPose/configs/_base_/filters/gausian_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2b8fd884cb19b4ec91d8bc74291b7773724bb2dd
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,18 @@
+# 2D Animal Keypoint Detection
+
+2D animal keypoint detection (animal pose estimation) aims to detect the keypoints of different species, including rats,
+dogs, macaques, and cheetahs. It enables detailed behavioral analysis for neuroscience, medical, and ecology applications.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_animal_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [DEMO](/demo/docs/2d_animal_demo.md) to run the demo scripts; a minimal Python API sketch is shown below.
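+
+As a quick orientation, here is a minimal inference sketch against the mmpose Python API that these configs are written
+for. It is an illustrative example only: the config path, checkpoint file, test image, and bounding box below are
+placeholders, and the exact call signatures should be checked against the vendored mmpose version.
+
+```python
+from mmpose.apis import inference_top_down_pose_model, init_pose_model
+from mmpose.datasets import DatasetInfo
+
+# Placeholder paths: point these at a real config and a downloaded checkpoint.
+config = 'configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py'
+checkpoint = 'res50_animalpose_256x256.pth'
+
+model = init_pose_model(config, checkpoint, device='cuda:0')
+dataset_info = DatasetInfo(model.cfg.data['test'].get('dataset_info'))
+
+# One detected (or ground-truth) box per animal instance, in (x, y, w, h) format.
+animal_results = [{'bbox': [50, 50, 200, 200]}]
+pose_results, _ = inference_top_down_pose_model(
+    model, 'demo.jpg', animal_results, format='xywh', dataset_info=dataset_info)
+# Each entry in pose_results carries a (num_keypoints, 3) array of (x, y, score).
+```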
+
+
+
+
+
+
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c62b4eecc9f8f1442dfd48ba57ef4734950e4225
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,7 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection and pose estimation.
+
+They perform object detection first, followed by single-object pose estimation given object bounding boxes.
+Instead of regressing keypoint coordinates directly, the pose estimator produces one heatmap per keypoint that
+represents the likelihood of each location being that keypoint; the predicted coordinates are read off the heatmap
+maxima.
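+
+The decoding step from heatmaps to coordinates can be illustrated with a short NumPy sketch. This is a simplified
+illustration of plain argmax decoding, not the exact post-processing used by the configs under this directory (which
+additionally apply flip testing, heatmap shifting, and kernel modulation via their `test_cfg` settings).
+
+```python
+import numpy as np
+
+def decode_heatmaps(heatmaps, stride=4):
+    """Argmax-decode a (K, H, W) stack of keypoint heatmaps.
+
+    Returns a (K, 3) array of (x, y, score) in input-image pixels, assuming
+    the heatmap is `stride` times smaller than the input crop
+    (e.g. 64x64 heatmaps for a 256x256 crop).
+    """
+    num_keypoints, height, width = heatmaps.shape
+    flat = heatmaps.reshape(num_keypoints, -1)
+    idx = flat.argmax(axis=1)                      # flat index of each peak
+    ys, xs = np.unravel_index(idx, (height, width))
+    scores = flat.max(axis=1)                      # peak value as confidence
+    return np.stack([xs * stride, ys * stride, scores], axis=1)
+```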
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..6241351c401c3732b2c9d06e78b27133cdabdc0f
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.md
@@ -0,0 +1,40 @@
+
+
+
+## HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+## Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
+
+## Results on AnimalPose validation set (1117 instances)
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py) | 256x256 | 0.736 | 0.959 | 0.832 | 0.775 | 0.966 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256_20210426.log.json) |
+| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py) | 256x256 | 0.737 | 0.959 | 0.823 | 0.778 | 0.962 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256_20210426.log.json) |
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b1c84e242bd428d39e5d5062ce02ea71c2c318c6
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: Animal-Pose
+ Name: topdown_heatmap_hrnet_w32_animalpose_256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.736
+ AP@0.5: 0.959
+ AP@0.75: 0.832
+ AR: 0.775
+ AR@0.5: 0.966
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: topdown_heatmap_hrnet_w48_animalpose_256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.959
+ AP@0.75: 0.823
+ AR: 0.778
+ AR@0.5: 0.962
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c83979f37f12475f0621e787c319ffb182fae5d3
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/animalpose.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/animalpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7db4f23561c59aa3675fce79396a109d9099538a
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/animalpose.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/animalpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df1a2806a760ffdcf901549e3162e5b3a80a100
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/animalpose.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/animalpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..e362e53bd92c587febb17d7f4c3b4cd2db4bac5f
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/animalpose.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/animalpose'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbd663dc59e6dda7f491efb0f8c2c4b3b0f5719f
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/animalpose.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/animalpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalPoseDataset',
+ ann_file=f'{data_root}/annotations/animalpose_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..6fe6f771d273ee4def4729739dd9c3b13dca47f8
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md
@@ -0,0 +1,41 @@
+
+
+
+## SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+## Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
+
+## Results on AnimalPose validation set (1117 instances)
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py) | 256x256 | 0.688 | 0.945 | 0.772 | 0.733 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json) |
+| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py) | 256x256 | 0.696 | 0.948 | 0.785 | 0.737 | 0.954 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json) |
+| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py) | 256x256 | 0.709 | 0.948 | 0.797 | 0.749 | 0.951 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json) |
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6900f8a5ccb625926872ea145e1f6919afa93d99
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
@@ -0,0 +1,56 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: Animal-Pose
+ Name: topdown_heatmap_res50_animalpose_256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.688
+ AP@0.5: 0.945
+ AP@0.75: 0.772
+ AR: 0.733
+ AR@0.5: 0.952
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: topdown_heatmap_res101_animalpose_256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.696
+ AP@0.5: 0.948
+ AP@0.75: 0.785
+ AR: 0.737
+ AR@0.5: 0.954
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: topdown_heatmap_res152_animalpose_256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.709
+ AP@0.5: 0.948
+ AP@0.75: 0.797
+ AR: 0.749
+ AR@0.5: 0.951
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_base_ap10k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_base_ap10k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd5daf5e746ce0a116c3fa7bc98231eaa305ed51
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_base_ap10k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+    samples_per_gpu=64,
+    workers_per_gpu=4,
+    val_dataloader=dict(samples_per_gpu=32),
+    test_dataloader=dict(samples_per_gpu=32),
+    train=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+        img_prefix=f'{data_root}/data/',
+        data_cfg=data_cfg,
+        pipeline=train_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+    val=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+        img_prefix=f'{data_root}/data/',
+        data_cfg=data_cfg,
+        pipeline=val_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+    test=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+        img_prefix=f'{data_root}/data/',
+        data_cfg=data_cfg,
+        pipeline=val_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_huge_ap10k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_huge_ap10k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d2f8ab0630bb0f997b529303179b0e425c553ac
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_huge_ap10k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_large_ap10k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_large_ap10k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e44c27b3088a3a670ba03e7961a3df6dd3706c2
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_large_ap10k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_small_ap10k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_small_ap10k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c3f2b97905ba47318cde61f4eec35b4624bc554
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/ViTPose_small_ap10k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9db08981c729c2fc63aafc4cf92b1bb86271f63
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md
@@ -0,0 +1,41 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py) | 256x256 | 0.738 | 0.958 | 0.808 | 0.592 | 0.743 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) |
+| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py) | 256x256 | 0.744 | 0.959 | 0.807 | 0.589 | 0.748 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) |
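
Each config in this folder inherits `../../../../_base_/datasets/ap10k.py` and injects the dataset metadata through the `{{_base_.dataset_info}}` placeholder, so the raw files above only show half the picture. A small sketch, assuming the paths above relative to the `vendor/ViTPose` checkout, of loading one config with `mmcv.Config` to see the resolved values:

```python
# Sketch: the {{_base_.dataset_info}} placeholders in the raw config are
# substituted from the _base_ ap10k dataset file when mmcv parses it.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'ap10k/hrnet_w32_ap10k_256x256.py')

print(cfg.model.backbone.type)                             # 'HRNet'
print(cfg.data_cfg['image_size'])                          # [256, 256]
print(cfg.data.train.type)                                 # 'AnimalAP10KDataset'
print(len(cfg.data.train.dataset_info['keypoint_info']))   # 17 keypoints
```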
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8cf0ced8b3401de47703881b7c4dd8137852d931
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: AP-10K
+ Name: topdown_heatmap_hrnet_w32_ap10k_256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.738
+ AP@0.5: 0.958
+ AP@0.75: 0.808
+ APL: 0.743
+ APM: 0.592
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: AP-10K
+ Name: topdown_heatmap_hrnet_w48_ap10k_256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.744
+ AP@0.5: 0.959
+ AP@0.75: 0.807
+ APL: 0.748
+ APM: 0.589
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..da3900c03b1ddc8c2706383c3de97127363533d3
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2012ec8ee0ab65ce761368083e21ae082b2ead2
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8496a3cc6960f9b8f7c29266912b4b20427669fb
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c5699cdb9da9884301d0c402437c936d9c2f608
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ap10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
+ img_prefix=f'{data_root}/data/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e1be927e51fe495c1f18026533017020fa03072
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md
@@ -0,0 +1,41 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py) | 256x256 | 0.699 | 0.940 | 0.760 | 0.570 | 0.703 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.log.json) |
+| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py) | 256x256 | 0.698 | 0.943 | 0.754 | 0.543 | 0.702 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.log.json) |
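
The numbers above are reported on the AP-10K validation split that the configs reference through `data.val`. As a hedged sketch of materializing that split exactly as the res50 config describes it (this assumes AP-10K has already been downloaded into `data/ap10k/` in the layout the `data` dict expects):

```python
# Sketch: build the validation dataset and dataloader from the res50 config.
# Assumes the AP-10K images/annotations exist under data/ap10k/ as configured.
from mmcv import Config
from mmpose.datasets import build_dataloader, build_dataset

cfg = Config.fromfile(
    'vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'ap10k/res50_ap10k_256x256.py')

val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
val_loader = build_dataloader(
    val_dataset,
    samples_per_gpu=cfg.data.val_dataloader['samples_per_gpu'],
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False)

print(len(val_dataset), 'validation samples')
```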
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..48b039fce89bb6fb6b1cd3d7b6c6e32fd7f5d2d5
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: AP-10K
+ Name: topdown_heatmap_res50_ap10k_256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.699
+ AP@0.5: 0.94
+ AP@0.75: 0.76
+ APL: 0.703
+ APM: 0.57
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: AP-10K
+ Name: topdown_heatmap_res101_ap10k_256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.698
+ AP@0.5: 0.943
+ AP@0.75: 0.754
+ APL: 0.702
+ APM: 0.543
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_base_apt36k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_base_apt36k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3aa5d40ecf8fea1212e8b641fe7e14321fff618
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_base_apt36k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/apt36k'
+data = dict(
+    samples_per_gpu=32,
+    workers_per_gpu=4,
+    val_dataloader=dict(samples_per_gpu=32),
+    test_dataloader=dict(samples_per_gpu=32),
+    train=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/train_annotations_1.json',
+        img_prefix=f'{data_root}/',
+        data_cfg=data_cfg,
+        pipeline=train_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+    val=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/val_annotations_1.json',
+        img_prefix=f'{data_root}/',
+        data_cfg=data_cfg,
+        pipeline=val_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+    test=dict(
+        type='AnimalAP10KDataset',
+        ann_file=f'{data_root}/annotations/val_annotations_1.json',
+        img_prefix=f'{data_root}/',
+        data_cfg=data_cfg,
+        pipeline=val_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_huge_apt36k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_huge_apt36k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0562e79a286b58f19db3b911aa8c6864f8209458
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_huge_apt36k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/apt36k'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/train_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_large_apt36k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_large_apt36k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4ae268d4c68f35ac2d757c15406706f90483d4e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_large_apt36k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/apt36k'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/train_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_small_apt36k_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_small_apt36k_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..691d373b5ce391a41c997a300aaea7ccb0d63d7e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/apt36k/ViTPose_small_apt36k_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ap10k.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/apt36k'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/train_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{data_root}/annotations/val_annotations_1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
\ No newline at end of file
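
These vendored configs follow mmpose's config system: the `_base_` list pulls in shared runtime and dataset definitions, and the `{{_base_.dataset_info}}` placeholders are substituted when the file is parsed. A minimal sketch of resolving one of them, assuming mmcv 1.x is installed and the path below (taken from this diff) is reachable from the repository root:

```python
# Sketch: resolve one of the vendored configs with mmcv's Config loader.
# Assumes mmcv 1.x is installed and the script runs from the repo root.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'apt36k/ViTPose_small_apt36k_256x192.py')

# `_base_` files are merged and `{{_base_.dataset_info}}` has been expanded.
print(cfg.model.backbone.embed_dim)   # 384 for the ViT-S backbone
print(cfg.data.train.type)            # 'AnimalAP10KDataset'
print(cfg.data_cfg.image_size)        # [192, 256]
```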
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md
new file mode 100644
index 0000000000000000000000000000000000000000..097c2f6554d19af4b87ffd32a2c26b68d0031184
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md
@@ -0,0 +1,40 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+ATRW (ACM MM'2020)
+
+```bibtex
+@inproceedings{li2020atrw,
+ title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
+ author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
+ booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
+ pages={2590--2598},
+ year={2020}
+}
+```
+
+
+
+Results on ATRW validation set
+
+| Arch | Input Size | AP | AP@0.5 | AP@0.75 | AR | AR@0.5 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py) | 256x256 | 0.912 | 0.973 | 0.959 | 0.938 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256_20210414.log.json) |
+| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py) | 256x256 | 0.911 | 0.972 | 0.946 | 0.937 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256_20210414.log.json) |
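
The table pairs each config added in this diff with a released checkpoint. As a rough sketch of how such a pair could be used for top-down inference, assuming the vendored ViTPose tree exposes the standard mmpose 0.x API as `mmpose`; `tiger.jpg` and the full-frame box are placeholders, and the checkpoint URL is the one listed above:

```python
# Rough sketch: top-down inference with one config/checkpoint pair from the
# table, using the mmpose 0.x API that the vendored ViTPose tree builds on.
from mmpose.apis import init_pose_model, inference_top_down_pose_model
from mmpose.datasets import DatasetInfo

config = ('vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py')
checkpoint = ('https://download.openmmlab.com/mmpose/animal/hrnet/'
              'hrnet_w32_atrw_256x256-f027f09a_20210414.pth')

model = init_pose_model(config, checkpoint, device='cpu')
dataset_info = DatasetInfo(model.cfg.data['test']['dataset_info'])

person_results = [{'bbox': [0, 0, 640, 480]}]  # one xywh box per instance
pose_results, _ = inference_top_down_pose_model(
    model, 'tiger.jpg', person_results, format='xywh',
    dataset_info=dataset_info)
print(pose_results[0]['keypoints'].shape)  # (15, 3): x, y, score per joint
```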
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c33437024ca9231d2acfb0d001d33c2540b0f793
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: ATRW
+ Name: topdown_heatmap_hrnet_w32_atrw_256x256
+ Results:
+ - Dataset: ATRW
+ Metrics:
+ AP: 0.912
+ AP@0.5: 0.973
+ AP@0.75: 0.959
+ AR: 0.938
+ AR@0.5: 0.985
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: ATRW
+ Name: topdown_heatmap_hrnet_w48_atrw_256x256
+ Results:
+ - Dataset: ATRW
+ Metrics:
+ AP: 0.911
+ AP@0.5: 0.972
+ AP@0.75: 0.946
+ AR: 0.937
+ AR@0.5: 0.985
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth
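
The `.yml` file above is model-index metadata rather than a training config. A small sketch of reading it with PyYAML (assumed available) to enumerate config names, metrics, and checkpoint URLs:

```python
# Sketch: enumerate the model-index entries in the .yml file added above.
import yaml

path = ('vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/'
        'topdown_heatmap/atrw/hrnet_atrw.yml')
with open(path) as f:
    index = yaml.safe_load(f)

for entry in index['Models']:
    metrics = entry['Results'][0]['Metrics']
    print(entry['Name'], metrics['AP'], entry['Weights'])
```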
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef080ea929c2c612ea2182fafe544b7018423a92
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/atrw.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/atrw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_train.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
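
All of the configs in this batch share the same optimization recipe: Adam at 5e-4, 500 iterations of linear warmup, and step decay at epochs 170 and 200 over 210 epochs. The snippet below only illustrates that schedule; the real behaviour comes from mmcv's LR hook, whose default decay factor is assumed to be 0.1 here:

```python
# Illustration only: the warmup + step LR schedule these configs share.
# Warmup is counted in global iterations, decay in epochs; gamma=0.1 is
# assumed to match mmcv's default for the 'step' policy.
def effective_lr(epoch, iteration, base_lr=5e-4, warmup_iters=500,
                 warmup_ratio=0.001, steps=(170, 200), gamma=0.1):
    lr = base_lr * gamma ** sum(epoch >= s for s in steps)
    if iteration < warmup_iters:
        frac = warmup_ratio + (1 - warmup_ratio) * iteration / warmup_iters
        lr *= frac
    return lr

print(effective_lr(0, 0))        # ~5e-07 at the very first iteration
print(effective_lr(100, 10000))  # 0.0005 once warmup is done
print(effective_lr(205, 10000))  # ~5e-06 after the decays at 170 and 200
```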
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..86e647784e6c2236ed80ac30fb359622d1b17064
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/atrw.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/atrw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_train.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..342e02711c119e1915433076508d10735ff088fa
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/atrw.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/atrw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_train.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ed68cc0622bb3b5cc8f43718e340fe7312ca8dc
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/atrw.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/atrw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_train.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..28998435a06824d322f4035f33e82e3fd8351c1e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/atrw.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/atrw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_train.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalATRWDataset',
+ ann_file=f'{data_root}/annotations/keypoint_val.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e75463e57ee26d9e7da6abde9c815ecfb24c323
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.md
@@ -0,0 +1,41 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ATRW (ACM MM'2020)
+
+```bibtex
+@inproceedings{li2020atrw,
+ title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
+ author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
+ booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
+ pages={2590--2598},
+ year={2020}
+}
+```
+
+
+
+Results on ATRW validation set
+
+| Arch | Input Size | AP | AP@0.5 | AP@0.75 | AR | AR@0.5 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py) | 256x256 | 0.900 | 0.973 | 0.932 | 0.929 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256-546c4594_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256_20210414.log.json) |
+| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py) | 256x256 | 0.898 | 0.973 | 0.936 | 0.927 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256-da93f371_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256_20210414.log.json) |
+| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py) | 256x256 | 0.896 | 0.973 | 0.931 | 0.927 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256-2bb8e162_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256_20210414.log.json) |
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d448cfcbf6f1fcaa30a579d5a7bd9c6959c437a3
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml
@@ -0,0 +1,56 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: ATRW
+ Name: topdown_heatmap_res50_atrw_256x256
+ Results:
+ - Dataset: ATRW
+ Metrics:
+ AP: 0.9
+ AP@0.5: 0.973
+ AP@0.75: 0.932
+ AR: 0.929
+ AR@0.5: 0.985
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256-546c4594_20210414.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: ATRW
+ Name: topdown_heatmap_res101_atrw_256x256
+ Results:
+ - Dataset: ATRW
+ Metrics:
+ AP: 0.898
+ AP@0.5: 0.973
+ AP@0.75: 0.936
+ AR: 0.927
+ AR@0.5: 0.985
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256-da93f371_20210414.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: ATRW
+ Name: topdown_heatmap_res152_atrw_256x256
+ Results:
+ - Dataset: ATRW
+ Metrics:
+ AP: 0.896
+ AP@0.5: 0.973
+ AP@0.75: 0.931
+ AR: 0.927
+ AR@0.5: 0.985
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256-2bb8e162_20210414.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..334300d9a6827d4eb6faeb42e08ba0ec0740ab16
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/fly.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=32,
+ dataset_joints=32,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 192],
+ heatmap_size=[48, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fly'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
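
In the fly configs the 192x192 crop is paired with a 48x48 heatmap, so targets are encoded at a 4x stride. The snippet below is a simplified illustration of the per-keypoint Gaussian target that `TopDownGenerateTarget(sigma=2)` encodes; the real mmpose transform also emits `target_weight` and supports other encodings:

```python
# Simplified sketch of the Gaussian heatmap target encoded per keypoint.
import numpy as np

def gaussian_heatmap(x, y, size=(48, 48), sigma=2.0):
    ys, xs = np.mgrid[0:size[0], 0:size[1]]
    return np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))

# A keypoint at (100, 60) in the 192x192 crop lands at (25, 15) on the
# 48x48 heatmap because of the 4x stride implied by image_size/heatmap_size.
hm = gaussian_heatmap(100 / 4, 60 / 4)
print(hm.shape, round(float(hm.max()), 3))  # (48, 48) 1.0
```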
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..90737b88886face476b0b3755c7690c64ebf485f
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/fly.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=32,
+ dataset_joints=32,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 192],
+ heatmap_size=[48, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fly'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..20b29b5eb78a1b96702ef3c1d516019261659854
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/fly.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=32,
+ dataset_joints=32,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 192],
+ heatmap_size=[48, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fly'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalFlyDataset',
+ ann_file=f'{data_root}/annotations/fly_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.md
new file mode 100644
index 0000000000000000000000000000000000000000..24060e422b28e1ac4284b699bf6fe3e8c6378a08
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.md
@@ -0,0 +1,44 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Vinegar Fly (Nature Methods'2019)
+
+```bibtex
+@article{pereira2019fast,
+ title={Fast animal pose estimation using deep neural networks},
+ author={Pereira, Talmo D and Aldarondo, Diego E and Willmore, Lindsay and Kislin, Mikhail and Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W},
+ journal={Nature methods},
+ volume={16},
+ number={1},
+ pages={117--125},
+ year={2019},
+ publisher={Nature Publishing Group}
+}
+```
+
+
+
+Results on Vinegar Fly test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :-------- | :--------: | :------: | :------: | :------: |:------: |:------: |
+|[pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py) | 192x192 | 0.996 | 0.910 | 2.00 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192-5d0ee2d9_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192_20210407.log.json) |
+|[pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py) | 192x192 | 0.996 | 0.912 | 1.95 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192-41a7a6cc_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192_20210407.log.json) |
+|[pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py) | 192x192 | 0.997 | 0.917 | 1.78 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192-fcafbd5a_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192_20210407.log.json) |
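
The fly configs report PCK@0.2, AUC, and EPE rather than COCO-style AP. The snippet below gives schematic definitions of those three metrics; mmpose's evaluation code fixes the exact normalization (for example, the reference distance behind PCK@0.2) and the AUC threshold sweep, so treat this only as an illustration:

```python
# Schematic metric definitions for the table above; numbers only show the
# shape of the computation, not mmpose's exact evaluation settings.
import numpy as np

def epe(pred, gt):                      # mean end-point error in pixels
    return np.linalg.norm(pred - gt, axis=-1).mean()

def pck(pred, gt, norm, thr=0.2):       # fraction of joints within thr * norm
    return (np.linalg.norm(pred - gt, axis=-1) / norm <= thr).mean()

def auc(pred, gt, norm, thrs=np.linspace(0.0, 0.5, 20)):
    return float(np.mean([pck(pred, gt, norm, t) for t in thrs]))

gt = np.random.rand(32, 2) * 192        # 32 fake keypoints in a 192 px crop
pred = gt + np.random.randn(32, 2)      # predictions roughly 1 px off
print(epe(pred, gt), pck(pred, gt, norm=192.0), auc(pred, gt, norm=192.0))
```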
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c6475883418a1dbfdfbd4634477a14aa35459bef
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml
@@ -0,0 +1,50 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: Vinegar Fly
+ Name: topdown_heatmap_res50_fly_192x192
+ Results:
+ - Dataset: Vinegar Fly
+ Metrics:
+ AUC: 0.91
+ EPE: 2.0
+ PCK@0.2: 0.996
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192-5d0ee2d9_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Vinegar Fly
+ Name: topdown_heatmap_res101_fly_192x192
+ Results:
+ - Dataset: Vinegar Fly
+ Metrics:
+ AUC: 0.912
+ EPE: 1.95
+ PCK@0.2: 0.996
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192-41a7a6cc_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Vinegar Fly
+ Name: topdown_heatmap_res152_fly_192x192
+ Results:
+ - Dataset: Vinegar Fly
+ Metrics:
+ AUC: 0.917
+ EPE: 1.78
+ PCK@0.2: 0.997
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192-fcafbd5a_20210407.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.md
new file mode 100644
index 0000000000000000000000000000000000000000..9fad3944eba7d330a4a395c5171c8fd7efce38de
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.md
@@ -0,0 +1,44 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Horse-10 (WACV'2021)
+
+```bibtex
+@inproceedings{mathis2021pretraining,
+ title={Pretraining boosts out-of-domain robustness for pose estimation},
+ author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
+ booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
+ pages={1859--1868},
+ year={2021}
+}
+```
+
+
+
+Results on Horse-10 test set
+
+|Set | Arch | Input Size | PCK@0.3 | NME | ckpt | log |
+| :--- | :---: | :--------: | :------: | :------: |:------: |:------: |
+|split1| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py) | 256x256 | 0.951 | 0.122 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1-401d901a_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1_20210405.log.json) |
+|split2| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py) | 256x256 | 0.949 | 0.116 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2-04840523_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2_20210405.log.json) |
+|split3| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py) | 256x256 | 0.939 | 0.153 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3-4db47400_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3_20210405.log.json) |
+|split1| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py) | 256x256 | 0.973 | 0.095 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1-3c950d3b_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1_20210405.log.json) |
+|split2| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py) | 256x256 | 0.969 | 0.101 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2-8ef72b5d_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2_20210405.log.json) |
+|split3| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py) | 256x256 | 0.961 | 0.128 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3-0232ec47_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3_20210405.log.json) |
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml
new file mode 100644
index 0000000000000000000000000000000000000000..16504855b154d17608dbf3c65442b920b21f425e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml
@@ -0,0 +1,86 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w32_horse10_256x256-split1
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.122
+ PCK@0.3: 0.951
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1-401d901a_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w32_horse10_256x256-split2
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.116
+ PCK@0.3: 0.949
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2-04840523_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w32_horse10_256x256-split3
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.153
+ PCK@0.3: 0.939
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3-4db47400_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w48_horse10_256x256-split1
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.095
+ PCK@0.3: 0.973
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1-3c950d3b_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w48_horse10_256x256-split2
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.101
+ PCK@0.3: 0.969
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2-8ef72b5d_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_hrnet_w48_horse10_256x256-split3
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.128
+ PCK@0.3: 0.961
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3-0232ec47_20210405.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py
new file mode 100644
index 0000000000000000000000000000000000000000..76d2f1c812f1b3f71c7d7dca3f2133baabf29753
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4f2bb278c4110b1a8b9826c54cd07606664179c
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py
new file mode 100644
index 0000000000000000000000000000000000000000..38c2f82f9e97883264472fec7e9fa6128fcec1d1
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fea30d63a2c52ed8b1d2ccc9b525355a7ca56ad
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py
new file mode 100644
index 0000000000000000000000000000000000000000..49f0920e5759ddc2f14e4a9cee94fa9354b0cd86
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e0a4991f18cd89b0eb24cc0e2a8c881ef566bef
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py
new file mode 100644
index 0000000000000000000000000000000000000000..f67903582115f40086ebccccfeb272d0bb072189
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5203d2c92f11920d6417073617e5b6f0434c66e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py
new file mode 100644
index 0000000000000000000000000000000000000000..c371bf0ae7c9493c0a28653bce758d7f5748be1e
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py
new file mode 100644
index 0000000000000000000000000000000000000000..b119c4808fc845b49e2c2452c45bd2756162bf6f
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py
new file mode 100644
index 0000000000000000000000000000000000000000..68fefa69b65cde7302d29f1b44ce7deda4c2a9d1
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5673f77f996ef3e94de7e4d673c9a063935102
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a14e16b9920476fec9a290cc12a60fdfa2b25b1
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split1.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9463010e5133b327ad94fe90e581280f0e11856
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split2.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py
new file mode 100644
index 0000000000000000000000000000000000000000..7612dd829a20ba4d754822a5da5bb59b564200af
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/horse10.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 21
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/horse10'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-train-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalHorse10Dataset',
+ ann_file=f'{data_root}/annotations/horse10-test-split3.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.md
new file mode 100644
index 0000000000000000000000000000000000000000..0b7797e103f0e952dde801be09087e0ab2351b98
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.md
@@ -0,0 +1,47 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Horse-10 (WACV'2021)
+
+```bibtex
+@inproceedings{mathis2021pretraining,
+ title={Pretraining boosts out-of-domain robustness for pose estimation},
+ author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
+ booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
+ pages={1859--1868},
+ year={2021}
+}
+```
+
+
+
+Results on Horse-10 test set
+
+|Set | Arch | Input Size | PCK@0.3 | NME | ckpt | log |
+| :--- | :---: | :--------: | :------: | :------: |:------: |:------: |
+|split1| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py) | 256x256 | 0.956 | 0.113 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1-3a3dc37e_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1_20210405.log.json) |
+|split2| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py) | 256x256 | 0.954 | 0.111 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2-65e2a508_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2_20210405.log.json) |
+|split3| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py) | 256x256 | 0.946 | 0.129 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3-9637d4eb_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3_20210405.log.json) |
+|split1| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py) | 256x256 | 0.958 | 0.115 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1-1b7c259c_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1_20210405.log.json) |
+|split2| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py) | 256x256 | 0.955 | 0.115 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2-30e2fa87_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2_20210405.log.json) |
+|split3| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py) | 256x256 | 0.946 | 0.126 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3-2eea5bb1_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3_20210405.log.json) |
+|split1| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py) | 256x256 | 0.969 | 0.105 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1-7e81fe2d_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1_20210405.log.json) |
+|split2| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py) | 256x256 | 0.970 | 0.103 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2-3b3404a3_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2_20210405.log.json) |
+|split3| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py) | 256x256 | 0.957 | 0.131 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3-c957dac5_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3_20210405.log.json) |
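+
+For a quick sanity check, the checkpoints above can be driven through the mmpose-style
+top-down inference API shipped with ViTPose. The sketch below is illustrative only:
+the config/checkpoint paths and the input image are assumptions, and the API names
+follow mmpose 0.x as vendored in this repository.
+
+```python
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+# Paths are illustrative; download the checkpoint from the table above first.
+config = ('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/'
+          'res50_horse10_256x256-split1.py')
+checkpoint = 'res50_horse10_256x256_split1-3a3dc37e_20210405.pth'
+
+model = init_pose_model(config, checkpoint, device='cpu')
+
+# One horse bounding box in xywh format; in practice this comes from a detector.
+animal_results = [{'bbox': [0, 0, 640, 480]}]
+pose_results, _ = inference_top_down_pose_model(
+    model, 'horse.jpg', animal_results, format='xywh',
+    dataset='AnimalHorse10Dataset')
+
+vis_pose_result(model, 'horse.jpg', pose_results,
+                dataset='AnimalHorse10Dataset', out_file='horse_vis.jpg')
+```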
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d1b39195422f059946f0eef1e6924b1599f91ee8
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml
@@ -0,0 +1,125 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: Horse-10
+ Name: topdown_heatmap_res50_horse10_256x256-split1
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.113
+ PCK@0.3: 0.956
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1-3a3dc37e_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res50_horse10_256x256-split2
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.111
+ PCK@0.3: 0.954
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2-65e2a508_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res50_horse10_256x256-split3
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.129
+ PCK@0.3: 0.946
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3-9637d4eb_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res101_horse10_256x256-split1
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.115
+ PCK@0.3: 0.958
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1-1b7c259c_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res101_horse10_256x256-split2
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.115
+ PCK@0.3: 0.955
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2-30e2fa87_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res101_horse10_256x256-split3
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.126
+ PCK@0.3: 0.946
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3-2eea5bb1_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res152_horse10_256x256-split1
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.105
+ PCK@0.3: 0.969
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1-7e81fe2d_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res152_horse10_256x256-split2
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.103
+ PCK@0.3: 0.97
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2-3b3404a3_20210405.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Horse-10
+ Name: topdown_heatmap_res152_horse10_256x256-split3
+ Results:
+ - Dataset: Horse-10
+ Metrics:
+ NME: 0.131
+ PCK@0.3: 0.957
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3-c957dac5_20210405.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..18ba8ace4ed0b867112e275c9499a308bfa09d4c
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/locust.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=35,
+ dataset_joints=35,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/locust'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..3966ef2e5c26da9661bda9fdbc0e0d88b77928d7
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/locust.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=35,
+ dataset_joints=35,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/locust'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..0850fc27818a1378c16b7f4c922f5a51e5de15f6
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/locust.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=35,
+ dataset_joints=35,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/locust'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalLocustDataset',
+ ann_file=f'{data_root}/annotations/locust_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.md
new file mode 100644
index 0000000000000000000000000000000000000000..20958ffb9c165e1041b1ef102237132005e87036
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Desert Locust (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+Results on Desert Locust test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :-------- | :--------: | :------: | :------: | :------: |:------: |:------: |
+|[pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py) | 160x160 | 0.999 | 0.899 | 2.27 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160_20210407.log.json) |
+|[pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py) | 160x160 | 0.999 | 0.907 | 2.03 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160_20210407.log.json) |
+|[pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py) | 160x160 | 1.000 | 0.926 | 1.48 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160_20210407.log.json) |
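+
+The metrics above follow the usual keypoint conventions: EPE is the mean Euclidean
+pixel error, PCK@0.2 counts a keypoint as correct when its error is below 0.2 of the
+normalisation length (here the bounding-box size), and AUC integrates PCK over a range
+of thresholds. A minimal NumPy sketch of these definitions (illustrative only; the
+array shapes, normalisation choice and threshold grid are assumptions, not the exact
+implementation used by the evaluator):
+
+```python
+import numpy as np
+
+def keypoint_metrics(pred, gt, norm, thr=0.2):
+    """pred, gt: (N, K, 2) keypoints in pixels; norm: (N,) normalisation per instance."""
+    dist = np.linalg.norm(pred - gt, axis=-1)         # (N, K) Euclidean errors
+    epe = dist.mean()                                 # end-point error in pixels
+    rel = dist / norm[:, None]                        # error relative to the norm length
+    pck = (rel < thr).mean()                          # PCK at the given threshold
+    thresholds = np.linspace(0.0, 1.0, 21)
+    auc = np.mean([(rel < t).mean() for t in thresholds])  # coarse AUC over thresholds
+    return pck, auc, epe
+
+# Toy usage with random numbers, matching the 35 locust keypoints.
+gt = np.random.rand(4, 35, 2) * 160
+pred = gt + np.random.randn(4, 35, 2)
+print(keypoint_metrics(pred, gt, norm=np.full(4, 160.0)))
+```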
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c01a219745866c79cb6656ffcb0aabffc81a47ac
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml
@@ -0,0 +1,50 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: Desert Locust
+ Name: topdown_heatmap_res50_locust_160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.899
+ EPE: 2.27
+ PCK@0.2: 0.999
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Desert Locust
+ Name: topdown_heatmap_res101_locust_160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.907
+ EPE: 2.03
+ PCK@0.2: 0.999
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Desert Locust
+ Name: topdown_heatmap_res152_locust_160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.926
+ EPE: 1.48
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.md
new file mode 100644
index 0000000000000000000000000000000000000000..abcffa04a1395a3978a1be5effc19317d56b975a
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.md
@@ -0,0 +1,40 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MacaquePose (bioRxiv'2020)
+
+```bibtex
+@article{labuguen2020macaquepose,
+ title={MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset for markerless motion capture},
+ author={Labuguen, Rollyn and Matsumoto, Jumpei and Negrete, Salvador and Nishimaru, Hiroshi and Nishijo, Hisao and Takada, Masahiko and Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro},
+ journal={bioRxiv},
+ year={2020},
+ publisher={Cold Spring Harbor Laboratory}
+}
+```
+
+
+
+Results on MacaquePose with ground-truth detection bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py) | 256x192 | 0.814 | 0.953 | 0.918 | 0.851 | 0.969 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_macaque_256x192_20210407.log.json) |
+| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w48_macaque_256x192.py) | 256x192 | 0.818 | 0.963 | 0.917 | 0.855 | 0.971 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_macaque_256x192-9b34b02a_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_macaque_256x192_20210407.log.json) |
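+
+Each config ends with `dataset_info={{_base_.dataset_info}}`, which mmcv resolves from
+the corresponding `configs/_base_/datasets/*.py` file at load time. A quick way to check
+that a config resolves and to instantiate the model it describes (a sketch assuming the
+vendored mmcv/mmpose packages are importable and paths are given relative to the ViTPose
+root; the local checkpoint filename is an assumption):
+
+```python
+from mmcv import Config
+from mmcv.runner import load_checkpoint
+from mmpose.models import build_posenet
+
+cfg = Config.fromfile(
+    'configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/'
+    'hrnet_w32_macaque_256x192.py')
+print(cfg.data.train.type)          # AnimalMacaqueDataset
+print(cfg.data.train.dataset_info)  # dict substituted from _base_/datasets/macaque.py
+
+model = build_posenet(cfg.model)
+load_checkpoint(model, 'hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth',
+                map_location='cpu')
+model.eval()
+```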
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d02d1f8c42d3ad581021cf16757da9fdbee7dd53
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: MacaquePose
+ Name: topdown_heatmap_hrnet_w32_macaque_256x192
+ Results:
+ - Dataset: MacaquePose
+ Metrics:
+ AP: 0.814
+ AP@0.5: 0.953
+ AP@0.75: 0.918
+ AR: 0.851
+ AR@0.5: 0.969
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w48_macaque_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MacaquePose
+ Name: topdown_heatmap_hrnet_w48_macaque_256x192
+ Results:
+ - Dataset: MacaquePose
+ Metrics:
+ AP: 0.818
+ AP@0.5: 0.963
+ AP@0.75: 0.917
+ AR: 0.855
+ AR@0.5: 0.971
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_macaque_256x192-9b34b02a_20210407.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5085dccdc9c12b030b57f132737f28fc13d6283
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/macaque.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
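+# Note: image_size and heatmap_size above are [width, height]; the "256x192" in the
+# config name is height x width.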
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/macaque'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w48_macaque_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w48_macaque_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..bae72c8c71f1b9b66e35bb26e3c22eb850b44554
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w48_macaque_256x192.py
@@ -0,0 +1,172 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/macaque.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/macaque'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3656eb68544bf335e8768e3c67dd95b53ec723e2
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/macaque.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/macaque'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2267b27a0314e5dc86fa62f179cfefa898ff6494
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/macaque.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/macaque'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c51c96518d9e61346035a7dbc663ac9462ce7a1
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/macaque.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/macaque'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalMacaqueDataset',
+ ann_file=f'{data_root}/annotations/macaque_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.md
new file mode 100644
index 0000000000000000000000000000000000000000..f6c7f6bd53d191df630e114123e08461c580799b
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.md
@@ -0,0 +1,41 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MacaquePose (bioRxiv'2020)
+
+```bibtex
+@article{labuguen2020macaquepose,
+ title={MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset for markerless motion capture},
+ author={Labuguen, Rollyn and Matsumoto, Jumpei and Negrete, Salvador and Nishimaru, Hiroshi and Nishijo, Hisao and Takada, Masahiko and Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro},
+ journal={bioRxiv},
+ year={2020},
+ publisher={Cold Spring Harbor Laboratory}
+}
+```
+
+
+
+Results on MacaquePose with ground-truth detection bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py) | 256x192 | 0.799 | 0.952 | 0.919 | 0.837 | 0.964 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192_20210407.log.json) |
+| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py) | 256x192 | 0.790 | 0.953 | 0.908 | 0.828 | 0.967 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_macaque_256x192-e3b9c6bb_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_macaque_256x192_20210407.log.json) |
+| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py) | 256x192 | 0.794 | 0.951 | 0.915 | 0.834 | 0.968 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_macaque_256x192-c42abc02_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_macaque_256x192_20210407.log.json) |
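+
+The config paths above resolve against the vendored ViTPose tree. As a minimal, illustrative sketch (not part of the original model zoo page), such a config can be loaded and inspected with `mmcv.Config`, which merges the `_base_` files and resolves the `{{_base_.dataset_info}}` references used in these configs; the local path below is an assumption about the checkout layout:
+
+```python
+from mmcv import Config
+
+# Hypothetical local path into the vendored ViTPose configs.
+cfg_path = ('vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/'
+            'topdown_heatmap/macaque/res50_macaque_256x192.py')
+
+cfg = Config.fromfile(cfg_path)  # merges _base_ files, resolves {{_base_.*}} references
+print(cfg.model.backbone.type, cfg.model.backbone.depth)   # ResNet 50
+print(cfg.data_cfg.image_size, cfg.data_cfg.heatmap_size)  # [192, 256] [48, 64]
+print(cfg.data.train.type)                                 # AnimalMacaqueDataset
+```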
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.yml
new file mode 100644
index 0000000000000000000000000000000000000000..31aa7566008d55d4b7b03f8d091e465032411d86
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.yml
@@ -0,0 +1,56 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: MacaquePose
+ Name: topdown_heatmap_res50_macaque_256x192
+ Results:
+ - Dataset: MacaquePose
+ Metrics:
+ AP: 0.799
+ AP@0.5: 0.952
+ AP@0.75: 0.919
+ AR: 0.837
+ AR@0.5: 0.964
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res101_macaque_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MacaquePose
+ Name: topdown_heatmap_res101_macaque_256x192
+ Results:
+ - Dataset: MacaquePose
+ Metrics:
+ AP: 0.79
+ AP@0.5: 0.953
+ AP@0.75: 0.908
+ AR: 0.828
+ AR@0.5: 0.967
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_macaque_256x192-e3b9c6bb_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MacaquePose
+ Name: topdown_heatmap_res152_macaque_256x192
+ Results:
+ - Dataset: MacaquePose
+ Metrics:
+ AP: 0.794
+ AP@0.5: 0.951
+ AP@0.75: 0.915
+ AR: 0.834
+ AR@0.5: 0.968
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_macaque_256x192-c42abc02_20210407.pth
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..693867c5263f84a182a1d7742ffc996eacb42fd7
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py
@@ -0,0 +1,124 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/zebra.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=9,
+ dataset_joints=9,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/zebra'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..edc07d3f9721d165aee3c3bf82f030aee9833653
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py
@@ -0,0 +1,124 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/zebra.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=9,
+ dataset_joints=9,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/zebra'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..3120b473f8abd6073b4a06a99c89b23e98137145
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py
@@ -0,0 +1,124 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/zebra.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=1,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=9,
+ dataset_joints=9,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/zebra'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='AnimalZebraDataset',
+ ann_file=f'{data_root}/annotations/zebra_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.md b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.md
new file mode 100644
index 0000000000000000000000000000000000000000..3d34d598ac1f2a19cea7d7d92304c6fd79daed51
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Grévy’s Zebra (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+Results on Grévy’s Zebra test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :-------- | :--------: | :------: | :------: | :------: |:------: |:------: |
+|[pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py) | 160x160 | 1.000 | 0.914 | 1.86 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160_20210407.log.json) |
+|[pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py) | 160x160 | 1.000 | 0.916 | 1.82 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160_20210407.log.json) |
+|[pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py) | 160x160 | 1.000 | 0.921 | 1.66 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160_20210407.log.json) |
diff --git a/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.yml b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.yml
new file mode 100644
index 0000000000000000000000000000000000000000..54912ba569e3b545e04587bbd1ffa2191d6f16da
--- /dev/null
+++ b/vendor/ViTPose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.yml
@@ -0,0 +1,50 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res50_zebra_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: topdown_heatmap_res50_zebra_160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.914
+ EPE: 1.86
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res101_zebra_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: topdown_heatmap_res101_zebra_160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.916
+ EPE: 1.82
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth
+- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/res152_zebra_160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: topdown_heatmap_res152_zebra_160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.921
+ EPE: 1.66
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..02682f406b67ad8e5884e0c5d1a25e7bd1a67f3c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,19 @@
+# Image-based Human Body 2D Pose Estimation
+
+Multi-person human pose estimation is defined as the task of detecting the poses (or keypoints) of all people from an input image.
+
+Existing approaches can be categorized into top-down and bottom-up approaches.
+
+Top-down methods (e.g. DeepPose) divide the task into two stages: human detection and pose estimation. They perform human detection first, followed by single-person pose estimation given the detected bounding boxes (a minimal sketch of this flow appears at the end of this page).
+
+Bottom-up approaches (e.g. AE) first detect all the keypoints and then group/associate them into person instances.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_body_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/2d_human_pose_demo.md#2d-human-pose-demo) to run demos.
+
+
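+## A minimal top-down example
+
+The following sketch illustrates the two-stage top-down flow described above, assuming the vendored mmpose 0.x Python API (`init_pose_model`, `inference_top_down_pose_model`, `vis_pose_result`); the config path, checkpoint file, image name and the hand-written person box are placeholders, and the real demos linked above obtain the boxes from a person detector instead:
+
+```python
+from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
+                         vis_pose_result)
+
+# Placeholder config/checkpoint: any top-down pair from the model zoo tables.
+config = 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py'
+checkpoint = 'res50_coco_256x192.pth'
+
+model = init_pose_model(config, checkpoint, device='cpu')
+
+# Stage 1 (normally a person detector): here a hand-written xywh box.
+person_results = [{'bbox': [50, 40, 200, 380]}]
+
+# Stage 2: single-person pose estimation inside each box.
+pose_results, _ = inference_top_down_pose_model(
+    model, 'demo.jpg', person_results, format='xywh')
+
+vis_pose_result(model, 'demo.jpg', pose_results, out_file='vis_demo.jpg')
+```
+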
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2048f2182b77605924ec48913c3203e3bc0a61be
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/README.md
@@ -0,0 +1,25 @@
+# Associative embedding: End-to-end learning for joint detection and grouping (AE)
+
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+AE is one of the most popular 2D bottom-up pose estimation approaches: it first detects all the keypoints and
+then groups/associates them into person instances.
+
+In order to group the predicted keypoints into individuals, a tag is also predicted for each detected keypoint.
+Tags of keypoints belonging to the same person are similar, while tags of different people differ, so the keypoints
+can be grouped according to their tags.
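+
+As a toy illustration of that grouping step (plain Python, not mmpose code; the coordinates, tag values and threshold are made up), each detected keypoint's tag can be compared against the running mean tag of every person formed so far:
+
+```python
+import numpy as np
+
+# (joint_id, x, y, tag) for four detected keypoints in one image.
+detections = [
+    (0, 10, 12, 0.11), (1, 14, 30, 0.09),   # tags near 0.1 -> person A
+    (0, 80, 15, 0.93), (1, 84, 33, 0.95),   # tags near 0.9 -> person B
+]
+
+people = []          # each entry: {'tags': [...], 'joints': {joint_id: (x, y)}}
+tag_threshold = 0.5  # same spirit as the `tag_threshold` setting in the configs
+
+for joint_id, x, y, tag in detections:
+    for person in people:
+        if abs(tag - np.mean(person['tags'])) < tag_threshold:
+            person['tags'].append(tag)
+            person['joints'][joint_id] = (x, y)
+            break
+    else:
+        people.append({'tags': [tag], 'joints': {joint_id: (x, y)}})
+
+print(len(people))  # -> 2 grouped person instances
+```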
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4737739ccafdce31982effd05e0a1b44a20d789
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.md
@@ -0,0 +1,61 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on AIC validation set without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py) | 512x512 | 0.315 | 0.710 | 0.243 | 0.379 | 0.757 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512_20210130.log.json) |
+
+Results on AIC validation set with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py) | 512x512 | 0.323 | 0.718 | 0.254 | 0.379 | 0.758 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512_20210130.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..37d24a423192e918733801aa44970fb3f30b838d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.yml
@@ -0,0 +1,42 @@
+Collections:
+- Name: HigherHRNet
+ Paper:
+ Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
+ Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HigherHRNet
+ Training Data: AI Challenger
+ Name: associative_embedding_higherhrnet_w32_aic_512x512
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.315
+ AP@0.5: 0.71
+ AP@0.75: 0.243
+ AR: 0.379
+ AR@0.5: 0.757
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: AI Challenger
+ Name: associative_embedding_higherhrnet_w32_aic_512x512
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.323
+ AP@0.5: 0.718
+ AP@0.75: 0.254
+ AR: 0.379
+ AR@0.5: 0.758
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_aic_512x512-9a674c33_20210130.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..67602935cc952381b8081b993f220ad3a86c90d8
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py
@@ -0,0 +1,195 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.01, 0.01],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf5fef221acb115d43fbf567ce3603d724921a33
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py
@@ -0,0 +1,198 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.01, 0.01],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..89b6b18ef6229c2a1c78d0d6248f6489f3cb3e14
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.md
@@ -0,0 +1,61 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on AIC validation set without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py) | 512x512 | 0.303 | 0.697 | 0.225 | 0.373 | 0.755 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512_20210131.log.json) |
+
+Results on AIC validation set with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py) | 512x512 | 0.318 | 0.717 | 0.246 | 0.379 | 0.764 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512_20210131.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3be9548fb8529e1deda50ef2b0b9ed5968d9848d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HRNet
+ Training Data: AI Challenger
+ Name: associative_embedding_hrnet_w32_aic_512x512
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.303
+ AP@0.5: 0.697
+ AP@0.75: 0.225
+ AR: 0.373
+ AR@0.5: 0.755
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: AI Challenger
+ Name: associative_embedding_hrnet_w32_aic_512x512
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.318
+ AP@0.5: 0.717
+ AP@0.75: 0.246
+ AR: 0.379
+ AR@0.5: 0.764
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_aic_512x512-77e2a98a_20210131.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e4b8363336397e703985c71fd62092d83176018
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py
@@ -0,0 +1,191 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=14,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.01],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..676e1708bf55edafd005c1f89f3319609a74ee8c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.md
@@ -0,0 +1,67 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | 512x512 | 0.677 | 0.870 | 0.738 | 0.723 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_20200713.log.json) |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | 640x640 | 0.686 | 0.871 | 0.747 | 0.733 | 0.898 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640_20200712.log.json) |
+| [HigherHRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | 512x512 | 0.686 | 0.873 | 0.741 | 0.731 | 0.892 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_20200712.log.json) |
+
+Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | 512x512 | 0.706 | 0.881 | 0.771 | 0.747 | 0.901 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_20200713.log.json) |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | 640x640 | 0.706 | 0.880 | 0.770 | 0.749 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640_20200712.log.json) |
+| [HigherHRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | 512x512 | 0.716 | 0.884 | 0.775 | 0.755 | 0.901 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_20200712.log.json) |
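+
+As a rough usage sketch for these bottom-up checkpoints (assuming the vendored mmpose 0.x API; the config path, checkpoint file and image name below are placeholders), the whole image is processed in a single pass and the grouping happens inside the model, with no person detector involved:
+
+```python
+from mmpose.apis import (init_pose_model, inference_bottom_up_pose_model,
+                         vis_pose_result)
+
+# Placeholder config/checkpoint: any pair from the tables above.
+config = ('configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
+          'coco/higherhrnet_w32_coco_512x512.py')
+checkpoint = 'higher_hrnet32_coco_512x512.pth'
+
+model = init_pose_model(config, checkpoint, device='cpu')
+
+# One forward pass over the full image; associative-embedding grouping is
+# controlled by the model's test_cfg (tag_threshold, nms_kernel, ...).
+pose_results, _ = inference_bottom_up_pose_model(model, 'demo.jpg')
+
+vis_pose_result(model, 'demo.jpg', pose_results, out_file='vis_demo.jpg')
+```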
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5302efe00f9e31682b6498d526963dc2b50db89b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.yml
@@ -0,0 +1,106 @@
+Collections:
+- Name: HigherHRNet
+ Paper:
+ Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
+ Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HigherHRNet
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w32_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.677
+ AP@0.5: 0.87
+ AP@0.75: 0.738
+ AR: 0.723
+ AR@0.5: 0.89
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w32_coco_640x640
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.686
+ AP@0.5: 0.871
+ AP@0.75: 0.747
+ AR: 0.733
+ AR@0.5: 0.898
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w48_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.686
+ AP@0.5: 0.873
+ AP@0.75: 0.741
+ AR: 0.731
+ AR@0.5: 0.892
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w32_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.706
+ AP@0.5: 0.881
+ AP@0.75: 0.771
+ AR: 0.747
+ AR@0.5: 0.901
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512-8ae85183_20200713.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w32_coco_640x640
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.706
+ AP@0.5: 0.88
+ AP@0.75: 0.77
+ AR: 0.749
+ AR@0.5: 0.902
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_640x640-a22fe938_20200712.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w48_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.716
+ AP@0.5: 0.884
+ AP@0.75: 0.775
+ AR: 0.755
+ AR@0.5: 0.901
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..36ba0c8550af2c802a236cde54791494b2c34733
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.md
@@ -0,0 +1,75 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HigherHRNet-w32_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py) | 512x512 | 0.678 | 0.862 | 0.736 | 0.724 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp-8cc64794_20210222.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp_20210222.log.json) |
+| [HigherHRNet-w48_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py) | 512x512 | 0.690 | 0.872 | 0.750 | 0.734 | 0.891 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp-7cad61ef_20210222.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp_20210222.log.json) |
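The table above pairs each UDP config with a released checkpoint. Below is a minimal inference sketch, assuming the vendored ViTPose copy of `mmpose` (0.x API) is importable and that `demo.jpg` stands in for any input image; neither the image path nor the device choice comes from this repository.

```python
# Hedged sketch: run the HigherHRNet-w32 UDP config/checkpoint pair from the
# table above through mmpose's bottom-up inference helpers (mmpose 0.x API).
from mmpose.apis import (inference_bottom_up_pose_model, init_pose_model,
                         vis_pose_result)

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
          'associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py')
checkpoint = ('https://download.openmmlab.com/mmpose/bottom_up/'
              'higher_hrnet32_coco_512x512_udp-8cc64794_20210222.pth')

model = init_pose_model(config, checkpoint, device='cpu')  # or 'cuda:0'

# 'demo.jpg' is a placeholder path; any RGB image works.
pose_results, _ = inference_bottom_up_pose_model(
    model, 'demo.jpg', dataset='BottomUpCocoDataset')
vis_pose_result(model, 'demo.jpg', pose_results,
                dataset='BottomUpCocoDataset', out_file='vis_demo.jpg')
```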
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1a04988d251b7f7c42639fccb160291614432c35
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.yml
@@ -0,0 +1,43 @@
+Collections:
+- Name: HigherHRNet
+ Paper:
+ Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
+ Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HigherHRNet
+ - UDP
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w32_coco_512x512_udp
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.678
+ AP@0.5: 0.862
+ AP@0.75: 0.736
+ AR: 0.724
+ AR@0.5: 0.89
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_512x512_udp-8cc64794_20210222.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_higherhrnet_w48_coco_512x512_udp
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.69
+ AP@0.5: 0.872
+ AP@0.75: 0.75
+ AR: 0.734
+ AR@0.5: 0.891
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_512x512_udp-7cad61ef_20210222.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6f549bad31b8cc18e47fd4c47cd3246540840e3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
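This config relies on mmcv-style inheritance: `_base_` pulls in the shared runtime settings and the COCO dataset metadata, and the `{{_base_.dataset_info}}` placeholders are substituted when the file is parsed. A short sketch of inspecting the merged result, assuming `mmcv` 1.x is installed and the working directory is the repository root:

```python
# Hedged sketch: load the config with mmcv; the _base_ files are merged and
# the {{_base_.dataset_info}} placeholders are resolved at parse time.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
    'associative_embedding/coco/higherhrnet_w32_coco_512x512.py')

print(cfg.model.keypoint_head.type)                # AEHigherResolutionHead
print(cfg.data.train.dataset_info['dataset_name'])  # expected 'coco', from the base COCO file
print(cfg.total_epochs, cfg.lr_config.step)        # 300 [200, 260]
```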
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..6109c2e61c916cf0e6075d3929150c466d2f482c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py
@@ -0,0 +1,197 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
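Compared with `higherhrnet_w32_coco_512x512.py` above, this UDP variant changes only a handful of switches: `use_udp=True` in `test_cfg` and in the affine, target-generation, and resize pipeline steps, plus `project2image=False` and `align_corners=True`. The idea behind these flags is the unbiased coordinate convention; the sketch below is plain arithmetic to illustrate it, not tied to any specific API in this repository.

```python
# Hedged sketch of the convention behind use_udp / align_corners: classic
# processing measures image size in pixels (p), while UDP measures it in unit
# lengths between pixel centres (p - 1), which changes the effective stride
# between the 512x512 input and its 128x128 heatmap.
image_size, heatmap_size = 512, 128

biased_stride = image_size / heatmap_size                # 4.0
unbiased_stride = (image_size - 1) / (heatmap_size - 1)  # ~4.024

print(biased_stride, unbiased_stride)
```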
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..2daf4840bdbe946179fcc380844fe2226654fb05
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b92efc4ffc8e7cde69abe5c5b68d743e06cef72
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py
@@ -0,0 +1,197 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..031e6fc286923f2c2215ebf8233cbb6217600741
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff298aece7fb69b56c4b37c19d17ac412864efc4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py
@@ -0,0 +1,197 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=17,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..b72e57023bf48443b5b0a2f65b9dcca1ef0c541a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.md
@@ -0,0 +1,63 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HourglassAENet (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hourglass_ae](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py) | 512x512 | 0.613 | 0.833 | 0.667 | 0.659 | 0.850 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512-90af499f_20210920.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512_20210920.log.json) |
+
+Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hourglass_ae](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py) | 512x512 | 0.667 | 0.855 | 0.723 | 0.707 | 0.877 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512-90af499f_20210920.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512_20210920.log.json) |
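The second table reports the same checkpoint evaluated with multi-scale testing. A hedged sketch of how those extra scales could be switched on from Python, assuming the usual mmcv `Config` workflow; this is an assumed workflow, not the exact evaluation command used to produce the numbers above.

```python
# Hedged sketch: enable the three default test scales for the hourglass AE
# config without editing the file on disk.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
    'associative_embedding/coco/hourglass_ae_coco_512x512.py')

cfg.model.test_cfg.scale_factor = [2, 1, 0.5]
# pipeline[1] is the BottomUpGetImgSize step defined in val_pipeline/test_pipeline.
cfg.data.test.pipeline[1].test_scale_factor = [2, 1, 0.5]
```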
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5b7d5e88f952e6f8fa0ea425496e736c47155e19
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: Associative Embedding
+ Paper:
+ Title: 'Associative embedding: End-to-end learning for joint detection and grouping'
+ URL: https://arxiv.org/abs/1611.05424
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/associative_embedding.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HourglassAENet
+ Training Data: COCO
+ Name: associative_embedding_hourglass_ae_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.613
+ AP@0.5: 0.833
+ AP@0.75: 0.667
+ AR: 0.659
+ AR@0.5: 0.85
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512-90af499f_20210920.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_hourglass_ae_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.667
+ AP@0.5: 0.855
+ AP@0.75: 0.723
+ AR: 0.707
+ AR@0.5: 0.877
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hourglass_ae/hourglass_ae_coco_512x512-90af499f_20210920.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..351308a2dfdb28a694b91fa1100fd71690331b90
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py
@@ -0,0 +1,167 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassAENet',
+ num_stacks=4,
+ out_channels=34,
+ ),
+ keypoint_head=dict(
+ type='AEMultiStageHead',
+ in_channels=34,
+ out_channels=34,
+ num_stages=4,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=4,
+ ae_loss_type='exp',
+ with_heatmaps_loss=[True, True, True, True],
+ with_ae_loss=[True, True, True, True],
+ push_loss_factor=[0.001, 0.001, 0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001, 0.001, 0.001],
+ heatmaps_loss_factor=[1.0, 1.0, 1.0, 1.0])),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True, True, True],
+ with_ae=[True, True, True, True],
+ select_output_index=[3],
+ project2image=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='MultitaskGatherTarget',
+ pipeline_list=[
+ [dict(type='BottomUpGenerateTarget', sigma=2, max_num_people=30)],
+ ],
+ pipeline_indices=[0] * 4,
+ keys=['targets', 'masks', 'joints']),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=6),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..39f3e3b8e80ee070d0881e16058b93e6dcdb5576
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.md
@@ -0,0 +1,65 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py) | 512x512 | 0.654 | 0.863 | 0.720 | 0.710 | 0.892 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512_20200816.log.json) |
+| [HRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py) | 512x512 | 0.665 | 0.860 | 0.727 | 0.716 | 0.889 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512_20200816.log.json) |
+
+Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py) | 512x512 | 0.698 | 0.877 | 0.760 | 0.748 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512_20200816.log.json) |
+| [HRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py) | 512x512 | 0.712 | 0.880 | 0.771 | 0.757 | 0.909 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512_20200816.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2838b4a70bc3556ea971aa2f37bcf54ef1310009
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.yml
@@ -0,0 +1,73 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HRNet
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w32_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.654
+ AP@0.5: 0.863
+ AP@0.75: 0.72
+ AR: 0.71
+ AR@0.5: 0.892
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w48_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.665
+ AP@0.5: 0.86
+ AP@0.75: 0.727
+ AR: 0.716
+ AR@0.5: 0.889
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w32_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.698
+ AP@0.5: 0.877
+ AP@0.75: 0.76
+ AR: 0.748
+ AR@0.5: 0.907
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w48_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.712
+ AP@0.5: 0.88
+ AP@0.75: 0.771
+ AR: 0.757
+ AR@0.5: 0.909
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512-cf72fcdf_20200816.pth
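In these model-index files, `&id001` / `*id001` are ordinary YAML anchors and aliases, so the `Architecture` list is written once and shared, and the repeated `Config`/`Name` entries simply carry the single-scale and multi-scale numbers from the companion `.md` tables. A small sketch, assuming PyYAML is available:

```python
# Hedged sketch: yaml.safe_load expands the &id001/*id001 anchors, so every
# model entry ends up with the full Architecture list; here we keep the
# highest-AP result reported for each checkpoint.
import yaml

path = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
        'associative_embedding/coco/hrnet_coco.yml')
with open(path) as f:
    index = yaml.safe_load(f)

best = {}
for m in index['Models']:
    ap = m['Results'][0]['Metrics']['AP']
    if ap > best.get(m['Weights'], (-1, None))[0]:
        best[m['Weights']] = (ap, m['Metadata']['Architecture'])

for ckpt, (ap, arch) in best.items():
    print(ckpt.rsplit('/', 1)[-1], ap, arch)
```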
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..2388e5670e5577715799b85e98d02513518d6611
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.md
@@ -0,0 +1,75 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w32_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py) | 512x512 | 0.671 | 0.863 | 0.729 | 0.717 | 0.889 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512_udp-91663bf9_20210220.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512_udp_20210220.log.json) |
+| [HRNet-w48_udp](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py) | 512x512 | 0.681 | 0.872 | 0.741 | 0.725 | 0.892 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512_udp-de08fd8c_20210222.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512_udp_20210222.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..adc8d8dbc5f3ce13709935fe5412f611bf908f0c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.yml
@@ -0,0 +1,43 @@
+Collections:
+- Name: UDP
+ Paper:
+ Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for
+ Human Pose Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/udp.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py
+ In Collection: UDP
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HRNet
+ - UDP
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w32_coco_512x512_udp
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.671
+ AP@0.5: 0.863
+ AP@0.75: 0.729
+ AR: 0.717
+ AR@0.5: 0.889
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512_udp-91663bf9_20210220.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_hrnet_w48_coco_512x512_udp
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.681
+ AP@0.5: 0.872
+ AP@0.75: 0.741
+ AR: 0.725
+ AR@0.5: 0.892
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_512x512_udp-de08fd8c_20210222.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..11c63d587178fbbbf8b6825c54c55cdb9f884ff6
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py
@@ -0,0 +1,189 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb0ef809615b236645139e09cb28cffac35d2360
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..67629a1fd2014724e76a2802f04fee0c9cfc09a2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py
@@ -0,0 +1,189 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..44c2cecddcbb5d295009d64b2e3e5f17fc4d8cd3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..c385bb4f066c8ee5f0795bcb04db5c6722bcb10d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py
@@ -0,0 +1,189 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..b86aba82760e4174397c8af5997aa4a9062e7190
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..711506240b798b549eb005a4debd333e2b61f43d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py
@@ -0,0 +1,189 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=8),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8ca32df54c8454e3640518d580dea87586ae663
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py
@@ -0,0 +1,193 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=8),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..a9b222551d153f3734074a1b5c4d34d570381e9a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.md
@@ -0,0 +1,63 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenetv2](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py) | 512x512 | 0.380 | 0.671 | 0.368 | 0.473 | 0.741 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512_20200816.log.json) |
+
+Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenetv2](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py) | 512x512 | 0.442 | 0.696 | 0.422 | 0.517 | 0.766 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512_20200816.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..95538eba854d71b46feb38e0db2d6069719f2947
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - MobilenetV2
+ Training Data: COCO
+ Name: associative_embedding_mobilenetv2_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.38
+ AP@0.5: 0.671
+ AP@0.75: 0.368
+ AR: 0.473
+ AR@0.5: 0.741
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_mobilenetv2_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.442
+ AP@0.5: 0.696
+ AP@0.75: 0.422
+ AR: 0.517
+ AR@0.5: 0.766
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/mobilenetv2_coco_512x512-4d96e309_20200816.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b0d818707fa875cdc028e45233ad1b0684c0fdf
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=1280,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=1,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..d68700d118145cb881ecafce156a790ec45f6b0c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff87ac8a51ddab62b7afd4fb0599da5db7ea1d70
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9ed79cc2eb52303b2a1a6e0d440ee519dcc9ebe
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
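
The `lr_config` above describes a linear warmup over the first 500 iterations followed by step decay at epochs 200 and 260; during training this is applied by mmcv's LR hooks. Purely to illustrate the schedule itself (not the actual mmcv implementation; the model and `iters_per_epoch` below are placeholders), a plain-PyTorch sketch:

```python
# Illustrative sketch of lr_config: linear warmup over the first 500 iterations,
# then lr * 0.1 at epochs 200 and 260 (gamma defaults to 0.1 in mmcv).
import torch

model = torch.nn.Linear(10, 17)                 # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=0.0015)

warmup_iters, warmup_ratio = 500, 0.001
steps, gamma, total_epochs = [200, 260], 0.1, 300
iters_per_epoch = 1000                          # depends on dataset and batch size

def lr_at(epoch, it):
    """Learning rate at iteration `it` of `epoch` under this schedule."""
    base = 0.0015 * gamma ** sum(epoch >= s for s in steps)
    global_it = epoch * iters_per_epoch + it
    if global_it < warmup_iters:                # linear warmup phase
        k = (1 - global_it / warmup_iters) * (1 - warmup_ratio)
        return base * (1 - k)
    return base

for g in optimizer.param_groups:
    g['lr'] = lr_at(epoch=0, it=0)              # 0.0015 * 0.001 at the very start
```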
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..e473a83298e05719be75679320cf8299fd3d48cd
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
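
`BottomUpGenerateTarget` renders every annotated keypoint as a 2D Gaussian with `sigma=2` on the heatmap grid; with `image_size=640` and `heatmap_size=[160]` the output stride is 4. A simplified NumPy sketch of that idea (the real transform also handles masks, tag maps and multiple people/scales):

```python
# Simplified sketch: render one keypoint as a Gaussian on a stride-4 heatmap.
import numpy as np

image_size, heatmap_size, sigma = 640, 160, 2
stride = image_size / heatmap_size              # 4.0

def gaussian_heatmap(x_img, y_img):
    """Target heatmap for a single keypoint given image-space coordinates."""
    x, y = x_img / stride, y_img / stride       # map to heatmap coordinates
    xs, ys = np.meshgrid(np.arange(heatmap_size), np.arange(heatmap_size))
    return np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))

hm = gaussian_heatmap(320.0, 100.0)
print(hm.shape, hm.max())                       # (160, 160), ~1.0 at the keypoint
```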
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..5022546c74185b8b60e075b32b289c1870f3e111
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py
@@ -0,0 +1,159 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ )),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=1,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
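
`MultiLossFactory` combines a heatmap MSE term with the associative-embedding grouping loss: tags of keypoints belonging to one person are pulled towards their mean, and the mean tags of different people are pushed apart (the `'exp'` variant, weighted here by push/pull factors of 0.001). A heavily simplified torch sketch of the pull/push terms, for intuition only:

```python
# Toy sketch of the associative-embedding pull/push losses ('exp' variant).
# `tags_per_person` holds one scalar tag per detected keypoint, grouped by
# person; the real MultiLossFactory works on dense tag maps with visibility.
import torch

def ae_loss(tags_per_person):
    means = [t.mean() for t in tags_per_person]
    # pull: each keypoint tag should match its person's mean tag
    pull = sum(((t - m) ** 2).mean() for t, m in zip(tags_per_person, means))
    # push: mean tags of different people should be far apart
    push = sum(torch.exp(-(m1 - m2) ** 2)
               for i, m1 in enumerate(means)
               for m2 in means[i + 1:])
    return pull / len(means), push

tags = [torch.tensor([0.9, 1.1, 1.0]), torch.tensor([-2.0, -1.9, -2.1])]
print(ae_loss(tags))   # small pull, near-zero push for well-separated people
```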
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..8643525dd322aeed0b75dce3b17a706a9c9ff90b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=1,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
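
`ToTensor` plus `NormalizeTensor` with mean `[0.485, 0.456, 0.406]` and std `[0.229, 0.224, 0.225]` is the standard ImageNet normalization, matching the torchvision-pretrained ResNet backbones used above. The same preprocessing expressed stand-alone with torchvision (the random image is just a stand-in):

```python
# Equivalent ImageNet preprocessing outside of the mmpose pipeline.
import torch
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.ToTensor(),                      # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

fake_image = torch.randint(0, 256, (512, 512, 3), dtype=torch.uint8).numpy()
x = preprocess(fake_image)
print(x.shape)                                  # torch.Size([3, 512, 512])
```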
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..04b8505ddf2a7833ff8851f26ce660d752bd752c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.md
@@ -0,0 +1,69 @@
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py) | 512x512 | 0.466 | 0.742 | 0.479 | 0.552 | 0.797 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512_20200816.log.json) |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py) | 640x640 | 0.479 | 0.757 | 0.487 | 0.566 | 0.810 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640-2046f9cb_20200822.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640_20200822.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py) | 512x512 | 0.554 | 0.807 | 0.599 | 0.622 | 0.841 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512_20200816.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py) | 512x512 | 0.595 | 0.829 | 0.648 | 0.651 | 0.856 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512-364eb38d_20200822.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512_20200822.log.json) |
+
+Results on COCO val2017 with multi-scale test. 3 default scales (\[2, 1, 0.5\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py) | 512x512 | 0.503 | 0.765 | 0.521 | 0.591 | 0.821 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512_20200816.log.json) |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py) | 640x640 | 0.525 | 0.784 | 0.542 | 0.610 | 0.832 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640-2046f9cb_20200822.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640_20200822.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py) | 512x512 | 0.603 | 0.831 | 0.641 | 0.668 | 0.870 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512_20200816.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py) | 512x512 | 0.660 | 0.860 | 0.713 | 0.709 | 0.889 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512-364eb38d_20200822.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512_20200822.log.json) |
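
The checkpoints in these tables can be run with the bottom-up inference helpers of the mmpose 0.x code that ViTPose vendors; a sketch, assuming those APIs are importable and using the first table row's config and checkpoint (the image path is a placeholder):

```python
# Sketch: bottom-up inference with one of the checkpoints listed above.
# Assumes the vendored mmpose 0.x APIs; adjust paths for your checkout.
from mmpose.apis import init_pose_model, inference_bottom_up_pose_model

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
          'associative_embedding/coco/res50_coco_512x512.py')
checkpoint = ('https://download.openmmlab.com/mmpose/bottom_up/'
              'res50_coco_512x512-5521bead_20200816.pth')

model = init_pose_model(config, checkpoint, device='cpu')
pose_results, _ = inference_bottom_up_pose_model(model, 'demo.jpg')
print(len(pose_results), pose_results[0]['keypoints'].shape)   # N people, (17, 3)
```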
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..45c49b8ecb72e9f1172091b8e2a7ddd2720498c5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.yml
@@ -0,0 +1,137 @@
+Collections:
+- Name: Associative Embedding
+ Paper:
+ Title: 'Associative embedding: End-to-end learning for joint detection and grouping'
+ URL: https://arxiv.org/abs/1611.05424
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/associative_embedding.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - ResNet
+ Training Data: COCO
+ Name: associative_embedding_res50_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.466
+ AP@0.5: 0.742
+ AP@0.75: 0.479
+ AR: 0.552
+ AR@0.5: 0.797
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res50_coco_640x640
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.479
+ AP@0.5: 0.757
+ AP@0.75: 0.487
+ AR: 0.566
+ AR@0.5: 0.81
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640-2046f9cb_20200822.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res101_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.554
+ AP@0.5: 0.807
+ AP@0.75: 0.599
+ AR: 0.622
+ AR@0.5: 0.841
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res152_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.595
+ AP@0.5: 0.829
+ AP@0.75: 0.648
+ AR: 0.651
+ AR@0.5: 0.856
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512-364eb38d_20200822.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res50_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.503
+ AP@0.5: 0.765
+ AP@0.75: 0.521
+ AR: 0.591
+ AR@0.5: 0.821
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res50_coco_512x512-5521bead_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res50_coco_640x640
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.525
+ AP@0.5: 0.784
+ AP@0.75: 0.542
+ AR: 0.61
+ AR@0.5: 0.832
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res50_coco_640x640-2046f9cb_20200822.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res101_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.603
+ AP@0.5: 0.831
+ AP@0.75: 0.641
+ AR: 0.668
+ AR@0.5: 0.87
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res101_coco_512x512-e0c95157_20200816.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py
+ In Collection: Associative Embedding
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: associative_embedding_res152_coco_512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.66
+ AP@0.5: 0.86
+ AP@0.75: 0.713
+ AR: 0.709
+ AR@0.5: 0.889
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/res152_coco_512x512-364eb38d_20200822.pth
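
`resnet_coco.yml` is an OpenMMLab model-index file, so any YAML parser can enumerate the configs, metrics and checkpoint URLs listed above. A small sketch with PyYAML (path relative to the repository root):

```python
# Sketch: list the model-zoo entries declared in resnet_coco.yml.
import yaml

path = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
        'associative_embedding/coco/resnet_coco.yml')
with open(path) as f:
    index = yaml.safe_load(f)        # anchors like &id001 resolve automatically

for m in index['Models']:
    metrics = m['Results'][0]['Metrics']
    print(f"{m['Name']:<45} AP={metrics['AP']:.3f}  {m['Weights']}")
```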
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..44451f645a291469141a97aacf41a3fac6926964
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.md
@@ -0,0 +1,61 @@
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+Results on CrowdPose test without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | :------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py) | 512x512 | 0.655 | 0.859 | 0.705 | 0.728 | 0.660 | 0.577 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512-1aa4a132_20201017.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512_20201017.log.json) |
+
+Results on CrowdPose test with multi-scale test. 2 scales (\[2, 1\]) are used
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | :------: |
+| [HigherHRNet-w32](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py) | 512x512 | 0.661 | 0.864 | 0.710 | 0.742 | 0.670 | 0.566 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512-1aa4a132_20201017.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512_20201017.log.json) |
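
Multi-scale testing (the second table) runs the network at the listed scales — here [2, 1] — resizes the resulting heatmaps back to a common resolution and averages them before grouping. A schematic torch sketch of that aggregation with a stand-in network:

```python
# Schematic multi-scale test-time aggregation (stand-in model, CPU only).
import torch
import torch.nn.functional as F

def model(img):                          # placeholder for the pose network
    n, _, h, w = img.shape
    return torch.rand(n, 14, h // 4, w // 4)    # 14 CrowdPose joints, stride 4

img = torch.rand(1, 3, 512, 512)
base = (128, 128)                        # heatmap size at scale 1
heatmaps = []
for s in (2, 1):                         # test scales from the table above
    x = F.interpolate(img, scale_factor=s, mode='bilinear', align_corners=False)
    hm = model(x)
    heatmaps.append(F.interpolate(hm, size=base, mode='bilinear',
                                  align_corners=False))
avg = torch.stack(heatmaps).mean(dim=0)          # averaged before grouping
print(avg.shape)                                 # torch.Size([1, 14, 128, 128])
```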
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b8a2980665d032846c32796196cc22a8be26f29e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.yml
@@ -0,0 +1,44 @@
+Collections:
+- Name: HigherHRNet
+ Paper:
+ Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
+ Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HigherHRNet
+ Training Data: CrowdPose
+ Name: associative_embedding_higherhrnet_w32_crowdpose_512x512
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.655
+ AP (E): 0.728
+ AP (H): 0.577
+ AP (M): 0.66
+ AP@0.5: 0.859
+ AP@0.75: 0.705
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512-1aa4a132_20201017.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: associative_embedding_higherhrnet_w32_crowdpose_512x512
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.661
+ AP (E): 0.742
+ AP (H): 0.566
+ AP (M): 0.67
+ AP@0.5: 0.864
+ AP@0.75: 0.71
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_crowdpose_512x512-1aa4a132_20201017.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..18739b8b79109cd9db6f69c23423c6884892e93e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py
@@ -0,0 +1,192 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
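
With `flip_test=True`, inference is also run on the horizontally flipped image; the flipped heatmaps are mirrored back and their channels reordered with the dataset's left/right `flip_index` before averaging with the original prediction. A stripped-down sketch with placeholder tensors (the pairing shown follows the 14-joint CrowdPose order but is illustrative only; the authoritative mapping lives in `_base_/datasets/crowdpose.py`):

```python
# Stripped-down flip-test averaging; the flip_index pairing is illustrative.
import torch

heatmaps = torch.rand(1, 14, 128, 128)           # prediction on the original image
heatmaps_flipped = torch.rand(1, 14, 128, 128)   # prediction on the flipped image

# Illustrative left/right pairing for a 14-joint skeleton (head/neck unpaired).
flip_index = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13]

restored = heatmaps_flipped.flip(dims=[3])[:, flip_index]   # un-mirror + swap L/R
averaged = 0.5 * (heatmaps + restored)
print(averaged.shape)                            # torch.Size([1, 14, 128, 128])
```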
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..a853c3f57feaa0454226eb6c0ad5c05a381e2f73
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py
@@ -0,0 +1,196 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
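
The `*_udp.py` variants enable unbiased data processing: `use_udp=True` in the affine and target transforms, and `align_corners=True` with `project2image=False` at test time, so coordinates are mapped between resolutions with the unit-length convention rather than the pixel-area one. A small illustrative comparison of the two mappings (not the mmpose code itself):

```python
# Contrast of the two coordinate conventions behind the *_udp configs.
def map_align_corners(x, src, dst):
    """Unit-length (UDP / align_corners=True) mapping between grid sizes."""
    return x * (dst - 1) / (src - 1)

def map_pixel_area(x, src, dst):
    """Conventional pixel-area mapping (align_corners=False)."""
    return (x + 0.5) * dst / src - 0.5

# Mapping the last heatmap column (127 on a 128 grid) back to a 512 image:
print(map_align_corners(127, 128, 512))   # 511.0 -> lands on the last pixel centre
print(map_pixel_area(127, 128, 512))      # 509.5 -> offset from the last pixel centre
```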
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ce567b9b27ca8144ea6a31fb95e55c278db59d3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py
@@ -0,0 +1,192 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9bf0e33420bf7479e94ea30af847c1d84cefd02
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py
@@ -0,0 +1,196 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..f82792de8cf49e926e4360a4641f3346f886c4e5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py
@@ -0,0 +1,192 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7f2c89c8abe50aefb9f09ce1b84c84e60526c98
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py
@@ -0,0 +1,196 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=14,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=False,
+ align_corners=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True,
+ use_udp=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40,
+ use_udp=True),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ use_udp=True,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1], use_udp=True),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+ ],
+ use_udp=True),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e1cb8b735fcc88f7305ba32b42c0e1fa55dfb29
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=1280,
+ num_joints=14,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e3ca353bf26ca718a9af0085706e88b14c3ee87
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=14,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..c31129e69e5b1cbc17016bd8dd0524ae8c15e2d1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=14,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..350f7fda2664f6b468fc6ea5857ade39ce97fd2f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=2048,
+ num_joints=14,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=14,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ )),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..dc15eb19bddc839c8f780c0d867f6d5611dea796
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.md
@@ -0,0 +1,62 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MHP (ACM MM'2018)
+
+```bibtex
+@inproceedings{zhao2018understanding,
+ title={Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
+ author={Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
+ booktitle={Proceedings of the 26th ACM international conference on Multimedia},
+ pages={792--800},
+ year={2018}
+}
+```
+
+
+
+Results on MHP v2.0 validation set without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py) | 512x512 | 0.583 | 0.895 | 0.666 | 0.656 | 0.931 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512-85a6ab6f_20201229.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512_20201229.log.json) |
+
+Results on MHP v2.0 validation set with multi-scale test. The 3 default scales (\[2, 1, 0.5\]) are used; a sketch of the corresponding config override follows the table.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [HRNet-w48](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py) | 512x512 | 0.592 | 0.898 | 0.673 | 0.664 | 0.932 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512-85a6ab6f_20201229.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512_20201229.log.json) |
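+
+The multi-scale row corresponds to test-time augmentation over several image scales. The snippet below is only a sketch (not a config shipped in this directory) of how those scales might be enabled on top of the single-scale config, reusing the `test_cfg.scale_factor` and `BottomUpGetImgSize.test_scale_factor` fields that the bottom-up configs here already define:
+
+```python
+# Hypothetical override (assumption): enable the 3 default test scales on top of
+# hrnet_w48_mhp_512x512.py, using the same pipeline fields as the configs above.
+_base_ = ['./hrnet_w48_mhp_512x512.py']
+
+# test-time scales used when aggregating heatmaps
+model = dict(test_cfg=dict(scale_factor=[2, 1, 0.5]))
+
+# rebuild the val/test pipeline so images are resized at every test scale
+val_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='BottomUpGetImgSize', test_scale_factor=[2, 1, 0.5]),
+    dict(
+        type='BottomUpResizeAlign',
+        transforms=[
+            dict(type='ToTensor'),
+            dict(
+                type='NormalizeTensor',
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]),
+        ]),
+    dict(
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+            'center', 'scale', 'flip_index'
+        ]),
+]
+test_pipeline = val_pipeline
+
+data = dict(val=dict(pipeline=val_pipeline), test=dict(pipeline=test_pipeline))
+```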
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8eda9252d16dc61e309f5d9e97c950468f51effd
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HRNet
+ Training Data: MHP
+ Name: associative_embedding_hrnet_w48_mhp_512x512
+ Results:
+ - Dataset: MHP
+ Metrics:
+ AP: 0.583
+ AP@0.5: 0.895
+ AP@0.75: 0.666
+ AR: 0.656
+ AR@0.5: 0.931
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512-85a6ab6f_20201229.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MHP
+ Name: associative_embedding_hrnet_w48_mhp_512x512
+ Results:
+ - Dataset: MHP
+ Metrics:
+ AP: 0.592
+ AP@0.5: 0.898
+ AP@0.75: 0.673
+ AR: 0.664
+ AR@0.5: 0.932
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_mhp_512x512-85a6ab6f_20201229.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c5b4dfc9fd28783ef2c7cd1abd4035939e73721
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py
@@ -0,0 +1,187 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mhp.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.005,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[400, 550])
+total_epochs = 600
+channel_cfg = dict(
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=16,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=16,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.01],
+ pull_loss_factor=[0.01],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mhp'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_train.json',
+ img_prefix=f'{data_root}/train/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_val.json',
+ img_prefix=f'{data_root}/val/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_val.json',
+ img_prefix=f'{data_root}/val/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..47346a72e44ee340239c18a7ba7c7dd9aba91bb2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/README.md
@@ -0,0 +1,24 @@
+# DeepPose: Human pose estimation via deep neural networks
+
+## Introduction
+
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+DeepPose was the first to propose using deep neural networks (DNNs) to tackle the problem of human pose estimation.
+It follows the top-down paradigm, which first detects human bounding boxes and then estimates poses within them.
+It learns to directly regress the human body keypoint coordinates.
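+
+As an illustration only (not the mmpose implementation; the class name and tensor shapes below are hypothetical), the following minimal PyTorch sketch shows this regression formulation: a globally pooled backbone feature is mapped by a linear layer to one (x, y) pair per joint and trained with a smooth L1 loss, mirroring the `ResNet` backbone, `GlobalAveragePooling` neck, `DeepposeRegressionHead`, and `SmoothL1Loss` used by the configs in this folder.
+
+```python
+import torch
+import torch.nn as nn
+
+
+class CoordinateRegressionHead(nn.Module):
+    """Hypothetical sketch of DeepPose-style direct coordinate regression."""
+
+    def __init__(self, in_channels: int = 2048, num_joints: int = 17):
+        super().__init__()
+        self.num_joints = num_joints
+        # A single linear layer maps the pooled feature to (x, y) per joint.
+        self.fc = nn.Linear(in_channels, num_joints * 2)
+
+    def forward(self, feats: torch.Tensor) -> torch.Tensor:
+        # feats: (N, in_channels) globally pooled backbone feature
+        coords = self.fc(feats)
+        return coords.view(-1, self.num_joints, 2)
+
+
+if __name__ == '__main__':
+    head = CoordinateRegressionHead()
+    pooled = torch.randn(4, 2048)            # e.g. a ResNet feature after global average pooling
+    pred = head(pooled)                       # (4, 17, 2) keypoint coordinates
+    target = torch.rand(4, 17, 2)
+    loss = nn.SmoothL1Loss()(pred, target)    # the configs here also train with SmoothL1Loss
+    print(pred.shape, loss.item())
+```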
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b46b8f50144d4805f224efad9c4b90510ff567ee
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..580b9b0ae67894c9dede5513f6137a69f4ecb513
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c978eeb3b15b24c62ee9c81d6142c4e6fd69d9be
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..5aaea7d1e132884c118ce907f93e7199ab7200b1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.md
@@ -0,0 +1,59 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, obtained with a person detector having human AP of 56.4 on the COCO val2017 dataset (a sketch for switching to ground-truth boxes follows the table)
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [deeppose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py) | 256x192 | 0.526 | 0.816 | 0.586 | 0.638 | 0.887 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192-f6de6c0e_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192_20210205.log.json) |
+| [deeppose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py) | 256x192 | 0.560 | 0.832 | 0.628 | 0.668 | 0.900 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_20210205.log.json) |
+| [deeppose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py) | 256x192 | 0.583 | 0.843 | 0.659 | 0.686 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_20210205.log.json) |
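+
+The table above is computed with detected person boxes (`use_gt_bbox=False` plus the `COCO_val2017_detections_AP_H_56_person.json` detection file in the configs). As a sketch only (not a config shipped here), evaluating with ground-truth boxes instead would override those flags, e.g.:
+
+```python
+# Hypothetical override (assumption): evaluate res50_coco_256x192.py with
+# ground-truth person boxes rather than the bundled detection results.
+_base_ = ['./res50_coco_256x192.py']
+
+gt_box_cfg = dict(
+    use_gt_bbox=True,   # use annotated person boxes for val/test
+    bbox_file=None,     # the detection file is then unused
+)
+
+data = dict(
+    val=dict(data_cfg=gt_box_cfg),
+    test=dict(data_cfg=gt_box_cfg),
+)
+```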
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..21cc7ee3b52efb92207838eda66d8a9b78714bbb
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.yml
@@ -0,0 +1,57 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - ResNet
+ Training Data: COCO
+ Name: deeppose_res50_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.526
+ AP@0.5: 0.816
+ AP@0.75: 0.586
+ AR: 0.638
+ AR@0.5: 0.887
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_coco_256x192-f6de6c0e_20210205.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: deeppose_res101_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.56
+ AP@0.5: 0.832
+ AP@0.75: 0.628
+ AR: 0.668
+ AR@0.5: 0.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: deeppose_res152_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.583
+ AP@0.5: 0.843
+ AP@0.75: 0.659
+ AR: 0.686
+ AR@0.5: 0.907
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..948975600e9d4d2c824295d85f3f7d0a07d3461e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res101_mpii_256x256.py
@@ -0,0 +1,120 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res152_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res152_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e8ce0ea91172e4b8f9a059b6eb6451af2bca852
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res152_mpii_256x256.py
@@ -0,0 +1,120 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..314a21aea21092ac43695448506636840ce45f66
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res50_mpii_256x256.py
@@ -0,0 +1,120 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
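
The DeepPose-style MPII configs above pair `TopDownGenerateTargetRegression` with a `DeepposeRegressionHead` trained under `SmoothL1Loss`, i.e. the model regresses normalized keypoint coordinates rather than heatmaps. The snippet below is a minimal standalone sketch of that target/loss combination; it is not the mmpose implementation, and the helper names are invented for illustration.

```python
import numpy as np

def make_regression_target(joints, joints_visible, image_size):
    """Hypothetical sketch: map pixel keypoints (K, 2) to normalized targets
    in [0, 1], weighting each joint by its visibility flag."""
    target = joints[:, :2] / np.asarray(image_size, dtype=np.float32)
    target_weight = joints_visible[:, :2].astype(np.float32)
    return target, target_weight

def smooth_l1(pred, target, target_weight, beta=1.0):
    """Visibility-weighted Smooth-L1 (Huber) loss on normalized coordinates."""
    diff = np.abs(pred - target)
    loss = np.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    return float((loss * target_weight).mean())

# Toy example: 16 MPII joints inside a 256x256 crop.
joints = np.random.uniform(0, 256, size=(16, 3))
visible = np.ones((16, 3))
target, weight = make_regression_target(joints, visible, image_size=[256, 256])
pred = target + np.random.normal(scale=0.01, size=target.shape)
print(smooth_l1(pred, target, weight))
```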
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6eb8e5859d0f783579f7feb9f45af4da89192b1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.md
@@ -0,0 +1,58 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [deeppose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res50_mpii_256x256.py) | 256x256 | 0.825 | 0.174 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_20210203.log.json) |
+| [deeppose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res101_mpii_256x256.py) | 256x256 | 0.841 | 0.193 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256_20210205.log.json) |
+| [deeppose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res152_mpii_256x256.py) | 256x256 | 0.850 | 0.198 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256_20210205.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1685083653287bbee7fbf04474334a4acfb8d0c3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.yml
@@ -0,0 +1,48 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res50_mpii_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - ResNet
+ Training Data: MPII
+ Name: deeppose_res50_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.825
+ Mean@0.1: 0.174
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res101_mpii_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: deeppose_res101_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.841
+ Mean@0.1: 0.193
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/res152_mpii_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: deeppose_res152_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.85
+ Mean@0.1: 0.198
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c6fef1486076e21762b23ea55f5d856fc36ce68b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,10 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: human detection and pose estimation.
+
+They perform human detection first, followed by single-person pose estimation given the detected bounding boxes.
+Instead of estimating keypoint coordinates directly, the pose estimator produces heatmaps that represent the
+likelihood of each location being a keypoint.
+
+Various neural network architectures have been proposed to improve performance.
+Popular choices include stacked hourglass networks and HRNet.
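
As a rough illustration of the heatmap formulation described above, the sketch below decodes a stack of per-joint heatmaps back to image coordinates with a simple per-joint argmax plus rescaling. It is deliberately minimal and does not reproduce the post-processing enabled in the configs below (flip testing, heatmap shifting, kernel modulation).

```python
import numpy as np

def decode_heatmaps(heatmaps, image_size):
    """Hypothetical decoder: heatmaps is (K, H, W); returns (K, 2) keypoint
    locations in input-image pixels and a (K,) confidence per joint."""
    num_joints, h, w = heatmaps.shape
    flat = heatmaps.reshape(num_joints, -1)
    ys, xs = np.unravel_index(flat.argmax(axis=1), (h, w))
    scores = flat.max(axis=1)
    # Rescale from heatmap resolution (e.g. 48x64) to input resolution (e.g. 192x256).
    coords = np.stack([xs * image_size[0] / w, ys * image_size[1] / h], axis=1)
    return coords.astype(np.float32), scores

# Toy example matching the 256x192 configs: 14 joints on a 64x48 grid.
heatmaps = np.random.rand(14, 64, 48)
coords, scores = decode_heatmaps(heatmaps, image_size=[192, 256])
print(coords.shape, scores.shape)  # (14, 2) (14,)
```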
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..58f4567b60438e407baa26cb71502a32360b23d2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py
@@ -0,0 +1,151 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
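
A brief usage sketch for a config like `ViTPose_base_aic_256x192.py` above, assuming the vendored ViTPose fork keeps the mmpose 0.x / mmcv 1.x APIs (`Config.fromfile`, `build_posenet`); the config path is the one from this repository, and everything else is illustrative only.

```python
# Minimal sketch, assuming the mmpose 0.x / mmcv 1.x APIs used by ViTPose.
from mmcv import Config
from mmpose.models import build_posenet

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
    'topdown_heatmap/aic/ViTPose_base_aic_256x192.py')

model = build_posenet(cfg.model)   # ViT-B backbone + TopdownHeatmapSimpleHead
print(cfg.data_cfg['image_size'])  # [192, 256]
print(sum(p.numel() for p in model.parameters()) / 1e6, 'M parameters')
```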
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..277123bf26fd137af306114989127622ab2870e2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py
@@ -0,0 +1,151 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c64241adf07acab214545f8ccb5ad59772dd60b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py
@@ -0,0 +1,151 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_small_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_small_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..af66009deac70a9f01c702516853da9a7fd27546
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_small_aic_256x192.py
@@ -0,0 +1,151 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..5331aba3379f908914ac487c48619d2f8767038e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.md
@@ -0,0 +1,39 @@
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={AI Challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on AIC val set with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_256x192.py) | 256x192 | 0.323 | 0.762 | 0.219 | 0.366 | 0.789 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192_20200826.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d80203665815204aaa190f7789871422f060d031
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture:
+ - HRNet
+ Training Data: AI Challenger
+ Name: topdown_heatmap_hrnet_w32_aic_256x192
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.323
+ AP@0.5: 0.762
+ AP@0.75: 0.219
+ AR: 0.366
+ AR@0.5: 0.789
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..407782cc1fe99a1b4710300764ea8804fad81ebd
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_256x192.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
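
For context on `TopDownGenerateTarget` with `sigma=2` in the training pipelines above: each ground-truth joint is rendered as a small Gaussian peak on the 48x64 heatmap grid, and `JointsMSELoss` regresses the predicted heatmaps against these targets. The sketch below shows the idea only; the actual mmpose encoder also handles visibility weights and boundary clipping and supports other encodings, none of which is modelled here.

```python
import numpy as np

def render_gaussian_heatmaps(keypoints, heatmap_size, image_size, sigma=2.0):
    """Hypothetical sketch: keypoints is (K, 2) in input-image pixels;
    returns (K, H, W) heatmaps with a Gaussian bump centred on each joint."""
    w, h = heatmap_size                                  # e.g. [48, 64]
    scale = np.asarray(heatmap_size, dtype=np.float32) / np.asarray(image_size)
    ys, xs = np.mgrid[0:h, 0:w]
    heatmaps = np.zeros((len(keypoints), h, w), dtype=np.float32)
    for k, (x, y) in enumerate(keypoints * scale):       # to heatmap coordinates
        heatmaps[k] = np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))
    return heatmaps

# Toy example for the 256x192 AIC configs: 14 joints.
kpts = np.random.uniform([0, 0], [192, 256], size=(14, 2))
target = render_gaussian_heatmaps(kpts, heatmap_size=[48, 64], image_size=[192, 256])
print(target.shape, float(target.max()))  # (14, 64, 48), close to 1.0
```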
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..772e6a23d19fb0ad833a3f8a8670fadd3bbac45b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w32_aic_384x288.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..62c98ba67ea818b34d0ae7de47bab548aee939dc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_256x192.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef063eb2e817151773546ab39bb24127579fd6e3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_w48_aic_384x288.py
@@ -0,0 +1,167 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup=None,
+ # warmup='linear',
+ # warmup_iters=500,
+ # warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dd2143d66b8940e09430a10a683190c8674a901
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c1b750ab22130bdd42a2486b971b07a1c65cdb1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d4b64ddd742d926c8c8f9cbdbf1c3db00e9744c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
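These are standard mmpose-style configs, so a quick way to sanity-check one after vendoring is to load it and inspect the merged result. A minimal sketch, assuming an mmcv 1.3+ install and that the `_base_` files referenced at the top of the config are present in the vendored tree; the `{{_base_.dataset_info}}` placeholders are resolved by mmcv's base-variable syntax at load time.

```python
# Load the res152/AIC config added above and print a few resolved fields.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'aic/res152_aic_256x192.py')

print(cfg.model.backbone)           # -> {'type': 'ResNet', 'depth': 152}
print(cfg.data_cfg['image_size'])   # -> [192, 256]
print(cfg.data.train.type)          # -> 'TopDownAicDataset'
```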
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4d2276205c67a48b01eec1be2790b9dc7c8ea35
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res152_aic_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a937af4e9053c5bd2911a3d560181e9bce151c26
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_256x192.py
@@ -0,0 +1,134 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..556cda077a103d7a826a70457d91958c1ddfd80e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res50_aic_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aic.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_train.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownAicDataset',
+ ann_file=f'{data_root}/annotations/aic_val.json',
+ img_prefix=f'{data_root}/ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
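The ResNet/AIC configs above all share one optimization recipe: Adam at 5e-4, 500 iterations of linear warmup starting from 0.1% of the base LR, and step decays at epochs 170 and 200 of 210. The sketch below reproduces that LR curve at a few points; `gamma=0.1` is mmcv's default for the `step` policy and is an assumption here, since the configs leave it unset.

```python
# Sketch of the shared schedule: lr=5e-4, linear warmup for 500 iterations
# from warmup_ratio=0.001, then step decay at epochs 170 and 200.
base_lr, warmup_iters, warmup_ratio = 5e-4, 500, 0.001
steps, gamma = (170, 200), 0.1   # gamma assumed (mmcv default), not set in the configs

def lr_at(epoch: int, global_iter: int) -> float:
    regular = base_lr * gamma ** sum(epoch >= s for s in steps)
    if global_iter >= warmup_iters:
        return regular
    # mmcv-style linear warmup toward the regular LR
    k = (1 - global_iter / warmup_iters) * (1 - warmup_ratio)
    return regular * (1 - k)

for epoch, it in [(0, 0), (0, 499), (100, 10_000), (180, 20_000), (205, 22_000)]:
    print(f'epoch {epoch:3d}, iter {it:6d}: lr = {lr_at(epoch, it):.2e}')
```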
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..e733aba36d3905f626febfff9027658d433c50c7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on the AIC val set with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_256x192.py) | 256x192 | 0.294 | 0.736 | 0.174 | 0.337 | 0.763 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192_20200826.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7fb30979bfcacfbd46f3886aa223510a6eaf7492
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/res101_aic_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: AI Challenger
+ Name: topdown_heatmap_res101_aic_256x192
+ Results:
+ - Dataset: AI Challenger
+ Metrics:
+ AP: 0.294
+ AP@0.5: 0.736
+ AP@0.75: 0.174
+ AR: 0.337
+ AR@0.5: 0.763
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e11fe346b32b552eb95a19b41a3225af7e260ba
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=2,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=2,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=([
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]) * 2),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(15, 15), (11, 11), (9, 9), (7, 7)] + [(11, 11), (9, 9),
+ (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
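In the MSPN config above, the `loss_keypoint` and `kernel` entries are built with plain list arithmetic: each stage owns four units, the first three supervised by a down-weighted MSE loss and the last by an OHKM loss, with the Gaussian target kernels shrinking toward the final unit. The 3x and 4x variants below follow the same pattern with the stage multiplier changed. A quick, self-contained check of what those expressions expand to (nothing mmpose-specific is needed):

```python
# Pure-Python check of the list arithmetic in the 2-stage MSPN config above.
per_stage_losses = (
    [dict(type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)] * 3
    + [dict(type='JointsOHKMMSELoss', use_target_weight=True, loss_weight=1.)]
)
losses = per_stage_losses * 2          # num_stages = 2
assert len(losses) == 8                # 2 stages x 4 units

# Gaussian kernels for TopDownGenerateTarget: coarser kernels for the first
# stage, finer kernels for the last stage's units.
kernels = ([(15, 15), (11, 11), (9, 9), (7, 7)]
           + [(11, 11), (9, 9), (7, 7), (5, 5)])
assert len(kernels) == 8
print(losses[-1]['type'], kernels[-1])  # -> JointsOHKMMSELoss (5, 5)
```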
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..564a73fb5c16bae1ac7f7f8b61ae4cb4ec286c68
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=3,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=3,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=([
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]) * 3),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(15, 15), (11, 11), (9, 9), (7, 7)] * 2 + [(11, 11), (9, 9),
+ (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..86c1a742a43eea2dbe613b68770e747988d92f96
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=3,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ num_steps=4,
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=3,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=([
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]) * 3),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(15, 15), (11, 11), (9, 9), (7, 7)] * 2 + [(11, 11), (9, 9),
+ (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0144234cbdf364efe28f65f1218249f156e82d91
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=4,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=4,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=([
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]) * 4),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(15, 15), (11, 11), (9, 9), (7, 7)] * 3 + [(11, 11), (9, 9),
+ (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f639173081be86d3e54ae586c5a7a569779cb8d1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=12,
+ layer_decay_rate=0.75,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
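Unlike the Adam-based CNN configs above, the ViTPose configs use AdamW with layer-wise LR decay: `LayerDecayOptimizerConstructor` with `num_layers=12` and `layer_decay_rate=0.75` gives each transformer block an LR multiplier that shrinks geometrically toward the input. The sketch below only illustrates that scaling rule under the values configured above; it is not the constructor's actual code, and the exact layer indexing is an assumption.

```python
# Illustration (not mmpose code) of the layer-wise LR decay configured above:
# base lr 5e-4, num_layers=12, layer_decay_rate=0.75. Assumed indexing: the
# patch embedding is layer 0, transformer blocks are layers 1..12, and the
# keypoint head sits at layer 13 with the full base LR.
base_lr, num_layers, decay = 5e-4, 12, 0.75

def lr_scale(layer_id: int) -> float:
    # Deeper layers decay less; the head (layer num_layers + 1) gets scale 1.
    return decay ** (num_layers + 1 - layer_id)

for layer_id in (0, 1, 6, 12, 13):
    print(f'layer {layer_id:2d}: lr = {base_lr * lr_scale(layer_id):.2e}')
# -> roughly 1.2e-05 for the patch embedding up to 5.0e-04 for the head
```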
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_simple_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_simple_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d410a1534f35d0bcd1f9d01f408748081576a2b5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_simple_coco_256x192.py
@@ -0,0 +1,171 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=12,
+ layer_decay_rate=0.75,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=0,
+ num_deconv_filters=[],
+ num_deconv_kernels=[],
+ upsample=4,
+ extra=dict(final_conv_kernel=3, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
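The `*_simple_*` variants differ from the configs above only in the decoder: instead of two stride-2 deconvolutions (`num_deconv_layers=2`, 4x4 kernels) they upsample the ViT feature map by 4 and finish with a 3x3 conv (`num_deconv_layers=0`, `upsample=4`, `final_conv_kernel=3`). Either route has to take the grid of patch tokens to the 64x48 heatmap; a quick check of that bookkeeping, using only values from the configs above (not mmpose internals):

```python
# Resolution bookkeeping for the two head variants above: a 256x192 input
# with 16x16 patches gives a 16x12 token grid, and the heatmap target is 64x48.
img_h, img_w, patch = 256, 192, 16
feat_h, feat_w = img_h // patch, img_w // patch            # 16 x 12

# Standard head: two stride-2 deconv layers -> 4x upsampling.
deconv_h, deconv_w = feat_h * 2 * 2, feat_w * 2 * 2        # 64 x 48
# "Simple" head: a single 4x upsample -> same output size.
simple_h, simple_w = feat_h * 4, feat_w * 4                # 64 x 48

assert (deconv_h, deconv_w) == (simple_h, simple_w) == (64, 48)
print(feat_h, feat_w, '->', deconv_h, deconv_w)
```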
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..298b2b59ef8310c73d481e95eb9fa39a8d0a7fef
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=32,
+ layer_decay_rate=0.85,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
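For reference, these top-down configs are consumed at inference time through the mmpose 0.x high-level API that ships with the vendored ViTPose. The sketch below is a hedged usage example, not code from this PR: the checkpoint filename, image, and bounding box are placeholders, and it assumes the bundled `mmpose.apis` exposes `init_pose_model`, `inference_top_down_pose_model`, and `vis_pose_result` as mmpose 0.x does.

```python
# Hedged usage sketch: top-down inference with one of the configs above.
from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
                         vis_pose_result)

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/coco/ViTPose_base_coco_256x192.py')
checkpoint = 'checkpoints/vitpose_base_coco_256x192.pth'  # placeholder path

model = init_pose_model(config, checkpoint, device='cpu')

# Person boxes in xywh format; with use_gt_bbox=False the real evaluation
# pipeline takes these from a detector (the bbox_file above) instead.
person_results = [{'bbox': [50, 50, 200, 400]}]
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results, format='xywh',
    dataset='TopDownCocoDataset')
vis_pose_result(model, 'demo.jpg', pose_results, out_file='vis_demo.jpg')
```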
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_small_simple_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_small_simple_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..42ac25cf1f8556a5ee0e29b9fa3834fa9a1fff37
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_small_simple_coco_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=12,
+ layer_decay_rate=0.8,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5704614306e57c17c5dc1f4df2cc8383f186cacc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='AlexNet', num_classes=-1),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[40, 56],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..f159517386f9a70e5ca6800e842f166a734cf608
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.md
@@ -0,0 +1,41 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py) | 256x192 | 0.623 | 0.859 | 0.704 | 0.686 | 0.903 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192_20200817.log.json) |
+| [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py) | 384x288 | 0.650 | 0.864 | 0.725 | 0.708 | 0.905 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_384x288-80feb4bc_20200821.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_384x288_20200821.log.json) |
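The table above pairs each config with a released checkpoint. As a quick orientation, the following is a minimal sketch of running one of these top-down configs for inference with the mmpose API vendored under `vendor/ViTPose`; the image path, checkpoint location, and example bounding box are illustrative assumptions, not part of the repository.

```python
# Minimal sketch (illustrative paths): top-down inference with one of the
# configs listed above, using the high-level mmpose API.
from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         vis_pose_result)

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/coco/cpm_coco_256x192.py')
checkpoint = 'cpm_coco_256x192-aa4ba095_20200817.pth'  # downloaded from the table above

pose_model = init_pose_model(config, checkpoint, device='cpu')

# A single person box in xywh format plus a detection score, as the
# top-down pipeline expects (normally produced by a person detector).
person_results = [{'bbox': [50, 50, 150, 300, 0.99]}]

pose_results, _ = inference_top_down_pose_model(
    pose_model,
    'demo.jpg',  # any test image
    person_results,
    format='xywh',
    dataset='TopDownCocoDataset')

vis_pose_result(pose_model, 'demo.jpg', pose_results, out_file='vis_demo.jpg')
```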
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f3b3c4d15622680518ba0762c168cc8361b676c3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: CPM
+ Paper:
+ Title: Convolutional pose machines
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/cpm.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py
+ In Collection: CPM
+ Metadata:
+ Architecture: &id001
+ - CPM
+ Training Data: COCO
+ Name: topdown_heatmap_cpm_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.623
+ AP@0.5: 0.859
+ AP@0.75: 0.704
+ AR: 0.686
+ AR@0.5: 0.903
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_cpm_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.65
+ AP@0.5: 0.864
+ AP@0.75: 0.725
+ AR: 0.708
+ AR@0.5: 0.905
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_384x288-80feb4bc_20200821.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9d118b62842ceb4d37be55f2072917fc377a835
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py
@@ -0,0 +1,143 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[24, 32],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e3ae32c397e3730325fbe65c6ef8b2880473654
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py
@@ -0,0 +1,143 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[36, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ab6b159827c948494e87fbd74191cc5e95a80dc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..a99fe7b0b8ddbbcc6993b2e76a0c1fbe49b4614e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.md
@@ -0,0 +1,42 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hourglass_52](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py) | 256x256 | 0.726 | 0.896 | 0.799 | 0.780 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256_20200709.log.json) |
+| [pose_hourglass_52](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_384x384.py) | 384x384 | 0.746 | 0.900 | 0.813 | 0.797 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..28f09df2afdcfbfdbbcfb0a27f52291038691c5f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: Hourglass
+ Paper:
+ Title: Stacked hourglass networks for human pose estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hourglass.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: &id001
+ - Hourglass
+ Training Data: COCO
+ Name: topdown_heatmap_hourglass52_coco_256x256
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.726
+ AP@0.5: 0.896
+ AP@0.75: 0.799
+ AR: 0.78
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_384x384.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hourglass52_coco_384x384
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.746
+ AP@0.5: 0.9
+ AP@0.75: 0.813
+ AR: 0.797
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..10c0ca5c0e1526515e491adbafc10d80ad8ddbf1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_coco.md
@@ -0,0 +1,42 @@
+
+
+
+HRFormer (NIPS'2021)
+
+```bibtex
+@article{yuan2021hrformer,
+ title={HRFormer: High-Resolution Transformer for Dense Prediction},
+ author={Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
+ journal={Advances in Neural Information Processing Systems},
+ volume={34},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrformer_small](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_256x192.py) | 256x192 | 0.737 | 0.899 | 0.810 | 0.792 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192-b657896f_20220226.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192_20220226.log.json) |
+| [pose_hrformer_small](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_384x288.py) | 384x288 | 0.755 | 0.906 | 0.822 | 0.805 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288-4b52b078_20220226.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288_20220226.log.json) |
+| [pose_hrformer_base](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_base_coco_256x192.py) | 256x192 | 0.753 | 0.907 | 0.821 | 0.806 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192-66cee214_20220226.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192_20220226.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..edb658b28445a615fe61a06c2f4de609dc3a8400
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_256x192.py
@@ -0,0 +1,192 @@
+_base_ = ['../../../../_base_/datasets/coco.py']
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=5, create_symlink=False)
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_small-09516375_20220226.pth',
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.1,
+ with_rpe=False,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ num_mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(32, 64),
+ num_heads=[1, 2],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(32, 64, 128),
+ num_heads=[1, 2, 4],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(32, 64, 128, 256),
+ num_heads=[1, 2, 4, 8],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7]))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_root = 'data/coco'
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file=f'{data_root}/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
+
+# fp16 settings
+fp16 = dict(loss_scale='dynamic')
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc9b62e2aecf50f4ccb694d2882a41a76ad5d53c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_small_coco_384x288.py
@@ -0,0 +1,192 @@
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=5, create_symlink=False)
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_small-09516375_20220226.pth',
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.1,
+ with_rpe=False,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ num_mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(32, 64),
+ num_heads=[1, 2],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(32, 64, 128),
+ num_heads=[1, 2, 4],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(32, 64, 128, 256),
+ num_heads=[1, 2, 4, 8],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7]))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_root = 'data/coco'
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file=f'{data_root}/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=256),
+ test_dataloader=dict(samples_per_gpu=256),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+)
+
+# fp16 settings
+fp16 = dict(loss_scale='dynamic')
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..533a974cd46303d8cc1249b8be2c494f95f62278
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.md
@@ -0,0 +1,62 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Albumentations (Information'2020)
+
+```bibtex
+@article{buslaev2020albumentations,
+ title={Albumentations: fast and flexible image augmentations},
+ author={Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A},
+ journal={Information},
+ volume={11},
+ number={2},
+ pages={125},
+ year={2020},
+ publisher={Multidisciplinary Digital Publishing Institute}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [coarsedropout](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_coarsedropout.py) | 256x192 | 0.753 | 0.908 | 0.822 | 0.806 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout_20210320.log.json) |
+| [gridmask](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_gridmask.py) | 256x192 | 0.752 | 0.906 | 0.825 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask_20210320.log.json) |
+| [photometric](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_photometric.py) | 256x192 | 0.753 | 0.909 | 0.825 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric_20210320.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..58b7304e2944fb111d61e41ee5a18573ca7d8490
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.yml
@@ -0,0 +1,56 @@
+Collections:
+- Name: Albumentations
+ Paper:
+ Title: 'Albumentations: fast and flexible image augmentations'
+ URL: https://www.mdpi.com/649002
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/albumentations.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_coarsedropout.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_256x192_coarsedropout
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.908
+ AP@0.75: 0.822
+ AR: 0.806
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_gridmask.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_256x192_gridmask
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.752
+ AP@0.5: 0.906
+ AP@0.75: 0.825
+ AR: 0.804
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_photometric.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_256x192_photometric
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.909
+ AP@0.75: 0.825
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..794a08419aab4609bba8d9a05db6510800ff1851
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.md
@@ -0,0 +1,60 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_dark.py) | 256x192 | 0.757 | 0.907 | 0.823 | 0.808 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192_dark-07f147eb_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192_dark_20200812.log.json) |
+| [pose_hrnet_w32_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_dark.py) | 384x288 | 0.766 | 0.907 | 0.831 | 0.815 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288_dark-307dafc2_20210203.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288_dark_20210203.log.json) |
+| [pose_hrnet_w48_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192_dark.py) | 256x192 | 0.764 | 0.907 | 0.830 | 0.814 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192_dark-8cba3197_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192_dark_20200812.log.json) |
+| [pose_hrnet_w48_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_dark.py) | 384x288 | 0.772 | 0.910 | 0.836 | 0.820 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-e881a4b6_20210203.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark_20210203.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..49c2e863bb85b76d4f853948f9f1c77ebdbe13a6
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.yml
@@ -0,0 +1,73 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_256x192_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.907
+ AP@0.75: 0.823
+ AR: 0.808
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192_dark-07f147eb_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_384x288_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.766
+ AP@0.5: 0.907
+ AP@0.75: 0.831
+ AR: 0.815
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288_dark-307dafc2_20210203.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w48_coco_256x192_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.764
+ AP@0.5: 0.907
+ AP@0.75: 0.83
+ AR: 0.814
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192_dark-8cba3197_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w48_coco_384x288_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.772
+ AP@0.5: 0.91
+ AP@0.75: 0.836
+ AR: 0.82
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-e881a4b6_20210203.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..c2e4b70494428786d83b747a4c494f5a9876268b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.md
@@ -0,0 +1,56 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+FP16 (ArXiv'2017)
+
+```bibtex
+@article{micikevicius2017mixed,
+ title={Mixed precision training},
+ author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
+ journal={arXiv preprint arXiv:1710.03740},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32_fp16](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_fp16_dynamic.py) | 256x192 | 0.746 | 0.905 | 0.88 | 0.800 | 0.943 | [ckpt](hrnet_w32_coco_256x192_fp16_dynamic-290efc2e_20210430.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192_fp16_dynamic_20210430.log.json) |
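For context, the fp16 config referenced above is expected to follow the usual mmpose pattern of inheriting the plain HRNet-W32 config and switching on mixed-precision training; the fragment below is a hedged sketch of that pattern (the `_base_` path is an assumption based on the file naming, not a copy of the actual file), using the same `fp16 = dict(loss_scale='dynamic')` setting that appears in the HRFormer configs in this tree.

```python
# Hedged sketch of the fp16 variant pattern: inherit the fp32 config and add
# a single fp16 entry enabling dynamic loss scaling. The _base_ path is an
# assumption based on the file naming, not copied from the actual config.
_base_ = ['./hrnet_w32_coco_256x192.py']

# fp16 settings
fp16 = dict(loss_scale='dynamic')
```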
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..47f39f4eb9e592b233f22a66aa8d8908a46b7201
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_fp16_dynamic.py
+ In Collection: HRNet
+ Metadata:
+ Architecture:
+ - HRNet
+ Training Data: COCO
+ Name: topdown_heatmap_hrnet_w32_coco_256x192_fp16_dynamic
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.746
+ AP@0.5: 0.905
+ AP@0.75: 0.88
+ AR: 0.8
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: hrnet_w32_coco_256x192_fp16_dynamic-290efc2e_20210430.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_udp_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_udp_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..acc7207a7b5710832e3f8a53a734ac8d2c7e08b9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_udp_coco.md
@@ -0,0 +1,63 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a person detector having human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32_udp](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp.py) | 256x192 | 0.760 | 0.907 | 0.827 | 0.811 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_256x192_udp-aba0be42_20210220.pth) | [log](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_256x192_udp_20210220.log.json) |
+| [pose_hrnet_w32_udp](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_udp.py) | 384x288 | 0.769 | 0.908 | 0.833 | 0.817 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_384x288_udp-e97c1a0f_20210223.pth) | [log](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_384x288_udp_20210223.log.json) |
+| [pose_hrnet_w48_udp](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192_udp.py) | 256x192 | 0.767 | 0.906 | 0.834 | 0.817 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w48_coco_256x192_udp-2554c524_20210223.pth) | [log](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w48_coco_256x192_udp_20210223.log.json) |
+| [pose_hrnet_w48_udp](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_udp.py) | 384x288 | 0.772 | 0.910 | 0.835 | 0.820 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w48_coco_384x288_udp-0f89c63e_20210223.pth) | [log](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w48_coco_384x288_udp_20210223.log.json) |
+| [pose_hrnet_w32_udp_regress](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp_regress.py) | 256x192 | 0.758 | 0.908 | 0.823 | 0.812 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_256x192_udp_regress-be2dbba4_20210222.pth) | [log](https://download.openmmlab.com/mmpose/top_down/udp/hrnet_w32_coco_256x192_udp_regress_20210222.log.json) |
+
+Note that UDP also adopts the unbiased encoding/decoding algorithm of [DARK](https://mmpose.readthedocs.io/en/latest/papers/techniques.html#div-align-center-darkpose-cvpr-2020-div).
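In config terms, the UDP variants reuse the standard pipelines but pass `use_udp=True` to the affine step (the same flag already appears in the pipeline of the first config in this tree); the sketch below shows only that change, with the UDP-specific target encoding deliberately omitted rather than guessed at.

```python
# Sketch: a val pipeline with unbiased data processing enabled. Identical to
# the plain pipelines above except that TopDownAffine receives use_udp=True.
# The UDP-specific target encoding used during training is omitted here.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownAffine', use_udp=True),  # unbiased affine transform
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]
```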
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f3f45e3a9cdb8051e803e7ab4ffc4b09bc55409
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_coarsedropout.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_coarsedropout.py
new file mode 100644
index 0000000000000000000000000000000000000000..9306e5cc701bf40157ca82aa168ec6935cfed8da
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_coarsedropout.py
@@ -0,0 +1,179 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/top_down/hrnet/'
+ 'hrnet_w32_coco_256x192-c78dce93_20200708.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(
+ type='CoarseDropout',
+ max_holes=8,
+ max_height=40,
+ max_width=40,
+ min_holes=1,
+ min_height=10,
+ min_width=10,
+ p=0.5),
+ ]),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a04bd43156a1936fc71890e93929f659ade64e7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_dark.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_fp16_dynamic.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_fp16_dynamic.py
new file mode 100644
index 0000000000000000000000000000000000000000..234d58a2626fa1d17a204884772870dbd66f46e3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_fp16_dynamic.py
@@ -0,0 +1,4 @@
+_base_ = ['./hrnet_w32_coco_256x192.py']
+
+# fp16 settings
+fp16 = dict(loss_scale='dynamic')
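
This four-line config relies on mmcv's `_base_` inheritance: a derived config declares only its delta (here, the fp16 settings), and everything else is merged in from `hrnet_w32_coco_256x192.py`. A minimal sketch of how that merge can be inspected, assuming an mmcv 1.x environment and that the script runs from the repository root so the vendored path below resolves:

```python
# Sketch: load the fp16 config and confirm that fields from the base config
# were merged in alongside the fp16 delta (assumes mmcv 1.x is installed).
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
    'hrnet_w32_coco_256x192_fp16_dynamic.py')

print(cfg.fp16)                  # loss_scale='dynamic' comes from this file
print(cfg.model.backbone.type)   # 'HRNet', inherited from the base config
print(cfg.data.samples_per_gpu)  # 64, also inherited from the base config
```
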
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..5512c3c5b96f100eee0be4934388aba0443ce6fc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp.py
@@ -0,0 +1,173 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp_regress.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp_regress.py
new file mode 100644
index 0000000000000000000000000000000000000000..940ad911d2afb6abc507ebcdb802ce842fc1e3fd
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192_udp_regress.py
@@ -0,0 +1,171 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'CombinedTarget'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=3 * channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='CombinedTargetMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget', encoding='UDP', target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1b8eb20f74c942c72aa373e7c5bd7a08ba89082
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8e7b5282f7e914080840afdf9e7c99d0204e408
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288_udp.py
@@ -0,0 +1,173 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=17,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=3,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..305d680f227d29e39df621c9a6b81b5fae9bc8d7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..1776926bf139097d857c20a3d5350301e61a5d17
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..82a8009d02f103956bbb5b8bdd1b108805dc0441
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_dark.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..8fa81909af3732e3b25b89b4f897598f8407c425
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288_udp.py
@@ -0,0 +1,173 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=17,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=3,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..593bf2208534c306d2d59b1a93f46b7b60091fe3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_256x192.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=40,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdf41d5fbf3c53d913591d704d3ab122ed4017a9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_384x288.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=40,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..7ce55162b9b7f9c706e95eace342326b978f4013
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.md
@@ -0,0 +1,42 @@
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [LiteHRNet-18](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_256x192.py) | 256x192 | 0.643 | 0.868 | 0.720 | 0.706 | 0.912 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192_20211230.log.json) |
+| [LiteHRNet-18](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_384x288.py) | 384x288 | 0.677 | 0.878 | 0.746 | 0.735 | 0.920 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288_20211230.log.json) |
+| [LiteHRNet-30](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_256x192.py) | 256x192 | 0.675 | 0.881 | 0.754 | 0.736 | 0.924 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192_20210626.log.json) |
+| [LiteHRNet-30](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_384x288.py) | 384x288 | 0.700 | 0.884 | 0.776 | 0.758 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288_20210626.log.json) |
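+
+The checkpoints above can be used directly for top-down inference through the mmpose API that ViTPose builds on. The snippet below is a minimal sketch, assuming the mmpose 0.x interface (`init_pose_model`, `inference_top_down_pose_model`, `vis_pose_result`); the image path, bounding box and local checkpoint path are placeholders.
+
+```python
+import numpy as np
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+config_file = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
+               'litehrnet_18_coco_256x192.py')
+checkpoint_file = 'litehrnet18_coco_256x192-6bace359_20211230.pth'  # local copy of the ckpt above
+
+# Build the top-down pose model from the config and checkpoint.
+pose_model = init_pose_model(config_file, checkpoint_file, device='cuda:0')
+
+# One person box in xyxy format with a detection score (placeholder values).
+person_results = [{'bbox': np.array([50, 50, 250, 400, 0.99])}]
+
+# Estimate the 17 COCO keypoints inside the given person box.
+pose_results, _ = inference_top_down_pose_model(
+    pose_model, 'demo.jpg', person_results, format='xyxy',
+    dataset='TopDownCocoDataset')
+
+# Draw the predicted keypoints and save the visualization.
+vis_pose_result(pose_model, 'demo.jpg', pose_results,
+                dataset='TopDownCocoDataset', out_file='vis_demo.jpg')
+```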
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ba22c59364a6960cf8619fc69b98f10d4f5b1ff
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: LiteHRNet
+ Paper:
+ Title: 'Lite-HRNet: A Lightweight High-Resolution Network'
+ URL: https://arxiv.org/abs/2104.06403
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/litehrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_256x192.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: &id001
+ - LiteHRNet
+ Training Data: COCO
+ Name: topdown_heatmap_litehrnet_18_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.643
+ AP@0.5: 0.868
+ AP@0.75: 0.72
+ AR: 0.706
+ AR@0.5: 0.912
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_18_coco_384x288.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_litehrnet_18_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.677
+ AP@0.5: 0.878
+ AP@0.75: 0.746
+ AR: 0.735
+ AR@0.5: 0.92
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_256x192.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_litehrnet_30_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.675
+ AP@0.5: 0.881
+ AP@0.75: 0.754
+ AR: 0.736
+ AR@0.5: 0.924
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_384x288.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_litehrnet_30_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.7
+ AP@0.5: 0.884
+ AP@0.75: 0.776
+ AR: 0.758
+ AR@0.5: 0.928
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cf19575fae9d0949bf50c577d92ab253fc21318b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture: &id001
+ - MobilenetV2
+ Training Data: COCO
+ Name: topdown_heatmap_mobilenetv2_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.646
+ AP@0.5: 0.874
+ AP@0.75: 0.723
+ AR: 0.707
+ AR@0.5: 0.917
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_coco_256x192-d1e58e7b_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_384x288.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_mobilenetv2_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.673
+ AP@0.5: 0.879
+ AP@0.75: 0.743
+ AR: 0.729
+ AR@0.5: 0.916
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_coco_384x288-26be4816_20200727.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e613b6e0daa5dc901594333604f76159ff9eb12
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e0c0171dec1c2f059483409b6ba2325498c31e2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=[
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(11, 11), (9, 9), (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..22a3f9b1e16d3bc0018774492ce61f21edf817bf
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn_coco.md
@@ -0,0 +1,42 @@
+
+MSPN (ArXiv'2019)
+
+```bibtex
+@article{li2019rethinking,
+ title={Rethinking on Multi-Stage Networks for Human Pose Estimation},
+ author={Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
+ journal={arXiv preprint arXiv:1901.00148},
+ year={2019}
+}
+```
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [mspn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py) | 256x192 | 0.723 | 0.895 | 0.794 | 0.788 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192-8fbfb5d0_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192_20201123.log.json) |
+| [2xmspn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py) | 256x192 | 0.754 | 0.903 | 0.825 | 0.815 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192-c8765a5c_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192_20201123.log.json) |
+| [3xmspn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py) | 256x192 | 0.758 | 0.904 | 0.830 | 0.821 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192-e348f18e_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192_20201123.log.json) |
+| [4xmspn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py) | 256x192 | 0.764 | 0.906 | 0.835 | 0.826 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192-7b837afb_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192_20201123.log.json) |
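+
+These configs rely on mmcv's config inheritance: each `{{_base_.dataset_info}}` entry is replaced at parse time by the `dataset_info` dict defined in the base file `configs/_base_/datasets/coco.py`. Below is a minimal sketch of inspecting a resolved config with `mmcv.Config`; the path is assumed to be relative to the vendored ViTPose root.
+
+```python
+from mmcv import Config
+
+# Parse the config; _base_ files are merged and {{_base_.*}} references resolved.
+cfg = Config.fromfile('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
+                      'mspn50_coco_256x192.py')
+
+print(cfg.model.backbone.type)     # 'MSPN'
+print(cfg.data_cfg['image_size'])  # [192, 256], i.e. (width, height)
+# dataset_info is no longer the literal placeholder but the dict from coco.py:
+print(cfg.data.train.dataset_info['dataset_name'])  # expected: 'coco'
+```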
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0963b44abfbe4f4f369b38040315171faf00b5c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..465c00f22815f7119ebaaaeb522c14a82e0d6897
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192_dark.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
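+
+# DARK variant: relative to res101_coco_256x192.py, the only changes are the
+# 'unbiased' post-processing above and the unbiased ('DARK') heatmap encoding
+# in TopDownGenerateTarget below; backbone, schedule and data settings are
+# identical.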
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..037811ad84ffb047b337677a1fcbcbe61d6682ce
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a413c9c3f834fd6aae069557c580bfca814b494
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288_dark.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..24537ccecec040f40efad011d5c0529d6f4cb74d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..88f192f7c77630d83ea443f5d2d547e0515a33f9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288_dark.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f64aad0be882d74efb591688e3a357a36453d9a5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..5121bb08b196fc255ba3d9ab408de791ddd4e7d4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_dark.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bd86690d2c4237b812aff4076458d5bacd8b98d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e737b6ae44126b811cff84954712143fcb2b2281
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnest101',
+ backbone=dict(type='ResNeSt', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
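
The config above is a standard mmcv-style file: `_base_` pulls in the default runtime and the COCO dataset definition, and the `{{_base_.dataset_info}}` placeholders are substituted when the file is parsed. A minimal sketch of loading and inspecting it, assuming mmcv 1.x is installed and the working directory is `vendor/ViTPose` so the relative `_base_` paths resolve:

```python
# Minimal sketch (assumptions: mmcv 1.x, cwd = vendor/ViTPose).
from mmcv import Config

cfg = Config.fromfile(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
    'resnest101_coco_256x192.py')

# {{_base_.dataset_info}} is filled in from _base_/datasets/coco.py at load
# time, so the resolved config is plain nested dicts/lists.
print(cfg.model.backbone)        # {'type': 'ResNeSt', 'depth': 101}
print(cfg.data_cfg.image_size)   # [192, 256]
print(cfg.data.train.ann_file)   # data/coco/annotations/person_keypoints_train2017.json
```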
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fb13b1954e019805a231ce427deb41b0e0db7bf
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnest101',
+ backbone=dict(type='ResNeSt', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
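
At inference time these top-down configs are consumed through mmpose's high-level API rather than edited directly. A minimal sketch, assuming the vendored ViTPose tree keeps mmpose's 0.x top-down interface; the image path, bounding box, and `checkpoint=None` are placeholders, not values from this diff:

```python
# Minimal sketch (assumption: mmpose 0.x-style API as vendored under ViTPose).
import numpy as np
from mmpose.apis import init_pose_model, inference_top_down_pose_model

config = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
          'resnest101_coco_384x288.py')
model = init_pose_model(config, checkpoint=None, device='cpu')  # pass a .pth for real weights

# One person box in xywh format plus a detection score, matching the
# use_gt_bbox=False / det_bbox_thr=0.0 settings in the config.
person_results = [{'bbox': np.array([50, 50, 200, 400, 0.99])}]

pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results, format='xywh',
    dataset='TopDownCocoDataset')
print(pose_results[0]['keypoints'].shape)  # (17, 3): x, y, score per COCO joint
```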
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..399a4d3c983c5a763446ae70b274f265559a5039
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnest200',
+ backbone=dict(type='ResNeSt', depth=200),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a16cd378117d04cd5cb481f6593a1e88ccdba44
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnest200',
+ backbone=dict(type='ResNeSt', depth=200),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=16,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=16),
+ test_dataloader=dict(samples_per_gpu=16),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..4bb1ab04b32ac81aa9e3424d391de658659d257c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest_coco.md
@@ -0,0 +1,46 @@
+
+
+
+ResNeSt (ArXiv'2020)
+
+```bibtex
+@article{zhang2020resnest,
+ title={ResNeSt: Split-Attention Networks},
+ author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+ journal={arXiv preprint arXiv:2004.08955},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnest_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_256x192.py) | 256x192 | 0.721 | 0.899 | 0.802 | 0.776 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192-6e65eece_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192_20210320.log.json) |
+| [pose_resnest_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_384x288.py) | 384x288 | 0.737 | 0.900 | 0.811 | 0.789 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288-dcd20436_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288_20210320.log.json) |
+| [pose_resnest_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py) | 256x192 | 0.725 | 0.899 | 0.807 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192-2ffcdc9d_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192_20210320.log.json) |
+| [pose_resnest_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py) | 384x288 | 0.746 | 0.906 | 0.820 | 0.798 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288-80660658_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288_20210320.log.json) |
+| [pose_resnest_200](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py) | 256x192 | 0.732 | 0.905 | 0.812 | 0.787 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192-db007a48_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192_20210517.log.json) |
+| [pose_resnest_200](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py) | 384x288 | 0.754 | 0.908 | 0.827 | 0.807 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288-b5bb76cb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288_20210517.log.json) |
+| [pose_resnest_269](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_256x192.py) | 256x192 | 0.738 | 0.907 | 0.819 | 0.793 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192-2a7882ac_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192_20210517.log.json) |
+| [pose_resnest_269](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_384x288.py) | 384x288 | 0.755 | 0.908 | 0.828 | 0.806 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288-b142b9fb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288_20210517.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..b66b95420d2edd5ca82fdc7a1ac4ec4c658ce6f8
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.md
@@ -0,0 +1,62 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py) | 256x192 | 0.718 | 0.898 | 0.795 | 0.773 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_20200709.log.json) |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py) | 384x288 | 0.731 | 0.900 | 0.799 | 0.783 | 0.931 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288-e6f795e9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288_20200709.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py) | 256x192 | 0.726 | 0.899 | 0.806 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192-6e6babf0_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192_20200708.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py) | 384x288 | 0.748 | 0.905 | 0.817 | 0.798 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288-8c71bdc9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288_20200709.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py) | 256x192 | 0.735 | 0.905 | 0.812 | 0.790 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192-f6e307c2_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192_20200709.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py) | 384x288 | 0.750 | 0.908 | 0.821 | 0.800 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288_20200709.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ba17ab7ed939c255389e47851575e98c375b053
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.yml
@@ -0,0 +1,105 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO
+ Name: topdown_heatmap_res50_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.718
+ AP@0.5: 0.898
+ AP@0.75: 0.795
+ AR: 0.773
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res50_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.731
+ AP@0.5: 0.9
+ AP@0.75: 0.799
+ AR: 0.783
+ AR@0.5: 0.931
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288-e6f795e9_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res101_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.726
+ AP@0.5: 0.899
+ AP@0.75: 0.806
+ AR: 0.781
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192-6e6babf0_20200708.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res101_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.905
+ AP@0.75: 0.817
+ AR: 0.798
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288-8c71bdc9_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res152_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.735
+ AP@0.5: 0.905
+ AP@0.75: 0.812
+ AR: 0.79
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192-f6e307c2_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res152_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.75
+ AP@0.5: 0.908
+ AP@0.75: 0.821
+ AR: 0.8
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth
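
The `.yml` metafiles follow the OpenMMLab model-index layout, so they can be read programmatically to pick a checkpoint; the `&id001`/`*id001` markers are ordinary YAML anchors and resolve on load. A minimal sketch with PyYAML, reading the metafile added just above from the repository root:

```python
# Minimal sketch (assumption: PyYAML installed, run from the repo root).
import yaml

with open('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'coco/resnet_coco.yml') as f:
    index = yaml.safe_load(f)

# Pick the SimpleBaseline2D entry with the highest COCO AP and report its weights.
best = max(index['Models'], key=lambda m: m['Results'][0]['Metrics']['AP'])
print(best['Name'])                         # topdown_heatmap_res152_coco_384x288
print(best['Results'][0]['Metrics']['AP'])  # 0.75
print(best['Weights'])                      # URL of the corresponding .pth checkpoint
```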
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_dark_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_dark_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7a4c79e6d45de4c7c30631b54b826e15804bf6d9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_dark_coco.yml
@@ -0,0 +1,106 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ - DarkPose
+ Training Data: COCO
+ Name: topdown_heatmap_res50_coco_256x192_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.724
+ AP@0.5: 0.898
+ AP@0.75: 0.8
+ AR: 0.777
+ AR@0.5: 0.936
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_dark-43379d20_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res50_coco_384x288_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.735
+ AP@0.5: 0.9
+ AP@0.75: 0.801
+ AR: 0.785
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288_dark-33d3e5e5_20210203.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res101_coco_256x192_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.732
+ AP@0.5: 0.899
+ AP@0.75: 0.808
+ AR: 0.786
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192_dark-64d433e6_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res101_coco_384x288_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.902
+ AP@0.75: 0.816
+ AR: 0.799
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288_dark-cb45c88d_20210203.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res152_coco_256x192_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.745
+ AP@0.5: 0.905
+ AP@0.75: 0.821
+ AR: 0.797
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192_dark-ab4840d5_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_res152_coco_384x288_dark
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.909
+ AP@0.75: 0.826
+ AR: 0.806
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288_dark-d3b8ebd7_20210203.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..5b147298be2648a24178af5c2b78a8d9a2b9003f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.md
@@ -0,0 +1,73 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+FP16 (ArXiv'2017)
+
+```bibtex
+@article{micikevicius2017mixed,
+ title={Mixed precision training},
+ author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
+ journal={arXiv preprint arXiv:1710.03740},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50_fp16](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_fp16_dynamic.py) | 256x192 | 0.717 | 0.898 | 0.793 | 0.772 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_fp16_dynamic-6edb79f3_20210430.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_fp16_dynamic_20210430.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8c7da122c2d29456c72c0f6e24d0eac5e4dee5b4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192_fp16_dynamic.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO
+ Name: topdown_heatmap_res50_coco_256x192_fp16_dynamic
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.717
+ AP@0.5: 0.898
+ AP@0.75: 0.793
+ AR: 0.772
+ AR@0.5: 0.936
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_fp16_dynamic-6edb79f3_20210430.pth
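
The FP16 entries reference `res50_coco_256x192_fp16_dynamic.py`, which is not reproduced in this diff. In mmpose-style configs, mixed-precision training is typically switched on by a single top-level `fp16` dict that mmcv's `Fp16OptimizerHook` picks up; the excerpt below is a hypothetical sketch in that style, not the contents of the referenced file:

```python
# Hypothetical sketch (assumption), not the actual referenced config file.
_base_ = ['./res50_coco_256x192.py']  # inherit the plain ResNet-50 recipe
fp16 = dict(loss_scale='dynamic')     # dynamic loss scaling handled by Fp16OptimizerHook
```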
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc5a5765426d7ebd76570da476fb9b59000cc765
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet101_v1d',
+ backbone=dict(type='ResNetV1d', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c3bcaa1ed4ed8f0c05de0909a7c2a44912b904e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet101_v1d',
+ backbone=dict(type='ResNetV1d', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9397f6291c7db63ac39a56ae76bc164fdce27ba
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet152_v1d',
+ backbone=dict(type='ResNetV1d', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=48,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d54416419c6d87ab22523dea41fe4fc6398cbf74
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet50_v1d',
+ backbone=dict(type='ResNetV1d', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..8435abd01b5b48c5bb85abd9567849cc720cc871
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet50_v1d',
+ backbone=dict(type='ResNetV1d', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..a879858488bdd1afccb7f31b489d79b3c77cf858
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.md
@@ -0,0 +1,45 @@
+
+
+
+ResNetV1D (CVPR'2019)
+
+```bibtex
+@inproceedings{he2019bag,
+ title={Bag of tricks for image classification with convolutional neural networks},
+ author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={558--567},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnetv1d_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py) | 256x192 | 0.722 | 0.897 | 0.799 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_256x192-a243b840_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_256x192_20200727.log.json) |
+| [pose_resnetv1d_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py) | 384x288 | 0.730 | 0.900 | 0.799 | 0.780 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_384x288-01f3fbb9_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_384x288_20200727.log.json) |
+| [pose_resnetv1d_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py) | 256x192 | 0.731 | 0.899 | 0.809 | 0.786 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_256x192-5bd08cab_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_256x192_20200727.log.json) |
+| [pose_resnetv1d_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py) | 384x288 | 0.748 | 0.902 | 0.816 | 0.799 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_384x288-5f9e421d_20200730.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_384x288-20200730.log.json) |
+| [pose_resnetv1d_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_256x192.py) | 256x192 | 0.737 | 0.902 | 0.812 | 0.791 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_256x192-c4df51dc_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_256x192_20200727.log.json) |
+| [pose_resnetv1d_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py) | 384x288 | 0.752 | 0.909 | 0.821 | 0.802 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_384x288-626c622d_20200730.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_384x288-20200730.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f7e9a1bd6616dfbc31bff374f0fa7950be6fc47b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.yml
@@ -0,0 +1,104 @@
+Collections:
+- Name: ResNetV1D
+ Paper:
+ Title: Bag of tricks for image classification with convolutional neural networks
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/He_Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnetv1d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: &id001
+ - ResNetV1D
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d50_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.897
+ AP@0.75: 0.799
+ AR: 0.777
+ AR@0.5: 0.933
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_256x192-a243b840_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d50_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.73
+ AP@0.5: 0.9
+ AP@0.75: 0.799
+ AR: 0.78
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_coco_384x288-01f3fbb9_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d101_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.731
+ AP@0.5: 0.899
+ AP@0.75: 0.809
+ AR: 0.786
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_256x192-5bd08cab_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d101_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.902
+ AP@0.75: 0.816
+ AR: 0.799
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_coco_384x288-5f9e421d_20200730.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_256x192.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d152_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.902
+ AP@0.75: 0.812
+ AR: 0.791
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_256x192-c4df51dc_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_resnetv1d152_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.752
+ AP@0.5: 0.909
+ AP@0.75: 0.821
+ AR: 0.802
+ AR@0.5: 0.944
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_coco_384x288-626c622d_20200730.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..082ccdda8b11db85016ddc3d4fdcf4abae665dc8
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext101_32x4d',
+ backbone=dict(type='ResNeXt', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc548a682c8dbd8414c700c59025b024224e9226
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext101_32x4d',
+ backbone=dict(type='ResNeXt', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..61645dec70964fa8db13d0b58e0871973c568239
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext50_32x4d',
+ backbone=dict(type='ResNeXt', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..8f241f03a418e2c1a0802d8bfaa506b9578acccb
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext_coco.md
@@ -0,0 +1,45 @@
+
+
+
+ResNeXt (CVPR'2017)
+
+```bibtex
+@inproceedings{xie2017aggregated,
+ title={Aggregated residual transformations for deep neural networks},
+ author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1492--1500},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnext_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_256x192.py) | 256x192 | 0.714 | 0.898 | 0.789 | 0.771 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192-dcff15f6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192_20200727.log.json) |
+| [pose_resnext_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py) | 384x288 | 0.724 | 0.899 | 0.794 | 0.777 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288-412c848f_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288_20200727.log.json) |
+| [pose_resnext_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py) | 256x192 | 0.726 | 0.900 | 0.801 | 0.782 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192-c7eba365_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192_20200727.log.json) |
+| [pose_resnext_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py) | 384x288 | 0.743 | 0.903 | 0.815 | 0.795 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288-f5eabcd6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288_20200727.log.json) |
+| [pose_resnext_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_256x192.py) | 256x192 | 0.730 | 0.904 | 0.808 | 0.786 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192-102449aa_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192_20200727.log.json) |
+| [pose_resnext_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_384x288.py) | 384x288 | 0.742 | 0.902 | 0.810 | 0.794 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288-806176df_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288_20200727.log.json) |
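+
+The configs above follow the standard mmpose top-down layout, so a trained checkpoint can be run through the mmpose inference API. The snippet below is a minimal sketch, assuming the vendored mmpose 0.x API (`init_pose_model` / `inference_top_down_pose_model`); `demo.jpg` and the person box are placeholders:
+
+```python
+from mmpose.apis import init_pose_model, inference_top_down_pose_model
+
+# Config from this folder and the matching checkpoint URL from the table above.
+config_file = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
+               'resnext101_coco_256x192.py')
+checkpoint_file = ('https://download.openmmlab.com/mmpose/top_down/resnext/'
+                   'resnext101_coco_256x192-c7eba365_20200727.pth')
+
+# Build the top-down model and load the weights (CPU keeps the sketch portable).
+pose_model = init_pose_model(config_file, checkpoint_file, device='cpu')
+
+# Top-down inference needs person boxes; a single placeholder xywh box here.
+person_results = [{'bbox': [50, 50, 200, 400]}]
+pose_results, _ = inference_top_down_pose_model(
+    pose_model,
+    'demo.jpg',  # placeholder path to an image containing a person
+    person_results,
+    format='xywh',
+    dataset='TopDownCocoDataset')
+
+# Each result holds the input bbox plus a (17, 3) keypoint array of (x, y, score).
+print(pose_results[0]['keypoints'].shape)
+```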
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3176d00b502132aa3409a421bd39b663c7cd100e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-2,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 190, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ num_steps=4,
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=[
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(11, 11), (9, 9), (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..65bf136ebb9760fe4395906cce44385904e40dd7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ num_steps=4,
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=[
+ dict(
+ type='JointsMSELoss', use_target_weight=True, loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='JointsOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='megvii',
+ shift_heatmap=False,
+ modulate_kernel=5))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ use_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ kernel=[(11, 11), (9, 9), (7, 7), (5, 5)],
+ encoding='Megvii'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=4,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..7cbb691e7ac8f1f73842e371dce2da6c943ce85d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn_coco.md
@@ -0,0 +1,44 @@
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [rsn_18](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py) | 256x192 | 0.704 | 0.887 | 0.779 | 0.771 | 0.926 | [ckpt](https://download.openmmlab.com/mmpose/top_down/rsn/rsn18_coco_256x192-72f4b4a7_20201127.pth) | [log](https://download.openmmlab.com/mmpose/top_down/rsn/rsn18_coco_256x192_20201127.log.json) |
+| [rsn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py) | 256x192 | 0.723 | 0.896 | 0.800 | 0.788 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/rsn/rsn50_coco_256x192-72ffe709_20201127.pth) | [log](https://download.openmmlab.com/mmpose/top_down/rsn/rsn50_coco_256x192_20201127.log.json) |
+| [2xrsn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xrsn50_coco_256x192.py) | 256x192 | 0.745 | 0.899 | 0.818 | 0.809 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/rsn/2xrsn50_coco_256x192-50648f0e_20201127.pth) | [log](https://download.openmmlab.com/mmpose/top_down/rsn/2xrsn50_coco_256x192_20201127.log.json) |
+| [3xrsn_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py) | 256x192 | 0.750 | 0.900 | 0.823 | 0.813 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/rsn/3xrsn50_coco_256x192-58f57a68_20201127.pth) | [log](https://download.openmmlab.com/mmpose/top_down/rsn/3xrsn50_coco_256x192_20201127.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b4c33b6168b3da4ac7bfcce3d736c85ef2a6b10
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py
@@ -0,0 +1,134 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet101-94250a77.pth',
+ backbone=dict(type='SCNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=1,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..2909f7872788cdb89d8e9d1ef24363fc3357ae01
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py
@@ -0,0 +1,134 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth',
+ backbone=dict(type='SCNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=1,
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..38754c0c2c26aca8553bee16f9cc6ff0f77c35db
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet_coco.md
@@ -0,0 +1,43 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017, using a person detector with a human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_scnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_256x192.py) | 256x192 | 0.728 | 0.899 | 0.807 | 0.784 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192-6920f829_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192_20200709.log.json) |
+| [pose_scnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py) | 384x288 | 0.751 | 0.906 | 0.818 | 0.802 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288-9cacd0ea_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288_20200709.log.json) |
+| [pose_scnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py) | 256x192 | 0.733 | 0.903 | 0.813 | 0.790 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192-6d348ef9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192_20200709.log.json) |
+| [pose_scnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_384x288.py) | 384x288 | 0.752 | 0.906 | 0.823 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288-0b6e631b_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288_20200709.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1942597ead9d51a576bfe8da48f9cbb2e80bd61b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet101',
+ backbone=dict(type='SEResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..412f79dcd2b4c8280330d5a9aa92a6370457f3ea
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet101',
+ backbone=dict(type='SEResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..83734d70a0f1a452303fdb99bbadedcac0e22f2c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='SEResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=48,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f499c61904007f2a7edbfe31f199d3f0465989a3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet50',
+ backbone=dict(type='SEResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..87cddbfc3aae50b14f2a05e1499bc4781b2d1cbc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet50',
+ backbone=dict(type='SEResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..75d1b9ceaa7f68496afda063c5fc1e3e25d65590
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet_coco.yml
@@ -0,0 +1,104 @@
+Collections:
+- Name: SEResNet
+ Paper:
+ Title: Squeeze-and-excitation networks
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/seresnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: &id001
+ - SEResNet
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet50_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.728
+ AP@0.5: 0.9
+ AP@0.75: 0.809
+ AR: 0.784
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192-25058b66_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet50_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.905
+ AP@0.75: 0.819
+ AR: 0.799
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288-bc0b7680_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet101_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.734
+ AP@0.5: 0.904
+ AP@0.75: 0.815
+ AR: 0.79
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192-83f29c4d_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet101_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.907
+ AP@0.75: 0.823
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288-48de1709_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_256x192.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet152_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.73
+ AP@0.5: 0.899
+ AP@0.75: 0.81
+ AR: 0.786
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192-1c628d79_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_seresnet152_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.906
+ AP@0.75: 0.823
+ AR: 0.806
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288-58b23ee8_20200727.pth
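
Model-index files such as `seresnet_coco.yml` above map each config to its pretrained weights and reported COCO metrics. A small sketch of reading that mapping with PyYAML (assumes PyYAML is installed and that the relative path below matches your checkout):

```python
# Sketch: list (name, AP, config, weights) rows from a model-index YAML like the
# seresnet_coco.yml above.
import yaml

def load_model_index(path):
    with open(path) as f:
        index = yaml.safe_load(f)   # YAML anchors (&id001 / *id001) resolve automatically
    rows = []
    for model in index.get('Models', []):
        metrics = model['Results'][0]['Metrics']
        rows.append((model['Name'], metrics.get('AP'), model['Config'], model['Weights']))
    return rows

if __name__ == '__main__':
    path = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
            'topdown_heatmap/coco/seresnet_coco.yml')
    for name, ap, cfg, weights in load_model_index(path):
        print(f'{name}: AP={ap}\n  config : {cfg}\n  weights: {weights}')
```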
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..59592e13147ab66bb5048e2f547468c409552440
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.md
@@ -0,0 +1,41 @@
+
+
+
+ShufflenetV1 (CVPR'2018)
+
+```bibtex
+@inproceedings{zhang2018shufflenet,
+ title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
+ author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={6848--6856},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_shufflenetv1](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_256x192.py) | 256x192 | 0.585 | 0.845 | 0.650 | 0.651 | 0.894 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_256x192-353bc02c_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_256x192_20200727.log.json) |
+| [pose_shufflenetv1](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_384x288.py) | 384x288 | 0.622 | 0.859 | 0.685 | 0.684 | 0.901 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_384x288-b2930b24_20200804.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_384x288_20200804.log.json) |
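
The `ckpt` links in the table pair with the listed configs for inference. A sketch of running one such pair on a single image through the vendored ViTPose / mmpose 0.x top-down API (`init_pose_model`, `inference_top_down_pose_model`, `vis_pose_result`); the image name `demo.jpg` and the whole-image person box are placeholders, and in practice the boxes come from a person detector:

```python
# Sketch: single-image top-down inference with a config/checkpoint pair from the
# table above. Assumes the vendored ViTPose (an mmpose 0.x fork) is installed and
# the checkpoint URL is reachable (or already downloaded).
from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
                         vis_pose_result)

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'coco/shufflenetv1_coco_256x192.py')
checkpoint = ('https://download.openmmlab.com/mmpose/top_down/shufflenetv1/'
              'shufflenetv1_coco_256x192-353bc02c_20200727.pth')

model = init_pose_model(config, checkpoint, device='cpu')

# One whole-image person box in xywh format; normally these come from a detector.
person_results = [{'bbox': [0, 0, 640, 480]}]
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results, format='xywh',
    dataset='TopDownCocoDataset')
vis_pose_result(model, 'demo.jpg', pose_results, out_file='demo_pose.jpg')
```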
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..29947512c319b99576c526ed60c83e74ee3acc6a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: ShufflenetV1
+ Paper:
+ Title: 'Shufflenet: An extremely efficient convolutional neural network for mobile
+ devices'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Zhang_ShuffleNet_An_Extremely_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/shufflenetv1.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_256x192.py
+ In Collection: ShufflenetV1
+ Metadata:
+ Architecture: &id001
+ - ShufflenetV1
+ Training Data: COCO
+ Name: topdown_heatmap_shufflenetv1_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.585
+ AP@0.5: 0.845
+ AP@0.75: 0.65
+ AR: 0.651
+ AR@0.5: 0.894
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_256x192-353bc02c_20200727.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_384x288.py
+ In Collection: ShufflenetV1
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_shufflenetv1_coco_384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.622
+ AP@0.5: 0.859
+ AP@0.75: 0.685
+ AR: 0.684
+ AR@0.5: 0.901
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_coco_384x288-b2930b24_20200804.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..7c88ba017408204c1605a13d51a9935db5c01484
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco.md
@@ -0,0 +1,41 @@
+
+
+
+ShufflenetV2 (ECCV'2018)
+
+```bibtex
+@inproceedings{ma2018shufflenet,
+ title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},
+ author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={116--131},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_shufflenetv2](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py) | 256x192 | 0.599 | 0.854 | 0.663 | 0.664 | 0.899 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_coco_256x192-0aba71c7_20200921.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_coco_256x192_20200921.log.json) |
+| [pose_shufflenetv2](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py) | 384x288 | 0.636 | 0.865 | 0.705 | 0.697 | 0.909 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_coco_384x288-fb38ac3a_20200921.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_coco_384x288_20200921.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..44745a67781c2b1e9d79f9ee1841c32bde53d16a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://shufflenet_v2',
+ backbone=dict(type='ShuffleNetV2', widen_factor=1.0),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
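
All of these baselines share the same schedule: `lr_config` applies a linear warmup over the first 500 iterations and then steps the Adam learning rate down at epochs 170 and 200 of the 210-epoch run. A sketch of the resulting learning rate as a function of progress; the decay factor 0.1 is mmcv's default gamma for the `step` policy (it is not set explicitly in these configs), so treat it as an assumption:

```python
# Sketch of the warmup + step schedule encoded by lr_config above.

BASE_LR = 5e-4
WARMUP_ITERS = 500
WARMUP_RATIO = 0.001
STEPS = [170, 200]
GAMMA = 0.1          # assumed: mmcv's default gamma for the 'step' policy

def lr_at(epoch, cur_iter):
    """Learning rate for a given epoch / global iteration."""
    decayed = BASE_LR * GAMMA ** sum(epoch >= s for s in STEPS)
    if cur_iter < WARMUP_ITERS:
        # linear warmup from decayed * WARMUP_RATIO up to the decayed target lr
        k = cur_iter / WARMUP_ITERS
        return decayed * (WARMUP_RATIO + (1 - WARMUP_RATIO) * k)
    return decayed

if __name__ == '__main__':
    for epoch, it in [(0, 0), (0, 250), (1, 5000), (170, 200000), (200, 240000)]:
        print(f'epoch {epoch:3d} iter {it:6d}: lr = {lr_at(epoch, it):.2e}')
```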
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebff9346c548cb2bc657202d0dfa457aa24b18f8
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py
@@ -0,0 +1,135 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://shufflenet_v2',
+ backbone=dict(type='ShuffleNetV2', widen_factor=1.0),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..4cc6f6f5dd7c41c212c81865b6dbbe26ac0b2a3b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg_coco.md
@@ -0,0 +1,39 @@
+
+
+
+VGG (ICLR'2015)
+
+```bibtex
+@article{simonyan2014very,
+ title={Very deep convolutional networks for large-scale image recognition},
+ author={Simonyan, Karen and Zisserman, Andrew},
+ journal={arXiv preprint arXiv:1409.1556},
+ year={2014}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [vgg](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg16_bn_coco_256x192.py) | 256x192 | 0.698 | 0.890 | 0.768 | 0.754 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192-7e7c58d6_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192_20210517.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..c86943c5224543e632a100bf18d83f44f3691d4b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md
@@ -0,0 +1,40 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [S-ViPNAS-MobileNetV3](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py) | 256x192 | 0.700 | 0.887 | 0.778 | 0.757 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_256x192-7018731a_20211122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_256x192_20211122.log.json) |
+| [S-ViPNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) |
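
As in the other configs, these models predict 48x64 heatmaps for a 192x256 input, so decoded keypoints are scaled back up by a factor of 4 in each direction. A pure-NumPy sketch of a basic argmax decode (illustrative; the real mmpose post-processing also handles flip-test averaging and sub-pixel refinement, which are omitted here):

```python
# Sketch: decode keypoints from a set of heatmaps, mirroring the
# image_size=[192, 256] / heatmap_size=[48, 64] ratio used by the configs above.
import numpy as np

def decode_heatmaps(heatmaps, image_size=(192, 256)):
    """heatmaps: (K, H, W) -> (K, 3) array of (x, y, score) in input-image pixels."""
    num_joints, h, w = heatmaps.shape
    stride_x = image_size[0] / w   # 192 / 48 = 4
    stride_y = image_size[1] / h   # 256 / 64 = 4
    keypoints = np.zeros((num_joints, 3), dtype=np.float32)
    for k, hm in enumerate(heatmaps):
        y, x = np.unravel_index(np.argmax(hm), hm.shape)
        keypoints[k] = (x * stride_x, y * stride_y, hm[y, x])
    return keypoints

if __name__ == '__main__':
    fake = np.random.rand(17, 64, 48).astype(np.float32)   # 17 COCO joints
    print(decode_heatmaps(fake)[:3])
```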
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e476d28d12d5ae3679865034213443c389767d02
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: ViPNAS
+ Paper:
+ Title: 'ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search'
+ URL: https://arxiv.org/abs/2105.10154
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/vipnas.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ Training Data: COCO
+ Name: topdown_heatmap_vipnas_mbv3_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.7
+ AP@0.5: 0.887
+ AP@0.75: 0.778
+ AR: 0.757
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_256x192-7018731a_20211122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_vipnas_res50_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.711
+ AP@0.5: 0.893
+ AP@0.75: 0.789
+ AR: 0.769
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..96420528833e9fcc8849444db3d4a03307e295cc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py
@@ -0,0 +1,138 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=160,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_filters=(160, 160, 160),
+ num_deconv_groups=(160, 160, 160),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3409caee7837407748e928de81612072161f6801
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py'
+]
+evaluation = dict(interval=10, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_ResNet', depth=50),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=608,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
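
Every pipeline above normalizes with the standard ImageNet statistics (`mean=[0.485, 0.456, 0.406]`, `std=[0.229, 0.224, 0.225]`). A minimal NumPy equivalent of the `ToTensor` + `NormalizeTensor` pair for a single RGB crop (illustrative only; mmpose itself operates on torch tensors):

```python
# Sketch: scale a uint8 HxWx3 RGB crop to [0, 1], normalize with ImageNet
# statistics, and move channels first, as the ToTensor + NormalizeTensor steps do.
import numpy as np

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def to_normalized_chw(img_hwc_uint8):
    """uint8 HxWx3 RGB image -> normalized float32 3xHxW array."""
    img = img_hwc_uint8.astype(np.float32) / 255.0
    img = (img - IMAGENET_MEAN) / IMAGENET_STD
    return img.transpose(2, 0, 1)

if __name__ == '__main__':
    dummy = np.random.randint(0, 256, size=(256, 192, 3), dtype=np.uint8)
    print(to_normalized_chw(dummy).shape)   # (3, 256, 192)
```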
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_huge_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_huge_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..612aaf0b32688fdf874c30eefe6bbb3ab0fb9767
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_huge_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py
@@ -0,0 +1,491 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py',
+ '../../../../_base_/datasets/aic_info.py',
+ '../../../../_base_/datasets/mpii_info.py',
+ '../../../../_base_/datasets/ap10k_info.py',
+ '../../../../_base_/datasets/coco_wholebody_info.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=32,
+ layer_decay_rate=0.8,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+aic_channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+mpii_channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+crowdpose_channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+ap10k_channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+cocowholebody_channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+
+# model settings
+model = dict(
+ type='TopDownMoE',
+ pretrained=None,
+ backbone=dict(
+ type='ViTMoE',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ num_expert=6,
+ part_features=320
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ associate_keypoint_head=[
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=aic_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=mpii_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=ap10k_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=ap10k_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=cocowholebody_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ ],
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ max_num_joints=133,
+ dataset_idx=0,
+)
+
+aic_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=aic_channel_cfg['num_output_channels'],
+ num_joints=aic_channel_cfg['dataset_joints'],
+ dataset_channel=aic_channel_cfg['dataset_channel'],
+ inference_channel=aic_channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ max_num_joints=133,
+ dataset_idx=1,
+)
+
+mpii_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=mpii_channel_cfg['num_output_channels'],
+ num_joints=mpii_channel_cfg['dataset_joints'],
+ dataset_channel=mpii_channel_cfg['dataset_channel'],
+ inference_channel=mpii_channel_cfg['inference_channel'],
+ max_num_joints=133,
+ dataset_idx=2,
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+ap10k_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ max_num_joints=133,
+ dataset_idx=3,
+)
+
+ap36k_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ max_num_joints=133,
+ dataset_idx=4,
+)
+
+cocowholebody_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=cocowholebody_channel_cfg['num_output_channels'],
+ num_joints=cocowholebody_channel_cfg['dataset_joints'],
+ dataset_channel=cocowholebody_channel_cfg['dataset_channel'],
+ inference_channel=cocowholebody_channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ dataset_idx=5,
+ max_num_joints=133,
+)
+
+cocowholebody_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+ap10k_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+aic_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+mpii_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+aic_data_root = 'data/aic'
+mpii_data_root = 'data/mpii'
+ap10k_data_root = 'data/ap10k'
+ap36k_data_root = 'data/ap36k'
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=8,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=[
+ dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ dict(
+ type='TopDownAicDataset',
+ ann_file=f'{aic_data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{aic_data_root}/ai_challenger_keypoint_train_20170909/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=aic_data_cfg,
+ pipeline=aic_train_pipeline,
+ dataset_info={{_base_.aic_info}}),
+ dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{mpii_data_root}/annotations/mpii_train.json',
+ img_prefix=f'{mpii_data_root}/images/',
+ data_cfg=mpii_data_cfg,
+ pipeline=mpii_train_pipeline,
+ dataset_info={{_base_.mpii_info}}),
+ dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{ap10k_data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{ap10k_data_root}/data/',
+ data_cfg=ap10k_data_cfg,
+ pipeline=ap10k_train_pipeline,
+ dataset_info={{_base_.ap10k_info}}),
+ dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{ap36k_data_root}/annotations/train_annotations_1.json',
+ img_prefix=f'{ap36k_data_root}/',
+ data_cfg=ap36k_data_cfg,
+ pipeline=ap10k_train_pipeline,
+ dataset_info={{_base_.ap10k_info}}),
+ dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=cocowholebody_data_cfg,
+ pipeline=cocowholebody_train_pipeline,
+ dataset_info={{_base_.cocowholebody_info}}),
+ ],
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
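
The ViTPose+ configs swap the plain Adam optimizer for AdamW with a `LayerDecayOptimizerConstructor`, so each of the 32 ViT-Huge blocks trains with a learning rate scaled by `layer_decay_rate=0.8` raised to its depth from the head. A sketch of the multipliers this implies (the rule `mult = rate ** (num_layers + 1 - layer_id)` is the common layer-decay convention; the exact parameter grouping inside ViTPose's constructor may differ in detail):

```python
# Sketch: per-layer learning rates implied by layer_decay_rate=0.8 with the
# 32-block ViT-Huge backbone and base lr 1e-3 from the config above.

NUM_LAYERS = 32
DECAY_RATE = 0.8
BASE_LR = 1e-3

def layer_lr(layer_id):
    """layer_id 0 = patch embedding, 1..NUM_LAYERS = transformer blocks,
    NUM_LAYERS + 1 = keypoint head (multiplier 1.0)."""
    return BASE_LR * DECAY_RATE ** (NUM_LAYERS + 1 - layer_id)

if __name__ == '__main__':
    for layer_id in (0, 1, 16, 32, 33):
        print(f'layer {layer_id:2d}: lr = {layer_lr(layer_id):.3e}')
```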
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_large_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_large_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..0936de449b9a2bb74510b51e1d4e81f2c11eb8ac
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitPose+_large_coco+aic+mpii+ap10k+apt36k+wholebody_256x192_udp.py
@@ -0,0 +1,491 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco.py',
+ '../../../../_base_/datasets/aic_info.py',
+ '../../../../_base_/datasets/mpii_info.py',
+ '../../../../_base_/datasets/ap10k_info.py',
+ '../../../../_base_/datasets/coco_wholebody_info.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.1,
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_cfg=dict(
+ num_layers=24,
+ layer_decay_rate=0.8,
+ custom_keys={
+ 'bias': dict(decay_mult=0.),
+ 'pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }
+ )
+ )
+
+optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
+
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+aic_channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+mpii_channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+crowdpose_channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+ap10k_channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+cocowholebody_channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+
+# model settings
+model = dict(
+ type='TopDownMoE',
+ pretrained=None,
+ backbone=dict(
+ type='ViTMoE',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.5,
+ num_expert=6,
+ part_features=256
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ associate_keypoint_head=[
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=aic_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=mpii_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=ap10k_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=ap10k_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=cocowholebody_channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ ],
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ max_num_joints=133,
+ dataset_idx=0,
+)
+
+aic_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=aic_channel_cfg['num_output_channels'],
+ num_joints=aic_channel_cfg['dataset_joints'],
+ dataset_channel=aic_channel_cfg['dataset_channel'],
+ inference_channel=aic_channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ max_num_joints=133,
+ dataset_idx=1,
+)
+
+mpii_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=mpii_channel_cfg['num_output_channels'],
+ num_joints=mpii_channel_cfg['dataset_joints'],
+ dataset_channel=mpii_channel_cfg['dataset_channel'],
+ inference_channel=mpii_channel_cfg['inference_channel'],
+ max_num_joints=133,
+ dataset_idx=2,
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+ap10k_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ max_num_joints=133,
+ dataset_idx=3,
+)
+
+ap36k_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ max_num_joints=133,
+ dataset_idx=4,
+)
+
+cocowholebody_data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=cocowholebody_channel_cfg['num_output_channels'],
+ num_joints=cocowholebody_channel_cfg['dataset_joints'],
+ dataset_channel=cocowholebody_channel_cfg['dataset_channel'],
+ inference_channel=cocowholebody_channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ dataset_idx=5,
+ max_num_joints=133,
+)
+
+cocowholebody_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+ap10k_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+aic_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+mpii_train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs', 'dataset_idx'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+aic_data_root = 'data/aic'
+mpii_data_root = 'data/mpii'
+ap10k_data_root = 'data/ap10k'
+ap36k_data_root = 'data/ap36k'
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=8,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=[
+ dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ dict(
+ type='TopDownAicDataset',
+ ann_file=f'{aic_data_root}/annotations/person_keypoints_train2017.json',
+ img_prefix=f'{aic_data_root}/ai_challenger_keypoint_train_20170909/'
+ 'keypoint_train_images_20170902/',
+ data_cfg=aic_data_cfg,
+ pipeline=aic_train_pipeline,
+ dataset_info={{_base_.aic_info}}),
+ dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{mpii_data_root}/annotations/mpii_train.json',
+ img_prefix=f'{mpii_data_root}/images/',
+ data_cfg=mpii_data_cfg,
+ pipeline=mpii_train_pipeline,
+ dataset_info={{_base_.mpii_info}}),
+ dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{ap10k_data_root}/annotations/ap10k-train-split1.json',
+ img_prefix=f'{ap10k_data_root}/data/',
+ data_cfg=ap10k_data_cfg,
+ pipeline=ap10k_train_pipeline,
+ dataset_info={{_base_.ap10k_info}}),
+ dict(
+ type='AnimalAP10KDataset',
+ ann_file=f'{ap36k_data_root}/annotations/train_annotations_1.json',
+ img_prefix=f'{ap36k_data_root}/',
+ data_cfg=ap36k_data_cfg,
+ pipeline=ap10k_train_pipeline,
+ dataset_info={{_base_.ap10k_info}}),
+ dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=cocowholebody_data_cfg,
+ pipeline=cocowholebody_train_pipeline,
+ dataset_info={{_base_.cocowholebody_info}}),
+ ],
+ val=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoDataset',
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
+
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_base_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_base_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad98bc24d78b89e19db7f142aefce74d892ecd81
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_base_crowdpose_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_huge_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_huge_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ddd2885a1457afa74344e8ced59299053af40a5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_huge_crowdpose_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_large_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_large_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d6fd54f8211d2b3d451dc9b5c3331ba85583b0d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_large_crowdpose_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..6d3e2473c30fecf4c7f49b262b4ea2a8cefac992
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.md
@@ -0,0 +1,39 @@
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | :------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_256x192.py) | 256x192 | 0.675 | 0.825 | 0.729 | 0.770 | 0.687 | 0.553 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192_20201227.log.json) |
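+
+The checkpoint above is used in top-down fashion, i.e. it expects person boxes from a detector (YOLOv3 in the table). Below is a minimal usage sketch, assuming an mmpose 0.x-style install and placeholder paths, image, and box:
+
+```python
+# Minimal top-down inference sketch; the paths, image and example box are placeholders.
+from mmpose.apis import init_pose_model, inference_top_down_pose_model
+from mmpose.datasets import DatasetInfo
+
+config_file = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+               'crowdpose/hrnet_w32_crowdpose_256x192.py')
+checkpoint_file = 'hrnet_w32_crowdpose_256x192-960be101_20201227.pth'  # ckpt from the table
+
+pose_model = init_pose_model(config_file, checkpoint_file, device='cuda:0')
+dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+
+# One detected person box in xywh format, e.g. from YOLOv3.
+person_results = [{'bbox': [280.0, 44.0, 218.0, 346.0]}]
+
+pose_results, _ = inference_top_down_pose_model(
+    pose_model,
+    'demo.jpg',
+    person_results,
+    format='xywh',
+    dataset='TopDownCrowdPoseDataset',
+    dataset_info=dataset_info)
+print(pose_results[0]['keypoints'].shape)  # (14, 3): x, y, score for each CrowdPose joint
+```
+
+The same pattern applies to the other configs in this folder; only `config_file` and `checkpoint_file` change.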
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cf1f8b7a2d6aadb6d52f1a7f35e5a70db276ce7d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture:
+ - HRNet
+ Training Data: CrowdPose
+ Name: topdown_heatmap_hrnet_w32_crowdpose_256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.675
+ AP (E): 0.77
+ AP (H): 0.553
+ AP (M): 0.687
+ AP@0.5: 0.825
+ AP@0.75: 0.729
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8fc5f47d5dbc16ae36b83f0df53955670509bb1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..f94fda4a1980fec4b5859f1139b479a764e1f8e6
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w32_crowdpose_384x288.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fccc213e67adf0086a544be68f0dd1cadc8e7746
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8373648aeb83f1e176222de63c185f2b1a36dfc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_w48_crowdpose_384x288.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b425b0c886b4365209ae4d879e91b6dd1458d87a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_320x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_320x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a0fecb24259bfa796c45c69104678903f502552
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_320x256.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 320],
+ heatmap_size=[64, 80],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..0be685a06b510cb94274d02b592e890d5831ec3c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab4b2512b4759642cbf4758f77c4f15df71d2164
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
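These mmpose-style configs are plain Python files that mmcv's Config loader assembles: the `_base_` list pulls in the default runtime and the CrowdPose dataset description, and every `{{_base_.dataset_info}}` placeholder is substituted from that base file at load time. A minimal sketch of inspecting the resolved config (assuming mmcv 1.x, as the vendored ViTPose expects, and a working directory of `vendor/ViTPose` so the relative `_base_` paths resolve):

```python
# Minimal sketch: load one CrowdPose config and inspect the resolved fields.
# Assumes mmcv 1.x and that this runs from the vendor/ViTPose directory.
from mmcv import Config

cfg = Config.fromfile(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'crowdpose/res152_crowdpose_256x192.py')

print(cfg.model.backbone)           # {'type': 'ResNet', 'depth': 152}
print(cfg.data_cfg['image_size'])   # [192, 256]
# dataset_info was interpolated from _base_/datasets/crowdpose.py;
# 'crowdpose' is the expected name if the base file follows the usual layout.
print(cfg.data.train.dataset_info['dataset_name'])
```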
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..f54e428c87e3da15ac5eefd8a61d4ab33f275a94
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
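The 384x288 variant differs from the 256x192 one only in the crop size (288x384), the heatmap grid (one quarter of the crop, 72x96), and the Gaussian sigma used by `TopDownGenerateTarget` (3 instead of 2). A simplified numpy sketch of the target that transform produces, one heatmap per joint:

```python
# Simplified sketch of the Gaussian training target: a 2-D Gaussian of the
# configured sigma, centred on the keypoint after mapping it from the input
# crop to the quarter-resolution heatmap grid.
import numpy as np

def gaussian_heatmap(joint_xy, image_size=(288, 384), heatmap_size=(72, 96), sigma=3):
    w, h = heatmap_size
    stride = image_size[0] / heatmap_size[0]        # 4 in these configs
    mu_x, mu_y = np.asarray(joint_xy, dtype=float) / stride
    xs = np.arange(w)[None, :]
    ys = np.arange(h)[:, None]
    return np.exp(-((xs - mu_x) ** 2 + (ys - mu_y) ** 2) / (2 * sigma ** 2))

target = gaussian_heatmap((144.0, 192.0))           # joint at the crop centre
print(target.shape, target.max())                   # (96, 72) 1.0
```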
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..22f765f1fc497217fe958a0b0a7ed34a628a6243
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea49a82987522d5978536efa2b5dacffe8be4185
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/crowdpose.py'
+]
+evaluation = dict(interval=10, metric='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ crowd_matching=False,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/crowdpose/annotations/'
+ 'det_for_crowd_test_0.1_0.5.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=6,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/crowdpose'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_trainval.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCrowdPoseDataset',
+ ann_file=f'{data_root}/annotations/mmpose_crowdpose_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..81f9ee0522ee69cb12cc5c0139900fa350423158
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test with the [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :----------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | :------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_256x192.py) | 256x192 | 0.637 | 0.808 | 0.692 | 0.739 | 0.650 | 0.506 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192_20201227.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_256x192.py) | 256x192 | 0.647 | 0.810 | 0.703 | 0.744 | 0.658 | 0.522 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192_20201227.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_320x256.py) | 320x256 | 0.661 | 0.821 | 0.714 | 0.759 | 0.671 | 0.536 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256_20201227.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_256x192.py) | 256x192 | 0.656 | 0.818 | 0.712 | 0.754 | 0.666 | 0.532 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192_20201227.log.json) |
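The checkpoints in the table plug into the mmpose 0.x top-down inference helpers that ship with the vendored ViTPose. A hedged sketch (the whole-image box below is a placeholder for real YOLOv3 detections, and `demo.jpg` is a stand-in path):

```python
# Sketch only: run a CrowdPose checkpoint from the table on one image.
# Assumes mmpose 0.x as vendored in vendor/ViTPose; the person box is a
# placeholder for detector output.
import numpy as np
from mmpose.apis import init_pose_model, inference_top_down_pose_model

config = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'crowdpose/res50_crowdpose_256x192.py')
checkpoint = ('https://download.openmmlab.com/mmpose/top_down/resnet/'
              'res50_crowdpose_256x192-c6a526b6_20201227.pth')

model = init_pose_model(config, checkpoint, device='cuda:0')
person_results = [{'bbox': np.array([0, 0, 640, 480])}]   # x, y, w, h placeholder
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results, format='xywh',
    dataset='TopDownCrowdPoseDataset')
print(pose_results[0]['keypoints'].shape)                 # (14, 3): x, y, score
```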
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..44b9c8e1d27e2812e1c05182bfe7219cc8ddc30e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.yml
@@ -0,0 +1,77 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res50_crowdpose_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: CrowdPose
+ Name: topdown_heatmap_res50_crowdpose_256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.637
+ AP (E): 0.739
+ AP (H): 0.506
+ AP (M): 0.65
+ AP@0.5: 0.808
+ AP@0.75: 0.692
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: topdown_heatmap_res101_crowdpose_256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.647
+ AP (E): 0.744
+ AP (H): 0.522
+ AP (M): 0.658
+ AP@0.5: 0.81
+ AP@0.75: 0.703
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res101_crowdpose_320x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: topdown_heatmap_res101_crowdpose_320x256
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.661
+ AP (E): 0.759
+ AP (H): 0.536
+ AP (M): 0.671
+ AP@0.5: 0.821
+ AP@0.75: 0.714
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/res152_crowdpose_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: topdown_heatmap_res152_crowdpose_256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.656
+ AP (E): 0.754
+ AP (H): 0.532
+ AP (M): 0.666
+ AP@0.5: 0.818
+ AP@0.75: 0.712
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth
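The .yml files are model-index metadata for the table above; the `&id001` / `*id001` pair is an ordinary YAML anchor that shares the Architecture list across entries. A small sketch of querying such a file (assuming PyYAML is available):

```python
# Sketch: read the model-index YAML above and report the entry with the
# highest AP. The &id001/*id001 anchors are resolved by the YAML loader.
import yaml

path = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'crowdpose/resnet_crowdpose.yml')
with open(path) as f:
    zoo = yaml.safe_load(f)

best = max(zoo['Models'], key=lambda m: m['Results'][0]['Metrics']['AP'])
print(best['Name'], best['Results'][0]['Metrics']['AP'])
# topdown_heatmap_res101_crowdpose_320x256 0.661
print(best['Metadata']['Architecture'])    # ['SimpleBaseline2D', 'ResNet']
```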
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.md
new file mode 100644
index 0000000000000000000000000000000000000000..c658cba54d9f5baaa26f85bf7c49bbe9bb52d03a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.md
@@ -0,0 +1,44 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+Results on Human3.6M test set with ground truth 2D detections
+
+| Arch | Input Size | EPE | PCK | ckpt | log |
+| :--- | :-----------: | :---: | :---: | :----: | :---: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w32_h36m_256x256.py) | 256x256 | 9.43 | 0.911 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_h36m_256x256-d3206675_20210621.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_h36m_256x256_20210621.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w48_h36m_256x256.py) | 256x256 | 7.36 | 0.932 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_h36m_256x256-78e88d08_20210621.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_h36m_256x256_20210621.log.json) |
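The Human3.6M entries are scored with 2D metrics: EPE is the mean pixel distance between predicted and ground-truth keypoints, and PCK the fraction of keypoints within a distance threshold (mmpose normalizes that threshold by the bounding-box size; the sketch below uses a fixed pixel threshold purely for illustration):

```python
# Illustrative only: EPE and PCK for a single pose, with a fixed pixel
# threshold standing in for mmpose's bbox-normalized one.
import numpy as np

def epe(pred, gt):
    return float(np.linalg.norm(pred - gt, axis=-1).mean())

def pck(pred, gt, thr):
    return float((np.linalg.norm(pred - gt, axis=-1) <= thr).mean())

gt = np.random.rand(17, 2) * 256
pred = gt + np.random.randn(17, 2) * 5.0
print(f'EPE = {epe(pred, gt):.2f} px, PCK@10px = {pck(pred, gt, 10.0):.3f}')
```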
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ac738b22d879f6d4084a975d40d6688a07376cdb
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.yml
@@ -0,0 +1,34 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w32_h36m_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: Human3.6M
+ Name: topdown_heatmap_hrnet_w32_h36m_256x256
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ EPE: 9.43
+ PCK: 0.911
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_h36m_256x256-d3206675_20210621.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w48_h36m_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: topdown_heatmap_hrnet_w48_h36m_256x256
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ EPE: 7.36
+ PCK: 0.932
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_h36m_256x256-78e88d08_20210621.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w32_h36m_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w32_h36m_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..94a59be92cfcf692a22a7ad35e6d205ad1871b62
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w32_h36m_256x256.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'EPE'], key_indicator='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/h36m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w48_h36m_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w48_h36m_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..03e1e50849ddf6f3528cac5c3fe526176bb16989
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_w48_h36m_256x256.py
@@ -0,0 +1,157 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'EPE'], key_indicator='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/h36m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownH36MDataset',
+ ann_file=f'{data_root}/annotation_body2d/h36m_coco_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
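The w32 and w48 configs above are identical except for the HRNet branch widths ((32, 64, 128, 256) vs (48, 96, 192, 384)) and the matching head `in_channels`. A hedged sketch of instantiating both from their configs with the mmpose 0.x builder and comparing sizes:

```python
# Sketch: build both HRNet variants from their configs and compare parameter
# counts. Assumes mmpose 0.x and a working directory of vendor/ViTPose.
from mmcv import Config
from mmpose.models import build_posenet

def param_count_m(config_path):
    cfg = Config.fromfile(config_path)
    model = build_posenet(cfg.model)
    return sum(p.numel() for p in model.parameters()) / 1e6

base = 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/'
for name in ('hrnet_w32_h36m_256x256.py', 'hrnet_w48_h36m_256x256.py'):
    print(name, f'{param_count_m(base + name):.1f}M parameters')
```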
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.md
new file mode 100644
index 0000000000000000000000000000000000000000..a122e8aa24c7b834d8a6b4cb35e372309d30f50f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.md
@@ -0,0 +1,56 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+
+
+
+JHMDB (ICCV'2013)
+
+```bibtex
+@inproceedings{Jhuang:ICCV:2013,
+ title = {Towards understanding action recognition},
+ author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
+ booktitle = {International Conf. on Computer Vision (ICCV)},
+ month = Dec,
+ pages = {3192-3199},
+ year = {2013}
+}
+```
+
+
+
+Results on Sub-JHMDB dataset
+
+The models are pre-trained on the MPII dataset only. No test-time augmentation (multi-scale / rotation testing) is used.
+
+- Normalized by Person Size
+
+| Split| Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :--- | :--------: | :--------: | :---: | :---: |:---: |:---: |:---: |:---: |:---: | :---: | :-----: |:------: |
+| Sub1 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py) | 368x368 | 96.1 | 91.9 | 81.0 | 78.9 | 96.6 | 90.8| 87.3 | 89.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) |
+| Sub2 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py) | 368x368 | 98.1 | 93.6 | 77.1 | 70.9 | 94.0 | 89.1| 84.7 | 87.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) |
+| Sub3 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py) | 368x368 | 97.9 | 94.9 | 87.3 | 84.0 | 98.6 | 94.4| 86.2 | 92.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) |
+| Average | cpm | 368x368 | 97.4 | 93.5 | 81.5 | 77.9 | 96.4 | 91.4| 86.1 | 89.8 | - | - |
+
+- Normalized by Torso Size
+
+| Split| Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :--- | :--------: | :--------: | :---: | :---: |:---: |:---: |:---: |:---: |:---: | :---: | :-----: |:------: |
+| Sub1 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py) | 368x368 | 89.0 | 63.0 | 54.0 | 54.9 | 68.2 | 63.1 | 61.2 | 66.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) |
+| Sub2 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py) | 368x368 | 90.3 | 57.9 | 46.8 | 44.3 | 60.8 | 58.2 | 62.4 | 61.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) |
+| Sub3 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py) | 368x368 | 91.0 | 72.6 | 59.9 | 54.0 | 73.2 | 68.5 | 65.8 | 70.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) |
+| Average | cpm | 368x368 | 90.1 | 64.5 | 53.6 | 51.1 | 67.4 | 63.3 | 63.1 | 65.7 | - | - |
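Sub1-Sub3 are the three official Sub-JHMDB cross-validation splits, and the `Average` row is (up to rounding) the per-column mean of the three splits. For instance, in the person-size table:

```python
# Quick check of the Average row in the person-size table: the plain mean of
# the three splits, rounded to one decimal place.
head = (96.1 + 98.1 + 97.9) / 3
mean = (89.5 + 87.4 + 92.4) / 3
print(round(head, 1), round(mean, 1))   # 97.4 89.8
```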
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..eda79a04c24cef7837deb17ee3da44bd3e415310
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.yml
@@ -0,0 +1,122 @@
+Collections:
+- Name: CPM
+ Paper:
+ Title: Convolutional pose machines
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/cpm.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: &id001
+ - CPM
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub1_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 87.3
+ Elb: 81.0
+ Head: 96.1
+ Hip: 96.6
+ Knee: 90.8
+ Mean: 89.5
+ Sho: 91.9
+ Wri: 78.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub2_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 84.7
+ Elb: 77.1
+ Head: 98.1
+ Hip: 94.0
+ Knee: 89.1
+ Mean: 87.4
+ Sho: 93.6
+ Wri: 70.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub3_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 86.2
+ Elb: 87.3
+ Head: 97.9
+ Hip: 98.6
+ Knee: 94.4
+ Mean: 92.4
+ Sho: 94.9
+ Wri: 84.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub1_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 61.2
+ Elb: 54.0
+ Head: 89.0
+ Hip: 68.2
+ Knee: 63.1
+ Mean: 66.0
+ Sho: 63.0
+ Wri: 54.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub2_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 62.4
+ Elb: 46.8
+ Head: 90.3
+ Hip: 60.8
+ Knee: 58.2
+ Mean: 61.1
+ Sho: 57.9
+ Wri: 44.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_cpm_jhmdb_sub3_368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 65.8
+ Elb: 59.9
+ Head: 91.0
+ Hip: 73.2
+ Knee: 68.5
+ Mean: 70.3
+ Sho: 72.6
+ Wri: 54.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..15ae4a0f2059d59d766520635687a481b4f64366
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[368, 368],
+ heatmap_size=[46, 46],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
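`TopdownHeatmapMultiStageHead` with `num_stages=6` gives every CPM stage its own full set of heatmaps, each penalised against the same Gaussian target, with the stage losses summed (intermediate supervision). A simplified PyTorch sketch of that loss structure, not the exact mmpose implementation:

```python
# Simplified sketch of intermediate supervision over the six CPM stages: each
# stage output gets an MSE loss against the same target, weighted per joint,
# and the stage losses are added together.
import torch
import torch.nn.functional as F

def multi_stage_mse(stage_outputs, target, target_weight):
    # stage_outputs: list of (N, 15, 46, 46) tensors, one per stage
    # target: (N, 15, 46, 46); target_weight: (N, 15, 1) joint visibility
    loss = 0.0
    for out in stage_outputs:
        per_joint = F.mse_loss(out, target, reduction='none').mean(dim=(2, 3))
        loss = loss + (per_joint * target_weight.squeeze(-1)).mean()
    return loss

outs = [torch.randn(2, 15, 46, 46) for _ in range(6)]
tgt = torch.rand(2, 15, 46, 46)
w = torch.ones(2, 15, 1)
print(multi_stage_mse(outs, tgt, w))
```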
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f885f541701295eeab24c6dbebccc4911035b54
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub2_368x368.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[368, 368],
+ heatmap_size=[46, 46],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..69706a76c207b38a122c0b3fe0f7711a41598cb7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub3_368x368.py
@@ -0,0 +1,141 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[368, 368],
+ heatmap_size=[46, 46],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..0870a6cbd306e8fab3e1b342d97ea0f23b3bb7e9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[32, 32],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
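The three `res50_2deconv_jhmdb_sub*` configs pair a 2-layer deconvolution head with `heatmap_size=[32, 32]`, while the plain `res50_jhmdb_sub*` configs further down keep the default 3-layer head and `heatmap_size=[64, 64]`. A minimal sketch of why those sizes line up, assuming the usual ResNet-50 output stride of 32 and stride-2 deconvolutions (the helper below is illustrative, not part of the codebase):

```python
# Illustrative helper: relate input size, backbone stride and the number of
# stride-2 deconv layers to the heatmap resolution used by these configs.
def heatmap_side(input_side: int, backbone_stride: int = 32,
                 num_deconv_layers: int = 3) -> int:
    """Spatial side length of the predicted heatmap."""
    feature_side = input_side // backbone_stride      # 256 // 32 = 8
    return feature_side * (2 ** num_deconv_layers)    # each deconv doubles it

print(heatmap_side(256, num_deconv_layers=2))  # 32 -> heatmap_size=[32, 32]
print(heatmap_side(256, num_deconv_layers=3))  # 64 -> heatmap_size=[64, 64]
```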
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..51f27b7e60236991bdc68efaaa3357f298927c0a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[32, 32],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..db0026693a29acf55e61d6353618364c3626edc6
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[20, 30])
+total_epochs = 40
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[32, 32],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..857854161247694ab57f1efdb019c3a4da427374
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub1_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
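Because these are standard mmpose 0.x configs, they can also be loaded programmatically rather than only through the training scripts. A hedged sketch, assuming the vendored `vendor/ViTPose` tree is importable as `mmpose` and that `data/jhmdb` has been prepared as the config expects:

```python
# Hedged sketch (mmpose 0.x-style API as vendored here): load a JHMDB config
# and build the model and training dataset from it.
from mmcv import Config
from mmpose.models import build_posenet
from mmpose.datasets import build_dataset

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'jhmdb/res50_jhmdb_sub1_256x256.py')

model = build_posenet(cfg.model)               # TopDown + ResNet-50 + simple head
train_dataset = build_dataset(cfg.data.train)  # requires data/jhmdb on disk

print(type(model).__name__, len(train_dataset))
```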
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d52be3d11e265d515c51263727892f3787f5809d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub2_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf9ab7fb8755e0e8c729a317c13f852f7404c453
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/jhmdb.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'tPCK'], save_best='Mean PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/jhmdb'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownJhmdbDataset',
+ ann_file=f'{data_root}/annotations/Sub3_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.md
new file mode 100644
index 0000000000000000000000000000000000000000..fa2b969180f24aeac67741f1cb31d377a3afc8db
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.md
@@ -0,0 +1,81 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+JHMDB (ICCV'2013)
+
+```bibtex
+@inproceedings{Jhuang:ICCV:2013,
+ title = {Towards understanding action recognition},
+ author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
+ booktitle = {International Conf. on Computer Vision (ICCV)},
+ month = Dec,
+ pages = {3192-3199},
+ year = {2013}
+}
+```
+
+
+
+Results on Sub-JHMDB dataset
+
+The models are pre-trained on the MPII dataset only. *NO* test-time augmentation (multi-scale / rotation testing) is used. A short sketch of the PCK computation behind both tables is given after them.
+
+- Normalized by Person Size
+
+| Split| Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :--- | :--------: | :--------: | :---: | :---: |:---: |:---: |:---: |:---: |:---: | :---: | :-----: |:------: |
+| Sub1 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py) | 256x256 | 99.1 | 98.0 | 93.8 | 91.3 | 99.4 | 96.5| 92.8 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py) | 256x256 | 99.3 | 97.1 | 90.6 | 87.0 | 98.9 | 96.3| 94.1 | 95.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py) | 256x256 | 99.0 | 97.9 | 94.0 | 91.6 | 99.7 | 98.0| 94.7 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 | 256x256 | 99.2 | 97.7 | 92.8 | 90.0 | 99.3 | 96.9| 93.9 | 96.0 | - | - |
+| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py) | 256x256 | 99.1 | 98.5 | 94.6 | 92.0 | 99.4 | 94.6| 92.5 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py) | 256x256 | 99.3 | 97.8 | 91.0 | 87.0 | 99.1 | 96.5| 93.8 | 95.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py) | 256x256 | 98.8 | 98.4 | 94.3 | 92.1 | 99.8 | 97.5| 93.8 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 99.1 | 98.2 | 93.3 | 90.4 | 99.4 | 96.2| 93.4 | 96.0 | - | - |
+
+- Normalized by Torso Size
+
+| Split| Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :--- | :--------: | :--------: | :---: | :---: |:---: |:---: |:---: |:---: |:---: | :---: | :-----: |:------: |
+| Sub1 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py) | 256x256 | 93.3 | 83.2 | 74.4 | 72.7 | 85.0 | 81.2 | 78.9 | 81.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py) | 256x256 | 94.1 | 74.9 | 64.5 | 62.5 | 77.9 | 71.9 | 78.6 | 75.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py) | 256x256 | 97.0 | 82.2 | 74.9 | 70.7 | 84.7 | 83.7 | 84.2 | 82.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 | 256x256 | 94.8 | 80.1 | 71.3 | 68.6 | 82.5 | 78.9 | 80.6 | 80.1 | - | - |
+| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py) | 256x256 | 92.4 | 80.6 | 73.2 | 70.5 | 82.3 | 75.4| 75.0 | 79.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py) | 256x256 | 93.4 | 73.6 | 63.8 | 60.5 | 75.1 | 68.4| 75.5 | 73.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py) | 256x256 | 96.1 | 81.2 | 72.6 | 67.9 | 83.6 | 80.9| 81.5 | 81.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 94.0 | 78.5 | 69.9 | 66.3 | 80.3 | 74.9| 77.3 | 78.0 | - | - |
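Both tables report PCK; they differ only in the length used to normalize the keypoint error (overall person size above, torso size below, which is the stricter of the two). A minimal sketch of the metric, where the 0.2 threshold and the reference lengths are illustrative assumptions rather than values taken from the evaluation code:

```python
# Hedged sketch of PCK: a keypoint is correct when its error, divided by a
# normalization length (person size or torso size), falls below a threshold.
import numpy as np

def pck(pred, gt, visible, norm_len, thr=0.2):
    """pred, gt: (K, 2) arrays; visible: (K,) bool mask; norm_len: scalar."""
    dist = np.linalg.norm(pred - gt, axis=1) / norm_len
    return float(np.mean(dist[visible] <= thr))

pred = np.array([[10.0, 12.0], [50.0, 48.0]])
gt = np.array([[11.0, 12.0], [45.0, 45.0]])
vis = np.array([True, True])
print(pck(pred, gt, vis, norm_len=60.0))  # person-size normalization -> 1.0
print(pck(pred, gt, vis, norm_len=25.0))  # torso-size normalization  -> 0.5
```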
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0116ecac101574b050030a4157e0b66abd7e5a46
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.yml
@@ -0,0 +1,237 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub1_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 92.8
+ Elb: 93.8
+ Head: 99.1
+ Hip: 99.4
+ Knee: 96.5
+ Mean: 96.1
+ Sho: 98.0
+ Wri: 91.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub2_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 94.1
+ Elb: 90.6
+ Head: 99.3
+ Hip: 98.9
+ Knee: 96.3
+ Mean: 95.0
+ Sho: 97.1
+ Wri: 87.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub3_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 94.7
+ Elb: 94.0
+ Head: 99.0
+ Hip: 99.7
+ Knee: 98.0
+ Mean: 96.7
+ Sho: 97.9
+ Wri: 91.6
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub1_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 92.5
+ Elb: 94.6
+ Head: 99.1
+ Hip: 99.4
+ Knee: 94.6
+ Mean: 96.1
+ Sho: 98.5
+ Wri: 92.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub2_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 93.8
+ Elb: 91.0
+ Head: 99.3
+ Hip: 99.1
+ Knee: 96.5
+ Mean: 95.2
+ Sho: 97.8
+ Wri: 87.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub3_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 93.8
+ Elb: 94.3
+ Head: 98.8
+ Hip: 99.8
+ Knee: 97.5
+ Mean: 96.7
+ Sho: 98.4
+ Wri: 92.1
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub1_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub1_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 78.9
+ Elb: 74.4
+ Head: 93.3
+ Hip: 85.0
+ Knee: 81.2
+ Mean: 81.9
+ Sho: 83.2
+ Wri: 72.7
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub2_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub2_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 78.6
+ Elb: 64.5
+ Head: 94.1
+ Hip: 77.9
+ Knee: 71.9
+ Mean: 75.5
+ Sho: 74.9
+ Wri: 62.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_jhmdb_sub3_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_jhmdb_sub3_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 84.2
+ Elb: 74.9
+ Head: 97.0
+ Hip: 84.7
+ Knee: 83.7
+ Mean: 82.9
+ Sho: 82.2
+ Wri: 70.7
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub1_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub1_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 75.0
+ Elb: 73.2
+ Head: 92.4
+ Hip: 82.3
+ Knee: 75.4
+ Mean: 79.2
+ Sho: 80.6
+ Wri: 70.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub2_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub2_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 75.5
+ Elb: 63.8
+ Head: 93.4
+ Hip: 75.1
+ Knee: 68.4
+ Mean: 73.7
+ Sho: 73.6
+ Wri: 60.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/res50_2deconv_jhmdb_sub3_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: topdown_heatmap_res50_2deconv_jhmdb_sub3_256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 81.5
+ Elb: 72.6
+ Head: 96.1
+ Hip: 83.6
+ Knee: 80.9
+ Mean: 81.2
+ Sho: 81.2
+ Wri: 67.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth
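The `&id001` / `*id001` pair in the model-index YAML above is plain YAML anchor/alias syntax: the `Architecture` list is written once and re-used by every later entry. A small sketch of how it resolves, assuming only PyYAML:

```python
# Hedged sketch: the &id001 anchor and *id001 alias used in resnet_jhmdb.yml
# resolve to the same list when the file is parsed.
import yaml

doc = yaml.safe_load("""
Models:
- Name: sub1
  Architecture: &id001
  - SimpleBaseline2D
  - ResNet
- Name: sub2
  Architecture: *id001
""")

print(doc['Models'][1]['Architecture'])  # ['SimpleBaseline2D', 'ResNet']
```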
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/res50_mhp_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/res50_mhp_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b0a322040a5bafd5de7505b34f72ffe91117ed9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/res50_mhp_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mhp.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ image_thr=0.0,
+ bbox_file='',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mhp'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_train.json',
+ img_prefix=f'{data_root}/train/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_val.json',
+ img_prefix=f'{data_root}/val/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMhpDataset',
+ ann_file=f'{data_root}/annotations/mhp_val.json',
+ img_prefix=f'{data_root}/val/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..befa17ea9548975429e917385bdf45a2a9b7c723
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.md
@@ -0,0 +1,59 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MHP (ACM MM'2018)
+
+```bibtex
+@inproceedings{zhao2018understanding,
+ title={Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
+ author={Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
+ booktitle={Proceedings of the 26th ACM international conference on Multimedia},
+ pages={792--800},
+ year={2018}
+}
+```
+
+
+
+Results on MHP v2.0 val set
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/res50_mhp_256x192.py) | 256x192 | 0.583 | 0.897 | 0.669 | 0.636 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mhp_256x192-28c5b818_20201229.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mhp_256x192_20201229.log.json) |
+
+Note that the evaluation metric used here is mAP (adapted from COCO), which may differ from the official evaluation [codes](https://github.com/ZhaoJ9014/Multi-Human-Parsing/tree/master/Evaluation/Multi-Human-Pose).
+Please be cautious when comparing these numbers with results reported in papers.
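Here "mAP (adapted from COCO)" means average precision computed from OKS (object keypoint similarity) matches rather than MHP's own protocol. A hedged sketch of OKS itself; the per-keypoint constants `k` and the example numbers are illustrative, not the values used by the evaluation code:

```python
# Hedged sketch of COCO-style OKS: a Gaussian-like similarity between a
# predicted and a ground-truth pose, thresholded to decide matches for mAP.
import numpy as np

def oks(pred, gt, visible, area, k):
    """pred, gt: (K, 2); visible: (K,) bool; area: person area; k: (K,)."""
    d2 = np.sum((pred - gt) ** 2, axis=1)
    e = d2 / (2.0 * area * k ** 2 + np.finfo(float).eps)
    return float(np.mean(np.exp(-e)[visible]))

pred = np.array([[100.0, 100.0], [150.0, 160.0]])
gt = np.array([[102.0, 101.0], [149.0, 158.0]])
vis = np.array([True, True])
print(oks(pred, gt, vis, area=9000.0, k=np.array([0.079, 0.107])))
```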
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.yml
new file mode 100644
index 0000000000000000000000000000000000000000..777b1dbb5f5d2fd03bbe56214785a8ce675f0a1c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/res50_mhp_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: MHP
+ Name: topdown_heatmap_res50_mhp_256x192
+ Results:
+ - Dataset: MHP
+ Metrics:
+ AP: 0.583
+ AP@0.5: 0.897
+ AP@0.75: 0.669
+ AR: 0.636
+ AR@0.5: 0.918
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_mhp_256x192-28c5b818_20201229.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbd0eef61be608bc5e151b48f55786691546d922
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cc680aee55c86336f29824e3f8986a282f2056c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7105e38a0561723017fef2c0d8479b609239c641
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_small_mpii_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_small_mpii_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f80f5228ab683eef03921d855e9c8b8f93620549
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_small_mpii_256x192.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
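For reference, the config above can be exercised directly with the top-down inference helpers vendored alongside it. A minimal sketch, assuming the vendored mmpose 0.x API is importable and a trained ViTPose-S MPII checkpoint is available (the checkpoint path and `demo.jpg` below are placeholders; nothing in this diff ships them):

```python
from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         vis_pose_result)

config = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/mpii/ViTPose_small_mpii_256x192.py')
checkpoint = 'path/to/vitpose_small_mpii.pth'  # placeholder checkpoint

model = init_pose_model(config, checkpoint, device='cpu')

# person_results=None treats the whole image as a single person box, which is
# consistent with use_gt_bbox=True / bbox_file=None in data_cfg above.
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results=None,
    format='xywh', dataset='TopDownMpiiDataset')

vis_pose_result(model, 'demo.jpg', pose_results,
                dataset='TopDownMpiiDataset', out_file='demo_pose.jpg')
```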
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e9012f672f17a455a3637fd49da69f533d01bb0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii_368x368.py) | 368x368 | 0.876 | 0.285 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368_20200822.log.json) |
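In these result tables, `Mean` is PCKh at the standard 0.5 threshold on the MPII val split and `Mean@0.1` is the same statistic at a 0.1 threshold: a joint counts as correct when its distance to the ground truth is below the threshold times the head segment size. A rough sketch of the metric (not the exact mmpose implementation, which also applies the official head-box scaling):

```python
import numpy as np

def pckh(pred, gt, head_size, visible, thr=0.5):
    """Fraction of visible joints within thr * head_size of the ground truth.

    pred, gt: (N, K, 2) joint coordinates; head_size: (N,) per-image head
    segment length; visible: (N, K) boolean mask. thr=0.5 corresponds to
    'Mean' and thr=0.1 to 'Mean@0.1' in the tables above.
    """
    dist = np.linalg.norm(pred - gt, axis=-1)              # (N, K)
    correct = (dist <= thr * head_size[:, None]) & visible
    return correct.sum() / max(visible.sum(), 1)

# toy example: one image, two joints, head size 10 px
pred = np.array([[[0.0, 0.0], [10.0, 0.0]]])
gt = np.array([[[0.0, 4.0], [10.0, 0.5]]])
print(pckh(pred, gt, np.array([10.0]), np.ones((1, 2), bool)))  # 1.0 at thr=0.5
```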
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c62a93f069002b55bf2e3d3a716e0826fbae56d7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: CPM
+ Paper:
+ Title: Convolutional pose machines
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/cpm.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii_368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture:
+ - CPM
+ Training Data: MPII
+ Name: topdown_heatmap_cpm_mpii_368x368
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.876
+ Mean@0.1: 0.285
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii_368x368.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii_368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..62b81a5c79299c6633de519ae0cf99d02031b4cf
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii_368x368.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=channel_cfg['num_output_channels'],
+ feat_channels=128,
+ num_stages=6),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=6,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[368, 368],
+ heatmap_size=[46, 46],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b96027fe54821bbac819d374c42e5bfa30cabb2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_256x256.py
@@ -0,0 +1,129 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_384x384.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_384x384.py
new file mode 100644
index 0000000000000000000000000000000000000000..30f2ec04ee60e38e4c9ee16327252d45f3748e9b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_384x384.py
@@ -0,0 +1,129 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[384, 384],
+ heatmap_size=[96, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..d429415acfae5d43924653e30fbf76eb09de52ba
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.md
@@ -0,0 +1,41 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_hourglass_52](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_256x256.py) | 256x256 | 0.889 | 0.317 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256_20200812.log.json) |
+| [pose_hourglass_52](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_384x384.py) | 384x384 | 0.894 | 0.366 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ecd47008a220dc6a296c49b35cc12456599b490b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.yml
@@ -0,0 +1,34 @@
+Collections:
+- Name: Hourglass
+ Paper:
+ Title: Stacked hourglass networks for human pose estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hourglass.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: &id001
+ - Hourglass
+ Training Data: MPII
+ Name: topdown_heatmap_hourglass52_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.889
+ Mean@0.1: 0.317
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass52_mpii_384x384.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_hourglass52_mpii_384x384
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.894
+ Mean@0.1: 0.366
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..b7100183eae55d59ba2d1afe459c85a94df0acf0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.md
@@ -0,0 +1,57 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_dark.py) | 256x256 | 0.904 | 0.354 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark_20200927.log.json) |
+| [pose_hrnet_w48_dark](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_dark.py) | 256x256 | 0.905 | 0.360 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark_20200927.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..795e135a923be338965e750a28160033bedd2f5d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.yml
@@ -0,0 +1,35 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: MPII
+ Name: topdown_heatmap_hrnet_w32_mpii_256x256_dark
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.904
+ Mean@0.1: 0.354
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_hrnet_w48_mpii_256x256_dark
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.905
+ Mean@0.1: 0.36
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4c205ca64c8537cf6189e4d206711f31b24edfe
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.md
@@ -0,0 +1,41 @@
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256.py) | 256x256 | 0.900 | 0.334 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_20200812.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256.py) | 256x256 | 0.901 | 0.337 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..94607111ef62935b44fb072f87efbdf42796ed5a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.yml
@@ -0,0 +1,34 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: MPII
+ Name: topdown_heatmap_hrnet_w32_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.9
+ Mean@0.1: 0.334
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_hrnet_w48_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.901
+ Mean@0.1: 0.337
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ef7e84d708f8426ab5aaa0502c15c82de4e81a6
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256.py
@@ -0,0 +1,154 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..503920eb1c50271b7f6615081464bf13265f97b9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_dark.py
@@ -0,0 +1,154 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
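The `_dark` config above is intentionally identical to `hrnet_w32_mpii_256x256.py` except for the unbiased (DARK) heatmap encoding and decoding. A small check of that delta, assuming the vendored `mmcv` is importable and the `_base_` files resolve from the ViTPose root:

```python
from mmcv import Config

CFG_DIR = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/'
           'topdown_heatmap/mpii/')
base = Config.fromfile(CFG_DIR + 'hrnet_w32_mpii_256x256.py')
dark = Config.fromfile(CFG_DIR + 'hrnet_w32_mpii_256x256_dark.py')

# Decoding: DARK swaps the default post-processing for unbiased decoding.
assert base.model.test_cfg.post_process == 'default'
assert dark.model.test_cfg.post_process == 'unbiased'

# Encoding: DARK turns on unbiased Gaussian target generation in training.
gen = next(t for t in dark.train_pipeline
           if t.type == 'TopDownGenerateTarget')
assert gen.unbiased_encoding is True
```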
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..d31a172fbf2a022a815ac65554afeb829e70ab8e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w32_mpii_256x256_udp.py
@@ -0,0 +1,161 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
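Likewise, the `_udp` config above differs from the plain `hrnet_w32_mpii_256x256.py` only in the UDP coordinate handling. The three switches involved, restated here for quick reference (the variable names are just labels, not config keys):

```python
# UDP-specific settings in hrnet_w32_mpii_256x256_udp.py
# (everything else matches hrnet_w32_mpii_256x256.py):
udp_affine = dict(type='TopDownAffine', use_udp=True)      # unbiased affine transform
udp_target = dict(type='TopDownGenerateTarget', sigma=2,
                  encoding='UDP', target_type='GaussianHeatmap')
udp_test_cfg = dict(flip_test=True, post_process='default',
                    shift_heatmap=False,                   # no half-pixel shift with UDP
                    target_type='GaussianHeatmap',
                    modulate_kernel=11, use_udp=True)
```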
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..99a4ef131ea479c18cd754256bc73d221e7ff348
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256.py
@@ -0,0 +1,154 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_dark.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..4531f0f99617711548bd2374f9095b049726580b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_dark.py
@@ -0,0 +1,154 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_udp.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..d373d830d9e1242248f2fef534a903916b851cfe
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_w48_mpii_256x256_udp.py
@@ -0,0 +1,161 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_18_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_18_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2a31e2c266d999af8b9532aefb97ae22779896f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_18_mpii_256x256.py
@@ -0,0 +1,145 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', key_indicator='PCKh')
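+# NOTE: 'key_indicator' is the older mmpose name for what the other MPII configs in this diff pass as 'save_best'.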
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=40,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
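All of these configs expose `data.train` / `data.val` in the same shape, so building a loader is uniform across them. A brief sketch, assuming the vendored mmpose 0.x dataset builders and the MPII data layout noted earlier:

```python
from mmcv import Config
from mmpose.datasets import build_dataloader, build_dataset

cfg = Config.fromfile(
    'vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'mpii/litehrnet_18_mpii_256x256.py')

train_set = build_dataset(cfg.data.train)
train_loader = build_dataloader(
    train_set,
    samples_per_gpu=cfg.data.samples_per_gpu,  # 64 in this config
    workers_per_gpu=cfg.data.workers_per_gpu,  # 2 in this config
    dist=False,
    shuffle=True)
print(len(train_set), 'training samples')
```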
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_30_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_30_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b56ac9325df90ba3d13e1190d9db63bdc93f678
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_30_mpii_256x256.py
@@ -0,0 +1,145 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(3, 8, 3),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=40,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..d77a3bae6155f25180c12e541111529ab80d9594
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [LiteHRNet-18](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_18_mpii_256x256.py) | 256x256 | 0.859 | 0.260 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256_20210623.log.json) |
+| [LiteHRNet-30](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_30_mpii_256x256.py) | 256x256 | 0.869 | 0.271 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256_20210622.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ae20a7352692714813ee839c62100a9b0f8c6250
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.yml
@@ -0,0 +1,34 @@
+Collections:
+- Name: LiteHRNet
+ Paper:
+ Title: 'Lite-HRNet: A Lightweight High-Resolution Network'
+ URL: https://arxiv.org/abs/2104.06403
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/litehrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_18_mpii_256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: &id001
+ - LiteHRNet
+ Training Data: MPII
+ Name: topdown_heatmap_litehrnet_18_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.859
+ Mean@0.1: 0.26
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_30_mpii_256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_litehrnet_30_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.869
+ Mean@0.1: 0.271
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..f811d33041b8af9cfe226c9391228721b3a4ba98
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenetv2](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii_256x256.py) | 256x256 | 0.854 | 0.235 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..87a4912b4ae4842480bf0642bac2d214fa65a4c5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: MPII
+ Name: topdown_heatmap_mobilenetv2_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.854
+ Mean@0.1: 0.235
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b13feaf1fc77695d59fcc334e687909b72147aa2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e09b84e98e02e044b4b7c7d967041582fd28502
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res101_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res152_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res152_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c5456e0041c8aeff08f8ce975206c3cdf2156f0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res152_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4c9898e43e2ed7ce0b2306d0b4f14b312d82bff
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res50_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..64a5337b5005144483a6c500237019d71bae9cad
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res50_mpii_256x256.py) | 256x256 | 0.882 | 0.286 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256_20200812.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res101_mpii_256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256_20200812.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res152_mpii_256x256.py) | 256x256 | 0.889 | 0.303 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..227eb34c59cd05c9ff0d654a5fb27552af12aab7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.yml
@@ -0,0 +1,48 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res50_mpii_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: MPII
+ Name: topdown_heatmap_res50_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.882
+ Mean@0.1: 0.286
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res101_mpii_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_res101_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/res152_mpii_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_res152_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.889
+ Mean@0.1: 0.303
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d35b83a44ec1d555b6896c1ce8699802901faf29
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d101_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet101_v1d',
+ backbone=dict(type='ResNetV1d', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d152_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d152_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6e26ca93989ec7549dd7179a0f99e984b5505f4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d152_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet152_v1d',
+ backbone=dict(type='ResNetV1d', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..e10ad9ed76a7626451e764835ba26805de65a086
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d50_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnet50_v1d',
+ backbone=dict(type='ResNetV1d', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..27a655eedd1be7a8a7b11728e78ab6b88b16808a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.md
@@ -0,0 +1,41 @@
+
+
+
+ResNetV1D (CVPR'2019)
+
+```bibtex
+@inproceedings{he2019bag,
+ title={Bag of tricks for image classification with convolutional neural networks},
+ author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={558--567},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_resnetv1d_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d50_mpii_256x256.py) | 256x256 | 0.881 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256_20200812.log.json) |
+| [pose_resnetv1d_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d101_mpii_256x256.py) | 256x256 | 0.883 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256_20200812.log.json) |
+| [pose_resnetv1d_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d152_mpii_256x256.py) | 256x256 | 0.888 | 0.300 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b02c3d44f17436c9ee248a3271651a85fef98555
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.yml
@@ -0,0 +1,47 @@
+Collections:
+- Name: ResNetV1D
+ Paper:
+ Title: Bag of tricks for image classification with convolutional neural networks
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/He_Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnetv1d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d50_mpii_256x256.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: &id001
+ - ResNetV1D
+ Training Data: MPII
+ Name: topdown_heatmap_resnetv1d50_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.881
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d101_mpii_256x256.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_resnetv1d101_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.883
+ Mean@0.1: 0.295
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d152_mpii_256x256.py
+ In Collection: ResNetV1D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_resnetv1d152_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d01af2be2e1dd85b90245443dddb1a706938b159
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext101_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext101_32x4d',
+ backbone=dict(type='ResNeXt', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext152_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext152_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d730b49a1196d90767f04fb595891fe01b4c76f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext152_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext152_32x4d',
+ backbone=dict(type='ResNeXt', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..22d97420bba76867dca4325da7c928b6d157d78f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext50_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://resnext50_32x4d',
+ backbone=dict(type='ResNeXt', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..b118ca4fd0999e83daa64c6f2ee1f4b764dc2c12
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ResNext (CVPR'2017)
+
+```bibtex
+@inproceedings{xie2017aggregated,
+ title={Aggregated residual transformations for deep neural networks},
+ author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1492--1500},
+ year={2017}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_resnext_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext152_mpii_256x256.py) | 256x256 | 0.887 | 0.294 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256_20200927.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c3ce9cd12126bd92da34ff99f889e6c96faaf77d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: ResNext
+ Paper:
+ Title: Aggregated residual transformations for deep neural networks
+ URL: http://openaccess.thecvf.com/content_cvpr_2017/html/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnext.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext152_mpii_256x256.py
+ In Collection: ResNext
+ Metadata:
+ Architecture:
+ - ResNext
+ Training Data: MPII
+ Name: topdown_heatmap_resnext152_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.887
+ Mean@0.1: 0.294
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4f746671f00fb12b9a511cd643b04b10530e268
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet101_mpii_256x256.py
@@ -0,0 +1,124 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet101-94250a77.pth',
+ backbone=dict(type='SCNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a4011f3419ac1272f03be3f13d89d1021cac94b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet50_mpii_256x256.py
@@ -0,0 +1,124 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth',
+ backbone=dict(type='SCNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..0a282b77f9a1d09842e738f67cb5d4c13bb342e8
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.md
@@ -0,0 +1,40 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_scnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet50_mpii_256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256_20200812.log.json) |
+| [pose_scnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet101_mpii_256x256.py) | 256x256 | 0.886 | 0.293 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..681c59b39967bfd5ada38cdda4cf3dd8cf2969ae
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.yml
@@ -0,0 +1,34 @@
+Collections:
+- Name: SCNet
+ Paper:
+ Title: Improving Convolutional Networks with Self-Calibrated Convolutions
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/scnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet50_mpii_256x256.py
+ In Collection: SCNet
+ Metadata:
+ Architecture: &id001
+ - SCNet
+ Training Data: MPII
+ Name: topdown_heatmap_scnet50_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet101_mpii_256x256.py
+ In Collection: SCNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_scnet101_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.886
+ Mean@0.1: 0.293
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet101_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet101_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffe3cfe2c536ff48e5ed9d1edf59cb94af38c1be
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet101_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet101',
+ backbone=dict(type='SEResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet152_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet152_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa12a8d03ed75d8f656662f85ccac2a0e6d4130a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet152_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='SEResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet50_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet50_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3382e19cc9f3d018eadca99f512bc4cf21c221c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet50_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://se-resnet50',
+ backbone=dict(type='SEResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe25c1cab35cafdf3a487580dd840dcca174bb06
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.md
@@ -0,0 +1,43 @@
+
+
+
+SEResNet (CVPR'2018)
+
+```bibtex
+@inproceedings{hu2018squeeze,
+ title={Squeeze-and-excitation networks},
+ author={Hu, Jie and Shen, Li and Sun, Gang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={7132--7141},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_seresnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet50_mpii_256x256.py) | 256x256 | 0.884 | 0.292 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256_20200927.log.json) |
+| [pose_seresnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet101_mpii_256x256.py) | 256x256 | 0.884 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256_20200927.log.json) |
+| [pose_seresnet_152\*](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet152_mpii_256x256.py) | 256x256 | 0.884 | 0.287 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256_20200927.log.json) |
+
+Note that \* denotes a model trained without ImageNet pre-training.
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..86e79d30db3a21b09628c1d542aa835969fb880b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.yml
@@ -0,0 +1,47 @@
+Collections:
+- Name: SEResNet
+ Paper:
+ Title: Squeeze-and-excitation networks
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/seresnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet50_mpii_256x256.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: &id001
+ - SEResNet
+ Training Data: MPII
+ Name: topdown_heatmap_seresnet50_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.292
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet101_mpii_256x256.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_seresnet101_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.295
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet152_mpii_256x256.py
+ In Collection: SEResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: topdown_heatmap_seresnet152_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.287
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb165265725276c48cc893655ca025faaf7be3b0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ShuffleNetV1 (CVPR'2018)
+
+```bibtex
+@inproceedings{zhang2018shufflenet,
+ title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
+ author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={6848--6856},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_shufflenetv1](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii_256x256.py) | 256x256 | 0.823 | 0.195 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256_20200925.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f707dcfbb4c2be55a7cde70958a1ddac407fe508
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: ShufflenetV1
+ Paper:
+ Title: 'Shufflenet: An extremely efficient convolutional neural network for mobile
+ devices'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Zhang_ShuffleNet_An_Extremely_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/shufflenetv1.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii_256x256.py
+ In Collection: ShufflenetV1
+ Metadata:
+ Architecture:
+ - ShufflenetV1
+ Training Data: MPII
+ Name: topdown_heatmap_shufflenetv1_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.823
+ Mean@0.1: 0.195
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a665ba0727d444b2bf1762e83e65fdc881792cd
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://shufflenet_v1',
+ backbone=dict(type='ShuffleNetV1', groups=3),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=960,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..9990df0c9daf23ca5d3389c7ef2b0862fac50d4a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ShuffleNetV2 (ECCV'2018)
+
+```bibtex
+@inproceedings{ma2018shufflenet,
+ title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},
+ author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={116--131},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |
+| [pose_shufflenetv2](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii_256x256.py) | 256x256 | 0.828 | 0.205 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256_20200925.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..58a4724215f6004f7ffb8bced17ce9e228a44998
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: ShufflenetV2
+ Paper:
+ Title: 'Shufflenet v2: Practical guidelines for efficient cnn architecture design'
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Ningning_Light-weight_CNN_Architecture_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/shufflenetv2.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii_256x256.py
+ In Collection: ShufflenetV2
+ Metadata:
+ Architecture:
+ - ShufflenetV2
+ Training Data: MPII
+ Name: topdown_heatmap_shufflenetv2_mpii_256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.828
+ Mean@0.1: 0.205
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..25937d116bd16523ed5624a0dff76e3abdf9fc42
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii_256x256.py
@@ -0,0 +1,123 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=list(range(16)),
+ inference_channel=list(range(16)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://shufflenet_v2',
+ backbone=dict(type='ShuffleNetV2', widen_factor=1.0),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiDataset',
+ ann_file=f'{data_root}/annotations/mpii_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res101_mpii_trb_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res101_mpii_trb_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..64e841a09a3bd02709f2b857ea5a10efc3a657ff
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res101_mpii_trb_256x256.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii_trb.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=40,
+ dataset_joints=40,
+ dataset_channel=list(range(40)),
+ inference_channel=list(range(40)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res152_mpii_trb_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res152_mpii_trb_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9862fc8f0160cd4c1b6d9c89a7b3cd1b88346aa
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res152_mpii_trb_256x256.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii_trb.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=40,
+ dataset_joints=40,
+ dataset_channel=list(range(40)),
+ inference_channel=list(range(40)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res50_mpii_trb_256x256.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res50_mpii_trb_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdc24472abab2f77919352ebabdd3e2e138e8a09
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res50_mpii_trb_256x256.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpii_trb.py'
+]
+evaluation = dict(interval=10, metric='PCKh', save_best='PCKh')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+channel_cfg = dict(
+ num_output_channels=40,
+ dataset_joints=40,
+ dataset_channel=list(range(40)),
+ inference_channel=list(range(40)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_gt_bbox=True,
+ bbox_file=None,
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/mpii'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownMpiiTrbDataset',
+ ann_file=f'{data_root}/annotations/mpii_trb_val.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}))
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.md
new file mode 100644
index 0000000000000000000000000000000000000000..10e2b9f8c1c488981ad7c34a7599215b3d55cf8a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII-TRB (ICCV'2019)
+
+```bibtex
+@inproceedings{duan2019trb,
+ title={TRB: A Novel Triplet Representation for Understanding 2D Human Body},
+ author={Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and Liu, Wentao and Qian, Chen and Ouyang, Wanli},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={9479--9488},
+ year={2019}
+}
+```
+
+
+
+Results on MPII-TRB val set
+
+| Arch | Input Size | Skeleton Acc | Contour Acc | Mean Acc | ckpt | log |
+| :--- | :--------: | :------: | :------: |:------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res50_mpii_trb_256x256.py) | 256x256 | 0.887 | 0.858 | 0.868 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_trb_256x256-896036b8_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_trb_256x256_20200812.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res101_mpii_trb_256x256.py) | 256x256 | 0.890 | 0.863 | 0.873 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_trb_256x256-cfad2f05_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_trb_256x256_20200812.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res152_mpii_trb_256x256.py) | 256x256 | 0.897 | 0.868 | 0.879 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_trb_256x256-dd369ce6_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_trb_256x256_20200812.log.json) |
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0f7f7458137ee0fa5eed1853aad25e3a30318eee
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res50_mpii_trb_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: MPII-TRB
+ Name: topdown_heatmap_res50_mpii_trb_256x256
+ Results:
+ - Dataset: MPII-TRB
+ Metrics:
+ Contour Acc: 0.858
+ Mean Acc: 0.868
+ Skeleton Acc: 0.887
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_trb_256x256-896036b8_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res101_mpii_trb_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII-TRB
+ Name: topdown_heatmap_res101_mpii_trb_256x256
+ Results:
+ - Dataset: MPII-TRB
+ Metrics:
+ Contour Acc: 0.863
+ Mean Acc: 0.873
+ Skeleton Acc: 0.89
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_trb_256x256-cfad2f05_20200812.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/res152_mpii_trb_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII-TRB
+ Name: topdown_heatmap_res152_mpii_trb_256x256
+ Results:
+ - Dataset: MPII-TRB
+ Metrics:
+ Contour Acc: 0.868
+ Mean Acc: 0.879
+ Skeleton Acc: 0.897
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_trb_256x256-dd369ce6_20200812.pth
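
The model-zoo YAML above shares one `Architecture` list between entries through a YAML anchor (`&id001`) and aliases (`*id001`). As a minimal, self-contained sketch (not part of the diff), the snippet below shows how a standard YAML parser resolves that reference; the embedded string is a shortened excerpt of the file, keeping only the relevant keys.

```python
# Minimal sketch: how the &id001 / *id001 anchor in the model-zoo YAML resolves.
# Requires PyYAML; the string below is a shortened excerpt of resnet_mpii_trb.yml.
import yaml

snippet = """
Models:
- Name: topdown_heatmap_res50_mpii_trb_256x256
  Metadata:
    Architecture: &id001
    - SimpleBaseline2D
    - ResNet
- Name: topdown_heatmap_res101_mpii_trb_256x256
  Metadata:
    Architecture: *id001
"""

models = yaml.safe_load(snippet)["Models"]
# Both entries end up with the same architecture list after parsing.
assert models[0]["Metadata"]["Architecture"] == models[1]["Metadata"]["Architecture"]
print(models[1]["Metadata"]["Architecture"])  # ['SimpleBaseline2D', 'ResNet']
```
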
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..84dbfacbb8abb61ac1e7bb5e2eea528d06bb4d13
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py
@@ -0,0 +1,153 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
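
The config above is a standard mmpose 0.x top-down config, so it can be driven through the high-level inference API of the mmpose code base that this vendored ViTPose tree builds on. The sketch below is illustrative only: the checkpoint path and the input image are placeholders, and it assumes an mmpose 0.x installation where `init_pose_model`, `inference_top_down_pose_model`, and `vis_pose_result` are available.

```python
# Illustrative sketch (not part of the diff): top-down inference with the
# ViTPose-B OCHuman config via the mmpose 0.x API. Paths are placeholders.
from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         vis_pose_result)

config_file = ('vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
               'ochuman/ViTPose_base_ochuman_256x192.py')
checkpoint_file = 'vitpose_base_ochuman.pth'  # hypothetical local checkpoint

model = init_pose_model(config_file, checkpoint_file, device='cpu')

# Top-down models expect per-person boxes; here a single hand-written xywh box.
person_results = [{'bbox': [100, 50, 180, 400]}]

# OCHuman shares the 17-keypoint COCO layout, so the COCO dataset name works
# for flip pairs and visualization defaults.
pose_results, _ = inference_top_down_pose_model(
    model,
    'demo.jpg',                       # placeholder image path
    person_results,
    format='xywh',
    dataset='TopDownCocoDataset')

vis_pose_result(model, 'demo.jpg', pose_results, out_file='demo_pose.jpg')
```
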
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..130fca6264d2e1b6f949787cac23b8a857e22870
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py
@@ -0,0 +1,153 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..af7f5d1e3de14e2ecef1dc8b61aee2d7e50e8f45
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py
@@ -0,0 +1,153 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_small_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_small_ochuman_256x192.py

new file mode 100644
index 0000000000000000000000000000000000000000..58bd1caba5bd07a4bef73e7131a995ee678043a4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_small_ochuman_256x192.py
@@ -0,0 +1,153 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
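
The four OCHuman ViTPose configs above (base, huge, large, small) differ only in backbone width and depth, with the head's `in_channels` matching `embed_dim`. The sketch below collects those numbers from the configs and derives a rough transformer parameter count using the usual ViT estimate of about 12 · depth · embed_dim² weights per encoder stack (attention ≈ 4·d², MLP with ratio 4 ≈ 8·d²), ignoring biases, norms, the patch embedding, and the keypoint head; the figures are approximations, not measured model sizes.

```python
# Rough comparison of the four ViTPose backbones defined in the OCHuman configs.
# The estimate (~12 * depth * embed_dim**2) covers only the transformer blocks.
variants = {
    'small': dict(embed_dim=384,  depth=12, num_heads=12),
    'base':  dict(embed_dim=768,  depth=12, num_heads=12),
    'large': dict(embed_dim=1024, depth=24, num_heads=16),
    'huge':  dict(embed_dim=1280, depth=32, num_heads=16),
}

for name, cfg in variants.items():
    approx_params = 12 * cfg['depth'] * cfg['embed_dim'] ** 2
    print(f"ViTPose-{name[0].upper()}: dim={cfg['embed_dim']:>4}, "
          f"depth={cfg['depth']:>2}, heads={cfg['num_heads']:>2}, "
          f"~{approx_params / 1e6:.0f}M backbone params")
```
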
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.md
new file mode 100644
index 0000000000000000000000000000000000000000..e844b067adb2d8cf59fcd8fe63a6b1e8d5f9825b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.md
@@ -0,0 +1,44 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+OCHuman (CVPR'2019)
+
+```bibtex
+@inproceedings{zhang2019pose2seg,
+ title={Pose2seg: Detection free human instance segmentation},
+ author={Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={889--898},
+ year={2019}
+}
+```
+
+
+
+Results on OCHuman test dataset with ground-truth bounding boxes
+
+Following the common setting, the models are trained on the COCO train set and evaluated on the OCHuman dataset.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_256x192.py) | 256x192 | 0.591 | 0.748 | 0.641 | 0.631 | 0.775 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192_20200708.log.json) |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_384x288.py) | 384x288 | 0.606 | 0.748 | 0.650 | 0.647 | 0.776 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288-d9f0d786_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288_20200708.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_256x192.py) | 256x192 | 0.611 | 0.752 | 0.663 | 0.648 | 0.778 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192_20200708.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_384x288.py) | 384x288 | 0.616 | 0.749 | 0.663 | 0.653 | 0.773 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_20200708.log.json) |
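
Because these models are evaluated on OCHuman with ground-truth boxes (`use_gt_bbox=True` in the configs) under COCO-style OKS metrics, the AP/AR columns above correspond to a standard COCO keypoint evaluation. The sketch below is a generic evaluation with pycocotools, assuming a hypothetical `results_keypoints.json` in COCO keypoint-results format; it is not the exact code path mmpose runs internally, but the metric definitions are the same.

```python
# Generic COCO-style OKS keypoint evaluation against the OCHuman validation
# annotations. `results_keypoints.json` is a placeholder results file.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

ann_file = 'data/ochuman/annotations/ochuman_coco_format_val_range_0.00_1.00.json'
coco_gt = COCO(ann_file)
coco_dt = coco_gt.loadRes('results_keypoints.json')

evaluator = COCOeval(coco_gt, coco_dt, iouType='keypoints')
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()  # prints AP, AP@0.5, AP@0.75, AR, AR@0.5, ...
```
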
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b3b625af0baa50746f5f82a88d92a7d171e1392
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: OCHuman
+ Name: topdown_heatmap_hrnet_w32_ochuman_256x192
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.591
+ AP@0.5: 0.748
+ AP@0.75: 0.641
+ AR: 0.631
+ AR@0.5: 0.775
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_hrnet_w32_ochuman_384x288
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.606
+ AP@0.5: 0.748
+ AP@0.75: 0.65
+ AR: 0.647
+ AR@0.5: 0.776
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288-d9f0d786_20200708.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_hrnet_w48_ochuman_256x192
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.611
+ AP@0.5: 0.752
+ AP@0.75: 0.663
+ AR: 0.648
+ AR@0.5: 0.778
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_hrnet_w48_ochuman_384x288
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.616
+ AP@0.5: 0.749
+ AP@0.75: 0.663
+ AR: 0.653
+ AR@0.5: 0.773
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ea620501b3522c1e5f91350cf33ce4443624643
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_256x192.py
@@ -0,0 +1,168 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
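
Across all of these configs the heatmap resolution is the input resolution divided by four (192×256 → 48×64, 288×384 → 72×96), and the Gaussian target width grows with it (`sigma=2` at 256×192, `sigma=3` at 384×288). The sketch below is a simplified illustration of the per-joint Gaussian heatmap that `TopDownGenerateTarget` produces; it shows the underlying formula, not the mmpose implementation.

```python
# Simplified illustration of a Gaussian keypoint heatmap target at 1/4 of the
# input resolution (the idea behind TopDownGenerateTarget, not its exact code).
import numpy as np

image_size = (192, 256)                                   # (width, height)
heatmap_size = (image_size[0] // 4, image_size[1] // 4)   # -> (48, 64)
sigma = 2.0                                               # 2 at 256x192, 3 at 384x288

def gaussian_heatmap(joint_xy, size=heatmap_size, sigma=sigma):
    """Unnormalized Gaussian centered at a joint given in heatmap coordinates."""
    w, h = size
    xs = np.arange(w)[None, :]
    ys = np.arange(h)[:, None]
    mu_x, mu_y = joint_xy
    return np.exp(-((xs - mu_x) ** 2 + (ys - mu_y) ** 2) / (2 * sigma ** 2))

# A joint at image pixel (100, 120) maps to heatmap coordinates (25, 30).
target = gaussian_heatmap((100 / 4, 120 / 4))
print(target.shape, target.max())  # (64, 48) 1.0
```
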
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..3612849918fdfe18d9f9a0fd031b49e6928e6d6c
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w32_ochuman_384x288.py
@@ -0,0 +1,168 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d26bd814ca4182542c9f78076672f32cb51acc7f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_256x192.py
@@ -0,0 +1,168 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..246adaf687bf3f69edcd1ab82e4c027e30511d37
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_w48_ochuman_384x288.py
@@ -0,0 +1,168 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c50002c895d3878b6986a36906f65e62b523515e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_256x192.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
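
Unlike the HRNet configs above, which already receive a 1/4-resolution, 32- or 48-channel feature map and therefore set `num_deconv_layers=0`, the ResNet configs rely on `TopdownHeatmapSimpleHead`'s default of three stride-2 deconvolutions to upsample the 2048-channel, stride-32 ResNet feature map back to 1/4 resolution before a final 1×1 convolution. The PyTorch sketch below is a schematic of that default head shape under those assumptions, not the actual mmpose class.

```python
# Schematic of the SimpleBaseline-style head used by the ResNet configs:
# three stride-2 deconvolutions (2048 -> 256 channels, x8 upsampling) followed
# by a 1x1 conv to 17 heatmap channels. Not the mmpose implementation itself.
import torch
import torch.nn as nn

head = nn.Sequential(
    nn.ConvTranspose2d(2048, 256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256), nn.ReLU(inplace=True),
    nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256), nn.ReLU(inplace=True),
    nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256), nn.ReLU(inplace=True),
    nn.Conv2d(256, 17, kernel_size=1),
)

# A 256x192 input gives an 8x6 ResNet feature map (stride 32); the head brings
# it back to the 64x48 heatmap resolution used in data_cfg.
features = torch.randn(1, 2048, 8, 6)
print(head(features).shape)  # torch.Size([1, 17, 64, 48])
```
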
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..84e3842b7ff04055fa5e6f4f7f88e5190af85edc
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res101_ochuman_384x288.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b71fb679b851e9280810df589d46269420834989
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_256x192.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6d95e1fcd780d9305b77595ce670122f56eee53
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res152_ochuman_384x288.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=48,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+ img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0649558c4a16eadfe7c6241657ace7c5e57872b1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_256x192.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+ img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b7f957c91b7acc9718c4c5d4bb215f5d50537bb
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/res50_ochuman_384x288.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/ochuman.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/ochuman'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoDataset',
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
+ img_prefix='data/coco/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownOCHumanDataset',
+ ann_file=f'{data_root}/annotations/'
+ 'ochuman_coco_format_test_range_0.00_1.00.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.md
new file mode 100644
index 0000000000000000000000000000000000000000..5b948f811821edcfc007d5ec85663319ebeacd87
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.md
@@ -0,0 +1,63 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+OCHuman (CVPR'2019)
+
+```bibtex
+@inproceedings{zhang2019pose2seg,
+ title={Pose2seg: Detection free human instance segmentation},
+ author={Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={889--898},
+ year={2019}
+}
+```
+
+
+
+Results on OCHuman test dataset with ground-truth bounding boxes
+
+Following the common setting, the models are trained on the COCO train dataset and evaluated on the OCHuman dataset; a minimal inference sketch for this setting follows the table below.
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py) | 256x192 | 0.546 | 0.726 | 0.593 | 0.592 | 0.755 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192_20200709.log.json) |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py) | 384x288 | 0.539 | 0.723 | 0.574 | 0.588 | 0.756 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288-e6f795e9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288_20200709.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py) | 256x192 | 0.559 | 0.724 | 0.606 | 0.605 | 0.751 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192-6e6babf0_20200708.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192_20200708.log.json) |
+| [pose_resnet_101](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py) | 384x288 | 0.571 | 0.715 | 0.615 | 0.615 | 0.748 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288-8c71bdc9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288_20200709.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py) | 256x192 | 0.570 | 0.725 | 0.617 | 0.616 | 0.754 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192-f6e307c2_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192_20200709.log.json) |
+| [pose_resnet_152](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py) | 384x288 | 0.582 | 0.723 | 0.627 | 0.627 | 0.752 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288_20200709.log.json) |
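+
+The snippet below is a hedged, minimal inference sketch for the setting above (COCO-trained weights, ground-truth-style person boxes). The config path is assumed to be relative to the ViTPose root, the image path and box values are placeholders, and the mmpose API calls are assumed to match the version vendored here.
+
+```python
+from mmcv import Config
+from mmpose.apis import init_pose_model, inference_top_down_pose_model
+
+# Load one of the OCHuman configs above; this resolves _base_ files and
+# the {{_base_.dataset_info}} references used throughout these configs.
+cfg = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/'
+    'res50_ochuman_256x192.py')
+ckpt = ('https://download.openmmlab.com/mmpose/top_down/resnet/'
+        'res50_coco_256x192-ec54d7f3_20200709.pth')  # COCO-trained weights from the table
+
+model = init_pose_model(cfg, ckpt, device='cpu')
+
+# One ground-truth-style person box in xywh format (matching use_gt_bbox=True above).
+person_results = [{'bbox': [50, 50, 200, 400]}]
+pose_results, _ = inference_top_down_pose_model(
+    model, 'demo.jpg', person_results, format='xywh',
+    dataset='TopDownOCHumanDataset')
+print(pose_results[0]['keypoints'].shape)  # (17, 3): x, y, score per keypoint
+```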
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7757701c2597f853ccf45a8ad593f297958e75b7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml
@@ -0,0 +1,105 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: OCHuman
+ Name: topdown_heatmap_res50_coco_256x192
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.546
+ AP@0.5: 0.726
+ AP@0.75: 0.593
+ AR: 0.592
+ AR@0.5: 0.755
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_res50_coco_384x288
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.539
+ AP@0.5: 0.723
+ AP@0.75: 0.574
+ AR: 0.588
+ AR@0.5: 0.756
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_384x288-e6f795e9_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_res101_coco_256x192
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.559
+ AP@0.5: 0.724
+ AP@0.75: 0.606
+ AR: 0.605
+ AR@0.5: 0.751
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_256x192-6e6babf0_20200708.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_res101_coco_384x288
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.571
+ AP@0.5: 0.715
+ AP@0.75: 0.615
+ AR: 0.615
+ AR@0.5: 0.748
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_384x288-8c71bdc9_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_res152_coco_256x192
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.57
+ AP@0.5: 0.725
+ AP@0.75: 0.617
+ AR: 0.616
+ AR@0.5: 0.754
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_256x192-f6e307c2_20200709.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: OCHuman
+ Name: topdown_heatmap_res152_coco_384x288
+ Results:
+ - Dataset: OCHuman
+ Metrics:
+ AP: 0.582
+ AP@0.5: 0.723
+ AP@0.75: 0.627
+ AR: 0.627
+ AR@0.5: 0.752
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_384x288-3860d4c9_20200709.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.md
new file mode 100644
index 0000000000000000000000000000000000000000..9c8117b48b04ecc15a4daefa38738d34171e3318
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.md
@@ -0,0 +1,56 @@
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py) | 256x192 | 87.4 | 88.6 | 84.3 | 78.5 | 79.7 | 81.8 | 78.8 | 83.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py) | 384x288 | 87.0 | 88.8 | 85.0 | 80.1 | 80.5 | 82.6 | 79.4 | 83.6 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py) | 256x192 | 88.2 | 90.1 | 85.8 | 80.8 | 80.7 | 83.3 | 80.3 | 84.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py) | 384x288 | 87.8 | 90.0 | 85.9 | 81.3 | 81.1 | 83.3 | 80.9 | 84.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
+
+Results on PoseTrack2018 val with [MMDetection](https://github.com/open-mmlab/mmdetection) pre-trained [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) human detector
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py) | 256x192 | 78.0 | 82.9 | 79.5 | 73.8 | 76.9 | 76.6 | 70.2 | 76.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) |
+| [pose_hrnet_w32](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py) | 384x288 | 79.9 | 83.6 | 80.4 | 74.5 | 74.8 | 76.1 | 70.5 | 77.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py) | 256x192 | 80.1 | 83.4 | 80.6 | 74.8 | 74.3 | 76.8 | 70.4 | 77.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py) | 384x288 | 80.2 | 83.8 | 80.9 | 75.2 | 74.7 | 76.7 | 71.7 | 77.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18; the sketch below shows how the configs wire this up through `load_from`.
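+
+A short sketch of how the COCO-to-PoseTrack18 fine-tuning is expressed in these configs: the COCO checkpoint is pulled in through `load_from`, and only the short 20-epoch schedule is run on PoseTrack18. The config path below is an assumption (relative to the ViTPose root).
+
+```python
+from mmcv import Config
+
+cfg = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/'
+    'hrnet_w32_posetrack18_256x192.py')
+
+print(cfg.load_from)       # COCO-trained HRNet-W32 checkpoint URL
+print(cfg.total_epochs)    # 20 fine-tuning epochs
+print(cfg.lr_config.step)  # LR drops at epochs [10, 15]
+```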
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
new file mode 100644
index 0000000000000000000000000000000000000000..349daa295a1006a3c9ea424b5c709d47b6196a91
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
@@ -0,0 +1,160 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w32_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 78.8
+ Elb: 84.3
+ Head: 87.4
+ Hip: 79.7
+ Knee: 81.8
+ Shou: 88.6
+ Total: 83.0
+ Wri: 78.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w32_posetrack18_384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 79.4
+ Elb: 85.0
+ Head: 87.0
+ Hip: 80.5
+ Knee: 82.6
+ Shou: 88.8
+ Total: 83.6
+ Wri: 80.1
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w48_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 80.3
+ Elb: 85.8
+ Head: 88.2
+ Hip: 80.7
+ Knee: 83.3
+ Shou: 90.1
+ Total: 84.4
+ Wri: 80.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w48_posetrack18_384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 80.9
+ Elb: 85.9
+ Head: 87.8
+ Hip: 81.1
+ Knee: 83.3
+ Shou: 90.0
+ Total: 84.5
+ Wri: 81.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w32_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 70.2
+ Elb: 79.5
+ Head: 78.0
+ Hip: 76.9
+ Knee: 76.6
+ Shou: 82.9
+ Total: 76.9
+ Wri: 73.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w32_posetrack18_384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 70.5
+ Elb: 80.4
+ Head: 79.9
+ Hip: 74.8
+ Knee: 76.1
+ Shou: 83.6
+ Total: 77.3
+ Wri: 74.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w48_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 70.4
+ Elb: 80.6
+ Head: 80.1
+ Hip: 74.3
+ Knee: 76.8
+ Shou: 83.4
+ Total: 77.4
+ Wri: 74.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_hrnet_w48_posetrack18_384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 71.7
+ Elb: 80.9
+ Head: 80.2
+ Hip: 74.7
+ Knee: 76.7
+ Shou: 83.8
+ Total: 77.8
+ Wri: 75.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e0bab25d081111c9eb2b6f30a2e733f10ca48fa
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_256x192.py
@@ -0,0 +1,169 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[10, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.4,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cb933fbaf4f69bb517f1ccbe157ace5afce2d36
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w32_posetrack18_384x288.py
@@ -0,0 +1,169 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_384x288-d9f0d786_20200708.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[10, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.4,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..dcfb6214c4ace61db2f72f54d3cf40c8f8033296
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_256x192.py
@@ -0,0 +1,169 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[10, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.4,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..78edf760140cd4d6041ae3304d5c14b660857840
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_w48_posetrack18_384x288.py
@@ -0,0 +1,169 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[10, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.4,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..341fa1b13c0b35c726e9f863be55856df774bcab
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth' # noqa: E501
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[10, 15])
+total_epochs = 20
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.4,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.md
new file mode 100644
index 0000000000000000000000000000000000000000..26aee7ba51a4acc1ee549a1292f96f9dea710b4f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.md
@@ -0,0 +1,66 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py) | 256x192 | 86.5 | 87.5 | 82.3 | 75.6 | 79.9 | 78.6 | 74.0 | 81.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192_20201028.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
+
+Results on PoseTrack2018 val with [MMDetection](https://github.com/open-mmlab/mmdetection) pre-trained [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) human detector
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py) | 256x192 | 78.9 | 81.9 | 77.8 | 70.8 | 75.3 | 73.2 | 66.4 | 75.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192_20201028.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18; the sketch below shows how to switch the evaluation to these detector boxes.
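+
+To reproduce the detector-box rows rather than the ground-truth-box rows, one can flip `use_gt_bbox` in the test `data_cfg`; the override below is a sketch under that assumption, with the config path resolved from the ViTPose root.
+
+```python
+from mmcv import Config
+
+cfg = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/'
+    'res50_posetrack18_256x192.py')
+
+# Switch evaluation from ground-truth boxes to the bundled detection results.
+cfg.data.test.data_cfg.use_gt_bbox = False
+print(cfg.data.test.data_cfg.bbox_file)      # posetrack18_val_human_detections.json
+print(cfg.data.test.data_cfg.det_bbox_thr)   # 0.4 detection score threshold
+```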
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f85bc4b64834862f166ed6ba118337dcf1d12fe0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml
@@ -0,0 +1,47 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_res50_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 74.0
+ Elb: 82.3
+ Head: 86.5
+ Hip: 79.9
+ Knee: 78.6
+ Shou: 87.5
+ Total: 81.0
+ Wri: 75.6
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/res50_posetrack18_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: topdown_heatmap_res50_posetrack18_256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 66.4
+ Elb: 77.8
+ Head: 78.9
+ Hip: 75.3
+ Knee: 73.2
+ Shou: 81.9
+ Total: 75.2
+ Wri: 70.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c638432b501656801367f035e70c4ac888130d14
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/README.md
@@ -0,0 +1,9 @@
+# Video-based Single-view 2D Human Body Pose Estimation
+
+Multi-person 2D human pose estimation in video is defined as the task of detecting the poses (or keypoints) of all people from an input video.
+
+For this task, we currently support [PoseWarper](/configs/body/2d_kpt_sview_rgb_vid/posewarper).
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_body_keypoint.md) to prepare data.
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..425d116704cc5ca1a9257ffc7575550fabf77981
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md
@@ -0,0 +1,25 @@
+# Learning Temporal Pose Estimation from Sparsely-Labeled Videos
+
+
+
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 32},
+year = {2019},
+}
+```
+
+
+
+PoseWarper proposes a network that leverages training videos with sparse annotations (every k frames) to learn dense temporal pose propagation and estimation. Given a pair of video frames, a labeled Frame A and an unlabeled Frame B, the model is trained to predict the human pose in Frame A using features from Frame B, with deformable convolutions implicitly learning the pose warping between A and B.
+
+The training of PoseWarper is split into two stages.
+
+In the first stage, the network is initialized with a pre-trained model and the main backbone is fine-tuned in a single-frame setting.
+
+In the second stage, training starts from the model obtained in the first stage; the warping offsets are learned in a multi-frame setting while the backbone is frozen.
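+
+The sketch below illustrates the core warping idea under simplified assumptions: one feature map per frame, offsets predicted from the feature difference, and `torchvision`'s `DeformConv2d` standing in for the multi-dilation offset branches of the full model. The class and tensor shapes are illustrative only, not the mmpose implementation.
+
+```python
+import torch
+import torch.nn as nn
+from torchvision.ops import DeformConv2d
+
+
+class NaiveWarpHead(nn.Module):
+    """Toy PoseWarper-style head: predict deformable-conv offsets from
+    the feature difference between two frames, then warp the features
+    of frame B toward frame A."""
+
+    def __init__(self, channels=48, kernel_size=3):
+        super().__init__()
+        # 2 offset values (dx, dy) per kernel location
+        self.offset_net = nn.Conv2d(channels, 2 * kernel_size * kernel_size,
+                                    kernel_size, padding=kernel_size // 2)
+        self.deform = DeformConv2d(channels, channels, kernel_size,
+                                   padding=kernel_size // 2)
+
+    def forward(self, feat_a, feat_b):
+        offsets = self.offset_net(feat_a - feat_b)  # how B differs from A
+        return self.deform(feat_b, offsets)         # warp B toward A
+
+
+feat_a = torch.randn(1, 48, 96, 72)
+feat_b = torch.randn(1, 48, 96, 72)
+warped = NaiveWarpHead()(feat_a, feat_b)            # (1, 48, 96, 72)
+```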
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
new file mode 100644
index 0000000000000000000000000000000000000000..0fd0a7f5af070590052cbd4cae6338f10402550e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
@@ -0,0 +1,88 @@
+
+
+
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 32},
+year = {2019},
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Note that the training of PoseWarper is split into two stages.
+
+In the first stage, the network is initialized with a [checkpoint](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth) pre-trained on the COCO dataset, and the main backbone is fine-tuned on PoseTrack18 in a single-frame setting.
+
+In the second stage, training starts from the final [checkpoint](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth) of the first stage; the warping offsets are learned in a multi-frame setting while the backbone is frozen.
+
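+At inference, heatmaps predicted from the current frame and its warped neighbouring frames are combined with per-frame weights (the stage-2 config's `frame_weight_test`). A minimal sketch of that aggregation, assuming a simple weighted sum over already-aligned heatmaps; the function below is illustrative, not the mmpose API:
+
+```python
+import torch
+
+
+def aggregate_heatmaps(heatmaps, frame_weights):
+    """Weighted sum of per-frame heatmaps.
+
+    heatmaps:      list of (N, K, H, W) tensors, one per frame, already
+                   warped/aligned to the current frame.
+    frame_weights: e.g. frame_weight_test = (0.3, 0.1, 0.25, 0.25, 0.1)
+                   from the stage-2 config, current frame weighted first.
+    """
+    assert len(heatmaps) == len(frame_weights)
+    fused = torch.zeros_like(heatmaps[0])
+    for hm, w in zip(heatmaps, frame_weights):
+        fused += w * hm
+    return fused
+
+
+hms = [torch.rand(1, 17, 96, 72) for _ in range(5)]
+fused = aggregate_heatmaps(hms, (0.3, 0.1, 0.25, 0.25, 0.1))
+```
+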
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 88.2 | 90.3 | 86.1 | 81.6 | 81.8 | 83.8 | 81.5 | 85.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
+
+Results on PoseTrack2018 val with precomputed human bounding boxes from the PoseWarper supplementary data files ([download link](https://www.dropbox.com/s/ygfy6r8nitoggfq/PoseWarper_supp_files.zip?dl=0))<sup>1</sup>.
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 81.8 | 85.6 | 82.7 | 77.2 | 76.8 | 79.0 | 74.4 | 79.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
+
+<sup>1</sup> Please download the precomputed human bounding boxes on PoseTrack2018 val from `$PoseWarper_supp_files/posetrack18_precomputed_boxes/val_boxes.json` and place it here: `$mmpose/data/posetrack18/posetrack18_precomputed_boxes/val_boxes.json` to be consistent with the [config](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py). Please refer to [DATA Preparation](/docs/en/tasks/2d_body_keypoint.md) for more detail about data preparation.
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3d260312f085a95bcc5fbfe5c2d78f76a20ec4e9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
@@ -0,0 +1,47 @@
+Collections:
+- Name: PoseWarper
+ Paper:
+ Title: Learning Temporal Pose Estimation from Sparsely Labeled Videos
+ URL: https://arxiv.org/abs/1906.04016
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/posewarper.md
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
+ In Collection: PoseWarper
+ Metadata:
+ Architecture: &id001
+ - PoseWarper
+ - HRNet
+ Training Data: COCO
+ Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+ Results:
+ - Dataset: COCO
+ Metrics:
+ Ankl: 81.5
+ Elb: 86.1
+ Head: 88.2
+ Hip: 81.8
+ Knee: 83.8
+ Shou: 90.3
+ Total: 85.0
+ Wri: 81.6
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
+ In Collection: PoseWarper
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+ Results:
+ - Dataset: COCO
+ Metrics:
+ Ankl: 74.4
+ Elb: 82.7
+ Head: 81.8
+ Hip: 76.8
+ Knee: 79.0
+ Shou: 85.6
+ Total: 79.8
+ Wri: 77.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6ab2d8f76830eb56ca1bc03bd11e0522cdc256d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
@@ -0,0 +1,166 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth' # noqa: E501
+cudnn_benchmark = True
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[5, 7])
+total_epochs = 10
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.2,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=45,
+ scale_factor=0.35),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=16,
+ workers_per_gpu=3,
+ val_dataloader=dict(samples_per_gpu=16),
+ test_dataloader=dict(samples_per_gpu=16),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eb5de9d3541e2dd1b1416ccd7a224ca1079593b
--- /dev/null
+++ b/vendor/ViTPose/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
@@ -0,0 +1,204 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/posetrack18.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth' # noqa: E501
+cudnn_benchmark = True
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[10, 15])
+total_epochs = 20
+log_config = dict(
+ interval=100,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseWarper',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ frozen_stages=4,
+ ),
+ concat_tensors=True,
+ neck=dict(
+ type='PoseWarperNeck',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ inner_channels=128,
+ deform_groups=channel_cfg['num_output_channels'],
+ dilations=(3, 6, 12, 18, 24),
+ trans_conv_kernel=1,
+ res_blocks_cfg=dict(block='BASIC', num_blocks=20),
+ offsets_kernel=3,
+ deform_conv_kernel=3,
+ freeze_trans_layer=True,
+ im2col_step=80),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=False,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_nms=True,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.2,
+ bbox_file='data/posetrack18/posetrack18_precomputed_boxes/'
+ 'val_boxes.json',
+ # frame_indices_train=[-1, 0],
+ frame_index_rand=True,
+ frame_index_range=[-2, 2],
+ num_adj_frames=1,
+ frame_indices_test=[-2, -1, 0, 1, 2],
+ # the first weight is the current frame,
+ # then on ascending order of frame indices
+ frame_weight_train=(0.0, 1.0),
+ frame_weight_test=(0.3, 0.1, 0.25, 0.25, 0.1),
+)
+
+# take care of orders of the transforms
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=45,
+ scale_factor=0.35),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'frame_weight'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file',
+ 'center',
+ 'scale',
+ 'rotation',
+ 'bbox_score',
+ 'flip_pairs',
+ 'frame_weight',
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=8,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=4),
+ test_dataloader=dict(samples_per_gpu=4),
+ train=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/README.md b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7ac9137ba963b22de68156cc4512484bdd918f8e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/README.md
@@ -0,0 +1,8 @@
+# Multi-view 3D Human Body Pose Estimation
+
+Multi-view 3D human body pose estimation aims to predict the X, Y, Z coordinates of human body joints from multi-view RGB images.
+For this task, we currently support [VoxelPose](/configs/body/3d_kpt_mview_rgb_img/voxelpose).
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/3d_body_keypoint.md) to prepare data.
diff --git a/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/README.md b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f3160f5b92bf8065cb5823081ccabe3d8d513b09
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/README.md
@@ -0,0 +1,23 @@
+# VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment
+
+
+
+
+VoxelPose (ECCV'2020)
+
+```bibtex
+@inproceedings{tumultipose,
+ title={VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment},
+ author={Tu, Hanyue and Wang, Chunyu and Zeng, Wenjun},
+ booktitle={ECCV},
+ year={2020}
+}
+```
+
+
+
+VoxelPose breaks the task of 3D human pose estimation into two stages: (1) human center detection with a Cuboid Proposal Network, and
+(2) human pose regression with a Pose Regression Network.
+
+The networks in both stages are based on 3D convolutions, and the input feature volumes are generated by projecting each voxel onto the
+multi-view images and sampling the 2D heatmaps at the projected locations.
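+
+A minimal sketch of how a per-voxel feature volume can be built by projecting voxel centres into each camera view and bilinearly sampling the 2D heatmaps. The projection function, shapes, and simple averaging are assumptions for illustration, not the mmpose implementation:
+
+```python
+import torch
+import torch.nn.functional as F
+
+
+def build_feature_volume(heatmaps, voxel_centers, project_fn, heatmap_size):
+    """heatmaps:      (num_cams, K, H, W) 2D keypoint heatmaps
+    voxel_centers: (V, 3) world-space voxel centres (mm)
+    project_fn:    assumed helper mapping (V, 3) world points to (V, 2)
+                   pixel coordinates for a given camera index
+    Returns an averaged (K, V) feature volume."""
+    num_cams, K, H, W = heatmaps.shape
+    w_img, h_img = heatmap_size
+    volume = torch.zeros(K, voxel_centers.shape[0])
+    for cam in range(num_cams):
+        pix = project_fn(voxel_centers, cam)                 # (V, 2) pixels
+        # normalize pixel coordinates to [-1, 1] for grid_sample
+        grid = torch.stack([pix[:, 0] / (w_img - 1),
+                            pix[:, 1] / (h_img - 1)], dim=-1) * 2 - 1
+        grid = grid.view(1, 1, -1, 2)                        # (1, 1, V, 2)
+        sampled = F.grid_sample(heatmaps[cam:cam + 1], grid,
+                                align_corners=True)          # (1, K, 1, V)
+        volume += sampled[0, :, 0, :]
+    return volume / num_cams
+
+
+# toy usage with a dummy "projection"; real code would use camera parameters
+hm = torch.rand(5, 15, 128, 240)
+vox = torch.rand(80 * 80 * 20, 3) * 1000
+dummy_proj = lambda pts, cam: pts[:, :2] * 0.1
+vol = build_feature_volume(hm, vox, dummy_proj, heatmap_size=(240, 128))
+```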
diff --git a/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md
new file mode 100644
index 0000000000000000000000000000000000000000..a71ad8e6a0916d14c55782b4677f30d0c43c432f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md
@@ -0,0 +1,37 @@
+
+
+
+VoxelPose (ECCV'2020)
+
+```bibtex
+@inproceedings{tumultipose,
+ title={VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment},
+ author={Tu, Hanyue and Wang, Chunyu and Zeng, Wenjun},
+ booktitle={ECCV},
+ year={2020}
+}
+```
+
+
+
+
+
+
+CMU Panoptic (ICCV'2015)
+
+```bibtex
+@inproceedings{joo_iccv_2015,
+author = {Hanbyul Joo and Hao Liu and Lei Tan and Lin Gui and Bart Nabbe and Iain Matthews and Takeo Kanade and Shohei Nobuhara and Yaser Sheikh},
+title = {Panoptic Studio: A Massively Multiview System for Social Motion Capture},
+booktitle = {ICCV},
+year = {2015}
+}
+```
+
+
+
+Results on CMU Panoptic dataset.
+
+| Arch | mAP | mAR | MPJPE | Recall@500mm | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: |
+| [prn64_cpn80_res50](/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py) | 97.31 | 97.99 | 17.57 | 99.85 | [ckpt](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth) | [log](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5_20211103.log.json) |
diff --git a/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py
new file mode 100644
index 0000000000000000000000000000000000000000..90996e1eeff112eec680c710a51722b6ba46ead5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py
@@ -0,0 +1,226 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_body3d.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 9])
+total_epochs = 15
+log_config = dict(
+ interval=50, hooks=[
+ dict(type='TextLoggerHook'),
+ ])
+
+space_size = [8000, 8000, 2000]
+space_center = [0, -500, 800]
+cube_size = [80, 80, 20]
+sub_space_size = [2000, 2000, 2000]
+sub_cube_size = [64, 64, 64]
+image_size = [960, 512]
+heatmap_size = [240, 128]
+num_joints = 15
+
+train_data_cfg = dict(
+ image_size=image_size,
+ heatmap_size=[heatmap_size],
+ num_joints=num_joints,
+ seq_list=[
+ '160422_ultimatum1', '160224_haggling1', '160226_haggling1',
+ '161202_haggling1', '160906_ian1', '160906_ian2', '160906_ian3',
+ '160906_band1', '160906_band2'
+ ],
+ cam_list=[(0, 12), (0, 6), (0, 23), (0, 13), (0, 3)],
+ num_cameras=5,
+ seq_frame_interval=3,
+ subset='train',
+ root_id=2,
+ max_num=10,
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+)
+
+test_data_cfg = train_data_cfg.copy()
+test_data_cfg.update(
+ dict(
+ seq_list=[
+ '160906_pizza1',
+ '160422_haggling1',
+ '160906_ian5',
+ '160906_band4',
+ ],
+ seq_frame_interval=12,
+ subset='validation'))
+
+# model settings
+backbone = dict(
+ type='AssociativeEmbedding',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='DeconvHead',
+ in_channels=2048,
+ out_channels=num_joints,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=15,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[False],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ )),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=num_joints,
+ nms_kernel=None,
+ nms_padding=None,
+ tag_per_joint=None,
+ max_num_people=None,
+ detection_threshold=None,
+ tag_threshold=None,
+ use_detection_val=None,
+ ignore_too_much=None,
+ ))
+
+model = dict(
+ type='DetectAndRegress',
+ backbone=backbone,
+ pretrained='checkpoints/resnet_50_deconv.pth.tar',
+ human_detector=dict(
+ type='VoxelCenterDetector',
+ image_size=image_size,
+ heatmap_size=heatmap_size,
+ space_size=space_size,
+ cube_size=cube_size,
+ space_center=space_center,
+ center_net=dict(type='V2VNet', input_channels=15, output_channels=1),
+ center_head=dict(
+ type='CuboidCenterHead',
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+ max_num=10,
+ max_pool_kernel=3),
+ train_cfg=dict(dist_threshold=500.0),
+ test_cfg=dict(center_threshold=0.3),
+ ),
+ pose_regressor=dict(
+ type='VoxelSinglePose',
+ image_size=image_size,
+ heatmap_size=heatmap_size,
+ sub_space_size=sub_space_size,
+ sub_cube_size=sub_cube_size,
+ num_joints=15,
+ pose_net=dict(type='V2VNet', input_channels=15, output_channels=15),
+ pose_head=dict(type='CuboidPoseHead', beta=100.0)))
+
+train_pipeline = [
+ dict(
+ type='MultiItemProcess',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=0,
+ scale_factor=[1.0, 1.0],
+ scale_type='long',
+ trans_factor=0),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='DiscardDuplicatedItems',
+ keys_list=[
+ 'joints_3d', 'joints_3d_visible', 'ann_info', 'roots_3d',
+ 'num_persons', 'sample_id'
+ ]),
+ dict(type='GenerateVoxel3DHeatmapTarget', sigma=200.0, joint_indices=[2]),
+ dict(
+ type='Collect',
+ keys=['img', 'targets_3d'],
+ meta_keys=[
+ 'num_persons', 'joints_3d', 'camera', 'center', 'scale',
+ 'joints_3d_visible', 'roots_3d'
+ ]),
+]
+
+val_pipeline = [
+ dict(
+ type='MultiItemProcess',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=0,
+ scale_factor=[1.0, 1.0],
+ scale_type='long',
+ trans_factor=0),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='DiscardDuplicatedItems',
+ keys_list=[
+ 'joints_3d', 'joints_3d_visible', 'ann_info', 'roots_3d',
+ 'num_persons', 'sample_id'
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['sample_id', 'camera', 'center', 'scale']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic/'
+data = dict(
+ samples_per_gpu=1,
+ workers_per_gpu=4,
+ val_dataloader=dict(samples_per_gpu=2),
+ test_dataloader=dict(samples_per_gpu=2),
+ train=dict(
+ type='Body3DMviewDirectPanopticDataset',
+ ann_file=None,
+ img_prefix=data_root,
+ data_cfg=train_data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DMviewDirectPanopticDataset',
+ ann_file=None,
+ img_prefix=data_root,
+ data_cfg=test_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DMviewDirectPanopticDataset',
+ ann_file=None,
+ img_prefix=data_root,
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8b5e57897fa76a36ea601598baa991fbe94e934f
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: VoxelPose
+ Paper:
+ Title: 'VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment'
+ URL: https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123460188.pdf
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/voxelpose.md
+Models:
+- Config: configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py
+ In Collection: VoxelPose
+ Metadata:
+ Architecture:
+ - VoxelPose
+ Training Data: CMU Panoptic
+ Name: voxelpose_voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5
+ Results:
+ - Dataset: CMU Panoptic
+ Metrics:
+ MPJPE: 17.57
+ mAP: 97.31
+ mAR: 97.99
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..30b2bd310cfbabb7911b46c154a8793aa41ebd60
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,17 @@
+# Single-view 3D Human Body Pose Estimation
+
+3D pose estimation is the task of predicting the X, Y, Z coordinates of human body joints from an RGB image.
+For single-person 3D pose estimation from a monocular camera, existing works can be classified into three categories:
+(1) from 2D poses to 3D poses (2D-to-3D pose lifting),
+(2) jointly learning 2D and 3D poses, and
+(3) directly regressing 3D poses from images.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/3d_body_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/3d_human_pose_demo.md) to run demos.
+
+
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/README.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..297c88896088a17041bc92f0cfba1550e9dabaa2
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/README.md
@@ -0,0 +1,23 @@
+# A simple yet effective baseline for 3d human pose estimation
+
+
+
+
+SimpleBaseline3D (ICCV'2017)
+
+```bibtex
+@inproceedings{martinez_2017_3dbaseline,
+ title={A simple yet effective baseline for 3d human pose estimation},
+ author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
+ booktitle={ICCV},
+ year={2017}
+}
+```
+
+
+
+SimpleBaseline3D breaks the task of 3D human pose estimation into two stages: (1) image → 2D pose,
+(2) 2D pose → 3D pose.
+
+The authors find that “lifting” ground-truth 2D joint locations to 3D space can be solved with a surprisingly low error rate.
+Building on the success of 2D human pose estimation, the method directly "lifts" 2D joint locations to 3D space.
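+
+A minimal sketch of such a lifting network, assuming a single residual MLP block in the spirit of the paper; the layer width and dropout mirror the config later in this diff (`stem_channels=1024`, `dropout=0.5`), but the code itself is illustrative only:
+
+```python
+import torch
+import torch.nn as nn
+
+
+class SimpleLifter(nn.Module):
+    """Lift 17 2D joints to 16 root-relative 3D joints with a residual
+    MLP (a toy version of the SimpleBaseline3D idea)."""
+
+    def __init__(self, num_joints=17, hidden=1024, dropout=0.5):
+        super().__init__()
+        self.inp = nn.Linear(num_joints * 2, hidden)
+        self.block = nn.Sequential(
+            nn.Linear(hidden, hidden), nn.BatchNorm1d(hidden),
+            nn.ReLU(), nn.Dropout(dropout),
+            nn.Linear(hidden, hidden), nn.BatchNorm1d(hidden),
+            nn.ReLU(), nn.Dropout(dropout))
+        # regress (num_joints - 1) joints: the root joint is not predicted
+        self.out = nn.Linear(hidden, (num_joints - 1) * 3)
+
+    def forward(self, kpts_2d):               # (N, 17, 2)
+        x = self.inp(kpts_2d.flatten(1))
+        x = x + self.block(x)                  # residual connection
+        return self.out(x).view(-1, 16, 3)     # (N, 16, 3), root-relative
+
+
+pose_3d = SimpleLifter()(torch.randn(8, 17, 2))
+```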
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.md
new file mode 100644
index 0000000000000000000000000000000000000000..0aac3fdd451ac810bafdf19323dd5f0b7c302542
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.md
@@ -0,0 +1,44 @@
+
+
+
+SimpleBaseline3D (ICCV'2017)
+
+```bibtex
+@inproceedings{martinez_2017_3dbaseline,
+ title={A simple yet effective baseline for 3d human pose estimation},
+ author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
+ booktitle={ICCV},
+ year={2017}
+}
+```
+
+
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+Results on Human3.6M dataset with ground truth 2D detections
+
+| Arch | MPJPE | P-MPJPE | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: |
+| [simple_baseline_3d_tcn](/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py)<sup>1</sup> | 43.4 | 34.3 | [ckpt](https://download.openmmlab.com/mmpose/body3d/simple_baseline/simple3Dbaseline_h36m-f0ad73a4_20210419.pth) | [log](https://download.openmmlab.com/mmpose/body3d/simple_baseline/20210415_065056.log.json) |
+
+<sup>1</sup> Differing from the original paper, we did not apply the `max-norm constraint`, because we found that omitting it led to better convergence and performance.
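+
+For reference, the max-norm constraint mentioned above clips the norm of each layer's weights after every update. A hedged sketch of one common implementation (per output unit, using PyTorch's `renorm`); this is the constraint the config deliberately omits, not part of it:
+
+```python
+import torch
+
+
+def apply_max_norm(model, max_norm=1.0):
+    """Clamp the L2 norm of each Linear output unit's weight vector to
+    max_norm (a common reading of the max-norm constraint in Martinez
+    et al.; the original implementation may differ)."""
+    with torch.no_grad():
+        for module in model.modules():
+            if isinstance(module, torch.nn.Linear):
+                module.weight.copy_(module.weight.renorm(2, 0, max_norm))
+
+
+# typical usage: call apply_max_norm(model) after optimizer.step()
+```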
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ec29530a51a7db9593fa15c40c8a846ecda06d9
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py
@@ -0,0 +1,180 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(interval=10, metric=['mpjpe', 'p-mpjpe'], save_best='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ by_epoch=False,
+ step=100000,
+ gamma=0.96,
+)
+
+total_epochs = 200
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(1, 1, 1),
+ dropout=0.5),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=16, # do not predict root joint
+ loss_keypoint=dict(type='MSELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=True,
+ joint_2d_src='gt',
+ need_camera_param=False,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+# 3D joint normalization parameters
+# From file: '{data_root}/annotation_body3d/fps50/joint3d_rel_stats.pkl'
+joint_3d_normalize_param = dict(
+ mean=[[-2.55652589e-04, -7.11960570e-03, -9.81433052e-04],
+ [-5.65463051e-03, 3.19636009e-01, 7.19329269e-02],
+ [-1.01705840e-02, 6.91147892e-01, 1.55352986e-01],
+ [2.55651315e-04, 7.11954606e-03, 9.81423866e-04],
+ [-5.09729780e-03, 3.27040413e-01, 7.22258095e-02],
+ [-9.99656606e-03, 7.08277383e-01, 1.58016408e-01],
+ [2.90583676e-03, -2.11363307e-01, -4.74210915e-02],
+ [5.67537804e-03, -4.35088906e-01, -9.76974016e-02],
+ [5.93884964e-03, -4.91891970e-01, -1.10666618e-01],
+ [7.37352083e-03, -5.83948619e-01, -1.31171400e-01],
+ [5.41920653e-03, -3.83931702e-01, -8.68145417e-02],
+ [2.95964662e-03, -1.87567488e-01, -4.34536934e-02],
+ [1.26585822e-03, -1.20170579e-01, -2.82526049e-02],
+ [4.67186639e-03, -3.83644089e-01, -8.55125784e-02],
+ [1.67648571e-03, -1.97007177e-01, -4.31368364e-02],
+ [8.70569015e-04, -1.68664569e-01, -3.73902498e-02]],
+ std=[[0.11072244, 0.02238818, 0.07246294],
+ [0.15856311, 0.18933832, 0.20880479],
+ [0.19179935, 0.24320062, 0.24756193],
+ [0.11072181, 0.02238805, 0.07246253],
+ [0.15880454, 0.19977188, 0.2147063],
+ [0.18001944, 0.25052739, 0.24853247],
+ [0.05210694, 0.05211406, 0.06908241],
+ [0.09515367, 0.10133032, 0.12899733],
+ [0.11742458, 0.12648469, 0.16465091],
+ [0.12360297, 0.13085539, 0.16433336],
+ [0.14602232, 0.09707956, 0.13952731],
+ [0.24347532, 0.12982249, 0.20230181],
+ [0.2446877, 0.21501816, 0.23938235],
+ [0.13876084, 0.1008926, 0.1424411],
+ [0.23687529, 0.14491219, 0.20980829],
+ [0.24400695, 0.23975028, 0.25520584]])
+
+# 2D joint normalization parameters
+# From file: '{data_root}/annotation_body3d/fps50/joint2d_stats.pkl'
+joint_2d_normalize_param = dict(
+ mean=[[532.08351635, 419.74137558], [531.80953144, 418.2607141],
+ [530.68456967, 493.54259285], [529.36968722, 575.96448516],
+ [532.29767646, 421.28483336], [531.93946631, 494.72186795],
+ [529.71984447, 578.96110365], [532.93699382, 370.65225054],
+ [534.1101856, 317.90342311], [534.55416813, 304.24143901],
+ [534.86955004, 282.31030885], [534.11308566, 330.11296796],
+ [533.53637525, 376.2742511], [533.49380107, 391.72324565],
+ [533.52579142, 330.09494668], [532.50804964, 374.190479],
+ [532.72786934, 380.61615716]],
+ std=[[107.73640054, 63.35908715], [119.00836213, 64.1215443],
+ [119.12412107, 50.53806215], [120.61688045, 56.38444891],
+ [101.95735275, 62.89636486], [106.24832897, 48.41178119],
+ [108.46734966, 54.58177071], [109.07369806, 68.70443672],
+ [111.20130351, 74.87287863], [111.63203838, 77.80542514],
+ [113.22330788, 79.90670556], [105.7145833, 73.27049436],
+ [107.05804267, 73.93175781], [107.97449418, 83.30391802],
+ [121.60675105, 74.25691526], [134.34378973, 77.48125087],
+ [131.79990652, 89.86721124]])
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=True),
+ dict(
+ type='NormalizeJointCoordinate',
+ item='target',
+ mean=joint_3d_normalize_param['mean'],
+ std=joint_3d_normalize_param['std']),
+ dict(
+ type='NormalizeJointCoordinate',
+ item='input_2d',
+ mean=joint_2d_normalize_param['mean'],
+ std=joint_2d_normalize_param['std']),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=[
+ 'target_image_path', 'flip_pairs', 'root_position',
+ 'root_position_index', 'target_mean', 'target_std'
+ ])
+]
+
+val_pipeline = train_pipeline
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b6de86b8f2a860e1a9440c1ee2057490b559308d
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: SimpleBaseline3D
+ Paper:
+ Title: A simple yet effective baseline for 3d human pose estimation
+ URL: http://openaccess.thecvf.com/content_iccv_2017/html/Martinez_A_Simple_yet_ICCV_2017_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline3d.md
+Models:
+- Config: configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py
+ In Collection: SimpleBaseline3D
+ Metadata:
+ Architecture:
+ - SimpleBaseline3D
+ Training Data: Human3.6M
+ Name: pose_lift_simplebaseline3d_h36m
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 43.4
+ P-MPJPE: 34.3
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/simple_baseline/simple3Dbaseline_h36m-f0ad73a4_20210419.pth
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e91fabccfae7d07184caf2039d15ace051ee3b5
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.md
@@ -0,0 +1,42 @@
+
+
+
+SimpleBaseline3D (ICCV'2017)
+
+```bibtex
+@inproceedings{martinez_2017_3dbaseline,
+ title={A simple yet effective baseline for 3d human pose estimation},
+ author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
+ booktitle={ICCV},
+ year={2017}
+}
+```
+
+
+
+
+
+
+MPI-INF-3DHP (3DV'2017)
+
+```bibtex
+@inproceedings{mono-3dhp2017,
+ author = {Mehta, Dushyant and Rhodin, Helge and Casas, Dan and Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and Theobalt, Christian},
+ title = {Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision},
+ booktitle = {3D Vision (3DV), 2017 Fifth International Conference on},
+ url = {http://gvv.mpi-inf.mpg.de/3dhp_dataset},
+ year = {2017},
+ organization={IEEE},
+ doi={10.1109/3dv.2017.00064},
+}
+```
+
+
+
+Results on MPI-INF-3DHP dataset with ground truth 2D detections
+
+| Arch | MPJPE | P-MPJPE | 3DPCK | 3DAUC | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: |
+| [simple_baseline_3d_tcn](/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.py)<sup>1</sup> | 84.3 | 53.2 | 85.0 | 52.0 | [ckpt](https://download.openmmlab.com/mmpose/body3d/simplebaseline3d/simplebaseline3d_mpi-inf-3dhp-b75546f6_20210603.pth) | [log](https://download.openmmlab.com/mmpose/body3d/simplebaseline3d/simplebaseline3d_mpi-inf-3dhp_20210603.log.json) |
+
+<sup>1</sup> Differing from the original paper, we did not apply the `max-norm constraint`, because we found that omitting it led to better convergence and performance.
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbe23db0f73fc260af1998fc7461b8b40eeb5144
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.py
@@ -0,0 +1,192 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpi_inf_3dhp.py'
+]
+evaluation = dict(
+ interval=10,
+ metric=['mpjpe', 'p-mpjpe', '3dpck', '3dauc'],
+ key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ by_epoch=False,
+ step=100000,
+ gamma=0.96,
+)
+
+total_epochs = 200
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(1, 1, 1),
+ dropout=0.5),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=16, # do not predict root joint
+ loss_keypoint=dict(type='MSELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/mpi_inf_3dhp'
+train_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=True,
+ joint_2d_src='gt',
+ need_camera_param=False,
+ camera_param_file=f'{data_root}/annotations/cameras_train.pkl',
+)
+test_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=True,
+ joint_2d_src='gt',
+ need_camera_param=False,
+ camera_param_file=f'{data_root}/annotations/cameras_test.pkl',
+)
+
+# 3D joint normalization parameters
+# From file: '{data_root}/annotations/joint3d_rel_stats.pkl'
+joint_3d_normalize_param = dict(
+ mean=[[1.29798757e-02, -6.14242101e-01, -8.27376088e-02],
+ [8.76858608e-03, -3.99992424e-01, -5.62749816e-02],
+ [1.96335208e-02, -3.64617227e-01, -4.88267063e-02],
+ [2.75206678e-02, -1.95085890e-01, -2.01508894e-02],
+ [2.22896982e-02, -1.37878727e-01, -5.51315396e-03],
+ [-4.16641282e-03, -3.65152343e-01, -5.43331534e-02],
+ [-1.83806493e-02, -1.88053038e-01, -2.78737492e-02],
+ [-1.81491930e-02, -1.22997985e-01, -1.15657333e-02],
+ [1.02960759e-02, -3.93481284e-03, 2.56594686e-03],
+ [-9.82312721e-04, 3.03909927e-01, 6.40930378e-02],
+ [-7.40153218e-03, 6.03930248e-01, 1.01704308e-01],
+ [-1.02960759e-02, 3.93481284e-03, -2.56594686e-03],
+ [-2.65585735e-02, 3.10685217e-01, 5.90257974e-02],
+ [-2.97909979e-02, 6.09658773e-01, 9.83101419e-02],
+ [5.27935016e-03, -1.95547908e-01, -3.06803451e-02],
+ [9.67095383e-03, -4.67827216e-01, -6.31183199e-02]],
+ std=[[0.22265961, 0.19394593, 0.24823498],
+ [0.14710804, 0.13572695, 0.16518279],
+ [0.16562233, 0.12820609, 0.1770134],
+ [0.25062919, 0.1896429, 0.24869254],
+ [0.29278334, 0.29575863, 0.28972444],
+ [0.16916984, 0.13424898, 0.17943313],
+ [0.24760463, 0.18768265, 0.24697394],
+ [0.28709979, 0.28541425, 0.29065647],
+ [0.08867271, 0.02868353, 0.08192097],
+ [0.21473598, 0.23872363, 0.22448061],
+ [0.26021136, 0.3188117, 0.29020494],
+ [0.08867271, 0.02868353, 0.08192097],
+ [0.20729183, 0.2332424, 0.22969608],
+ [0.26214967, 0.3125435, 0.29601641],
+ [0.07129179, 0.06720073, 0.0811808],
+ [0.17489889, 0.15827879, 0.19465977]])
+
+# 2D joint normalization parameters
+# From file: '{data_root}/annotations/joint2d_stats.pkl'
+joint_2d_normalize_param = dict(
+ mean=[[991.90641651, 862.69810047], [1012.08511619, 957.61720198],
+ [1014.49360896, 974.59889655], [1015.67993223, 1055.61969227],
+ [1012.53566238, 1082.80581721], [1009.22188073, 973.93984209],
+ [1005.0694331, 1058.35166276], [1003.49327495, 1089.75631017],
+ [1010.54615457, 1141.46165082], [1003.63254875, 1283.37687485],
+ [1001.97780897, 1418.03079034], [1006.61419313, 1145.20131053],
+ [999.60794074, 1287.13556333], [998.33830821, 1422.30463081],
+ [1008.58017385, 1143.33148068], [1010.97561846, 1053.38953748],
+ [1012.06704779, 925.75338048]],
+ std=[[23374.39708662, 7213.93351296], [533.82975336, 219.70387631],
+ [539.03326985, 218.9370412], [566.57219249, 233.32613405],
+ [590.4265317, 269.2245025], [539.92993936, 218.53166338],
+ [546.30605944, 228.43631598], [564.88616584, 267.85235566],
+ [515.76216052, 206.72322146], [500.6260933, 223.24233285],
+ [505.35940904, 268.4394148], [512.43406541, 202.93095363],
+ [502.41443672, 218.70111819], [509.76363747, 267.67317375],
+ [511.65693552, 204.13307947], [521.66823785, 205.96774166],
+ [541.47940161, 226.01738951]])
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=14,
+ root_name='root_position',
+ remove_root=True),
+ dict(
+ type='NormalizeJointCoordinate',
+ item='target',
+ mean=joint_3d_normalize_param['mean'],
+ std=joint_3d_normalize_param['std']),
+ dict(
+ type='NormalizeJointCoordinate',
+ item='input_2d',
+ mean=joint_2d_normalize_param['mean'],
+ std=joint_2d_normalize_param['std']),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=[
+ 'target_image_path', 'flip_pairs', 'root_position',
+ 'root_position_index', 'target_mean', 'target_std'
+ ])
+]
+
+val_pipeline = train_pipeline
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=train_data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_test_valid.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_test_valid.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bca7b505281160a3cce7dee6fe9dba95059f3331
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: SimpleBaseline3D
+ Paper:
+ Title: A simple yet effective baseline for 3d human pose estimation
+ URL: http://openaccess.thecvf.com/content_iccv_2017/html/Martinez_A_Simple_yet_ICCV_2017_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline3d.md
+Models:
+- Config: configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.py
+ In Collection: SimpleBaseline3D
+ Metadata:
+ Architecture:
+ - SimpleBaseline3D
+ Training Data: MPI-INF-3DHP
+ Name: pose_lift_simplebaseline3d_mpi-inf-3dhp
+ Results:
+ - Dataset: MPI-INF-3DHP
+ Metrics:
+ 3DAUC: 52.0
+ 3DPCK: 85.0
+ MPJPE: 84.3
+ P-MPJPE: 53.2
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/simplebaseline3d/simplebaseline3d_mpi-inf-3dhp-b75546f6_20210603.pth
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/README.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8473efc0745516c0c2f751fc7f20c76565263166
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/README.md
@@ -0,0 +1,11 @@
+# Video-based Single-view 3D Human Body Pose Estimation
+
+Video-based 3D pose estimation is the task of predicting the X, Y, Z coordinates of human body joints from a sequence of RGB images.
+For single-person 3D pose estimation from a monocular camera, existing works can be classified into three categories:
+(1) from 2D poses to 3D poses (2D-to-3D pose lifting),
+(2) jointly learning 2D and 3D poses, and
+(3) directly regressing 3D poses from images.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/3d_body_keypoint.md) to prepare data.
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/README.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c820a2f089cf7ca9810931a153915e4aa5e93fab
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/README.md
@@ -0,0 +1,22 @@
+# 3D human pose estimation in video with temporal convolutions and semi-supervised training
+
+## Introduction
+
+
+
+
+VideoPose3D (CVPR'2019)
+
+```bibtex
+@inproceedings{pavllo20193d,
+ title={3d human pose estimation in video with temporal convolutions and semi-supervised training},
+ author={Pavllo, Dario and Feichtenhofer, Christoph and Grangier, David and Auli, Michael},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7753--7762},
+ year={2019}
+}
+```
+
+
+
+Building on the success of 2D human pose estimation, VideoPose3D directly "lifts" a sequence of 2D keypoints to 3D keypoints with dilated temporal convolutions.
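+
+A minimal sketch of dilated temporal convolutions over a 2D-keypoint sequence, assuming a 27-frame receptive field (kernel 3 with dilations 1, 3 and 9); a toy version for illustration, not the mmpose implementation:
+
+```python
+import torch
+import torch.nn as nn
+
+
+class TemporalLifter(nn.Module):
+    """Dilated temporal convolutions over a sequence of 2D keypoints,
+    VideoPose3D-style (toy version, receptive field = 27 frames)."""
+
+    def __init__(self, num_joints=17, channels=1024):
+        super().__init__()
+
+        def block(dilation):
+            return nn.Sequential(
+                nn.Conv1d(channels, channels, 3, dilation=dilation),
+                nn.BatchNorm1d(channels), nn.ReLU(), nn.Dropout(0.25))
+
+        self.expand = nn.Conv1d(num_joints * 2, channels, 3)  # dilation 1
+        self.blocks = nn.Sequential(block(3), block(9))
+        self.head = nn.Conv1d(channels, num_joints * 3, 1)
+
+    def forward(self, seq_2d):                       # (N, T, 17, 2)
+        x = seq_2d.flatten(2).transpose(1, 2)        # (N, 34, T)
+        x = self.head(self.blocks(self.expand(x)))   # (N, 51, T - 26)
+        return x.transpose(1, 2).reshape(x.shape[0], -1, 17, 3)
+
+
+out = TemporalLifter()(torch.randn(2, 27, 17, 2))    # -> (2, 1, 17, 3)
+```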
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.md
new file mode 100644
index 0000000000000000000000000000000000000000..cad6bd5051eabe9bc5aa77ca849943fd20614ca1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.md
@@ -0,0 +1,66 @@
+
+
+
+VideoPose3D (CVPR'2019)
+
+```bibtex
+@inproceedings{pavllo20193d,
+ title={3d human pose estimation in video with temporal convolutions and semi-supervised training},
+ author={Pavllo, Dario and Feichtenhofer, Christoph and Grangier, David and Auli, Michael},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7753--7762},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+Results on Human3.6M dataset with ground truth 2D detections, supervised training
+
+| Arch | Receptive Field | MPJPE | P-MPJPE | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_supervised.py) | 27 | 40.0 | 30.1 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_supervised_20210527.log.json) |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_81frames_fullconv_supervised.py) | 81 | 38.9 | 29.2 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_81frames_fullconv_supervised_20210527.log.json) |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised.py) | 243 | 37.6 | 28.3 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised-880bea25_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_20210527.log.json) |
+
+Results on Human3.6M dataset with CPN 2D detections<sup>1</sup>, supervised training
+
+| Arch | Receptive Field | MPJPE | P-MPJPE | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_1frame_fullconv_supervised_cpn_ft.py) | 1 | 52.9 | 41.3 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_1frame_fullconv_supervised_cpn_ft_20210527.log.json) |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py) | 243 | 47.9 | 38.0 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft_20210527.log.json) |
+
+Results on Human3.6M dataset with ground truth 2D detections, semi-supervised training
+
+| Training Data | Arch | Receptive Field | MPJPE | P-MPJPE | N-MPJPE | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| 10% S1 | [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised.py) | 27 | 58.1 | 42.8 | 54.7 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised-54aef83b_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_20210527.log.json) |
+
+Results on Human3.6M dataset with CPN 2D detections [1], semi-supervised training
+
+| Training Data | Arch | Receptive Field | MPJPE | P-MPJPE | N-MPJPE | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| 10% S1 | [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft.py) | 27 | 67.4 | 50.1 | 63.2 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_cpn_ft-71be9cde_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_cpn_ft_20210527.log.json) |
+
+[1] CPN 2D detections are provided by the [official repo](https://github.com/facebookresearch/VideoPose3D/blob/master/DATASETS.md). The reformatted version used in this repository can be downloaded from [train_detection](https://download.openmmlab.com/mmpose/body3d/videopose/cpn_ft_h36m_dbb_train.npy) and [test_detection](https://download.openmmlab.com/mmpose/body3d/videopose/cpn_ft_h36m_dbb_test.npy).
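+
+The linked config files look for these detection files under `data/h36m/joint_2d_det_files/`. As a quick sanity check after downloading, the files can be opened with NumPy; the snippet below is only an illustrative sketch (it assumes NumPy is installed and the file has been placed at that path), not part of MMPose:
+
+```python
+# Hypothetical sanity check: load the reformatted CPN detection file and
+# report its type and shape before plugging it into the configs above.
+import numpy as np
+
+dets = np.load('data/h36m/joint_2d_det_files/cpn_ft_h36m_dbb_test.npy',
+               allow_pickle=True)
+print(type(dets), getattr(dets, 'shape', None))
+```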
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml
new file mode 100644
index 0000000000000000000000000000000000000000..392c494ace4de30d1c7576ac9392ecfc6270751e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml
@@ -0,0 +1,102 @@
+Collections:
+- Name: VideoPose3D
+ Paper:
+ Title: 3d human pose estimation in video with temporal convolutions and semi-supervised
+ training
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Pavllo_3D_Human_Pose_Estimation_in_Video_With_Temporal_Convolutions_and_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/videopose3d.md
+Models:
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_supervised.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: &id001
+ - VideoPose3D
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_27frames_fullconv_supervised
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 40.0
+ P-MPJPE: 30.1
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_81frames_fullconv_supervised.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_81frames_fullconv_supervised
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 38.9
+ P-MPJPE: 29.2
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_243frames_fullconv_supervised
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 37.6
+ P-MPJPE: 28.3
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised-880bea25_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_1frame_fullconv_supervised_cpn_ft.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_1frame_fullconv_supervised_cpn_ft
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 52.9
+ P-MPJPE: 41.3
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_243frames_fullconv_supervised_cpn_ft
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 47.9
+ P-MPJPE: 38.0
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_27frames_fullconv_semi-supervised
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 58.1
+ N-MPJPE: 54.7
+ P-MPJPE: 42.8
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised-54aef83b_20210527.pth
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture: *id001
+ Training Data: Human3.6M
+ Name: video_pose_lift_videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE: 67.4
+ N-MPJPE: 63.2
+ P-MPJPE: 50.1
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_cpn_ft-71be9cde_20210527.pth
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_1frame_fullconv_supervised_cpn_ft.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_1frame_fullconv_supervised_cpn_ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..2de3c3bbcd2ede1dd7031398c865296596d8f4c7
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_1frame_fullconv_supervised_cpn_ft.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.98,
+)
+
+total_epochs = 160
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=4,
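+        # all kernel sizes are 1, so the receptive field is a single frame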
+ kernel_sizes=(1, 1, 1, 1, 1),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+train_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=False,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_train.npy',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+test_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=False,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_test.npy',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=train_data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised.py
new file mode 100644
index 0000000000000000000000000000000000000000..23b23fede0bc7840859b997b44f070b9019367d3
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised.py
@@ -0,0 +1,144 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.975,
+)
+
+total_epochs = 160
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=4,
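+        # kernel sizes multiply out to the 243-frame receptive field (3^5 = 243)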
+ kernel_sizes=(3, 3, 3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+data_cfg = dict(
+ num_joints=17,
+ seq_len=243,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=0,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..65d7b49053800b6ecdc6a153a3f4349a90974bc0
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py
@@ -0,0 +1,158 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.98,
+)
+
+total_epochs = 200
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=4,
+ kernel_sizes=(3, 3, 3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+train_data_cfg = dict(
+ num_joints=17,
+ seq_len=243,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_train.npy',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+test_data_cfg = dict(
+ num_joints=17,
+ seq_len=243,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_test.npy',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=0,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=train_data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised.py
new file mode 100644
index 0000000000000000000000000000000000000000..70404c9fcede383f32e3c6cb2a77f9924d804b78
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised.py
@@ -0,0 +1,222 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+checkpoint_config = dict(interval=20)
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe', 'n-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.98,
+)
+
+total_epochs = 200
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
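+        # kernel sizes multiply out to the 27-frame receptive field (3^3 = 27)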
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ traj_backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ traj_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=1,
+ loss_keypoint=dict(type='MPJPELoss', use_target_weight=True),
+ is_trajectory=True),
+ loss_semi=dict(
+ type='SemiSupervisionLoss',
+ joint_parents=[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
+ warmup_iterations=1311376 // 64 // 8 *
+ 5), # dataset_size // samples_per_gpu // gpu_num * warmup_epochs
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+labeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subset=0.1,
+ subjects=['S1'],
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+unlabeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subjects=['S5', 'S6', 'S7', 'S8'],
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+ need_2d_label=True)
+val_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl')
+test_data_cfg = val_data_cfg
+
+train_labeled_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target',
+ ('root_position', 'traj_target')],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+train_unlabeled_pipeline = [
+ dict(
+ type='ImageCoordinateNormalization',
+ item=['input_2d', 'target_2d'],
+ norm_camera=True),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target_2d'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='static', center_x=0.)
+ ],
+ visible_item='input_2d_visible',
+ flip_prob=0.5,
+ flip_camera=True),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(type='CollectCameraIntrinsics'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'unlabeled_input'),
+ ('target_2d', 'unlabeled_target_2d'), 'intrinsics'],
+ meta_name='unlabeled_metas',
+ meta_keys=['target_image_path', 'flip_pairs'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=dict(
+ type='Body3DSemiSupervisionDataset',
+ labeled_dataset=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=labeled_data_cfg,
+ pipeline=train_labeled_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ unlabeled_dataset=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=unlabeled_data_cfg,
+ pipeline=train_unlabeled_pipeline,
+ dataset_info={{_base_.dataset_info}})),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=val_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b0d9fe5205e44b9062fdc60a7d51f8671e556b4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_semi-supervised_cpn_ft.py
@@ -0,0 +1,228 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+checkpoint_config = dict(interval=20)
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe', 'n-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.98,
+)
+
+total_epochs = 200
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ traj_backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ traj_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=1,
+ loss_keypoint=dict(type='MPJPELoss', use_target_weight=True),
+ is_trajectory=True),
+ loss_semi=dict(
+ type='SemiSupervisionLoss',
+ joint_parents=[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
+ warmup_iterations=1311376 // 64 // 8 *
+ 5), # dataset_size // samples_per_gpu // gpu_num * warmup_epochs
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+labeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_train.npy',
+ subset=0.1,
+ subjects=['S1'],
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+unlabeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_train.npy',
+ subjects=['S5', 'S6', 'S7', 'S8'],
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+ need_2d_label=True)
+val_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file=f'{data_root}/joint_2d_det_files/' +
+ 'cpn_ft_h36m_dbb_test.npy',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl')
+test_data_cfg = val_data_cfg
+
+train_labeled_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target',
+ ('root_position', 'traj_target')],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+train_unlabeled_pipeline = [
+ dict(
+ type='ImageCoordinateNormalization',
+ item=['input_2d', 'target_2d'],
+ norm_camera=True),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target_2d'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='static', center_x=0.)
+ ],
+ visible_item='input_2d_visible',
+ flip_prob=0.5,
+ flip_camera=True),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(type='CollectCameraIntrinsics'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'unlabeled_input'),
+ ('target_2d', 'unlabeled_target_2d'), 'intrinsics'],
+ meta_name='unlabeled_metas',
+ meta_keys=['target_image_path', 'flip_pairs'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=0,
+ val_dataloader=dict(samples_per_gpu=64),
+ test_dataloader=dict(samples_per_gpu=64),
+ train=dict(
+ type='Body3DSemiSupervisionDataset',
+ labeled_dataset=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=labeled_data_cfg,
+ pipeline=train_labeled_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ unlabeled_dataset=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=unlabeled_data_cfg,
+ pipeline=train_unlabeled_pipeline,
+ dataset_info={{_base_.dataset_info}})),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=val_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_supervised.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_supervised.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f28a59b4c273d5dabd043d957b95e6c1286ce6a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_27frames_fullconv_supervised.py
@@ -0,0 +1,144 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.975,
+)
+
+total_epochs = 160
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_81frames_fullconv_supervised.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_81frames_fullconv_supervised.py
new file mode 100644
index 0000000000000000000000000000000000000000..507a9f42c6cd6abdfa949b310a51ce10ad55c0e4
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_81frames_fullconv_supervised.py
@@ -0,0 +1,144 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/h36m.py'
+]
+evaluation = dict(
+ interval=10, metric=['mpjpe', 'p-mpjpe'], key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.975,
+)
+
+total_epochs = 160
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=3,
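+        # kernel sizes multiply out to the 81-frame receptive field (3^4 = 81)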
+ kernel_sizes=(3, 3, 3, 3),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/h36m'
+data_cfg = dict(
+ num_joints=17,
+ seq_len=81,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotation_body3d/cameras.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=0)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=0,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DH36MDataset',
+ ann_file=f'{data_root}/annotation_body3d/fps50/h36m_test.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.md b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..d85edc57b44368c86783c35adf3d320674e68819
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.md
@@ -0,0 +1,41 @@
+
+
+
+VideoPose3D (CVPR'2019)
+
+```bibtex
+@inproceedings{pavllo20193d,
+ title={3d human pose estimation in video with temporal convolutions and semi-supervised training},
+ author={Pavllo, Dario and Feichtenhofer, Christoph and Grangier, David and Auli, Michael},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7753--7762},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MPI-INF-3DHP (3DV'2017)
+
+```bibtex
+@inproceedings{mono-3dhp2017,
+ author = {Mehta, Dushyant and Rhodin, Helge and Casas, Dan and Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and Theobalt, Christian},
+ title = {Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision},
+ booktitle = {3D Vision (3DV), 2017 Fifth International Conference on},
+ url = {http://gvv.mpi-inf.mpg.de/3dhp_dataset},
+ year = {2017},
+ organization={IEEE},
+ doi={10.1109/3dv.2017.00064},
+}
+```
+
+
+
+Results on MPI-INF-3DHP dataset with ground truth 2D detections, supervised training
+
+| Arch | Receptive Field | MPJPE | P-MPJPE | 3DPCK | 3DAUC | ckpt | log |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [VideoPose3D](/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt.py) | 1 | 58.3 | 40.6 | 94.1 | 63.1 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_mpi-inf-3dhp_1frame_fullconv_supervised_gt-d6ed21ef_20210603.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_mpi-inf-3dhp_1frame_fullconv_supervised_gt_20210603.log.json) |
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.yml b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.yml
new file mode 100644
index 0000000000000000000000000000000000000000..70c073a8d9fb69765e32feae242d122b2bd2567a
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: VideoPose3D
+ Paper:
+ Title: 3d human pose estimation in video with temporal convolutions and semi-supervised
+ training
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Pavllo_3D_Human_Pose_Estimation_in_Video_With_Temporal_Convolutions_and_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/videopose3d.md
+Models:
+- Config: configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt.py
+ In Collection: VideoPose3D
+ Metadata:
+ Architecture:
+ - VideoPose3D
+ Training Data: MPI-INF-3DHP
+ Name: video_pose_lift_videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt
+ Results:
+ - Dataset: MPI-INF-3DHP
+ Metrics:
+ 3DAUC: 63.1
+ 3DPCK: 94.1
+ MPJPE: 58.3
+ P-MPJPE: 40.6
+ Task: Body 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/body3d/videopose/videopose_mpi-inf-3dhp_1frame_fullconv_supervised_gt-d6ed21ef_20210603.pth
diff --git a/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt.py b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..dac308a60a11af88932c6c406ef465dcc9862396
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp_1frame_fullconv_supervised_gt.py
@@ -0,0 +1,156 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/mpi_inf_3dhp.py'
+]
+evaluation = dict(
+ interval=10,
+ metric=['mpjpe', 'p-mpjpe', '3dpck', '3dauc'],
+ key_indicator='MPJPE')
+
+# optimizer settings
+optimizer = dict(
+ type='Adam',
+ lr=1e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='exp',
+ by_epoch=True,
+ gamma=0.98,
+)
+
+total_epochs = 160
+
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(
+ type='TCN',
+ in_channels=2 * 17,
+ stem_channels=1024,
+ num_blocks=4,
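+        # all kernel sizes are 1, so the receptive field is a single frame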
+ kernel_sizes=(1, 1, 1, 1, 1),
+ dropout=0.25,
+ use_stride_conv=True),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss')),
+ train_cfg=dict(),
+ test_cfg=dict(restore_global_position=True))
+
+# data settings
+data_root = 'data/mpi_inf_3dhp'
+train_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=False,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotations/cameras_train.pkl',
+)
+test_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=False,
+ joint_2d_src='gt',
+ need_camera_param=True,
+ camera_param_file=f'{data_root}/annotations/cameras_test.pkl',
+)
+
+train_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=14,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(
+ type='RelativeJointRandomFlip',
+ item=['input_2d', 'target'],
+ flip_cfg=[
+ dict(center_mode='static', center_x=0.),
+ dict(center_mode='root', center_index=14)
+ ],
+ visible_item=['input_2d_visible', 'target_visible'],
+ flip_prob=0.5),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+val_pipeline = [
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ visible_item='target_visible',
+ root_index=14,
+ root_name='root_position',
+ remove_root=False),
+ dict(type='ImageCoordinateNormalization', item='input_2d'),
+ dict(type='PoseSequenceToTensor', item='input_2d'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), 'target'],
+ meta_name='metas',
+ meta_keys=['target_image_path', 'flip_pairs', 'root_position'])
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=128,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=128),
+ test_dataloader=dict(samples_per_gpu=128),
+ train=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_train.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=train_data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_test_valid.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Body3DMpiInf3dhpDataset',
+ ann_file=f'{data_root}/annotations/mpi_inf_3dhp_test_valid.npz',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=test_data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/README.md b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a0c7817f40f334ddbc79b3e3c2b5f27e9cfff076
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/README.md
@@ -0,0 +1,120 @@
+# Human Body 3D Mesh Recovery
+
+This task aims at recovering the full 3D mesh representation (parameterized by shape and 3D joint angles) of a
+human body from a single RGB image.
+
+## Data preparation
+
+The preparation for human mesh recovery mainly includes:
+
+- Datasets
+- Annotations
+- SMPL Model
+
+Please follow [DATA Preparation](/docs/en/tasks/3d_body_mesh.md) to prepare them.
+
+## Prepare Pretrained Models
+
+Please download the pretrained HMR model from
+[here](https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224-c21e8229_20201015.pth),
+and place it so that the directory looks like this:
+
+```text
+mmpose
+`-- models
+ `-- pytorch
+ `-- hmr
+ |-- hmr_mesh_224x224-c21e8229_20201015.pth
+```
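+
+If preferred, the checkpoint can also be fetched programmatically. The snippet below is a minimal sketch using only the Python standard library, not an official MMPose utility:
+
+```python
+# Hypothetical helper: download the pretrained HMR checkpoint into the
+# directory layout shown above.
+import os
+import urllib.request
+
+URL = ('https://download.openmmlab.com/mmpose/mesh/hmr/'
+       'hmr_mesh_224x224-c21e8229_20201015.pth')
+dst_dir = 'models/pytorch/hmr'
+os.makedirs(dst_dir, exist_ok=True)
+urllib.request.urlretrieve(URL, os.path.join(dst_dir, os.path.basename(URL)))
+```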
+
+## Inference with pretrained models
+
+### Test a Dataset
+
+You can use the following commands to test the pretrained model on the Human3.6M test set and
+evaluate the joint error.
+
+```shell
+# single-gpu testing
+python tools/test.py configs/mesh/hmr/hmr_resnet_50.py \
+models/pytorch/hmr/hmr_mesh_224x224-c21e8229_20201015.pth --eval=joint_error
+
+# multiple-gpu testing
+./tools/dist_test.sh configs/mesh/hmr/hmr_resnet_50.py \
+models/pytorch/hmr/hmr_mesh_224x224-c21e8229_20201015.pth 8 --eval=joint_error
+```
+
+## Train the model
+
+In order to train the model, please download the
+[zip file](https://drive.google.com/file/d/1JrwfHYIFdQPO7VeBEG9Kk3xsZMVJmhtv/view?usp=sharing)
+of the sampled training images of the Human3.6M dataset.
+Extract the images and organize them as follows:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── h36m_train
+ ├── S1
+ │ ├── S1_Directions_1.54138969
+ │ │ ├── S1_Directions_1.54138969_000001.jpg
+ │ │ ├── S1_Directions_1.54138969_000006.jpg
+ │ │ └── ...
+ │ ├── S1_Directions_1.55011271
+ │ └── ...
+ ├── S11
+ │ ├── S11_Directions_1.54138969
+ │ ├── S11_Directions_1.55011271
+ │ └── ...
+ ├── S5
+ │ ├── S5_Directions_1.54138969
+ │ ├── S5_Directions_1.55011271
+ │ └── S5_WalkTogether.60457274
+ ├── S6
+ │ ├── S6_Directions_1.54138969
+ │ ├── S6_Directions_1.55011271
+ │ └── S6_WalkTogether.60457274
+ ├── S7
+ │ ├── S7_Directions_1.54138969
+ │ ├── S7_Directions_1.55011271
+ │ └── S7_WalkTogether.60457274
+ ├── S8
+ │ ├── S8_Directions_1.54138969
+ │ ├── S8_Directions_1.55011271
+ │ └── S8_WalkTogether_2.60457274
+ └── S9
+ ├── S9_Directions_1.54138969
+ ├── S9_Directions_1.55011271
+ └── S9_WalkTogether.60457274
+
+```
+
+Please also download the preprocessed annotation file for the Human3.6M train set from
+[here](https://drive.google.com/file/d/1NveJQGS4IYaASaJbLHT_zOGqm6Lo_gh5/view?usp=sharing),
+place it under `$MMPOSE/data/mesh_annotation_files`, and organize it like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mesh_annotation_files
+ ├── h36m_train.npz
+ └── ...
+```
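+
+To verify the download, the archive can be opened with NumPy and its contents listed; this is only an illustrative check (assuming NumPy is installed), not part of the MMPose tooling:
+
+```python
+# Hypothetical sanity check: confirm the annotation archive loads and list
+# the arrays stored in it.
+import numpy as np
+
+ann = np.load('data/mesh_annotation_files/h36m_train.npz')
+print(sorted(ann.files))
+```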
+
+### Train with multiple GPUs
+
+Here is the command for training the HMR network with 8 GPUs:
+
+```shell
+./tools/dist_train.sh configs/mesh/hmr/hmr_resnet_50.py 8 --work-dir work_dirs/hmr --no-validate
+```
diff --git a/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/README.md b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b970e4970531b78773681c893c7950831824cd10
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/README.md
@@ -0,0 +1,24 @@
+# End-to-end Recovery of Human Shape and Pose
+
+## Introduction
+
+
+
+
+HMR (CVPR'2018)
+
+```bibtex
+@inProceedings{kanazawaHMR18,
+ title={End-to-end Recovery of Human Shape and Pose},
+ author = {Angjoo Kanazawa
+ and Michael J. Black
+ and David W. Jacobs
+ and Jitendra Malik},
+ booktitle={Computer Vision and Pattern Recognition (CVPR)},
+ year={2018}
+}
+```
+
+
+
+HMR is an end-to-end framework for reconstructing a full 3D mesh of a human body from a single RGB image.
diff --git a/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py
new file mode 100644
index 0000000000000000000000000000000000000000..669cba07d996ddbdb3948861b2c379865429879e
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py
@@ -0,0 +1,149 @@
+_base_ = ['../../../../_base_/default_runtime.py']
+use_adversarial_train = True
+
+optimizer = dict(
+ generator=dict(type='Adam', lr=2.5e-4),
+ discriminator=dict(type='Adam', lr=1e-4))
+
+optimizer_config = None
+
+lr_config = dict(policy='Fixed', by_epoch=False)
+
+total_epochs = 100
+img_res = 224
+
+# model settings
+model = dict(
+ type='ParametricMesh',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ mesh_head=dict(
+ type='HMRMeshHead',
+ in_channels=2048,
+ smpl_mean_params='models/smpl/smpl_mean_params.npz',
+ ),
+ disc=dict(),
+ smpl=dict(
+ type='SMPL',
+ smpl_path='models/smpl',
+ joints_regressor='models/smpl/joints_regressor_cmr.npy'),
+ train_cfg=dict(disc_step=1),
+ test_cfg=dict(),
+ loss_mesh=dict(
+ type='MeshLoss',
+ joints_2d_loss_weight=100,
+ joints_3d_loss_weight=1000,
+ vertex_loss_weight=20,
+ smpl_pose_loss_weight=30,
+ smpl_beta_loss_weight=0.2,
+ focal_length=5000,
+ img_res=img_res),
+ loss_gan=dict(
+ type='GANLoss',
+ gan_type='lsgan',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=1))
+
+data_cfg = dict(
+ image_size=[img_res, img_res],
+ iuv_size=[img_res // 4, img_res // 4],
+ num_joints=24,
+ use_IUV=False,
+ uv_type='BF')
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='MeshRandomChannelNoise', noise_factor=0.4),
+ dict(type='MeshRandomFlip', flip_prob=0.5),
+ dict(type='MeshGetRandomScaleRotation', rot_factor=30, scale_factor=0.25),
+ dict(type='MeshAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img', 'joints_2d', 'joints_2d_visible', 'joints_3d',
+ 'joints_3d_visible', 'pose', 'beta', 'has_smpl'
+ ],
+ meta_keys=['image_file', 'center', 'scale', 'rotation']),
+]
+
+train_adv_pipeline = [dict(type='Collect', keys=['mosh_theta'], meta_keys=[])]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='MeshAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=['image_file', 'center', 'scale', 'rotation']),
+]
+
+test_pipeline = val_pipeline
+
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ train=dict(
+ type='MeshAdversarialDataset',
+ train_dataset=dict(
+ type='MeshMixDataset',
+ configs=[
+ dict(
+ ann_file='data/mesh_annotation_files/h36m_train.npz',
+ img_prefix='data/h36m_train',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ dict(
+ ann_file='data/mesh_annotation_files/'
+ 'mpi_inf_3dhp_train.npz',
+ img_prefix='data/mpi_inf_3dhp',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ dict(
+ ann_file='data/mesh_annotation_files/'
+ 'lsp_dataset_original_train.npz',
+ img_prefix='data/lsp_dataset_original',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ dict(
+ ann_file='data/mesh_annotation_files/hr-lspet_train.npz',
+ img_prefix='data/hr-lspet',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ dict(
+ ann_file='data/mesh_annotation_files/mpii_train.npz',
+ img_prefix='data/mpii',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ dict(
+ ann_file='data/mesh_annotation_files/coco_2014_train.npz',
+ img_prefix='data/coco',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline)
+ ],
+ partition=[0.35, 0.15, 0.1, 0.10, 0.10, 0.2]),
+ adversarial_dataset=dict(
+ type='MoshDataset',
+ ann_file='data/mesh_annotation_files/CMU_mosh.npz',
+ pipeline=train_adv_pipeline),
+ ),
+ test=dict(
+ type='MeshH36MDataset',
+ ann_file='data/mesh_annotation_files/h36m_valid_protocol2.npz',
+ img_prefix='data/Human3.6M',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ ),
+)
diff --git a/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.md b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.md
new file mode 100644
index 0000000000000000000000000000000000000000..e76d54e6013315b4091880eee279537004407df1
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.md
@@ -0,0 +1,62 @@
+
+
+
+HMR (CVPR'2018)
+
+```bibtex
+@inProceedings{kanazawaHMR18,
+ title={End-to-end Recovery of Human Shape and Pose},
+ author = {Angjoo Kanazawa
+ and Michael J. Black
+ and David W. Jacobs
+ and Jitendra Malik},
+ booktitle={Computer Vision and Pattern Recognition (CVPR)},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+Results on Human3.6M with ground-truth bounding boxes. The model reaches an MPJPE-PA of 52.60 mm under Protocol 2.
+
+| Arch | Input Size | MPJPE (P1)| MPJPE-PA (P1) | MPJPE (P2) | MPJPE-PA (P2) | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |
+| [hmr_resnet_50](/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py) | 224x224 | 80.75 | 55.08 | 80.35 | 52.60 | [ckpt](https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224-c21e8229_20201015.pth) | [log](https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224_20201015.log.json) |
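+
+As a rough illustration of the MPJPE-PA numbers above: the metric is usually computed by rigidly aligning each predicted 3D skeleton to the ground truth with a similarity (Procrustes) fit before averaging per-joint errors. The snippet below is a minimal NumPy sketch under that assumption; it is not the vendored evaluation code.
+
+```python
+# Minimal sketch (assumption: standard Procrustes-aligned MPJPE, in mm).
+import numpy as np
+
+def mpjpe(pred, gt):
+    """Mean per-joint position error for (N, J, 3) arrays."""
+    return np.linalg.norm(pred - gt, axis=-1).mean()
+
+def procrustes_align(pred, gt):
+    """Similarity-align one (J, 3) prediction to its (J, 3) ground truth."""
+    mu_p, mu_g = pred.mean(0), gt.mean(0)
+    p, g = pred - mu_p, gt - mu_g
+    u, s, vt = np.linalg.svd(p.T @ g)      # SVD of the cross-covariance matrix
+    if np.linalg.det(u @ vt) < 0:          # avoid reflections
+        vt[-1] *= -1
+        s[-1] *= -1
+    r = (u @ vt).T
+    scale = s.sum() / (p ** 2).sum()
+    return scale * p @ r.T + mu_g
+
+def pa_mpjpe(pred, gt):
+    aligned = np.stack([procrustes_align(p, g) for p, g in zip(pred, gt)])
+    return mpjpe(aligned, gt)
+```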
diff --git a/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b5307dd052795c58740d1845f913852fa0d4b164
--- /dev/null
+++ b/vendor/ViTPose/configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: HMR
+ Paper:
+ Title: End-to-end Recovery of Human Shape and Pose
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Kanazawa_End-to-End_Recovery_of_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/hmr.md
+Models:
+- Config: configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py
+ In Collection: HMR
+ Metadata:
+ Architecture:
+ - HMR
+ - ResNet
+ Training Data: Human3.6M
+ Name: hmr_res50_mixed_224x224
+ Results:
+ - Dataset: Human3.6M
+ Metrics:
+ MPJPE (P1): 80.75
+ MPJPE (P2): 80.35
+ MPJPE-PA (P1): 55.08
+ MPJPE-PA (P2): 52.6
+ Task: Body 3D Mesh
+ Weights: https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224-c21e8229_20201015.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..65a4c3dec855ddea53d6d89f9ee3d6e76263a5b1
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,16 @@
+# 2D Face Landmark Detection
+
+2D face landmark detection (also referred to as face alignment) is defined as the task of detecting the face keypoints from an input image.
+
+Normally, the input images are cropped face images in which the face is located at the center,
+or the rough location (or the bounding box) of the face is provided.
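+
+For the top-down configs under this directory, a detected face box is typically converted into the center/scale representation carried through the data pipelines. The sketch below shows the usual mmpose-style conversion; the padding factor of 1.25 and pixel_std of 200 are common conventions and should be treated as assumptions rather than values read from the vendored code.
+
+```python
+# Hedged sketch: xywh face box -> (center, scale) for a square 256x256 crop.
+import numpy as np
+
+def bbox_xywh_to_center_scale(bbox, aspect_ratio=1.0, padding=1.25, pixel_std=200.0):
+    x, y, w, h = bbox
+    center = np.array([x + 0.5 * w, y + 0.5 * h], dtype=np.float32)
+    # Keep the crop aspect ratio fixed (square crops for the 256x256 inputs here).
+    if w > aspect_ratio * h:
+        h = w / aspect_ratio
+    else:
+        w = h * aspect_ratio
+    scale = np.array([w, h], dtype=np.float32) / pixel_std * padding
+    return center, scale
+
+center, scale = bbox_xywh_to_center_scale([120.0, 80.0, 96.0, 96.0])
+```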
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_face_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/2d_face_demo.md) to run demos.
+
+
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/README.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..155c92ac183305d8d159a001f215d44d4566b866
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/README.md
@@ -0,0 +1,24 @@
+# DeepPose: Human pose estimation via deep neural networks
+
+## Introduction
+
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+DeepPose was the first work to propose using deep neural networks (DNNs) to tackle the problem of pose estimation.
+It follows the top-down paradigm, which first detects bounding boxes and then estimates the pose within each box.
+Here it learns to regress the face keypoint coordinates directly.
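+
+As a minimal sketch of this idea (not the vendored `DeepposeRegressionHead` implementation), the WFLW configs in this folder pair a ResNet-50 backbone with global average pooling and a single linear layer that regresses normalized (x, y) coordinates for all 98 keypoints, trained with a smooth L1 (or Wing/SoftWing) loss:
+
+```python
+# Illustrative PyTorch sketch of DeepPose-style coordinate regression.
+import torch
+import torch.nn as nn
+import torchvision
+
+class DeepPoseRegressor(nn.Module):
+    def __init__(self, num_joints=98):
+        super().__init__()
+        backbone = torchvision.models.resnet50()
+        # Drop the classification fc; keep conv stages + global average pooling.
+        self.features = nn.Sequential(*list(backbone.children())[:-1])
+        self.head = nn.Linear(2048, num_joints * 2)
+        self.num_joints = num_joints
+
+    def forward(self, img):                      # img: (B, 3, 256, 256)
+        feat = self.features(img).flatten(1)     # (B, 2048)
+        return self.head(feat).view(-1, self.num_joints, 2)
+
+model = DeepPoseRegressor()
+loss_fn = nn.SmoothL1Loss()                      # mirrors loss_keypoint in the configs
+pred = model(torch.randn(2, 3, 256, 256))
+loss = loss_fn(pred, torch.rand(2, 98, 2))       # targets as normalized coordinates
+```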
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c32cf765d386f02e73b1e5276acfd3de1ebd9db
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_softwingloss.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_softwingloss.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3ebd31d1c5ceb0706597e739c6e7560832b1791
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_softwingloss.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SoftWingLoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_wingloss.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_wingloss.py
new file mode 100644
index 0000000000000000000000000000000000000000..5578c81d697713c16eb227c6e5d956ab544c5b79
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_wingloss.py
@@ -0,0 +1,122 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='WingLoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..e7bad5704326465af9b1d16ff94bc33d16f9e070
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.md
@@ -0,0 +1,75 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+SoftWingloss (TIP'2021)
+
+```bibtex
+@article{lin2021structure,
+ title={Structure-Coherent Deep Feature Learning for Robust Face Alignment},
+ author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
+ journal={IEEE Transactions on Image Processing},
+ year={2021},
+ publisher={IEEE}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [deeppose_res50_softwingloss](/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_softwingloss.py) | 256x256 | 4.41 | 7.77 | 4.37 | 5.27 | 5.01 | 4.36 | 4.70 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) |
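+
+For reference, NME in the table above is the mean per-keypoint Euclidean error normalized by a reference distance; for WFLW the inter-ocular distance is the usual normalizer. The sketch below assumes the outer-eye-corner indices (60, 72) of the 98-point WFLW layout; treat those indices as an assumption rather than the vendored evaluation code.
+
+```python
+# Hedged sketch of the NME metric (in percent) for WFLW-style keypoints.
+import numpy as np
+
+def nme(pred, gt, norm_indices=(60, 72)):
+    """pred, gt: (N, 98, 2) keypoints in pixels."""
+    per_point_err = np.linalg.norm(pred - gt, axis=-1)                 # (N, 98)
+    interocular = np.linalg.norm(
+        gt[:, norm_indices[0]] - gt[:, norm_indices[1]], axis=-1)      # (N,)
+    return 100.0 * (per_point_err / interocular[:, None]).mean()
+```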
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ffd81c0534cd9c48548461145dbdf5640a492b17
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.yml
@@ -0,0 +1,28 @@
+Collections:
+- Name: SoftWingloss
+ Paper:
+ Title: Structure-Coherent Deep Feature Learning for Robust Face Alignment
+ URL: https://ieeexplore.ieee.org/document/9442331/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/softwingloss.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_softwingloss.py
+ In Collection: SoftWingloss
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ - SoftWingloss
+ Training Data: WFLW
+ Name: deeppose_res50_wflw_256x256_softwingloss
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 5.01
+ NME expression: 4.7
+ NME illumination: 4.37
+ NME makeup: 4.36
+ NME occlusion: 5.27
+ NME pose: 7.77
+ NME test: 4.41
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..f27f74a4548dfc4f8fb033eb1c9c29d04ffd74a1
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.md
@@ -0,0 +1,58 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [deeppose_res50](/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256.py) | 256x256 | 4.85 | 8.50 | 4.81 | 5.69 | 5.45 | 4.82 | 5.20 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..03df2a716ef81252348f1c6713ffe7166892f3aa
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: WFLW
+ Name: deeppose_res50_wflw_256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 5.45
+ NME expression: 5.2
+ NME illumination: 4.81
+ NME makeup: 4.82
+ NME occlusion: 5.69
+ NME pose: 8.5
+ NME test: 4.85
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..eb5fd1929e6ecc3fecf205b60d472bb04ada2cb8
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.md
@@ -0,0 +1,76 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+Wingloss (CVPR'2018)
+
+```bibtex
+@inproceedings{feng2018wing,
+ title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
+ author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
+ booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
+ year={2018},
+ pages ={2235-2245},
+ organization={IEEE}
+}
+```
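+
+The linked `res50_wflw_256x256_wingloss.py` config selects this loss via `loss_keypoint=dict(type='WingLoss', ...)`. As a hedged sketch (not the vendored implementation), the Wing loss behaves logarithmically for small coordinate errors and like a shifted L1 for large ones; omega=10 and epsilon=2 below are common defaults and are assumptions here:
+
+```python
+# Illustrative Wing loss sketch for keypoint-coordinate regression.
+import torch
+
+def wing_loss(pred, target, omega=10.0, epsilon=2.0):
+    diff = (pred - target).abs()
+    c = omega - omega * torch.log(torch.tensor(1.0 + omega / epsilon))
+    return torch.where(diff < omega,
+                       omega * torch.log(1.0 + diff / epsilon),
+                       diff - c).mean()
+```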
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [deeppose_res50_wingloss](/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_wingloss.py) | 256x256 | 4.64 | 8.25 | 4.59 | 5.56 | 5.26 | 4.59 | 5.07 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss_20210303.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..494258b4ec06a8ef81b097d173911f6c58941cb2
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.yml
@@ -0,0 +1,29 @@
+Collections:
+- Name: Wingloss
+ Paper:
+ Title: Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural
+ Networks
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Feng_Wing_Loss_for_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/wingloss.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/res50_wflw_256x256_wingloss.py
+ In Collection: Wingloss
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ - Wingloss
+ Training Data: WFLW
+ Name: deeppose_res50_wflw_256x256_wingloss
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 5.26
+ NME expression: 5.07
+ NME illumination: 4.59
+ NME makeup: 4.59
+ NME occlusion: 5.56
+ NME pose: 8.25
+ NME test: 4.64
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.md
new file mode 100644
index 0000000000000000000000000000000000000000..aae3b73ffe9a99b76fb815fac3029153b85594c6
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.md
@@ -0,0 +1,44 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+300W (IMAVIS'2016)
+
+```bibtex
+@article{sagonas2016300,
+ title={300 faces in-the-wild challenge: Database and results},
+ author={Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
+ journal={Image and vision computing},
+ volume={47},
+ pages={3--18},
+ year={2016},
+ publisher={Elsevier}
+}
+```
+
+
+
+Results on 300W dataset
+
+The model is trained on 300W train.
+
+| Arch | Input Size | NME*common* | NME*challenge* | NME*full* | NME*test* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [pose_hrnetv2_w18](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256.py) | 256x256 | 2.86 | 5.45 | 3.37 | 3.97 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256_20211019.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3d03f9e716ff41ebf9faada16bf1864809e5ad7f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: 300W
+ Name: topdown_heatmap_hrnetv2_w18_300w_256x256
+ Results:
+ - Dataset: 300W
+ Metrics:
+ NME challenge: 5.45
+ NME common: 2.86
+ NME full: 3.37
+ NME test: 3.97
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..88c9bdf91a97676814e01b6902ab0492b2148c49
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/300w.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=1.5),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/300w'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256_dark.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..6275f6fa41367602f2633fd0e9dd91587c6129ba
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_w18_300w_256x256_dark.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/300w.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/300w'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/res50_300w_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/res50_300w_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9194cfb2f8305fbd08dd946406551c8a0a82eac1
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/res50_300w_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/300w.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/300w'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Face300WDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_300w_valid.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ed6f5b02c8502ef0f23f699ec81554fc88ff36f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,10 @@
+# Top-down heatmap-based face keypoint estimation
+
+Top-down methods divide the task into two stages: face detection and face keypoint estimation.
+
+Face detection is performed first, and face keypoints are then estimated within each detected bounding box.
+Instead of regressing keypoint coordinates directly, the pose estimator produces heatmaps that represent the
+likelihood of each location being a keypoint.
+
+Various neural network models have been proposed for better performance.
+The popular ones include HRNetv2.
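+
+As a minimal sketch of the decoding step implied above (assuming the 64x64 heatmaps and 256x256 crops used by the configs in this folder), coordinates are read off each heatmap channel by an argmax and rescaled to the input crop; real decoders add flip averaging and sub-pixel refinement (e.g. DARK), omitted here:
+
+```python
+# Illustrative heatmap -> keypoint decoding (not the vendored implementation).
+import numpy as np
+
+def decode_heatmaps(heatmaps, image_size=(256, 256)):
+    """heatmaps: (K, H, W) -> (K, 2) coords in input-crop pixels, plus scores."""
+    k, h, w = heatmaps.shape
+    flat = heatmaps.reshape(k, -1)
+    ys, xs = np.unravel_index(flat.argmax(axis=1), (h, w))
+    coords = np.stack([xs * image_size[1] / w, ys * image_size[0] / h], axis=1)
+    return coords.astype(np.float32), flat.max(axis=1)
+
+coords, scores = decode_heatmaps(np.random.rand(68, 64, 64))
+```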
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..52907485c31fead106dcc94908bfaee10e3fa1e0
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.md
@@ -0,0 +1,43 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
+
+Results on AFLW dataset
+
+The model is trained on AFLW train and evaluated on AFLW full and frontal.
+
+| Arch | Input Size | NME*full* | NME*frontal* | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py) | 256x256 | 1.41 | 1.27 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256_20210125.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ee61e35afef3372541a0603f687e7af57b59c2b
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: AFLW
+ Name: topdown_heatmap_hrnetv2_w18_aflw_256x256
+ Results:
+ - Dataset: AFLW
+ Metrics:
+ NME frontal: 1.27
+ NME full: 1.41
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..19161ec6b308ca6af9a166536c209431b749438f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
+
+Results on AFLW dataset
+
+The model is trained on AFLW train and evaluated on AFLW full and frontal.
+
+| Arch | Input Size | NME*full* | NME*frontal* | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256_dark.py) | 256x256 | 1.34 | 1.20 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark_20210125.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ab60120930746f6ab4e6bbee6203c08dec14b482
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: AFLW
+ Name: topdown_heatmap_hrnetv2_w18_aflw_256x256_dark
+ Results:
+ - Dataset: AFLW
+ Metrics:
+ NME frontal: 1.2
+ NME full: 1.34
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b139c2323dbfb0addb3baf3a6c348962e232331f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=19,
+ dataset_joints=19,
+ dataset_channel=[
+ list(range(19)),
+ ],
+ inference_channel=list(range(19)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256_dark.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7ab367de704b615b6fa3caf2cce97b60d4e7c91
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256_dark.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=19,
+ dataset_joints=19,
+ dataset_channel=[
+ list(range(19)),
+ ],
+ inference_channel=list(range(19)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/res50_aflw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/res50_aflw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e216574600978f3ca55af0cfb9f97b233ffe313
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/res50_aflw_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/aflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=19,
+ dataset_joints=19,
+ dataset_channel=[
+ list(range(19)),
+ ],
+ inference_channel=list(range(19)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/aflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceAFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_aflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7989b49808187dbd3158070e47fcfa54247853d
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..9cc9af478dc6d89a2d8ea4de23d7f3a6d082b827
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md
@@ -0,0 +1,39 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_hourglass_52](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py) | 256x256 | 0.0586 | [ckpt](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256_20210909.log.json) |
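
For reference, a minimal usage sketch of the model card above: it runs the listed COCO-WholeBody-Face hourglass config and checkpoint on a single image through the mmpose 0.x Python API that ViTPose vendors (`init_pose_model` / `inference_top_down_pose_model`). The image path and the face bounding box below are placeholders, not part of the vendored files.

```python
# Hypothetical usage sketch (not part of the vendored files): run the
# COCO-WholeBody-Face hourglass model listed above on a single image,
# using the mmpose 0.x API that ViTPose vendors.
from mmpose.apis import inference_top_down_pose_model, init_pose_model
from mmpose.datasets import DatasetInfo

config = ('vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py')
checkpoint = ('https://download.openmmlab.com/mmpose/face/hourglass/'
              'hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth')

model = init_pose_model(config, checkpoint, device='cpu')  # or 'cuda:0'

# Resolve the dataset metadata ({{_base_.dataset_info}}) so flip pairs etc. are correct.
dataset_info = DatasetInfo(model.cfg.data['test']['dataset_info'])

# One face bounding box in xywh format; in practice this comes from a face detector.
face_results = [{'bbox': [100.0, 80.0, 180.0, 180.0]}]

pose_results, _ = inference_top_down_pose_model(
    model,
    'demo_face.jpg',                 # placeholder image path
    face_results,
    format='xywh',
    dataset_info=dataset_info)
print(pose_results[0]['keypoints'].shape)  # (68, 3): x, y, score per landmark
```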
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..03761d866e573566090f40f7fb0d917126dd0f41
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
@@ -0,0 +1,20 @@
+Collections:
+- Name: Hourglass
+ Paper:
+ Title: Stacked hourglass networks for human pose estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hourglass.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass52_coco_wholebody_face_256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture:
+ - Hourglass
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_hourglass52_coco_wholebody_face_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0586
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1d4fb8d329059ffe1821d3c18fa4b9c2ba17947
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md
@@ -0,0 +1,39 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256.py) | 256x256 | 0.0569 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..754598e49a7460596b8e393806a69d4bbe9985b8
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
@@ -0,0 +1,20 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_hrnetv2_w18_coco_wholebody_face_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0569
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..4de0db0cd0cb0e3da7bdcf7aebad9c3101519ff5
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md
@@ -0,0 +1,56 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256_dark.py) | 256x256 | 0.0513 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e8b9e895744e742aa5d9ebc2ca9d3a7d28617fe2
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_hrnetv2_w18_coco_wholebody_face_256x256_dark
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0513
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..88722deaaa1075ea55aa104a3bbe7bd9832c70eb
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256_dark.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3998c3bde899e16fdd629f4450c55244f223765
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_w18_coco_wholebody_face_256x256_dark.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..3db8e5f4e651ebd3b945851eefe6c40e725ef87a
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md
@@ -0,0 +1,38 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_mobilenetv2](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face_256x256.py) | 256x256 | 0.0612 | [ckpt](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f1e23e7deea45c7c6df91f3f77fdf400968d288f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
@@ -0,0 +1,20 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_mobilenetv2_coco_wholebody_face_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0612
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1b54e0ca939fc395c9669b1f438f612ea28c221
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/res50_coco_wholebody_face_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/res50_coco_wholebody_face_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c636a329e16715f291eac591d85fb528d7fc6c2
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/res50_coco_wholebody_face_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..b63a74e442d5733ea3ce5bbbd906055acf569119
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_res50](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/res50_coco_wholebody_face_256x256.py) | 256x256 | 0.0566 | [ckpt](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9e25ebc72f5e22c859781486c194c7ff2249f064
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
@@ -0,0 +1,21 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/res50_coco_wholebody_face_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_res50_coco_wholebody_face_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0566
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet50_coco_wholebody_face_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet50_coco_wholebody_face_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b02d71149ec54e6673e1f201c5fc5a0aed47c6d8
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet50_coco_wholebody_face_256x256.py
@@ -0,0 +1,127 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_face.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], key_indicator='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth',
+ backbone=dict(type='SCNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..48029a01caf018a5f190f98e2428b2b329056cad
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md
@@ -0,0 +1,38 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :-------------- | :-----------: | :------: |:------: |:------: |
+| [pose_scnet_50](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet50_coco_wholebody_face_256x256.py) | 256x256 | 0.0565 | [ckpt](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7be429196f67ee588962ce17746659d22b7789d4
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
@@ -0,0 +1,20 @@
+Collections:
+- Name: SCNet
+ Paper:
+ Title: Improving Convolutional Networks with Self-Calibrated Convolutions
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/scnet.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet50_coco_wholebody_face_256x256.py
+ In Collection: SCNet
+ Metadata:
+ Architecture:
+ - SCNet
+ Training Data: COCO-WholeBody-Face
+ Name: topdown_heatmap_scnet50_coco_wholebody_face_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0565
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.md
new file mode 100644
index 0000000000000000000000000000000000000000..051fced17c500d5106b48962235b6a65f369bce1
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.md
@@ -0,0 +1,42 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COFW (ICCV'2013)
+
+```bibtex
+@inproceedings{burgos2013robust,
+ title={Robust face landmark estimation under occlusion},
+ author={Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr},
+ booktitle={Proceedings of the IEEE international conference on computer vision},
+ pages={1513--1520},
+ year={2013}
+}
+```
+
+
+
+Results on COFW dataset
+
+The model is trained on the COFW train set.
+
+| Arch | Input Size | NME | ckpt | log |
+| :-----| :--------: | :----: |:---: | :---: |
+| [pose_hrnetv2_w18](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256.py) | 256x256 | 3.40 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256_20211019.log.json) |
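
As a rough sketch (assuming the mmcv-style config system these vendored files use), the COFW config listed above can be loaded programmatically to confirm how its `_base_` references and `{{_base_.dataset_info}}` placeholder resolve before launching training; the paths below follow this repository's layout.

```python
# Rough sketch (not part of the vendored files): load the COFW config listed
# above with mmcv and inspect the resolved settings before training.
from mmcv import Config

cfg = Config.fromfile(
    'vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'cofw/hrnetv2_w18_cofw_256x256.py')

print(cfg.model.keypoint_head.out_channels)  # 29 -> one heatmap per COFW landmark
print(cfg.evaluation)                        # interval=1, metric=['NME'], save_best='NME'
print(cfg.data.train.ann_file)               # data/cofw/annotations/cofw_train.json
print(type(cfg.data.train.dataset_info))     # dict resolved from {{_base_.dataset_info}}
```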
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..abeb759662cd4826599940eea04474e2e59a8375
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.yml
@@ -0,0 +1,20 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COFW
+ Name: topdown_heatmap_hrnetv2_w18_cofw_256x256
+ Results:
+ - Dataset: COFW
+ Metrics:
+ NME: 3.4
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf316bcff72edaff2de157458cc14dde019262ac
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/cofw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=29,
+ dataset_joints=29,
+ dataset_channel=[
+ list(range(29)),
+ ],
+ inference_channel=list(range(29)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=1.5),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/cofw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
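For orientation, this COFW config (and every config that follows in this patch) is a standard mmpose-style Python config: the `_base_` list pulls in the runtime defaults and the dataset metadata, and `{{_base_.dataset_info}}` is mmcv's base-variable substitution that copies that metadata into each dataset dict. Below is a minimal loading sketch, assuming the vendored ViTPose tree keeps mmpose's 0.x `Config`/builder APIs (an assumption on my part, not something this diff shows); training is then typically launched through the repo's `tools/train.py` with the same config path.

```python
# Minimal sketch of how one of these configs is consumed, assuming the
# vendored ViTPose tree keeps mmpose's 0.x APIs (mmcv.Config, build_posenet);
# the config path is relative to vendor/ViTPose.
from mmcv import Config
from mmpose.models import build_posenet

cfg = Config.fromfile(
    'configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/'
    'hrnetv2_w18_cofw_256x256.py')

# The _base_ files are merged in first; {{_base_.dataset_info}} is mmcv's
# base-variable substitution, which injects the COFW keypoint metadata from
# configs/_base_/datasets/cofw.py into the train/val/test dicts.
print(cfg.data.train.type, cfg.data.train.ann_file)
model = build_posenet(cfg.model)   # TopDown(HRNet-W18 backbone + heatmap head)
print(sum(p.numel() for p in model.parameters()))
```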
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256_dark.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8eb6e27d5522cc9c9883be09a5f3a5e8cb612f2
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_w18_cofw_256x256_dark.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/cofw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=29,
+ dataset_joints=29,
+ dataset_channel=[
+ list(range(29)),
+ ],
+ inference_channel=list(range(29)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/cofw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
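Relative to the plain COFW config above, this `_dark` variant changes only the target encoding (`unbiased_encoding=True`, `sigma=2` in `TopDownGenerateTarget`) and the test-time decoding (`post_process='unbiased'`), i.e. the DarkPose coordinate representation. The NumPy fragment below is an illustration of what the encoding side changes, not the mmpose implementation: the training Gaussian is centred on the exact sub-pixel keypoint instead of the nearest integer pixel.

```python
# Illustration only: biased vs. unbiased Gaussian heatmap encoding.
import numpy as np

def gaussian_heatmap(size, center, sigma, quantize):
    h, w = size
    cx, cy = np.round(center) if quantize else np.asarray(center, float)
    ys, xs = np.mgrid[0:h, 0:w]
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))

kpt = (20.3, 35.7)                    # sub-pixel keypoint on a 64x64 heatmap
biased = gaussian_heatmap((64, 64), kpt, sigma=2.0, quantize=True)
unbiased = gaussian_heatmap((64, 64), kpt, sigma=2.0, quantize=False)
print(np.unravel_index(biased.argmax(), biased.shape),
      np.unravel_index(unbiased.argmax(), unbiased.shape))
```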
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/res50_cofw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/res50_cofw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..13b37c1d4f2b626dd87e194258a1e9297de34158
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/res50_cofw_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/cofw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=29,
+ dataset_joints=29,
+ dataset_channel=[
+ list(range(29)),
+ ],
+ inference_channel=list(range(29)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/cofw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceCOFWDataset',
+ ann_file=f'{data_root}/annotations/cofw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
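All three COFW configs share the same `test_cfg` flags (`flip_test=True`, `shift_heatmap=True`, `modulate_kernel=11`). The sketch below shows the flip-test merge those flags imply, written against plain NumPy arrays rather than mmpose internals; the `flip_pairs` used here are a toy example, since the real pairs come from the COFW dataset metadata.

```python
# Sketch of flip-test averaging: run the model on the image and its mirror,
# flip the mirrored heatmaps back (swapping left/right channels), optionally
# shift one pixel (shift_heatmap=True), and average with the originals.
import numpy as np

def merge_flip_test(heatmaps, flipped_heatmaps, flip_pairs, shift=True):
    back = flipped_heatmaps[:, :, ::-1].copy()   # undo the horizontal flip
    for left, right in flip_pairs:               # swap mirrored landmark channels
        back[[left, right]] = back[[right, left]]
    if shift:                                    # compensate the flip quantization
        back[:, :, 1:] = back[:, :, :-1]
    return 0.5 * (heatmaps + back)

hm = np.random.rand(29, 64, 64)                  # 29 COFW landmark channels
hm_flip = np.random.rand(29, 64, 64)
print(merge_flip_test(hm, hm_flip, flip_pairs=[(0, 1), (2, 3)]).shape)
```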
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..193029918241ace3b208d865513d06583b9f52d3
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.md
@@ -0,0 +1,59 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AdaptiveWingloss (ICCV'2019)
+
+```bibtex
+@inproceedings{wang2019adaptive,
+ title={Adaptive wing loss for robust face alignment via heatmap regression},
+ author={Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
+ booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
+ pages={6971--6981},
+ year={2019}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [pose_hrnetv2_w18_awing](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_awing.py) | 256x256 | 4.02 | 6.94 | 3.96 | 4.78 | 4.59 | 3.85 | 4.28 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing_20211212.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..af61d3013a1692df5268468dde5396144a0db2f1
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_awing.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - AdaptiveWingloss
+ Training Data: WFLW
+ Name: topdown_heatmap_hrnetv2_w18_wflw_256x256_awing
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.59
+ NME expression: 4.28
+ NME illumination: 3.96
+ NME makeup: 3.85
+ NME occlusion: 4.78
+ NME pose: 6.94
+ NME test: 4.02
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e22009a71aca41fbf354942eb730b9128f7a0df
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.md
@@ -0,0 +1,59 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [pose_hrnetv2_w18_dark](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_dark.py) | 256x256 | 3.98 | 6.99 | 3.96 | 4.78 | 4.57 | 3.87 | 4.30 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark_20210125.log.json) |
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f5133d9627cf72043725b9669bf75fed60d3934f
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: WFLW
+ Name: topdown_heatmap_hrnetv2_w18_wflw_256x256_dark
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.57
+ NME expression: 4.3
+ NME illumination: 3.96
+ NME makeup: 3.87
+ NME occlusion: 4.78
+ NME pose: 6.99
+ NME test: 3.98
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d89b32a6330384102da83f804a6d5cfa5f030f8a
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_awing.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_awing.py
new file mode 100644
index 0000000000000000000000000000000000000000..db83c19a5eb30679413ae9c472849c3825e278dc
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_awing.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='AdaptiveWingLoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
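Relative to `hrnetv2_w18_wflw_256x256.py`, this config only swaps the keypoint loss to `AdaptiveWingLoss`. The PyTorch fragment below is a hedged sketch of that loss; the hyper-parameter defaults (`alpha`, `omega`, `eps`, `theta`) are the values reported in the cited AdaptiveWingloss paper and are assumptions here, not values read from this diff.

```python
# Sketch of the adaptive wing loss: log-shaped near the target, linear far
# from it, with A and C chosen so the two branches meet at |delta| = theta.
import torch

def adaptive_wing_loss(pred, target, alpha=2.1, omega=14.0, eps=1.0, theta=0.5):
    delta = (target - pred).abs()
    A = omega * (1 / (1 + (theta / eps) ** (alpha - target))) \
        * (alpha - target) * (theta / eps) ** (alpha - target - 1) / eps
    C = theta * A - omega * torch.log1p((theta / eps) ** (alpha - target))
    loss = torch.where(delta < theta,
                       omega * torch.log1p((delta / eps) ** (alpha - target)),
                       A * delta - C)
    return loss.mean()

pred = torch.rand(2, 98, 64, 64)   # predicted WFLW heatmaps (98 landmarks)
gt = torch.rand(2, 98, 64, 64)     # target heatmaps
print(adaptive_wing_loss(pred, gt).item())
```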
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_dark.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c28f56f47256f521cc09c1bcd9623959ae44861
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256_dark.py
@@ -0,0 +1,160 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.md b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..70ca3ad5e9a053ec183c01bb31b19f6f02a76ca6
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.md
@@ -0,0 +1,42 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log |
+| :-----| :--------: | :------------------: | :------------------: |:---------------------------: |:------------------------: | :------------------: | :--------------: |:-------------------------: |:---: | :---: |
+| [pose_hrnetv2_w18](/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256.py) | 256x256 | 4.06 | 6.98 | 3.99 | 4.83 | 4.59 | 3.92 | 4.33 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_20210125.log.json) |

diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..517aa89aebfceb51e44e9b23ceb0a6084644f6ad
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml
@@ -0,0 +1,26 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_w18_wflw_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: WFLW
+ Name: topdown_heatmap_hrnetv2_w18_wflw_256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.59
+ NME expression: 4.33
+ NME illumination: 3.99
+ NME makeup: 3.92
+ NME occlusion: 4.83
+ NME pose: 6.98
+ NME test: 4.06
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth
diff --git a/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/res50_wflw_256x256.py b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/res50_wflw_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2f5d3443a20e987c96a454c46a188fc0ff9c1db
--- /dev/null
+++ b/vendor/ViTPose/configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/res50_wflw_256x256.py
@@ -0,0 +1,126 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/wflw.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['NME'], save_best='NME')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-3,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 55])
+total_epochs = 60
+log_config = dict(
+ interval=5,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/wflw'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_train.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FaceWFLWDataset',
+ ann_file=f'{data_root}/annotations/face_landmarks_wflw_test.json',
+ img_prefix=f'{data_root}/images/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6818d3dc1d7f9a25bea8ecc73f1c9b0b563ba21b
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,7 @@
+# 2D Fashion Landmark Detection
+
+2D fashion landmark detection (also referred to as fashion alignment) aims to detect the keypoints located at the functional regions of clothes, for example the neckline and the cuff.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_fashion_landmark.md) to prepare data.
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/README.md b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2dacfddfd451a49d3044936fdee995d6dfd29ac4
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/README.md
@@ -0,0 +1,24 @@
+# Deeppose: Human pose estimation via deep neural networks
+
+## Introduction
+
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+DeepPose was the first work to use deep neural networks (DNNs) to tackle the problem of keypoint detection.
+It follows the top-down paradigm, which first detects the bounding boxes and then estimates the pose within each box.
+Here it learns to directly regress the fashion keypoint coordinates.
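The DeepPose configs that follow pair this regression idea with a `GlobalAveragePooling` neck, a `DeepposeRegressionHead`, and `SmoothL1Loss`. Below is a minimal PyTorch sketch of such a head using plain modules; the class and layer names are illustrative, not the mmpose classes.

```python
# Tiny regression head: global average pooling + one linear layer that
# predicts two normalized coordinates per joint, trained with Smooth L1.
import torch
import torch.nn as nn

class TinyDeepPoseHead(nn.Module):
    def __init__(self, in_channels=2048, num_joints=8):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)       # the GlobalAveragePooling neck
        self.fc = nn.Linear(in_channels, num_joints * 2)
        self.num_joints = num_joints

    def forward(self, feats):                     # feats: B x C x H x W backbone output
        x = self.pool(feats).flatten(1)
        return self.fc(x).reshape(-1, self.num_joints, 2)

head = TinyDeepPoseHead()
feats = torch.randn(4, 2048, 8, 6)                # e.g. ResNet-101 stage-4 features
pred = head(feats)
loss = nn.SmoothL1Loss()(pred, torch.rand(4, 8, 2))
print(pred.shape, loss.item())
```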
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a59b0a9a7e34ee2d65da5b2a257b8723dda1f5d5
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_full_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
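Unlike the heatmap configs earlier in this patch, these DeepPose configs build their targets with `TopDownGenerateTargetRegression`. To my reading, that means coordinates normalized by the input size plus a visibility-based weight; the sketch below illustrates that reading and is an assumption about the transform, not the library code.

```python
# Illustration of a regression-style target: normalized (x, y) per joint and
# a weight that zeroes out invisible joints or joints outside the crop.
import numpy as np

def regression_target(joints, joints_visible, image_size=(192, 256)):
    w, h = image_size
    target = joints[:, :2] / np.array([w, h], dtype=np.float32)   # -> [0, 1]
    inside = np.all((target >= 0) & (target <= 1), axis=1)
    target_weight = (joints_visible[:, 0] > 0) & inside
    return target.astype(np.float32), target_weight.astype(np.float32)

joints = np.array([[96.0, 128.0, 0.0], [300.0, 40.0, 0.0]])       # 2 of 8 landmarks
vis = np.array([[1.0], [1.0]])
t, tw = regression_target(joints, vis)
print(t, tw)   # the second joint falls outside the crop, so its weight is 0
```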
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c6af600fd01277781feca695caec496a96ea8db
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_lower_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..77826c51249d196e739712ddf4d94f75d8218668
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res101_deepfashion_upper_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d587c77d356cf75f786d81e197ee42d321f8f4c
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_full_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a08301516aca6e89002000d89cd8c112c7483ec
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_lower_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c89056e2602d72935adef047a873654fbf586fc
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res152_deepfashion_upper_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..27bb30f2a1090a4a8d481f63a6c8dc984b7502c3
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_full_256x192.py
@@ -0,0 +1,140 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0bb9686100a2ddad266f75e82aaa6b0b42b5017
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_lower_256x192.py
@@ -0,0 +1,140 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca1b245053fb3b8d1c23288155e630dc8d4735
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_upper_256x192.py
@@ -0,0 +1,140 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.md b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.md
new file mode 100644
index 0000000000000000000000000000000000000000..d0f3f2a8d8e6a0139d7ecb5c8d9766c1b709a577
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.md
@@ -0,0 +1,75 @@
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+DeepFashion (CVPR'2016)
+
+```bibtex
+@inproceedings{liuLQWTcvpr16DeepFashion,
+ author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
+ title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
+ booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2016}
+}
+```
+
+DeepFashion (ECCV'2016)
+
+```bibtex
+@inproceedings{liuYLWTeccv16FashionLandmark,
+ author = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
+ title = {Fashion Landmark Detection in the Wild},
+ booktitle = {European Conference on Computer Vision (ECCV)},
+ month = {October},
+ year = {2016}
+}
+```
+
+#### Results on DeepFashion val set
+
+|Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :---: | :--------: | :------: | :------: | :------: |:------: |:------: |
+|upper | [deeppose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_upper_256x192.py) | 256x192 | 0.965 | 0.535 | 17.2 | [ckpt](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_upper_256x192-497799fb_20210309.pth) | [log](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_upper_256x192_20210309.log.json) |
+|lower | [deeppose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_lower_256x192.py) | 256x192 | 0.971 | 0.678 | 11.8 | [ckpt](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_lower_256x192-94e0e653_20210309.pth) | [log](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_lower_256x192_20210309.log.json) |
+|full | [deeppose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_full_256x192.py) | 256x192 | 0.983 | 0.602 | 14.0 | [ckpt](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_full_256x192-4e0273e2_20210309.pth) | [log](https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_full_256x192_20210309.log.json) |
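+
+The snippet below is a minimal, untested sketch of how one of these config/checkpoint pairs might be loaded for inference, assuming the vendored ViTPose copy keeps mmpose 0.x's top-down API (`init_pose_model`, `inference_top_down_pose_model`). The image path and the whole-image box are placeholders; a real pipeline would take boxes from a clothes detector.
+
+```python
+from mmpose.apis import inference_top_down_pose_model, init_pose_model
+from mmpose.datasets import DatasetInfo
+
+config_file = ('configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/'
+               'res50_deepfashion_full_256x192.py')
+checkpoint_file = ('https://download.openmmlab.com/mmpose/fashion/deeppose/'
+                   'deeppose_res50_deepfashion_full_256x192-4e0273e2_20210309.pth')
+
+# Build the model and load the released weights.
+model = init_pose_model(config_file, checkpoint_file, device='cpu')
+dataset_info = DatasetInfo(model.cfg.data['test']['dataset_info'])
+
+# One whole-image box in xywh format; a clothes detector would normally supply these.
+clothes_boxes = [{'bbox': [0, 0, 192, 256]}]
+
+pose_results, _ = inference_top_down_pose_model(
+    model,
+    'demo_garment.jpg',  # placeholder image path
+    clothes_boxes,
+    format='xywh',
+    dataset_info=dataset_info)
+
+print(pose_results[0]['keypoints'])  # (8, 3) array: x, y, score per 'full' landmark
+```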
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml
new file mode 100644
index 0000000000000000000000000000000000000000..392ac02117ca9849f94e28ad868ea78366fd4404
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_upper_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - ResNet
+ Training Data: DeepFashion
+ Name: deeppose_res50_deepfashion_upper_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.535
+ EPE: 17.2
+ PCK@0.2: 0.965
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_upper_256x192-497799fb_20210309.pth
+- Config: configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_lower_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion
+ Name: deeppose_res50_deepfashion_lower_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.678
+ EPE: 11.8
+ PCK@0.2: 0.971
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_lower_256x192-94e0e653_20210309.pth
+- Config: configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/res50_deepfashion_full_256x192.py
+ In Collection: ResNet
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion
+ Name: deeppose_res50_deepfashion_full_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.602
+ EPE: 14.0
+ PCK@0.2: 0.983
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/deeppose/deeppose_res50_deepfashion_full_256x192-4e0273e2_20210309.pth
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7eaa145f56aa800ccd4449bf2a7d293587c92e2a
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,9 @@
+# Top-down heatmap-based fashion keypoint estimation
+
+Top-down methods divide the task into two stages: clothes detection and fashion keypoint estimation.
+
+A clothes detector is run first, and keypoints are then estimated inside each detected clothing bounding box.
+Instead of regressing keypoint coordinates directly, the pose estimator produces one heatmap per keypoint,
+encoding the likelihood of each pixel being that keypoint's location.
+
+Various neural network architectures have been proposed to improve performance; a minimal decoding sketch is
+given below.
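+
+As a rough, illustrative sketch of the decoding step (not code from this repository; shapes follow the 256x192 configs in this folder), the per-keypoint argmax of a `(K, H, W)` heatmap tensor can be mapped back to input-image coordinates like this:
+
+```python
+import numpy as np
+
+
+def decode_heatmaps(heatmaps: np.ndarray, image_size=(192, 256)):
+    """Minimal argmax decoding of (K, H, W) heatmaps into image-space keypoints.
+
+    Real post-processing (as configured in these files) additionally applies
+    flip averaging, sub-pixel refinement and, for the *_udp configs, unbiased
+    (UDP) alignment.
+    """
+    num_keypoints, height, width = heatmaps.shape
+    flat = heatmaps.reshape(num_keypoints, -1)
+    ys, xs = np.unravel_index(flat.argmax(axis=1), (height, width))
+    scores = flat.max(axis=1)
+    # Rescale from heatmap resolution (e.g. 48x64) to input resolution (e.g. 192x256).
+    coords = np.stack([xs * image_size[0] / width,
+                       ys * image_size[1] / height], axis=1)
+    return coords.astype(np.float32), scores
+
+
+if __name__ == '__main__':
+    fake = np.random.rand(8, 64, 48).astype(np.float32)  # 8 landmarks, 64x48 heatmaps
+    coords, scores = decode_heatmaps(fake)
+    print(coords.shape, scores.shape)  # (8, 2) (8,)
+```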
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d70d51ee061295b8219e1f09a09437fcceb70110
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a885d3099e5e52bde40c84d24ad2327981e598c
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_full_256x192_udp.py
@@ -0,0 +1,177 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a81cfc1bd4d14e3b33e54ab2ad35b7364ccbc82
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192.py
@@ -0,0 +1,169 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..49d7b7d887a05b63c3ee1abd738d8d16d56d7697
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_lower_256x192_udp.py
@@ -0,0 +1,176 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8bf5bcae11e6bd2afc46b4e2e7a6bd85ea1e7a2
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5b3bbfc39ab9004b9a65e1ddd3767b117ca11af
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w32_deepfashion_upper_256x192_udp.py
@@ -0,0 +1,177 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e61e6a3975770c1d7230a9a13096928dd1b3286
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..43e039db6c9234cc6eb7e288c641cf72e50392be
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_full_256x192_udp.py
@@ -0,0 +1,177 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
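+
+# Compared with the plain hrnet_w48_deepfashion_full_256x192.py config above,
+# this *_udp variant switches the coordinate handling to Unbiased Data
+# Processing (UDP): TopDownAffine and test_cfg get use_udp=True, the target
+# generator uses encoding='UDP' with a Gaussian heatmap target, and
+# shift_heatmap is turned off (the empirical one-pixel shift used by the
+# default flip-test decoding is not needed under UDP). Backbone, schedule and
+# data settings are unchanged.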
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b03d6801265c24a364686bda8a6aa55ea7867e61
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..c42bb4aa15c86d72107fe8cf3616bc74ba370efa
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_lower_256x192_udp.py
@@ -0,0 +1,177 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa14b3c2bb524adc15247e2b7632cec3c726b45d
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192.py
@@ -0,0 +1,170 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192_udp.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f01adb699a1ce9c48281e1f78a6b51f1de7b476
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/hrnet_w48_deepfashion_upper_256x192_udp.py
@@ -0,0 +1,177 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..038111db308e57fcc53f69c0de2b8ed99c4c872e
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_full_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
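+
+# Single-image inference sketch (assumptions: the mmpose 0.x top-down API that
+# this vendored ViTPose tree follows; the checkpoint and image paths below are
+# illustrative and not shipped with the repo). The guard keeps normal config
+# loading unaffected.
+if __name__ == '__main__':
+    from mmpose.apis import inference_top_down_pose_model, init_pose_model
+    from mmpose.datasets import DatasetInfo
+
+    model = init_pose_model(
+        __file__, 'work_dirs/res101_deepfashion_full/latest.pth', device='cpu')
+    dataset_info = DatasetInfo(model.cfg.data.test.dataset_info)
+    # One whole-image box in xywh format; a real pipeline would take boxes
+    # from a clothing-item detector instead.
+    person_results = [{'bbox': [0, 0, 192, 256]}]
+    pose_results, _ = inference_top_down_pose_model(
+        model, 'demo.jpg', person_results, format='xywh',
+        dataset_info=dataset_info)
+    print(pose_results[0]['keypoints'])  # (num_joints, 3): x, y, score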
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..530161a5813c090968b954df4cb2f8a495656377
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_lower_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf3b7d2e0bfa1e31cd437f28ade2b8244495b1f3
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res101_deepfashion_upper_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..da19ce28ad2f3b0408dc1802e586727272103b02
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_full_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfe78cf8a29ee60371755995a52dfce4e7eeec26
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_lower_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..93d0ef51654f6473728ba725d1e0acfffd078711
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res152_deepfashion_upper_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_full_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_full_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..559cb3a2298be62bc06a47b2561c1ceda55247a3
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_full_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_full.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_full_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_lower_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_lower_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6be9538ccf0b62a8a6e3501a429633c0a9dc74ec
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_lower_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_lower.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=4,
+ dataset_joints=4,
+ dataset_channel=[
+ [0, 1, 2, 3],
+ ],
+ inference_channel=[0, 1, 2, 3])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_lower_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='lower',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_upper_256x192.py b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_upper_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e45afeccb104c505b492d2d94620f903138b75e
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_upper_256x192.py
@@ -0,0 +1,139 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/deepfashion_upper.py'
+]
+evaluation = dict(interval=10, metric='PCK', save_best='PCK')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=6,
+ dataset_joints=6,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/fld'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_train.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_val.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='DeepFashionDataset',
+ ann_file=f'{data_root}/annotations/fld_upper_test.json',
+ img_prefix=f'{data_root}/img/',
+ subset='upper',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.md b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.md
new file mode 100644
index 0000000000000000000000000000000000000000..ca23c8d1e0abe08f0482e81f32869c0fb7778161
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.md
@@ -0,0 +1,75 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+DeepFashion (CVPR'2016)
+
+```bibtex
+@inproceedings{liuLQWTcvpr16DeepFashion,
+ author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
+ title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
+ booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2016}
+}
+```
+
+
+
+
+
+
+DeepFashion (ECCV'2016)
+
+```bibtex
+@inproceedings{liuYLWTeccv16FashionLandmark,
+ author = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
+ title = {Fashion Landmark Detection in the Wild},
+ booktitle = {European Conference on Computer Vision (ECCV)},
+ month = {October},
+ year = {2016}
+ }
+```
+
+
+
+Results on DeepFashion val set
+
+|Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :---: | :--------: | :------: | :------: | :------: |:------: |:------: |
+|upper | [pose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_upper_256x192.py) | 256x192 | 0.954 | 0.578 | 16.8 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_upper_256x192-41794f03_20210124.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_upper_256x192_20210124.log.json) |
+|lower | [pose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_lower_256x192.py) | 256x192 | 0.965 | 0.744 | 10.5 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_lower_256x192-1292a839_20210124.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_lower_256x192_20210124.log.json) |
+|full | [pose_resnet_50](/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_full_256x192.py) | 256x192 | 0.977 | 0.664 | 12.7 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_full_256x192-0dbd6e42_20210124.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_full_256x192_20210124.log.json) |
diff --git a/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bd871418d2bc6bb1ca532f51bc7464b215af4dea
--- /dev/null
+++ b/vendor/ViTPose/configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_upper_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: DeepFashion
+ Name: topdown_heatmap_res50_deepfashion_upper_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.578
+ EPE: 16.8
+ PCK@0.2: 0.954
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_upper_256x192-41794f03_20210124.pth
+- Config: configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_lower_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion
+ Name: topdown_heatmap_res50_deepfashion_lower_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.744
+ EPE: 10.5
+ PCK@0.2: 0.965
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_lower_256x192-1292a839_20210124.pth
+- Config: configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/res50_deepfashion_full_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion
+ Name: topdown_heatmap_res50_deepfashion_full_256x192
+ Results:
+ - Dataset: DeepFashion
+ Metrics:
+ AUC: 0.664
+ EPE: 12.7
+ PCK@0.2: 0.977
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion_full_256x192-0dbd6e42_20210124.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b8047eafa65f864d8797ab6faf834f3fbf5176a3
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,16 @@
+# 2D Hand Pose Estimation
+
+2D hand pose estimation is the task of detecting the pose (i.e., the keypoints) of the hand from an input image.
+
+Normally, the input images are cropped hand images with the hand located at the center,
+or the rough location (or bounding box) of the hand is provided.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_hand_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/2d_hand_demo.md) to run demos.
+
+
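+## Quick inference sketch
+
+As a minimal sketch (not an officially supported snippet), a top-down hand model from the configs under this
+directory can be run through the high-level `mmpose.apis` helpers. The helper names below exist in the mmpose
+version vendored here, but exact signatures can differ across versions; the image path and bounding box are
+placeholders, while the config path and checkpoint URL come from the OneHand10K entries in this directory.
+
+```python
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+# Config/checkpoint pair listed in deeppose/onehand10k (path relative to the ViTPose config root).
+config_file = 'configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py'
+checkpoint_file = ('https://download.openmmlab.com/mmpose/hand/deeppose/'
+                   'deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth')
+
+pose_model = init_pose_model(config_file, checkpoint_file, device='cuda:0')
+
+# Top-down models expect hand boxes from an external detector; this box is a placeholder (xyxy format).
+hand_results = [{'bbox': [100, 100, 300, 300]}]
+
+pose_results, _ = inference_top_down_pose_model(
+    pose_model,
+    'demo_hand.jpg',          # placeholder image path
+    hand_results,
+    format='xyxy',
+    dataset='OneHand10KDataset')
+
+# Draw the 21 predicted keypoints on the image.
+vis_pose_result(pose_model, 'demo_hand.jpg', pose_results,
+                dataset='OneHand10KDataset', out_file='vis_hand.jpg')
+```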
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/README.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..846d120515552a9ced401bb0bee64dbe3b76a74e
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/README.md
@@ -0,0 +1,24 @@
+# Deeppose: Human pose estimation via deep neural networks
+
+## Introduction
+
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+DeepPose was the first work to apply deep neural networks (DNNs) to the problem of keypoint detection.
+It follows the top-down paradigm, which first detects bounding boxes and then estimates the pose within each box.
+The model learns to regress the hand keypoint coordinates directly, without an intermediate heatmap representation.
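+
+The configs in this directory pair a ResNet-50 backbone with global average pooling and a `DeepposeRegressionHead`.
+As a minimal sketch of the regression idea only (not the vendored head; the sizes simply mirror the 2048-channel
+ResNet-50 output and the 21 hand joints used below), the head reduces to one linear layer that maps pooled features
+to `2 * num_joints` coordinates:
+
+```python
+import torch
+import torch.nn as nn
+
+
+class TinyRegressionHead(nn.Module):
+    """Illustrative DeepPose-style head: pooled backbone features -> (x, y) per joint."""
+
+    def __init__(self, in_channels=2048, num_joints=21):
+        super().__init__()
+        self.fc = nn.Linear(in_channels, num_joints * 2)
+
+    def forward(self, feats):
+        # feats: backbone feature map of shape (N, C, H, W), e.g. ResNet-50 stage 4.
+        pooled = feats.mean(dim=(2, 3))      # global average pooling -> (N, C)
+        coords = self.fc(pooled)             # (N, num_joints * 2)
+        return coords.view(feats.shape[0], -1, 2)
+
+
+head = TinyRegressionHead()
+dummy = torch.randn(2, 2048, 8, 8)           # stand-in for backbone features
+print(head(dummy).shape)                     # torch.Size([2, 21, 2])
+```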
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fdde7549739c6a2adfffbbdddf77a8c2def4f6c
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..42b2a01652d81cb09801e9cf96f3453d184a6b95
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.md
@@ -0,0 +1,59 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [deeppose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py) | 256x256 | 0.990 | 0.486 | 34.28 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..994a32a658dbd49b775b943331eef01ac099a798
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/res50_onehand10k_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: OneHand10K
+ Name: deeppose_res50_onehand10k_256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.486
+ EPE: 34.28
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/res50_panoptic2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/res50_panoptic2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0fd4d3738ecc2d43797da61ec2c4cb6465e7a12
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/res50_panoptic2d_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..b5082315a9d7be26bdc4aca87324a32f996e76ae
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.md
@@ -0,0 +1,56 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [deeppose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/res50_panoptic2d_256x256.py) | 256x256 | 0.999 | 0.686 | 9.36 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_panoptic_256x256-8a745183_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_panoptic_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1cf7747b501fcf6cbdfc41b90e2978136d94e405
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/res50_panoptic2d_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: CMU Panoptic HandDB
+ Name: deeppose_res50_panoptic2d_256x256
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.686
+ EPE: 9.36
+ PCKh@0.7: 0.999
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_panoptic_256x256-8a745183_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/res50_rhd2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/res50_rhd2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdcfb45cabe874c9518bead088e685730f1c4afb
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/res50_rhd2d_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..292552054428493f0b4b8941d8928fa089b77cd9
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.md
@@ -0,0 +1,57 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [deeppose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/res50_rhd2d_256x256.py) | 256x256 | 0.988 | 0.865 | 3.29 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5ba15ad3c865e0792a9a42f1b3325a49263e7361
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: ResNet
+ Paper:
+ Title: Deep residual learning for image recognition
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/resnet.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/res50_rhd2d_256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: RHD
+ Name: deeppose_res50_rhd2d_256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.865
+ EPE: 3.29
+ PCK@0.2: 0.988
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..82d150bd1f9479c8a9794f2d137f0ddcdb862279
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,9 @@
+# Top-down heatmap-based hand keypoint estimation
+
+Top-down methods divide the task into two stages: hand detection and hand keypoint estimation.
+
+They perform hand detection first, followed by hand keypoint estimation given the hand bounding boxes.
+Instead of regressing keypoint coordinates directly, the pose estimator produces heatmaps that represent the
+likelihood of each location being a keypoint.
+
+Various neural network models have been proposed for better performance.
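+
+As a minimal sketch of the decoding step only (the post-processing configured below additionally applies flip
+testing, heatmap shifting and kernel modulation), a heatmap can be turned into a keypoint by taking its per-joint
+argmax and rescaling to the input resolution. The 21-joint, 64x64 sizes simply mirror the hand configs in this
+directory:
+
+```python
+import numpy as np
+
+
+def decode_heatmaps(heatmaps, image_size=(256, 256)):
+    """Naive decoding: per-joint argmax plus peak value as a crude confidence score.
+
+    heatmaps: array of shape (num_joints, H, W).
+    Returns an array of shape (num_joints, 3) holding (x, y, score) in image coordinates.
+    """
+    num_joints, h, w = heatmaps.shape
+    keypoints = np.zeros((num_joints, 3), dtype=np.float32)
+    for j in range(num_joints):
+        y, x = np.unravel_index(np.argmax(heatmaps[j]), (h, w))
+        keypoints[j] = (x * image_size[0] / w,   # heatmap width  -> image width
+                        y * image_size[1] / h,   # heatmap height -> image height
+                        heatmaps[j, y, x])
+    return keypoints
+
+
+fake = np.random.rand(21, 64, 64).astype(np.float32)  # e.g. the 64x64 heatmaps produced by these configs
+print(decode_heatmaps(fake).shape)                     # (21, 3)
+```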
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass52_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass52_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e79ae581970c2c83dec872da365f3b1b8d016b5
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass52_coco_wholebody_hand_256x256.py
@@ -0,0 +1,137 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=channel_cfg['num_output_channels'],
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..72438883fa6eeb95fb413c8963dc4155743a75dd
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md
@@ -0,0 +1,39 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hourglass_52](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass52_coco_wholebody_hand_256x256.py) | 256x256 | 0.804 | 0.835 | 4.54 | [ckpt](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..426952c6f4f658b6b332a3b78e369f955952baf9
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: Hourglass
+ Paper:
+ Title: Stacked hourglass networks for human pose estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hourglass.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass52_coco_wholebody_hand_256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture:
+ - Hourglass
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_hourglass52_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.835
+ EPE: 4.54
+ PCK@0.2: 0.804
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..15f08e168e484e2775f7f35dd02c832bc6f0393f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md
@@ -0,0 +1,39 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256.py) | 256x256 | 0.813 | 0.840 | 4.39 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1a4b4445d9985fbb1d4e18174127f95ded5269ce
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_hrnetv2_w18_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.84
+ EPE: 4.39
+ PCK@0.2: 0.813
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..e3af94b65c39dca554958c64fadcb7966a6f8407
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md
@@ -0,0 +1,56 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py) | 256x256 | 0.814 | 0.840 | 4.37 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark_20210908.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..31d0a38ab797914e38704c49c85fd0f84ab5f392
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_hrnetv2_w18_coco_wholebody_hand_256x256_dark
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.84
+ EPE: 4.37
+ PCK@0.2: 0.814
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7679379361e187d0e79bace42ecb81ede8f7d593
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cc62f77e4e28ada6ec44e4504e8aad6cdddd34f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..51a9d78e0a358ba91cb7ae76d27750ce54b7a9ec
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md
@@ -0,0 +1,37 @@
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [LiteHRNet-18](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py) | 256x256 | 0.795 | 0.830 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d7751dcb179cc4a0cfa01e07e2be863059f43e99
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: LiteHRNet
+ Paper:
+ Title: 'Lite-HRNet: A Lightweight High-Resolution Network'
+ URL: https://arxiv.org/abs/2104.06403
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/litehrnet.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture:
+ - LiteHRNet
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_litehrnet_w18_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.83
+ EPE: 4.77
+ PCK@0.2: 0.795
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..04c526d860eb42b05a316b700fb99a9cad492edf
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py
@@ -0,0 +1,152 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=40,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
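
All of these configs are composed through `_base_` files, and `{{_base_.dataset_info}}` is substituted by mmcv when the file is parsed. A minimal sketch of loading the LiteHRNet config added above and inspecting the merged result (assumes the mmcv `Config` API vendored with ViTPose and is run from `vendor/ViTPose/` so the path resolves):

```python
# Minimal sketch: resolve _base_ inheritance and the {{_base_.dataset_info}}
# substitution by loading the config with mmcv.
from mmcv import Config

cfg = Config.fromfile(
    'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'coco_wholebody_hand/litehrnet_w18_coco_wholebody_hand_256x256.py')

print(cfg.model.backbone.type)   # 'LiteHRNet'
print(cfg.data.train.type)       # 'HandCocoWholeBodyDataset'
# dataset_info was filled in from _base_/datasets/coco_wholebody_hand.py
print(list(cfg.data.train.dataset_info.keys())[:3])
```
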
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..7fa4afc8b4656d10e10d5a5fc3b11c0379fd896e
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md
@@ -0,0 +1,38 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenetv2](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand_256x256.py) | 256x256 | 0.795 | 0.829 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aa0df1bf7ce36469e4b07496205c3b195e30b66b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_mobilenetv2_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.829
+ EPE: 4.77
+ PCK@0.2: 0.795
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bd8af1d2c3989faffd246875d89a07ee1de4298
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8693eb219243bcc844fe4e7a41f8d05daa2732a3
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..0d2781ba7e79c6e0272acec96bf1d8e29b5ff9fa
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py) | 256x256 | 0.800 | 0.833 | 4.64 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d1e22ea7ad4946d196f59e76c31f83e1aea3d89b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_res50_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.833
+ EPE: 4.64
+ PCK@0.2: 0.8
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth
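
For reference, a model-zoo checkpoint like the one listed above is usually driven through the top-down inference helpers of the mmpose 0.x code that ViTPose vendors. The sketch below assumes that API (`init_pose_model`, `inference_top_down_pose_model`, `vis_pose_result`, `DatasetInfo`) and a hand bounding box supplied by some external detector; the image path and box coordinates are hypothetical, and the config path resolves when run from `vendor/ViTPose/`.

```python
# Sketch of top-down inference with the res50 COCO-WholeBody-Hand checkpoint
# listed above (assumed mmpose 0.x API; image and bbox are placeholders).
from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
                         vis_pose_result)
from mmpose.datasets import DatasetInfo

config = ('configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'coco_wholebody_hand/res50_coco_wholebody_hand_256x256.py')
checkpoint = ('https://download.openmmlab.com/mmpose/hand/resnet/'
              'res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth')

model = init_pose_model(config, checkpoint, device='cpu')
dataset_info = DatasetInfo(model.cfg.data['test']['dataset_info'])

person_results = [{'bbox': [100, 120, 180, 180]}]  # one hand box, xywh (placeholder)
pose_results, _ = inference_top_down_pose_model(
    model, 'hand.jpg', person_results, format='xywh',
    dataset=model.cfg.data['test']['type'], dataset_info=dataset_info)

vis_pose_result(model, 'hand.jpg', pose_results,
                dataset_info=dataset_info, out_file='hand_vis.jpg')
```
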
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet50_coco_wholebody_hand_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet50_coco_wholebody_hand_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa9f9e41c74061e862bb211a9f6a57132dc7aa1f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet50_coco_wholebody_hand_256x256.py
@@ -0,0 +1,132 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody_hand.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth',
+ backbone=dict(type='SCNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='HandCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
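
The `test_cfg` shared by these models averages two forward passes when `flip_test=True`: the heatmaps of the original crop and those of a horizontally flipped crop (flipped back and with mirrored channels swapped via the dataset's `flip_pairs`), with `shift_heatmap=True` shifting the flipped map by one pixel before averaging. A rough numpy illustration of that merge step, not the vendored implementation:

```python
# Rough illustration of the flip-test merge enabled by flip_test=True /
# shift_heatmap=True above (numpy toy, batch dimension omitted).
import numpy as np

def flip_test_merge(heatmaps, heatmaps_flipped, flip_pairs, shift_heatmap=True):
    merged = heatmaps_flipped[..., ::-1].copy()        # undo the horizontal flip
    for left, right in flip_pairs:                      # swap mirrored keypoint channels
        merged[[left, right]] = merged[[right, left]]
    if shift_heatmap:                                   # 1-pixel shift along the width
        merged[..., 1:] = merged[..., :-1].copy()
    return (heatmaps + merged) / 2.0

# Single-hand datasets such as these 21-keypoint configs typically have no
# mirrored keypoint pairs, so flip_pairs is empty.
out = flip_test_merge(np.random.rand(21, 64, 64), np.random.rand(21, 64, 64), flip_pairs=[])
print(out.shape)  # (21, 64, 64)
```
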
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..5a7304e4db04f0779f43d53c8c293b6ee1bfc81a
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md
@@ -0,0 +1,38 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_scnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet50_coco_wholebody_hand_256x256.py) | 256x256 | 0.803 | 0.834 | 4.55 | [ckpt](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..241ba81139273842bfbc699d96dac64e572bfd4f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: SCNet
+ Paper:
+ Title: Improving Convolutional Networks with Self-Calibrated Convolutions
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/scnet.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet50_coco_wholebody_hand_256x256.py
+ In Collection: SCNet
+ Metadata:
+ Architecture:
+ - SCNet
+ Training Data: COCO-WholeBody-Hand
+ Name: topdown_heatmap_scnet50_coco_wholebody_hand_256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.834
+ EPE: 4.55
+ PCK@0.2: 0.803
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/hrnetv2_w18_freihand2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/hrnetv2_w18_freihand2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9fc516480933a0302a36d844071714edd68dc4a
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/hrnetv2_w18_freihand2d_256x256.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/freihand2d.py'
+]
+evaluation = dict(
+ interval=10, metric=['PCK', 'AUC', 'EPE'], key_indicator='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/freihand'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
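
Unlike the single-branch backbones above, this HRNetV2 head consumes all four resolution branches (`in_channels=[18, 36, 72, 144]`, `in_index=(0, 1, 2, 3)`) with `input_transform='resize_concat'`: the lower-resolution branches are upsampled to the highest-resolution one and concatenated (18 + 36 + 72 + 144 = 270 channels) before the final 1x1 conv. A rough PyTorch sketch of that transform (shapes assume a 256x256 input; illustrative, not the vendored head):

```python
# Rough sketch of input_transform='resize_concat' for the HRNetV2 head above.
import torch
import torch.nn.functional as F

def resize_concat(feats):
    """feats: branch outputs with 18/36/72/144 channels at 64/32/16/8 px."""
    target = feats[0].shape[-2:]
    upsampled = [feats[0]] + [
        F.interpolate(f, size=target, mode='bilinear', align_corners=False)
        for f in feats[1:]
    ]
    return torch.cat(upsampled, dim=1)  # (N, 270, 64, 64)

feats = [torch.randn(1, c, s, s) for c, s in zip((18, 36, 72, 144), (64, 32, 16, 8))]
print(resize_concat(feats).shape)       # torch.Size([1, 270, 64, 64])
```
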
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7d774bb35554e62a6f3aa9e3a1bef8cc4bf6a49
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/freihand2d.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[50, 70])
+total_epochs = 100
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[224, 224],
+ heatmap_size=[56, 56],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/freihand'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='FreiHandDataset',
+ ann_file=f'{data_root}/annotations/freihand_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
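
The 224x224 input here keeps the usual 1/4 heatmap ratio (`heatmap_size=[56, 56]`): ResNet-50 downsamples by 32 and the SimpleBaseline-style head upsamples by 8, assuming its default of three stride-2 deconv layers (not overridden above). A quick check of that bookkeeping:

```python
# Resolution bookkeeping for this config (assumes the head's default of
# three stride-2 deconv layers).
image_size, backbone_stride, num_deconv, deconv_stride = 224, 32, 3, 2
print(image_size // backbone_stride * deconv_stride ** num_deconv)  # 56
```
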
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..55629b23ea2e462db7b998ca785a8778b34d88c1
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.md
@@ -0,0 +1,57 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+FreiHand (ICCV'2019)
+
+```bibtex
+@inproceedings{zimmermann2019freihand,
+ title={Freihand: A dataset for markerless capture of hand pose and shape from single rgb images},
+ author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={813--822},
+ year={2019}
+}
+```
+
+
+
+Results on FreiHand val & test set
+
+| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |
+|val| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py) | 224x224 | 0.993 | 0.868 | 3.25 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224_20200914.log.json) |
+|test| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py) | 224x224 | 0.992 | 0.868 | 3.27 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224_20200914.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f83395f97263db320f26e629cbbd62ba8368842b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.yml
@@ -0,0 +1,37 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: FreiHand
+ Name: topdown_heatmap_res50_freihand_224x224
+ Results:
+ - Dataset: FreiHand
+ Metrics:
+ AUC: 0.868
+ EPE: 3.25
+ PCK@0.2: 0.993
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/res50_freihand2d_224x224.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: FreiHand
+ Name: topdown_heatmap_res50_freihand_224x224
+ Results:
+ - Dataset: FreiHand
+ Metrics:
+ AUC: 0.868
+ EPE: 3.27
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_base_interhand2d_all_256x192.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_base_interhand2d_all_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..275b3a3a0b72d3333077dbcba548ede0ada43de0
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_base_interhand2d_all_256x192.py
@@ -0,0 +1,162 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
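
Shape bookkeeping for the ViTPose-B backbone above: a 256x192 crop with `patch_size=16` yields a 16x12 grid of 192 tokens of width `embed_dim=768`; reshaped to a 16x12 feature map, the head's two deconv layers upsample it by 4x to 64x48, matching `heatmap_size=[48, 64]` (mmpose lists sizes as [width, height]). An illustrative shape walk-through:

```python
# Illustrative shape walk-through for the ViTPose-B interhand2d config above.
import torch

img_h, img_w, patch, embed_dim = 256, 192, 16, 768
tokens_h, tokens_w = img_h // patch, img_w // patch       # 16 x 12 = 192 tokens

tokens = torch.randn(1, tokens_h * tokens_w, embed_dim)    # ViT output
feat = tokens.transpose(1, 2).reshape(1, embed_dim, tokens_h, tokens_w)

# num_deconv_layers=2 with stride-2 deconvs -> 4x spatial upsampling
heat_h, heat_w = tokens_h * 4, tokens_w * 4
print(feat.shape, (heat_h, heat_w))   # torch.Size([1, 768, 16, 12]) (64, 48)
```
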
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_huge_interhand2d_all_256x192.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_huge_interhand2d_all_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2af0f77d17f2153f8454b2e25c59e83239890144
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_huge_interhand2d_all_256x192.py
@@ -0,0 +1,162 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_large_interhand2d_all_256x192.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_large_interhand2d_all_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..72c33a72f4d596479ff54c71bebbf66242e0c29d
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_large_interhand2d_all_256x192.py
@@ -0,0 +1,162 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_small_interhand2d_all_256x192.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_small_interhand2d_all_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d344dcaa937768f822dacfe6baf6d9c5c4efea0c
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/ViTPose_small_interhand2d_all_256x192.py
@@ -0,0 +1,162 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
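
A minimal sketch, assuming the vendored mmcv 1.x `Config` API and that the relative `_base_` files referenced above are present on disk, of how a config like `ViTPose_small_interhand2d_all_256x192.py` can be loaded and inspected before launching training:

```python
# Minimal sketch (assumes the vendored mmcv 1.x Config API; the _base_ files
# referenced above must exist so {{_base_.dataset_info}} can resolve).
from mmcv import Config

cfg_path = ('vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
            'interhand2d/ViTPose_small_interhand2d_all_256x192.py')
cfg = Config.fromfile(cfg_path)   # resolves _base_ inheritance and base-variable references

print(cfg.model.backbone.type)    # 'ViT'
print(cfg.data_cfg.image_size)    # [192, 256]
print(cfg.data.train.ann_file)    # data/interhand2.6m/annotations/all/InterHand2.6M_train_data.json
```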
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5d4eac8170c2e1826c242caa4e5a179f8f5dc77
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b0fc2b1382ceff02bf4d0aa4514b4bbded9751e
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/human_annot/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b0cff66bc8a98de7a39581b048e01240db11dae
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
@@ -0,0 +1,146 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand2d.py'
+]
+checkpoint_config = dict(interval=5)
+evaluation = dict(interval=5, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[40, 50])
+total_epochs = 60
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand2DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..197e53d44cbda53397a2b57f0a61cca10378d1c0
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.md
@@ -0,0 +1,66 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+InterHand2.6M (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+Results on InterHand2.6M val & test set
+
+|Train Set| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--- | :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |
+|Human_annot|val(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py) | 256x256 | 0.973 | 0.828 | 5.15 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human_20201029.log.json) |
+|Human_annot|test(H)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py) | 256x256 | 0.973 | 0.826 | 5.27 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human_20201029.log.json) |
+|Human_annot|test(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py) | 256x256 | 0.975 | 0.841 | 4.90 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human_20201029.log.json) |
+|Human_annot|test(H+M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py) | 256x256 | 0.975 | 0.839 | 4.97 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human_20201029.log.json) |
+|Machine_annot|val(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py) | 256x256 | 0.970 | 0.824 | 5.39 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine_20201102.log.json) |
+|Machine_annot|test(H)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py) | 256x256 | 0.969 | 0.821 | 5.52 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine_20201102.log.json) |
+|Machine_annot|test(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py) | 256x256 | 0.972 | 0.838 | 5.03 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine_20201102.log.json) |
+|Machine_annot|test(H+M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py) | 256x256 | 0.972 | 0.837 | 5.11 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine_20201102.log.json) |
+|All|val(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py) | 256x256 | 0.977 | 0.840 | 4.66 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all_20201102.log.json) |
+|All|test(H)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py) | 256x256 | 0.979 | 0.839 | 4.65 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all_20201102.log.json) |
+|All|test(M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py) | 256x256 | 0.979 | 0.838 | 4.42 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all_20201102.log.json) |
+|All|test(H+M)| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py) | 256x256 | 0.979 | 0.851 | 4.46 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all_20201102.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ff9ca057a76e998db1da1871c8376f81f320a199
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.yml
@@ -0,0 +1,177 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_human_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.828
+ EPE: 5.15
+ PCK@0.2: 0.973
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_human_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.826
+ EPE: 5.27
+ PCK@0.2: 0.973
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_human_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.841
+ EPE: 4.9
+ PCK@0.2: 0.975
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_human_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_human_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.839
+ EPE: 4.97
+ PCK@0.2: 0.975
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_human-77b27d1a_20201029.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_machine_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.824
+ EPE: 5.39
+ PCK@0.2: 0.97
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_machine_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.821
+ EPE: 5.52
+ PCK@0.2: 0.969
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_machine_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.838
+ EPE: 5.03
+ PCK@0.2: 0.972
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_machine_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_machine_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.837
+ EPE: 5.11
+ PCK@0.2: 0.972
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_machine-8f3efe9a_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.84
+ EPE: 4.66
+ PCK@0.2: 0.977
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.839
+ EPE: 4.65
+ PCK@0.2: 0.979
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.838
+ EPE: 4.42
+ PCK@0.2: 0.979
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/res50_interhand2d_all_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: topdown_heatmap_res50_interhand2d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ AUC: 0.851
+ EPE: 4.46
+ PCK@0.2: 0.979
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_interhand2d_256x256_all-78cc95d4_20201102.pth
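
The model-index file above is plain YAML (the `&id001`/`*id001` pair is a standard anchor/alias for the shared architecture list), so the reported metrics can be read back programmatically. A small sketch, assuming PyYAML is installed:

```python
# Small sketch: list each model-index entry with its reported metrics.
# Assumes PyYAML is available; the &id001/*id001 anchor is resolved by the parser.
import yaml

yml_path = ('vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
            'interhand2d/resnet_interhand2d.yml')
with open(yml_path) as f:
    index = yaml.safe_load(f)

for entry in index['Models']:
    metrics = entry['Results'][0]['Metrics']
    print(entry['Config'].split('/')[-1],
          'AUC', metrics['AUC'], 'EPE', metrics['EPE'], 'PCK@0.2', metrics['PCK@0.2'])
```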
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6d40948042926792cabb2d4ce649458db06700b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_dark.py) | 256x256 | 0.990 | 0.573 | 23.84 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..17b2901b36f1c2f232283183bb07aea48e2c8d86
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: OneHand10K
+ Name: topdown_heatmap_hrnetv2_w18_onehand10k_256x256_dark
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.573
+ EPE: 23.84
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..464e16a4c24e4eed7962ece0032a28796f0af877
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md
@@ -0,0 +1,43 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256.py) | 256x256 | 0.990 | 0.568 | 24.16 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6b104bd7cb417114cc58e98aa333c204e49dc4a8
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: OneHand10K
+ Name: topdown_heatmap_hrnetv2_w18_onehand10k_256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.568
+ EPE: 24.16
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..8247cd08105e23a430eb7ff3da2662476147d582
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_udp](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_udp.py) | 256x256 | 0.990 | 0.572 | 23.87 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7251110179d3a88f3e3dbfc98be990231e8a345f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: UDP
+ Paper:
+ Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for
+ Human Pose Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/udp.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_udp.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - UDP
+ Training Data: OneHand10K
+ Name: topdown_heatmap_hrnetv2_w18_onehand10k_256x256_udp
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.572
+ EPE: 23.87
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..36e930631b0bae66f263f7b05afd3e447af66d70
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_dark.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b1e8a7c93569fd20c461ccb1b6fee562e6657db
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
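
Comparing this file with `hrnetv2_w18_onehand10k_256x256.py`, the DARK variant changes only the target encoding (`unbiased_encoding=True` in `TopDownGenerateTarget`) and the decoding step (`post_process='unbiased'` in `test_cfg`); the regression target is still a per-joint Gaussian heatmap. An illustrative sketch of that target, simplified from the real pipeline (which also truncates the Gaussian to a local window and applies per-joint visibility weights):

```python
# Illustrative sketch only (not the mmpose implementation): the 64x64, sigma=2
# Gaussian heatmap that TopDownGenerateTarget regresses for each of the 21 joints.
import numpy as np

def gaussian_heatmap(cx, cy, size=(64, 64), sigma=2.0):
    h, w = size
    ys, xs = np.mgrid[0:h, 0:w].astype(np.float64)
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2.0 * sigma ** 2))

hm = gaussian_heatmap(20.3, 31.7)
peak = np.unravel_index(hm.argmax(), hm.shape)
print(hm.shape, peak)  # (64, 64) (32, 20) -- the peak snaps to the nearest pixel
```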
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_udp.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3694a3cdaf3d4142b4bfc73ec11984302a0b29fd
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_w18_onehand10k_256x256_udp.py
@@ -0,0 +1,171 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
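
Relative to the plain config, the UDP variant sets `use_udp=True` in `TopDownAffine` and `test_cfg`, generates targets with `encoding='UDP'` and `target_type='GaussianHeatmap'`, and disables `shift_heatmap`. A rough illustration of the coordinate convention involved, under the simplifying assumption (paraphrased from the UDP paper, not the mmpose code) that classical processing scales coordinates by the resolution ratio while UDP scales by the ratio of unit lengths:

```python
# Rough illustration (simplified assumption, not the mmpose code): mapping the
# right-most pixel column of the 256-wide input crop onto the 64-wide heatmap.
src_w, dst_w = 256, 64
x = float(src_w - 1)                    # x = 255.0, last pixel column of the crop

print(x * dst_w / src_w)                # 63.75 -> resolution-ratio mapping overshoots the grid
print(x * (dst_w - 1) / (src_w - 1))    # 63.0  -> unit-length (UDP) mapping hits the last heatmap pixel
```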
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e45d76b517355272151fc977886bf4f583591f8
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md
@@ -0,0 +1,42 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenet_v2](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k_256x256.py) | 256x256 | 0.986 | 0.537 | 28.60 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c4f81d6f4e18d139912e350887ab56e03eab4592
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: OneHand10K
+ Name: topdown_heatmap_mobilenetv2_onehand10k_256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.537
+ EPE: 28.6
+ PCK@0.2: 0.986
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cb41c397ce2b1ade1321d75e178e33a9fe37f7d
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k_256x256.py
@@ -0,0 +1,131 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5bd56682c532be7a5c46963e2662012e040825f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/onehand10k.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/onehand10k'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='OneHand10KDataset',
+ ann_file=f'{data_root}/annotations/onehand10k_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..1d190760318d2de5c390791a1ff293fb78c08ddd
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.md
@@ -0,0 +1,59 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py) | 256x256 | 0.989 | 0.555 | 25.19 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..065f99d667b0d62f7c0080ed24c9469c5cd8a82b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: OneHand10K
+ Name: topdown_heatmap_res50_onehand10k_256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.555
+ EPE: 25.19
+ PCK@0.2: 0.989
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..6ac86361123f7e1e163a2057dd98a5c51032df63
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.md
@@ -0,0 +1,57 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_dark.py) | 256x256 | 0.999 | 0.745 | 7.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_panoptic_256x256_dark-1f1e4b74_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_panoptic_256x256_dark_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..33f7f7d25c382f7bc878b52d7d39fc3952c375fb
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: CMU Panoptic HandDB
+ Name: topdown_heatmap_hrnetv2_w18_panoptic_256x256_dark
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.745
+ EPE: 7.77
+ PCKh@0.7: 0.999
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_panoptic_256x256_dark-1f1e4b74_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..8b4cf1f80c71596e2c049b580e246a589d9987f2
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.md
@@ -0,0 +1,40 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256.py) | 256x256 | 0.999 | 0.744 | 7.79 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_panoptic_256x256-53b12345_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_panoptic_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..06f7bd1a20a40055256c2e49c4b844a71f0a118b
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: CMU Panoptic HandDB
+ Name: topdown_heatmap_hrnetv2_w18_panoptic_256x256
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.744
+ EPE: 7.79
+ PCKh@0.7: 0.999
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_panoptic_256x256-53b12345_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe1ea73624c9fafa782452b59ee5cd671945360e
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.md
@@ -0,0 +1,57 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_udp](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_udp.py) | 256x256 | 0.998 | 0.742 | 7.84 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_panoptic_256x256_udp-f9e15948_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_panoptic_256x256_udp_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cd1e91e2dbe0d77e6f3a8398589ea8480874b985
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: UDP
+ Paper:
+ Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for
+ Human Pose Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/udp.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_udp.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - UDP
+ Training Data: CMU Panoptic HandDB
+ Name: topdown_heatmap_hrnetv2_w18_panoptic_256x256_udp
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.742
+ EPE: 7.84
+ PCKh@0.7: 0.998
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_panoptic_256x256_udp-f9e15948_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..148ba027ecca2e95e6b078ed2371959a72d90f5c
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_dark.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c2ab06be0d571ee00e1bc52ff55332f5ca0643
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_udp.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfb89a6adac93bcc2a294559348617fc6e99d451
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_w18_panoptic2d_256x256_udp.py
@@ -0,0 +1,171 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..def2133ca8a77d92b7d74e0ea73f73d1dc1e4183
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.md
@@ -0,0 +1,39 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenet_v2](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d_256x256.py) | 256x256 | 0.998 | 0.694 | 9.70 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_panoptic_256x256-b733d98c_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_panoptic_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1339b1e944c7ee93585546e1fa0a853455fa12c7
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: CMU Panoptic HandDB
+ Name: topdown_heatmap_mobilenetv2_panoptic_256x256
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.694
+ EPE: 9.7
+ PCKh@0.7: 0.998
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_panoptic_256x256-b733d98c_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a164074edc4fc866d22482e91d41730de2d3788f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d_256x256.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/res50_panoptic2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/res50_panoptic2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..774711b19f68b0b335010df04acd456b385eb956
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/res50_panoptic2d_256x256.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/panoptic_hand2d.py'
+]
+evaluation = dict(interval=10, metric=['PCKh', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/panoptic'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='PanopticDataset',
+ ann_file=f'{data_root}/annotations/panoptic_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..f92f22bc561afdb93c16cc278e53c7ec842d2f5c
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.md
@@ -0,0 +1,56 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+Results on CMU Panoptic (MPII+NZSL val set)
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet_50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/res50_panoptic2d_256x256.py) | 256x256 | 0.999 | 0.713 | 9.00 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_panoptic_256x256-4eafc561_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_panoptic_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..79dd55598d5452168d31312b46f7a6ebe71861cb
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/res50_panoptic2d_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: CMU Panoptic HandDB
+ Name: topdown_heatmap_res50_panoptic_256x256
+ Results:
+ - Dataset: CMU Panoptic HandDB
+ Metrics:
+ AUC: 0.713
+ EPE: 9.0
+ PCKh@0.7: 0.999
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_panoptic_256x256-4eafc561_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..15bc4d5f75d31d0e804402c94f200f9314873361
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md
@@ -0,0 +1,58 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_dark](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_dark.py) | 256x256 | 0.992 | 0.903 | 2.17 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6083f92e6b93058ede4d8ed1fce6b057f4e3be55
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: RHD
+ Name: topdown_heatmap_hrnetv2_w18_rhd2d_256x256_dark
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.903
+ EPE: 2.17
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb1b0ed6d18916b5e20588d91e3e915c4d23ccda
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md
@@ -0,0 +1,41 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256.py) | 256x256 | 0.992 | 0.902 | 2.21 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6fbc9848896bf8a9b2a416c7bd95a932e6a39b73
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: RHD
+ Name: topdown_heatmap_hrnetv2_w18_rhd2d_256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.902
+ EPE: 2.21
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..e18b661b5e8283740f362a365b7fc3cf42ecddd2
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md
@@ -0,0 +1,58 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCKh@0.7 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_hrnetv2_w18_udp](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_udp.py) | 256x256 | 0.998 | 0.742 | 7.84 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..40a19b4e2c741b79de9cb23ffdbd5375f1ede6ae
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
@@ -0,0 +1,24 @@
+Collections:
+- Name: UDP
+ Paper:
+ Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for
+ Human Pose Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/udp.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_udp.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - UDP
+ Training Data: RHD
+ Name: topdown_heatmap_hrnetv2_w18_rhd2d_256x256_udp
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.742
+ EPE: 7.84
+ PCKh@0.7: 0.998
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..4989023f0161c68a32a8ad3c1e6f22d5b36372f0
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_dark.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..2645755550aee2aed054b621229e0aae29e955f3
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_udp.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_udp.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf3acf46eea463dff5d5cde9b151729fe6e727b2
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_w18_rhd2d_256x256_udp.py
@@ -0,0 +1,171 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+target_type = 'GaussianHeatmap'
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False))),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ target_type=target_type,
+ modulate_kernel=11,
+ use_udp=True))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='TopDownGenerateTarget',
+ sigma=2,
+ encoding='UDP',
+ target_type=target_type),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine', use_udp=True),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..448ed41f3c5dc4059238d86d53baf38be9b2277f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md
@@ -0,0 +1,40 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_mobilenet_v2](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d_256x256.py) | 256x256 | 0.985 | 0.883 | 2.80 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bd448d4a59d83e3d747d68e0567b541d36808a7f
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: MobilenetV2
+ Paper:
+ Title: 'Mobilenetv2: Inverted residuals and linear bottlenecks'
+ URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/mobilenetv2.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d_256x256.py
+ In Collection: MobilenetV2
+ Metadata:
+ Architecture:
+ - MobilenetV2
+ Training Data: RHD
+ Name: topdown_heatmap_mobilenetv2_rhd2d_256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.883
+ EPE: 2.8
+ PCK@0.2: 0.985
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..44c94c1852500bf32390faba97b9a60b5357f191
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d_256x256.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=10,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='mmcls://mobilenet_v2',
+ backbone=dict(type='MobileNetV2', widen_factor=1., out_indices=(7, )),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_224x224.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_224x224.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1505698db92a5fb17347d180c69046636e98788
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_224x224.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[224, 224],
+ heatmap_size=[56, 56],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c987d338fc9580e62dbebe4c005f4355dba39334
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/rhd2d.py'
+]
+evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/rhd'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='Rhd2DDataset',
+ ann_file=f'{data_root}/annotations/rhd_test.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.md b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..78dee7b93bda9073ad6afb019d9205c405d2614d
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.md
@@ -0,0 +1,57 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: |:------: |:------: |
+| [pose_resnet50](/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py) | 256x256 | 0.991 | 0.898 | 2.33 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256_20210330.log.json) |
diff --git a/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.yml b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..457ace5fc2186f17b2ff73d0d5f532d090b6da41
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: RHD
+ Name: topdown_heatmap_res50_rhd2d_256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.898
+ EPE: 2.33
+ PCK@0.2: 0.991
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth
diff --git a/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c058280df2ded5486bf04dcb92731ac6c6a93b0a
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,7 @@
+# 3D Hand Pose Estimation
+
+3D hand pose estimation is defined as the task of estimating the 3D locations of the hand keypoints from an input image.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/3d_hand_keypoint.md) to prepare data.
diff --git a/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/README.md b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f7d2a8ccffc8cc6f2249d98e045a82d25f810199
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/README.md
@@ -0,0 +1,19 @@
+# InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image
+
+## Introduction
+
+
+
+
+InterNet (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.md b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..2c141628483305df44bc186fd4caa958f473599e
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.md
@@ -0,0 +1,55 @@
+
+
+
+InterNet (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+InterHand2.6M (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+Results on InterHand2.6M val & test set
+
+|Train Set| Set | Arch | Input Size | MPJPE-single | MPJPE-interacting | MPJPE-all | MRRPE | APh | ckpt | log |
+| :--- | :--- | :--------: | :--------: | :------: | :------: | :------: |:------: |:------: |:------: |:------: |
+| All | test(H+M) | [InterNet_resnet_50](/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py) | 256x256 | 9.47 | 13.40 | 11.59 | 29.28 | 0.99 | [ckpt](https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth) | [log](https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256_20210702.log.json) |
+| All | val(M) | [InterNet_resnet_50](/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py) | 256x256 | 11.22 | 15.23 | 13.16 | 31.73 | 0.98 | [ckpt](https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth) | [log](https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256_20210702.log.json) |
diff --git a/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.yml b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..34749b20c39124a1e9d5aaac91ebd25c45235c69
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: InterNet
+ Paper:
+ Title: 'InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation
+ from a Single RGB Image'
+ URL: https://link.springer.com/content/pdf/10.1007/978-3-030-58565-5_33.pdf
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/internet.md
+Models:
+- Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
+ In Collection: InterNet
+ Metadata:
+ Architecture: &id001
+ - InterNet
+ - ResNet
+ Training Data: InterHand2.6M
+ Name: internet_res50_interhand3d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ APh: 0.99
+ MPJPE-all: 11.59
+ MPJPE-interacting: 13.4
+ MPJPE-single: 9.47
+ Task: Hand 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth
+- Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
+ In Collection: InterNet
+ Metadata:
+ Architecture: *id001
+ Training Data: InterHand2.6M
+ Name: internet_res50_interhand3d_all_256x256
+ Results:
+ - Dataset: InterHand2.6M
+ Metrics:
+ APh: 0.98
+ MPJPE-all: 13.16
+ MPJPE-interacting: 15.23
+ MPJPE-single: 11.22
+ Task: Hand 3D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth
diff --git a/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6acb9180e996ef5f50c17633b13207048cf30420
--- /dev/null
+++ b/vendor/ViTPose/configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
@@ -0,0 +1,181 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/interhand3d.py'
+]
+checkpoint_config = dict(interval=1)
+evaluation = dict(
+ interval=1,
+ metric=['MRRPE', 'MPJPE', 'Handedness_acc'],
+ save_best='MPJPE_all')
+
+optimizer = dict(
+ type='Adam',
+ lr=2e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[15, 17])
+total_epochs = 20
+log_config = dict(
+ interval=20,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=42,
+ dataset_joints=42,
+ dataset_channel=[list(range(42))],
+ inference_channel=list(range(42)))
+
+# model settings
+model = dict(
+ type='Interhand3D',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='Interhand3DHead',
+ keypoint_head_cfg=dict(
+ in_channels=2048,
+ out_channels=21 * 64,
+ depth_size=64,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ ),
+ root_head_cfg=dict(
+ in_channels=2048,
+ heatmap_size=64,
+ hidden_dims=(512, ),
+ ),
+ hand_type_head_cfg=dict(
+ in_channels=2048,
+ num_labels=2,
+ hidden_dims=(512, ),
+ ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True),
+ loss_root_depth=dict(type='L1Loss', use_target_weight=True),
+ loss_hand_type=dict(type='BCELoss', use_target_weight=True),
+ ),
+ train_cfg={},
+ test_cfg=dict(flip_test=False))
+
+data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64, 64],
+ heatmap3d_depth_bound=400.0,
+ heatmap_size_root=64,
+ root_depth_bound=400.0,
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='HandRandomFlip', flip_prob=0.5),
+ dict(type='TopDownRandomTranslation', trans_factor=0.15),
+ dict(
+ type='TopDownGetRandomScaleRotation',
+ rot_factor=45,
+ scale_factor=0.25,
+ rot_prob=0.6),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='MultitaskGatherTarget',
+ pipeline_list=[
+ [dict(
+ type='Generate3DHeatmapTarget',
+ sigma=2.5,
+ max_bound=255,
+ )], [dict(type='HandGenerateRelDepthTarget')],
+ [
+ dict(
+ type='RenameKeys',
+ key_pairs=[('hand_type', 'target'),
+ ('hand_type_valid', 'target_weight')])
+ ]
+ ],
+ pipeline_indices=[0, 1, 2],
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'flip_pairs',
+ 'heatmap3d_depth_bound', 'root_depth_bound'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/interhand2.6m'
+data = dict(
+ samples_per_gpu=16,
+ workers_per_gpu=1,
+ train=dict(
+ type='InterHand3DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_train_joint_3d.json',
+ img_prefix=f'{data_root}/images/train/',
+ data_cfg=data_cfg,
+ use_gt_root_depth=True,
+ rootnet_result_file=None,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='InterHand3DDataset',
+ ann_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_data.json',
+ camera_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_camera.json',
+ joint_file=f'{data_root}/annotations/machine_annot/'
+ 'InterHand2.6M_val_joint_3d.json',
+ img_prefix=f'{data_root}/images/val/',
+ data_cfg=data_cfg,
+ use_gt_root_depth=True,
+ rootnet_result_file=None,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='InterHand3DDataset',
+ ann_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_data.json',
+ camera_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_camera.json',
+ joint_file=f'{data_root}/annotations/all/'
+ 'InterHand2.6M_test_joint_3d.json',
+ img_prefix=f'{data_root}/images/test/',
+ data_cfg=data_cfg,
+ use_gt_root_depth=True,
+ rootnet_result_file=None,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/README.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..904a391e7dd3ad45fa6b90a7ac0b9763f2ec2596
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/README.md
@@ -0,0 +1,19 @@
+# 2D Human Whole-Body Pose Estimation
+
+2D human whole-body pose estimation aims to localize dense landmarks on the entire human body including face, hands, body, and feet.
+
+Existing approaches can be categorized into top-down and bottom-up methods.
+
+Top-down methods divide the task into two stages: human detection followed by single-person whole-body pose estimation within each detected bounding box (a minimal inference sketch is shown below).
+
+Bottom-up approaches (e.g. AE) first detect all the whole-body keypoints in the image and then group/associate them into person instances.
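+
+A minimal sketch of the two-stage top-down flow, assuming the mmpose-style Python API vendored in this repo (`init_pose_model`, `inference_top_down_pose_model`); the config/checkpoint paths, the test image, and the person box are placeholders:
+
+```python
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+# Placeholder paths: any top-down COCO-WholeBody config/checkpoint pair can be used.
+pose_model = init_pose_model('path/to/wholebody_topdown_config.py',
+                             'path/to/wholebody_topdown_checkpoint.pth',
+                             device='cpu')
+
+# Stage 1 (human detection) is assumed to have already produced xywh person boxes.
+person_results = [{'bbox': [220.0, 80.0, 150.0, 320.0]}]
+
+# Stage 2: single-person whole-body pose estimation inside each detected box.
+pose_results, _ = inference_top_down_pose_model(
+    pose_model,
+    'demo.jpg',
+    person_results,
+    format='xywh',
+    dataset='TopDownCocoWholeBodyDataset')
+
+# Draw the 133 whole-body keypoints on the image.
+vis_pose_result(pose_model, 'demo.jpg', pose_results,
+                dataset='TopDownCocoWholeBodyDataset',
+                out_file='vis_demo.jpg')
+```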
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/tasks/2d_wholebody_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/2d_wholebody_pose_demo.md) to run demos.
+
+
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/README.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2048f2182b77605924ec48913c3203e3bc0a61be
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/README.md
@@ -0,0 +1,25 @@
+# Associative embedding: End-to-end learning for joint detection and grouping (AE)
+
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+AE is one of the most popular 2D bottom-up pose estimation approaches: it first detects all the keypoints and
+then groups/associates them into person instances.
+
+In order to group the predicted keypoints into individuals, a tag (embedding) is also predicted for each detected keypoint.
+Tags belonging to the same person are similar, while tags of different people are different, so keypoints can be grouped
+according to the distances between their tags, as sketched below.
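+
+The snippet below is a toy illustration of the grouping step, using scalar tags and a greedy nearest-tag assignment; it is a simplification of the per-joint matching actually used in the codebase:
+
+```python
+import numpy as np
+
+def group_by_tags(keypoints, tags, tag_thr=1.0):
+    """Greedily assign keypoints to people by tag distance.
+
+    keypoints: (N, 3) iterable of (x, y, score); tags: (N,) scalar embeddings.
+    Returns a list of people, each holding its mean tag and its keypoints.
+    """
+    people = []
+    for kpt, tag in zip(np.asarray(keypoints, dtype=float),
+                        np.asarray(tags, dtype=float)):
+        if people:
+            # Distance of this keypoint's tag to each existing person's mean tag.
+            dists = [abs(tag - p['mean_tag']) for p in people]
+            j = int(np.argmin(dists))
+            if dists[j] < tag_thr:
+                person = people[j]
+                person['kpts'].append(kpt)
+                # Update the person's mean tag with a running average.
+                person['mean_tag'] += (tag - person['mean_tag']) / len(person['kpts'])
+                continue
+        # No sufficiently close person: start a new one.
+        people.append({'mean_tag': float(tag), 'kpts': [kpt]})
+    return people
+
+# Two keypoints with nearby tags form one person; the third starts another.
+kpts = [[10, 20, 0.9], [12, 40, 0.8], [200, 50, 0.9]]
+tags = [0.11, 0.13, 2.45]
+print(len(group_by_tags(kpts, tags)))  # -> 2
+```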
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..6496280d669e277e4490b86e52ed70ec24622e59
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.md
@@ -0,0 +1,58 @@
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val without multi-scale test
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [HigherHRNet-w32+](/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py) | 512x512 | 0.590 | 0.672 | 0.185 | 0.335 | 0.676 | 0.721 | 0.212 | 0.298 | 0.401 | 0.493 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_wholebody_512x512_plus-2fa137ab_20210517.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_wholebody_512x512_plus_20210517.log.json) |
+| [HigherHRNet-w48+](/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py) | 512x512 | 0.630 | 0.706 | 0.440 | 0.573 | 0.730 | 0.777 | 0.389 | 0.477 | 0.487 | 0.574 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_wholebody_512x512_plus-934f08aa_20210517.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_wholebody_512x512_plus_20210517.log.json) |
+
+Note: `+` means the model is first pre-trained on the original COCO dataset and then fine-tuned on the COCO-WholeBody dataset. We find this leads to better performance.
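+
+For reference, such a fine-tuning setup can be written as a small config that inherits the COCO-WholeBody config in this folder and initializes from COCO-pretrained weights via the standard `load_from` option; this is a minimal sketch with a placeholder checkpoint path, not necessarily the exact recipe behind the released checkpoints:
+
+```python
+# Inherit the COCO-WholeBody training config defined alongside this file.
+_base_ = ['./higherhrnet_w32_coco_wholebody_512x512.py']
+
+# Initialize the whole model from COCO-pretrained HigherHRNet-w32 weights
+# before fine-tuning on COCO-WholeBody (placeholder path).
+load_from = 'checkpoints/higher_hrnet32_coco_512x512.pth'
+```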
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8f7b133be9eab240a9c5a2c67a923e7950450d4d
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.yml
@@ -0,0 +1,52 @@
+Collections:
+- Name: HigherHRNet
+ Paper:
+ Title: 'HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose
+ Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Cheng_HigherHRNet_Scale-Aware_Representation_Learning_for_Bottom-Up_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/higherhrnet.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HigherHRNet
+ Training Data: COCO-WholeBody
+ Name: associative_embedding_higherhrnet_w32_coco_wholebody_512x512
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.59
+ Body AR: 0.672
+ Face AP: 0.676
+ Face AR: 0.721
+ Foot AP: 0.185
+ Foot AR: 0.335
+ Hand AP: 0.212
+ Hand AR: 0.298
+ Whole AP: 0.401
+ Whole AR: 0.493
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet32_coco_wholebody_512x512_plus-2fa137ab_20210517.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py
+ In Collection: HigherHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: associative_embedding_higherhrnet_w48_coco_wholebody_512x512
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.63
+ Body AR: 0.706
+ Face AP: 0.73
+ Face AR: 0.777
+ Foot AP: 0.44
+ Foot AR: 0.573
+ Hand AP: 0.389
+ Hand AR: 0.477
+ Whole AP: 0.487
+ Whole AR: 0.574
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/higher_hrnet48_coco_wholebody_512x512_plus-934f08aa_20210517.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..05574f975347eb26e8503058546e43fcc1c3c527
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py
@@ -0,0 +1,195 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=133,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee9edc893edfb38c816ca83238fe63c2aabf8872
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py
@@ -0,0 +1,195 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=32,
+ num_joints=133,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[32],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..d84143b8d2805f8650432147ab6f32b9922b215f
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py
@@ -0,0 +1,195 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=133,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c33e80df931f6a18f05ee1ebbb95998f7517600
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py
@@ -0,0 +1,195 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160, 320],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AEHigherResolutionHead',
+ in_channels=48,
+ num_joints=133,
+ tag_per_joint=True,
+ extra=dict(final_conv_kernel=1, ),
+ num_deconv_layers=1,
+ num_deconv_filters=[48],
+ num_deconv_kernels=[4],
+ num_basic_blocks=4,
+ cat_output=[True],
+ with_ae_loss=[True, False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True, True],
+ with_ae=[True, False],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=8),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..4bc12c1946ccc3186370f85e0c0472dcd2d6e108
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.md
@@ -0,0 +1,58 @@
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+Results on COCO-WholeBody v1.0 val without multi-scale test
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [HRNet-w32+](/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py) | 512x512 | 0.551 | 0.650 | 0.271 | 0.451 | 0.564 | 0.618 | 0.159 | 0.238 | 0.342 | 0.453 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_wholebody_512x512_plus-f1f1185c_20210517.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_wholebody_512x512_plus_20210517.log.json) |
+| [HRNet-w48+](/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py) | 512x512 | 0.592 | 0.686 | 0.443 | 0.595 | 0.619 | 0.674 | 0.347 | 0.438 | 0.422 | 0.532 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_wholebody_512x512_plus-4de8a695_20210517.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_wholebody_512x512_plus_20210517.log.json) |
+
+Note: `+` means the model is first pre-trained on the original COCO dataset and then fine-tuned on the COCO-WholeBody dataset. We find this leads to better performance.
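+
+The snippet below is a minimal inference sketch, not part of the upstream benchmark: it assumes the vendored
+mmpose 0.x Python API (`init_pose_model`, `inference_bottom_up_pose_model`) is importable and that the HRNet-w32+
+checkpoint linked above has been downloaded locally; the checkpoint path and test image are placeholders.
+
+```python
+from mmpose.apis import inference_bottom_up_pose_model, init_pose_model
+
+config_file = ('configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/'
+               'coco-wholebody/hrnet_w32_coco_wholebody_512x512.py')
+checkpoint_file = 'hrnet_w32_coco_wholebody_512x512_plus-f1f1185c_20210517.pth'  # assumed local download
+
+# Build the associative-embedding model from the config and load the released weights.
+model = init_pose_model(config_file, checkpoint_file, device='cuda:0')
+
+# Bottom-up inference estimates every person in a single pass, so no person
+# bounding boxes are needed; each entry carries 133 whole-body keypoints.
+pose_results, _ = inference_bottom_up_pose_model(model, 'demo.jpg')  # 'demo.jpg' is a placeholder image
+print(f'detected {len(pose_results)} people')
+```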
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..69c1eded0903017450898fd4dc1e72fa5a3af505
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - Associative Embedding
+ - HRNet
+ Training Data: COCO-WholeBody
+ Name: associative_embedding_hrnet_w32_coco_wholebody_512x512
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.551
+ Body AR: 0.65
+ Face AP: 0.564
+ Face AR: 0.618
+ Foot AP: 0.271
+ Foot AR: 0.451
+ Hand AP: 0.159
+ Hand AR: 0.238
+ Whole AP: 0.342
+ Whole AR: 0.453
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_wholebody_512x512_plus-f1f1185c_20210517.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: associative_embedding_hrnet_w48_coco_wholebody_512x512
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.592
+ Body AR: 0.686
+ Face AP: 0.619
+ Face AR: 0.674
+ Foot AP: 0.443
+ Foot AR: 0.595
+ Hand AP: 0.347
+ Hand AR: 0.438
+ Whole AP: 0.422
+ Whole AR: 0.532
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/bottom_up/hrnet_w48_coco_wholebody_512x512_plus-4de8a695_20210517.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f48f8710cb31a3838d2dd93b52b101ebb246ae2
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py
@@ -0,0 +1,191 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=133,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=24),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..006dea83217a96bd623266b90a1528a6b491fe62
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py
@@ -0,0 +1,191 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=32,
+ num_joints=133,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed3aeca41ae0f70f9e90b66fe4896062dbaf90d1
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py
@@ -0,0 +1,191 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=133,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=16),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..f75d2ab17636349cef45076eeea61a350d539237
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py
@@ -0,0 +1,191 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+checkpoint_config = dict(interval=50)
+evaluation = dict(interval=50, metric='mAP', key_indicator='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0015,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[200, 260])
+total_epochs = 300
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+data_cfg = dict(
+ image_size=640,
+ base_size=320,
+ base_sigma=2,
+ heatmap_size=[160],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+)
+
+# model settings
+model = dict(
+ type='AssociativeEmbedding',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=48,
+ num_joints=133,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=133,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0],
+ supervise_empty=False)),
+ train_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ img_size=data_cfg['image_size']),
+ test_cfg=dict(
+ num_joints=channel_cfg['dataset_joints'],
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ align_corners=False,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ flip_test=True))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='BottomUpRandomAffine',
+ rot_factor=30,
+ scale_factor=[0.75, 1.5],
+ scale_type='short',
+ trans_factor=40),
+ dict(type='BottomUpRandomFlip', flip_prob=0.5),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='BottomUpGenerateTarget',
+ sigma=2,
+ max_num_people=30,
+ ),
+ dict(
+ type='Collect',
+ keys=['img', 'joints', 'targets', 'masks'],
+ meta_keys=[]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
+ dict(
+ type='BottomUpResizeAlign',
+ transforms=[
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'aug_data', 'test_scale_factor', 'base_size',
+ 'center', 'scale', 'flip_index'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ workers_per_gpu=2,
+ train_dataloader=dict(samples_per_gpu=8),
+ val_dataloader=dict(samples_per_gpu=1),
+ test_dataloader=dict(samples_per_gpu=1),
+ train=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='BottomUpCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/deeppose/coco-wholebody/res50_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/deeppose/coco-wholebody/res50_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e24b56fb95f45a8d1e8f9928cb49f88591e7486f
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/deeppose/coco-wholebody/res50_coco_wholebody_256x192.py
@@ -0,0 +1,130 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
+ neck=dict(type='GlobalAveragePooling'),
+ keypoint_head=dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(flip_test=True))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTargetRegression'),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/README.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d95e939ce35225e614245eeb43d2f1ff589afe97
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/README.md
@@ -0,0 +1,10 @@
+# Top-down heatmap-based whole-body pose estimation
+
+Top-down methods divide the task into two stages: human detection and whole-body pose estimation.
+
+They perform human detection first, followed by single-person whole-body pose estimation given human bounding boxes.
+Instead of regressing keypoint coordinates directly, the pose estimator produces one heatmap per keypoint, which
+represents the likelihood of that keypoint appearing at each location.
+
+Various neural network models have been proposed for better performance.
+The popular ones include stacked hourglass networks and HRNet.
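+
+As a concrete illustration of the two-stage pipeline, the sketch below assumes the mmpose 0.x Python API
+(`init_pose_model`, `inference_top_down_pose_model`); the config is one of the ViTPose whole-body configs in this
+folder, while the checkpoint path, test image, and person bounding box are placeholders.
+
+```python
+from mmpose.apis import inference_top_down_pose_model, init_pose_model
+
+# Stage 1 is person detection; here we assume a detector has already produced
+# this box in xywh format with a confidence score (e.g. from MMDetection).
+person_results = [{'bbox': [220.0, 150.0, 180.0, 400.0, 0.98]}]  # placeholder box
+
+pose_config = ('configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/'
+               'coco-wholebody/ViTPose_base_wholebody_256x192.py')
+pose_checkpoint = 'vitpose_base_wholebody_256x192.pth'  # placeholder checkpoint path
+
+model = init_pose_model(pose_config, pose_checkpoint, device='cuda:0')
+
+# Stage 2: the head predicts one heatmap per keypoint for each cropped person;
+# the API decodes the heatmap maxima back to image coordinates.
+pose_results, _ = inference_top_down_pose_model(
+    model,
+    'demo.jpg',  # placeholder image
+    person_results,
+    format='xywh',
+    dataset='TopDownCocoWholeBodyDataset')
+print(pose_results[0]['keypoints'].shape)  # (133, 3): x, y, score per keypoint
+```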
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_base_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_base_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..02db322650b1f58655998dcab20c0ef23fb8ec33
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_base_wholebody_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=768,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_huge_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_huge_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccd8fd29afd372198cd4e89189c3f2186f96b810
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_huge_wholebody_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1280,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_large_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_large_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..df96867906844766bfdf8cf12ce5246b4d9d73a8
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_large_wholebody_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=1024,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_small_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_small_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1d4b054dcea5ff46c0723d13e445546dc307440
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/ViTPose_small_wholebody_256x192.py
@@ -0,0 +1,149 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='ViT',
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=12,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=384,
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..d486926d2c473af7f78dae746f469ee39f920472
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md
@@ -0,0 +1,41 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on the COCO-WholeBody v1.0 val set, obtained with a person detector that has a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [pose_hrnet_w32](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192.py) | 256x192 | 0.700 | 0.746 | 0.567 | 0.645 | 0.637 | 0.688 | 0.473 | 0.546 | 0.553 | 0.626 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_20200918.log.json) |
+| [pose_hrnet_w32](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288.py) | 384x288 | 0.701 | 0.773 | 0.586 | 0.692 | 0.727 | 0.783 | 0.516 | 0.604 | 0.586 | 0.674 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288_20200922.log.json) |
+| [pose_hrnet_w48](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192.py) | 256x192 | 0.700 | 0.776 | 0.672 | 0.785 | 0.656 | 0.743 | 0.534 | 0.639 | 0.579 | 0.681 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192_20200922.log.json) |
+| [pose_hrnet_w48](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288.py) | 384x288 | 0.722 | 0.790 | 0.694 | 0.799 | 0.777 | 0.834 | 0.587 | 0.679 | 0.631 | 0.716 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_20200922.log.json) |
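+
+The configs and checkpoints listed above can be loaded with the top-down inference API shipped with the vendored mmpose code. Below is a minimal sketch, assuming the checkpoint has been downloaded locally and that a person box is already available from an external detector; the image path and box coordinates are placeholders for illustration only.
+
+```python
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+# Config and checkpoint taken from the first row of the table above.
+config = ('configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/'
+          'coco-wholebody/hrnet_w32_coco_wholebody_256x192.py')
+checkpoint = 'hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth'
+
+model = init_pose_model(config, checkpoint, device='cuda:0')
+
+# One person bounding box in xyxy format from an external detector (placeholder).
+person_results = [{'bbox': [50, 50, 250, 400]}]
+
+pose_results, _ = inference_top_down_pose_model(
+    model,
+    'demo.jpg',              # placeholder image path
+    person_results,
+    format='xyxy',
+    dataset='TopDownCocoWholeBodyDataset')
+
+# Draw the 133 predicted whole-body keypoints and save the visualization.
+vis_pose_result(model, 'demo.jpg', pose_results,
+                dataset='TopDownCocoWholeBodyDataset',
+                out_file='vis_wholebody.jpg')
+```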
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..707b893b6aa26d86ec4440de8b2264d71cfd9f7e
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
@@ -0,0 +1,92 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w32_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.7
+ Body AR: 0.746
+ Face AP: 0.637
+ Face AR: 0.688
+ Foot AP: 0.567
+ Foot AR: 0.645
+ Hand AP: 0.473
+ Hand AR: 0.546
+ Whole AP: 0.553
+ Whole AR: 0.626
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w32_coco_wholebody_384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.701
+ Body AR: 0.773
+ Face AP: 0.727
+ Face AR: 0.783
+ Foot AP: 0.586
+ Foot AR: 0.692
+ Hand AP: 0.516
+ Hand AR: 0.604
+ Whole AP: 0.586
+ Whole AR: 0.674
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w48_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.7
+ Body AR: 0.776
+ Face AP: 0.656
+ Face AR: 0.743
+ Foot AP: 0.672
+ Foot AR: 0.785
+ Hand AP: 0.534
+ Hand AR: 0.639
+ Whole AP: 0.579
+ Whole AR: 0.681
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w48_coco_wholebody_384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.722
+ Body AR: 0.79
+ Face AP: 0.777
+ Face AR: 0.834
+ Foot AP: 0.694
+ Foot AR: 0.799
+ Hand AP: 0.587
+ Hand AR: 0.679
+ Whole AP: 0.631
+ Whole AR: 0.716
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..3edd51bffb2cfaedfcf1e5c86170146993c2be01
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md
@@ -0,0 +1,58 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on the COCO-WholeBody v1.0 val set, obtained with a person detector that has a human AP of 56.4 on the COCO val2017 dataset.
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [pose_hrnet_w32_dark](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192_dark.py) | 256x192 | 0.694 | 0.764 | 0.565 | 0.674 | 0.736 | 0.808 | 0.503 | 0.602 | 0.582 | 0.671 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark_20200922.log.json) |
+| [pose_hrnet_w48_dark+](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py) | 384x288 | 0.742 | 0.807 | 0.705 | 0.804 | 0.840 | 0.892 | 0.602 | 0.694 | 0.661 | 0.743 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark_20200918.log.json) |
+
+Note: `+` means the model is first pre-trained on the original COCO dataset and then fine-tuned on the COCO-WholeBody dataset. We find this leads to better performance.
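+
+In the corresponding config added later in this diff (`hrnet_w48_coco_wholebody_384x288_dark_plus.py`), this pre-train/fine-tune scheme is expressed by loading the full COCO-trained DarkPose model through `load_from` while leaving the backbone's `pretrained` field empty. A shortened sketch of the relevant config lines (the remaining settings match the non-plus variant):
+
+```python
+# Initialize the whole model from a body-only COCO DarkPose checkpoint,
+# then fine-tune on COCO-WholeBody.
+load_from = ('https://download.openmmlab.com/mmpose/top_down/hrnet/'
+             'hrnet_w48_coco_384x288_dark-741844ba_20200812.pth')
+
+model = dict(
+    type='TopDown',
+    pretrained=None,  # no separate ImageNet backbone init; weights come from load_from
+    # backbone and keypoint_head are configured as in hrnet_w48_coco_wholebody_384x288_dark.py
+)
+```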
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c15c6beda09a2586e135e184074501144ef018ae
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192_dark.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w32_coco_wholebody_256x192_dark
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.694
+ Body AR: 0.764
+ Face AP: 0.736
+ Face AR: 0.808
+ Foot AP: 0.565
+ Foot AR: 0.674
+ Hand AP: 0.503
+ Hand AR: 0.602
+ Whole AP: 0.582
+ Whole AR: 0.671
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_hrnet_w48_coco_wholebody_384x288_dark_plus
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.742
+ Body AR: 0.807
+ Face AP: 0.84
+ Face AR: 0.892
+ Foot AP: 0.705
+ Foot AR: 0.804
+ Hand AP: 0.602
+ Hand AR: 0.694
+ Whole AP: 0.661
+ Whole AR: 0.743
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9c12160f1cd41ce3461b15cc747a0683e5b0e97
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b0745fa5dde9241c939e6a6c4fcd5a5b222252c
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_256x192_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e867fa57b62bdb36f6919850566b90a78a27865
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..97b7679cf0fa38d81fee10cff5edac97107838ad
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w32_coco_wholebody_384x288_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..039610e0ce2c4134485ac770f252d7574c0e94fd
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..e19f03feaa3ec8f07b061d1ad095c05b95fd3157
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_256x192_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..0be7d03e942d8d22543baa23d69fdec790ae50f4
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288.py
@@ -0,0 +1,165 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup=None,
+ # warmup='linear',
+ # warmup_iters=500,
+ # warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..5239244b78e371e4603ca16bc40096d397e82567
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8a9856a6ac8c188b61cc87bb76d0649e187ea2f
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-741844ba_20200812.pth' # noqa: E501
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
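
Several of these configs pair `unbiased_encoding=True` in `TopDownGenerateTarget` with `post_process='unbiased'` in `test_cfg`; that combination is the DarkPose (CVPR'2020) distribution-aware target encoding and decoding, with `modulate_kernel` giving the Gaussian kernel size used to smooth the heatmap before decoding. The snippet below is only a rough NumPy illustration of the decode-side idea (a Taylor-expansion refinement of the argmax on the log-heatmap), not mmpose's actual implementation.

```python
import numpy as np

def dark_refine(heatmap, eps=1e-10):
    """Rough sketch of DARK-style sub-pixel refinement for a single heatmap.

    Assumes the heatmap has already been Gaussian-modulated (cf. modulate_kernel
    in test_cfg). Returns (x, y) in heatmap coordinates.
    """
    h, w = heatmap.shape
    y, x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
    if not (1 < x < w - 2 and 1 < y < h - 2):
        return float(x), float(y)  # too close to the border to refine
    log_h = np.log(np.maximum(heatmap, eps))
    # first derivatives (central differences) of the log-heatmap at the peak
    dx = 0.5 * (log_h[y, x + 1] - log_h[y, x - 1])
    dy = 0.5 * (log_h[y + 1, x] - log_h[y - 1, x])
    # second derivatives / Hessian
    dxx = log_h[y, x + 1] - 2 * log_h[y, x] + log_h[y, x - 1]
    dyy = log_h[y + 1, x] - 2 * log_h[y, x] + log_h[y - 1, x]
    dxy = 0.25 * (log_h[y + 1, x + 1] - log_h[y + 1, x - 1]
                  - log_h[y - 1, x + 1] + log_h[y - 1, x - 1])
    hess = np.array([[dxx, dxy], [dxy, dyy]])
    grad = np.array([dx, dy])
    if abs(np.linalg.det(hess)) < eps:
        return float(x), float(y)
    offset = -np.linalg.solve(hess, grad)  # Newton step on the log-heatmap
    return float(x + offset[0]), float(y + offset[1])
```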
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..917396a4bc403d52aa0ba8216d909bf431b71c0d
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_384x288.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd2422e4334b5297a02ffd99c66830a279277448
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet101',
+ backbone=dict(type='ResNet', depth=101),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a59d1dcb9692b5cfe7456a988b394edb1221a03f
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_384x288.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe03a6c88805c69d2b0e51ace69b0a6e4066274a
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet152',
+ backbone=dict(type='ResNet', depth=152),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e39682b52a7b8e2a7798454a88193c610c5bae2
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
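
For reference, here is a hedged usage sketch of how a config/checkpoint pair from this family is typically driven through mmpose's 0.x top-down inference API (the API family the ViTPose vendor tree targets). The image path and the person box below are placeholders; the checkpoint URL is the one listed for this config in the accompanying README table.

```python
# Hedged sketch: whole-body inference with mmpose 0.x top-down helpers.
from mmpose.apis import init_pose_model, inference_top_down_pose_model

config = ('vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py')
checkpoint = ('https://download.openmmlab.com/mmpose/top_down/resnet/'
              'res50_coco_wholebody_256x192-9e37ed88_20201004.pth')

model = init_pose_model(config, checkpoint, device='cpu')

# one person box in xywh format, as produced by an external detector (placeholder values)
person_results = [{'bbox': [50, 50, 200, 400]}]
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results, format='xywh',
    dataset='TopDownCocoWholeBodyDataset')

print(pose_results[0]['keypoints'].shape)  # (133, 3): x, y, score per keypoint
```

The returned keypoints are given in the original image frame, one row per whole-body joint.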
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_384x288.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d9de5d128cb432b26498259fbb8f7b0c269132b
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_384x288.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..143c33f2e19bedca856178ba5de3e7c7521b7d8b
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on the COCO-WholeBody v1.0 val set, using a person detector with 56.4 human AP on COCO val2017.
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [pose_resnet_50](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py) | 256x192 | 0.652 | 0.739 | 0.614 | 0.746 | 0.608 | 0.716 | 0.460 | 0.584 | 0.520 | 0.633 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_50](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_384x288.py) | 384x288 | 0.666 | 0.747 | 0.635 | 0.763 | 0.732 | 0.812 | 0.537 | 0.647 | 0.573 | 0.671 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288_20201004.log.json) |
+| [pose_resnet_101](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_256x192.py) | 256x192 | 0.670 | 0.754 | 0.640 | 0.767 | 0.611 | 0.723 | 0.463 | 0.589 | 0.533 | 0.647 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_101](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_384x288.py) | 384x288 | 0.692 | 0.770 | 0.680 | 0.798 | 0.747 | 0.822 | 0.549 | 0.658 | 0.597 | 0.692 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288_20201004.log.json) |
+| [pose_resnet_152](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_256x192.py) | 256x192 | 0.682 | 0.764 | 0.662 | 0.788 | 0.624 | 0.728 | 0.482 | 0.606 | 0.548 | 0.661 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_152](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_384x288.py) | 384x288 | 0.703 | 0.780 | 0.693 | 0.813 | 0.751 | 0.825 | 0.559 | 0.667 | 0.610 | 0.705 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288_20201004.log.json) |
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..84fea0885a4105e4a83f50868db5a0aaa9263e7e
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
@@ -0,0 +1,134 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res50_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.652
+ Body AR: 0.739
+ Face AP: 0.608
+ Face AR: 0.716
+ Foot AP: 0.614
+ Foot AR: 0.746
+ Hand AP: 0.46
+ Hand AR: 0.584
+ Whole AP: 0.52
+ Whole AR: 0.633
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res50_coco_wholebody_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res50_coco_wholebody_384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.666
+ Body AR: 0.747
+ Face AP: 0.732
+ Face AR: 0.812
+ Foot AP: 0.635
+ Foot AR: 0.763
+ Hand AP: 0.537
+ Hand AR: 0.647
+ Whole AP: 0.573
+ Whole AR: 0.671
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res101_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.67
+ Body AR: 0.754
+ Face AP: 0.611
+ Face AR: 0.723
+ Foot AP: 0.64
+ Foot AR: 0.767
+ Hand AP: 0.463
+ Hand AR: 0.589
+ Whole AP: 0.533
+ Whole AR: 0.647
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res101_coco_wholebody_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res101_coco_wholebody_384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.692
+ Body AR: 0.77
+ Face AP: 0.747
+ Face AR: 0.822
+ Foot AP: 0.68
+ Foot AR: 0.798
+ Hand AP: 0.549
+ Hand AR: 0.658
+ Whole AP: 0.597
+ Whole AR: 0.692
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res152_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.682
+ Body AR: 0.764
+ Face AP: 0.624
+ Face AR: 0.728
+ Foot AP: 0.662
+ Foot AR: 0.788
+ Hand AP: 0.482
+ Hand AR: 0.606
+ Whole AP: 0.548
+ Whole AR: 0.661
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/res152_coco_wholebody_384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_res152_coco_wholebody_384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.703
+ Body AR: 0.78
+ Face AP: 0.751
+ Face AR: 0.825
+ Foot AP: 0.693
+ Foot AR: 0.813
+ Hand AP: 0.559
+ Hand AR: 0.667
+ Whole AP: 0.61
+ Whole AR: 0.705
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth
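
The metafile above is plain YAML, so its entries can be queried directly. A small sketch (an illustration, not upstream tooling) that picks the strongest checkpoint in this family by whole-body AP:

```python
import yaml

# yaml.safe_load resolves the &id001/*id001 anchors used for the Architecture field.
with open('vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/'
          'topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml') as f:
    zoo = yaml.safe_load(f)

best = max(zoo['Models'], key=lambda m: m['Results'][0]['Metrics']['Whole AP'])
print(best['Name'])     # topdown_heatmap_res152_coco_wholebody_384x288
print(best['Weights'])  # download URL of the strongest checkpoint in this family
```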
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..b7ec8b96608f7cfc1a067703763b34ef76a276ad
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
@@ -0,0 +1,38 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on the COCO-WholeBody v1.0 val set, using a person detector with 56.4 human AP on COCO val2017.
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [S-ViPNAS-MobileNetV3](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192.py) | 256x192 | 0.619 | 0.700 | 0.477 | 0.608 | 0.585 | 0.689 | 0.386 | 0.505 | 0.473 | 0.578 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_20211205.log.json) |
+| [S-ViPNAS-Res50](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192.py) | 256x192 | 0.643 | 0.726 | 0.553 | 0.694 | 0.587 | 0.698 | 0.410 | 0.529 | 0.495 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_20211112.log.json) |
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f52ddcdfa4075aaa194679c7fd6a4cbd5fcb6af4
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
@@ -0,0 +1,50 @@
+Collections:
+- Name: ViPNAS
+ Paper:
+ Title: 'ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search'
+ URL: https://arxiv.org/abs/2105.10154
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/vipnas.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_vipnas_mbv3_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.619
+ Body AR: 0.7
+ Face AP: 0.585
+ Face AR: 0.689
+ Foot AP: 0.477
+ Foot AR: 0.608
+ Hand AP: 0.386
+ Hand AR: 0.505
+ Whole AP: 0.473
+ Whole AR: 0.578
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_vipnas_res50_coco_wholebody_256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.643
+ Body AR: 0.726
+ Face AP: 0.587
+ Face AR: 0.698
+ Foot AP: 0.553
+ Foot AR: 0.694
+ Hand AP: 0.41
+ Hand AR: 0.529
+ Whole AP: 0.495
+ Whole AR: 0.607
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea7a9e9035ca9fbad53e7d9fa5c58437faf847a9
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
@@ -0,0 +1,55 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on the COCO-WholeBody v1.0 val set, using a person detector with 56.4 human AP on COCO val2017.
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :-----: | :-----: | :------: |:-------: |:------: | :------: |
+| [S-ViPNAS-MobileNetV3_dark](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192_dark.py) | 256x192 | 0.632 | 0.710 | 0.530 | 0.660 | 0.672 | 0.771 | 0.404 | 0.519 | 0.508 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark_20211205.log.json) |
+| [S-ViPNAS-Res50_dark](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py) | 256x192 | 0.650 | 0.732 | 0.550 | 0.686 | 0.684 | 0.784 | 0.437 | 0.554 | 0.528 | 0.632 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark_20211112.log.json) |
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ec948af798aea584577bf4aca6f5cf6c1085ef56
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
@@ -0,0 +1,51 @@
+Collections:
+- Name: ViPNAS
+ Paper:
+ Title: 'ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search'
+ URL: https://arxiv.org/abs/2105.10154
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/vipnas.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192_dark.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ - DarkPose
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_vipnas_mbv3_coco_wholebody_256x192_dark
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.632
+ Body AR: 0.71
+ Face AP: 0.672
+ Face AR: 0.771
+ Foot AP: 0.53
+ Foot AR: 0.66
+ Hand AP: 0.404
+ Hand AR: 0.519
+ Whole AP: 0.508
+ Whole AR: 0.607
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: topdown_heatmap_vipnas_res50_coco_wholebody_256x192_dark
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.65
+ Body AR: 0.732
+ Face AP: 0.684
+ Face AR: 0.784
+ Foot AP: 0.55
+ Foot AR: 0.686
+ Hand AP: 0.437
+ Hand AR: 0.554
+ Whole AP: 0.528
+ Whole AR: 0.632
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c36894785f2098f814704299e6837880e7b5694
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=160,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_filters=(160, 160, 160),
+ num_deconv_groups=(160, 160, 160),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9b825ef7531b20e39e895a743b1c358d0fab652
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_mbv3_coco_wholebody_256x192_dark.py
@@ -0,0 +1,136 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=160,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_filters=(160, 160, 160),
+ num_deconv_groups=(160, 160, 160),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c64edb5fc403abf3c58a0b101b58dca8ee933a1
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192.py
@@ -0,0 +1,134 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_ResNet', depth=50),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=608,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
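Each of these vendored configs inherits `_base_/default_runtime.py` plus a dataset description, and the `dataset_info={{_base_.dataset_info}}` entries are placeholders that mmcv substitutes with the `dataset_info` dict from the base file when the config is parsed. The following is a minimal sketch (not part of the vendored files) of inspecting the resolved config; it assumes an mmcv 1.x installation and that the script is run from `vendor/ViTPose`.

```python
# Hedged sketch: load the ViPNAS-Res50 COCO-WholeBody config and confirm that
# the {{_base_.dataset_info}} placeholder has been resolved by mmcv.
# Assumes mmcv 1.x (`pip install mmcv-full`) and a working directory of
# vendor/ViTPose; neither is asserted by the diff itself.
from mmcv import Config

cfg = Config.fromfile(
    'configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'coco-wholebody/vipnas_res50_coco_wholebody_256x192.py')

print(cfg.model.backbone.type)   # ViPNAS_ResNet
print(cfg.data.train.type)       # TopDownCocoWholeBodyDataset
# dataset_info now holds the dict defined in _base_/datasets/coco_wholebody.py
print(type(cfg.data.train.dataset_info))
```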
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py
new file mode 100644
index 0000000000000000000000000000000000000000..12a00d54aee342623c33c050e183a36031be2865
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py
@@ -0,0 +1,134 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/coco_wholebody.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_ResNet', depth=50),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=608,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=30,
+ scale_factor=0.25),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset',
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
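The `_dark` variant above differs from the plain `vipnas_res50_coco_wholebody_256x192.py` only in the DarkPose options: `post_process='unbiased'` at test time and `unbiased_encoding=True` in `TopDownGenerateTarget` during training. A hedged sketch (not part of the vendored files) for verifying that delta programmatically, under the same mmcv 1.x and working-directory assumptions as above:

```python
# Hedged sketch: diff the plain and DARK ViPNAS configs to confirm that only
# the heatmap encoding/decoding options change.
from mmcv import Config

base = 'configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/'
plain = Config.fromfile(base + 'vipnas_res50_coco_wholebody_256x192.py')
dark = Config.fromfile(base + 'vipnas_res50_coco_wholebody_256x192_dark.py')

# Test-time decoding: 'default' argmax+shift vs DarkPose's 'unbiased' decoding.
print(plain.model.test_cfg.post_process, '->', dark.model.test_cfg.post_process)

# Training targets: only the DARK config enables unbiased Gaussian encoding.
def target_step(cfg):
    return next(s for s in cfg.train_pipeline
                if s['type'] == 'TopDownGenerateTarget')

print(target_step(plain).get('unbiased_encoding', False),
      '->', target_step(dark)['unbiased_encoding'])
```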
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.md b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.md
new file mode 100644
index 0000000000000000000000000000000000000000..1b22b4b53da6d8acb06464342495822068870441
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.md
@@ -0,0 +1,57 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+Halpe (CVPR'2020)
+
+```bibtex
+@inproceedings{li2020pastanet,
+ title={PaStaNet: Toward Human Activity Knowledge Engine},
+ author={Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
+ booktitle={CVPR},
+ year={2020}
+}
+```
+
+
+
+Results on the Halpe v1.0 val set, obtained with a person detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | Whole AP | Whole AR | ckpt | log |
+| :---- | :--------: | :------: |:-------: |:------: | :------: |
+| [pose_hrnet_w48_dark+](/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w48_halpe_384x288_dark_plus.py) | 384x288 | 0.531 | 0.642 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_halpe_384x288_dark_plus-d13c2588_20211021.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_halpe_384x288_dark_plus_20211021.log.json) |
+
+Note: `+` means the model is first pre-trained on the original COCO dataset and then fine-tuned on the Halpe dataset. We find this leads to better performance.
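For reference, below is a hedged sketch (not part of the vendored files) of running the checkpoint listed above through mmpose's high-level top-down API. The exact call signatures depend on the installed mmpose 0.x version, and `person.jpg` plus the hard-coded bounding box are placeholders, not values taken from this repository.

```python
# Hedged sketch: whole-body inference with the HRNet-W48 Halpe DARK+ checkpoint
# via mmpose's 0.x high-level API. The image path and bbox are placeholders.
from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
                         vis_pose_result)
from mmpose.datasets import DatasetInfo

config = ('configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/'
          'hrnet_w48_halpe_384x288_dark_plus.py')
checkpoint = ('https://download.openmmlab.com/mmpose/top_down/hrnet/'
              'hrnet_w48_halpe_384x288_dark_plus-d13c2588_20211021.pth')

model = init_pose_model(config, checkpoint, device='cuda:0')
dataset_info = DatasetInfo(model.cfg.data.test.dataset_info)

# One person detection in xywh format; in practice this comes from a detector.
person_results = [{'bbox': [50, 50, 200, 400]}]
pose_results, _ = inference_top_down_pose_model(
    model, 'person.jpg', person_results,
    format='xywh', dataset_info=dataset_info)
vis_pose_result(model, 'person.jpg', pose_results,
                dataset_info=dataset_info, out_file='vis_person.jpg')
```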
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.yml b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9c7b419fa43dbbe203cbd14fb09cd22cdf74350c
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.yml
@@ -0,0 +1,22 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/techniques/dark.md
+Models:
+- Config: configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w48_halpe_384x288_dark_plus.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNet
+ - DarkPose
+ Training Data: Halpe
+ Name: topdown_heatmap_hrnet_w48_halpe_384x288_dark_plus
+ Results:
+ - Dataset: Halpe
+ Metrics:
+ Whole AP: 0.531
+ Whole AR: 0.642
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_halpe_384x288_dark_plus-d13c2588_20211021.pth
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w32_halpe_256x192.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w32_halpe_256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d6a2825f3375879af3bfe74967c27268848e0e2
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w32_halpe_256x192.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/halpe.py'
+]
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=136,
+ dataset_joints=136,
+ dataset_channel=[
+ list(range(136)),
+ ],
+ inference_channel=list(range(136)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=32,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/halpe'
+data = dict(
+ samples_per_gpu=64,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_train_v1.json',
+ img_prefix=f'{data_root}/hico_20160224_det/images/train2015/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_val_v1.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_val_v1.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
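Beyond the data pipeline, these configs are also what mmpose uses to instantiate the network itself. Below is a hedged sketch (not part of the vendored files) of building the 136-keypoint HRNet-W32 Halpe model from the config above and running a dummy forward pass, under the same mmcv/mmpose 0.x and working-directory assumptions; no checkpoint is downloaded.

```python
# Hedged sketch: build the HRNet-W32 Halpe model from the config alone and
# check that the head emits 136 heatmap channels at the configured resolution.
import torch
from mmcv import Config
from mmpose.models import build_posenet

cfg = Config.fromfile(
    'configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/'
    'hrnet_w32_halpe_256x192.py')
cfg.model.pretrained = None  # skip downloading the ImageNet HRNet weights

model = build_posenet(cfg.model)
model.eval()
dummy = torch.randn(1, 3, 256, 192)   # NCHW, matches image_size [192, 256]
with torch.no_grad():
    heatmaps = model.forward_dummy(dummy)
print(heatmaps.shape)                  # expected: torch.Size([1, 136, 64, 48])
```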
diff --git a/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w48_halpe_384x288_dark_plus.py b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w48_halpe_384x288_dark_plus.py
new file mode 100644
index 0000000000000000000000000000000000000000..b62947864f357c4aef49bc23063df452ccf6b0ee
--- /dev/null
+++ b/vendor/ViTPose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_w48_halpe_384x288_dark_plus.py
@@ -0,0 +1,164 @@
+_base_ = [
+ '../../../../_base_/default_runtime.py',
+ '../../../../_base_/datasets/halpe.py'
+]
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-741844ba_20200812.pth' # noqa: E501
+evaluation = dict(interval=10, metric='mAP', save_best='AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=5e-4,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200])
+total_epochs = 210
+channel_cfg = dict(
+ num_output_channels=136,
+ dataset_joints=136,
+ dataset_channel=[
+ list(range(136)),
+ ],
+ inference_channel=list(range(136)))
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.0,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3, unbiased_encoding=True),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/halpe'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_train_v1.json',
+ img_prefix=f'{data_root}/hico_20160224_det/images/train2015/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_val_v1.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownHalpeDataset',
+ ann_file=f'{data_root}/annotations/halpe_val_v1.json',
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
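The `_dark_plus` config above encodes the `+` convention from the accompanying README: the backbone's ImageNet `pretrained` field is disabled and the whole network is instead warm-started from the COCO 384x288 DARK checkpoint via `load_from`, then fine-tuned on the 136-keypoint Halpe annotations. A hedged sketch (not part of the vendored files) for inspecting those fields, under the same mmcv 1.x and working-directory assumptions as the earlier sketches:

```python
# Hedged sketch: confirm how the "plus" variant is warm-started.
from mmcv import Config

cfg = Config.fromfile(
    'configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/'
    'hrnet_w48_halpe_384x288_dark_plus.py')

print(cfg.model.pretrained)                  # None: no separate ImageNet init
print(cfg.load_from)                         # COCO 384x288 DARK checkpoint URL
print(cfg.channel_cfg.num_output_channels)   # 136 Halpe keypoints
print(cfg.data_cfg.image_size, cfg.data_cfg.heatmap_size)  # [288, 384], [72, 96]
```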
diff --git a/vendor/ViTPose/demo/MMPose_Tutorial.ipynb b/vendor/ViTPose/demo/MMPose_Tutorial.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b5f08bd39551cc10d4176e2eb852e6cf84c8147e
--- /dev/null
+++ b/vendor/ViTPose/demo/MMPose_Tutorial.ipynb
@@ -0,0 +1,3181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "F77yOqgkX8p4"
+ },
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9_h0e90xzw0w"
+ },
+ "source": [
+ "# MMPose Tutorial\n",
+ "\n",
+ "Welcome to MMPose colab tutorial! In this tutorial, we will show you how to\n",
+ "- perform inference with an MMPose model\n",
+ "- train a new mmpose model with your own datasets\n",
+ "\n",
+ "Let's start!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bMVTUneIzw0x"
+ },
+ "source": [
+ "## Install MMPose\n",
+ "\n",
+ "We recommend to use a conda environment to install mmpose and its dependencies. And compilers `nvcc` and `gcc` are required."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "9dvKWH89zw0x",
+ "outputId": "c3e29ad4-6a1b-4ef8-ec45-93196de7ffae"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nvcc: NVIDIA (R) Cuda compiler driver\n",
+ "Copyright (c) 2005-2020 NVIDIA Corporation\n",
+ "Built on Tue_Sep_15_19:10:02_PDT_2020\n",
+ "Cuda compilation tools, release 11.1, V11.1.74\n",
+ "Build cuda_11.1.TC455_06.29069683_0\n",
+ "gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\n",
+ "Copyright (C) 2019 Free Software Foundation, Inc.\n",
+ "This is free software; see the source for copying conditions. There is NO\n",
+ "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
+ "\n",
+ "/home/PJLAB/liyining/anaconda3/envs/pt1.9/bin/python\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check NVCC version\n",
+ "!nvcc -V\n",
+ "\n",
+ "# check GCC version\n",
+ "!gcc --version\n",
+ "\n",
+ "# check python in conda environment\n",
+ "!which python"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "26-3yY31zw0y",
+ "outputId": "fad7fbc2-ae00-4e4b-fa80-a0d16c0a4ac3"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: mmcv-full in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (1.3.9)\r\n",
+ "Requirement already satisfied: Pillow in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmcv-full) (8.3.1)\r\n",
+ "Requirement already satisfied: yapf in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmcv-full) (0.31.0)\r\n",
+ "Requirement already satisfied: pyyaml in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmcv-full) (5.4.1)\r\n",
+ "Requirement already satisfied: addict in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmcv-full) (2.4.0)\r\n",
+ "Requirement already satisfied: numpy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmcv-full) (1.21.1)\n",
+ "Requirement already satisfied: mmdet in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (2.15.0)\n",
+ "Requirement already satisfied: numpy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmdet) (1.21.1)\n",
+ "Requirement already satisfied: terminaltables in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmdet) (3.1.0)\n",
+ "Requirement already satisfied: pycocotools in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmdet) (2.0.2)\n",
+ "Requirement already satisfied: six in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmdet) (1.16.0)\n",
+ "Requirement already satisfied: matplotlib in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmdet) (3.4.2)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmdet) (1.3.1)\n",
+ "Requirement already satisfied: cycler>=0.10 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmdet) (0.10.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmdet) (2.8.2)\n",
+ "Requirement already satisfied: pyparsing>=2.2.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmdet) (2.4.7)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmdet) (8.3.1)\n",
+ "Requirement already satisfied: cython>=0.27.3 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from pycocotools->mmdet) (0.29.24)\n",
+ "Requirement already satisfied: setuptools>=18.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from pycocotools->mmdet) (52.0.0.post20210125)\n",
+ "Cloning into 'mmpose'...\n",
+ "remote: Enumerating objects: 12253, done.\u001b[K\n",
+ "remote: Counting objects: 100% (4193/4193), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (1401/1401), done.\u001b[K\n",
+ "remote: Total 12253 (delta 3029), reused 3479 (delta 2695), pack-reused 8060\u001b[K\n",
+ "Receiving objects: 100% (12253/12253), 21.00 MiB | 2.92 MiB/s, done.\n",
+ "Resolving deltas: 100% (8230/8230), done.\n",
+ "Checking connectivity... done.\n",
+ "/home/SENSETIME/liyining/openmmlab/misc/colab/mmpose\n",
+ "Ignoring dataclasses: markers 'python_version == \"3.6\"' don't match your environment\n",
+ "Collecting poseval@ git+https://github.com/svenkreiss/poseval.git\n",
+ " Cloning https://github.com/svenkreiss/poseval.git to /tmp/pip-install-d12g7njf/poseval_66b19fe8a11a4135b1a0064566177a26\n",
+ " Running command git clone -q https://github.com/svenkreiss/poseval.git /tmp/pip-install-d12g7njf/poseval_66b19fe8a11a4135b1a0064566177a26\n",
+ " Resolved https://github.com/svenkreiss/poseval.git to commit 3128c5cbcf90946e5164ff438ad651e113e64613\n",
+ " Running command git submodule update --init --recursive -q\n",
+ "Requirement already satisfied: numpy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from -r requirements/build.txt (line 2)) (1.21.1)\n",
+ "Collecting torch>=1.3\n",
+ " Using cached torch-1.9.0-cp39-cp39-manylinux1_x86_64.whl (831.4 MB)\n",
+ "Collecting chumpy\n",
+ " Using cached chumpy-0.70-py3-none-any.whl\n",
+ "Collecting json_tricks\n",
+ " Using cached json_tricks-3.15.5-py2.py3-none-any.whl (26 kB)\n",
+ "Requirement already satisfied: matplotlib in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from -r requirements/runtime.txt (line 4)) (3.4.2)\n",
+ "Collecting munkres\n",
+ " Using cached munkres-1.1.4-py2.py3-none-any.whl (7.0 kB)\n",
+ "Collecting opencv-python\n",
+ " Using cached opencv_python-4.5.3.56-cp39-cp39-manylinux2014_x86_64.whl (49.9 MB)\n",
+ "Requirement already satisfied: pillow in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from -r requirements/runtime.txt (line 8)) (8.3.1)\n",
+ "Collecting scipy\n",
+ " Using cached scipy-1.7.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (28.5 MB)\n",
+ "Collecting torchvision\n",
+ " Using cached torchvision-0.10.0-cp39-cp39-manylinux1_x86_64.whl (22.1 MB)\n",
+ "Collecting xtcocotools>=1.8\n",
+ " Downloading xtcocotools-1.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (303 kB)\n",
+ "\u001b[K |████████████████████████████████| 303 kB 1.1 MB/s \n",
+ "\u001b[?25hCollecting coverage\n",
+ " Using cached coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl (243 kB)\n",
+ "Collecting flake8\n",
+ " Using cached flake8-3.9.2-py2.py3-none-any.whl (73 kB)\n",
+ "Collecting interrogate\n",
+ " Using cached interrogate-1.4.0-py3-none-any.whl (28 kB)\n",
+ "Collecting isort==4.3.21\n",
+ " Using cached isort-4.3.21-py2.py3-none-any.whl (42 kB)\n",
+ "Collecting pytest\n",
+ " Using cached pytest-6.2.4-py3-none-any.whl (280 kB)\n",
+ "Collecting pytest-runner\n",
+ " Using cached pytest_runner-5.3.1-py3-none-any.whl (7.1 kB)\n",
+ "Collecting smplx>=0.1.28\n",
+ " Using cached smplx-0.1.28-py3-none-any.whl (29 kB)\n",
+ "Collecting xdoctest>=0.10.0\n",
+ " Using cached xdoctest-0.15.5-py3-none-any.whl (113 kB)\n",
+ "Requirement already satisfied: yapf in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from -r requirements/tests.txt (line 9)) (0.31.0)\n",
+ "Collecting albumentations>=0.3.2\n",
+ " Using cached albumentations-1.0.3.tar.gz (173 kB)\n",
+ "Collecting onnx\n",
+ " Downloading onnx-1.10.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (12.3 MB)\n",
+ "\u001b[K |████████████████████████████████| 12.3 MB 4.1 MB/s \n",
+ "\u001b[?25hCollecting onnxruntime\n",
+ " Using cached onnxruntime-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)\n",
+ "Collecting pyrender\n",
+ " Using cached pyrender-0.1.45-py3-none-any.whl (1.2 MB)\n",
+ "Collecting trimesh\n",
+ " Downloading trimesh-3.9.26-py3-none-any.whl (634 kB)\n",
+ "\u001b[K |████████████████████████████████| 634 kB 978 kB/s \n",
+ "\u001b[?25hCollecting typing-extensions\n",
+ " Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)\n",
+ "Requirement already satisfied: six>=1.11.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from chumpy->-r requirements/runtime.txt (line 1)) (1.16.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->-r requirements/runtime.txt (line 4)) (2.8.2)\n",
+ "Requirement already satisfied: cycler>=0.10 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->-r requirements/runtime.txt (line 4)) (0.10.0)\n",
+ "Requirement already satisfied: pyparsing>=2.2.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->-r requirements/runtime.txt (line 4)) (2.4.7)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->-r requirements/runtime.txt (line 4)) (1.3.1)\n",
+ "Requirement already satisfied: cython>=0.27.3 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from xtcocotools>=1.8->-r requirements/runtime.txt (line 11)) (0.29.24)\n",
+ "Requirement already satisfied: setuptools>=18.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from xtcocotools>=1.8->-r requirements/runtime.txt (line 11)) (52.0.0.post20210125)\n",
+ "Collecting mccabe<0.7.0,>=0.6.0\n",
+ " Using cached mccabe-0.6.1-py2.py3-none-any.whl (8.6 kB)\n",
+ "Collecting pycodestyle<2.8.0,>=2.7.0\n",
+ " Using cached pycodestyle-2.7.0-py2.py3-none-any.whl (41 kB)\n",
+ "Collecting pyflakes<2.4.0,>=2.3.0\n",
+ " Using cached pyflakes-2.3.1-py2.py3-none-any.whl (68 kB)\n",
+ "Collecting toml\n",
+ " Using cached toml-0.10.2-py2.py3-none-any.whl (16 kB)\n",
+ "Collecting colorama\n",
+ " Using cached colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n",
+ "Collecting tabulate\n",
+ " Using cached tabulate-0.8.9-py3-none-any.whl (25 kB)\n",
+ "Collecting click\n",
+ " Using cached click-8.0.1-py3-none-any.whl (97 kB)\n",
+ "Collecting py\n",
+ " Using cached py-1.10.0-py2.py3-none-any.whl (97 kB)\n",
+ "Requirement already satisfied: attrs in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from interrogate->-r requirements/tests.txt (line 3)) (21.2.0)\n",
+ "Collecting iniconfig\n",
+ " Using cached iniconfig-1.1.1-py2.py3-none-any.whl (5.0 kB)\n",
+ "Requirement already satisfied: packaging in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from pytest->-r requirements/tests.txt (line 5)) (21.0)\n",
+ "Collecting pluggy<1.0.0a1,>=0.12\n",
+ " Using cached pluggy-0.13.1-py2.py3-none-any.whl (18 kB)\n",
+ "Collecting scikit-image>=0.16.1\n",
+ " Using cached scikit_image-0.18.2-cp39-cp39-manylinux2010_x86_64.whl (34.6 MB)\n",
+ "Requirement already satisfied: PyYAML in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from albumentations>=0.3.2->-r requirements/optional.txt (line 1)) (5.4.1)\n",
+ "Collecting opencv-python-headless>=4.1.1\n",
+ " Using cached opencv_python_headless-4.5.3.56-cp39-cp39-manylinux2014_x86_64.whl (37.1 MB)\n",
+ "Collecting protobuf\n",
+ " Using cached protobuf-3.17.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\n",
+ "Collecting flatbuffers\n",
+ " Using cached flatbuffers-2.0-py2.py3-none-any.whl (26 kB)\n",
+ "Collecting motmetrics>=1.2\n",
+ " Using cached motmetrics-1.2.0-py3-none-any.whl (151 kB)\n",
+ "Collecting shapely\n",
+ " Using cached Shapely-1.7.1-1-cp39-cp39-manylinux1_x86_64.whl (1.0 MB)\n",
+ "Collecting tqdm\n",
+ " Downloading tqdm-4.62.0-py2.py3-none-any.whl (76 kB)\n",
+ "\u001b[K |████████████████████████████████| 76 kB 1.0 MB/s \n",
+ "\u001b[?25hCollecting networkx\n",
+ " Using cached networkx-2.6.2-py3-none-any.whl (1.9 MB)\n",
+ "Collecting freetype-py\n",
+ " Using cached freetype_py-2.2.0-py3-none-manylinux1_x86_64.whl (890 kB)\n",
+ "Collecting pyglet>=1.4.10\n",
+ " Using cached pyglet-1.5.18-py3-none-any.whl (1.1 MB)\n",
+ "Collecting imageio\n",
+ " Using cached imageio-2.9.0-py3-none-any.whl (3.3 MB)\n",
+ "Collecting PyOpenGL==3.1.0\n",
+ " Using cached PyOpenGL-3.1.0-py3-none-any.whl\n",
+ "Collecting pytest-benchmark\n",
+ " Using cached pytest_benchmark-3.4.1-py2.py3-none-any.whl (50 kB)\n",
+ "Collecting flake8-import-order\n",
+ " Using cached flake8_import_order-0.18.1-py2.py3-none-any.whl (15 kB)\n",
+ "Collecting pandas>=0.23.1\n",
+ " Using cached pandas-1.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)\n",
+ "Collecting xmltodict>=0.12.0\n",
+ " Using cached xmltodict-0.12.0-py2.py3-none-any.whl (9.2 kB)\n",
+ "Requirement already satisfied: pytz>=2017.3 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from pandas>=0.23.1->motmetrics>=1.2->poseval@ git+https://github.com/svenkreiss/poseval.git->-r requirements/optional.txt (line 4)) (2021.1)\n",
+ "Collecting tifffile>=2019.7.26\n",
+ " Using cached tifffile-2021.7.30-py3-none-any.whl (171 kB)\n",
+ "Collecting PyWavelets>=1.1.1\n",
+ " Using cached PyWavelets-1.1.1-cp39-cp39-manylinux1_x86_64.whl (4.3 MB)\n",
+ "Collecting py-cpuinfo\n",
+ " Using cached py_cpuinfo-8.0.0-py3-none-any.whl\n",
+ "Skipping wheel build for albumentations, due to binaries being disabled for it.\n",
+ "Building wheels for collected packages: poseval\n",
+ " Building wheel for poseval (setup.py) ... \u001b[?25l-\b \b\\\b \bdone\n",
+ "\u001b[?25h Created wheel for poseval: filename=poseval-0.1.0-py3-none-any.whl size=25993 sha256=412ec354869baa10f28ba8938ca6a63c0c9233d8fbb839377f201c398d1cf5a6\n",
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-12d_ns95/wheels/0f/4a/c4/17e52eb6f9f3371b8cf1863940bff5118b00875b66809f9f51\n",
+ "Successfully built poseval\n",
+ "Installing collected packages: toml, py, pluggy, iniconfig, pytest, pyflakes, pycodestyle, py-cpuinfo, mccabe, xmltodict, typing-extensions, tifffile, scipy, PyWavelets, pytest-benchmark, pandas, networkx, imageio, flake8-import-order, flake8, trimesh, tqdm, torch, tabulate, shapely, scikit-image, PyOpenGL, pyglet, protobuf, opencv-python-headless, motmetrics, freetype-py, flatbuffers, colorama, click, xtcocotools, xdoctest, torchvision, smplx, pytest-runner, pyrender, poseval, opencv-python, onnxruntime, onnx, munkres, json-tricks, isort, interrogate, coverage, chumpy, albumentations\n",
+ " Running setup.py install for albumentations ... \u001b[?25l-\b \b\\\b \bdone\n",
+ "\u001b[?25hSuccessfully installed PyOpenGL-3.1.0 PyWavelets-1.1.1 albumentations-1.0.3 chumpy-0.70 click-8.0.1 colorama-0.4.4 coverage-5.5 flake8-3.9.2 flake8-import-order-0.18.1 flatbuffers-2.0 freetype-py-2.2.0 imageio-2.9.0 iniconfig-1.1.1 interrogate-1.4.0 isort-4.3.21 json-tricks-3.15.5 mccabe-0.6.1 motmetrics-1.2.0 munkres-1.1.4 networkx-2.6.2 onnx-1.10.1 onnxruntime-1.8.1 opencv-python-4.5.3.56 opencv-python-headless-4.5.3.56 pandas-1.3.1 pluggy-0.13.1 poseval-0.1.0 protobuf-3.17.3 py-1.10.0 py-cpuinfo-8.0.0 pycodestyle-2.7.0 pyflakes-2.3.1 pyglet-1.5.18 pyrender-0.1.45 pytest-6.2.4 pytest-benchmark-3.4.1 pytest-runner-5.3.1 scikit-image-0.18.2 scipy-1.7.1 shapely-1.7.1 smplx-0.1.28 tabulate-0.8.9 tifffile-2021.7.30 toml-0.10.2 torch-1.9.0 torchvision-0.10.0 tqdm-4.62.0 trimesh-3.9.26 typing-extensions-3.10.0.0 xdoctest-0.15.5 xmltodict-0.12.0 xtcocotools-1.10\n",
+ "Obtaining file:///home/SENSETIME/liyining/openmmlab/misc/colab/mmpose\n",
+ "Requirement already satisfied: chumpy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (0.70)\n",
+ "Requirement already satisfied: json_tricks in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (3.15.5)\n",
+ "Requirement already satisfied: matplotlib in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (3.4.2)\n",
+ "Requirement already satisfied: munkres in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (1.1.4)\n",
+ "Requirement already satisfied: numpy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (1.21.1)\n",
+ "Requirement already satisfied: opencv-python in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (4.5.3.56)\n",
+ "Requirement already satisfied: pillow in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (8.3.1)\n",
+ "Requirement already satisfied: scipy in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (1.7.1)\n",
+ "Requirement already satisfied: torchvision in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (0.10.0)\n",
+ "Requirement already satisfied: xtcocotools>=1.8 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from mmpose==0.16.0) (1.10)\n",
+ "Requirement already satisfied: cython>=0.27.3 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from xtcocotools>=1.8->mmpose==0.16.0) (0.29.24)\n",
+ "Requirement already satisfied: setuptools>=18.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from xtcocotools>=1.8->mmpose==0.16.0) (52.0.0.post20210125)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmpose==0.16.0) (2.8.2)\n",
+ "Requirement already satisfied: cycler>=0.10 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmpose==0.16.0) (0.10.0)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmpose==0.16.0) (1.3.1)\n",
+ "Requirement already satisfied: pyparsing>=2.2.1 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from matplotlib->mmpose==0.16.0) (2.4.7)\n",
+ "Requirement already satisfied: six in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from cycler>=0.10->matplotlib->mmpose==0.16.0) (1.16.0)\n",
+ "Requirement already satisfied: torch==1.9.0 in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from torchvision->mmpose==0.16.0) (1.9.0)\n",
+ "Requirement already satisfied: typing-extensions in /home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages (from torch==1.9.0->torchvision->mmpose==0.16.0) (3.10.0.0)\n",
+ "Installing collected packages: mmpose\n",
+ " Running setup.py develop for mmpose\n",
+ "Successfully installed mmpose-0.16.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# install pytorch\n",
+ "!pip install torch\n",
+ "\n",
+ "# install mmcv-full\n",
+ "!pip install mmcv-full\n",
+ "\n",
+ "# install mmdet for inference demo\n",
+ "!pip install mmdet\n",
+ "\n",
+ "# clone mmpose repo\n",
+ "!rm -rf mmpose\n",
+ "!git clone https://github.com/open-mmlab/mmpose.git\n",
+ "%cd mmpose\n",
+ "\n",
+ "# install mmpose dependencies\n",
+ "!pip install -r requirements.txt\n",
+ "\n",
+ "# install mmpose in develop mode\n",
+ "!pip install -e ."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "aIEhiA44zw0y",
+ "outputId": "31e36b6e-29a7-4f21-dc47-22905c6a48ca"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "torch version: 1.9.0+cu111 True\n",
+ "torchvision version: 0.10.0+cu111\n",
+ "mmpose version: 0.18.0\n",
+ "cuda version: 11.1\n",
+ "compiler information: GCC 9.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Check Pytorch installation\n",
+ "import torch, torchvision\n",
+ "print('torch version:', torch.__version__, torch.cuda.is_available())\n",
+ "print('torchvision version:', torchvision.__version__)\n",
+ "\n",
+ "# Check MMPose installation\n",
+ "import mmpose\n",
+ "print('mmpose version:', mmpose.__version__)\n",
+ "\n",
+ "# Check mmcv installation\n",
+ "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n",
+ "print('cuda version:', get_compiling_cuda_version())\n",
+ "print('compiler information:', get_compiler_version())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KyrovOnDzw0z"
+ },
+ "source": [
+ "## Inference with an MMPose model\n",
+ "\n",
+ "MMPose provides high level APIs for model inference and training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 421
+ },
+ "id": "AaUNCi28zw0z",
+ "outputId": "441a8335-7795-42f8-c48c-d37149ca85a8"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Use load_from_http loader\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/PJLAB/liyining/anaconda3/envs/pt1.9/lib/python3.9/site-packages/mmdet/core/anchor/builder.py:16: UserWarning: ``build_anchor_generator`` would be deprecated soon, please use ``build_prior_generator`` \n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Use load_from_http loader\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/PJLAB/liyining/anaconda3/envs/pt1.9/lib/python3.9/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)\n",
+ " return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)\n",
+ "/home/PJLAB/liyining/anaconda3/envs/pt1.9/lib/python3.9/site-packages/mmdet/core/anchor/anchor_generator.py:324: UserWarning: ``grid_anchors`` would be deprecated soon. Please use ``grid_priors`` \n",
+ " warnings.warn('``grid_anchors`` would be deprecated soon. '\n",
+ "/home/PJLAB/liyining/anaconda3/envs/pt1.9/lib/python3.9/site-packages/mmdet/core/anchor/anchor_generator.py:360: UserWarning: ``single_level_grid_anchors`` would be deprecated soon. Please use ``single_level_grid_priors`` \n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUAAAADWCAIAAAAvuswXAAAgAElEQVR4ATTBWcxtW3oe5PdrxphzrrX+brenK1eVq9zEtuIYiOQGZFkEkwShKMhICIgQN3CBiAQ3thIgDogICRTETYyDwTIxoEAgNwETWZBQQTgkohFEgJvYVafqNPuc3fzNWmvOMcbXsMuI56HHX7ikngCZohAlOxGf70e2ZJVMhzAzZUIUus8geEN0n/f14kYf7jcfVGux1cKJSd2DmeZLYWU7RuuZGcSuqiPdR2dS7yizJIcNZyuRbw3i1FKJCUBZsDzS+WoA+nA3Hj9V2sntJ5udaql88YzaaKdPKLa0rm0d0VMg05Xtbkrv3h44ELAQ1u5GjQjkFioKcmLxzADSnTR0Ec9UUndnEJIQbymxJ5KBSCG2y2u+eUdffmpSdf80BIoUMv78w3NvYKLlQprH+W4oNDnqnp9+cLm5H+/PaugeVQVK7Q69bzePHm/tOC1oI+SiLVdKdajI699Af63JNl9WhruD1QAdR47Iso+wTJOxBUW++3sqLe3ianf/8vTwoq53UVCgqZqczAWYnbiiU18bK08F28aifbe/8m2rV8tc9NNPT1/97t93d383P5zfuWzvXl3zdlI/7+d62/kv//o3EfPYLAAqoxSxRrUoyJkmiLuNabeLaT1c7Szj/Nr6aahCJt4echu9mGbJynUMc0A0yi6lTGtbo3OZlTkJ4REprNU5aT2ljsnJBOSR0+WU7JpEjPNxUGqmB4UIk5CHF2jCWTiTFTkcHknsy4UK0/FuC6vEg5nDkl3dAUZRidGtkZkxKzPniJQggYrKjgKgSHgM8otnYtbzVE8PXmTSyS3dezAV6yZKEInN0wKclCwqFqMU8ZJESUZ2hhTmKYqWseVolk4iRJoZmZ4AiZTwSApRAiOImCPCibjMJJOOPnyLUqa6ZyD7Oei7fvDpduoBGAUZMrKv0U+JtwigVFXWjKARo+502oltaS0i/fG7iw06H7v3TA8i1Glu2wD88slOJzk9rH6SzEgEEbiwCvdubuCaOmlbe3b2iDrz4TCP3t1znpcQoxrX75d5LrdvTh4hLNOSQSUJV4+mz765vv7NlQ2kU9s2BiOTq8qSkcFRGaHgiOxmbGLDmDgDoCQmKAdciKQQFfZILUTg3gYTwxFBXCIyZSZQlizj6POBSJmmpBrTMvUxxrmPu4kpI0Inchdb3Vr4MOZcHk+P3p+N21L36+rRB5LuPun9aCJKNeYLyVmmq/P10935fI7g44eyfpZMzJdeSNez7Q5lnmJ7oPWYRbTjjBByJrAoYde5ZtXFRrcz+yARrgsL0bSTrNZvdRLd1i2BecF51asnzKU303EyjXKi/id/+hf+5t/+ld/+tf/xnT2eXMy0Pixk81Jfbf2//fBVeGVw3YtnH2cb53z3vYvhw7q65/HuuNtfBdrWel1qO5sN10JOKHNa3WgUP7FkcR9Uox6EoGml91MyXT+fR2w+yF3K4jdPy7r6+XM+fbYBxEFEPF3UHqOWUhZt595OTYizRNHCxG1rU5ksBitN82TDraV1r4V4oVp1O28Z6sODQoSjO8CeQRCyRFJ44tuYBQnPSIHIXrkYk6wPWYR1byatlLm9yTSaZrJ0c1Dy6MYMSMCJQgDKyPlpcRitHJFJLiLe05sBAqAW6cPDnAgQogQBRGLhzMTEIAIQ4ct+sjQPQ4JFEkDm9XsLaLSjbcdKP/gjz9ZTvn69AsW7WzO4phOreQQ4VVUkkjgFQEKUg6OHefBsytq3yFBG50JaxBzUcneYWLmtzSKJqE7FhjlGmTQzIyjgEPbhAiVwFp/nKfpQyYvri+A4nu5yF1dX+0g7HzONDk9MZh3B+51+9g0/f7RyqlmaWy3q5hKaFElBIM8sJBlpEYWUiN0cSYFISp2ElZkSnBaW4DIl5TS6ERJOECZty+Fid1OSO4yPn25Xj1Av5vMa96/GdPDrpxfW7fWHvZ0aa4kcnDUjbQQ7EJFaLp/zdENcSwa8bSrlzYd93HkEkmO5meanwrWPRrYRnX07G5Rkx8Jg03ZE0tAdadGISAuM0lsnJwRToemCoRFJ7GhbkGsOS0GpwiVJg0KmpbStW8uE0Nze++Lh4RXVpWPlh885Lsqf/lf/o6/92l/6nV/7G88WerSb7c2rR/uik3x+3n714zceqqJlZlEZm42jHWat++l0xMPtiYLaGI+fXgyH9c3ChzkBEFw8oasvzUg5fmLnl8NjXL83Xb9XX7043b0ApcgUj97bkdj93ZqGZTfpTLd327jzfFBmzpExkgqFJLFOCzFR37pQkcK9dZUSHkTsZHWRaaf9lLaF93FxWHqCCK33MWxaJJExPAYygwThwBAVZuLejZnBQcqIhANKKF1LGavYeaiq7Hi6rLG27TQuLuY2bIzIoDAXYYgRM1IoJDzoOpf9NB5GPw8qBEBA4+w+GECd4EHpXkoZEenORJ6h87Tbl7a2GEFJRGBhSydJchk9IDntpO4P1rdC7A5670vPehvtvAnSXMmDhSGEzIgAJwAmQEGF0iNF2FJYzDwVQogAp7oFF9dJt+4IFJbCiiSSfAvfRkFjWsow8wQyI5FBZAkgCgCiARXOCq6x381Bej41LS1Cyg6HR+weo3td+HRH0tyitjvrfRBxREyluEUiE5mDyUFAEqtSRoYHk0ASTGAkQxlSCOyqhSjPRw8DUYAoOfY36kl1yelaS0VNItezj3S9//joSfsnhZFvPsxYE0xgEy8pHgQhiu4I2l3P5YLlIMM65yjT9Oabo71p2QnEWfPiuTJLDrG2mmnmKAtzhVSzk8Q2q0aPznNA2FqQkQoplXYeJMzirORBCAyHpMAskyCQIomcZ9dF3aifGTRGxMVlGfey38+p2zhO2NO//qf+g1/963/5m//L33rvWq8mztevbhYtu/L5w/mvfP1FdyGAgrmyCBfWq3r5+vR5b+jnQUK60MVTKXN9+LQn5xieg3qL6ZIefXUh9vvPRnstZmP3LN7/3uu+ndaH4KzrKYLi8ePr0baXH67Wsx5i/2g5fraNB0DYmlEIU5IQkRCNWtUiWncy1SrDBjERSZBd3Mx1j/tPW47c7eZpJjPyzG1rZZrKAX2z9uBxQsCoRHqSKyVFBJIAIsqQSIQQA0zKZRZGJrzdwgdkx7LzaKSC4UHJYUgPFgJ5SBAxh4YlBDQlC1FQIDNAHj4kzaZ56u4EVOUItJGczswQQhVWjz4qSwwHkw0CiAkAeQdp7p9UTL0ftdZZZqebR/twDgNxZiIyiUkKkMjg8CCKJBCBlSMTwswx1cl6IAK/KzMpMyh5EYgXERj1NeE8FSHmbpZEoJgP7B5IU9WtmztiJBAiBCCT3prKRBr1gDH4dD8SDTbpbjz/zoPMWO+GtVzXTXVCoh/R1+E9YfxWMpigiUEBk0SUhcgzjYgVlaI7lSRwDiQFl6x1niaXKqc3vh4714xkUuwu68iupM+/t9QbF67bRuvp9V73pzf58GZ45wIcbxmnYeZSFIhgUhZ4+sj
CmnNi8sPVJFJWO3Pm6WWMewcoAsxy9e60bm59FGZUZCQ8idjh5ERgropqbhtcYigNQGJeikXP5NFTKcPEPFQ5M4nA0DrxvJQ2xnQYQvV8Cne23sMRFsJ08XQZ5lipPrv40z/97//Kf/9Ln/1v/+fTG74i0fXz/f7xottn2/hvfvtu7ZaZDN5fKXHSKBjClOfztjXXS/3SD17L4cF9bS+Xu0/76Y2T1rAWKSm4fB59RI7KvBWd9DLqBe8P/P7TRx9+8tpNSXTalYcXb85nU62l8vkep89HbuyjpStRMGmteXFVThtyG0bshGyZ6bzw5fNpa2N/SM3p/rVZs1IKyCet5aLc36+aqZfZz2kPaZtFCIHSPB0gsAAgpISDicyHCLOyHIKqVSmiut73850XXmTpEemezOxGBCCSiZxSlNwHQAIJhBYCZTLgYBaHIxIgSslAJnb7EhHnU4eBGSRSLwUZ1rMojRZplBGJCIFmJcpUL3u5eCJwvr/t82GiR0+u3NItmBgEomQhsJsbwJT0bcxECMqIEGYmEmKAPRz/PwZ5BAmViesyx8D5tMJTqXgaKRIZCa3pTnOpOtPaxxieIwgohSNCdUq4uZdF5r2ao60mggxyC6p+cTOpWDsTijHK3SvDSB+Rg9IzghKhKgIgOCNk8qtnVaieHnpvESAhJUVEuLlq0cUE1TuFNGs8WoJCg1DSS5Sp1IX2z+nwtD7cn+YdFZXsfL6N9ZWM1TCiPYQHRJQkk4zB6c6Qtg4O1UVlpt1hpiTjMU3y6pN1u9uIGEAmll01OAmKotRq7t2MSAiRhgwEh1YmAI6+hTiRMglk4jLFaKBw65Fe3Z0oRYQk94fFMzwi2Zdl31obY7ghg4REhByeSDWp7xz+rT/xS//Vr/zc6f/5u88flR3RvL04LAuJf/zQfuW3XntASwn3MnNmts3ViwonYbOxHCpfnfZP6XC1a3f+yW+d/KESA0zWPQVXT8knUymlEJM83K9aSyk47IR2PbNsZxPWeY/TcU2betNxl+e7TiEkQSNHoOzqkw9KlVy35K3evjnZcCaRRepVeeeLTz3W0W/ffOLRMNZIxzC7vNrNl+XNm/tJWarYRtvq5JQWfR2cTCBwMhOzhMM9I0aZBEhmTg1mlpRgmsrUzqNvHUzEEBUPi2AVtt6BLPPkYe7mnkiISp2VmGy4dycmVnDh9HALlYIg4sjM0ZNZhdNH7C40HL1HmZHgfia3JgJoSJBbcpmCc7eXw+V8Oq8QoyfvXaV7uGdQuLCgTBLpSSEiYYkUkEdmJEDQTCJBEDODmIDMjP9PUqaVqknsZiKSEWMNlpwOU8J9QCrcGBEkHkQZSEsRUuWM0DKBsI5NKxNHBEcESHY7JsbpoSOFFcy2XKsNHF+m9wEnBMISYKIUYQhJJKJwyeWq1EmOx963SCdSY+ZAJOc0yaOnxRvfv/KObiMyJUcIslxQCtJlvqTrd663sW7nVatHJFOJQYUgOd191r07M9D5rWmvI8N6F5LoNLbu7iKiVXUSKbi82r38dFsfzplJREBSksxMBcwQZvPwCBZxcxhAIM30BAiJDBICkoJAEtBIq0qRgfDITCZhZlDUpQSlBSKSmVjIbGQQkVCEFibo1lc2vvjikz/zM7/4n/6X/7Z//Vvv3CzXajwe9hiq+LsP+Ku/8aKWiUQjRiJBCApviSQRCc5lt+dlm64JEkSIdb799OzdIpODe/dHz2dezm7LsPHOB3j9hk5vfKpT3WW9hCq/dXw47Q7X6+lka9jG48j9PIgAzQyhuT3/8vT+96RC7+/H+q3rF79znHU5tvN7X3mnyfHczrt9AZ23u7LdOTp7wzCXgmlHESRK6TgdO4WUUtJsbK5ciAKEiMxIEMIVMrgQsxYQKAliwyxIiIV59OFO4Njt5+49IgkgZEYSs/vIRAYSKSpgmpepbSOGA8kVFkEAE6cHiFS1d0OiaAFlOJaZ3MgDjlbKpBOdb7t3BpEQMZFUcMUy68hzRqlloqvnF4Rgoirzeh4sUWZ2dwiKFhvhg7Q4kVi4eyonssDBQpRkZhEBYJ6nPsyRWhnmbzELEadlInTWRHoHxFQmhGVkAhFAJhEtSyEid7hFy1FmRXh0irRInZZhusS2IeEihUNmNot+NBqMBCW7BwEqFEgqQgkKXnayu6TudH/cRosKQQGcnIxqLnOZd0Q+nW4bJgKjbT0aL1dyeDK13rY7v7xapovLN2/ufG1lKUSpkxA7hEop68n2FwmSh0+2cYw61/lQM8xa+Mhx5uzet8FS6jV2c6X00x0f71eiJKIIu3p0kZKtj8I1aJg5g9IRSAoiQkoQKByZycJSI10ycqo8YN4V7sgQ5QSZOYMRyppZkoSJwj0yOCKEkEEEiIQW7WY0yqPveu9n/+U/9wt/4Wfjd775wdPLC42PXj084Xh2wb91b//D198oOEhIIj2pZNlTpGJgrEYMPdSrx6X5rcys81QnOd2d2eb1TZzuNowCMb2RqycQFSm99VhfKkXB3MGyv8rlEp5uJz7f5ul1kLMNjuHCnBz1cV7dLEG2u8jdExsj7n7j0D5jSqzZphtermlWbR46N2t8fMX9YUSHezAzaLBMHs6MGMiRDOIiYajKqmGBTBl9aBHW1AkpWFfjlYkInIHg4DEcrkSSsESCQUpakAHvLqzEMLeIJAjSibmUIqpb3wSc8CCjKAAyAoBMBMjoDkAzU0inGdnHSARnDKHKc/oprQXBUSSNmQuEdKGn703Ru4Lo6uZSi19dX1vT492d09hfl26gJABmxiikwz3dk0C1ElLcPdMYkiDvUcu0LLvNmvdNiQYoPJmJKBDsESwMJnhkkhRJcoKEjwykYxh0zmWZ+jlGs4DXWZPTR5IDIJ0l4DkCBFRYBoGQxAwmckdYZHeGaNGET1MZQtH77iDJ7H20VVIGoQiNIEryZV+hmR120mwml1yXejquMCpL0T3qku0s49iK1rZ2G16qTBcFk087LZNOOwbY2yiXdbvb2rppJV1qu8P5c4sGWomZw2Nbx+Pn+3e+VO/e+OuP2/nOAp21Rrbn715vaXcPD1XmtMw0ZIl0ZmLiTEQEOAHOiMIZM6VHZmotDLIRnOoWdSbV0tZGRGnsEckpmu4Mc7ylIoJMT8a8nwlxOtpM9Oir3/kn/8V/95f/kz8zPv743cvF1/OLT18/vZavXO3+71fHr33zyMGOtyhiEEOEU2RaSmstu++eLGWXIW1edNpX8MjwTB7b/OobJ28oe90/1v1VGd1aO2mZTrex2y1UB5GLCk/s6AW0PdDtR56rMUmUUXc0qT7+krTonLv7F50UOWBnbg9J4Ua4fqdYEmz0Bt2xzBwrZUPvfX8xP7w6WYM7WJnJWdhHooMyA8SFdWbVcFLbbHcjYJzucHEdSbK9stEzAdEMp2hGKeZZCyNgqRKDd0woorLs9P50sgwaCckp5pHnDplQjLsQZyQRUsASmeTGWpOpeDfNNCqgUBX3ZMp0dov0ZHDmFO7EgzSlKiXZGiH05N3ZBysH3Ty5mOe6LPtt9fP6IErznq0jPdw54T
qlR1p3IfWAVskMZiJCwAFKh7sfpsoytW5mkRKEQaAcGmH4NgJIVcyclVmYCW5DtYzh7RzLXuZ5Wk+2nYcIuIAlPSg9EsnCQYlIVoLAPZmTlVglI5CEQXYeWViJlVVF1tyUS50QoO0cRNhdkUdjnzZrWqE1ItQ3YMyU2WhkRgaUhQTTnuc9nY/mZzXz9BQp4L7sK4qnBk18cTO7wc372PpqV1dXZi2c+0Nst0ENFDTNtZRyPJ7D49EHBVxvP2+tbZQOV+F88uz6bO28bexi0YnSjJmDSSMCSSAiTygnB8MhwonMpCLwyEiiEp7MVFTb1pkYhYk6EmESkYTfRWBNIYKkTjXCRyOJ+OAHf+CP/zP/xn/8y/8mvfr83ctdbW9uJgiGWP5fL+1rH96neYAAiHCpKkIEbbaBEBZlz9fPL852VyozRb0oOmmZlt7y9dcftvtBSvsnS6LBOPowzSJUJomM/SVPS9k2Dy8yBVO8ftH8DFI6PNvGqIdd3HxAD2+oZHn1DbQzcTANPT9sFDFMlxueLjyBvnJSsNTRey0C6ZePpu2eH15t7WTTVCMHi/oIOClnG6PMhRTKznXqJ5/2fLgod69GUheZYmQ/O6e6jUgwkVskExLTHLTjdjSBKM9jxOXFodmpj/7k2Qc3j3br/Zu/7wd+TC7e/+aLFy9f3xFyW8+n00M3BxxEjFKn3Qfvv3/7+rO71y+Z8/7hLpMIUuZA1HBm4cAW7hGRQZLh6UkhxNGTihErgekLX3wqPLfWiWLLPhURzb4leQYIkmBnVhgNi/31zbRcnc+ven+gyJFOICa2PsQLEWvh5MFciQeSR2NlZy7rudkIVWEhMEWmMlQ5k1q3seZhX0Xk7v7I0GmugCfCIiMDSCQinYuwcrinQyu0qke6UUb65koF1YsU75aB4bzf14sbbtGOtyYllv2BODH0bMd5X6qyBTAIXqm4186up7stHN5SJp537I29ZR/bO+++i5TPP/90t5sg0clqjcOj+fbuzFCUUcoyNrdu0RE9xYqdIy3LxPvL/RjjeP9w8WR69t6TT77xZu0d4WNLJVxe7wdHRPrqVAOI3pI4WRMgJN6KrtBkBSPDmRNvBRNnRkSCM8Hgt9ICoME2VRDIh1DCM/BWJogZycqQ9NiQM2x89Yd//x//Y3/qF37xZ6fj/fs3h3h49ZXnk/Ww3v/3r9/9zU/bUmt3Cx/hISJahHUEMoO8p0xVdrh5fnF/vGPyi8cXb+7vOIu16K9chvZwvkKdOIzYs1xOwqMUGpEZPO9ofzHf3d8dnk4y+d3r9XL3+NF7cRov19sn4wSnozcoZT+527S+sjhyOFiG0CyT7J4MFro/tmVXM8V7gmSYE/NYt3HK2CAqScHJYUnCpSYxAukR+/3kcD9RSHKCnG2kE5U50si3JCfzyDQuHJKcMe9o91ROb7IfkYZ0ziCduBBff+H3/PAPf//f+Gt/Zbl80mVaHz6PM5g4wkdvCTYbAEQUmeAQSXL34KBW5qyT8pC71y2Zpkupk8aWYwsDZfP0qFUDMXo7PNrvr+vp1OmD73g6Go/RdgdOod77cqhtJW+uU3CVdUuOzlSa57Mvfunx0+96/eo37199I4cNI2QqcWaywDZQEBeBdWKVKkmekXC2kdYjkWUSYvLISQoJzL11S6PDMrPwcT0rg1nMPDwjMziYmRygQAELpZEnREKZRwchWYWFx9aYRFTcXUUJOc87Er9fNyTfPK6n1epEN5ccNequTFxPI2K1TD48Lc5tvcv1TR6Po59WEmEB22TDk8bT58/GoNPp5bNHj9a+umQ52P5qGiF3dw+jtd182R58PFgP50x0ys5YkWxllqDsW+qsl9d6eh1tdOZIUy24uNmfx+qeEjxkJDxciFFUPDwTSEJCKoTYmiFEiJg5iYRyhGcQCU+lbutKSQAyKBOggIADKYy3PIJSiTK9zEUU5y2zj9/3D/3EP/dH/6U//x/+a8t6fHqofrz9yvv7bGznl//rN9v//KJdXezLVNNtDHsrIvTSIsjOsBVSUg8yX9c+OovLpO3UuZfmhlN4B026XM5ScT41hC+HGiOZQSX62SP0cLOvOzz9ykSEF5+8gkcQ6i7Jp4cXZGPsDnk+6XLZpqtcP+bTx7BGsvCstBmWR6QaUejm+dzb1u/K1jpDxjq2c2bnGMmMDBfoGBYUF5dFJ4mI0aMuCsnxAEtKD6EIA1eBOiXZ5kLatxAFT3DxHEhXXjZqUxJUtJ2bkEYKhn/h9/6Bn/jxH/rPfvnPFdsMKHOOkWFIz9GdAP42ZEYITXNxzxwpTMTpCJ0m8jE2lD3tn6VOdPpc7l93rpjmyU7RjiM8hHn3aL54GsOCHj9+6m4kMc2SjLEOYfF0DNaZoDki0IkEBJkfP/6BH/zRT771+rNPvuaD3T06CTOXKEvpq/WTTVpJSSYQIQzMSZxugaG9mRYiyQQINQxuRvAIBhIgt1j2FZDh5t5tJDSIiJOQGRSsTEkZSAaBo6Uo0TSkqp05V+cloVmgIgLW0d26EeVyVd79An/H+5fTzc7aiHBSbO4qRKHLrrjzpx9un316l1QcLTnXB8Y5A14nbs1F89mXlmk/nXtzg0zkCEre7lsEts3HmxibwHtAplqLwDf0tWeQwzODlHwEnAmkxMleZtWlJtzGgFOmZ5EkZCQBHOzuUoU0MziRkSZZGOCCsoh1z8yIfItFskc6wEQkuXU3zhqFhKkwi9mWlMwc4ao03Vw/3N7GOX/0j/zhf/IP/7M///P/ysVmjxaf3R5flKlwv3v1tz5q/8enzRGzTjpLKSUTp+PZyRGejjDiPS2HlIoxhIgt+v5Q+zm7GxpsS+EqB/FhBNgYy37aPSNVd51k9Iej7fc3u/1yjjui3lePGIxp2ul+xy8/7tZivtCUpsq6Mzvh4Vvqd4oaBE5O5px2lMj98ymR66vRN8u3KL2JbcFJBESmMhMSBExgjmXZZ2w2sD24yGxpiGBQILJESeUievC6mx8+3dwTbMJFNNbj4FAmmWZxeA8rUtxinP297/tDP/njP/DX/+tfujuuNEuhfvvQlBfAt7VzSSkQ5rH6XKfGY38tpzeRDZlpZqqyu6LDo/LmdahG2efxs1wfYr6Y60XOXN989DCOyuy7x/NyTb4ZXR6uwLHslNUyta8+uoMI4Ewjhii/FQkCpif77/6+H3n5Yv3k6/8TA5EeG5AkVXlxIfEeYXR1PXEhG2YtM0OqBKidR2xgATGYJTxGczcrQsGSCbMhynUq87yY2bqu7kmceCuRmVIFBDjCEelEEDA0mQsB6W4DukidSjs3hiTB3ODBFV98b/mh3/v0+TuPTuPcm3dzRzIGqwww1K7p8etX9tsffjRA64hmdryPvnadSIUU+uzJdZPj/DhWa32tHHR8GL31w6HevHNhVl799u3Dy3OR0kYs01SZpKitnk7m1r2Jqg337pRCoIDrxPOySOFu27Z1SiFhAjIDBARFhlYuu5JOfW0qCoUN4yIysSSIAhQi1FeKYRkMEgSNrTNXnZyzuFupkmmeGcFIcjdIiiAa/QM/9
Uf+8X/wj/38z/2Ji/N455EeOGQcd9Pip4e//dn4O591swYHKfKtoMyEQwrXWmzk6MY1y06opgg55ePn17evT+O+w8hbZjCVCACUUsr+hg9PVMowohjYtn5xedBKrz8/te5P3qXeKboUme8+u2v3IkqHRxMvA9K3e7q42N1+vNl9LvvFPJgpYJY21zrfiA+7e2XpQBIXRpqPtBbhWYi/TTiZ6wKkW09RRmYMVi0B780YFB7uXqbClecrkolvP+npVGfqzSMMwUKMDJmYCqZdMTNNtDWffeUnf+zv+eLX/upf2lab9/tF5ZNPP4lUiCVxuZDrJ2Vdjwku3+kAACAASURBVP3EynzcxuFqTrPtIZAYYxDJ/qZc3sjrF6Ofkgv8zN4zxadlQrg1t8YALzcsi7Uj6PJwlbDDxTTvZGuxHls6g8jCM5IoVSlJSFiYl2dXX/rKD3368WdvPvo7aUYga55JRCJTiBKzhPN+ES4UHtbQugUnETIzNiaODGcqQGRQZooAzBFk1uskQShVi+jpdHYHAkCQIJJIKSPTEuAwZ8pSxTk5hJAXl7sOrOeVwNaNU5My0ih82pfv/sL1D3zfo6urfaJvI4aNSJ90t4a9uH+I2i6n+fYVjie8eHn2HigW2p++P9fd7vOPbreXush8uIjdM7x4+fDqIyLplDUzdhelXuvu4urum/fHT3upNDxuLq5y9OCMlkTCSqftvK2dkgDE4PQAp0582B/Kouf13JtZz3QXJRDlW0TJWZRoEkSkOUMyOXpEQKfiOoh9nnm3n46vPMhB5JZkPB9oPTsFpaFUsOToQSIRPHowUxXIQu1kP/FTP/WHfvQf+4Wf+5mLzb/wzuUcp+LbpJV7+2vfePj1NyNiYFCpyixm5p4clBTEFI6khFDZs0yBBieadjKaxykBSkdYgJyLyFya2/5QZMdTzUHezrbsJiksBdv5VOf5vS+XN7fnh9vkMT+8GBTJi4uKzgyJKlwmevnN4bfTNCWRBGXEkLcgXjscY/jYjFkgYOGM9BZIpgwkIrPMk1aeJXtDd0cGJcxCq+6W/bqtPjwdJDLcyyGllBwWQaARDoSGByhKES2SJaZZe+8z9q3jg+//R//hH/v+v/gX/uzp9tSGF9K+RaKMXOs0OdvukoizrRBSECdomhwDZjGGAUwqSd035FaTE4GMSEoKSiS+jcE073n/SHsDXS4HUplmmRZdx7BmlORpkYFkFpZCHlmWVCnX7335yZPvffHpr99//FuStW0D5kV1uIdTmUmr9hYMrgurynbu6xZEIPKqEknTXMPcLYcbwAkQJQtFkFkX9RRlylpKOnXz6EEEnohYe2/hQc4BrzQRMsgcXGsuu4pSHL6dmrfMICWy8IhQ4qnqvC9PnuA7P3hadbfZphqHffEipz4+uXuz4v5i/+TTb9xOupyOcbptus/5EX3wPbtGvr50e1X7w6qyt+mYwOlzWx9YKEiSa1mudP+YT5+fb79J046J5GLaxehEGpFtNBQAtB1H37pUxYCPYEXdFymcETbSu4cRMqSyc4ojhKCkki5ZKxEhM80zN+RAeFKtIGeJOsk4w6XVSRjKxtfv4e62tTcT3Hd7KaUcHwYImWTDM6OounQM/oP/xD/147//D/7Cz/30I6PveP8mH15dlKii3Nuv/s7pN++7ckqURLp7a0NVoUkRPhKpSCYZZWZPYARJjWjCEhkgApCeUpiUy6LNO3n0jGUqUBTlCAdRZJYD7S8FlNPCrYNa+fxbp8sbfuer5Xznd29s3l0E2v5Kbj8a68cUlsxaptJtq6RCesaKoHCDUUQ6BZNkGmcSBCUVXLQO85729GqxnqetA4EkgJlVRVZbk+A9GZYkPCFAMSIcWpJIEZbgCC9VDstkbEkgsN3BSL/89/7RP/Cj3/Of/9K/c7o/BXGMtOaZJXIgSCFOmYAwewqwMiawwQPgTEIysXsIk3MmVwIjwkSEEus2hAuIKSmS91eoe6LdcjEVLLsaoNY7UZqFG0BU5pBCY0RaEXVa6tXz9x4//vKbj7/1+sW3KmdrXYh2y24bbT1bZZrnuXmPgcystfbew4mYIpyZyqy7XRljtLNn5rQsREESPsTcPAwwCBOR0Le5BUDJSRyRbGYIRAQn6aThwc5lQr3Ecjmvp7Fu4T29OTwIAiSQqlK1OrIUPuwrUwrZfr9cPqnyrL4+NbSj9a33cv+6w4Nj31ub90Uqba3LMpYnZXeQCdrXfP3SpIy2SXtwEYwtq5blZjk8sbH6p7/RWIJ8nqWKOBgGj5OiDJ1hR9rOHexu7J24Rllm8hy2MbMb0kmESABCd9O5crWIzMhShQiRSQwRWAvyQoHuXiZ1DDKORvNMUkB9Nl11ke2++8oiyszxu0QkIswMwKi5c/wj/8I//2Nf/fv//J/9mZvav/zsnTi9vCzYKWXSf/f1Nx8dK0+MPspuBnA6nt0iKdzch8MjWUrRRBAQ6cwcSCJ4QCDMmXAmCaHg1CKBjmAkMmI6qAivW1OZ5oOU2VGi1Kmdhm1Y1225mp5/V+kP6+mVlFq5QKs8vDydP9McYEUEoiU7AklCFt+Gtzw5kZQJykxElqq7ZR9uiTEMIgrALYDIhHsyMRCinJzdOxmxEgm35srk7lIkMjhLll7naS6wSLAE7sQetWHo9p0/8k//5A9/93/xi//e/f1D95CktpkWTgSReHgMF2GA3JKDiCklRBBOyaELEXnRIpVQ8Oj9erxrD6+rlhVZt9cjmyAQxMi0HHVi2u/2y1xK1WYDSBBsWCSSeJqYlSOzbYNGkurhyfXV5RfvPn99evOtKhIpYRYZw8wDzBARYjBxZjKzuyMZRBEuIlqhyjbcBhFBpJBAlUS4D4scWqj1/5cnOI/9NL8Lw/4+PsdzfI/fPefOzh7e2fWJDYZAneIAImraRhVqpaqIJGrV649UaouQoialbVDvVq0pCiESSUhiFSUmohjTcpZgHDCHbdY2e3jvnZ2Z38zv+F7P83yO9/vdsdX29RIENFEAdN61bVu05DKpQK0VDREQ0QGWGJ1jYgzUCjWKDCqL7WqjqWoCrRpjqLU4x8BkCM45NK0Ki97tL2eLJc6u7t09u6cyVvVjyeMutzE4CqMWXyCG7nKa9vfc7FqbaCU2yiWvLyT6sLkoLDjr2u1mckEX12O335mt3vs65wHKZCAWvQVPLgbKHgik5HEnaSrOQ87VhCkIsENVAwMAVUMw53zXz6aUpnHwbcPOl5LQ1JwQo6phNgSPiEQa9tEFJU85gazCuKpgwuRQUXhywWmGWtFMiYwYiJx9k4ponMVqikP9N/+T//iFa8//vZ/88cMoT51cqZuHHcm8cbXKr722fmcD2NisazgEqFjGXKacTWqpKoaGyIwIACZSkZSYAYGZzLElQ0WRgo1SdAoKBiDVkWNHzAYYxYqY1Cqh75o5NXMsmsvOzODo+HjIQx53LhRQPw643HfIOK7H3QPUhDFEyYqVpGRFMDQAUDMDsKomRo6AUEEZkB2ZAhGToZjGGB6bpnHKCY2kmEMPBFULgAECkT0GyKaAqAaG
ZQGpWgnAwZg0ZVRVk4ify/PMEH0K7pWRjmuzzlfd+v/e1UnS1arXZXFYQlggAhqiiimwAm0eBxwNgkY08GHFMM2EFghG2MB2YMeILiYEwodkAIFoSQkERRXUmrVdmmPWfLOef/z1++9rbnee77ztFmnOtKZoqqRAwAImo2oGEpkiU7lDSOWoQNuWESp4CjZY/xZHN6uLp1sj29urrRSV5t+uAnRoLOKUBwzqE3EEEbhpzyEEJ0PpiZQmLGfuiBEI3ZUS7JDFNKgKgi0ftJtdcNm2HsEXG+WHTbVrQQo6kROyLMaWTikvHoI0N/Cl/0upff99Kvv3b1k62rnsIXn/W8bouVMZ1dc6uNW33G2sMoJ2jjYrp7+e5dd4c7g5MBi+bIVsA0Bt9uti565zwiAthsOh1L2vYbzwDgQ4ymBoBgYqZXv+H4ytvmzJWqjsOIyMhABjlnRWjqGoUcYrFST6qcs6oOfV+FKpuBCkfYCdUDUk2ePbuD81e8cvHkc8Nffnr71B2Tpy/Vk/ecHX/85FDMgUzALbXczb4hbQS+8+JP/Mbhm8/MnAIDEiKjLqHciTWSgCoiv/Hij/7OjZ99yVd9yQ/80q8uDs57tFvdMoj70X/zpsQdqGa1nWaWSmmH7vL+gWruZdjk1mtlCtu0jZ5IOWctJpF8LynnPKkbR/SJr3nmtX/98uPjE8J+Np3OmkXgeHHnynbcnK1OEdxOvZfSGEPTD5kM1u0tj9AbQQYofLhsuzZXk0m3Wc9mfrpwfRpksKHgZtmlsQYsInl9to7Ri45EoGrNHNNYmB0iD8MQPAYX1qtBUmkWO/u7nuqhYC6jbFbSbwkyDL2lUQyUwKmpKgCSKSORGRCiCCKwD677oePm3+whOXYuVsEERIfZpdeff9G93e4LTfnGh/9TWZ1FouXRNeJQUo6zg9mF+/W1f/9V9VPVYr7r9KEPf4RWj5gAM179rofPv/U+70BUPeJmhPtffN+z1z59dnMjOpqAmYFpyclMm+lERFAN/+ZP/j2RI2LJxU9ibPbZz5vZLrnp2K9lXIq0MvaEhAigImUEMAAzUzCDzzERMQSHrpRcoAQlRSxarCg7M8KcRlJJBrmMQ+7Ech5tmdOjzzwh0DnXJMA2lzGLUCN59OykGDrKJSGA8867WAWfSu7yQGRFh1SGXAqpuVgTh6FLPoyEkyLWD2s1QaWmmSRJjmwo0rYdoXEg31Wnf70ddIiT+nVf+RWPX8v96Y1te7a8dUpmogmKqiM2evk9B3gww1kqc1vBKVpw5NCEwNp+QEP2bpv6QC6NAmbEcDDd3QytmDC74DyAIVIIsWi57YmvvX7PgxdAiZmJse/7JOqQVBUdA+SdyZ4DHKUj9imltm3xNg5ckkTW0WL06CCSm1CYm/EalgvSym9zP/+L06PPLG+OpQO9i/REgYHuBjKn33/+p/7P6z9tAA7IwEZQRK3NJQIArZGzwLdf/tF3Hr3lS7//Td/9L37Ggm/bFpQ++uEH3/qeX/RhIZKMjFNh74loPXSOOQIfdjev7NzlwB8tbxqlxs8n1W4pmsftWbupQ8xtP93f++TXPfPq97x4vd4QRWb1gVPK3mGbu2yq6iKiiC3mO8uz5UHdNFWcV25d1prYG2xSX9fVYuaceQCJET3Ovbn1mE1BzLVdaxDbrU4rZlfYA6Hv2pHIjWOuqqbv21TaW8fLvnOnxzljqpwDr8Ww7/oy2NhisU6LL0WIRZIhopgimolDdgaACiiCSGYy/mi390vT9WZQYEMKsYnV/sWXfcOd91+6Nn9d9dnfPTte3XjkY5CO8/rIxUpLjdNFNXmBf8OPfF7z2WZnUY8nf/Ynf3rXfIMWQuDHv+OjF996P4cABlhkK+6lL7ny6Cc/0i3zMK51KCJSSkIzF5oQo4GhGn74z9+KSIgIRkiMxMa+me/FamKARLGUUboupSMyRkiiI5ihkWOfSyolgSmAgo6AhBRUDUwAQFXhNkQEUdUMqCkxczEVEQNJOQGaqPQ5p5LPNqtcys32+ImTm1IsVhWSDkWyAhl58wVp2Gxi5XcWB8xhGNVwNHAUsi+x4GDsLI/MPIqVkkP03dCyo7pplsvTtm2DjxWD5eln/+QamnkXzftmsdhsWlDruhYKRtbzF3bq/Xp2weN5HHRUg7PNKkMGgypGzz4NiRERvPPGwackOUkumRgYSUquq5qJSilSVEVD9GPOJecb3766908PKl54ABf8rfWpKuYxd30XKkZ0wfsiQogxRiLati0ylTwQuCpWbbuZz2aZTcckpk0VmX0pyXn0Mfq/uLX5WHsi/SngLrk9o+cwO4D7jb/1yo/8wXM/b2hLUwFrwJgY1QxgB2llWpl+2wt+8p233lIa/K5f+5XXf/k3Pnf0dGPy1h//3k+9GL3bd6iF0wLm62Hpqni2WRLwhfm5Td/WRk0Tnz66EXx05CUVk7TWBKRNqGuuXcWPfO2zd/yXHdE08ztd3642SyHbmc2DbxCRnTlXtd1ZM5mqhJTXhKDIB5N9qmB5etLnvuZ4eW+nriZduyHK56d3dHLmwF1c3J1KTgVjrHNqc+pjrNRG5gpLYL8o1iG4YSu3Nretz9Zn627MYzk52SgyGIt1kabTyidJRWQ+4yqO67WtN+lsNYxjyj3NGzeqOYd7dd3mQQsf/aOjl/7ni0dH27bVthun88bXB7L3ild8wec9e/CGvxWvbdf9Jz7yvhsf/8s8PEt+FwF8vSNhZ+/b/vdX2iczYhXh8ff91wrFhQYMnn7TJy/++ourwEkVUofTS3e/YPHRj3w4963mPAydqUNw3juR7Jwzs1IyPvTut6qqKZopIZkVY3RVA1Q1zWI2f4GoyrBMeQUIhFa0Z3JoOI7D2G3AlAjAVA1BlRBNFaHknAGAiABItRiAc8H0c5AIEAEw5QQI7IgKFFExRaarJ9eeOLyKVIxkuV0V5eVqdDzpAMA4t0kdUoCubfttPjg3j1UzlgFHq5ugAH0eqqqaxCaN487uznJ5VtfN8fZkSEPdTNabrZYeHJy8H8uNFmptN33wFQjUHuqDZnHHxcW5iZudLUGmdePEVv0wqSer9TJpBkBmrmKTxnEsG3ZN228cBy2jD5UBjHmIHOqmKUVyKVlLGbN3zlTBo4rd/LbVpf8yC5VrYs3sl9uNyBi4GseEjspYbgMAImqaRkRyKUhUUlrMduuqOT072d85WPWtldSNg2cB45xLjIHY5p8Y8MPtDSpLM5B8xUU2PbYiRD97/p//2uFPbc1m6ityCaUHLY4X2TpKAdwF4q8++Cfvu/XzLw+zVvru677oO3/hFx75n/7hX106/fQ52PM1kOvHnoqrJ/W23wLjfDJtfN1Kb2N2jrdD66mKPjp2UjLTBBkAUUR2quYjX/nEpd9ZKAlphqpqu6FxdROc6nhbiBOwUrI202rbroBdiOFke1ZxyGNRlGW/nVbT+TSkoUynszz2B9PFWBID7i8WIN6QzbQKiAUbP6kQJ34xaueVATKEMAgxsKbhNK0aqOowRQyjKIMbbeOcD55rngwp59KWskaFzSYXc
TdvHj910mu3unlrfXH/0uU918ym0eFffeOnXvv79wzZ+gxtn5+9cXymd222k7/9vd/+IX3lvfJRLe7JTz/02Pv+GLdnHBfFWYwRqvPua3783vYDmzFfPthZfua9phukqXN09Xs+cemt94YQAfHo8OjyPS87WJSPfeRjuRtT3+ehN6BY10YAGQAshJBzwo+845dVgYhvG0r2IV648qJ6egmgJUIFp5gQG8eBiMA8ECMwAqTUHT31oXHoAVRKRkIpGUxUChrfpqqIKDoSsd2mAkiI6Iidc0CcSyk5MaICmhkAGMDYLyE4Nj7drE62z9Zx1nWybodPHV2zUSNXR3l7a7WuIrO5SR2qiS/sPBqz74fRzAAxOJfMhmEkovlsBqBIXgDPlssiA7rouuH6B49tq4MOu5fnB1cOdGrqy6gynVdsExlHMxsq169vgcKQRiOZ1DN2t/lcSu7X5JuseWcyTykjuvV2UyQ3tZ9MpjmVru2UgZG9c8yYVUXk+jef3fH2fTWLoQo+jMPYDVtQkqLFimdCRACo61pKKSIGUEQck6dQiiBYHabZZD6Z3Dw5cqjjKMwOEbzT+Eyev/P0OsNSSquyILqCrOROpPz9Sz/6Bzd/NhC1lsXUGQLjXEIH1lp+wNVfNX/9HS6e5r9+chyvliE7mJf8t8Le7/69S0sVM+hKj0pD7hwGHwHAEE21zJp5zpmIQqw8VWMaUSHGmgEMUM2yFQb4xFc+fcfbdjny+bCzSluDPPEuY0j9JotgqKBIzgYoyNkRj2nE2leOU5+IqTWNrjIxh65kOXdwbmjPfGicozy2Q86KKGmso8/ZKl8tmvr8zsUph8qshoYxnvTPLapzEeqBjLTUcWpA2TLquOpPk7XBNSBQuZlZJHIoVEpywSEYGYPCY08fHp4cdd2WeHrn5enffMNnvvAPHwApDJ4tLrsbn1m/ZEiz1su5L/vBu/r3P/P0zauffmj52Gd0e0wxAjfOIdQXwtf/s7tWHzSuL+3g6Wfe2Q4bpoocPPemxw/+w10IUDl3azXe88BLWW89+sjjMuQ09LkfFQ0IgNgDw+egiOJH/+zfIiKRQ0TNkqHsXr5z58LdTDMARIwABaFokZRvgqmR81ybSs7t6uZRu10DKJiyoZgCgaqaCADC5xiYofMgaiVzjKCaUw7eKwEj5TyCASLa5ygRAQVRiUTURMvOpCTNfR5yTl6o7bp3P/aRZV7V0TkKY+qMoKeQyxY0rrv1jJqqqloYY5irZNOS81g0MQUi7x3X9aSULEA00jhkZBRnrY6oRhGCmy5Pz0C3wjSPi0m1WHcrANz23XrYNN6z8wDkY9C0xdisNyeVDwrALrRdV8qITNGFyCFypMahYRpHUx2yMNNz33x89x/tGzgEKzmDKleVJCFCI0m56PNijJ6dggGiiBBaFZuh63POTZi0/XZa1ZvUV/XMObfdbnIease2kYv/z+GJ4dLgRA0ZLoN5MDT8uxd/4peOfpoyEgEhFgMDqxnvo/gA4otc9fnnfujdhz//iGxbkBpdUfXOvbJu/usPXKlGW5Vu1uwM68HqNKxHx2FvZ3e5PjVMAd1QCnsvqSAxEuzu7Jvy2fYw8sRx6NIwpu31b9le/v0ZQPGuAjFmZ4iTWNXc9HlcDivJIzEAWowVE0mBVb+d1VElA2KvUHHtkXYXi8PDo6auOfqSs5pWVZ1TEdXoq8bHvt8Ag+CA7CNZ9HDX9P47mytd/yy4uTNnKIQ1KIulAv2ivjDKeLx6Zja5YEiNi5rVBYdO2u22rmZ1nN1qr05w14dJLjdLqYpMvFv/55e+6zV/8IrT9dHJer1aZ+fi09uXv/4NX/MYfcG337M9Tens+OyPf+etx499ZrLoXYObm8xV5Nml2bf89IWjd2NcXNlbnX7mz5LE1AszXP07j19+6/2etSG6dja+7OUvPzt+4tqTz+R+1DxoYSBFQBNCLACISACED/3JzwAAEamZiD388MfuuD9+/ud9T8aCAFqk3W6vPvleKOOL739VM38RsOe4y9zomA6vfcxSx+wyOlCg5yFizl1KOQSPSGAskg2MCE3tNhUxVQQjJhNFJAUGKAAGQCoFkJEYwYiLoStqZAogqlhEVQGxNiy5pJSLRxbQbOV0uawbHbI9fvjskyfXmGAvNJULveRN7gYq2bppWGg2EW8WhlScZUK3HrNkzaU7OLggjJ0M0id2MJlMNm1PZkPOBkkT+8o5xpRzUkNmj6oCY1KVsaoqEQEHiqgFVcbpZDGMKzV13puxM3DePfaGG3e8fWca5wowjnkY86xuJpNFztlgAGQzExEAYI8ixcDMZF6dUytqJeURDM2sbdu+73d3D+qqWa2WuQzFxFx84I+fu3mMo8gNKANCrYhkF4zfdOlHf+PwzYbIgsTyWjd7gPg8+jMbd6m+Lus79//pr97450U5OayKCOLdsdo5Fx/7B6/GMXlfEdnQJkBYtqcicsf5S9ePDxFLHedj7hkpkF+nlpnWm5MYJ1Ls4s55MTORQddPfO3xi/7kBcvVGSnOpj4Va4duXs/q2hPjdrNNxqbiHddVHNNQh7jebCgEMzQoopqzRodA4D0XlcV0BwGLmIBFz0XyMPRmtuNnvY7MhpJKEfJxVk8vTvbS9mzXLSp2XBtYjVbHsHCOAzXL7vpyOO5S3p3OK7dLWnLKrpl7pjFtx9J5msQwq+J87E+j562qcXnXqx/6loe+3KHfrE+22+2zZ3w6/YYbzzz85OLLvvFK3ow3rz/7zCPveW9ZHn/JF1/EsvmrD3VINr9wH33FD93TP8Q8OT//bHfzgyW79Tp5Hx//tsfv+U/3iZYMeHKSX/MF9x8dXn380adSmywVDoGRvPdJk+YiFkLwmgs+8uf/Dv6bLPnsJO2cxxB2GIqZqmRR2WyH6GJdL9Q51GTs1LLkjrGKjgHQ0BsAIpRSkJAARISZVZUwqGUzZUbEygANQU0pS9YO2Vdxt5iqFhUxAyhdyiuT7CgCOQNUKQRKXKkZOboNTBFBzUSVmdQUAIkdaT2Ow6pbLvuVdzSvm5xSlwbF/Ozq5OrR0zHOpxyz2NHpCQUOorP5zvXN2gC2/aaT0Tm/X++uNYNBztZvE5MCOxd03sx6yzmnorLt+qLDrNnJKTuPY8pNnBAAOigi02a+WZ5Wvh40L9dnFFkUsJQY4/VvW93/rheAWj8MTD7lQoYlw87Obtdth9wRkX9eYJfLbVlUfRUlpSqGXFIVZulzckqJHXoft9sNoJJiW/LFWzj/8+NnAVTpGMsc3QT0xMo/uPgT//rWz1QFR8TOciSrlS85vs84YT5R+95LP/m7t376uNgS5CVuspT2C6rm9HWX7UtefNqtXIya+r2dg+PlaeF8tlrtz/fPujalXrtCHgmJkYuMVZzkMizm+4dHh1MXgeDi/i5Q854v+MDXfvr1N25ex8qPwyYlzZrHYfAeY/SqKgCevedgBiqiKkgEiAGdD4QM27b1jsecQvBZyqSeolgdK3IslkXUOU/EDvym3UwnjQM861e79X4T43JzVLmqQUTbUuxFaFYdSEd7
830FLrrJJQ192a+vZDiTEkKkG91VJ3FeT7QMWyyVn1S0R2AOw3I8Ehs//jUnb/ibFwXaKSkH7x+/ubBzb9zbnf7Wp6q70/uvXHyA5NaD//GX77iYX/Ly5rOPnj38ZIJc7V95AF//j+8dH4rz81X+oBy9t1jPuTnb2lPfffSi376jwbAeS9uFB+67Eyv/kYfef3zzjKV2jvKYJ7OqbtzRzdYrmilUih97x8+XUlQVEc28WqrqYOLMDNFSySISK08Y2HGWPqBXcACOkJXNIZVS0HmTQoxEqCpkYAaIWErB56kWA3XAIoJIhkAYyZsRAca6mjO7lEYANahTWknuHUYlY+dLSpIG4t7MsuRSCrMjIEQzE6SKkBAQAIVGBAiuViEVMVPRYiYIyL5SKxU1fR5SKUaw2p6JJVV94vBGSnnVrq4e3xhMp81k1ffAvh+yJTm32BkyJRvqpmqaCMRjyeM45HHj/KRrRwMxRBCLIbDHwbIkZchNPfPs191KybbdQAZmcvPbN3f/0Z4RpmF0zjE7Qqeq3ofbUk6qWlVVzomNSinIBEjFIDgPpohAiPLfMEUiVpUxtRgdjpm8u/JbV5/ZwqiwQVMpew688Q+c/8lfvfnmxFRJvpP9HPXjRQaCrDojvwP5uy/91G889y+eNpoRXmTnLL1wPz7+LS/s9mo/ylYHn2U2m42Su9Rn0wk3aUgHzey43yiC877fbgkBIIiMVZxsS9eEakgDS5nOdj71ldfuetvedFKdtevoXRoNWIKbOg8iIxFlLWgISqoAAi74MfVIqKJV5dChiDr15JjZqUGMwSNJzjH6PrWIdJt3YeIaRNlstsFVmSVqCMGthzNugg6iJU3nlYLpmEjLpIkeJFCMMJtW+/O4l1Q226OJu8A4rMez9XhaTf3Ye4PBUZCSQwzbTis//dAbPvbl73slmZvNwjgMf/XYA4fzL7s6eY2qvOzW7y52Zpvls0/8zV/8kzc94Kvt8rT+vT/92GNPJP9VP/ale08ViM25K/X2A3LzA5suUcGj9vCJ7zq+9CsXLu3trMcxxkuXL50Td+usPVot1+sz6dbqQo1s5y7Mt531qzOmenGxxg/+4c8iopkhIWQRFsfzUgZm8j6QC4CsYAigaqaIjoKvkDxRyHlQUyQjBNQCoCmPItnyyMzOOVWl54kAIomJmTp2hFhMmUI/tAgjk5NiBqJWqqphDqZAjI4qIC5qaIoYAVHUQqgRK1URGUV6QgC0IY3eO2d1KhsiJfLsXJZcSkIpgIYU1UbmYKBorAKqYmhWtABIypv+pCReb9cZV6eDXju8OchAlusY2uxP+03BYpJdVTezacmDDB16JxraVR6Ldm0/X8wwELMQ8mp9LEVdcDF65+MwjMNYHNO1bzp+4dsPhNFEvXMmImYiJaUxxojoiCjGCAZNFYkoFTGi1HVjSrPFYhgHlTKfz3POKSVPDpFyTgZC5IQ1lxQeHy78+emTmFnCFhWt7IH77ov/2+/dfMsW7ITzQn0LugMASDMlYEWj//7Cj//7wzeLSs3AoPed2x+/5/6T8eSkbS/OFjfXtxzXaMaOgGkWpyFWq3a7qGszK4pjGkseGBkssBPnnJLkJBA8pCw5H35HuveP95lsNbaWdXdxft2eTpqdzebUBzSTJImQTCHcRtUoKctIAAQgWojJDJkIEEuRLMLMwRETAZrjaGYixUy9BqNspKmYp6BUiIkgMEMeCxk67xVLykmlhBAiskea+HoSG+dL8HWF++eafdTSwo2bJ89Ucc+hH8rpmCQlPJjt9117ce++B1/z3td/6GV9v0YzyLM/eP/+J1/76/C8F+vHv3TyxDOPPPzC6cdf/dI9Jq4ni9/6o48/uPsv5fKrAeAVz/0fL9rz9y4+45ePPH1zPD69dWuZ1j+4etXvXzw56ds0rJdw6fzB3ReqTWk7GYx4XMs22why4fLuHef3n3jqUdSdqir4gbf/nJk5F5i45MF5b4AKSEQAQM8z8t4FFUBkQfGuAcdVtafg89ihjlq6XNpSBkRgZECSPKoVJiglMTMiighhgP8fCprLaURM3odSBBFyLiF6x0HVEA0oGICZ0W3smDwitt1aTJnZsfcuiBYAEBFmB2pGBMBs5GNVzAhZUlYiz6HbnKbhpC/dOrVdSg5tLd2l2UEV6mcOD8+GtXfBjFRNUnKumcdmv7lt/7lbzzpPFblb67Pnzq4teXQ9lL47ljEhTsOkLbTarIcRvU+zZoe9hWt0yAAAIABJREFUy3msQmxi00sLbGOft+OmqpunvuHkvj89MB8kZVSSzGPqmVFBHHPbj5XnMfVU154554KAYBaiD9xs2rNUMntXVZOSeyRXUtmZLFJJyZIDzqP52qeVwm8+URU7gmImBWkK+H0XfuzXDn+uEiWiTvMUeQWYGZpSgGFh+A0Xf+q3j34msyXRS+xO/sdznLLGkMc8m05zGiVr0i5UYTHbK0WKZUNx3pvQrJlv1m2xQpoXi3kV533fRReWmzN2TBysyOG3b6/84cV+2EgWcDSfTE5WZwfT3bPlyXw2bybTYRy61KODvttEX018U0qBgI54GAbneBxHdpxLTmV0gR03aWgBQASAEMxAy7Rp+jG5EJhQS2riJGsuBWNwqSQ1G4cUfAih8sGNQzLFqa824zLbGLxDYq9uLGla8aKp0Bcu/tLOeSxwa3VjQHEWz8ULFU8mdfNHr/7A57/rgKlyVo4O73jX4UueesWPwfNmw9Wvdu/Co4e/5O4j8qAsTPSnT1x68OAX4Hnx5JE3Lh7+wt33TmMOHHOybUdv/6IHv+b9L1mepeU2Hd8YNn27sxuQ0tEZnguBCIYCXTdO6np/ry7joDnECeKHHvx3opmZRQRBEVENQgiApKZgYGYuBhFVheCDgZFrfNM0k32hwFYRoUhXtm0pHWFO41Ck9+xMpZTETGAGiOMwmAkzExEzGxQEZ6q59M57QASzcRxjcIgOgdQKIKkqPo/YgaGBSclWskhRKXxbdExORFUtxGDImouUzMjmcN1uvHc3T29dPz3cUn7u9GQcXJuHqqlFsoJOfJSSkxYlKmV0zhE5B9wNGthVDPuR0HNVVWzM6tXw5np1tl55rDd5SyyLuhoF+yLjCKJD9ABMpeQ6xuxL369JNfUZiJn8o19/64F3XCAH/baXDI7i6XKczpr9/fl0VpnmaV133ZCKJXAqua5CzoMRD10Bp+2QKoYxqwsI6L1jTeLQGeJ6OA0U2dH2MA+Pn1888sGk2qojswDyDy//xG9c/4meuREuRB1AZaU21wMtEQ3gH1/44d+69QsJS7G884o7tq912jeKm+DrYeiZUUyzKhESWPA1MRIDsUulWFEtMptNmUh0rOKUHYyj5DQiIgB6X9/4luWVt58rudNc+pz3d/a2Q9/125zTZDLJuUgZFZEdD/0wiiwm8xC8IjSO1+tN2/ZpzLGpRQoyEGFopo4gOB5LNhUmllK846FvfYhM5B1H36zbFUIgEBWY1JNt18WqaodNDK6KlSkholhGhDQk9p6Uz9r1hYPd081prJrSy94sDmh
VUfZckuw3OzXX07p53xd/6r97z4uiKIL7m8/Irafp5ut//bR+AQB80+ThF7gb4+HbXnMZitkAy213+uQz/W+efw8878Jz7/r6K8+8cv5hwLZPy0W8nK384avf+80ffv2YV0VcE/Nqy4s49Z4ee7r76Cevnt9Rx37aNKXvbm3Ws0ljI3hP+NA7fzl/TnHOGSohSinBe2JvZvg8DhNENDMACCGqBiVxvjYrIUyJqxArMGy3JyVvHVopo4ggmIogJhEholIK2uc450opziGTBwRmVgPnnIiYGSOZISEbiKoCADGXUlQzgIhkACyCAEagZoaqjj0S9V3vHRBXg5Tnjq4/u7w2sHvq8NYguEoDFa1DlZJWDRctisaEs2Y6pJEYFTQnaLtVjK4UY3YGTqSEiGJgRuMoCBACIDg0HlDGbRd8DOyx6KQmBYdMachVDGI6jH0TKwY0sKyy6VtRNimPft31y79/vp6oZgCFOlQlwXwx6/p1rK1LIyOaUl3PKuedak7dZFoPYA540viuy0NOy24gtjQoEt5mgMyhyDCpq6oOh09u2w/x+596+Muml8vZjRYgAzPg/3LxR952/V8nn5yKM0rgEqaFc+dLiGz3n//hd91884nh/I774lfd92zz4bprtrZxLiJzjI4BshQV9eTJ8Xq9ZEIEFjDRzEQENGl2DFIVp0UHolhuyyUEL5Ke+aaze95xPiAaaF+KB6YYdDSRhJ/DIdbLzXoYhuiCqlQxMLsikkpfiopY23bIjIhg6pyr68qzM1H2JFbybUWQKTg0saaaNJPoLN5cHSEFLMkx1LFeb7YZTEUAhJGJQog+S3boUCkEz0DLbht9SGUkcAc7Fxx3127dpIISOGlpQnCKgd2TX39079sPeOby0eyxh/Jq2DQve1O49/VfvH94drrca+Zz//5z7pRs2eNGEjz5BH5g+8bqtd8/ba/2f/0rr/3Cl33+wacFh3V7xMAxzt/xmr95wwfvMZh7ngRcjRJCtZv6dhqmhSvJnRiAlO3q9Oqt437bzyc1FsK/evAtZkjoiTyAIqLm5DwbkogQETMDOkADAAQMvgIIOW8QgXkmlJvZ7mR6pRRN4wakz6lHyyplGFrnkNEDwm1mloa1956ZRQRNVc0QDQB0VFVmRkTnHaEHQFFBJFV1jlNKaGSQS0neh3HsS5Yq1gSU8lhKrirfD12BoU14fbt94uiZZ87OptNmHPuSxkkTnNXFIHGqrVYTNUWEMZVkogCOeRQr0jEzQsiaPUczrWpuKtf3ebPuvHdo3FN3rpowNcu2K2kgAnJowOOoJa8dVHu7B0WLipjKiOiANGUAQDM0e/SNN+99x92SMyGhaVN79lEVhnGLnNpO1cTEKt+4KsxjnNVVO/ae2JVy97m92lcV0eHJmh0czM8xxdP2bJnaQYRQmoY2w+rap04/9K6NDenFL31p1W7s5GZ/2u5s2v/1jn/xb2+8JUMuhtdVzqO9xE/G3I2OnxT9wRe++cG9P9pcvnNvP+5eOPdY97ZsbKX0wzhfLHIZ63rRpZOxT+d37rxx+rRzVMeakAPXx+tjAZWswRGSxDBtu/V8NjfgVHKovYzDZ7/u1l1/ct5lE6IC6hDAc3A+54GZEV1F3PYdIpBBjFMfeL3exFCPZRBVIpTbFJho6PsqVu241gxasJ5UAVUNyftcZDqtdLTpdLrptld2Lz556+nJ7l63WXlmTapgbcmeoI4hp+I55jwCmqoy+gzJISUzUuz7QWScTXaYJAY3jggheMldERMBhGvfdPLCB8975msf3z0+u/LAK153euV12w/8y6p+4au/+Esef/y3z+19KpRu2uwO0DppPvy+7eT8l+989Q/dlT+8PN6yXr3Lva+Zekdobkhl+9BXnn7pu+88Wp3eeemF1soAZ10H22L7VTDW8/WFQTSreM9JsqEz6C1F/MCf/SKAl6JIBYqGEADAOadWzCznjGhGxFgBOGYCUuf8OBZ2biwFwbnYTBc7db3nOLCLapa7fhyOJa0dYgEahjVCIVA0VBXRQgSolEoHYCUblGSmIXoiMuRSeiQgZEdutT4LIaparCciYmbMbMYAQkTjkG/e/OxkNkX2qnzaLh87u77NKSOUTElGROzHtBk2jW+yCmmZNRVR5X1IpY/okvYJcNmNIFo386Kr4BckffFxfbqOTVxMfcPTW+uTqiYZAVHYTTZpmLHbDKVt22nFjFUvyTlCECmcJbEjD6boDYqpOReckif/8a+6+rJ3XpGSRU0YZOzO7+6v0yCGHtyYhjFJM51IGUFjCN5MmZlQnQp6aDwbMCGVNE4aX4NVXJ9bnK99vT+bX5jddbI5NKMf/on/e1HJa175stXp0EoRNWi337X6O+8+/bm9oUyyFtBT43endI3QT8LBubv+Z/d9H37Vx0yK8/T6r/nav776r6Kfbbdt1ytzNCxVVSMhGBJGAQ3eLTdrjo6djO1owC5UgDCsW2bMVByjiqBh8LHrtp9+w41733GHSKmrsN1umDlW1ZgHB94H3w7bpmpQnUA/ShaF6B2YeY5guWs7H6I5zik3dQNqJWdjlVIYyTuXVUlFVYuBFhWVIoUdn9/dP10tm6Z2SKNkMgvOr7ebwYTRM5J3aAqjpLEkQvS+JnLyORkdd/1Qh4YVVUcEH30kEoC4mO9st5snvv7ZV/3F/abuQ3+xbcc7XvW6N3yoP39w9MiFOy6/+J79p6/+6tnqYxEWrlItVrbusQ+SR3/wd//j114+eeTRJ2p+8uzkt6vKSZLonEA+/s78Re++e7PNV/ZedGH3ouSh7bboxzG3TZzu+4Ojthv17Fx9p3NNVzZJTyfxPH7oT3/JQBx7KaTQhRDGcSQiQGT2qmZq5Jxjr2q3iWlVNYjkfEhj8j4A+zidAS+IvA8Nsy/DqutOUHsrxROnsRPNOYupACg7QgQiAnOqkksLmpzzUtTUDFA0e8+q5shtt2vniYjpeWamn2OqBREQXTGfZcgyZin9kD5147PH/fqZ1ZJSDlWdUpnOFmJScWjHQdNoFACdgIypc+S55oj+yu7F4/aGYdJSqwmibdNoufgqDH26sL932i5jqAMqknRDAY8M9aYdihQGJWBFc94xQi4yjCMhgZhn14/tYneWinjwO5P5+1/7qdf85d3dmEsW8m7qHaBv09gNbXQegAVZDUvKSMafgwbG4hgJvCOFTCW4mMZRS8kqzOC9AUjtbOoXq+7IW3P8ib09Z8TV4eDU1Buw6ff03/l/xd9QgGS8c+EFX/SaL3zzT/6jKXuaXrzv/vu/7NHXwA9cGMv6JfffCxxedunR07PTEQTFoVJVR+bpWXtysr3Va9/1nVkWG7vU3dqe5THFEI2QHGsyEVns7YR6agZD1zNxKv69r37kpX9+NxhUblbEhjyqCTGWYlnFRweWcytVxewDIWQZkRDIiQmYApihlVzMtKqqrm198GZWRBipYALTMeUxF8ioJoCgIPMqppxjjITIVRj7sTxPFcBQVRFhUtdZVQGD88OwAaDnATk/jGlMmdGNKTnyTMQO2DlQ9OQPv2N91x9d7j
b9w+/em+zcN6anZm/4xVft3oiVzuXwk4+9xWgoJXvy7O3WZy9e+0Rjur789371nu17XnDXi5erd1777O8g4ryZF8uGev2bz17y4IXt0NXcLJr5fD7p2q0qpLGdVq7C+WyyF6Or47mDOj789EfZze+5eAX/8g9/1nlk8oR1lpGIzExViR0iAhAAmgkRqhoCk3Nm4HxkdgjofUgF6vlBiBNgXzcLMUpDO/RrtESouduk7gxJkILpaKpICABMDsEbmOggZUAkZge3GSDgbbmMjMzO5ZRyGRHMAMCsiIBZSgMRITBiCcEPQ0YMY0nXl0fXNyefOHxak7pQiUDRMp1OKhdHKcN2W1eVAGRJQxoW9fTWdjmdz1hMqAxpc/H8i67fvD7mMuTRK8TJZByLdxhnDQrkcRQQpOAcEJCo5pzb7baKlUpBoqISK49GVayGYSQDtdwNbVXVdZyIlE9++bXXfuDeAjlwGKRUiG2fDJW9Q9O+V2AGpMCxaBGRfuiICLXXpOLjrJ5xBC0a2COwmKaURYFdsNJG34xDqYYLcXV50x7NYqllHClCsQT0rd3/8I67/vLCxXN333Hn9mw9nccf+vF/5sDqCt/4gm/dbPP3/YcfWK3S0K/7Pr147/F1d3Ru58J8umPICSxYBiWHjhD7klHZjARAUR2xmo5pyNYzejMYU6KgCNB1LZgetse/ee973/jQF8wm89XyVhbb3d8d0nbZ6qjj6fJm1q6qzzmIsSJEbnNbLFFgcj6g/d7f/si5JyZFgLAhNmZKuQzDlhiZXQyNSM5aAMmzF8m5ZDMTVQGwoo5dKYUQiwoRmZmqiRRVZedi8Gbw/xEtqmZqxOBdncbRh1BUTEVUnXdmyo6kiIptH0izR13qpv3pficbGv/p4srvhVlzfvdc130U9LDrB7MREUJotsu4vLnrfCb+sXte/mA79poflnJGjAxOzET09N7N/DM1MjOCYwcIRQqi886lsSeKjkAlu8AR/fnf8Yv55QYR3/22n45h5jiqiWNCxFIKACCCmapKLpnInGMAJoxI6H0EJEQexjyZTOvJPoddKSv2NfkaKPgwMylSxuCp71Na35B0llW09IhExMH7cdzk0jMRAKOZqiCCmYIBAJkBkWUrjitEJ1IcqZkBQCkFDc1EVMFQVFJuAZTZKYZNt37suc8+dPOq5OhizUR9aoEMiiUp86aJ0ReRAtCNI6vOp9Oub4ExF0nZ6qkFP2/H/P+SBd/hnqdXYdjPOW/5tl+9bebOnb6jnS1quyqrXlcSoBJJFAH+A5yH0A0mMbZJ7ITYOJDHDhgsHEA4dkCPEAgJCYlVR6sCSCtppe27s3XqnZnbfuXb3nZOls0/8ZPPp6nrtXJiikHb7ZXFdK+eFZp9jC49S2ti0ui9s9Y6540xwXljrUtBKUBmBDLWBmDXtyhCoqw1pMyjd156/peOG0oCFAQwxT72XdPmWUZKQdQRorG2yKvoeubUu95ak9hnlDmGwhSdeK0AWYJnwRhDQiQkndssJqdIT9yJw7Cxt9dd2nXXG4AULLpbzhx79873fm7rs967wXSaDca7F554wSverPXorx7Zu+PbZy7//Nl/eHq+u7/NYPu6v/vh35215286dPT0+tax6WFCnWWVVnmeV0rh32NWigInQVSoEIkBkBMqBYAILAlFEoKkFIXxt7c+8ovbP0yiQAmiQUhNO2v9QttShAyST7HIBotmb9k0y37G4kMKxtrSlB88e/eb/uam3eXMRVcOdeKYGLRmlsBCWg+cNAf1ou76wg5CxBjScDRxPgYmCMxR9vdnMXW2KohIIQUW59um7wRUpjFyQhQUjohKWRE2RgfnCKDIiz56ABWiN5lGBSDK+w6Jnnrr7o2fXd29Mrn4yOF8dIJf9r6b/f3jEVZxMT94oip0zQFyjuFSwKu7lzavPb3qm3P+Vf/Lj91ezuNi+/xvxnAtSVitVuswDx6f/r7dzY+NyVLyvVZ6MJyEru1Dn2WFRm2MipJElEF46K2XT/3lIYUtgMZ7PvcfQvRIoBTFIESCSEoZEMXM1loRYE6AWpmcAVPolQIi2/V1mY+KcuiC15liAWsyQW2LyhRrhEpEKVJ9t9fXS+/mNisgtgBkSHvnumbHe6+UQiRAp7UFZoDYtQsAsDbjxL3vM6uJlIBGRKWUiHgfmFkkkQKtNKEOMQmAD8GFPnFqg9ve23l679rlZmFs1nWNNgZYA3BWkNJWkiiiul1olbnogSikxF3QlUGVs+9a3xulJMVBUbSdK23uAzfsJdHa6qBpa1WWvl+E6LXJmQk5GW2rahhCXNZ7wQdCbYyphsOuWXRt61Mi0muT1fPv3D/0MQEsg48siSUWdphSCiEQUVWa3kWbZSwBI8foY0xZZpQ2imzfO0DOtQaTd32whI7DoBp45zilosyjF231elqJuycO9pYgiXT60R/7SWWUIrzxi8ceuOPJmGLdLsfTQQwIwOuH1974hZf+6jn41Rvhp2+dv2ft8Ws7102kz5//+O7sGwzaZgZTb5LOFAGmtWpwYuXwtJrkWdk0zbAsNibHjqwcAyFRgoDMTIBJmEgJCD4LUEB+5/Cf/sKVH5RnAQIAIhJR4gjPQUQQBACtdYwR0RASMwNA5P79mx/7hWvvBc6S2Obgidk8MtRbm1sgRTDY94sQ0Wjyfdd08z2/9F1XDYpls7y+3I0BDWUq4fW97Z3lvBYHCo+vby5di1AYo2KKSuKiXrYIJBDYM+jIgZiNwsDJs2RGNd7lea4TsLYiUSt48I3PvOjuFz9yn2rqO45t0YXB7YN7fjN0B8vlLHCWGWsHa2/+3re99a2vb9r2W/d967Ofu+f65SvxZf/ozpPNzbfc8NC5f1UvrhUWkRQIJ5Yn37538q9WDSIzMJAyJvkYU4oxWmt9jJnSo+HAR//YW6+e/vSGCMfI+I3P/PsUEwMoawxlIfYxxDwvfeiVUiKitUqJtbGIStscGEN0mgpSokxprFku95hDXgwzWyAZVArMiMgYU2qdMQcC6bt9H3zsFiIx0xh8F5wLwSMxcyJSzKltFkZjSlGYjdGIkFgTitHWZEWMXimVUgIQAIgxaZUpyn0/15YYAlJqm9R1XQRufH9+9/IjV6/OG2+HQySPZBlYOHR9QIa16TRy0IRIJMikAJJpQrdceKsQKfVdz4DMQoqic6vT1brvo+vYMAOMipGPjfOOtFFkA0f/rOCstcPBxGZZTH+PMLi+Q0Tnk9GlVuncW64973NHUowhRERwviUUfo4xZlCUAKooSh97FEwpOdd577MsB1DGWAFOzrHREViBGDYp+SK3ItwlTjGYPNuSm/Nm/WCxXCzD2ZtvW5kWZ87e+D8/cfuP3L/+qzfCvzj+tRvU9sHe7m3y/NMHN3zjav5wuwIAv3oj3L7W/tHbDi5cuH7l8pUzzzv+6x/+b40VUqoslO9j13LS0rbNuBqHvmEh5/2wzCd5PHvkxqOTI4U2pw6dnU4mmEArkwglsUJBEUHzO0c+/POXfwBQQJCI4DkxRREBEaRnIQCEEBCRCJRSKSXvPRG8f
+uj/90zb5+MDn3t65/f2aPB9JhSobl+4dabt06dOSteC4hgDSnvoZNoMm2IMDB3Td33sRoMrFLR9cuu7WP0ISL489efWsa+T21MBAZ99Jkyg+FKCF2zbJTiJghL6mMIkRmkSwEU+q4PEpvWM6bz79wbfeB03H3xeMU9s/K20YMflNAbrUyWd3UfXDfdXO079bxbNt/9jjfsXn/iS1+4d7Ez+/bqj7xqI42Pu93zv5dZJJK67Ys8R8SH33T+zGc3MBEQhpQEBAWKoljWSyLyPgqzMbosy6e+d+fGz212Xe99xM/+xa8Ns0GeVT4ySFIKRMTaPKXog2uaZVWVRue9bxDR2hIQtEJhDQiMYrUhzBCNQNCmBMCUvC6GWpe2HCmVh1gzG0Kyuohhtpxf7+s9TZxS1Frt7++MRkMC7UPHyXMKNhsJJ5CkiBiFY9TGJEYRQQTmKMIKs97PEFPfhbq5MBoeKvLVFI0ySRL3ridr5nW30y4OmsWF2VXFyhFc2d1pm9pFWp2O8yzzIWSWkTSnlOU6sQCx0aUi8b5z3i+admV1lRyG0AMgs2JxEUVrbcmIwN5yluV5afKDfhmjF+GiyFxwxlgX/GKxUKhspgeDilA/C1U69+adGz67IRyDDyEGIslUFUKIMTZNUw4K14fVlRWtSYRiDM455igsAKbzPkZ3fHpstzloXaeJsrIklRiScwHFDIqCRU7w64uUtrcXNz3/heU4K+3wojn5f+y+6VfPwf/rjvYLuTXXj88vHbpytXjev919Mzzn9157+Y5Nzwm+891v33D0zL/84A8XZTYcDQVc03lMCIASeVSUy3aZFWXbtdPxqAtsFEXfKQVTa7emR15+4+1b062hHQBI7x1YZZD+49GP/dzlHwBABEBEABARAEBEAGBmpQgAfAhGa2ZARHkOJ/mPJ//sf9j9yd/+nd8d5fnG1oYdj568qijQscPN9sXla1970+GNiahhvHpfeeglLI4kPUvbwoXFbLGsBkOFQqCBQWuTWFg4hpa98iFe3Hvs4YtPH7imHJaGrKFkSJeFwsissAtBhFxMIYUgKaXQRdYmX9TNN155Uf2Pm8uuJZD9W37m0GN/SEjOJ2UzJIKUFZOszNQNN9z45GNfe/T+B1bHa8OV4TNH3jd6+GM//m/+3T3f/OUYHAKgJqO0tfaRNz5z+q51EjR51gdHSoVetFYiQqR833euT4nH4/ET33P5pi9scRLvGX/3//zRw6P1abWqdJGbAkkAAEEbk3nvlEJAAdGJWxEmsoBKJGlVCrAi5WMvCEU51JTpbMCJOXSBYzlcMfkYVWFNBmR7vyyMDp5SaHy3n3ztuoMQAhFZm7muJcUpRmQCq1BYggMQ0ZqDRyRlMmZAFKUwRM8hhtjEyNaMNBkkYUyAolBDSszigkOICUzv4r5vNKdHLj1zZXEwX9Z1XK5Mx4u2djGVRaWoiIGVgsSCGKtiEGInWnV9ZzOTZTYncqFPrJgzdi65XjhRoauibEKLhOxS6wOzMDMRphCq0WBez1mYIO98XZR2UBSECrV54m27z/v8RBhns4UxmkiQk7AAgta6Zwk+VkVpjeZEIfgYg9Y6OI+om64ThIEtEqXISSUKjEARSTnPnHhgMhtvev7keRKX+wfLLBuOxmtZWQ0Gxf8UfxGec6f6uzfjN3a3r5tKX81vyrLytmnzjDn58kN+OB6vrK5E7+7/zr073fbfPflnbewWs4XSsRxMBkr3LiYR1DTKsy6EkGJV5J1vCBVETIFNMfJdPbA0rLKj043jK5urxSgzeTkYfvjmr/zjKz9sVC7IiAgAzKy1AQB5DiLBc0QEkFOMLMLMRPL+rU++9asv/M1/+yff98pTW2dP7fXdtXp9b744ua7LPB9U9q/uPveu129ePPfkG1/z4pSvKooiKa+GBMrYilCRMGNSSN53XXBJOPoFgi2Hw4P9g5Sg9/2im7du2ffLYTmIoV8EnzQ03mfZwOiMY2i6mghmvg+hNXr4ofXvjP5VXncGQzN/1S9V97y/GAyVNgKyOj36mtff/OlP33vDicnTj98XXN017HxvrZ2/9J+eevr37WhoX9gURldlFlMoM1NkxYNvvHj2M5sptaQ1CwJpEjHG1E2dZ7kl3UXvXFREz7xrf/OjwywrUkT8vz7wUxjTofXReHBSUqdsJoyZUaTL4LygE1DRL63NQwhGa2SFBrUeAIlEECQWyGyByKiVoqLt5kAynmzlxSqj0roA0iG0BvLO7wpz6JaKYx9bTlETCEdO5GOTWQMMKToRAvYh1iCKISIggEKUZ8FzEBInYokEVoitNSklAEgcu64ZDaecjAgzMCjtI7f9bD5bzurl3nzvgttDa/YPOtf1WVUwgDVWEkxHGsRn2WRvvlMLxxiIKEWx7FdWVnvn9+pFxir0vhqWDt24LNBo7lPTO9CEkKUQgUJKwYCt8lIZmjvf96133aH1tYCRPT/xvTtHPznK8tFif9bF3hA67/IqTyFOBqMUIaEkjlVZdH2PiITWyaluAAAgAElEQVTKd64LCRGYRYEpLIoYIOhcrYiQNJFxbfCp1aY47d86XVPtosO+Wzl+em39kDL5oKy+8Mj8a/q1h5bffdHFPxiubqyfeP5j7WGGeFOxU1XT0cp0OBoOx5PReLy6svbNL3/5wflXLi7vq7u2b8SYLMtRY57YLZYLBrA6CogwI6JLQaGxKlMIgNb5DoRArMZsOML1ajA1+vmnXvyFVzz4S9s/CsKEAJhijMGHsipANAixJBdqa0pEQkTgBABEShgT4n85+9HzP7O7fGrxhpetzuzW+tbg0lWYNW2RZS/cMmLj9h645bWL9fhn3/fG79z9iSMr9qZXvktAur7Ncr1YHgCkNrnow7RaIcFBtYrCznVCAMIpBK0Nkkni6mWf2C2ba/ddedz1XiAl9pRriCQJV9ZWnG/3a8qV/m376dj2xo66V/6i/r+/qs58AwiZxVqLCDbLmBNIOHlodTbbO5j70di6w6879Mj4Ef9HVTEsTkI+LBSw1jlLBJDDF1c3npmIC957ACyLqgudVppQud6Ni0HdNYyQJJ57y9Uzd20AAYDgv/j1O4+ur07MxmSUDbNNm5UiCJIQDEIkbRgIhRCRSIlg3+6ioqKYIpEAaq1QwBp7sNgbjqZZviYKOUQWyQpb1/Nqcqgs1/quVoQKTOCYQuP7BXCvNYW+SzEwSwzOZloSO98abZzrRILzLYAAUGZyREFE53pSBMAiFGOndZaYrSkJM6VyZhdjF5PPbM7sU/QiQKhJ0ryf7fv+qav7Owe1J98yLtxi3RgvHEVQhDRmmXURXPKTfMXkqu06Fli0i6oonY+t6zPg2jtBXhkNEkOEZIHAUFN7rZFTMiaftTNISACj8dDVtTE2iiBhVQxS5IfecumGT68RqmaxUIXVSL3rfYwsorVShKQppggA83ljtDZGAUhkIQCtNYEWgcyWs/kcSaq89DEorcssb7u5lcmJ9OpBqZazXXaxSzEKveY1rz/7/BdNVjZ+69v26AP/+erFh1M4eOTkj5/C7VuKeUgy
Gg0G09XhaDyaTicrK0VpLz7xzJ9+89/t++sSQl6O2r4xOQzy4WK+yG0FoIyCvu+VUog4r/fLfJjZLIQgAsEzElhLWo17v5QEStLptfHj79z7X7tfHmdWK0tgU3IxNbPlvtZ2PJqmFF3XlsWABRFEWIXoffAhBpVln3jRV77+vd8Us/rCzfTKV938nadm5/ezoyeOjIf61JgefvqcD8NTG3LPuW5ttezj5Mfe9brJ1KUEyQcR8d6HEJzErlvG1KXktclza713QFTYaWF0bjOtrQQQ8M71CKoJPQFZTYvZ3uPXn7yyvwfE4yILYjtM9z509MlvfeeSrHfv+wsA0Oe/WvzJe0DrvKgYAJJUozzFvsyL206UO7N27tPB1eu7L/rnd+pPfveBJ69e629/101rt3TNEjj4vMi9c4CgtSp0FkJQyiilABgB+r7LMgOgtSYACC7c/6aLN376sCALML77H73k7AlzcnJic3wih7wqBkSECjvXIfaDfIMFm3aW5UYpLax87/I8U7pMKSJBlllhARGbTQQiKTtvdsui1DojzAFMNhqi5KQQUMXYZVmFqKJznGrXdQTMMTB3iBCCkxQAyLveWEukASmlJMwiCViUQkABkOB7YYqps6ZQphRJSECEQVhYEI1WJnFM0RmtOUrT7BtVJIFZvdhfbkfor9WLvTjPM3ZB1W0Ulsb74KTtGlsMRuWYNITgtTE+RRbe25+NxyPfLRMp592wyGxW9dEp4QRJosQUFFGeFS46BLJaI0ff98rayMIA65Np5/29r3nqtr87YSEXDgvfYyIgrorhfLlgjAo0CIQYTWZDkBgjIoQYAClFn+d51/QAMcvL+XxubS7c28yGGKuiQBzB9ituOGyJu6YOIvnRI1NG3bT9ydMnT586/bG9m969cfHRBx7486tHb9v5WIjz6fRINZigwsF4OhqPq9E4y/PBID9Y7PznL//a0rdGkhnkbV9PxiMJOWAMvs2MDTFmWV7XbZYVKcSiKBFQJJX5CBEAk0DgKJ3rglfG0GLh9n6ovvlTg8OTye2nX3rDsbMr1bpVJoTWOwcAxmS9OzA6U0oxp8SYJAlK07WTYfXhs3d/9wcemPQNUnH7rasPNtm8G0ymuVXq+15/9OOffbDI9G03rjx6ed7yxrVF944b4sZNLzqxtYHARDpGVspw6JwPxlrXuyRslE4puhA6XkbX5drkeVVmVUxtCMHossiHIkmiF+bg0169COzmO9udWnzgz91sd3d3p/U/+Ifd+u3wnPJP35Nd+Zb3Qdu8yIi510YTZVVVDPzBwWKx2Lqze+UvbyzvW7n/A489dX85PvSiN9xiT+4zkGGMMQIgc8qs6bqeSLneISarjTEaJEUgjn1mMteFC+9d3vL5LRccS8R3/8IrjlTVTYdHZw6fzfNRboosy130566c09CcPf4yrQwnDRCIFIJl9CxeYwaQ+n6ZF6WgItIpgjUEop1bCJCxmcqGxgxVXhJw7/v1jTMxgaAoTF09B2SOUTh612tKhCqGPiWnAX1slTKAmcnz5EOKXdfNEBSgpBREUuKgVY4khBYBfPCICAJGK0SZzffGk4GxqyH0KYUYUvAth2Xn3Ora1rLpUdzc9w9eePix/b28HDbNjLkVLChlihIn24RmPKr6rh5VQ0YJKLO61qRDjDlZRYpynaJnhNJYl5JWkpnB/t7eeFo2rtXWBucpha71JsuqQbVcLnMquhQefduVU3dNSp1nliCzvpdFvatYKaWVARIxZJTSXYqaMMTEIoklK0fBd4iiSEP0vXOAWkD5vo/CZVlJTHH31Xkbz96wBqlRVF65XleqbVza2lybHtp4/oteclfzkh8/c+0PHt24o/3SYw/fB/2SkYejqbEDm5lqMMjLam1j4/KlR2u+8qmHPgQmrZSHmtQ61w+zilAzp2Xd5MVQIFibLeaNtRlKIlLGGJE0KNdAtV3X5Nk4ivN9GFWry3rGKZx/5+7xT25FFzLLk9LcsLF1ZuvGsZmQEkQk1NNibE3OzD70SvGiblyINstyUR956d39r/C9X/3ObS/Yet9bTv+bP3mCaTIYpNe+9rYzR/AzX7mQm3DmxMrf3XP96qLePHJqQ/fT6vLb3vSWweGTIsAMRASkECGFHlAkEgH74ITYqEqhAAhq5VyfmJfLeVlm43wsmFLywoAaMSEJXt+79rV7n/nD//QFrxEC9K/+J+1Lfg6es/77N/umBRbUxGnPd2F9fZMJmeHQ2qFy9dTfvuFD8JzJQx88ceXTR45evO9re2/44ROqrOvk28YR6RhEaW6apihK55yxREgcRSstHAWCMCDYi+85uPnzR5zvtSH8/p97TaVxOFBnDq+98OjtSudMrq7nhdFlvmKtDoE1JSCNRCJABIQmJQ7B2bywtiIFwgoRlUJhqqoRq0xEnPdZkSOQzgzpVVSoFDITEYQIWsD7hYgEX3NoQt8QcEwJIQgDc4rcW13EGOU5WmvhyBxFkggICIGkFGLyMXijjVJZ37VCfZWd8jLL8xyEUkoh9IR50y2YEcEZLUqt7RxcnC33zx/MHfFeM2MQUmbu+lxZ57vkEmbkY4Fqfng8bTy0YV6VY98caD0haWLSUcWBsWJ052KULs/KpmmKMm+db1uPKERBgy6y0ktSKWllCPDBOy/ffPcxYY7e2zyPwghqMa9jdEongxSZQ0oxxqKolCEQSX3aWj+zM7/ep8ZHNy2Hrg/j8XA239M0nnV1nrHaeUu/1wzLdOzEZnfQrw4Xb3rvT3zkTz+JMVSj8e75J8pBdvmFP33y+MmfvvWAUV3avv7QPfce7G8bBStrh7u+0RZH5WC4dmjn8b/98vzze6EziabTSoHunOtDa3Pruyhocsxq146Gg65tzLOsBuCm7eq6LVQhhDFJUVXIsW3bwbD0PkSIl96xe+qTk9Z7gKx3LjMmM5Ywra+uZCrL7WBUUNcshuWQoxLux8MJe1mZrq1l1X+56Quv/cwLPvSBu//795wZrtIffynJdHzl3FPv/f5XFoYfON/tX3nijhccve/p/L7v3ts3DevsxaeO/NRPvH5z68aZX45IBRkwNAQIiBGksoOm2el9s7dcrA6mRVZZY+t+Oav3iqwyqHNb6DxXggbpWTEh+uZqcn/4+5/crcNsx/kop04NvvrF+9Mb/mk4+sri6//e7txf5IPeN6dv2Lr/G3+jiQDh0JGtmDhRNrj1zsfv+A/wHH3hq9OPfs/41DGZ46Eb1zZf3HlRCKSU6t0yBHkWJzHGEAApca5LCRBRg6Ys0wDPvP36rXcf77oOhPEdP/uyknE8LG49dfTs5nEja0XpczsxtgLBtt+3VgGbECCkJsGyUKs2K2IMPnbloFQwTskze6NzUphSQiBEBiRjcgEUJJNZkw364BHscLjS+16pXHTK9SD2sW2uQEqunRktIfqUHIIolQkbxCAiLKKIQmi860CYEACYhQgNKSOhSxBTSkopm5UASjD54Ii09wGEu742pJXVMQDH3maVIkzJ9X26Pr/qfLq8nD852/HYDIvhWGUJ4kHrfUo+2CTN4ZX
NunUMTjgpLa0j5KUtpt7tjYz1zINquHCtVrn3wRg1Xy6UsUSCxMpYAmqDpxiYJLP23lc/c9vfnoKorDEhBkEcD8u6dZG5aZchyOxgXyuNQCl41GplOgm9y0wqqsnG+rELT11G49vOodLjlUnfd22zNPPXuT1J0U0HamNztTloN8bh8Hr56nf9CGLZz68fOr753dnmr39Ffu3sdxWBJjWsBrM6Pf7IA9e2L2ulxqMKEkcLK1X59NUv3j9/JIAujUrIwiQC8+XSZpqERKtJOZy1TVs3RmvvHSTRVg0GVYiRSAcfqrLs2t5mlXN9WRaLxTwf5JffuXv4I5W2mTGFAM3255p03dUxhbKqxuOJ8zWnMKiq5azNVC7ASJw4NLG58n392c+uPPHx67efzI8eHT/eTaDLbnrx8zOzT8Dbs1ynxaFR/ZV7uwsXLx49dvplr3jt6fBYbevdvHzxLcfygBf7i/WyUVpd3dlpg9tcPbx7sFe7sL07X5kWo3JIipKka4t9C7qkfFwMTx45VOZFislFv7W62tX69z78mQGsXntq++qiP/qfNh9duS//jWH6QlImEyamqGm0uj7Ny3Dp8WdSAmUzVLSyMomuCQH33vPncuJVADC96+fN+S9m8SqzWr31xOlX2KZpjbFZbnrX51YLUmRGxBQSIIBISolBICTKjFX4xPdcu/WLx+aLJSfG7/mJlyRLY5vec+sLtjZOazXcW16KWk1snmeTEPxgMEVIiOry9jOR6/F0q25mrZ+5sLjpxKsLsxZCS5hSosQhz3MinTgprVEZETCoTZYLcowyXj+OlDMiQI6Qdf3FTKHrvO93o+sN6sSIwDE1SBJjAglExM8STiEoghg8gPT1PCVnDMfotC20GSAWRTmK0SfsgMurO0+mVK+sHKnylRBFIyARQsap90kQGo6S6bJJwffu0uL6Pc88MLRlJFU3zczXENNoMFWmnC/no/EgoXSd59QjQe8h1266enS+mFGKUZJrHVDiZBSRNkA208a40Pehz43t295LrIz2DKTo0Tdfue3uE8LkUxSQsihjbJkhASqti3xY1zPv+hREEH0IZZlliiKkZdNMxtOuboOLoKhzPoRoMp6k1/fXBpRqrSlTvHnsyLWLVxFMVcahXt74sjsPFu6vH756fGPtKXvrG+JnBuOVyfrWaHqoGFRd2+9sX2kW83yQhUVTGg+8/7W9r/QaFSoUqV3vfbA2U2BZgnNuvDaxSFd2dlzXa6X5WVHyXCujiahzvTXWah1jtFkOIEqTdw7JXnjH9ROfXCvLodESkjgflrNlcF5pLSh5WSTf912bZZmIOPBKGSLjXCxzs/3ugyOfOmJ38NZWLlzZuZxtGZOdOX3i1BHp02B7N0yrOIALn/m7g3OPX/yrT/xFaLd/6/3vzydHX3PL4C+vfWOqcpeSNbnjmEBiSlpZhEQSB1nB1viuQ6LI3C/qohru7h8IqjwnZN07R1ayLKso395ZvODYytc+fX75i3D9J7fhOeorir6sj/z+ifXN4twjlw8dXi8qfvyhS0QZGQMQsyzLLbV986E/++vf+uaFy3d9pJ1d0sXYL89fO//Qy+581dotPYNxriuKPKUEEm2W+5SUUsvlgkhV1aCulyklQyoR5EY9dueV5//18eBj3dT4jp8+OypWyKhhRrdsrh9fPzzQR11aGpLxcEMEtSq86xi8NpmiglMU0DYfJAGBVqNBiEZljMLMSmkim2IfQmBhYzNO0ZihsmzNgFFYrC1KZUY2K1KMZTF2fd01e6FfIHjnOpCU58O+c4m9goiIikhEYgoAHIOL0aPOJXni4F3DTGU1SgCRg1YIXICKXRtEGmtLozNEEEnKKO+EMEjCrj+wdszSA5C1g735/NLuZUXpyeW+C2GxOOgAfceDYdm6kBOjVbv7vc2wicF7Wakgs1MfFsF3dlgoUcu2K4tx0y6znKfFIYa0bJdAWNc1sGCmMmQmBMTH3nzl5k+tsclNnrngc2O7tiEkm1kUVKiS9Maa2byOAD6kGBxKUGiysmiaxhqTkkLFIfiqGkl9tr+yrkOtMVoNkPyJG07vXds9NOm52bdV2XS1/qE/PuavPPDxP3pg+upX13dlhWVJRVGtHDuxunoEQM33dkLobanbC/d/132ztVpQXDtXujC6COxd1+c6R5IQAmY69R4VKaWstilyii4KC0vvY55bES7LMkSnlV0ul2VVhBBiSlfeMz/+l6skNBkNXYp932fWdl3vHRdFGUIAYef7xMla3XSJYyBga8hkg/Pv3L7lM89bpsXisW7UDWZNPLx+aO3Q+sZox05eun1wUCFsDnc//sWdd7/3h4XPf+0bj7/0eHFp6awKo+PXz7koDZVlLpqapjVKkSQgnedZ6v0i+iq3RLQ3W3ByVTFYLFqOgkZyM+CYyHAb44rNlmyyrH76S/sHn2iXN3bw/1H8Rgn/Eo2xVVm2/bx3TMoiMKSW0Z699bU/9A/ee9ddH9blie3ty65jDn5lc3rxO58//LzTa3eoMs+yTLdtk5IQkdIaBZnFZOz6VBSlc33veoM2IRilnnnHtZs+u8kxoSb88V95S+MaQqWYNibqlqNrp1duNSanxDbPtCXgnMUjGkIlEIWRJTEHkRRTrMoqxpTnOaFVKhMBkZSEmSMAgihtwOohgLZWeVBICAgCYvOJtZXNR6SL4EPfXk+ua5e7CN5o412rtCBSdAIYEiAKIkqMXgSib5xry7IKIVEKO3uPbRw+HqUyhEgkSCyiAZA4xgSiFvVVayqtMmbWOhMBfpZ4gKhMAaj39veXbnHQLQLj9ry9vjhYtPPVYSEkycdIGVGx6HvW4vplVRYgkKu86WufgjX5stkpBmNhIQTjIxs8tL61fXAVhGaL/UEx2FzZausDIf/Amy6f/czRZdeJ7qtyLbPUzFokICuZHTX1wphcESoOHqi0ZYh164MPPYHq2ta5hnSW5croKsPjyyfOQlhSaozG3CDH5tiNNz/11IHrXJYT+fnlG979+vZjN7/89S97/dv/4JH1O82DX7/n2+fPPbxqk824GGWTjWNHDp9YNH2m3Zcf/qNLstCkJ9Nh17cpynRc1m1b1w0J5UWeOBmTJZ8SMnCqymKxXAhziCKAQrIxGXc+MYNCtJlumz6EmBJX1eCZd1w/9omp1kpr1frQd12V54jEjNZakQQMLgZOCQViSsGlLIeQxGL2zHuub31sHPtQVRa/vBdWTzvu1od5uf6Cvb3Hz95yx/Xt6696Ufadcwc+4hP3Pf7C1736mFb79eNX2+qG0wff3q/VIFSmWiyXmsx0NG1d69sGAUISUZJSym1GSDH4wDFFPRhVkqQPntBQbJa1ngxUylRF9Mx9O+0/6869bRv+a4N8LEh5CcvZwmCB4NHYwFaYvv8fvPOzd921Ohxlo7V6vuz7kARUhiU5MsPxi9P6hhmNR23XJeY+eSXILC6GMjeMFGPMjfIeRLjIc6vtd1937vSnD8UYrbX4I//kDYvlcjAYW1NKrM8cGp5eOT4ZVON8pKhQipVVnIhIAYjWCv4epsSIyA
wAjAjMDJCUsgiaSAFRSoEIlNI+9JmtiBRS8r0YY0lZRKXKDAkZMM/zsjxSL/cV6badmeh9WHKKwFqMjy4yuywf9v3M6CGhAvQxQNMurTVKmeBa72oURRo4JW20UgggIibLbAhBKQ1AhBoAYgwskVmYkzEaSIcgREordM53MbTtwbX66gOXzu+4ZfJhUORGl8vElDBHDVovlvOiqhZ1M+9nRZUnDoXNC7K7swNAGI2qauXYk08/VCmNKmqt6nqR23JjdfPafFeDPnfn9iu+fuba/MD7eOrYGYxhp94l0kZj7NVBvb+6tha975umC2EymFiDAJoRl/XSFCqEPvSeY1odntx5/KXo9yj1SpwxaFTSrI+eOtHV88b5rsNzx9699fhdortbjg8OT/Nvrv/oz9w6O3H6RlWM7v3uo5cfe/Jb93xz/9Lf3PLCmzeP37TXXf7i9oetXqOEgKyUyvOiGpQuhizLfduTFh8wxmQULOt+ZTyaz/eVpoSp71JiyDJb5YTK9i4E54zGFIETap251O3+YLv50UqTQrSkdJbpGHoBndmchUPwo+G4bpqUoiK1vz8n1F2/YMYsy698/96Rv1ghwKIY3v/xR+oFnDpy9g1ncS9b3b16rRM7HY3Xpot5e3rZX7m+PT91dFNZPaDkJSu2zNX0mBoNJHrnQpbli9k8z7TSVDf1YDSR0COgJFZKFdW0bWtEWxQZJmlCX5aj8cBu7yyIG8Ojyw9ea+aDnQ89Obt1Af8/+f9WwN0Jv6gItTGQBAQyIUmQDh0auTqO1yazg1lbdzbPTD646WVr17f3jt4wGm5ZQYnMISWraTgYcpKD2YxQkFS9rDNrBS2SBHbGqMfevH3604eqqvLe4w/90kuY2WZlYvK+n5Tq8LjaGA2Glo5Mz4yySVGMGVJKEZEAMMaQ54UIhhCLfIAIzvfeO60ppVhVQwAFiEqp+Xw2HFaE485dJ6Kq2GAMMbIyudF5PpxqVQhlRT7qun1jLILuuoN6/1LdXA2xVsqopIX7tt4fjw4nw1Wx6lxATIrKxE6Ey7Lsu0YAkosALaJiTk1TW2uUyZlFa01EKcSYvDHGe+98ByBEpJU1mSmLUYoxcWxdFxOF3l9b7OzOrzqbcUzXdq51ohp2kyIvLKEWHyOjWiydSDBZ2fc9CfQpmbJklL5vBqCauCTJl95pGKTUDyqbOFaD0rd8/r37+QeTBSyL4XQwsJRfOjjPAUaDQW6GAJ6M8j6kPjbgNGXGyNpkHSLsznYYUx9c8Ly5cur8d7dSF9j3WlhB1FoyTMqGw6fOnH9qPw2Pn6/OrN73e5tHVsOcdw/2ytJefeFPvWr+wfGoJG2q4UrUw2uXH33ZHW//xEc+/PKXv+Sc/8aji2ulJlQIgiFEY3RMkRGqatAua20EVaa1VSgiajIaLBZzUrTs5kYXgBSCLzIjCQUhSkrBA6gUxZqs9/HiO68f+4vpoKw61yNAWeSgeLnwxmrv/WQ81YZCCAiQmFPkshju7m4rXWhD5+68cOJTGyxRVLz20cXrXnz8j39UA8C3L/f/+IujZeMit10fVkYrs6uXB2vjTIzXsGXETydb1NNt02eW14Sxa70x1sc+V4UgRk6j4bBv5t6FMi+bpiWtEBOCVYbH1ViQBXVKvVKk82xxTj30pYt1d7W+o5XPMwDkf2P0V1T9Kz08R39V63+N/BpCguo3ch+CoBaByN3GxrHBMNvdmynA5EOCpHI8+4qT/eLaqZdP5k0cTyZd30VmLaiUjjFppfOchCXG1Hbep96YvOs5Rrj2gwebHx0jovcef+yfvUJSVNY2PkgAMjAc4KQsVwrYyFc2R4fWpyddiPT3jFaFSAIArbWIKKURSQRAUKkcIIgwkQ7JK2Uzm4XotdaIFoF8aIlIa+tjQqQ8H5jcMhAgZtUmApHQsr6sSHGizOSIGLrd/d0LIcysHimrAAUR82wgiULoSAEzu94roxTZ4BaMIiwKVfABtM/sgBm99yheJCKqlEArDZiU0iGA63eHw5WQxGQZ++Bil0JqIl/aOV8WE+/ctXb36kHduznIsjyccUyXtreLfGx0NSgKn1SI2PduOCgYFRPO5/NRXi26vY2VQ9d291ruXOOrahIFGHy3WOy+z53+zOEYaG9/Z2Wac5cnaI5snrh+9crRrRPXD7ana1MWqGetsuASG8Pj4bhpWh86pVWIEmNKV9+0uLZEHwlbJUCSiGKuBYGOnjp2RZ3aqTE/ePqNr7kx7y8+fP+9F67Mfcyeft4/PPvoB0yhtYbgXDWyh4/f+vB9X7/59GRyw+q3+ocpgichsctlZ03mQ39obbXtOu89CmqtWr/QylT5VLQQg1a66x1IsFlOmg4W+yujtdD7rMjrri6qgfeurmtEBKZL79zd/PhEKQKSXOfMKUqSaFLyZVlqnSnL0XlmdjGUeRY9N+1iOFoFiI+98dLzPndk0cziE+rSQ9c//Qs3vfQowXP+9f0n//Jrl06spguzer7fzq7NTj3/1IU9V0C87agSY1fWBrvF7nVn9AjqRV+UFRhp5w2ZjADWpuODxUHbtIRaGPPClqWpygFB6D2PhoXrQtstSzOAqrx0z/bs6WVi7UGmZC7feG3wLZo33PQdPKf834v0LwJwUZTY//NOKW3+1vLno4BKnFbX1713CiR5pw2VOR6+/VQ93zl8x+G+bl3fF1meYsxzQK0BECJHAIUAojofXVimkAh18PHCf3Nw8lOHUkoigr/7ez977+WLu/tXCNgrOLN26lJ3fUjN2opa0YMTa8+7Yf2kUmPnOks6FN0AACAASURBVMQ+txPhBKgSp5iCAsyyAoiYGYi0yupmNhxMiJTWtq5rYyCzYwEJ0YkIkdJa87MSsASTla3nyXQ9L9eUsc4ttEDfdxzqxfxaWVqlBhy9cEQQIe29z60VFiLFHFgic1BoEgdEYQbvO0SLihFtkqhRedel0Jo8I1TG5jFwCo1SOsaotMQg1pq2a0TYWtu0rdJZDMKQXLOIKHVMe80VydrHLj2yUy9XB5UdjKJgSIsijbqUhV5EDaYGZn1UCR+/+vTRo1vehyKvXOuSjppMihhFjEFL8O3XXbj5c0enZd555fySIy/9QhdZdHyoGl7cvb61eeryzmOsrEazUYwb30IGSpkC8kXwucZm56Z2eyLugMSDAEkiShoBMZBQ/oK3Y1ye7B9c1MGzO7r1go2TN/0/HMEHtK3nWRjo9/3qX/feZ59+7j23X+mqWZIlGffesWVcseNhDQGGgQxlwiQwCQGHEkhIZq0ZIGsWTIAheAwEG2MLFxlbtoULLpItLF1Jt+j2c0/d9W9ffUfx82weWcsy8ZGtwz+0dm1/b/6tr39lunttMXOm2dso1eWF+YXZtbxMTNdxlJ33EjEE6Fzs55lOdGcNIljvOVCq0qbulBY6SYUUs3quQCWpiBSkSBs7gyg4Ms5ZiC6wCFZUbZXr5MJbtk8+vI4BDP13MXomAAIs9fvIcdq2pSgaW7ngg3NcCwigpcoS1Vn77Bv3Tn/hiDGz0Rf2b152/+oHF3/hDX34vn/59
Ivf9MbX/e+/8u92tq4M+kN38fq7N9ZPDdPf39qtyaki3Ti9OjnSZqA9tZ0nxilVhWnrrMit9TEwgkA+hkAoOIIveso6meXKN0YqZkwVnRIJ8hinI//sQ7tv/Jd3PP6xa35ifFPPqxhfDeHVwb3CiUdF8TvFdDZDYgA805KJ6F7p7UsdAOKHuNaJ+RVrX+my31Yrj2VN58/8k6PXv3bz+Os35uMGgRV5MZtOQAskjM4pJerOS+W1yhnjwbnZZLKwsOBCePZ1N45/esXaDhnDn/uN9ypWN7UeuZ22rc/01793sLVUlMfX0xP9Q1miVofHEpk3TcPQJapEIWJEIUWkoJUynfHeC8EwBACGzCU6MyFGAs4E5zxEYowHHxkTPlSMacFSqbR1XZqVziOhkAnv9VeZUByLqt7tOptoGbzB0Jqu9d4IwZECEAUfGMMIzlobY+CcM9TWtoyjFCoEQ5ETeiDR2b08LUzbcYYBU4aoOAeigITIYwyMQXCmrpuyLIGwaedJqkIE6yICJ98hB+Npb3Ywq7e2x9dAA09k3VLEOoIPBi2UGDFEKJPhvDNEMaBHkAyZ1rqumiCMQFlk/fGkMuSaqjn7phuv/8ato64JwSYiYRSWitVzo10dm9ZFpsx8DkyklsboVV/lusyQM2vb25bW5xO5MxNbl5ZD4xhUiJ5jYESMIoOIyJ87+u7be5PixjfXh1S1bYRi8VB58/pFgYN+f+UR9vK7nvu9pdXyngdefPeL3ri7685v7/71V/6d5fuLvTVPTiXqYDyq2zZPUgTuIgVnh4NBcN503bSuNg6tz+dzYwNFy5kqy37btkJiWRZEpFVWV3tcKmNdiIERBwGDdHAw23PWX3zL3pG/XV5dWp7UMykUeed91+sNkcA6W7fd8nCx7RpAdNY73yVJ3hmTF1m0/vybt488tIiRx5vu+rd22kn8+bctveaIvtot/IvPVylPdm5eWFBJtrLY395+yQtOXzv7vfNW1Bk/sZLinYvbsKd1qpggJgmdd6QTnUhpO8eYbExD0Wd5aozxPhIBB5nlwqKIvmMY2pox5NHMXcfPf2r7nh/deOazu+vl4sHl2fRFlY+kv5YgA8HZ8+q6CTEwRECmZYIInKEPoftlY19u6dURvm9zub9x70rcoAufH9/xjvX5tFFSO2OFQtsFpTQAKc0JqZo3RDzNGIvKQdRasuAv/+D+oU8uIVKWpPgzv/b2renNoe63xjEeIucDzMdAEtpT/ezOo7f3hZZ5jwIIAYJp5FyqJEYCDM47IOTIkCgi956URmuDYGit0VozJkO0jEmKyJAjEwDovWHcCZEkaQ7AI3ACTgS1qdJ8MdODfLDMmPS2nU9uchaRbHCtqeecc0BgjCED57ySqus6AOuDBYpKJww5gmi7WZ6VXTt3zgmugBjT5L0zTZNowXUpuAohdl0TvSUiIaS1DqKTShBCCNAZ07XTsiwY0xFENR/rDGtTWe87a7gc7E92d5rJgRkJpPkYJBquk7HpBr1+ytO6qZCjsVbkuUIs0oKCmEx2Txw//len/v7OL6xxW9g4zcvVeTtf1WkT26FcQUO7fuSVrbvRsLe+Ugyv3rg+Q2+9BzCsre4+9uCjfw/MzL2dxxAoohKeATGInODisfeffO4Pj916RzUzzPrh0Aq9cPeLX766cmw83W9a83n/4vcf3T/72Le+8eWHXHftzPEjL3zju/78c5/cPK1u7D2jE7U/GSFjs2amuOKMEwfNUmdt07SD/iBEMLYt+uVkNucQvYfhwtJ8PpVSOu8YwzTNgmvTPG2tMc5h5EpKN7OiYK2trr51uvyXmdCcCeFs0FzE6KXUdduVRTko+7WpbdsOFgYUcV5NfIB5VQslydrdH242Pt4b9IaL2cKNh85+91oU4P7ZO9/9U2/dOBi1X7rcfuXJnW+d/849m+sLYbfc2r3WuPN8UCjql+jvW4wUHEUueZrkWSkPxhOltURs6k7ItOu6EH2vX9TVPEQUgvUKZbrIZezpzd39ncCMp8bPQzOjnb+fDTYz7ORotOdfJHx0+GXJuCQgwQgREDHGCBA7GzhXiqP3UUrZWRu/ZN3LDXzfG37lruqy7Wh24WF/64MDFjGEiBwQY7CRc9k0NWLMClHNXfBscTm1QaCjzSOHR/O9516/v/zRBSBaGy7iO3/ufg/NsLdkO6Zk62SKjQuxs0YVmjZX+vceuV2CTJOCMSrzAUAUUnXGhOgJQqJSLVJvApMoREoUESQySeCJAkNOGBGQc4GIkaKShTEmBMsEKpWEiGmaA3LTtMCizpbSpOBZr3MhVcrYxnXTZrKdikhE1jmV6LZtOQYA8sEzRHJd19WIgIwHwhhB8hgjeO+RRak0Ap+OR71ev6prnShnOylTpTRR8D4miQYg9zzjAAMyJoSKETrXJUkWgiBqirQ/q+oAvrOTyWS2Nb5RhTZQqKHo5geDHmecz1p//mAKhkmGS8tLk9lB2S8rB5lCdM50YR7s0V7x9Nsmhx/arOLBbHztRXe9anfnoJdqGcLVav+W/PDuZDukWM3Z2nK2M6mQhciir7uN4fEr0/3q/At9O2obI9EjhBhDjCA4CUaXjr3/9MU/RrKbp2+f161zPo8SRHfHAy/b272+ceikStJPzG57MP9esbQqiH3lM58ejW/E6dX2TO1UJ3VPiKRtbNPUJnSpzpDhbD5ZXugba4XWPsQ8Lff2d3QirXGeKFUpPo/Ckc1bp9OJVNi087oORT9DhiHEEBx6PLZ64vLOpdl8Mnq/PfqJNRtba+u2s2maai2t6UzAVGcZFx14ATEEh5wlOkEQxnpE7Jr5pbePD//tMC+5Z159p75wPnndC878xDvuJ9MeybbN0ss5sE+fu/Frv/37b73lVJWbrz128XSfveXtJ/7mqdYdo5wnVTfXMltdWtk9uI6SN3UbjJVS67JPtiEGUvJ6No+KKyFT0a8PlkbPhXo2KxYGixu3Do4P6uaLdXXt/EPV4ZPDvYtb5mVykLr5w9p5YoxFCBJZCF5r1bYtAEQmXvKSF3/jq18kkJGQceVf4cMXKgA4/tzSyX+/UQVy81ZPVTiqjq0ebtrWOBPJVk0TXcwSnWi5fXCzVxbzuSNyDAh5miTamfr6u6YrHxtmSaYR8E0/fkdapLedOkM1XZ1tUz12SovG7e60q6uJTnsyU0sZHFlY3BisFdmwlJkPkVhMkx7nmiA67xGlRB6oBQhSpMhliN65mKSlQHSuYxw5F0ImGNPIGmIL5CrGIQTHGUelIqgYKEtEYCovBohZjB0A1PWeN0Zw7uwIASFIouCCxeCk4G3bMYzIorWGCCG2RMS5FFw5b4zplJIhep2WiBGIW9cK5EzK8DxnOJMhOgQkYgxDZ5o0yRiTPoSua0Cwuq4FV1mWUQQuBBDbn4w8wvboyqypQXgm/Gw6Oj+ZVXNWFpo86AE/tnhcpxtXdp6ctjZNtGubLNXB2gjsyTdcfeBLx5LeoVG1v7SU+Sm7uHflxNpiwjJS+ur1p0+v3XV9slWNRonmxjGlhEOWJ3py5czouoFQU7TkI5KX
IlJk+/3bZwu333r1T2X0ENulU/fapukcUUQB7mWvfs3msc2mbpvOfKq6+10LZ10wSZr3e8Orl29878ojFV6ZtCOW+Gbenjp08sLVqz5ywZwWsu5CkuNoVtngJcc0KabN3ka2vmfmGJgjm2ZptH6wNDR1laaaCwYgYgidMSJRtvO27bgQyFlwtP3OUe/PxKm1W5Tkz1y/5NFnQvAUU5k187oY9BTypp53GBjjbVMt9FelcqbzxPiVN+8ffWjVhkqqrDu3WywsvyG/68ytG7artJT3r+2Pkru5KC5uTz/79cc/9flHZHAbJZy5I/Mrw3+wDetADhR530vL/YMDy0TBWNU1hS5sjEJFpYue4nM7D5ZAJFe+CPtXtjkqZHlAxsPUx6x3+qW3n+HdTUf55e+wby2U/eWvwZ5d8kJ01RZ65GAlkuOSgneRJfKopcsSBEEIzkeICEgckteS+OHebV877LFmxJXQSsqIljGODLRm+zsmyZNqPl3oFZOqCd5LpYIHDMQkOgtccfofQfxx8JL1VR//6a++vnMxUkSy00lz1613PHPthuvmQiZZIkJHvX4f7WyxtPccewWjarlcyvMBF4yCHM93peBZmiGJVA+dbxkD7ymGJsuXCCBSw7AACiGGLM1JMqAEudPp0HaGcWBMek8UWpUUIUQOkbjmKs3SoRAQPIUggTlrHYaZMZZzir7jPLHt1Lk5F8x3wTmjlORc1NV4NhvneYqIMQLnjDEMwQMqzkmIDMBzEJ1theAMMCIDjFIIxqQP4Gxj2oohOO+zrADGuBAIMsbgnG3bNktF2xnOtHEzDNnO7KoFN560X37iG0lvISb+ech70E3yhbwzghBMVyvGFoc9ELJpzOOvuvDKb9zhoTk0uOV72xdfePLW7116LtjGepct4EqyMp+Ns3J95+BG3da9LFeiN61u8Pal+1eQuSballEAep5nGKuFuxjG5cm3BPhASL5bOnVvO58HkIuLS/Vs7+SZ2xjXUqhyMPxse897l84nmdKJDpGETv7hyU9eHX3V+7a2nefIPc7HUydCNIznZTe/lhUrRNw776zlisgDl2q1XLo53U6yxEdq6m55cb1rqhC8UhK4z5J0NB4NhsPxaGxbt7S0cjCe6Exuv2M++KtUgF8ul/emk83NzfloOp6PhSClE2Rp240ZBM8AA8kkjZ4Z0y4uLLXd9NrbDm79/OEQYuvMsFxpmXvdlYWNo0el5N76W1dDkqkoVxRnWa+HvP+5rz31R499mp1QzPsoMQKpyExbBSHqhkpCrbMWOkdtAsKgBO8xvrZxK/vnVjljuzenvLxbTr4a/NNJe+A23+2vf1iUp2D6WZ2tvvgNP/HYGz5a/PrBXvcKxmtqL7pmt5QthdqL3EFDjgdygAKQAzmGPFIEwEix6C8AEvwW3f2Ph52tlUq6rklS7aJ7nhBcCFbNqywvGOfWOhUYISFXgDJP9N7+TowCgA7eO136b31KxEKe4rt+/m7k0kSjMjRtDG2XyMLYpugNyPPQtf281yAWIltI906u3Hp8aVPylHMcLqyGKIzpEp1olcbnUWjbRgiJXOp0McTo3VQAMA7BR0TufS1kAYiMkUBZt02aFQSCcaazzBjDgfK8x5OEi57tzM7e5ZXVDQJhnC9SbbvYdftSyuBbck5y2TRWss66FhEAGEbpQ4csciZCCIDUNLXWOkaLjKRI2q5uq0YIKMu8M07IHJAoRs5V0RvW1dy0c4rueZwL5EJpHWMMwSNCCJ4DAwjTyTjESa76lY0gWWvnu+PuenXlym6bFNyE6XJx6KBq2q4N4JQQ0btB2ZvWNUP1zFu23vzd+w8ObrIMUzFkRX7x8veybFDXEYUzIWmanSPLg6Ls3bh59fUP/Mjjlx7dvlGaK0fATEUwMXbkPWcAMY4GdxKG1ckTkgWKDjFy8sMT95qqXt44HANNxzubx0/0euuck0j4w929712+CkDIWVkWxOBb5z7/vcsfX+j3D6aTlkI/KWeTmc7SCG45PXTh+pOtDYPBkDMRoktVZrxbLtfH0+3Kzziilql1ZJ0DjIlO5tNZUupUaSklIe6OD1KVUsTW2s2llatvGy1/vDRu6kxkjOdZaTrjpHDt/PDy2kKyeG77UqrE3mSEnkQmnTW2i1rqLNPn37R14lNrgrOZrdb7azK2Sz4/fL0/WO5zDuur6b3D8fnZwuJwWC4MUqmAeKvFH3zt0e9Ob4DragwcxGQ2K7MlRmYS5szOS3VrzEdb3z6+c07J5QfZJITBetx9gpbO8NmELxxvISjLWZG5/a+JtZfZG3/D7Xc426Bf+usXfOYDXXzJzXE6a/NI+1hdYzIPMsrRn8h4w0TGKAA0FCUyJABEBowdOXIkML5+H1cL5uzrdo8+tKqU7vVL5zvjOowhxkCco+uCj4wLF0IqmXFdkuU+QJkNmraOQVpnLr99+9jfrLBEKAb4vp97kaeWayGzvMd6TbfDQFqhBYEzvJrfPLyy4h3zpn7hidObxRoKWBiscKQ8K1BmRIEAEGUMpJRCROcsR0CdAnIJQEwb2+VZESNyDklaxsDms20INslSFzwyKUUSISDD4BwDIimkWpBSUZCd3yqTE0KLzk5TOezsPoOkmm9F21JwnPMYo7UdUZRSOTsn8AigtI5eOme9d1qreTXlHKTMGIu2bSh0IXiVFj54oiClROA+Bs4UZ9L5DoFi9FzJSJGhBAJrHRE07ags+t4hFxFBebDeu64xteme29s+COf6xcbVrW3FyWGR53Jsp854Flk9b+Zk66q7+uDkZV89gwBHV3o7XWedIcI06yV8cGh5aTI3F7f/4fDiLdcne9O2PZqTtXfvnFuK3QEPDTkfgIA8h3Du8PuXJ08OZ9+WGBhFQqRoEwHlkbtc05WLA4bSd205XCp6SVkOLsbjeZ7f1RsJkUilfPSa8W8+95VvXPz/YmB1O6MAZMFyPlCKZ/o9J37wY898OkDLmYjgDVny7lW3v+5bF79OiJ3FQqnDKxvTyWxiW6Fk13SK8TZ2WZI467gQo9G4yFLGmI+hqpz5MVj5eOZ98L5LVRJisEQ8ME9Y5uX+7rbSejjobR3sZColYFy4ahYiecH5lQenm3+ztLysinJY103rzHw8/YA7vTOugPTxQ8tLA3XPxuRKvXjk6GFR9KUP3mOINpF55UmB5lp++JvZb37iJPpp1n2yS99q9x7LhuvNyCJE4T/ncUMd/mB387vS36DhiylZJTEC6vPO+Dzj4wuwclvcv4j/y++y//zTEcar4tH+8DXTSo+7VRsZCIb1k2z5hX7yCA/7NP1iwjsp2hgUQbDBE9Ly6tq0rk+/YxAC7L17/9QjR0xj+2VJ0VsPoWu9D0wnKKCZVabr8qLw3guJWqkkSZqqWlpeHI3asizOvfXGrZ9cqkwTDOLP/pvXXK9rY2ktFVm+euXgylp/vTJjLYGz5fFsl3NUREWe3LJ+iDxl0W+sbvTLxURlKimb+X6eZwEYAUqpKVAMkQvmXVSKW19rVRhjkiSz1gGELM0BGRG1XV230165AeCFSBljQogYoyO
WZTlyQcTyfDmClILNprtCIHASLDNmRDFigHo24twwnWIgBEAEYzyA45wDCeQGSHgfYvSIjjPtfQAg7zohBGfCee+jpQhKcO8NQ+6Dbdtpr7fsYlQqQWLBBR+CUNGaCMA4c4z1CGz0nbWdtYYLlSSFc2Hc7OyMbgZmKottNJWrrZHzZqr6spo1xMBZVhvz3Fv27//SMUCeSZMmRRBBBiWVbuumQ59ItVNFZ8a3nbxvXoUroxvmwp3U1jw25D3GDghZdM8c+cAtlz/MMTJoGRdAXjJiELWw+eoD43raHw6lUE3TrKysbWyeaE33BX//O5eezdOcfx9jTOr00XOfvLr9FRuCjaGuGzDBM1BKlCphAPN2muYpCTZIDk3qa5In6yuHLlx8mikhtUzlUmv3O4tANmfFPFZdY1vfrZV9zkIL6BrPFOqEYUgO9g9uvqc+9pnl2biDaFWqOYNSJ47xEDud5G1bpzqVGhmnejqLAQkFUYTopy3MPtAMPyIFZ6uLg91xxYViLN6pl4881+w5tlgslBJefKI9v5/de/vAyWFWFJ2x0XofRYAIBAv95E3/erjlFUuOmMk59F9iyQd99XmRnqJ2K6qcmbOIQzj0T2n7K3GwzrNbvOzEZO4WFlTNX7757a/s3WF/5i/Z//Xj3O1jU4X+4V79u8uDU61Yb9jhJq4GXoTtTwu1aQWV4grff0S045r2kTmKbH2t5wIeellRFZh37uapm4InydkkVQkxVuZiMp1wrnzApZWlg91d01E5KIPxnTELCyWBreaRM9c1Zrg4uPL2/WOfWnZEhWb44D+/88TSXVvbT5EuZQz5QMz2mqIskGRe9GvThMgZa4apPqLKE2uHhU4BRJ4WWZ4Jntb1WApBwLJ8wVorBQs+EDgirKp53UyytBwOF611RCAld84BgNZJ27RJXjCWCc6J0LqOiLTWXCgpMx+8kKTyBS51CEKyLIYGFUBU5NF0o6YZS07BecEJKDhrmWCSZ7P5SEoeAhH5NM2tdQBxPt9jyLXWAOQ9NW1d9koA8pY4E5wz79qIMUZgjDgrfGgRnhcYUjA0ne2PppcWhytpuiEUAQnTdkReSB4ixcjqen9r68baxlKiV7YOtrgu9mY7ztmDuFtbYRuhS9YZMGAfe/Gzt3xueTnvF33hrDd1E4INGPKkZ4IapMXUzYpkQNHtt9X02Vfz5oCogughOgqeU3j68A/feu3PIAYlAAgiRA5BC0o5KzJqijNu3op+qaSOMaZpvrS6lqTZZ9oXvnflohBcKSW+Twn9ra1PP3v58zyTRZAddYfXTj9z5dyR4Ymr9U07nUUGnMXG1WuDzfFkj1zIi6zIV3f3bwZojxw6SWj3RjNrrCMnOlW7uVJpa2YL/VWteQx+OmuEJMmFI3rq1Vdve2StrZzz1BsutG0DNjj0g0HW1rYsc/LEOEdkbWeVBudD/O88Mbr85oPbv3CkmjdMuKoyeVZa23nF3utW/E69W4VTm5tH+uZtL5gBwNSlT82OMsERGTAWgiDgXNVv/OXjbcL55DNx42fj+AlovkAL72PtWbSKYoccI10gOinKH+CAba/PaSmGy68/PPrcj38UAH7tvpu/9o7/yGQJniJERMJiadD+p6Vsg4UjUQixcnxrvxukTX8+NwEMtQ5NNck65gv69KnNuq5m49X3bxz6ZEvWNePRB/3Rz6woxhrbtVMTotdJwrnQueCI1Twwwb01jAmlEu99CB7JWxPTPB39k+rUZzYa5wZ5gj/3y6/olLpw47LmKnQ8UUYm6aAvo8tVwqvaSZWQMyuL/dUkHZb9zf5qmS8DIONB8bTpas45AoBAzrhgPEYigOCDEBIZASEia9sOABJdSCk5Z7PZLE2ZVH3nvVZJ27ZSSa2kc5YLgSikVPN6omQ6b+dpnhdZCVGbGJp2vr52uGpskS3YrjFuFK2H0HEGkVByYW0HSNYazgXn2LaN91EKQTEQEABwLpzzSiljjFQyTbNqNpOKOd9yniBGKTIE8MFbUwfvmYoutIlYqmuTSBdBBg8AteAZQeRCNK1hPAOsBKpI1hsDoqi6CTL6zpWb4/Z6V3dNCCH2Sbqn3vDc6YfXh0lgEm1XB4NMydZbRQp4OsyX5mE/kZIxvPDU3fHgQLIaYnxy9d17+ZnF6ZOChduufZiAFAeMFpAjRC0oEXG5r/ma3jpYdnOPaVrkJec8TfNjJ493nf2seeDB/jNJorTWnHMppWDZ3z/3YZ4eXL5x8WAyUSkvsM/Qe6xlSO84dtt3Dy6b8VykkltIk2LWVY0xbbe/ubo5r1sOTCWJcV6JvqUDO/Inj99zMNsSqrh+82IqsrkZJ2kRouGgW2N33lUf/cSgqhrGAbVyLpILQrKFYd51LlJAwZlnFLC3OGybadu1IXjT2eDh5nsnJz8zdM4AJV1jtE6RMcUAhfhAO9i6MZs23b/4oeGhBQ/fd/6gZBwJkDFEpgAZcPexR0sLMoTrFIktvgX2HyZooPfK2PyjDNyBYxAAbMRllqxHApQLxNxrVp965en9X7vv5oceW3/9H7/9kec2pF52wkmTurx71b3+9G39yYd/6+v0dsmql7z8zseeeKYZ7zb+1tr2nXg6tgkEr/FTD74hu3KJb03u9SdXVxb/C/A4P9kIjcNnkqpt66lTiRaCMYHOU6JkXXmCGELs93Pk0XsLgDH4PBva0DU/6oYfETZQmkp83y++tDG1jx6jDrY+tLi8X3uguVSJ4DxajcwomSeCbllZlchOLC0e2bgNAgPokAli3ForGVamFlymuqCIREFIboxhjCuliAABARkCtW2TpLppasGiUEPCjiFTqiCKRD5Ex0C23VSpJEsXA5FMhjrLrWlkqoriJHCOxNv2ADHGaBHA2MY1EwkRiMfQOOc4FwDkvIlkheCCy+DIhy5ETxGNmQuuGJOJzgMQYzyGEKMlikols2pPSWGdS5NUcg0ESLwzNRcBUVP0jenSVCApAO5cRwwZEwwAQU6ne4zFpnUiSafVRMn83O6l2lTOmnkY8YRLHR6977n7vrqiYCGJQqpwc79BhGnrVwaLk/lOKlTVPAAAIABJREFUXi5Y4hKjt2sXHy90aEnxpxffevHwe+D73vm1tyAAImH0qQCGJARo5g+vDfcG0/NwcGR8dzfDKGWeFUIIzuXxkycA2OfcA+9eOs+4UEohopRSpclnvvv7zm8bX3kn9+sxBuoNykQvUOuzLJnM9qxrpC6Xyx5HsV8fTJtpocv1fOHSZCvLZV3ZgAje8ZCjYCUWEzfVXFR1IwoGNe8N8+l0dnT97u3ZuYtvPlj6i0TqNJWhjdQZl/CkrWoTmqQoO9tp1JvD1X5RjqrR9f2dtEh0kpCNAeDcm24ceWjZW7+yPNjf3VdpZkNE8BDSEwv8/i184ontt96XvOMlPfi+/+PjTjJSOpUq0SnTeT8q/v98fP/LO6+BgmD618yTP/qLyl6z2x/h6avCyrvZ7qdD/QTjgWIGvQcozpg+geLOf/P6hz708i/DYz8J9/3hb3zlVeSbR58uvvxcD8vjooib4ouvef8P/+
Rf/+u/O/TC3z+7gkbc+dLTX3/0YkUstDMIKc97qAqIXc/+e5UsKH36xv49YuHo8LaLq/2Pbr1755ZPrY8mlYteysR5LxVDYAi+npv+sKyrmdbK+8CYYsp2dVuUC9kgufDa7eWPJMQlsoAP/sz9hNjPQ8R4auWlN8ZnEzkYt3vNvF3qKVN7QrY3M3ka7zp86J5Dp9NiIecJUyBJRCDGhI9RSEnAiCKD55FzhkvkTGvVM6ai6AVn0UcGHoUEphCVEBqRS8m7rhFCEnIAcqaOSM7CcLjsvBNcc4EuBMYEk1rpTKokEqpkAKQIOu87yXVnm2B822wJJUTMZ9MLHJAJ5r1nwJ5HRNPpQZ71QjRc5gypbWaJloEEURSCA4JgqfM1IkqROd84b4Mz3lvGgIg4U0QOmMqykv47j0wBSR86Zx1HEkwgw8hjsBaBW8/m1X6hyo7mF/bPndt/QkBTlOtfevHOHQ8nCooWuFDoa6vTfNYBBJZoGaxTnJGG5sLJ0VbFBAMUj578xZ38Dvi+W7/96xuTr65lMHLtiuYqhZyxzc1iZ/FgYfGOUTiIz5TTSSTivX5fai2U3jx6HBE/Vd/znqVnpdRCSkAmlUxV8Rdf/1WP865xy4uDEmExvzfPY+tnF3auWz83ITRtC+QSLbVQXQwUXdcYFikqaYxHJiIQBlpZHEyaSjNZm7abz7hOGffkmEx7XMmTSb4XzXdeeXHzoSEBusZvbm7MDsaHVzcu7V5hLPFtgDxBF+rZAedqWjsGFoDpNA+RVIKTD7SbnzwcQgvMRp8c3Tx2sH811UXrQx3bjda94Gx9YWf8jhcNTh/RX/p2d27bYIxMSUSRZbmQEoW4sj/5w2+/Cxb6sX6M2ScipbT0k7xUdOl3A23g6v8E3YRmf87IYXF/NNfRG1x9d4CFf/vav/uV7PbXnmu+fOUQGPuqW7Zff3LuJxcwnP/NL8r3fvCfKbXy0utfftxc+9tnlqnVY37S+B54wuiC22cBGHydVV+N2Q+43gcxSxm1OL6kN0T+od87/BBLhZ7Xc2SUZilAM5lZRVzlRYi1dY4TcwCF0FG6am5joIVB9tybd1c+VgBXHjp89/96Sy56Jw+funEQnbmoi0Shrn2tVC4h86GO2K+qvSRNVOzuOHrozPL9pVRMoLWNEAoRuVT0PAApZIwUIwkuve9CiIkuqmaqJI8xxBCdazlXWVYKpQkpeGKMdabmgEJJIUprGi4QURDwoijqugkh5EUJxBljkXHkiU4zpTLOMiHBOQ+gfGiQwJtWl0OMbHRwMTrjzCh4x5DSRIaA1rVCJNY2SungPQAwZM5bgBhCSNKEonCu01oHj5E6Iojeem8DC4kcMh4FT51vgChRInrrQ2BMhWgYikjMmooxTjyF2FpjmciEjIxwOh213s/dlDHcGT32uZftvuVb9z97cAmtLoqitb7qYG//YNLMhWKyKHuaN9PZpnnpaDYPBjnSXnnb5078WwDYqJ6457FfCRCDmZSJ1tJtLiTJkH3LX3Dd9NDGnaPm4FB7JjaFi9Qf9FWStsacOHWrEOJhc/97lp6VUnMhCJALnublZ//xd1KVVrODjvtVdTTvpU9f+kqNRrJ+N68wS5gDLUUIbtAfTOcTR87YWSpTa22W5BAxBi91nqf65niPIQvGy1RxkYbQztuYAhNSRoiM2IW37q9/TMfI2slYZH0bg4+tUrxIiuWl1c3e8tV5dfXqpaXlxfls5kPwzkklkMUy7X/vFZcPf6pI0x4hcvIQnc5TJkrRdTVSIJmQfe15NzrYK1VCMQKxaHxUkgnOGAuAKIUg9adP3fbMfImxNfDfEPYCsqwbvJctviA+9zuMiGdvCfwUTv8M1AL0HqDJJxh5WPmpCP0PZcu/Of2eTZc5VlRNMR3wcqAm7Bv/6dtf+ub31tY2m+V7Lvs7P/fxj11/Aq/HW33QICJYq2JBzETaF3A+tk9R/0cd9biysVjiXZXk7fJv//jxR07UBxPAkOeLAbtm3grCgEKrSJB2bSVz5Zu6NV5yyTn2B8UTr7i09tEyBMxyhe//5w94xcoUQ0s6W5jU2ymsJf2ANnZmxtjAuLrUpZQqVYzs/MyhQ7duHCNfcBm10CEEYhwBKQQuhJQqEijBvW8jRSkSQtk08zzPnA+cAUXmQ7S2TZI8SfOuNUkiq4lRqeGstLFVMpFSaZ0ZG7uullIqKUMArUUkBC4DgOA8kkoyzVhibQDGESNG2ZibiwsrrjMQwAVHPlSzA6UigtSJ8I66rkJ0nXFZVsRAbTNnHIgoxsg5AgDngjFhbQ3AyYcQbVYsEdjgEVgTA5cMQnD1bCa1Hk9u+NAdWruDkMbjnV6/L3RPcUYUjQXG43y2BxR7+YoPTioRQ/Gfj/7Ve5+4u6oET6oLN88fdOrsM8+UQm7Xs6XFVeCOy2A8rNV3dtPOet066ELXmXD5yPtOn/u/h6W/tt+slTLJNinunb5t8R/9zZnrbBCK2zzPlu1Rv5e6CGWvnxWF9f6WM3daaz/bvfA9S+c450wIRM44Fwn/7Hf+o/dTAUvLuoylv7rz3Gtv+8GPP/7fcr2UMimKcnf/Rpnk/V7vyo0rWTkIzi/q3gOnXvQ33/loW5ssSXxwTRVOHd28vrdNiNPZLCsL3oqF9WHo5jWFTCqlwDTx/Jv3h3/O0rSwnqTOMEaM3rrOR8+zNOdJ1e63lmW6XF8azubb0YkiGeRZ/tz2WftjZf6ntLRwZDwfN90IWWQiid5b8Av9BTuyHrsjK8PDW9WpPYjSRBCuaogTkwkDJoQMRA74d7YOf/zcIEQe9N1oPomu0Yxa9SY6/qC88F9dfILES1HencRrrTvLll/Hdj7ukNjyj/zq4BW/Pv1SlLkk6bFTtslA/tmf3v/aVx366f/5D8eRv/CweEF6MUvjY1/99q9+/hgLqeCJVUshH3BcAUwhUCyWMVrRUiAniRwACYw/95f3/MNZKUcMLQBasiyoXpk1xvfLpO7mvbJoXDQz52LsmnmqtfF+5z3V2l8VTe0YAj74s3dGylmsgUfv0qQM0gsmUXjWWc+4y9K8JTUd7Z88epR7krF98ZkzCQ64rFKxqHXiQkySlAU0ziBHnWjyUNczrXWW9ogJxNjZjnMhGDKmjTE+tJKh9xRCjDFwETuzs7b6wogQPHGOBFyrIsbgfMcZMWRSF5wjUYzBM5lbG1vX9fvLXLIsPxTBRkcRkKHz1lI0PnaSyei6utrDGEMwnGvraohSaoWME4FA5r1zzgJA0+2naU4RY6SmnaZJqaWM0bbdWKtejFFw5YON0QEBIOdcaF3GaIiAMYERIwXPvKlbZJFAC4FKpJG8EHo+Hwdnslz/wfG/++mrb6mrZlR1T114ane0d7O1h3LepW5hMJi5dtqMOe8N9zbnBzHEYJyPnjiYbyz/yNpTH6mqakHXWZ6y0By5c3nSs5ea/WVUPNVMCQi0QOt+W3cd5Hmh04xJcfTYaSJ62Nz/zoWzUkpAZFwwLlq39+1rfzKtKp0vVG7aTmdMZtbMTq7e2Ql388YlrYcuVCLy5eXlrf1tC
OioWS2XTywde/L62Swvy37v0uULxrOlPJt1TSBIEhVr+65XvO2Rs9+8fnDljiP39qV6fOfxaOHp1948/ZkjPnSClMigNd2gt1rXkxhdKRRmGXlXO+QYDy8mcx9Hu3NJzLR1Q3L8vvkdXzhj23keyeTy6vbNxbQvMh6BB2c76jjXijhJKiFmB6bX663YuOKd7IJinEcWYzQBfYz/22fv5Wwc2Cao20L7J4IioGDhfnvyR+T2P7jmc5zfFdZfI+rHw+wyLb0SR59k3vzq8d/6kL3JqIsICFLag+98/u2HjpZK4l88/I3f+KXfX1rZvOcFy7ecfNXP/9T94sk/evr8/KN/8YkvP8e/+nQW6ACgjMkhkR+K+nDIj2LMsSUETwJi4Oxf/b/3fX0vTa57OzdeBnBCkM57oa27Lh46vDyuZxr1wbzpJcpa66O88eDu2sdKiqIzFn/hN9/8gvW7v/jMN5aWMsXWdupLrhulqgBiSLHtLHFJQXDmkmRjZnZ6Sp9ZXjm5tjbr/OZwhZMQTAjFIjJGQAQqSQBAcjadTjkyJrgQIhB471OtQggAwBhTUtfNjHMhRNK1zvlq0F+L1BIBEA8xhFCXZQ8g6azrzDRRCWMyksjynidKiqHWuTMtV5pxTsiBKUW6NrtKlk2zAxEQKBgrOCfk88kNwQhZiowpGWNIpGZtWyOCMVZKGb0jIOdcmibGdgCBMya5YFwDoTGNzqRpWyFyZI6CIuqcs1JK7z0wiQw55955JXjb1ohEFBkiZ/pgfL3XW4jEJNf/5/pf/+zWmwRLO2M7767v7JzdO08CW79bFurC1SvDsje2dmN2upokruKAtfdsrxLnD73zB7b/QEjWdNnINHceLqZnWCadN2Icq5whBfJpyCc9vrfR1I3IB3mvH1137PSdiPSwue+HBs8wIRkDJEh0dnn2zX+8/nn0c5QFhZhnCREejKfry2Wh1h2553afilETA+dcsNbGrq+GgWh1WIxcJ5n0JgLy21duf/zydxq3e2JwYu5mkxj6TrE+N85mJLxIuzBt6vbSWw5WPi5jrddXl67e3DpzZM0EN2miVMCDPHn0+M3d7cBjoFhkZde0XVd3noOvOs+2fujgnkdvmVazQCwVvOs6E6hMc+OaQmuep4wAOTsY7Q/7/f39+frahnM2zXQ/6W/fvCTAa2BHyoXBfP5Hf75xfT4P2HG6zcvD2H6Kk0NAI+4Qw/dF6tj+p33/VWLldBifZZMnYfAA1t9+FU9o9YOPtJdFdEHhirLPPPH+6DqJ4qCZ//CP/Yenv3wxXbjjT/7Le9/wmjsJIyH80oceP/voMy87+RR6pG7661//AXRTZvZCs0VK8fIwS+8nJp1F+oX/qn73fzj+Ax+7Zfmb47a1nohAcs6Zd56VpRwdtCoRdT13xmo9SPp05U17xz+zYmzHUeKP/8Yr+wRzQtZhpoXjguKk6YBJnngMnqbWHlo4srp6aOvmgfXew2SQSu/gpSdXBr1jjIKzXqk8LZJgXZIkyAURRe8RgQFGICmlj8Q5pxCaplFKSaGQsxiDFDoECiEqJQRPCXyg5zEhhA/W2bG1Ns+HSuYhEhcqycsQAahTOg8Ezlguc6lSIXKhcqCKsBfBIHlvmhCcD0YIFpy3TasUj9F3beVdraQmIB+aGAmRUyRnR4JL76MQKgQicolWwcfW1KnWTTPtmllR5ERKCJ0Xi4QMkTsXEBhBaLsR56BVyXlKFENw1hpEEly3XeVczHtaiOz31j/5o8++Fpjyzjnye+PR9uzadDYZDvmli8+QhGJ1dc9TMVr21/i8kYEqNBbAf/PQT6w9/Wd9NIy3quSollZf6ubzdj6bW7JlNrCmBTCvuP2VTzwytQaLxZW0KCVnq+sbIbiHzX0/NHiGCQlAHFEI+cTOX96cPMt4rFsqdGqNWVhc3t4faRlM0yhZ1G0XmeecayWjdXPjN1bW9mcTBWLQy8ZNVdW1BHbyljNXrl6tm246PTh0aOX6zesvOHz7sY3bz974/zmCD0DbzrJA2O/Xv9V2OXvv0++5/Sa56QncBBJCEnogdILlBxXBggyiM8woghRFVNR/FB0RgQGliCBJaBoCoYQE0nu7vZ+62+rrqxN9nocvPe/AeJydv+2ibz5y6yPPP3jdQ1dx709sHhtlGQUjw2g8qVsdKahg2NV5HcZBXdfgMGXcOm15kE0zhMnGm8Y7vzNPKPLWgzEYs1qbJAxG6URXCkvJAZgQWZaGQRDGEhOaplMELkzarSQ0jUWOSYbX1fqJB5PDdzQYSfBKk13YzFr3A+aQx8qTXV683AUdxqSZ3g3zrwEz9qPHSLzDjR/6cP+qD6s1F17A7fS8/vjuH99kkdTOSabf9sHP3/YP32m1L/n9j7z2V37xEgwGIaFB/+7vP/bpr+UYGkX8R665w0zqXpvseeGL7zl+zne+++hDD/wUL2w3wTno8se89XD3FYNdD+/a8emmSTsdajU2zjQNDgLiHCIMO6OEoEWuEKGrrx/PfS1gDCEE6NXvvlSyKCuqXqL7yaBSFQHmOZ7UeZuEGJHCad+MnY9nBt0s1QIL74wy7tq9g22z+5BDrbilLZRV3m21jbGEMeecqmtrDcUEMJJSWg/PwggRQsBjTKi1BGNf1SlhnmDpPbZWBTLx4DEG8p+4t1LrjBAEHmuViTDCmCrjMKZCcIQxohwQAfCT6SgMBcLdMJ4Jw7mt4fFQsLqpuCBa187UUgTWKG+UtUaXpdK5lBEgarTBhFDK8ukwCEKMqVLamNpaJRhjlCur82kWSKqakgUxJl5b5712tkGICR54cBgJ6yrnNKWSIOkBqqrEGBOMATlChNY1wlxI8dfzX/214y+1HhulHUKVasqicNrVaowJHMsmo3xcW9XxIjs0UHgyWouCcFLX8MT8TZeu/d8hRM3EUaij/vw51+rK+kabQLBTq1uyw2yVznfmp0/3bGWS7kyr2/MeOjMLAO52feANvYOAMUKIYkQpvv3Q3zpTIMYQws7U3lmEKROBtrVqKrA0zaogZmBsK4mtNXWtGEbKO4Y5I5HyBhMUcFGpzFVxnJBjq2tY2mxknShD4pDrFtN8+56e3cxrHwa/tyP9y8cxJZRxq7RnLp/UUdLVqkYOcQFKO8aw0bpWthOK6aj0IZVBxFn46DXPbP/WoN/v1EXtrGWUj6fj5bnZg8dPtaKWBs8FL/NcSmG9Fx6FrZb1rqmLTqfnoLTeOACtncSo8fKOTwjiLQEwiFoceR15OEwsotgC7Tl8QC+9jNdrbusrvvdGiLfj8aqN0B+66I9O/SnuXAXtV84l6m1vSSgxmPjltji+eu+ffuyL3eW3pNV5b7zeffZT1wJy4KkGfcmL7ji6JUWt3vWLC+99175khqPVB9Hqg4DN3fdPP/CNlbvuegyWXnjVRz/2wQfm/uS+lz+kFmdn391PPPK2qMEj6HbaCNk0LcOA5UUpOHOabPx8NvuvIQYmZYJ+4bdfkPmJgZYgdmGmm6dpK8KVY90gyMrKekuQA8yrhvKQWoWdK0EBM02rJZ6/e2+/vc1qL2NK
4cdDnOXQ2Jr9c+LcJZC6b//aM3v/fvfvTR55eLd29DIQlHFCwOF6irikkBuyQuHvXTv//6m//iGwjgRviTH3wPw1pTlvx0+mKej8t03LbGjIGRhMNDdQdKxAuhmdbC5bK/Tvm2nffqDwF2Mz8vh/V0fro53iJlDDUPZgFE1yqSetfPPvvcxvXbf+bbCAsiergk0uHC5en6+Twdwqhu7fTwo1zubu5fSBHQQpIlJffR9nh8+6etbx/+yq+BFVXt45xzoB/c1cNNgwWIhTA326a8Xq+nnN4p7uhG6hfEmKdjgPbWU5oQCdBarTkvTDzGaK3Py4RAHko47ft5nhmRr9teygzugN1suIWOmA6zB05prXUH0AAn5CQS0NVUB0/zygx73UsuScTczAkQXHciRoTWrpJWMyulCE+qjqhu6oYpSx8OiCRGAWZOzBGeEm/bPk8LAPReEbKZp0xuxAIiPLTq8G3bDoebCEAaOijnYtaQ4J15nrbtKiK1tsPhaBpDt5yLeyBgG5u2erMur7/88v7Fh8NUJPXaiAgRS5laayxh7qaR8wQYIuLuiBiOaoOZ4BfwetnneRbh2jRsvzw+3t7e130z12U9mmPX7ebm9nrdALBMZXRn5GWZI6KO7u+opTL905f/+h99/neYYx8dKAmSgHUq6AO8RyRwEvFhvVUlJsJQ9SC+K/R4jamQATDxOk2Xdsp0wKQWmGOFrIlImGPCn/7Rw3/89/9X6+Ors60399/9Cy9++IP/8daxsK/Hw6+//NW/+def1/rqkKaH8wk43d7dvz29Blpv75bCN4hatw0gIHwPbZd+czwA07hWmddA8+4yoxkjgumeoHTfci42ZNtOKdtheb6u5eHNw8/fPL4n6+//wR/83g+//Oq0T0kws9ZukAKibu2rf/jzD373WykngI4/+cHvjuZTWR4fXq8rpem2a4VAJhljlFIiArmEq/uAcLPKxBGo6u5dRM6PMeL1e++9B8hAs8gaugmnCCUJD+4VWIAZAdAj1LRM8753JkgMdb+u89f62AONBM9PQbxlmXRoWo4l4fn0lpBubt93HwCwXStiZ5nUSCRdL2+X5RiOxEAsQvb09pSmIimVMrvJ0GvEYBZVLWXW0LqrSBbBGI4ZVbUk7M2r7kkKh0nKvUXKSMQA5g7msByO/XICwqENgGSaMAgcmHKgegAxisDYryJTAGk4ertetsPhKJLMRoR7EAkRUGs1l4wIYdL65eZ4V9u5DV3WQx8mIuGYGC/nh3WegnC77iIlpQxItfZlmdx7OJn3qZQxVCNyWvZ9pCmFhukZQ5KkPMm+7ymlfd+BNEsZrR+WtUcTXsxMrRJxTll7d/cAZE5qQMhtnMMsSVLVlFM4ICGRq7GImA0AYGS1AYhlnkdrRGRmOWfDCXzTtqc8qVuSogp9nNF8Pt5qU5kygqgqAEQEERCCm7d9pEJmloQAAoP++Yf/5nd+9rcfH08vXnyw13POmYgDkmpFRAgm0VZ1ng+9VwBE9N7HPN2a7kTYemXmCGtNmYQYiGN0Zc4lT8O6JGptrMtN02tOiRB77b1BmTl8uHmt1ySTRXgYQrpcTi9fvtj3bhYAkVNSHe4BCCmJu7lhLrn1ChB12+epqFuZZ7cEuNdq0zTv18tUVkQP7B40z3nf2+W8Hw6rmeWcp2kae/9ff/jZH330+OOPPnu6+CeffkbZpnn55O/97Jvf+1arXd3xv33/H6c892EvX37w1SdfDnp7e/sBwYKESDBG37br8ebe3VNiBOjDxmhCimREN5Kj7RzQxOHaLmUu63qwQbU+5alQOoxdU4Es81AN7/vebu/eu+5ViEXw6em1YCzrDSL10QERIw3bU1rUNKeMGGaaS1GtCGSKIotqExG1DmjCqdaec06JRm85HSxaH8ICAdH6hTBPGU09giEoqBOKmXl4aEWiqUzCcq1XN3bfl3yjfgUoJcsY3sEzCwCI8H56lab08Nifv3juRqOPVFhjWPfD4SYietvH2GvtkpIIihRmiQgz7/0yT4uZDnUEzDkTMiACMbGbhcfw4SWXYRYApq3k+fS0HW/uet+maXIHU5c5hQczA4ZwMhuEHEEBhoFhSgTDuluUfOx2Hg1E0LznzKbuDrXu67ru9Rwu63ozRkeIMcY0zyxiZkzcWgUIxgQAJIxEEKrq4eDhFI4k5qbmxHJY5zFa3bdlPbi7qiKi5FK3Uxba9nZ7+7XaniIkpSwwu5y++PiPX7z8DqYFCXvrxETBxGTuQyNAl2XprRGAavzdX/8nf6P/pZzLdr0yU+8DAFl4mqbz6WTmuQgRA5AwBzgAQgRgIIi7m2lO2UPHGMLJw1T9nVIKAiKju9ZaS5mIUMcgwghHIrPRWy/v5EmkDPW6V0SQxGZKRAA+hopIRJQyuXutdZqKqosIAKg5IZkpAKibh6XEETRN2YdBIDH1vqtFhJdSEDnczH+BiQA4Qs38eLgH8f/6wz/54qvL29Pj7fH4V7//W3/8Jz99ug786X/6HosAUu8W+Djnb1z3c0ibeDZTgChlil/qQ3Mup/OXN8dno1mEa5gwEwoS7petTDy0EvFezwQpsfTox/XZ0J0wexhEIBCSAHLK5oYIiShv21cekPNEnBJjHw2hSCY3J6Jad0AgyPOSrtt5KjMyaHem0voOoMwJAM/npykdDOu0HPf6xvrjYfpzAKh+Yll6rykTouuQnEWEW+sBoW13V+ayTul8+sKG1fE437xIfFBtzLmst2EeZgBAIWOo4jUCxni6u3n++HT95re+dT6fRaZ976Xk0/l0e3PfapunrOG995QkIlzNHRBdOJn3Usp12yNiLvTJJz97/70PljURL/u+R/jhcFAV8xGh05wjYvRBRCmla1V3PxwOrbWSSx+7cFYFIiAKMN3rnnNiFgBp/eSu83SIYJFiViMCMRDRbRqjXa6PpcgyH3pvkrPk1FoQRmJ089ouKeWU01dv3twdb80852wGTEiErTcRUYuc2LQzU20KANM0nc/nacrb9QIBx9tboamOJ6HJPUZ0SbcTpmt9BNB3UkpEFA4shMQpz+ZDVXPKYY4kvVcAiMC5UITX2glToLXWj8fjGD0CAUJEAGjfL9N0cB8W+37Gw+Ewemeh6/a0rosOj6BpSuYK4O8gJUSMCLOODimn1joSiqw2Rs4Zwbf9nCQZqMcIT4juMZildySinEpEpES9t957SrLXCyKVaUUg8+bqkpITpbBXbz7qLb7z4a8hyb7VXJK7D3UziwgzPRyO+77P89xaEyzDH0kyxCo0euuStKTMcme2IZVXryv+z3/7z0SACMGgb/bJp//9L/7mX1O/d9gQCYGJ5HJ9XNZ12xpJvp4+e+/+6xacpgS+aS/MiDT2vQMEITFlxNrr2173JMu5Xtb5eSlsvk/T+w7Rejscjq2zh6WciAjDUipdDQDPTw/CsExHRKdSeu1hza1JmU11ng/73h2dAHLC3i+AMpXVnd4J9KEsTF999mo5rCkHkbRWU0J3TDJFEDO4d0C/XvY032ZS0ytS2mq9WW7d1IAimhvngqZuNso0j
WHEaVgTJlcfo09TJpj27fr5Fx8/f/n1m7v3Ja/7aFOO0TxJ0TEACIkAHCACUARb7eBjWo/btgFALqnv/XizbtvIidSMERDi9Vevbt5/nkXAXccgYQDY9x0AlvVQ61bKnGQy62NUZhTJw9B9ELMFCqaIi6sxoqSl1urhEQHMOaXtfBLmNKE7IAoEc042ho6RU6Ii+k6tOSUNcXfrYyoFKIgMf4FDlvAR2oTR1AFCUt7qniUzs6qmlCygbS2lgsIIHu84s4xx7ZEGmWNehPIYiggRMaIzglsQCguZeU7ZNIACwCICkSH0fH46HldVn5e11mFmKTN46mNf11U1RtdccIxe8nI+PZVSiNFd3YgYEMkUIpyIVHWe59PlYZrW1jqAC5NIHhosOWU0tdPT28Mh910JExAT8TRNaq33wZQsjAgRYYwWRtfr5YMPXrZWmQsg9q4554iH09N1Xe8DmYndVAdJEuK27zsz55Qd3kFEQPLRAwBExMzcQ7gw4RgXRyEU4tAOfbR1ntxs7w1/8p+/1/dxvb4+rAfJN2maX7364mvv36gRsyBCwHAv7oqMACBYhg5mUOvgI5iQJvA257LvVwBwoP10yXO+u3+x1SuQiI2PP/vy5ji/+vzjl9/85rrm7eHp+P7zy6Ueb9fLeV/KpIMQHpe7r79+9ek83YhIbdfe9Pb+aMYidD2/8UCrr7c6Divf3v/K4+lhmW4C/eHN2/u7OxEJNCTpvY1RAXVZZ+sLYkheLufHsHF7f3QAiBht5CTmrXct06rD3IZ6INcEy/CY15vWRmJKEo+nUyklc+lOwuFmJc/79mRQUzrkvKgBOgy9TOt6Pe1PT0/f+OYHZuaRkJyZxnDAYMqjj1yK6RARQHC3VObRGyFQRDeN8JwzM6sOdyAkQlKtajrNs0PsT9vx2bPMcd1b4hS+9330WjFBpkmyOBXtGwITsXsg+eg6zcV9IDIzuao7IkigO7ik3HubyhzmqnuYqY4+OkDc3L7vAKkkIehtRyczdwhCfXh8+7VnH277NeVMJKfTSYQIlVlKmc7np2W+8V+KCORE5OHJw0QoHN19aAMA+SXTcK8OPC9Jqw/dcpn2vR2Px66QBd2HMA71xKW1Shz73g7rzV7P83Rn4SIIbnvdRLhVI3YE7n2s62pmqoosTMltl1QA3GwIZwCmBK4pYg/nCFWzaV5M3VzxF0BV57K0sYnI08NlPRyJw93MoiTcO5UJrELK2NU9GkValvl0uaQk4cqIQ5u7I1JOqamZ9yyT0HTd35R85BRjjP30qtWWpnsd2zrfNo3D7W3tJ4osEgAekMHgHUdIFGbm7qqKP/4v/4pAAHR04MySkrszsY2dfslsIE9mbbs+uO7HuxcR4e4AGOa5yNPjPi95jFZSQaQ67O52fnp6RCjznBViZhqOo28Pbx9rNTX/AAACAUlEQVSePX92vUC9nMsBQgHB7u/e7/xO8v2x2b5ML66XjgiHYxnv6DYvN6rWmx1vb/r5rYYnSg+X13e3L1lYgdZ1vW6bCONQM3c3DyWaiEE4D23hTug6WipZhwc4ACBQEty2HQhzLgRhgaYjXMqSzYwJiWLfGiAhEgFypndaq+HRu97fL+G5qyIChgT0AGCa1QYTsdDj0+X29kBEYwxEUMOcCkQEqLszAyKO2pCIUwYk8G7qOWcz88jhDVFbuwKk0UeZ8xh9ShKSBb1rTNPd6NfEk2mXkgjydXss04EQxxgiiYgdgpDcnQW6OkKEu2sQsVkHiAAkkXBz05LFPSMEANS6hXeZppTz49vX84zXp93M15t1u/YXzz+8bF8c1mfmBkC9t1yEMQ3thGDmzNh7jwgRCQAWAk+1Xad51hEpJUADgNEVCYmAMAfQXk9zXva255ymkgPQTInQVEtOqiMczN+xVmvKk2QMozwlfafrNE3bdnXn9ZB68zJRbyMCiUSj57S4NlVMSVq7eGhrNedjSktEZ2IAZ5GhBoBEtG815+wepSQ3HWOkPI0xWECEEXm0vUyT2SAoAbrtIxcMQ3cjESKEMLOYipyenryrTEy8eJjwlHPZ6yNhivDhnjGN0YFxNCMYzBAUX35+vbu9z9ncw1x9tDEG5zzN85s3b+Z5zjn/fw9kHoeHyfXhAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import cv2\n",
+ "from mmpose.apis import (inference_top_down_pose_model, init_pose_model,\n",
+ " vis_pose_result, process_mmdet_results)\n",
+ "from mmdet.apis import inference_detector, init_detector\n",
+ "local_runtime = False\n",
+ "\n",
+ "try:\n",
+ " from google.colab.patches import cv2_imshow # for image visualization in colab\n",
+ "except:\n",
+ " local_runtime = True\n",
+ "\n",
+ "pose_config = 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py'\n",
+ "pose_checkpoint = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'\n",
+ "det_config = 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'\n",
+ "det_checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'\n",
+ "\n",
+ "# initialize pose model\n",
+ "pose_model = init_pose_model(pose_config, pose_checkpoint)\n",
+ "# initialize detector\n",
+ "det_model = init_detector(det_config, det_checkpoint)\n",
+ "\n",
+ "img = 'tests/data/coco/000000196141.jpg'\n",
+ "\n",
+ "# inference detection\n",
+ "mmdet_results = inference_detector(det_model, img)\n",
+ "\n",
+ "# extract person (COCO_ID=1) bounding boxes from the detection results\n",
+ "person_results = process_mmdet_results(mmdet_results, cat_id=1)\n",
+ "\n",
+ "# inference pose\n",
+ "pose_results, returned_outputs = inference_top_down_pose_model(pose_model,\n",
+ " img,\n",
+ " person_results,\n",
+ " bbox_thr=0.3,\n",
+ " format='xyxy',\n",
+ " dataset=pose_model.cfg.data.test.type)\n",
+ "\n",
+ "# show pose estimation results\n",
+ "vis_result = vis_pose_result(pose_model,\n",
+ " img,\n",
+ " pose_results,\n",
+ " dataset=pose_model.cfg.data.test.type,\n",
+ " show=False)\n",
+ "# reduce image size\n",
+ "vis_result = cv2.resize(vis_result, dsize=None, fx=0.5, fy=0.5)\n",
+ "\n",
+ "if local_runtime:\n",
+ " from IPython.display import Image, display\n",
+ " import tempfile\n",
+ " import os.path as osp\n",
+ " with tempfile.TemporaryDirectory() as tmpdir:\n",
+ " file_name = osp.join(tmpdir, 'pose_results.png')\n",
+ " cv2.imwrite(file_name, vis_result)\n",
+ " display(Image(file_name))\n",
+ "else:\n",
+ " cv2_imshow(vis_result)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "mOulhU_Wsr_S"
+ },
+ "source": [
+ "## Train a pose estimation model on a customized dataset\n",
+ "\n",
+ "To train a model on a customized dataset with MMPose, there are usually three steps:\n",
+ "1. Support the dataset in MMPose\n",
+ "1. Create a config\n",
+ "1. Perform training and evaluation\n",
+ "\n",
+ "### Add a new dataset\n",
+ "\n",
+ "There are two methods to support a customized dataset in MMPose. The first one is to convert the data to a supported format (e.g. COCO) and use the corresponding dataset class (e.g. TopdownCOCODataset), as described in the [document](https://mmpose.readthedocs.io/en/latest/tutorials/2_new_dataset.html#reorganize-dataset-to-existing-format). The second one is to add a new dataset class. In this tutorial, we give an example of the second method.\n",
+ "\n",
+ "We first download the demo dataset, which contains 100 samples (75 for training and 25 for validation) selected from COCO train2017 dataset. The annotations are stored in a different format from the original COCO format.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "tlSP8JNr9pEr",
+ "outputId": "aee224ab-4469-40c6-8b41-8591d92aafb3"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "mkdir: cannot create directory ‘data’: File exists\n",
+ "/home/PJLAB/liyining/openmmlab/mmpose/data\n",
+ "--2021-09-22 22:27:21-- https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/datasets/coco_tiny.tar\n",
+ "Resolving openmmlab.oss-cn-hangzhou.aliyuncs.com (openmmlab.oss-cn-hangzhou.aliyuncs.com)... 124.160.145.51\n",
+ "Connecting to openmmlab.oss-cn-hangzhou.aliyuncs.com (openmmlab.oss-cn-hangzhou.aliyuncs.com)|124.160.145.51|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 16558080 (16M) [application/x-tar]\n",
+ "Saving to: ‘coco_tiny.tar.1’\n",
+ "\n",
+ "coco_tiny.tar.1 100%[===================>] 15.79M 14.7MB/s in 1.1s \n",
+ "\n",
+ "2021-09-22 22:27:24 (14.7 MB/s) - ‘coco_tiny.tar.1’ saved [16558080/16558080]\n",
+ "\n",
+ "/home/PJLAB/liyining/openmmlab/mmpose\n"
+ ]
+ }
+ ],
+ "source": [
+ "# download dataset\n",
+ "%mkdir data\n",
+ "%cd data\n",
+ "!wget https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/datasets/coco_tiny.tar\n",
+ "!tar -xf coco_tiny.tar\n",
+ "%cd .."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UDzqo6pwB-Zz",
+ "outputId": "96bb444c-94c5-4b8a-cc63-0a94f16ebf95"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)\r\n",
+ "E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?\n",
+ "\u001b[01;34mdata/coco_tiny\u001b[00m\n",
+ "├── \u001b[01;34mimages\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000012754.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000017741.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000019157.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000019523.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000019608.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000022816.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000031092.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000032124.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000037209.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000050713.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000057703.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000064909.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000076942.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000079754.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000083935.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000085316.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000101013.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000101172.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000103134.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000103163.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000105647.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000107960.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000117891.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000118181.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000120021.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000128119.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000143908.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000145025.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000147386.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000147979.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000154222.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000160190.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000161112.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000175737.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000177069.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000184659.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000209468.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000210060.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000215867.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000216861.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000227224.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000246265.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000254919.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000263687.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000264628.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000268927.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000271177.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000275219.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000277542.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000279140.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000286813.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000297980.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000301641.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000312341.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000325768.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000332221.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000345071.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000346965.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000347836.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000349437.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000360735.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000362343.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000364079.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000364113.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000386279.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000386968.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000388619.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000390137.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000390241.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000390298.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000390348.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000398606.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000400456.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000402514.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000403255.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000403432.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000410350.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000453065.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000457254.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000464153.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000464515.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000465418.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000480591.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000484279.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000494014.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000515289.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000516805.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000521994.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000528962.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000534736.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000535588.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000537548.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000553698.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000555622.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000566456.jpg\u001b[00m\n",
+ "│ ├── \u001b[01;35m000000567171.jpg\u001b[00m\n",
+ "│ └── \u001b[01;35m000000568961.jpg\u001b[00m\n",
+ "├── train.json\n",
+ "└── val.json\n",
+ "\n",
+ "1 directory, 99 files\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check the directory structure\n",
+ "!apt-get -q install tree\n",
+ "!tree data/coco_tiny"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ef-045CUCdb3",
+ "outputId": "5a39b30a-8e6c-4754-8908-9ea13b91c22b"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 75\n",
+ "{'bbox': [267.03, 104.32, 229.19, 320],\n",
+ " 'image_file': '000000537548.jpg',\n",
+ " 'image_size': [640, 480],\n",
+ " 'keypoints': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 325, 160, 2, 398,\n",
+ " 177, 2, 0, 0, 0, 437, 238, 2, 0, 0, 0, 477, 270, 2, 287, 255, 1,\n",
+ " 339, 267, 2, 0, 0, 0, 423, 314, 2, 0, 0, 0, 355, 367, 2]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check the annotation format\n",
+ "import json\n",
+ "import pprint\n",
+ "\n",
+ "anns = json.load(open('data/coco_tiny/train.json'))\n",
+ "\n",
+ "print(type(anns), len(anns))\n",
+ "pprint.pprint(anns[0], compact=True)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "r4Dt1io8D7m8"
+ },
+ "source": [
+ "After downloading the data, we implement a new dataset class to load data samples for model training and validation. Assume that we are going to train a top-down pose estimation model (refer to [Top-down Pose Estimation](https://github.com/open-mmlab/mmpose/tree/master/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap#readme) for a brief introduction), the new dataset class inherits `TopDownBaseDataset`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "WR9ZVXuPFy4v"
+ },
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import os\n",
+ "import os.path as osp\n",
+ "from collections import OrderedDict\n",
+ "import tempfile\n",
+ "\n",
+ "import numpy as np\n",
+ "\n",
+ "from mmpose.core.evaluation.top_down_eval import (keypoint_nme,\n",
+ " keypoint_pck_accuracy)\n",
+ "from mmpose.datasets.builder import DATASETS\n",
+ "from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset\n",
+ "\n",
+ "\n",
+ "@DATASETS.register_module()\n",
+ "class TopDownCOCOTinyDataset(Kpt2dSviewRgbImgTopDownDataset):\n",
+ "\n",
+ "\tdef __init__(self,\n",
+ "\t\t\t\t ann_file,\n",
+ "\t\t\t\t img_prefix,\n",
+ "\t\t\t\t data_cfg,\n",
+ "\t\t\t\t pipeline,\n",
+ "\t\t\t\t dataset_info=None,\n",
+ "\t\t\t\t test_mode=False):\n",
+ "\t\tsuper().__init__(\n",
+ "\t\t\tann_file, img_prefix, data_cfg, pipeline, dataset_info, coco_style=False, test_mode=test_mode)\n",
+ "\n",
+ "\t\t# flip_pairs, upper_body_ids and lower_body_ids will be used\n",
+ "\t\t# in some data augmentations like random flip\n",
+ "\t\tself.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],\n",
+ "\t\t\t\t\t\t\t\t\t [11, 12], [13, 14], [15, 16]]\n",
+ "\t\tself.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)\n",
+ "\t\tself.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)\n",
+ "\n",
+ "\t\tself.ann_info['joint_weights'] = None\n",
+ "\t\tself.ann_info['use_different_joint_weights'] = False\n",
+ "\n",
+ "\t\tself.dataset_name = 'coco_tiny'\n",
+ "\t\tself.db = self._get_db()\n",
+ "\n",
+ "\tdef _get_db(self):\n",
+ "\t\twith open(self.ann_file) as f:\n",
+ "\t\t\tanns = json.load(f)\n",
+ "\n",
+ "\t\tdb = []\n",
+ "\t\tfor idx, ann in enumerate(anns):\n",
+ "\t\t\t# get image path\n",
+ "\t\t\timage_file = osp.join(self.img_prefix, ann['image_file'])\n",
+ "\t\t\t# get bbox\n",
+ "\t\t\tbbox = ann['bbox']\n",
+ "\t\t\tcenter, scale = self._xywh2cs(*bbox)\n",
+ "\t\t\t# get keypoints\n",
+ "\t\t\tkeypoints = np.array(\n",
+ "\t\t\t\tann['keypoints'], dtype=np.float32).reshape(-1, 3)\n",
+ "\t\t\tnum_joints = keypoints.shape[0]\n",
+ "\t\t\tjoints_3d = np.zeros((num_joints, 3), dtype=np.float32)\n",
+ "\t\t\tjoints_3d[:, :2] = keypoints[:, :2]\n",
+ "\t\t\tjoints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)\n",
+ "\t\t\tjoints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])\n",
+ "\n",
+ "\t\t\tsample = {\n",
+ "\t\t\t\t'image_file': image_file,\n",
+ "\t\t\t\t'center': center,\n",
+ "\t\t\t\t'scale': scale,\n",
+ "\t\t\t\t'bbox': bbox,\n",
+ "\t\t\t\t'rotation': 0,\n",
+ "\t\t\t\t'joints_3d': joints_3d,\n",
+ "\t\t\t\t'joints_3d_visible': joints_3d_visible,\n",
+ "\t\t\t\t'bbox_score': 1,\n",
+ "\t\t\t\t'bbox_id': idx,\n",
+ "\t\t\t}\n",
+ "\t\t\tdb.append(sample)\n",
+ "\n",
+ "\t\treturn db\n",
+ "\n",
+ "\tdef _xywh2cs(self, x, y, w, h):\n",
+ "\t\t\"\"\"This encodes bbox(x, y, w, h) into (center, scale)\n",
+ "\t\tArgs:\n",
+ "\t\t\tx, y, w, h\n",
+ "\t\tReturns:\n",
+ "\t\t\ttuple: A tuple containing center and scale.\n",
+ "\t\t\t- center (np.ndarray[float32](2,)): center of the bbox (x, y).\n",
+ "\t\t\t- scale (np.ndarray[float32](2,)): scale of the bbox w & h.\n",
+ "\t\t\"\"\"\n",
+ "\t\taspect_ratio = self.ann_info['image_size'][0] / self.ann_info[\n",
+ "\t\t\t'image_size'][1]\n",
+ "\t\tcenter = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)\n",
+ "\t\tif w > aspect_ratio * h:\n",
+ "\t\t\th = w * 1.0 / aspect_ratio\n",
+ "\t\telif w < aspect_ratio * h:\n",
+ "\t\t\tw = h * aspect_ratio\n",
+ "\n",
+ "\t\t# pixel std is 200.0\n",
+ "\t\tscale = np.array([w / 200.0, h / 200.0], dtype=np.float32)\n",
+ "\t\t# padding to include proper amount of context\n",
+ "\t\tscale = scale * 1.25\n",
+ "\t\treturn center, scale\n",
+ "\n",
+ "\tdef evaluate(self, results, res_folder=None, metric='PCK', **kwargs):\n",
+ "\t\t\"\"\"Evaluate keypoint detection results. The pose prediction results will\n",
+ "\t\tbe saved in `${res_folder}/result_keypoints.json`.\n",
+ "\n",
+ "\t\tNote:\n",
+ "\t\tbatch_size: N\n",
+ "\t\tnum_keypoints: K\n",
+ "\t\theatmap height: H\n",
+ "\t\theatmap width: W\n",
+ "\n",
+ "\t\tArgs:\n",
+ "\t\tresults (list(preds, boxes, image_path, output_heatmap))\n",
+ "\t\t\t:preds (np.ndarray[N,K,3]): The first two dimensions are\n",
+ "\t\t\t\tcoordinates, score is the third dimension of the array.\n",
+ "\t\t\t:boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]\n",
+ "\t\t\t\t, scale[1],area, score]\n",
+ "\t\t\t:image_paths (list[str]): For example, ['Test/source/0.jpg']\n",
+ "\t\t\t:output_heatmap (np.ndarray[N, K, H, W]): model outputs.\n",
+ "\n",
+ "\t\tres_folder (str, optional): The folder to save the testing\n",
+ " results. If not specified, a temp folder will be created.\n",
+ " Default: None.\n",
+ "\t\tmetric (str | list[str]): Metric to be performed.\n",
+ "\t\t\tOptions: 'PCK', 'NME'.\n",
+ "\n",
+ "\t\tReturns:\n",
+ "\t\t\tdict: Evaluation results for evaluation metric.\n",
+ "\t\t\"\"\"\n",
+ "\t\tmetrics = metric if isinstance(metric, list) else [metric]\n",
+ "\t\tallowed_metrics = ['PCK', 'NME']\n",
+ "\t\tfor metric in metrics:\n",
+ "\t\t\tif metric not in allowed_metrics:\n",
+ "\t\t\t\traise KeyError(f'metric {metric} is not supported')\n",
+ "\n",
+ "\t\tif res_folder is not None:\n",
+ " tmp_folder = None\n",
+ " res_file = osp.join(res_folder, 'result_keypoints.json')\n",
+ " else:\n",
+ " tmp_folder = tempfile.TemporaryDirectory()\n",
+ " res_file = osp.join(tmp_folder.name, 'result_keypoints.json')\n",
+ "\n",
+ "\t\tkpts = []\n",
+ "\t\tfor result in results:\n",
+ "\t\t\tpreds = result['preds']\n",
+ "\t\t\tboxes = result['boxes']\n",
+ "\t\t\timage_paths = result['image_paths']\n",
+ "\t\t\tbbox_ids = result['bbox_ids']\n",
+ "\n",
+ "\t\t\tbatch_size = len(image_paths)\n",
+ "\t\t\tfor i in range(batch_size):\n",
+ "\t\t\t\tkpts.append({\n",
+ "\t\t\t\t\t'keypoints': preds[i].tolist(),\n",
+ "\t\t\t\t\t'center': boxes[i][0:2].tolist(),\n",
+ "\t\t\t\t\t'scale': boxes[i][2:4].tolist(),\n",
+ "\t\t\t\t\t'area': float(boxes[i][4]),\n",
+ "\t\t\t\t\t'score': float(boxes[i][5]),\n",
+ "\t\t\t\t\t'bbox_id': bbox_ids[i]\n",
+ "\t\t\t\t})\n",
+ "\t\tkpts = self._sort_and_unique_bboxes(kpts)\n",
+ "\n",
+ "\t\tself._write_keypoint_results(kpts, res_file)\n",
+ "\t\tinfo_str = self._report_metric(res_file, metrics)\n",
+ "\t\tname_value = OrderedDict(info_str)\n",
+ "\n",
+ "\t\tif tmp_folder is not None:\n",
+ "\t\t\ttmp_folder.cleanup()\n",
+ "\n",
+ "\t\treturn name_value\n",
+ "\n",
+ "\tdef _report_metric(self, res_file, metrics, pck_thr=0.3):\n",
+ "\t\t\"\"\"Keypoint evaluation.\n",
+ "\n",
+ "\t\tArgs:\n",
+ "\t\tres_file (str): Json file stored prediction results.\n",
+ "\t\tmetrics (str | list[str]): Metric to be performed.\n",
+ "\t\t\tOptions: 'PCK', 'NME'.\n",
+ "\t\tpck_thr (float): PCK threshold, default: 0.3.\n",
+ "\n",
+ "\t\tReturns:\n",
+ "\t\tdict: Evaluation results for evaluation metric.\n",
+ "\t\t\"\"\"\n",
+ "\t\tinfo_str = []\n",
+ "\n",
+ "\t\twith open(res_file, 'r') as fin:\n",
+ "\t\t\tpreds = json.load(fin)\n",
+ "\t\tassert len(preds) == len(self.db)\n",
+ "\n",
+ "\t\toutputs = []\n",
+ "\t\tgts = []\n",
+ "\t\tmasks = []\n",
+ "\n",
+ "\t\tfor pred, item in zip(preds, self.db):\n",
+ "\t\t\toutputs.append(np.array(pred['keypoints'])[:, :-1])\n",
+ "\t\t\tgts.append(np.array(item['joints_3d'])[:, :-1])\n",
+ "\t\t\tmasks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)\n",
+ "\n",
+ "\t\toutputs = np.array(outputs)\n",
+ "\t\tgts = np.array(gts)\n",
+ "\t\tmasks = np.array(masks)\n",
+ "\n",
+ "\t\tnormalize_factor = self._get_normalize_factor(gts)\n",
+ "\n",
+ "\t\tif 'PCK' in metrics:\n",
+ "\t\t\t_, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,\n",
+ "\t\t\t\t\t\t\t\t\t\t\t normalize_factor)\n",
+ "\t\t\tinfo_str.append(('PCK', pck))\n",
+ "\n",
+ "\t\tif 'NME' in metrics:\n",
+ "\t\t\tinfo_str.append(\n",
+ "\t\t\t\t('NME', keypoint_nme(outputs, gts, masks, normalize_factor)))\n",
+ "\n",
+ "\t\treturn info_str\n",
+ "\n",
+ "\t@staticmethod\n",
+ "\tdef _write_keypoint_results(keypoints, res_file):\n",
+ "\t\t\"\"\"Write results into a json file.\"\"\"\n",
+ "\n",
+ "\t\twith open(res_file, 'w') as f:\n",
+ "\t\t\tjson.dump(keypoints, f, sort_keys=True, indent=4)\n",
+ "\n",
+ "\t@staticmethod\n",
+ "\tdef _sort_and_unique_bboxes(kpts, key='bbox_id'):\n",
+ "\t\t\"\"\"sort kpts and remove the repeated ones.\"\"\"\n",
+ "\t\tkpts = sorted(kpts, key=lambda x: x[key])\n",
+ "\t\tnum = len(kpts)\n",
+ "\t\tfor i in range(num - 1, 0, -1):\n",
+ "\t\t\tif kpts[i][key] == kpts[i - 1][key]:\n",
+ "\t\t\t\tdel kpts[i]\n",
+ "\n",
+ "\t\treturn kpts\n",
+ "\t\n",
+ "\t@staticmethod\n",
+ "\tdef _get_normalize_factor(gts):\n",
+ "\t\t\"\"\"Get inter-ocular distance as the normalize factor, measured as the\n",
+ "\t\tEuclidean distance between the outer corners of the eyes.\n",
+ "\n",
+ "\t\tArgs:\n",
+ "\t\t\tgts (np.ndarray[N, K, 2]): Groundtruth keypoint location.\n",
+ "\n",
+ "\t\tReturn:\n",
+ "\t\t\tnp.ndarray[N, 2]: normalized factor\n",
+ "\t\t\"\"\"\n",
+ "\n",
+ "\t\tinterocular = np.linalg.norm(\n",
+ "\t\t\tgts[:, 0, :] - gts[:, 1, :], axis=1, keepdims=True)\n",
+ "\t\treturn np.tile(interocular, [1, 2])\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gh05C4mBl_u-"
+ },
+ "source": [
+ "### Create a config file\n",
+ "\n",
+ "In the next step, we create a config file which configures the model, dataset and runtime settings. More information can be found at [Learn about Configs](https://mmpose.readthedocs.io/en/latest/tutorials/0_config.html). A common practice to create a config file is deriving from a existing one. In this tutorial, we load a config file that trains a HRNet on COCO dataset, and modify it to adapt to the COCOTiny dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "n-z89qCJoWwL",
+ "outputId": "a3f6817e-b448-463d-d3df-2c5519efa99c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "dataset_info = dict(\n",
+ " dataset_name='coco',\n",
+ " paper_info=dict(\n",
+ " author=\n",
+ " 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\\'a}r, Piotr and Zitnick, C Lawrence',\n",
+ " title='Microsoft coco: Common objects in context',\n",
+ " container='European conference on computer vision',\n",
+ " year='2014',\n",
+ " homepage='http://cocodataset.org/'),\n",
+ " keypoint_info=dict({\n",
+ " 0:\n",
+ " dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),\n",
+ " 1:\n",
+ " dict(\n",
+ " name='left_eye',\n",
+ " id=1,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_eye'),\n",
+ " 2:\n",
+ " dict(\n",
+ " name='right_eye',\n",
+ " id=2,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_eye'),\n",
+ " 3:\n",
+ " dict(\n",
+ " name='left_ear',\n",
+ " id=3,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_ear'),\n",
+ " 4:\n",
+ " dict(\n",
+ " name='right_ear',\n",
+ " id=4,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_ear'),\n",
+ " 5:\n",
+ " dict(\n",
+ " name='left_shoulder',\n",
+ " id=5,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_shoulder'),\n",
+ " 6:\n",
+ " dict(\n",
+ " name='right_shoulder',\n",
+ " id=6,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_shoulder'),\n",
+ " 7:\n",
+ " dict(\n",
+ " name='left_elbow',\n",
+ " id=7,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_elbow'),\n",
+ " 8:\n",
+ " dict(\n",
+ " name='right_elbow',\n",
+ " id=8,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_elbow'),\n",
+ " 9:\n",
+ " dict(\n",
+ " name='left_wrist',\n",
+ " id=9,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_wrist'),\n",
+ " 10:\n",
+ " dict(\n",
+ " name='right_wrist',\n",
+ " id=10,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_wrist'),\n",
+ " 11:\n",
+ " dict(\n",
+ " name='left_hip',\n",
+ " id=11,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_hip'),\n",
+ " 12:\n",
+ " dict(\n",
+ " name='right_hip',\n",
+ " id=12,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_hip'),\n",
+ " 13:\n",
+ " dict(\n",
+ " name='left_knee',\n",
+ " id=13,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_knee'),\n",
+ " 14:\n",
+ " dict(\n",
+ " name='right_knee',\n",
+ " id=14,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_knee'),\n",
+ " 15:\n",
+ " dict(\n",
+ " name='left_ankle',\n",
+ " id=15,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_ankle'),\n",
+ " 16:\n",
+ " dict(\n",
+ " name='right_ankle',\n",
+ " id=16,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_ankle')\n",
+ " }),\n",
+ " skeleton_info=dict({\n",
+ " 0:\n",
+ " dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),\n",
+ " 1:\n",
+ " dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),\n",
+ " 2:\n",
+ " dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),\n",
+ " 3:\n",
+ " dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),\n",
+ " 4:\n",
+ " dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),\n",
+ " 5:\n",
+ " dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),\n",
+ " 6:\n",
+ " dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),\n",
+ " 7:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'right_shoulder'),\n",
+ " id=7,\n",
+ " color=[51, 153, 255]),\n",
+ " 8:\n",
+ " dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),\n",
+ " 9:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),\n",
+ " 10:\n",
+ " dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),\n",
+ " 11:\n",
+ " dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),\n",
+ " 12:\n",
+ " dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),\n",
+ " 13:\n",
+ " dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),\n",
+ " 14:\n",
+ " dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),\n",
+ " 15:\n",
+ " dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),\n",
+ " 16:\n",
+ " dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),\n",
+ " 17:\n",
+ " dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),\n",
+ " 18:\n",
+ " dict(\n",
+ " link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])\n",
+ " }),\n",
+ " joint_weights=[\n",
+ " 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2,\n",
+ " 1.2, 1.5, 1.5\n",
+ " ],\n",
+ " sigmas=[\n",
+ " 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,\n",
+ " 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089\n",
+ " ])\n",
+ "log_level = 'INFO'\n",
+ "load_from = None\n",
+ "resume_from = None\n",
+ "dist_params = dict(backend='nccl')\n",
+ "workflow = [('train', 1)]\n",
+ "checkpoint_config = dict(interval=10)\n",
+ "evaluation = dict(interval=10, metric='PCK', save_best='PCK')\n",
+ "optimizer = dict(type='Adam', lr=0.0005)\n",
+ "optimizer_config = dict(grad_clip=None)\n",
+ "lr_config = dict(\n",
+ " policy='step',\n",
+ " warmup='linear',\n",
+ " warmup_iters=500,\n",
+ " warmup_ratio=0.001,\n",
+ " step=[170, 200])\n",
+ "total_epochs = 40\n",
+ "log_config = dict(interval=1, hooks=[dict(type='TextLoggerHook')])\n",
+ "channel_cfg = dict(\n",
+ " num_output_channels=17,\n",
+ " dataset_joints=17,\n",
+ " dataset_channel=[[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ]],\n",
+ " inference_channel=[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ])\n",
+ "model = dict(\n",
+ " type='TopDown',\n",
+ " pretrained=\n",
+ " 'https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w32-36af842e.pth',\n",
+ " backbone=dict(\n",
+ " type='HRNet',\n",
+ " in_channels=3,\n",
+ " extra=dict(\n",
+ " stage1=dict(\n",
+ " num_modules=1,\n",
+ " num_branches=1,\n",
+ " block='BOTTLENECK',\n",
+ " num_blocks=(4, ),\n",
+ " num_channels=(64, )),\n",
+ " stage2=dict(\n",
+ " num_modules=1,\n",
+ " num_branches=2,\n",
+ " block='BASIC',\n",
+ " num_blocks=(4, 4),\n",
+ " num_channels=(32, 64)),\n",
+ " stage3=dict(\n",
+ " num_modules=4,\n",
+ " num_branches=3,\n",
+ " block='BASIC',\n",
+ " num_blocks=(4, 4, 4),\n",
+ " num_channels=(32, 64, 128)),\n",
+ " stage4=dict(\n",
+ " num_modules=3,\n",
+ " num_branches=4,\n",
+ " block='BASIC',\n",
+ " num_blocks=(4, 4, 4, 4),\n",
+ " num_channels=(32, 64, 128, 256)))),\n",
+ " keypoint_head=dict(\n",
+ " type='TopdownHeatmapSimpleHead',\n",
+ " in_channels=32,\n",
+ " out_channels=17,\n",
+ " num_deconv_layers=0,\n",
+ " extra=dict(final_conv_kernel=1),\n",
+ " loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),\n",
+ " train_cfg=dict(),\n",
+ " test_cfg=dict(\n",
+ " flip_test=True,\n",
+ " post_process='default',\n",
+ " shift_heatmap=True,\n",
+ " modulate_kernel=11))\n",
+ "data_cfg = dict(\n",
+ " image_size=[192, 256],\n",
+ " heatmap_size=[48, 64],\n",
+ " num_output_channels=17,\n",
+ " num_joints=17,\n",
+ " dataset_channel=[[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ]],\n",
+ " inference_channel=[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ],\n",
+ " soft_nms=False,\n",
+ " nms_thr=1.0,\n",
+ " oks_thr=0.9,\n",
+ " vis_thr=0.2,\n",
+ " use_gt_bbox=False,\n",
+ " det_bbox_thr=0.0,\n",
+ " bbox_file=\n",
+ " 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'\n",
+ ")\n",
+ "train_pipeline = [\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownRandomFlip', flip_prob=0.5),\n",
+ " dict(\n",
+ " type='TopDownHalfBodyTransform',\n",
+ " num_joints_half_body=8,\n",
+ " prob_half_body=0.3),\n",
+ " dict(\n",
+ " type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(type='TopDownGenerateTarget', sigma=2),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img', 'target', 'target_weight'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',\n",
+ " 'rotation', 'bbox_score', 'flip_pairs'\n",
+ " ])\n",
+ "]\n",
+ "val_pipeline = [\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'center', 'scale', 'rotation', 'bbox_score',\n",
+ " 'flip_pairs'\n",
+ " ])\n",
+ "]\n",
+ "test_pipeline = [\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'center', 'scale', 'rotation', 'bbox_score',\n",
+ " 'flip_pairs'\n",
+ " ])\n",
+ "]\n",
+ "data_root = 'data/coco_tiny'\n",
+ "data = dict(\n",
+ " samples_per_gpu=16,\n",
+ " workers_per_gpu=2,\n",
+ " val_dataloader=dict(samples_per_gpu=16),\n",
+ " test_dataloader=dict(samples_per_gpu=16),\n",
+ " train=dict(\n",
+ " type='TopDownCOCOTinyDataset',\n",
+ " ann_file='data/coco_tiny/train.json',\n",
+ " img_prefix='data/coco_tiny/images/',\n",
+ " data_cfg=dict(\n",
+ " image_size=[192, 256],\n",
+ " heatmap_size=[48, 64],\n",
+ " num_output_channels=17,\n",
+ " num_joints=17,\n",
+ " dataset_channel=[[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ]],\n",
+ " inference_channel=[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ],\n",
+ " soft_nms=False,\n",
+ " nms_thr=1.0,\n",
+ " oks_thr=0.9,\n",
+ " vis_thr=0.2,\n",
+ " use_gt_bbox=False,\n",
+ " det_bbox_thr=0.0,\n",
+ " bbox_file=\n",
+ " 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'\n",
+ " ),\n",
+ " pipeline=[\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownRandomFlip', flip_prob=0.5),\n",
+ " dict(\n",
+ " type='TopDownHalfBodyTransform',\n",
+ " num_joints_half_body=8,\n",
+ " prob_half_body=0.3),\n",
+ " dict(\n",
+ " type='TopDownGetRandomScaleRotation',\n",
+ " rot_factor=40,\n",
+ " scale_factor=0.5),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(type='TopDownGenerateTarget', sigma=2),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img', 'target', 'target_weight'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'joints_3d', 'joints_3d_visible', 'center',\n",
+ " 'scale', 'rotation', 'bbox_score', 'flip_pairs'\n",
+ " ])\n",
+ " ],\n",
+ " dataset_info=dict(\n",
+ " dataset_name='coco',\n",
+ " paper_info=dict(\n",
+ " author=\n",
+ " 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\\'a}r, Piotr and Zitnick, C Lawrence',\n",
+ " title='Microsoft coco: Common objects in context',\n",
+ " container='European conference on computer vision',\n",
+ " year='2014',\n",
+ " homepage='http://cocodataset.org/'),\n",
+ " keypoint_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " name='nose',\n",
+ " id=0,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap=''),\n",
+ " 1:\n",
+ " dict(\n",
+ " name='left_eye',\n",
+ " id=1,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_eye'),\n",
+ " 2:\n",
+ " dict(\n",
+ " name='right_eye',\n",
+ " id=2,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_eye'),\n",
+ " 3:\n",
+ " dict(\n",
+ " name='left_ear',\n",
+ " id=3,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_ear'),\n",
+ " 4:\n",
+ " dict(\n",
+ " name='right_ear',\n",
+ " id=4,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_ear'),\n",
+ " 5:\n",
+ " dict(\n",
+ " name='left_shoulder',\n",
+ " id=5,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_shoulder'),\n",
+ " 6:\n",
+ " dict(\n",
+ " name='right_shoulder',\n",
+ " id=6,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_shoulder'),\n",
+ " 7:\n",
+ " dict(\n",
+ " name='left_elbow',\n",
+ " id=7,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_elbow'),\n",
+ " 8:\n",
+ " dict(\n",
+ " name='right_elbow',\n",
+ " id=8,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_elbow'),\n",
+ " 9:\n",
+ " dict(\n",
+ " name='left_wrist',\n",
+ " id=9,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_wrist'),\n",
+ " 10:\n",
+ " dict(\n",
+ " name='right_wrist',\n",
+ " id=10,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_wrist'),\n",
+ " 11:\n",
+ " dict(\n",
+ " name='left_hip',\n",
+ " id=11,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_hip'),\n",
+ " 12:\n",
+ " dict(\n",
+ " name='right_hip',\n",
+ " id=12,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_hip'),\n",
+ " 13:\n",
+ " dict(\n",
+ " name='left_knee',\n",
+ " id=13,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_knee'),\n",
+ " 14:\n",
+ " dict(\n",
+ " name='right_knee',\n",
+ " id=14,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_knee'),\n",
+ " 15:\n",
+ " dict(\n",
+ " name='left_ankle',\n",
+ " id=15,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_ankle'),\n",
+ " 16:\n",
+ " dict(\n",
+ " name='right_ankle',\n",
+ " id=16,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_ankle')\n",
+ " }),\n",
+ " skeleton_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),\n",
+ " 1:\n",
+ " dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),\n",
+ " 2:\n",
+ " dict(\n",
+ " link=('right_ankle', 'right_knee'),\n",
+ " id=2,\n",
+ " color=[255, 128, 0]),\n",
+ " 3:\n",
+ " dict(\n",
+ " link=('right_knee', 'right_hip'),\n",
+ " id=3,\n",
+ " color=[255, 128, 0]),\n",
+ " 4:\n",
+ " dict(\n",
+ " link=('left_hip', 'right_hip'), id=4, color=[51, 153,\n",
+ " 255]),\n",
+ " 5:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_hip'),\n",
+ " id=5,\n",
+ " color=[51, 153, 255]),\n",
+ " 6:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_hip'),\n",
+ " id=6,\n",
+ " color=[51, 153, 255]),\n",
+ " 7:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'right_shoulder'),\n",
+ " id=7,\n",
+ " color=[51, 153, 255]),\n",
+ " 8:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_elbow'),\n",
+ " id=8,\n",
+ " color=[0, 255, 0]),\n",
+ " 9:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_elbow'),\n",
+ " id=9,\n",
+ " color=[255, 128, 0]),\n",
+ " 10:\n",
+ " dict(\n",
+ " link=('left_elbow', 'left_wrist'),\n",
+ " id=10,\n",
+ " color=[0, 255, 0]),\n",
+ " 11:\n",
+ " dict(\n",
+ " link=('right_elbow', 'right_wrist'),\n",
+ " id=11,\n",
+ " color=[255, 128, 0]),\n",
+ " 12:\n",
+ " dict(\n",
+ " link=('left_eye', 'right_eye'),\n",
+ " id=12,\n",
+ " color=[51, 153, 255]),\n",
+ " 13:\n",
+ " dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),\n",
+ " 14:\n",
+ " dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),\n",
+ " 15:\n",
+ " dict(\n",
+ " link=('left_eye', 'left_ear'), id=15, color=[51, 153,\n",
+ " 255]),\n",
+ " 16:\n",
+ " dict(\n",
+ " link=('right_eye', 'right_ear'),\n",
+ " id=16,\n",
+ " color=[51, 153, 255]),\n",
+ " 17:\n",
+ " dict(\n",
+ " link=('left_ear', 'left_shoulder'),\n",
+ " id=17,\n",
+ " color=[51, 153, 255]),\n",
+ " 18:\n",
+ " dict(\n",
+ " link=('right_ear', 'right_shoulder'),\n",
+ " id=18,\n",
+ " color=[51, 153, 255])\n",
+ " }),\n",
+ " joint_weights=[\n",
+ " 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,\n",
+ " 1.0, 1.2, 1.2, 1.5, 1.5\n",
+ " ],\n",
+ " sigmas=[\n",
+ " 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,\n",
+ " 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089\n",
+ " ])),\n",
+ " val=dict(\n",
+ " type='TopDownCOCOTinyDataset',\n",
+ " ann_file='data/coco_tiny/val.json',\n",
+ " img_prefix='data/coco_tiny/images/',\n",
+ " data_cfg=dict(\n",
+ " image_size=[192, 256],\n",
+ " heatmap_size=[48, 64],\n",
+ " num_output_channels=17,\n",
+ " num_joints=17,\n",
+ " dataset_channel=[[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ]],\n",
+ " inference_channel=[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ],\n",
+ " soft_nms=False,\n",
+ " nms_thr=1.0,\n",
+ " oks_thr=0.9,\n",
+ " vis_thr=0.2,\n",
+ " use_gt_bbox=False,\n",
+ " det_bbox_thr=0.0,\n",
+ " bbox_file=\n",
+ " 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'\n",
+ " ),\n",
+ " pipeline=[\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'center', 'scale', 'rotation', 'bbox_score',\n",
+ " 'flip_pairs'\n",
+ " ])\n",
+ " ],\n",
+ " dataset_info=dict(\n",
+ " dataset_name='coco',\n",
+ " paper_info=dict(\n",
+ " author=\n",
+ " 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\\'a}r, Piotr and Zitnick, C Lawrence',\n",
+ " title='Microsoft coco: Common objects in context',\n",
+ " container='European conference on computer vision',\n",
+ " year='2014',\n",
+ " homepage='http://cocodataset.org/'),\n",
+ " keypoint_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " name='nose',\n",
+ " id=0,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap=''),\n",
+ " 1:\n",
+ " dict(\n",
+ " name='left_eye',\n",
+ " id=1,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_eye'),\n",
+ " 2:\n",
+ " dict(\n",
+ " name='right_eye',\n",
+ " id=2,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_eye'),\n",
+ " 3:\n",
+ " dict(\n",
+ " name='left_ear',\n",
+ " id=3,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_ear'),\n",
+ " 4:\n",
+ " dict(\n",
+ " name='right_ear',\n",
+ " id=4,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_ear'),\n",
+ " 5:\n",
+ " dict(\n",
+ " name='left_shoulder',\n",
+ " id=5,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_shoulder'),\n",
+ " 6:\n",
+ " dict(\n",
+ " name='right_shoulder',\n",
+ " id=6,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_shoulder'),\n",
+ " 7:\n",
+ " dict(\n",
+ " name='left_elbow',\n",
+ " id=7,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_elbow'),\n",
+ " 8:\n",
+ " dict(\n",
+ " name='right_elbow',\n",
+ " id=8,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_elbow'),\n",
+ " 9:\n",
+ " dict(\n",
+ " name='left_wrist',\n",
+ " id=9,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_wrist'),\n",
+ " 10:\n",
+ " dict(\n",
+ " name='right_wrist',\n",
+ " id=10,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_wrist'),\n",
+ " 11:\n",
+ " dict(\n",
+ " name='left_hip',\n",
+ " id=11,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_hip'),\n",
+ " 12:\n",
+ " dict(\n",
+ " name='right_hip',\n",
+ " id=12,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_hip'),\n",
+ " 13:\n",
+ " dict(\n",
+ " name='left_knee',\n",
+ " id=13,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_knee'),\n",
+ " 14:\n",
+ " dict(\n",
+ " name='right_knee',\n",
+ " id=14,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_knee'),\n",
+ " 15:\n",
+ " dict(\n",
+ " name='left_ankle',\n",
+ " id=15,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_ankle'),\n",
+ " 16:\n",
+ " dict(\n",
+ " name='right_ankle',\n",
+ " id=16,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_ankle')\n",
+ " }),\n",
+ " skeleton_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),\n",
+ " 1:\n",
+ " dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),\n",
+ " 2:\n",
+ " dict(\n",
+ " link=('right_ankle', 'right_knee'),\n",
+ " id=2,\n",
+ " color=[255, 128, 0]),\n",
+ " 3:\n",
+ " dict(\n",
+ " link=('right_knee', 'right_hip'),\n",
+ " id=3,\n",
+ " color=[255, 128, 0]),\n",
+ " 4:\n",
+ " dict(\n",
+ " link=('left_hip', 'right_hip'), id=4, color=[51, 153,\n",
+ " 255]),\n",
+ " 5:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_hip'),\n",
+ " id=5,\n",
+ " color=[51, 153, 255]),\n",
+ " 6:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_hip'),\n",
+ " id=6,\n",
+ " color=[51, 153, 255]),\n",
+ " 7:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'right_shoulder'),\n",
+ " id=7,\n",
+ " color=[51, 153, 255]),\n",
+ " 8:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_elbow'),\n",
+ " id=8,\n",
+ " color=[0, 255, 0]),\n",
+ " 9:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_elbow'),\n",
+ " id=9,\n",
+ " color=[255, 128, 0]),\n",
+ " 10:\n",
+ " dict(\n",
+ " link=('left_elbow', 'left_wrist'),\n",
+ " id=10,\n",
+ " color=[0, 255, 0]),\n",
+ " 11:\n",
+ " dict(\n",
+ " link=('right_elbow', 'right_wrist'),\n",
+ " id=11,\n",
+ " color=[255, 128, 0]),\n",
+ " 12:\n",
+ " dict(\n",
+ " link=('left_eye', 'right_eye'),\n",
+ " id=12,\n",
+ " color=[51, 153, 255]),\n",
+ " 13:\n",
+ " dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),\n",
+ " 14:\n",
+ " dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),\n",
+ " 15:\n",
+ " dict(\n",
+ " link=('left_eye', 'left_ear'), id=15, color=[51, 153,\n",
+ " 255]),\n",
+ " 16:\n",
+ " dict(\n",
+ " link=('right_eye', 'right_ear'),\n",
+ " id=16,\n",
+ " color=[51, 153, 255]),\n",
+ " 17:\n",
+ " dict(\n",
+ " link=('left_ear', 'left_shoulder'),\n",
+ " id=17,\n",
+ " color=[51, 153, 255]),\n",
+ " 18:\n",
+ " dict(\n",
+ " link=('right_ear', 'right_shoulder'),\n",
+ " id=18,\n",
+ " color=[51, 153, 255])\n",
+ " }),\n",
+ " joint_weights=[\n",
+ " 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,\n",
+ " 1.0, 1.2, 1.2, 1.5, 1.5\n",
+ " ],\n",
+ " sigmas=[\n",
+ " 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,\n",
+ " 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089\n",
+ " ])),\n",
+ " test=dict(\n",
+ " type='TopDownCOCOTinyDataset',\n",
+ " ann_file='data/coco_tiny/val.json',\n",
+ " img_prefix='data/coco_tiny/images/',\n",
+ " data_cfg=dict(\n",
+ " image_size=[192, 256],\n",
+ " heatmap_size=[48, 64],\n",
+ " num_output_channels=17,\n",
+ " num_joints=17,\n",
+ " dataset_channel=[[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ]],\n",
+ " inference_channel=[\n",
+ " 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16\n",
+ " ],\n",
+ " soft_nms=False,\n",
+ " nms_thr=1.0,\n",
+ " oks_thr=0.9,\n",
+ " vis_thr=0.2,\n",
+ " use_gt_bbox=False,\n",
+ " det_bbox_thr=0.0,\n",
+ " bbox_file=\n",
+ " 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'\n",
+ " ),\n",
+ " pipeline=[\n",
+ " dict(type='LoadImageFromFile'),\n",
+ " dict(type='TopDownAffine'),\n",
+ " dict(type='ToTensor'),\n",
+ " dict(\n",
+ " type='NormalizeTensor',\n",
+ " mean=[0.485, 0.456, 0.406],\n",
+ " std=[0.229, 0.224, 0.225]),\n",
+ " dict(\n",
+ " type='Collect',\n",
+ " keys=['img'],\n",
+ " meta_keys=[\n",
+ " 'image_file', 'center', 'scale', 'rotation', 'bbox_score',\n",
+ " 'flip_pairs'\n",
+ " ])\n",
+ " ],\n",
+ " dataset_info=dict(\n",
+ " dataset_name='coco',\n",
+ " paper_info=dict(\n",
+ " author=\n",
+ " 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\\'a}r, Piotr and Zitnick, C Lawrence',\n",
+ " title='Microsoft coco: Common objects in context',\n",
+ " container='European conference on computer vision',\n",
+ " year='2014',\n",
+ " homepage='http://cocodataset.org/'),\n",
+ " keypoint_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " name='nose',\n",
+ " id=0,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap=''),\n",
+ " 1:\n",
+ " dict(\n",
+ " name='left_eye',\n",
+ " id=1,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_eye'),\n",
+ " 2:\n",
+ " dict(\n",
+ " name='right_eye',\n",
+ " id=2,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_eye'),\n",
+ " 3:\n",
+ " dict(\n",
+ " name='left_ear',\n",
+ " id=3,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='right_ear'),\n",
+ " 4:\n",
+ " dict(\n",
+ " name='right_ear',\n",
+ " id=4,\n",
+ " color=[51, 153, 255],\n",
+ " type='upper',\n",
+ " swap='left_ear'),\n",
+ " 5:\n",
+ " dict(\n",
+ " name='left_shoulder',\n",
+ " id=5,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_shoulder'),\n",
+ " 6:\n",
+ " dict(\n",
+ " name='right_shoulder',\n",
+ " id=6,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_shoulder'),\n",
+ " 7:\n",
+ " dict(\n",
+ " name='left_elbow',\n",
+ " id=7,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_elbow'),\n",
+ " 8:\n",
+ " dict(\n",
+ " name='right_elbow',\n",
+ " id=8,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_elbow'),\n",
+ " 9:\n",
+ " dict(\n",
+ " name='left_wrist',\n",
+ " id=9,\n",
+ " color=[0, 255, 0],\n",
+ " type='upper',\n",
+ " swap='right_wrist'),\n",
+ " 10:\n",
+ " dict(\n",
+ " name='right_wrist',\n",
+ " id=10,\n",
+ " color=[255, 128, 0],\n",
+ " type='upper',\n",
+ " swap='left_wrist'),\n",
+ " 11:\n",
+ " dict(\n",
+ " name='left_hip',\n",
+ " id=11,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_hip'),\n",
+ " 12:\n",
+ " dict(\n",
+ " name='right_hip',\n",
+ " id=12,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_hip'),\n",
+ " 13:\n",
+ " dict(\n",
+ " name='left_knee',\n",
+ " id=13,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_knee'),\n",
+ " 14:\n",
+ " dict(\n",
+ " name='right_knee',\n",
+ " id=14,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_knee'),\n",
+ " 15:\n",
+ " dict(\n",
+ " name='left_ankle',\n",
+ " id=15,\n",
+ " color=[0, 255, 0],\n",
+ " type='lower',\n",
+ " swap='right_ankle'),\n",
+ " 16:\n",
+ " dict(\n",
+ " name='right_ankle',\n",
+ " id=16,\n",
+ " color=[255, 128, 0],\n",
+ " type='lower',\n",
+ " swap='left_ankle')\n",
+ " }),\n",
+ " skeleton_info=dict({\n",
+ " 0:\n",
+ " dict(\n",
+ " link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),\n",
+ " 1:\n",
+ " dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),\n",
+ " 2:\n",
+ " dict(\n",
+ " link=('right_ankle', 'right_knee'),\n",
+ " id=2,\n",
+ " color=[255, 128, 0]),\n",
+ " 3:\n",
+ " dict(\n",
+ " link=('right_knee', 'right_hip'),\n",
+ " id=3,\n",
+ " color=[255, 128, 0]),\n",
+ " 4:\n",
+ " dict(\n",
+ " link=('left_hip', 'right_hip'), id=4, color=[51, 153,\n",
+ " 255]),\n",
+ " 5:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_hip'),\n",
+ " id=5,\n",
+ " color=[51, 153, 255]),\n",
+ " 6:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_hip'),\n",
+ " id=6,\n",
+ " color=[51, 153, 255]),\n",
+ " 7:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'right_shoulder'),\n",
+ " id=7,\n",
+ " color=[51, 153, 255]),\n",
+ " 8:\n",
+ " dict(\n",
+ " link=('left_shoulder', 'left_elbow'),\n",
+ " id=8,\n",
+ " color=[0, 255, 0]),\n",
+ " 9:\n",
+ " dict(\n",
+ " link=('right_shoulder', 'right_elbow'),\n",
+ " id=9,\n",
+ " color=[255, 128, 0]),\n",
+ " 10:\n",
+ " dict(\n",
+ " link=('left_elbow', 'left_wrist'),\n",
+ " id=10,\n",
+ " color=[0, 255, 0]),\n",
+ " 11:\n",
+ " dict(\n",
+ " link=('right_elbow', 'right_wrist'),\n",
+ " id=11,\n",
+ " color=[255, 128, 0]),\n",
+ " 12:\n",
+ " dict(\n",
+ " link=('left_eye', 'right_eye'),\n",
+ " id=12,\n",
+ " color=[51, 153, 255]),\n",
+ " 13:\n",
+ " dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),\n",
+ " 14:\n",
+ " dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),\n",
+ " 15:\n",
+ " dict(\n",
+ " link=('left_eye', 'left_ear'), id=15, color=[51, 153,\n",
+ " 255]),\n",
+ " 16:\n",
+ " dict(\n",
+ " link=('right_eye', 'right_ear'),\n",
+ " id=16,\n",
+ " color=[51, 153, 255]),\n",
+ " 17:\n",
+ " dict(\n",
+ " link=('left_ear', 'left_shoulder'),\n",
+ " id=17,\n",
+ " color=[51, 153, 255]),\n",
+ " 18:\n",
+ " dict(\n",
+ " link=('right_ear', 'right_shoulder'),\n",
+ " id=18,\n",
+ " color=[51, 153, 255])\n",
+ " }),\n",
+ " joint_weights=[\n",
+ " 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,\n",
+ " 1.0, 1.2, 1.2, 1.5, 1.5\n",
+ " ],\n",
+ " sigmas=[\n",
+ " 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,\n",
+ " 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089\n",
+ " ])))\n",
+ "work_dir = 'work_dirs/hrnet_w32_coco_tiny_256x192'\n",
+ "gpu_ids = range(0, 1)\n",
+ "seed = 0\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from mmcv import Config\n",
+ "cfg = Config.fromfile(\n",
+ " './configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py'\n",
+ ")\n",
+ "\n",
+ "# set basic configs\n",
+ "cfg.data_root = 'data/coco_tiny'\n",
+ "cfg.work_dir = 'work_dirs/hrnet_w32_coco_tiny_256x192'\n",
+ "cfg.gpu_ids = range(1)\n",
+ "cfg.seed = 0\n",
+ "\n",
+ "# set log interval\n",
+ "cfg.log_config.interval = 1\n",
+ "\n",
+ "# set evaluation configs\n",
+ "cfg.evaluation.interval = 10\n",
+ "cfg.evaluation.metric = 'PCK'\n",
+ "cfg.evaluation.save_best = 'PCK'\n",
+ "\n",
+ "# set learning rate policy\n",
+ "lr_config = dict(\n",
+ " policy='step',\n",
+ " warmup='linear',\n",
+ " warmup_iters=10,\n",
+ " warmup_ratio=0.001,\n",
+ " step=[17, 35])\n",
+ "cfg.total_epochs = 40\n",
+ "\n",
+ "# set batch size\n",
+ "cfg.data.samples_per_gpu = 16\n",
+ "cfg.data.val_dataloader = dict(samples_per_gpu=16)\n",
+ "cfg.data.test_dataloader = dict(samples_per_gpu=16)\n",
+ "\n",
+ "\n",
+ "# set dataset configs\n",
+ "cfg.data.train.type = 'TopDownCOCOTinyDataset'\n",
+ "cfg.data.train.ann_file = f'{cfg.data_root}/train.json'\n",
+ "cfg.data.train.img_prefix = f'{cfg.data_root}/images/'\n",
+ "\n",
+ "cfg.data.val.type = 'TopDownCOCOTinyDataset'\n",
+ "cfg.data.val.ann_file = f'{cfg.data_root}/val.json'\n",
+ "cfg.data.val.img_prefix = f'{cfg.data_root}/images/'\n",
+ "\n",
+ "cfg.data.test.type = 'TopDownCOCOTinyDataset'\n",
+ "cfg.data.test.ann_file = f'{cfg.data_root}/val.json'\n",
+ "cfg.data.test.img_prefix = f'{cfg.data_root}/images/'\n",
+ "\n",
+ "print(cfg.pretty_text)\n"
+ ]
+ },
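+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The next cell is an optional sketch that was not part of the recorded run: it saves the modified config into the work directory with `Config.dump`, so the exact settings used for this fine-tuning run can be reproduced later. The output file name is only an example.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os.path as osp\n",
+ "\n",
+ "import mmcv\n",
+ "\n",
+ "# Sketch: persist the modified config next to the checkpoints so the run can be\n",
+ "# reproduced later. `cfg` comes from the previous cell; the file name is arbitrary.\n",
+ "mmcv.mkdir_or_exist(cfg.work_dir)\n",
+ "cfg.dump(osp.join(cfg.work_dir, 'hrnet_w32_coco_tiny_256x192.py'))"
+ ]
+ },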
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WQVa6wBDxVSW"
+ },
+ "source": [
+ "### Train and Evaluation\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000,
+ "referenced_widgets": [
+ "c50b2c7b3d58486d9941509548a877e4",
+ "ae33a61272f84a7981bc1f3008458688",
+ "a0bf65a0401e465393ef8720ef3328ac",
+ "a724d84941224553b1fab6c0b489213d",
+ "210e7151c2ad44a3ba79d477f91d8b26",
+ "a3dc245089464b159bbdd5fc71afa1bc",
+ "864769e1e83c4b5d89baaa373c181f07",
+ "9035c6e9fddd41d8b7dae395c93410a2",
+ "1d31e1f7256d42669d76f54a8a844b79",
+ "43ef0a1859c342dab6f6cd620ae78ba7",
+ "90e3675160374766b5387ddb078fa3c5"
+ ]
+ },
+ "id": "XJ5uVkwcxiyx",
+ "outputId": "0693f2e3-f41d-46a8-d3ed-1add83735f91"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Use load_from_http loader\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading: \"https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w32-36af842e.pth\" to /home/PJLAB/liyining/.cache/torch/hub/checkpoints/hrnet_w32-36af842e.pth\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c50b2c7b3d58486d9941509548a877e4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0.00/126M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2021-09-22 22:37:43,193 - mmpose - WARNING - The model and loaded state dict do not match exactly\n",
+ "\n",
+ "unexpected key in source state_dict: head.0.0.0.conv1.weight, head.0.0.0.bn1.weight, head.0.0.0.bn1.bias, head.0.0.0.bn1.running_mean, head.0.0.0.bn1.running_var, head.0.0.0.bn1.num_batches_tracked, head.0.0.0.conv2.weight, head.0.0.0.bn2.weight, head.0.0.0.bn2.bias, head.0.0.0.bn2.running_mean, head.0.0.0.bn2.running_var, head.0.0.0.bn2.num_batches_tracked, head.0.0.0.conv3.weight, head.0.0.0.bn3.weight, head.0.0.0.bn3.bias, head.0.0.0.bn3.running_mean, head.0.0.0.bn3.running_var, head.0.0.0.bn3.num_batches_tracked, head.0.0.0.downsample.0.weight, head.0.0.0.downsample.1.weight, head.0.0.0.downsample.1.bias, head.0.0.0.downsample.1.running_mean, head.0.0.0.downsample.1.running_var, head.0.0.0.downsample.1.num_batches_tracked, head.0.1.0.conv1.weight, head.0.1.0.bn1.weight, head.0.1.0.bn1.bias, head.0.1.0.bn1.running_mean, head.0.1.0.bn1.running_var, head.0.1.0.bn1.num_batches_tracked, head.0.1.0.conv2.weight, head.0.1.0.bn2.weight, head.0.1.0.bn2.bias, head.0.1.0.bn2.running_mean, head.0.1.0.bn2.running_var, head.0.1.0.bn2.num_batches_tracked, head.0.1.0.conv3.weight, head.0.1.0.bn3.weight, head.0.1.0.bn3.bias, head.0.1.0.bn3.running_mean, head.0.1.0.bn3.running_var, head.0.1.0.bn3.num_batches_tracked, head.0.1.0.downsample.0.weight, head.0.1.0.downsample.1.weight, head.0.1.0.downsample.1.bias, head.0.1.0.downsample.1.running_mean, head.0.1.0.downsample.1.running_var, head.0.1.0.downsample.1.num_batches_tracked, head.0.2.0.conv1.weight, head.0.2.0.bn1.weight, head.0.2.0.bn1.bias, head.0.2.0.bn1.running_mean, head.0.2.0.bn1.running_var, head.0.2.0.bn1.num_batches_tracked, head.0.2.0.conv2.weight, head.0.2.0.bn2.weight, head.0.2.0.bn2.bias, head.0.2.0.bn2.running_mean, head.0.2.0.bn2.running_var, head.0.2.0.bn2.num_batches_tracked, head.0.2.0.conv3.weight, head.0.2.0.bn3.weight, head.0.2.0.bn3.bias, head.0.2.0.bn3.running_mean, head.0.2.0.bn3.running_var, head.0.2.0.bn3.num_batches_tracked, head.0.2.0.downsample.0.weight, head.0.2.0.downsample.1.weight, head.0.2.0.downsample.1.bias, head.0.2.0.downsample.1.running_mean, head.0.2.0.downsample.1.running_var, head.0.2.0.downsample.1.num_batches_tracked, head.1.0.0.conv1.weight, head.1.0.0.bn1.weight, head.1.0.0.bn1.bias, head.1.0.0.bn1.running_mean, head.1.0.0.bn1.running_var, head.1.0.0.bn1.num_batches_tracked, head.1.0.0.conv2.weight, head.1.0.0.bn2.weight, head.1.0.0.bn2.bias, head.1.0.0.bn2.running_mean, head.1.0.0.bn2.running_var, head.1.0.0.bn2.num_batches_tracked, head.1.0.0.conv3.weight, head.1.0.0.bn3.weight, head.1.0.0.bn3.bias, head.1.0.0.bn3.running_mean, head.1.0.0.bn3.running_var, head.1.0.0.bn3.num_batches_tracked, head.1.0.0.downsample.0.weight, head.1.0.0.downsample.1.weight, head.1.0.0.downsample.1.bias, head.1.0.0.downsample.1.running_mean, head.1.0.0.downsample.1.running_var, head.1.0.0.downsample.1.num_batches_tracked, head.1.1.0.conv1.weight, head.1.1.0.bn1.weight, head.1.1.0.bn1.bias, head.1.1.0.bn1.running_mean, head.1.1.0.bn1.running_var, head.1.1.0.bn1.num_batches_tracked, head.1.1.0.conv2.weight, head.1.1.0.bn2.weight, head.1.1.0.bn2.bias, head.1.1.0.bn2.running_mean, head.1.1.0.bn2.running_var, head.1.1.0.bn2.num_batches_tracked, head.1.1.0.conv3.weight, head.1.1.0.bn3.weight, head.1.1.0.bn3.bias, head.1.1.0.bn3.running_mean, head.1.1.0.bn3.running_var, head.1.1.0.bn3.num_batches_tracked, head.1.1.0.downsample.0.weight, head.1.1.0.downsample.1.weight, head.1.1.0.downsample.1.bias, head.1.1.0.downsample.1.running_mean, head.1.1.0.downsample.1.running_var, head.1.1.0.downsample.1.num_batches_tracked, 
head.2.0.0.conv1.weight, head.2.0.0.bn1.weight, head.2.0.0.bn1.bias, head.2.0.0.bn1.running_mean, head.2.0.0.bn1.running_var, head.2.0.0.bn1.num_batches_tracked, head.2.0.0.conv2.weight, head.2.0.0.bn2.weight, head.2.0.0.bn2.bias, head.2.0.0.bn2.running_mean, head.2.0.0.bn2.running_var, head.2.0.0.bn2.num_batches_tracked, head.2.0.0.conv3.weight, head.2.0.0.bn3.weight, head.2.0.0.bn3.bias, head.2.0.0.bn3.running_mean, head.2.0.0.bn3.running_var, head.2.0.0.bn3.num_batches_tracked, head.2.0.0.downsample.0.weight, head.2.0.0.downsample.1.weight, head.2.0.0.downsample.1.bias, head.2.0.0.downsample.1.running_mean, head.2.0.0.downsample.1.running_var, head.2.0.0.downsample.1.num_batches_tracked, head.3.0.0.conv1.weight, head.3.0.0.bn1.weight, head.3.0.0.bn1.bias, head.3.0.0.bn1.running_mean, head.3.0.0.bn1.running_var, head.3.0.0.bn1.num_batches_tracked, head.3.0.0.conv2.weight, head.3.0.0.bn2.weight, head.3.0.0.bn2.bias, head.3.0.0.bn2.running_mean, head.3.0.0.bn2.running_var, head.3.0.0.bn2.num_batches_tracked, head.3.0.0.conv3.weight, head.3.0.0.bn3.weight, head.3.0.0.bn3.bias, head.3.0.0.bn3.running_mean, head.3.0.0.bn3.running_var, head.3.0.0.bn3.num_batches_tracked, head.3.0.0.downsample.0.weight, head.3.0.0.downsample.1.weight, head.3.0.0.downsample.1.bias, head.3.0.0.downsample.1.running_mean, head.3.0.0.downsample.1.running_var, head.3.0.0.downsample.1.num_batches_tracked, fc.weight, fc.bias, stage4.2.fuse_layers.1.0.0.0.weight, stage4.2.fuse_layers.1.0.0.1.weight, stage4.2.fuse_layers.1.0.0.1.bias, stage4.2.fuse_layers.1.0.0.1.running_mean, stage4.2.fuse_layers.1.0.0.1.running_var, stage4.2.fuse_layers.1.0.0.1.num_batches_tracked, stage4.2.fuse_layers.1.2.0.weight, stage4.2.fuse_layers.1.2.1.weight, stage4.2.fuse_layers.1.2.1.bias, stage4.2.fuse_layers.1.2.1.running_mean, stage4.2.fuse_layers.1.2.1.running_var, stage4.2.fuse_layers.1.2.1.num_batches_tracked, stage4.2.fuse_layers.1.3.0.weight, stage4.2.fuse_layers.1.3.1.weight, stage4.2.fuse_layers.1.3.1.bias, stage4.2.fuse_layers.1.3.1.running_mean, stage4.2.fuse_layers.1.3.1.running_var, stage4.2.fuse_layers.1.3.1.num_batches_tracked, stage4.2.fuse_layers.2.0.0.0.weight, stage4.2.fuse_layers.2.0.0.1.weight, stage4.2.fuse_layers.2.0.0.1.bias, stage4.2.fuse_layers.2.0.0.1.running_mean, stage4.2.fuse_layers.2.0.0.1.running_var, stage4.2.fuse_layers.2.0.0.1.num_batches_tracked, stage4.2.fuse_layers.2.0.1.0.weight, stage4.2.fuse_layers.2.0.1.1.weight, stage4.2.fuse_layers.2.0.1.1.bias, stage4.2.fuse_layers.2.0.1.1.running_mean, stage4.2.fuse_layers.2.0.1.1.running_var, stage4.2.fuse_layers.2.0.1.1.num_batches_tracked, stage4.2.fuse_layers.2.1.0.0.weight, stage4.2.fuse_layers.2.1.0.1.weight, stage4.2.fuse_layers.2.1.0.1.bias, stage4.2.fuse_layers.2.1.0.1.running_mean, stage4.2.fuse_layers.2.1.0.1.running_var, stage4.2.fuse_layers.2.1.0.1.num_batches_tracked, stage4.2.fuse_layers.2.3.0.weight, stage4.2.fuse_layers.2.3.1.weight, stage4.2.fuse_layers.2.3.1.bias, stage4.2.fuse_layers.2.3.1.running_mean, stage4.2.fuse_layers.2.3.1.running_var, stage4.2.fuse_layers.2.3.1.num_batches_tracked, stage4.2.fuse_layers.3.0.0.0.weight, stage4.2.fuse_layers.3.0.0.1.weight, stage4.2.fuse_layers.3.0.0.1.bias, stage4.2.fuse_layers.3.0.0.1.running_mean, stage4.2.fuse_layers.3.0.0.1.running_var, stage4.2.fuse_layers.3.0.0.1.num_batches_tracked, stage4.2.fuse_layers.3.0.1.0.weight, stage4.2.fuse_layers.3.0.1.1.weight, stage4.2.fuse_layers.3.0.1.1.bias, stage4.2.fuse_layers.3.0.1.1.running_mean, stage4.2.fuse_layers.3.0.1.1.running_var, 
stage4.2.fuse_layers.3.0.1.1.num_batches_tracked, stage4.2.fuse_layers.3.0.2.0.weight, stage4.2.fuse_layers.3.0.2.1.weight, stage4.2.fuse_layers.3.0.2.1.bias, stage4.2.fuse_layers.3.0.2.1.running_mean, stage4.2.fuse_layers.3.0.2.1.running_var, stage4.2.fuse_layers.3.0.2.1.num_batches_tracked, stage4.2.fuse_layers.3.1.0.0.weight, stage4.2.fuse_layers.3.1.0.1.weight, stage4.2.fuse_layers.3.1.0.1.bias, stage4.2.fuse_layers.3.1.0.1.running_mean, stage4.2.fuse_layers.3.1.0.1.running_var, stage4.2.fuse_layers.3.1.0.1.num_batches_tracked, stage4.2.fuse_layers.3.1.1.0.weight, stage4.2.fuse_layers.3.1.1.1.weight, stage4.2.fuse_layers.3.1.1.1.bias, stage4.2.fuse_layers.3.1.1.1.running_mean, stage4.2.fuse_layers.3.1.1.1.running_var, stage4.2.fuse_layers.3.1.1.1.num_batches_tracked, stage4.2.fuse_layers.3.2.0.0.weight, stage4.2.fuse_layers.3.2.0.1.weight, stage4.2.fuse_layers.3.2.0.1.bias, stage4.2.fuse_layers.3.2.0.1.running_mean, stage4.2.fuse_layers.3.2.0.1.running_var, stage4.2.fuse_layers.3.2.0.1.num_batches_tracked\n",
+ "\n",
+ "2021-09-22 22:37:46,021 - mmpose - INFO - Start running, host: PJLAB\\liyining@shai14000065l, work_dir: /home/PJLAB/liyining/openmmlab/mmpose/work_dirs/hrnet_w32_coco_tiny_256x192\n",
+ "2021-09-22 22:37:46,021 - mmpose - INFO - Hooks will be executed in the following order:\n",
+ "before_run:\n",
+ "(VERY_HIGH ) StepLrUpdaterHook \n",
+ "(NORMAL ) CheckpointHook \n",
+ "(NORMAL ) EvalHook \n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "before_train_epoch:\n",
+ "(VERY_HIGH ) StepLrUpdaterHook \n",
+ "(NORMAL ) EvalHook \n",
+ "(LOW ) IterTimerHook \n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "before_train_iter:\n",
+ "(VERY_HIGH ) StepLrUpdaterHook \n",
+ "(NORMAL ) EvalHook \n",
+ "(LOW ) IterTimerHook \n",
+ " -------------------- \n",
+ "after_train_iter:\n",
+ "(ABOVE_NORMAL) OptimizerHook \n",
+ "(NORMAL ) CheckpointHook \n",
+ "(NORMAL ) EvalHook \n",
+ "(LOW ) IterTimerHook \n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "after_train_epoch:\n",
+ "(NORMAL ) CheckpointHook \n",
+ "(NORMAL ) EvalHook \n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "before_val_epoch:\n",
+ "(LOW ) IterTimerHook \n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "before_val_iter:\n",
+ "(LOW ) IterTimerHook \n",
+ " -------------------- \n",
+ "after_val_iter:\n",
+ "(LOW ) IterTimerHook \n",
+ " -------------------- \n",
+ "after_val_epoch:\n",
+ "(VERY_LOW ) TextLoggerHook \n",
+ " -------------------- \n",
+ "2021-09-22 22:37:46,022 - mmpose - INFO - workflow: [('train', 1)], max: 40 epochs\n",
+ "2021-09-22 22:37:48,774 - mmpose - INFO - Epoch [1][1/4]\tlr: 5.000e-07, eta: 0:07:17, time: 2.749, data_time: 2.287, memory: 2594, mse_loss: 0.0020, acc_pose: 0.0000, loss: 0.0020\n",
+ "2021-09-22 22:37:49,164 - mmpose - INFO - Epoch [1][2/4]\tlr: 1.499e-06, eta: 0:04:08, time: 0.391, data_time: 0.002, memory: 2903, mse_loss: 0.0025, acc_pose: 0.0152, loss: 0.0025\n",
+ "2021-09-22 22:37:49,549 - mmpose - INFO - Epoch [1][3/4]\tlr: 2.498e-06, eta: 0:03:04, time: 0.385, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.0270, loss: 0.0019\n",
+ "2021-09-22 22:37:49,937 - mmpose - INFO - Epoch [1][4/4]\tlr: 3.497e-06, eta: 0:02:32, time: 0.388, data_time: 0.002, memory: 2903, mse_loss: 0.0020, acc_pose: 0.0104, loss: 0.0020\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:37:52,650 - mmpose - INFO - Epoch [2][1/4]\tlr: 4.496e-06, eta: 0:03:23, time: 2.656, data_time: 2.263, memory: 2903, mse_loss: 0.0024, acc_pose: 0.0206, loss: 0.0024\n",
+ "2021-09-22 22:37:53,025 - mmpose - INFO - Epoch [2][2/4]\tlr: 5.495e-06, eta: 0:02:58, time: 0.375, data_time: 0.001, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0331, loss: 0.0022\n",
+ "2021-09-22 22:37:53,395 - mmpose - INFO - Epoch [2][3/4]\tlr: 6.494e-06, eta: 0:02:39, time: 0.371, data_time: 0.001, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0065, loss: 0.0023\n",
+ "2021-09-22 22:37:53,770 - mmpose - INFO - Epoch [2][4/4]\tlr: 7.493e-06, eta: 0:02:26, time: 0.375, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.0143, loss: 0.0020\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:37:56,509 - mmpose - INFO - Epoch [3][1/4]\tlr: 8.492e-06, eta: 0:02:54, time: 2.685, data_time: 2.248, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0183, loss: 0.0023\n",
+ "2021-09-22 22:37:56,902 - mmpose - INFO - Epoch [3][2/4]\tlr: 9.491e-06, eta: 0:02:41, time: 0.393, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.0334, loss: 0.0020\n",
+ "2021-09-22 22:37:57,269 - mmpose - INFO - Epoch [3][3/4]\tlr: 1.049e-05, eta: 0:02:30, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.0139, loss: 0.0017\n",
+ "2021-09-22 22:37:57,632 - mmpose - INFO - Epoch [3][4/4]\tlr: 1.149e-05, eta: 0:02:21, time: 0.363, data_time: 0.001, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0119, loss: 0.0022\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:00,246 - mmpose - INFO - Epoch [4][1/4]\tlr: 1.249e-05, eta: 0:02:38, time: 2.552, data_time: 2.207, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0152, loss: 0.0023\n",
+ "2021-09-22 22:38:00,579 - mmpose - INFO - Epoch [4][2/4]\tlr: 1.349e-05, eta: 0:02:29, time: 0.333, data_time: 0.002, memory: 2903, mse_loss: 0.0019, acc_pose: 0.0250, loss: 0.0019\n",
+ "2021-09-22 22:38:00,946 - mmpose - INFO - Epoch [4][3/4]\tlr: 1.449e-05, eta: 0:02:22, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0025, acc_pose: 0.0127, loss: 0.0025\n",
+ "2021-09-22 22:38:01,309 - mmpose - INFO - Epoch [4][4/4]\tlr: 1.549e-05, eta: 0:02:16, time: 0.363, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.0310, loss: 0.0020\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:03,971 - mmpose - INFO - Epoch [5][1/4]\tlr: 1.648e-05, eta: 0:02:29, time: 2.618, data_time: 2.265, memory: 2903, mse_loss: 0.0024, acc_pose: 0.0264, loss: 0.0024\n",
+ "2021-09-22 22:38:04,319 - mmpose - INFO - Epoch [5][2/4]\tlr: 1.748e-05, eta: 0:02:22, time: 0.348, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.0147, loss: 0.0017\n",
+ "2021-09-22 22:38:04,672 - mmpose - INFO - Epoch [5][3/4]\tlr: 1.848e-05, eta: 0:02:16, time: 0.354, data_time: 0.002, memory: 2903, mse_loss: 0.0020, acc_pose: 0.0065, loss: 0.0020\n",
+ "2021-09-22 22:38:05,062 - mmpose - INFO - Epoch [5][4/4]\tlr: 1.948e-05, eta: 0:02:11, time: 0.390, data_time: 0.001, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0168, loss: 0.0023\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:07,651 - mmpose - INFO - Epoch [6][1/4]\tlr: 2.048e-05, eta: 0:02:21, time: 2.546, data_time: 2.202, memory: 2903, mse_loss: 0.0009, acc_pose: 0.0095, loss: 0.0009\n",
+ "2021-09-22 22:38:07,981 - mmpose - INFO - Epoch [6][2/4]\tlr: 2.148e-05, eta: 0:02:16, time: 0.330, data_time: 0.002, memory: 2903, mse_loss: 0.0025, acc_pose: 0.0225, loss: 0.0025\n",
+ "2021-09-22 22:38:08,311 - mmpose - INFO - Epoch [6][3/4]\tlr: 2.248e-05, eta: 0:02:11, time: 0.330, data_time: 0.001, memory: 2903, mse_loss: 0.0025, acc_pose: 0.0266, loss: 0.0025\n",
+ "2021-09-22 22:38:08,638 - mmpose - INFO - Epoch [6][4/4]\tlr: 2.348e-05, eta: 0:02:06, time: 0.327, data_time: 0.001, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0281, loss: 0.0023\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:11,310 - mmpose - INFO - Epoch [7][1/4]\tlr: 2.448e-05, eta: 0:02:14, time: 2.614, data_time: 2.220, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0202, loss: 0.0022\n",
+ "2021-09-22 22:38:11,678 - mmpose - INFO - Epoch [7][2/4]\tlr: 2.548e-05, eta: 0:02:10, time: 0.368, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.0059, loss: 0.0018\n",
+ "2021-09-22 22:38:12,049 - mmpose - INFO - Epoch [7][3/4]\tlr: 2.647e-05, eta: 0:02:06, time: 0.371, data_time: 0.002, memory: 2903, mse_loss: 0.0019, acc_pose: 0.0231, loss: 0.0019\n",
+ "2021-09-22 22:38:12,418 - mmpose - INFO - Epoch [7][4/4]\tlr: 2.747e-05, eta: 0:02:02, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.0564, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:15,087 - mmpose - INFO - Epoch [8][1/4]\tlr: 2.847e-05, eta: 0:02:09, time: 2.617, data_time: 2.224, memory: 2903, mse_loss: 0.0019, acc_pose: 0.0576, loss: 0.0019\n",
+ "2021-09-22 22:38:15,457 - mmpose - INFO - Epoch [8][2/4]\tlr: 2.947e-05, eta: 0:02:05, time: 0.370, data_time: 0.002, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0451, loss: 0.0022\n",
+ "2021-09-22 22:38:15,824 - mmpose - INFO - Epoch [8][3/4]\tlr: 3.047e-05, eta: 0:02:02, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0021, acc_pose: 0.0161, loss: 0.0021\n",
+ "2021-09-22 22:38:16,191 - mmpose - INFO - Epoch [8][4/4]\tlr: 3.147e-05, eta: 0:01:59, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0024, acc_pose: 0.0953, loss: 0.0024\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:18,866 - mmpose - INFO - Epoch [9][1/4]\tlr: 3.247e-05, eta: 0:02:04, time: 2.622, data_time: 2.226, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0718, loss: 0.0022\n",
+ "2021-09-22 22:38:19,231 - mmpose - INFO - Epoch [9][2/4]\tlr: 3.347e-05, eta: 0:02:01, time: 0.364, data_time: 0.002, memory: 2903, mse_loss: 0.0022, acc_pose: 0.0466, loss: 0.0022\n",
+ "2021-09-22 22:38:19,596 - mmpose - INFO - Epoch [9][3/4]\tlr: 3.447e-05, eta: 0:01:58, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.0562, loss: 0.0018\n",
+ "2021-09-22 22:38:19,963 - mmpose - INFO - Epoch [9][4/4]\tlr: 3.547e-05, eta: 0:01:55, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0021, acc_pose: 0.0830, loss: 0.0021\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:22,631 - mmpose - INFO - Epoch [10][1/4]\tlr: 3.646e-05, eta: 0:02:00, time: 2.611, data_time: 2.220, memory: 2903, mse_loss: 0.0021, acc_pose: 0.0687, loss: 0.0021\n",
+ "2021-09-22 22:38:22,995 - mmpose - INFO - Epoch [10][2/4]\tlr: 3.746e-05, eta: 0:01:57, time: 0.365, data_time: 0.002, memory: 2903, mse_loss: 0.0017, acc_pose: 0.1515, loss: 0.0017\n",
+ "2021-09-22 22:38:23,361 - mmpose - INFO - Epoch [10][3/4]\tlr: 3.846e-05, eta: 0:01:54, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1117, loss: 0.0020\n",
+ "2021-09-22 22:38:23,729 - mmpose - INFO - Epoch [10][4/4]\tlr: 3.946e-05, eta: 0:01:51, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0023, acc_pose: 0.0976, loss: 0.0023\n",
+ "2021-09-22 22:38:23,778 - mmpose - INFO - Saving checkpoint at 10 epochs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ ] 0/25, elapsed: 0s, ETA:"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 25/25, 43.4 task/s, elapsed: 1s, ETA: 0s"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2021-09-22 22:38:25,434 - mmpose - INFO - Now best checkpoint is saved as best_PCK_epoch_10.pth.\n",
+ "2021-09-22 22:38:25,434 - mmpose - INFO - Best PCK is 0.2753 at 10 epoch.\n",
+ "2021-09-22 22:38:25,435 - mmpose - INFO - Epoch(val) [10][2]\tPCK: 0.2753\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:28,080 - mmpose - INFO - Epoch [11][1/4]\tlr: 4.046e-05, eta: 0:01:55, time: 2.639, data_time: 2.248, memory: 2903, mse_loss: 0.0018, acc_pose: 0.1022, loss: 0.0018\n",
+ "2021-09-22 22:38:28,448 - mmpose - INFO - Epoch [11][2/4]\tlr: 4.146e-05, eta: 0:01:53, time: 0.368, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.0652, loss: 0.0018\n",
+ "2021-09-22 22:38:28,813 - mmpose - INFO - Epoch [11][3/4]\tlr: 4.246e-05, eta: 0:01:50, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.1531, loss: 0.0019\n",
+ "2021-09-22 22:38:29,178 - mmpose - INFO - Epoch [11][4/4]\tlr: 4.346e-05, eta: 0:01:47, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1465, loss: 0.0020\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:31,838 - mmpose - INFO - Epoch [12][1/4]\tlr: 4.446e-05, eta: 0:01:51, time: 2.608, data_time: 2.218, memory: 2903, mse_loss: 0.0018, acc_pose: 0.0605, loss: 0.0018\n",
+ "2021-09-22 22:38:32,206 - mmpose - INFO - Epoch [12][2/4]\tlr: 4.545e-05, eta: 0:01:48, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0022, acc_pose: 0.1361, loss: 0.0022\n",
+ "2021-09-22 22:38:32,574 - mmpose - INFO - Epoch [12][3/4]\tlr: 4.645e-05, eta: 0:01:46, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.1523, loss: 0.0019\n",
+ "2021-09-22 22:38:32,942 - mmpose - INFO - Epoch [12][4/4]\tlr: 4.745e-05, eta: 0:01:44, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0022, acc_pose: 0.1340, loss: 0.0022\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:35,606 - mmpose - INFO - Epoch [13][1/4]\tlr: 4.845e-05, eta: 0:01:47, time: 2.613, data_time: 2.217, memory: 2903, mse_loss: 0.0021, acc_pose: 0.1284, loss: 0.0021\n",
+ "2021-09-22 22:38:35,973 - mmpose - INFO - Epoch [13][2/4]\tlr: 4.945e-05, eta: 0:01:44, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0019, acc_pose: 0.1190, loss: 0.0019\n",
+ "2021-09-22 22:38:36,348 - mmpose - INFO - Epoch [13][3/4]\tlr: 5.045e-05, eta: 0:01:42, time: 0.375, data_time: 0.001, memory: 2903, mse_loss: 0.0022, acc_pose: 0.1670, loss: 0.0022\n",
+ "2021-09-22 22:38:36,724 - mmpose - INFO - Epoch [13][4/4]\tlr: 5.145e-05, eta: 0:01:40, time: 0.376, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1706, loss: 0.0020\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:39,416 - mmpose - INFO - Epoch [14][1/4]\tlr: 5.245e-05, eta: 0:01:43, time: 2.641, data_time: 2.245, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1876, loss: 0.0020\n",
+ "2021-09-22 22:38:39,786 - mmpose - INFO - Epoch [14][2/4]\tlr: 5.345e-05, eta: 0:01:40, time: 0.371, data_time: 0.002, memory: 2903, mse_loss: 0.0022, acc_pose: 0.1800, loss: 0.0022\n",
+ "2021-09-22 22:38:40,159 - mmpose - INFO - Epoch [14][3/4]\tlr: 5.445e-05, eta: 0:01:38, time: 0.373, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1617, loss: 0.0020\n",
+ "2021-09-22 22:38:40,527 - mmpose - INFO - Epoch [14][4/4]\tlr: 5.544e-05, eta: 0:01:36, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.1060, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:43,178 - mmpose - INFO - Epoch [15][1/4]\tlr: 5.644e-05, eta: 0:01:38, time: 2.601, data_time: 2.203, memory: 2903, mse_loss: 0.0020, acc_pose: 0.2289, loss: 0.0020\n",
+ "2021-09-22 22:38:43,544 - mmpose - INFO - Epoch [15][2/4]\tlr: 5.744e-05, eta: 0:01:36, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0016, acc_pose: 0.1636, loss: 0.0016\n",
+ "2021-09-22 22:38:43,910 - mmpose - INFO - Epoch [15][3/4]\tlr: 5.844e-05, eta: 0:01:34, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0021, acc_pose: 0.1721, loss: 0.0021\n",
+ "2021-09-22 22:38:44,276 - mmpose - INFO - Epoch [15][4/4]\tlr: 5.944e-05, eta: 0:01:33, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.1038, loss: 0.0017\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:46,914 - mmpose - INFO - Epoch [16][1/4]\tlr: 6.044e-05, eta: 0:01:34, time: 2.587, data_time: 2.198, memory: 2903, mse_loss: 0.0020, acc_pose: 0.1295, loss: 0.0020\n",
+ "2021-09-22 22:38:47,283 - mmpose - INFO - Epoch [16][2/4]\tlr: 6.144e-05, eta: 0:01:32, time: 0.369, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.1358, loss: 0.0018\n",
+ "2021-09-22 22:38:47,651 - mmpose - INFO - Epoch [16][3/4]\tlr: 6.244e-05, eta: 0:01:31, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.1543, loss: 0.0018\n",
+ "2021-09-22 22:38:48,019 - mmpose - INFO - Epoch [16][4/4]\tlr: 6.344e-05, eta: 0:01:29, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.1155, loss: 0.0017\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:50,700 - mmpose - INFO - Epoch [17][1/4]\tlr: 6.444e-05, eta: 0:01:30, time: 2.611, data_time: 2.217, memory: 2903, mse_loss: 0.0019, acc_pose: 0.2150, loss: 0.0019\n",
+ "2021-09-22 22:38:51,070 - mmpose - INFO - Epoch [17][2/4]\tlr: 6.544e-05, eta: 0:01:29, time: 0.370, data_time: 0.002, memory: 2903, mse_loss: 0.0022, acc_pose: 0.1850, loss: 0.0022\n",
+ "2021-09-22 22:38:51,439 - mmpose - INFO - Epoch [17][3/4]\tlr: 6.643e-05, eta: 0:01:27, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.1244, loss: 0.0019\n",
+ "2021-09-22 22:38:51,805 - mmpose - INFO - Epoch [17][4/4]\tlr: 6.743e-05, eta: 0:01:25, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2272, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:54,470 - mmpose - INFO - Epoch [18][1/4]\tlr: 6.843e-05, eta: 0:01:26, time: 2.614, data_time: 2.218, memory: 2903, mse_loss: 0.0020, acc_pose: 0.2409, loss: 0.0020\n",
+ "2021-09-22 22:38:54,840 - mmpose - INFO - Epoch [18][2/4]\tlr: 6.943e-05, eta: 0:01:25, time: 0.370, data_time: 0.002, memory: 2903, mse_loss: 0.0017, acc_pose: 0.1534, loss: 0.0017\n",
+ "2021-09-22 22:38:55,209 - mmpose - INFO - Epoch [18][3/4]\tlr: 7.043e-05, eta: 0:01:23, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.3068, loss: 0.0018\n",
+ "2021-09-22 22:38:55,575 - mmpose - INFO - Epoch [18][4/4]\tlr: 7.143e-05, eta: 0:01:21, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2066, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:38:58,277 - mmpose - INFO - Epoch [19][1/4]\tlr: 7.243e-05, eta: 0:01:22, time: 2.636, data_time: 2.228, memory: 2903, mse_loss: 0.0019, acc_pose: 0.2946, loss: 0.0019\n",
+ "2021-09-22 22:38:58,651 - mmpose - INFO - Epoch [19][2/4]\tlr: 7.343e-05, eta: 0:01:21, time: 0.374, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.2669, loss: 0.0014\n",
+ "2021-09-22 22:38:59,019 - mmpose - INFO - Epoch [19][3/4]\tlr: 7.443e-05, eta: 0:01:19, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.2514, loss: 0.0020\n",
+ "2021-09-22 22:38:59,388 - mmpose - INFO - Epoch [19][4/4]\tlr: 7.543e-05, eta: 0:01:18, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.2052, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:02,074 - mmpose - INFO - Epoch [20][1/4]\tlr: 7.642e-05, eta: 0:01:19, time: 2.634, data_time: 2.231, memory: 2903, mse_loss: 0.0021, acc_pose: 0.1846, loss: 0.0021\n",
+ "2021-09-22 22:39:02,443 - mmpose - INFO - Epoch [20][2/4]\tlr: 7.742e-05, eta: 0:01:17, time: 0.369, data_time: 0.002, memory: 2903, mse_loss: 0.0013, acc_pose: 0.1537, loss: 0.0013\n",
+ "2021-09-22 22:39:02,811 - mmpose - INFO - Epoch [20][3/4]\tlr: 7.842e-05, eta: 0:01:15, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.2114, loss: 0.0017\n",
+ "2021-09-22 22:39:03,180 - mmpose - INFO - Epoch [20][4/4]\tlr: 7.942e-05, eta: 0:01:14, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.2147, loss: 0.0020\n",
+ "2021-09-22 22:39:03,231 - mmpose - INFO - Saving checkpoint at 20 epochs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ ] 0/25, elapsed: 0s, ETA:"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 25/25, 45.0 task/s, elapsed: 1s, ETA: 0s"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2021-09-22 22:39:04,788 - mmpose - INFO - Now best checkpoint is saved as best_PCK_epoch_20.pth.\n",
+ "2021-09-22 22:39:04,789 - mmpose - INFO - Best PCK is 0.3123 at 20 epoch.\n",
+ "2021-09-22 22:39:04,789 - mmpose - INFO - Epoch(val) [20][2]\tPCK: 0.3123\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:07,402 - mmpose - INFO - Epoch [21][1/4]\tlr: 8.042e-05, eta: 0:01:15, time: 2.609, data_time: 2.218, memory: 2903, mse_loss: 0.0017, acc_pose: 0.2806, loss: 0.0017\n",
+ "2021-09-22 22:39:07,769 - mmpose - INFO - Epoch [21][2/4]\tlr: 8.142e-05, eta: 0:01:13, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0017, acc_pose: 0.2352, loss: 0.0017\n",
+ "2021-09-22 22:39:08,136 - mmpose - INFO - Epoch [21][3/4]\tlr: 8.242e-05, eta: 0:01:12, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0021, acc_pose: 0.2968, loss: 0.0021\n",
+ "2021-09-22 22:39:08,502 - mmpose - INFO - Epoch [21][4/4]\tlr: 8.342e-05, eta: 0:01:10, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0015, acc_pose: 0.1867, loss: 0.0015\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:11,188 - mmpose - INFO - Epoch [22][1/4]\tlr: 8.442e-05, eta: 0:01:11, time: 2.635, data_time: 2.244, memory: 2903, mse_loss: 0.0019, acc_pose: 0.3474, loss: 0.0019\n",
+ "2021-09-22 22:39:11,561 - mmpose - INFO - Epoch [22][2/4]\tlr: 8.542e-05, eta: 0:01:09, time: 0.373, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.2988, loss: 0.0016\n",
+ "2021-09-22 22:39:11,929 - mmpose - INFO - Epoch [22][3/4]\tlr: 8.641e-05, eta: 0:01:08, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2864, loss: 0.0018\n",
+ "2021-09-22 22:39:12,292 - mmpose - INFO - Epoch [22][4/4]\tlr: 8.741e-05, eta: 0:01:07, time: 0.363, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2130, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:14,985 - mmpose - INFO - Epoch [23][1/4]\tlr: 8.841e-05, eta: 0:01:07, time: 2.625, data_time: 2.227, memory: 2903, mse_loss: 0.0016, acc_pose: 0.2869, loss: 0.0016\n",
+ "2021-09-22 22:39:15,352 - mmpose - INFO - Epoch [23][2/4]\tlr: 8.941e-05, eta: 0:01:06, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2948, loss: 0.0018\n",
+ "2021-09-22 22:39:15,732 - mmpose - INFO - Epoch [23][3/4]\tlr: 9.041e-05, eta: 0:01:04, time: 0.381, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2796, loss: 0.0018\n",
+ "2021-09-22 22:39:16,098 - mmpose - INFO - Epoch [23][4/4]\tlr: 9.141e-05, eta: 0:01:03, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.2982, loss: 0.0017\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:18,773 - mmpose - INFO - Epoch [24][1/4]\tlr: 9.241e-05, eta: 0:01:03, time: 2.624, data_time: 2.226, memory: 2903, mse_loss: 0.0016, acc_pose: 0.3208, loss: 0.0016\n",
+ "2021-09-22 22:39:19,142 - mmpose - INFO - Epoch [24][2/4]\tlr: 9.341e-05, eta: 0:01:02, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.2067, loss: 0.0018\n",
+ "2021-09-22 22:39:19,512 - mmpose - INFO - Epoch [24][3/4]\tlr: 9.441e-05, eta: 0:01:00, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0020, acc_pose: 0.2734, loss: 0.0020\n",
+ "2021-09-22 22:39:19,879 - mmpose - INFO - Epoch [24][4/4]\tlr: 9.540e-05, eta: 0:00:59, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.3253, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:22,523 - mmpose - INFO - Epoch [25][1/4]\tlr: 9.640e-05, eta: 0:00:59, time: 2.593, data_time: 2.211, memory: 2903, mse_loss: 0.0020, acc_pose: 0.3644, loss: 0.0020\n",
+ "2021-09-22 22:39:22,893 - mmpose - INFO - Epoch [25][2/4]\tlr: 9.740e-05, eta: 0:00:58, time: 0.371, data_time: 0.002, memory: 2903, mse_loss: 0.0014, acc_pose: 0.3229, loss: 0.0014\n",
+ "2021-09-22 22:39:23,260 - mmpose - INFO - Epoch [25][3/4]\tlr: 9.840e-05, eta: 0:00:57, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0015, acc_pose: 0.3083, loss: 0.0015\n",
+ "2021-09-22 22:39:23,625 - mmpose - INFO - Epoch [25][4/4]\tlr: 9.940e-05, eta: 0:00:55, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0015, acc_pose: 0.2692, loss: 0.0015\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:26,300 - mmpose - INFO - Epoch [26][1/4]\tlr: 1.004e-04, eta: 0:00:55, time: 2.623, data_time: 2.235, memory: 2903, mse_loss: 0.0017, acc_pose: 0.3494, loss: 0.0017\n",
+ "2021-09-22 22:39:26,667 - mmpose - INFO - Epoch [26][2/4]\tlr: 1.014e-04, eta: 0:00:54, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.3283, loss: 0.0013\n",
+ "2021-09-22 22:39:27,033 - mmpose - INFO - Epoch [26][3/4]\tlr: 1.024e-04, eta: 0:00:53, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.3560, loss: 0.0017\n",
+ "2021-09-22 22:39:27,402 - mmpose - INFO - Epoch [26][4/4]\tlr: 1.034e-04, eta: 0:00:52, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.2936, loss: 0.0019\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:30,106 - mmpose - INFO - Epoch [27][1/4]\tlr: 1.044e-04, eta: 0:00:52, time: 2.643, data_time: 2.248, memory: 2903, mse_loss: 0.0016, acc_pose: 0.3084, loss: 0.0016\n",
+ "2021-09-22 22:39:30,476 - mmpose - INFO - Epoch [27][2/4]\tlr: 1.054e-04, eta: 0:00:50, time: 0.371, data_time: 0.002, memory: 2903, mse_loss: 0.0020, acc_pose: 0.3418, loss: 0.0020\n",
+ "2021-09-22 22:39:30,845 - mmpose - INFO - Epoch [27][3/4]\tlr: 1.064e-04, eta: 0:00:49, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0015, acc_pose: 0.3162, loss: 0.0015\n",
+ "2021-09-22 22:39:31,211 - mmpose - INFO - Epoch [27][4/4]\tlr: 1.074e-04, eta: 0:00:48, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.3371, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:33,896 - mmpose - INFO - Epoch [28][1/4]\tlr: 1.084e-04, eta: 0:00:48, time: 2.633, data_time: 2.233, memory: 2903, mse_loss: 0.0019, acc_pose: 0.3924, loss: 0.0019\n",
+ "2021-09-22 22:39:34,263 - mmpose - INFO - Epoch [28][2/4]\tlr: 1.094e-04, eta: 0:00:47, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.3889, loss: 0.0019\n",
+ "2021-09-22 22:39:34,629 - mmpose - INFO - Epoch [28][3/4]\tlr: 1.104e-04, eta: 0:00:45, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.2687, loss: 0.0013\n",
+ "2021-09-22 22:39:34,994 - mmpose - INFO - Epoch [28][4/4]\tlr: 1.114e-04, eta: 0:00:44, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0019, acc_pose: 0.3294, loss: 0.0019\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:37,690 - mmpose - INFO - Epoch [29][1/4]\tlr: 1.124e-04, eta: 0:00:44, time: 2.642, data_time: 2.247, memory: 2903, mse_loss: 0.0019, acc_pose: 0.4194, loss: 0.0019\n",
+ "2021-09-22 22:39:38,056 - mmpose - INFO - Epoch [29][2/4]\tlr: 1.134e-04, eta: 0:00:43, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.3326, loss: 0.0017\n",
+ "2021-09-22 22:39:38,423 - mmpose - INFO - Epoch [29][3/4]\tlr: 1.144e-04, eta: 0:00:42, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.3295, loss: 0.0017\n",
+ "2021-09-22 22:39:38,788 - mmpose - INFO - Epoch [29][4/4]\tlr: 1.154e-04, eta: 0:00:40, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.3882, loss: 0.0014\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:41,450 - mmpose - INFO - Epoch [30][1/4]\tlr: 1.164e-04, eta: 0:00:40, time: 2.609, data_time: 2.216, memory: 2903, mse_loss: 0.0017, acc_pose: 0.3309, loss: 0.0017\n",
+ "2021-09-22 22:39:41,816 - mmpose - INFO - Epoch [30][2/4]\tlr: 1.174e-04, eta: 0:00:39, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0014, acc_pose: 0.3749, loss: 0.0014\n",
+ "2021-09-22 22:39:42,184 - mmpose - INFO - Epoch [30][3/4]\tlr: 1.184e-04, eta: 0:00:38, time: 0.369, data_time: 0.002, memory: 2903, mse_loss: 0.0018, acc_pose: 0.4279, loss: 0.0018\n",
+ "2021-09-22 22:39:42,550 - mmpose - INFO - Epoch [30][4/4]\tlr: 1.194e-04, eta: 0:00:37, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.3873, loss: 0.0016\n",
+ "2021-09-22 22:39:42,599 - mmpose - INFO - Saving checkpoint at 30 epochs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ ] 0/25, elapsed: 0s, ETA:"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 25/25, 44.1 task/s, elapsed: 1s, ETA: 0s"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2021-09-22 22:39:44,183 - mmpose - INFO - Now best checkpoint is saved as best_PCK_epoch_30.pth.\n",
+ "2021-09-22 22:39:44,183 - mmpose - INFO - Best PCK is 0.3288 at 30 epoch.\n",
+ "2021-09-22 22:39:44,184 - mmpose - INFO - Epoch(val) [30][2]\tPCK: 0.3288\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:46,788 - mmpose - INFO - Epoch [31][1/4]\tlr: 1.204e-04, eta: 0:00:36, time: 2.599, data_time: 2.210, memory: 2903, mse_loss: 0.0015, acc_pose: 0.3854, loss: 0.0015\n",
+ "2021-09-22 22:39:47,154 - mmpose - INFO - Epoch [31][2/4]\tlr: 1.214e-04, eta: 0:00:35, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0012, acc_pose: 0.3277, loss: 0.0012\n",
+ "2021-09-22 22:39:47,521 - mmpose - INFO - Epoch [31][3/4]\tlr: 1.224e-04, eta: 0:00:34, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0019, acc_pose: 0.3654, loss: 0.0019\n",
+ "2021-09-22 22:39:47,887 - mmpose - INFO - Epoch [31][4/4]\tlr: 1.234e-04, eta: 0:00:33, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0015, acc_pose: 0.4014, loss: 0.0015\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:50,571 - mmpose - INFO - Epoch [32][1/4]\tlr: 1.244e-04, eta: 0:00:33, time: 2.633, data_time: 2.242, memory: 2903, mse_loss: 0.0019, acc_pose: 0.4077, loss: 0.0019\n",
+ "2021-09-22 22:39:50,936 - mmpose - INFO - Epoch [32][2/4]\tlr: 1.254e-04, eta: 0:00:31, time: 0.366, data_time: 0.002, memory: 2903, mse_loss: 0.0015, acc_pose: 0.3948, loss: 0.0015\n",
+ "2021-09-22 22:39:51,302 - mmpose - INFO - Epoch [32][3/4]\tlr: 1.264e-04, eta: 0:00:30, time: 0.365, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.3251, loss: 0.0013\n",
+ "2021-09-22 22:39:51,664 - mmpose - INFO - Epoch [32][4/4]\tlr: 1.274e-04, eta: 0:00:29, time: 0.362, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4011, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:54,329 - mmpose - INFO - Epoch [33][1/4]\tlr: 1.284e-04, eta: 0:00:29, time: 2.616, data_time: 2.218, memory: 2903, mse_loss: 0.0014, acc_pose: 0.4166, loss: 0.0014\n",
+ "2021-09-22 22:39:54,695 - mmpose - INFO - Epoch [33][2/4]\tlr: 1.294e-04, eta: 0:00:28, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4266, loss: 0.0016\n",
+ "2021-09-22 22:39:55,062 - mmpose - INFO - Epoch [33][3/4]\tlr: 1.304e-04, eta: 0:00:27, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.3923, loss: 0.0014\n",
+ "2021-09-22 22:39:55,429 - mmpose - INFO - Epoch [33][4/4]\tlr: 1.314e-04, eta: 0:00:26, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.4607, loss: 0.0017\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:39:58,079 - mmpose - INFO - Epoch [34][1/4]\tlr: 1.324e-04, eta: 0:00:25, time: 2.598, data_time: 2.215, memory: 2903, mse_loss: 0.0015, acc_pose: 0.3104, loss: 0.0015\n",
+ "2021-09-22 22:39:58,443 - mmpose - INFO - Epoch [34][2/4]\tlr: 1.334e-04, eta: 0:00:24, time: 0.365, data_time: 0.003, memory: 2903, mse_loss: 0.0018, acc_pose: 0.4616, loss: 0.0018\n",
+ "2021-09-22 22:39:58,808 - mmpose - INFO - Epoch [34][3/4]\tlr: 1.344e-04, eta: 0:00:23, time: 0.366, data_time: 0.001, memory: 2903, mse_loss: 0.0010, acc_pose: 0.3579, loss: 0.0010\n",
+ "2021-09-22 22:39:59,176 - mmpose - INFO - Epoch [34][4/4]\tlr: 1.354e-04, eta: 0:00:22, time: 0.367, data_time: 0.001, memory: 2903, mse_loss: 0.0018, acc_pose: 0.4007, loss: 0.0018\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:01,843 - mmpose - INFO - Epoch [35][1/4]\tlr: 1.364e-04, eta: 0:00:21, time: 2.616, data_time: 2.227, memory: 2903, mse_loss: 0.0018, acc_pose: 0.4073, loss: 0.0018\n",
+ "2021-09-22 22:40:02,211 - mmpose - INFO - Epoch [35][2/4]\tlr: 1.374e-04, eta: 0:00:20, time: 0.368, data_time: 0.001, memory: 2903, mse_loss: 0.0017, acc_pose: 0.5594, loss: 0.0017\n",
+ "2021-09-22 22:40:02,582 - mmpose - INFO - Epoch [35][3/4]\tlr: 1.384e-04, eta: 0:00:19, time: 0.371, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.4707, loss: 0.0013\n",
+ "2021-09-22 22:40:02,951 - mmpose - INFO - Epoch [35][4/4]\tlr: 1.394e-04, eta: 0:00:18, time: 0.369, data_time: 0.002, memory: 2903, mse_loss: 0.0015, acc_pose: 0.4522, loss: 0.0015\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:05,626 - mmpose - INFO - Epoch [36][1/4]\tlr: 1.404e-04, eta: 0:00:17, time: 2.622, data_time: 2.224, memory: 2903, mse_loss: 0.0013, acc_pose: 0.3195, loss: 0.0013\n",
+ "2021-09-22 22:40:05,995 - mmpose - INFO - Epoch [36][2/4]\tlr: 1.414e-04, eta: 0:00:16, time: 0.369, data_time: 0.002, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4603, loss: 0.0016\n",
+ "2021-09-22 22:40:06,364 - mmpose - INFO - Epoch [36][3/4]\tlr: 1.424e-04, eta: 0:00:15, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.3914, loss: 0.0016\n",
+ "2021-09-22 22:40:06,733 - mmpose - INFO - Epoch [36][4/4]\tlr: 1.434e-04, eta: 0:00:14, time: 0.369, data_time: 0.001, memory: 2903, mse_loss: 0.0015, acc_pose: 0.5051, loss: 0.0015\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:09,418 - mmpose - INFO - Epoch [37][1/4]\tlr: 1.444e-04, eta: 0:00:14, time: 2.632, data_time: 2.231, memory: 2903, mse_loss: 0.0014, acc_pose: 0.4651, loss: 0.0014\n",
+ "2021-09-22 22:40:09,789 - mmpose - INFO - Epoch [37][2/4]\tlr: 1.454e-04, eta: 0:00:13, time: 0.371, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4974, loss: 0.0016\n",
+ "2021-09-22 22:40:10,162 - mmpose - INFO - Epoch [37][3/4]\tlr: 1.464e-04, eta: 0:00:12, time: 0.374, data_time: 0.002, memory: 2903, mse_loss: 0.0016, acc_pose: 0.5292, loss: 0.0016\n",
+ "2021-09-22 22:40:10,533 - mmpose - INFO - Epoch [37][4/4]\tlr: 1.474e-04, eta: 0:00:11, time: 0.371, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.4183, loss: 0.0014\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:13,213 - mmpose - INFO - Epoch [38][1/4]\tlr: 1.484e-04, eta: 0:00:10, time: 2.628, data_time: 2.229, memory: 2903, mse_loss: 0.0014, acc_pose: 0.4511, loss: 0.0014\n",
+ "2021-09-22 22:40:13,587 - mmpose - INFO - Epoch [38][2/4]\tlr: 1.494e-04, eta: 0:00:09, time: 0.374, data_time: 0.002, memory: 2903, mse_loss: 0.0013, acc_pose: 0.5198, loss: 0.0013\n",
+ "2021-09-22 22:40:13,959 - mmpose - INFO - Epoch [38][3/4]\tlr: 1.504e-04, eta: 0:00:08, time: 0.371, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.5084, loss: 0.0014\n",
+ "2021-09-22 22:40:14,338 - mmpose - INFO - Epoch [38][4/4]\tlr: 1.513e-04, eta: 0:00:07, time: 0.379, data_time: 0.002, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4849, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:16,996 - mmpose - INFO - Epoch [39][1/4]\tlr: 1.523e-04, eta: 0:00:06, time: 2.606, data_time: 2.221, memory: 2903, mse_loss: 0.0015, acc_pose: 0.4523, loss: 0.0015\n",
+ "2021-09-22 22:40:17,363 - mmpose - INFO - Epoch [39][2/4]\tlr: 1.533e-04, eta: 0:00:05, time: 0.367, data_time: 0.002, memory: 2903, mse_loss: 0.0013, acc_pose: 0.5011, loss: 0.0013\n",
+ "2021-09-22 22:40:17,739 - mmpose - INFO - Epoch [39][3/4]\tlr: 1.543e-04, eta: 0:00:04, time: 0.376, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.5854, loss: 0.0013\n",
+ "2021-09-22 22:40:18,109 - mmpose - INFO - Epoch [39][4/4]\tlr: 1.553e-04, eta: 0:00:03, time: 0.370, data_time: 0.001, memory: 2903, mse_loss: 0.0016, acc_pose: 0.4886, loss: 0.0016\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "2021-09-22 22:40:20,760 - mmpose - INFO - Epoch [40][1/4]\tlr: 1.563e-04, eta: 0:00:02, time: 2.599, data_time: 2.234, memory: 2903, mse_loss: 0.0014, acc_pose: 0.4787, loss: 0.0014\n",
+ "2021-09-22 22:40:21,109 - mmpose - INFO - Epoch [40][2/4]\tlr: 1.573e-04, eta: 0:00:01, time: 0.350, data_time: 0.001, memory: 2903, mse_loss: 0.0013, acc_pose: 0.5198, loss: 0.0013\n",
+ "2021-09-22 22:40:21,459 - mmpose - INFO - Epoch [40][3/4]\tlr: 1.583e-04, eta: 0:00:00, time: 0.350, data_time: 0.001, memory: 2903, mse_loss: 0.0012, acc_pose: 0.5001, loss: 0.0012\n",
+ "2021-09-22 22:40:21,805 - mmpose - INFO - Epoch [40][4/4]\tlr: 1.593e-04, eta: 0:00:00, time: 0.345, data_time: 0.001, memory: 2903, mse_loss: 0.0014, acc_pose: 0.5597, loss: 0.0014\n",
+ "2021-09-22 22:40:21,852 - mmpose - INFO - Saving checkpoint at 40 epochs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ ] 0/25, elapsed: 0s, ETA:"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
+ "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 25/25, 47.2 task/s, elapsed: 1s, ETA: 0s"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2021-09-22 22:40:23,387 - mmpose - INFO - Now best checkpoint is saved as best_PCK_epoch_40.pth.\n",
+ "2021-09-22 22:40:23,388 - mmpose - INFO - Best PCK is 0.3473 at 40 epoch.\n",
+ "2021-09-22 22:40:23,388 - mmpose - INFO - Epoch(val) [40][2]\tPCK: 0.3473\n"
+ ]
+ }
+ ],
+ "source": [
+ "from mmpose.datasets import build_dataset\n",
+ "from mmpose.models import build_posenet\n",
+ "from mmpose.apis import train_model\n",
+ "import mmcv\n",
+ "\n",
+ "# build dataset\n",
+ "datasets = [build_dataset(cfg.data.train)]\n",
+ "\n",
+ "# build model\n",
+ "model = build_posenet(cfg.model)\n",
+ "\n",
+ "# create work_dir\n",
+ "mmcv.mkdir_or_exist(cfg.work_dir)\n",
+ "\n",
+ "# train model\n",
+ "train_model(\n",
+ " model, datasets, cfg, distributed=False, validate=True, meta=dict())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "iY2EWSp1zKoz"
+ },
+ "source": [
+    "Test the trained model. Since the model is trained on a toy dataset coco-tiny, its performance would not be as good as the models in our model zoo. Here we mainly show how to run inference with a local model checkpoint and visualize the results, as sketched in the next cell."
+ ]
+ },
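+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal inference sketch (added for illustration; not part of the original demo).\n",
+    "# It reuses `cfg` and the best checkpoint reported in the training log above.\n",
+    "# The image path, the whole-image bounding box and the dataset name are assumptions;\n",
+    "# the notebook's own demo cell below runs the full detector + pose pipeline.\n",
+    "from mmpose.apis import (init_pose_model, inference_top_down_pose_model,\n",
+    "                         vis_pose_result)\n",
+    "\n",
+    "pose_model = init_pose_model(\n",
+    "    cfg, f'{cfg.work_dir}/best_PCK_epoch_40.pth', device='cpu')\n",
+    "\n",
+    "img = 'path/to/a/test_image.jpg'  # placeholder path\n",
+    "person_results = [{'bbox': [0, 0, 640, 480]}]  # assumed whole-image box (xywh)\n",
+    "\n",
+    "pose_results, _ = inference_top_down_pose_model(\n",
+    "    pose_model, img, person_results, format='xywh',\n",
+    "    dataset='TopDownCocoDataset')\n",
+    "\n",
+    "vis_pose_result(pose_model, img, pose_results,\n",
+    "                dataset='TopDownCocoDataset', out_file='vis_result.jpg')"
+   ]
+  },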
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 387
+ },
+ "id": "i0rk9eCVzT_D",
+ "outputId": "722542be-ab38-4ca4-86c4-dce2cfb95c4b"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Use load_from_local loader\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages/mmdet/core/anchor/builder.py:15: UserWarning: ``build_anchor_generator`` would be deprecated soon, please use ``build_prior_generator`` \n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Use load_from_http loader\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages/mmdet/core/anchor/anchor_generator.py:323: UserWarning: ``grid_anchors`` would be deprecated soon. Please use ``grid_priors`` \n",
+ " warnings.warn('``grid_anchors`` would be deprecated soon. '\n",
+ "/home/SENSETIME/liyining/anaconda3/envs/colab/lib/python3.9/site-packages/mmdet/core/anchor/anchor_generator.py:359: UserWarning: ``single_level_grid_anchors`` would be deprecated soon. Please use ``single_level_grid_priors`` \n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUAAAADWCAIAAAAvuswXAAAgAElEQVR4ATTBWcxtW3oe5PdrxphzrrX+brenK1eVq9zEtuIYiOQGZFkEkwShKMhICIgQN3CBiAQ3thIgDogICRTETYyDwTIxoEAgNwETWZBQQTgkohFEgJvYVafqNPuc3fzNWmvOMcbXsMuI56HHX7ikngCZohAlOxGf70e2ZJVMhzAzZUIUus8geEN0n/f14kYf7jcfVGux1cKJSd2DmeZLYWU7RuuZGcSuqiPdR2dS7yizJIcNZyuRbw3i1FKJCUBZsDzS+WoA+nA3Hj9V2sntJ5udaql88YzaaKdPKLa0rm0d0VMg05Xtbkrv3h44ELAQ1u5GjQjkFioKcmLxzADSnTR0Ec9UUndnEJIQbymxJ5KBSCG2y2u+eUdffmpSdf80BIoUMv78w3NvYKLlQprH+W4oNDnqnp9+cLm5H+/PaugeVQVK7Q69bzePHm/tOC1oI+SiLVdKdajI699Af63JNl9WhruD1QAdR47Iso+wTJOxBUW++3sqLe3ianf/8vTwoq53UVCgqZqczAWYnbiiU18bK08F28aifbe/8m2rV8tc9NNPT1/97t93d383P5zfuWzvXl3zdlI/7+d62/kv//o3EfPYLAAqoxSxRrUoyJkmiLuNabeLaT1c7Szj/Nr6aahCJt4echu9mGbJynUMc0A0yi6lTGtbo3OZlTkJ4REprNU5aT2ljsnJBOSR0+WU7JpEjPNxUGqmB4UIk5CHF2jCWTiTFTkcHknsy4UK0/FuC6vEg5nDkl3dAUZRidGtkZkxKzPniJQggYrKjgKgSHgM8otnYtbzVE8PXmTSyS3dezAV6yZKEInN0wKclCwqFqMU8ZJESUZ2hhTmKYqWseVolk4iRJoZmZ4AiZTwSApRAiOImCPCibjMJJOOPnyLUqa6ZyD7Oei7fvDpduoBGAUZMrKv0U+JtwigVFXWjKARo+502oltaS0i/fG7iw06H7v3TA8i1Glu2wD88slOJzk9rH6SzEgEEbiwCvdubuCaOmlbe3b2iDrz4TCP3t1znpcQoxrX75d5LrdvTh4hLNOSQSUJV4+mz765vv7NlQ2kU9s2BiOTq8qSkcFRGaHgiOxmbGLDmDgDoCQmKAdciKQQFfZILUTg3gYTwxFBXCIyZSZQlizj6POBSJmmpBrTMvUxxrmPu4kpI0Inchdb3Vr4MOZcHk+P3p+N21L36+rRB5LuPun9aCJKNeYLyVmmq/P10935fI7g44eyfpZMzJdeSNez7Q5lnmJ7oPWYRbTjjBByJrAoYde5ZtXFRrcz+yARrgsL0bSTrNZvdRLd1i2BecF51asnzKU303EyjXKi/id/+hf+5t/+ld/+tf/xnT2eXMy0Pixk81Jfbf2//fBVeGVw3YtnH2cb53z3vYvhw7q65/HuuNtfBdrWel1qO5sN10JOKHNa3WgUP7FkcR9Uox6EoGml91MyXT+fR2w+yF3K4jdPy7r6+XM+fbYBxEFEPF3UHqOWUhZt595OTYizRNHCxG1rU5ksBitN82TDraV1r4V4oVp1O28Z6sODQoSjO8CeQRCyRFJ44tuYBQnPSIHIXrkYk6wPWYR1byatlLm9yTSaZrJ0c1Dy6MYMSMCJQgDKyPlpcRitHJFJLiLe05sBAqAW6cPDnAgQogQBRGLhzMTEIAIQ4ct+sjQPQ4JFEkDm9XsLaLSjbcdKP/gjz9ZTvn69AsW7WzO4phOreQQ4VVUkkjgFQEKUg6OHefBsytq3yFBG50JaxBzUcneYWLmtzSKJqE7FhjlGmTQzIyjgEPbhAiVwFp/nKfpQyYvri+A4nu5yF1dX+0g7HzONDk9MZh3B+51+9g0/f7RyqlmaWy3q5hKaFElBIM8sJBlpEYWUiN0cSYFISp2ElZkSnBaW4DIl5TS6ERJOECZty+Fid1OSO4yPn25Xj1Av5vMa96/GdPDrpxfW7fWHvZ0aa4kcnDUjbQQ7EJFaLp/zdENcSwa8bSrlzYd93HkEkmO5meanwrWPRrYRnX07G5Rkx8Jg03ZE0tAdadGISAuM0lsnJwRToemCoRFJ7GhbkGsOS0GpwiVJg0KmpbStW8uE0Nze++Lh4RXVpWPlh885Lsqf/lf/o6/92l/6nV/7G88WerSb7c2rR/uik3x+3n714zceqqJlZlEZm42jHWat++l0xMPtiYLaGI+fXgyH9c3ChzkBEFw8oasvzUg5fmLnl8NjXL83Xb9XX7043b0ApcgUj97bkdj93ZqGZTfpTLd327jzfFBmzpExkgqFJLFOCzFR37pQkcK9dZUSHkTsZHWRaaf9lLaF93FxWHqCCK33MWxaJJExPAYygwThwBAVZuLejZnBQcqIhANKKF1LGavYeaiq7Hi6rLG27TQuLuY2bIzIoDAXYYgRM1IoJDzoOpf9NB5GPw8qBEBA4+w+GECd4EHpXkoZEenORJ6h87Tbl7a2GEFJRGBhSydJchk9IDntpO4P1rdC7A5670vPehvtvAnSXMmDhSGEzIgAJwAmQEGF0iNF2FJYzDwVQogAp7oFF9dJt+4IFJbCiiSSfAvfRkFjWsow8wQyI5FBZAkgCgCiARXOCq6x381Bej41LS1Cyg6HR+weo3td+HRH0tyitjvrfRBxREyluEUiE5mDyUFAEqtSRoYHk0ASTGAkQxlSCOyqhSjPRw8DUYAoOfY36kl1yelaS0VNItezj3S9//joSfsnhZFvPsxYE0xgEy8pHgQhiu4I2l3P5YLlIMM65yjT9Oabo71p2QnEWfPiuTJLDrG2mmnmKAtzhVSzk8Q2q0aPznNA2FqQkQoplXYeJMzirORBCAyHpMAskyCQIomcZ9dF3aifGTRGxMVlGfey38+p2zhO2NO//qf+g1/963/5m//L33rvWq8mztevbhYtu/L5w/mvfP1FdyGAgrmyCBfWq3r5+vR5b+jnQUK60MVTKXN9+LQn5xieg3qL6ZIefXUh9vvPRnstZmP3LN7/3uu+ndaH4KzrKYLi8ePr0baXH67Wsx5i/2g5fraNB0DYmlEIU5IQkRCNWtUiWncy1SrDBjERSZBd3Mx1j/tPW47c7eZpJjPyzG1rZZrKAX2z9uBxQsCoRHqSKyVFBJIAIsqQSIQQA0zKZRZGJrzdwgdkx7LzaKSC4UHJYUgPFgJ5SBAxh4YlBDQlC1FQIDNAHj4kzaZ56u4EVOUItJGczswQQhVWjz4qSwwHkw0CiAkAeQdp7p9UTL0ftdZZZqebR/twDgNxZiIyiUkKkMjg8CCKJBCBlSMTwswx1cl6IAK/KzMpMyh5EYgXERj1NeE8FSHmbpZEoJgP7B5IU9WtmztiJBAiBCCT3prKRBr1gDH4dD8SDTbpbjz/zoPMWO+GtVzXTXVCoh/R1+E9YfxWMpigiUEBk0SUhcgzjYgVlaI7lSRwDiQFl6x1niaXKqc3vh4714xkUuwu68iupM+/t9QbF67bRuvp9V73pzf58GZ45wIcbxmnYeZSFIhgUhZ4+sj
CmnNi8sPVJFJWO3Pm6WWMewcoAsxy9e60bm59FGZUZCQ8idjh5ERgropqbhtcYigNQGJeikXP5NFTKcPEPFQ5M4nA0DrxvJQ2xnQYQvV8Cne23sMRFsJ08XQZ5lipPrv40z/97//Kf/9Ln/1v/+fTG74i0fXz/f7xottn2/hvfvtu7ZaZDN5fKXHSKBjClOfztjXXS/3SD17L4cF9bS+Xu0/76Y2T1rAWKSm4fB59RI7KvBWd9DLqBe8P/P7TRx9+8tpNSXTalYcXb85nU62l8vkep89HbuyjpStRMGmteXFVThtyG0bshGyZ6bzw5fNpa2N/SM3p/rVZs1IKyCet5aLc36+aqZfZz2kPaZtFCIHSPB0gsAAgpISDicyHCLOyHIKqVSmiut73850XXmTpEemezOxGBCCSiZxSlNwHQAIJhBYCZTLgYBaHIxIgSslAJnb7EhHnU4eBGSRSLwUZ1rMojRZplBGJCIFmJcpUL3u5eCJwvr/t82GiR0+u3NItmBgEomQhsJsbwJT0bcxECMqIEGYmEmKAPRz/PwZ5BAmViesyx8D5tMJTqXgaKRIZCa3pTnOpOtPaxxieIwgohSNCdUq4uZdF5r2ao60mggxyC6p+cTOpWDsTijHK3SvDSB+Rg9IzghKhKgIgOCNk8qtnVaieHnpvESAhJUVEuLlq0cUE1TuFNGs8WoJCg1DSS5Sp1IX2z+nwtD7cn+YdFZXsfL6N9ZWM1TCiPYQHRJQkk4zB6c6Qtg4O1UVlpt1hpiTjMU3y6pN1u9uIGEAmll01OAmKotRq7t2MSAiRhgwEh1YmAI6+hTiRMglk4jLFaKBw65Fe3Z0oRYQk94fFMzwi2Zdl31obY7ghg4REhByeSDWp7xz+rT/xS//Vr/zc6f/5u88flR3RvL04LAuJf/zQfuW3XntASwn3MnNmts3ViwonYbOxHCpfnfZP6XC1a3f+yW+d/KESA0zWPQVXT8knUymlEJM83K9aSyk47IR2PbNsZxPWeY/TcU2betNxl+e7TiEkQSNHoOzqkw9KlVy35K3evjnZcCaRRepVeeeLTz3W0W/ffOLRMNZIxzC7vNrNl+XNm/tJWarYRtvq5JQWfR2cTCBwMhOzhMM9I0aZBEhmTg1mlpRgmsrUzqNvHUzEEBUPi2AVtt6BLPPkYe7mnkiISp2VmGy4dycmVnDh9HALlYIg4sjM0ZNZhdNH7C40HL1HmZHgfia3JgJoSJBbcpmCc7eXw+V8Oq8QoyfvXaV7uGdQuLCgTBLpSSEiYYkUkEdmJEDQTCJBEDODmIDMjP9PUqaVqknsZiKSEWMNlpwOU8J9QCrcGBEkHkQZSEsRUuWM0DKBsI5NKxNHBEcESHY7JsbpoSOFFcy2XKsNHF+m9wEnBMISYKIUYQhJJKJwyeWq1EmOx963SCdSY+ZAJOc0yaOnxRvfv/KObiMyJUcIslxQCtJlvqTrd663sW7nVatHJFOJQYUgOd191r07M9D5rWmvI8N6F5LoNLbu7iKiVXUSKbi82r38dFsfzplJREBSksxMBcwQZvPwCBZxcxhAIM30BAiJDBICkoJAEtBIq0qRgfDITCZhZlDUpQSlBSKSmVjIbGQQkVCEFibo1lc2vvjikz/zM7/4n/6X/7Z//Vvv3CzXajwe9hiq+LsP+Ku/8aKWiUQjRiJBCApviSQRCc5lt+dlm64JEkSIdb799OzdIpODe/dHz2dezm7LsPHOB3j9hk5vfKpT3WW9hCq/dXw47Q7X6+lka9jG48j9PIgAzQyhuT3/8vT+96RC7+/H+q3rF79znHU5tvN7X3mnyfHczrt9AZ23u7LdOTp7wzCXgmlHESRK6TgdO4WUUtJsbK5ciAKEiMxIEMIVMrgQsxYQKAliwyxIiIV59OFO4Njt5+49IgkgZEYSs/vIRAYSKSpgmpepbSOGA8kVFkEAE6cHiFS1d0OiaAFlOJaZ3MgDjlbKpBOdb7t3BpEQMZFUcMUy68hzRqlloqvnF4Rgoirzeh4sUWZ2dwiKFhvhg7Q4kVi4eyonssDBQpRkZhEBYJ6nPsyRWhnmbzELEadlInTWRHoHxFQmhGVkAhFAJhEtSyEid7hFy1FmRXh0irRInZZhusS2IeEihUNmNot+NBqMBCW7BwEqFEgqQgkKXnayu6TudH/cRosKQQGcnIxqLnOZd0Q+nW4bJgKjbT0aL1dyeDK13rY7v7xapovLN2/ufG1lKUSpkxA7hEop68n2FwmSh0+2cYw61/lQM8xa+Mhx5uzet8FS6jV2c6X00x0f71eiJKIIu3p0kZKtj8I1aJg5g9IRSAoiQkoQKByZycJSI10ycqo8YN4V7sgQ5QSZOYMRyppZkoSJwj0yOCKEkEEEiIQW7WY0yqPveu9n/+U/9wt/4Wfjd775wdPLC42PXj084Xh2wb91b//D198oOEhIIj2pZNlTpGJgrEYMPdSrx6X5rcys81QnOd2d2eb1TZzuNowCMb2RqycQFSm99VhfKkXB3MGyv8rlEp5uJz7f5ul1kLMNjuHCnBz1cV7dLEG2u8jdExsj7n7j0D5jSqzZphtermlWbR46N2t8fMX9YUSHezAzaLBMHs6MGMiRDOIiYajKqmGBTBl9aBHW1AkpWFfjlYkInIHg4DEcrkSSsESCQUpakAHvLqzEMLeIJAjSibmUIqpb3wSc8CCjKAAyAoBMBMjoDkAzU0inGdnHSARnDKHKc/oprQXBUSSNmQuEdKGn703Ru4Lo6uZSi19dX1vT492d09hfl26gJABmxiikwz3dk0C1ElLcPdMYkiDvUcu0LLvNmvdNiQYoPJmJKBDsESwMJnhkkhRJcoKEjwykYxh0zmWZ+jlGs4DXWZPTR5IDIJ0l4DkCBFRYBoGQxAwmckdYZHeGaNGET1MZQtH77iDJ7H20VVIGoQiNIEryZV+hmR120mwml1yXejquMCpL0T3qku0s49iK1rZ2G16qTBcFk087LZNOOwbY2yiXdbvb2rppJV1qu8P5c4sGWomZw2Nbx+Pn+3e+VO/e+OuP2/nOAp21Rrbn715vaXcPD1XmtMw0ZIl0ZmLiTEQEOAHOiMIZM6VHZmotDLIRnOoWdSbV0tZGRGnsEckpmu4Mc7ylIoJMT8a8nwlxOtpM9Oir3/kn/8V/95f/kz8zPv743cvF1/OLT18/vZavXO3+71fHr33zyMGOtyhiEEOEU2RaSmstu++eLGWXIW1edNpX8MjwTB7b/OobJ28oe90/1v1VGd1aO2mZTrex2y1UB5GLCk/s6AW0PdDtR56rMUmUUXc0qT7+krTonLv7F50UOWBnbg9J4Ua4fqdYEmz0Bt2xzBwrZUPvfX8xP7w6WYM7WJnJWdhHooMyA8SFdWbVcFLbbHcjYJzucHEdSbK9stEzAdEMp2hGKeZZCyNgqRKDd0woorLs9P50sgwaCckp5pHnDplQjLsQZyQRUsASmeTGWpOpeDfNNCqgUBX3ZMp0dov0ZHDmFO7EgzSlKiXZGiH05N3ZBysH3Ty5mOe6LPtt9fP6IErznq0jPdw54T
qlR1p3IfWAVskMZiJCwAFKh7sfpsoytW5mkRKEQaAcGmH4NgJIVcyclVmYCW5DtYzh7RzLXuZ5Wk+2nYcIuIAlPSg9EsnCQYlIVoLAPZmTlVglI5CEQXYeWViJlVVF1tyUS50QoO0cRNhdkUdjnzZrWqE1ItQ3YMyU2WhkRgaUhQTTnuc9nY/mZzXz9BQp4L7sK4qnBk18cTO7wc372PpqV1dXZi2c+0Nst0ENFDTNtZRyPJ7D49EHBVxvP2+tbZQOV+F88uz6bO28bexi0YnSjJmDSSMCSSAiTygnB8MhwonMpCLwyEiiEp7MVFTb1pkYhYk6EmESkYTfRWBNIYKkTjXCRyOJ+OAHf+CP/zP/xn/8y/8mvfr83ctdbW9uJgiGWP5fL+1rH96neYAAiHCpKkIEbbaBEBZlz9fPL852VyozRb0oOmmZlt7y9dcftvtBSvsnS6LBOPowzSJUJomM/SVPS9k2Dy8yBVO8ftH8DFI6PNvGqIdd3HxAD2+oZHn1DbQzcTANPT9sFDFMlxueLjyBvnJSsNTRey0C6ZePpu2eH15t7WTTVCMHi/oIOClnG6PMhRTKznXqJ5/2fLgod69GUheZYmQ/O6e6jUgwkVskExLTHLTjdjSBKM9jxOXFodmpj/7k2Qc3j3br/Zu/7wd+TC7e/+aLFy9f3xFyW8+n00M3BxxEjFKn3Qfvv3/7+rO71y+Z8/7hLpMIUuZA1HBm4cAW7hGRQZLh6UkhxNGTihErgekLX3wqPLfWiWLLPhURzb4leQYIkmBnVhgNi/31zbRcnc+ven+gyJFOICa2PsQLEWvh5MFciQeSR2NlZy7rudkIVWEhMEWmMlQ5k1q3seZhX0Xk7v7I0GmugCfCIiMDSCQinYuwcrinQyu0qke6UUb65koF1YsU75aB4bzf14sbbtGOtyYllv2BODH0bMd5X6qyBTAIXqm4186up7stHN5SJp537I29ZR/bO+++i5TPP/90t5sg0clqjcOj+fbuzFCUUcoyNrdu0RE9xYqdIy3LxPvL/RjjeP9w8WR69t6TT77xZu0d4WNLJVxe7wdHRPrqVAOI3pI4WRMgJN6KrtBkBSPDmRNvBRNnRkSCM8Hgt9ICoME2VRDIh1DCM/BWJogZycqQ9NiQM2x89Yd//x//Y3/qF37xZ6fj/fs3h3h49ZXnk/Ww3v/3r9/9zU/bUmt3Cx/hISJahHUEMoO8p0xVdrh5fnF/vGPyi8cXb+7vOIu16K9chvZwvkKdOIzYs1xOwqMUGpEZPO9ofzHf3d8dnk4y+d3r9XL3+NF7cRov19sn4wSnozcoZT+527S+sjhyOFiG0CyT7J4MFro/tmVXM8V7gmSYE/NYt3HK2CAqScHJYUnCpSYxAukR+/3kcD9RSHKCnG2kE5U50si3JCfzyDQuHJKcMe9o91ROb7IfkYZ0ziCduBBff+H3/PAPf//f+Gt/Zbl80mVaHz6PM5g4wkdvCTYbAEQUmeAQSXL34KBW5qyT8pC71y2Zpkupk8aWYwsDZfP0qFUDMXo7PNrvr+vp1OmD73g6Go/RdgdOod77cqhtJW+uU3CVdUuOzlSa57Mvfunx0+96/eo37199I4cNI2QqcWaywDZQEBeBdWKVKkmekXC2kdYjkWUSYvLISQoJzL11S6PDMrPwcT0rg1nMPDwjMziYmRygQAELpZEnREKZRwchWYWFx9aYRFTcXUUJOc87Er9fNyTfPK6n1epEN5ccNequTFxPI2K1TD48Lc5tvcv1TR6Po59WEmEB22TDk8bT58/GoNPp5bNHj9a+umQ52P5qGiF3dw+jtd182R58PFgP50x0ys5YkWxllqDsW+qsl9d6eh1tdOZIUy24uNmfx+qeEjxkJDxciFFUPDwTSEJCKoTYmiFEiJg5iYRyhGcQCU+lbutKSQAyKBOggIADKYy3PIJSiTK9zEUU5y2zj9/3D/3EP/dH/6U//x/+a8t6fHqofrz9yvv7bGznl//rN9v//KJdXezLVNNtDHsrIvTSIsjOsBVSUg8yX9c+OovLpO3UuZfmhlN4B026XM5ScT41hC+HGiOZQSX62SP0cLOvOzz9ykSEF5+8gkcQ6i7Jp4cXZGPsDnk+6XLZpqtcP+bTx7BGsvCstBmWR6QaUejm+dzb1u/K1jpDxjq2c2bnGMmMDBfoGBYUF5dFJ4mI0aMuCsnxAEtKD6EIA1eBOiXZ5kLatxAFT3DxHEhXXjZqUxJUtJ2bkEYKhn/h9/6Bn/jxH/rPfvnPFdsMKHOOkWFIz9GdAP42ZEYITXNxzxwpTMTpCJ0m8jE2lD3tn6VOdPpc7l93rpjmyU7RjiM8hHn3aL54GsOCHj9+6m4kMc2SjLEOYfF0DNaZoDki0IkEBJkfP/6BH/zRT771+rNPvuaD3T06CTOXKEvpq/WTTVpJSSYQIQzMSZxugaG9mRYiyQQINQxuRvAIBhIgt1j2FZDh5t5tJDSIiJOQGRSsTEkZSAaBo6Uo0TSkqp05V+cloVmgIgLW0d26EeVyVd79An/H+5fTzc7aiHBSbO4qRKHLrrjzpx9un316l1QcLTnXB8Y5A14nbs1F89mXlmk/nXtzg0zkCEre7lsEts3HmxibwHtAplqLwDf0tWeQwzODlHwEnAmkxMleZtWlJtzGgFOmZ5EkZCQBHOzuUoU0MziRkSZZGOCCsoh1z8yIfItFskc6wEQkuXU3zhqFhKkwi9mWlMwc4ao03Vw/3N7GOX/0j/zhf/IP/7M///P/ysVmjxaf3R5flKlwv3v1tz5q/8enzRGzTjpLKSUTp+PZyRGejjDiPS2HlIoxhIgt+v5Q+zm7GxpsS+EqB/FhBNgYy37aPSNVd51k9Iej7fc3u/1yjjui3lePGIxp2ul+xy8/7tZivtCUpsq6Mzvh4Vvqd4oaBE5O5px2lMj98ymR66vRN8u3KL2JbcFJBESmMhMSBExgjmXZZ2w2sD24yGxpiGBQILJESeUievC6mx8+3dwTbMJFNNbj4FAmmWZxeA8rUtxinP297/tDP/njP/DX/+tfujuuNEuhfvvQlBfAt7VzSSkQ5rH6XKfGY38tpzeRDZlpZqqyu6LDo/LmdahG2efxs1wfYr6Y60XOXN989DCOyuy7x/NyTb4ZXR6uwLHslNUyta8+uoMI4Ewjhii/FQkCpif77/6+H3n5Yv3k6/8TA5EeG5AkVXlxIfEeYXR1PXEhG2YtM0OqBKidR2xgATGYJTxGczcrQsGSCbMhynUq87yY2bqu7kmceCuRmVIFBDjCEelEEDA0mQsB6W4DukidSjs3hiTB3ODBFV98b/mh3/v0+TuPTuPcm3dzRzIGqwww1K7p8etX9tsffjRA64hmdryPvnadSIUU+uzJdZPj/DhWa32tHHR8GL31w6HevHNhVl799u3Dy3OR0kYs01SZpKitnk7m1r2Jqg337pRCoIDrxPOySOFu27Z1SiFhAjIDBARFhlYuu5JOfW0qCoUN4yIysSSIAhQi1FeKYRkMEgSNrTNXnZyzuFupkmmeGcFIcjdIiiAa/QM/9
Uf+8X/wj/38z/2Ji/N455EeOGQcd9Pip4e//dn4O591swYHKfKtoMyEQwrXWmzk6MY1y06opgg55ePn17evT+O+w8hbZjCVCACUUsr+hg9PVMowohjYtn5xedBKrz8/te5P3qXeKboUme8+u2v3IkqHRxMvA9K3e7q42N1+vNl9LvvFPJgpYJY21zrfiA+7e2XpQBIXRpqPtBbhWYi/TTiZ6wKkW09RRmYMVi0B780YFB7uXqbClecrkolvP+npVGfqzSMMwUKMDJmYCqZdMTNNtDWffeUnf+zv+eLX/upf2lab9/tF5ZNPP4lUiCVxuZDrJ2Vdjwku3+kAACAASURBVP3EynzcxuFqTrPtIZAYYxDJ/qZc3sjrF6Ofkgv8zN4zxadlQrg1t8YALzcsi7Uj6PJwlbDDxTTvZGuxHls6g8jCM5IoVSlJSFiYl2dXX/rKD3368WdvPvo7aUYga55JRCJTiBKzhPN+ES4UHtbQugUnETIzNiaODGcqQGRQZooAzBFk1uskQShVi+jpdHYHAkCQIJJIKSPTEuAwZ8pSxTk5hJAXl7sOrOeVwNaNU5My0ih82pfv/sL1D3zfo6urfaJvI4aNSJ90t4a9uH+I2i6n+fYVjie8eHn2HigW2p++P9fd7vOPbreXush8uIjdM7x4+fDqIyLplDUzdhelXuvu4urum/fHT3upNDxuLq5y9OCMlkTCSqftvK2dkgDE4PQAp0582B/Kouf13JtZz3QXJRDlW0TJWZRoEkSkOUMyOXpEQKfiOoh9nnm3n46vPMhB5JZkPB9oPTsFpaFUsOToQSIRPHowUxXIQu1kP/FTP/WHfvQf+4Wf+5mLzb/wzuUcp+LbpJV7+2vfePj1NyNiYFCpyixm5p4clBTEFI6khFDZs0yBBieadjKaxykBSkdYgJyLyFya2/5QZMdTzUHezrbsJiksBdv5VOf5vS+XN7fnh9vkMT+8GBTJi4uKzgyJKlwmevnN4bfTNCWRBGXEkLcgXjscY/jYjFkgYOGM9BZIpgwkIrPMk1aeJXtDd0cGJcxCq+6W/bqtPjwdJDLcyyGllBwWQaARDoSGByhKES2SJaZZe+8z9q3jg+//R//hH/v+v/gX/uzp9tSGF9K+RaKMXOs0OdvukoizrRBSECdomhwDZjGGAUwqSd035FaTE4GMSEoKSiS+jcE073n/SHsDXS4HUplmmRZdx7BmlORpkYFkFpZCHlmWVCnX7335yZPvffHpr99//FuStW0D5kV1uIdTmUmr9hYMrgurynbu6xZEIPKqEknTXMPcLYcbwAkQJQtFkFkX9RRlylpKOnXz6EEEnohYe2/hQc4BrzQRMsgcXGsuu4pSHL6dmrfMICWy8IhQ4qnqvC9PnuA7P3hadbfZphqHffEipz4+uXuz4v5i/+TTb9xOupyOcbptus/5EX3wPbtGvr50e1X7w6qyt+mYwOlzWx9YKEiSa1mudP+YT5+fb79J046J5GLaxehEGpFtNBQAtB1H37pUxYCPYEXdFymcETbSu4cRMqSyc4ojhKCkki5ZKxEhM80zN+RAeFKtIGeJOsk4w6XVSRjKxtfv4e62tTcT3Hd7KaUcHwYImWTDM6OounQM/oP/xD/147//D/7Cz/30I6PveP8mH15dlKii3Nuv/s7pN++7ckqURLp7a0NVoUkRPhKpSCYZZWZPYARJjWjCEhkgApCeUpiUy6LNO3n0jGUqUBTlCAdRZJYD7S8FlNPCrYNa+fxbp8sbfuer5Xznd29s3l0E2v5Kbj8a68cUlsxaptJtq6RCesaKoHCDUUQ6BZNkGmcSBCUVXLQO85729GqxnqetA4EkgJlVRVZbk+A9GZYkPCFAMSIcWpJIEZbgCC9VDstkbEkgsN3BSL/89/7RP/Cj3/Of/9K/c7o/BXGMtOaZJXIgSCFOmYAwewqwMiawwQPgTEIysXsIk3MmVwIjwkSEEus2hAuIKSmS91eoe6LdcjEVLLsaoNY7UZqFG0BU5pBCY0RaEXVa6tXz9x4//vKbj7/1+sW3KmdrXYh2y24bbT1bZZrnuXmPgcystfbew4mYIpyZyqy7XRljtLNn5rQsREESPsTcPAwwCBOR0Le5BUDJSRyRbGYIRAQn6aThwc5lQr3Ecjmvp7Fu4T29OTwIAiSQqlK1OrIUPuwrUwrZfr9cPqnyrL4+NbSj9a33cv+6w4Nj31ub90Uqba3LMpYnZXeQCdrXfP3SpIy2SXtwEYwtq5blZjk8sbH6p7/RWIJ8nqWKOBgGj5OiDJ1hR9rOHexu7J24Rllm8hy2MbMb0kmESABCd9O5crWIzMhShQiRSQwRWAvyQoHuXiZ1DDKORvNMUkB9Nl11ke2++8oiyszxu0QkIswMwKi5c/wj/8I//2Nf/fv//J/9mZvav/zsnTi9vCzYKWXSf/f1Nx8dK0+MPspuBnA6nt0iKdzch8MjWUrRRBAQ6cwcSCJ4QCDMmXAmCaHg1CKBjmAkMmI6qAivW1OZ5oOU2VGi1Kmdhm1Y1225mp5/V+kP6+mVlFq5QKs8vDydP9McYEUEoiU7AklCFt+Gtzw5kZQJykxElqq7ZR9uiTEMIgrALYDIhHsyMRCinJzdOxmxEgm35srk7lIkMjhLll7naS6wSLAE7sQetWHo9p0/8k//5A9/93/xi//e/f1D95CktpkWTgSReHgMF2GA3JKDiCklRBBOyaELEXnRIpVQ8Oj9erxrD6+rlhVZt9cjmyAQxMi0HHVi2u/2y1xK1WYDSBBsWCSSeJqYlSOzbYNGkurhyfXV5RfvPn99evOtKhIpYRYZw8wDzBARYjBxZjKzuyMZRBEuIlqhyjbcBhFBpJBAlUS4D4scWqj1/5cnOI/9NL8Lw/4+PsdzfI/fPefOzh7e2fWJDYZAneIAImraRhVqpaqIJGrV649UaouQoialbVDvVq0pCiESSUhiFSUmohjTcpZgHDCHbdY2e3jvnZ2Z38zv+F7P83yO9/vdsdX29RIENFEAdN61bVu05DKpQK0VDREQ0QGWGJ1jYgzUCjWKDCqL7WqjqWoCrRpjqLU4x8BkCM45NK0Ki97tL2eLJc6u7t09u6cyVvVjyeMutzE4CqMWXyCG7nKa9vfc7FqbaCU2yiWvLyT6sLkoLDjr2u1mckEX12O335mt3vs65wHKZCAWvQVPLgbKHgik5HEnaSrOQ87VhCkIsENVAwMAVUMw53zXz6aUpnHwbcPOl5LQ1JwQo6phNgSPiEQa9tEFJU85gazCuKpgwuRQUXhywWmGWtFMiYwYiJx9k4ponMVqikP9N/+T//iFa8//vZ/88cMoT51cqZuHHcm8cbXKr722fmcD2NisazgEqFjGXKacTWqpKoaGyIwIACZSkZSYAYGZzLElQ0WRgo1SdAoKBiDVkWNHzAYYxYqY1Cqh75o5NXMsmsvOzODo+HjIQx53LhRQPw643HfIOK7H3QPUhDFEyYqVpGRFMDQAUDMDsKomRo6AUEEZkB2ZAhGToZjGGB6bpnHKCY2kmEMPBFULgAECkT0GyKaAqAaG
TAbivLqGtHIIUXWUSs5D2XE1ItFnPvFv/cDHb3/6Z/6ncZgEULMgR7VSpSIykjISmBkAky+afUehc743zdY0bdUsqGDSzNj3EIjGQVZnOl+wSq0bP17AuM1mBMAGoCY46/rgWFDZEzh4DBEQoRZUFe9cKRUJnfipSnu498St23fvroZ3XyULiiKqZoYAgIhEgEaIBoaIAICIzpGImoGZhUhgINXAyMwASVWBlBh8iIBiVnNWJqfVANBF7rpGoJZSpIhUBQMwMNCmwb39Dki1WhLMVfq+2ds/XK/PG9euL6bt5ZYIDYyZqiIzhuBFRRW8kza4k8NWSTOk2bK5v0q7aXDeEbl2RnHP7x7sNKGbt7isDOHy0aPZzFewcZNbP0ujDReJlEste1f56Mngug50eONLmu5nreAdhQDI4ILr2E8TTkMShVqliW4aq6C5oAAsZiIKAERIwOzck7efubxcPXzwTjefBb+33Z4RVvOKCJLQpooUVDU25BcsmIhwGgUntoyIJkIEDkkRUSoIVCIA0H7W+RCmaSqlAAB5UtE6yb/3n/2Na/Ojn/upnzh09amTKzqctVj2552a/dIr5/fWAFE8uZRqmaojN2t8MSilllIBEJEQQVXMlB045xTMTDE4p06yPoattIvGwNKYsRoR+cBdHzbb4iJ5zynnDHW2F/s9V23sZ3uxCbXo2cOLfCFxr/bLcH6/EEBsG1a7vJfTSgFIkiy7HtGqiACoGQCImoiiIRKSN8HKyOQgBMfoG+zOVxeIMJ/Pcs7DNJFxzQqKQPCYAahWJEAgM3jMoBIjMhDB/jXXde7hgy1hbHpaP5o0swgAMRs+/+f+0vd++/V/8nf/1+1mzNWsWAVgBiBQBVN1RKCmYMH79ihCEAJkh1AhTUkfA4cIbe/8TLXobsgcQtu5prPpHFfvld0mBR9zVgJnqrjsl1Kr7yjOHDCVIs6RWrFEJVdGNrOSiaxKaF74M3/2Ix/+5B9/5aU//b8/jVqJWMEAEUURyBgBjFTBOQAQEedciEbEpVQECpFqUSkKRqpqBmKGBOwcO69WDYoKOvZSjZCAgViAzYysCBggoIqJYNvCcr8TsjKVaVImvHXz4MknP7TZPZwv99964+wbr76ICIgQY8iJHVbnsYI1DpvOB++XvQsNGHCuw+k0TgMaCVLju+xat1mVkupsGYVz2biaJXTWLCJYIYy1ym5Vy05C4xbXyv4N5gDDGd9/KQ33wDH3XQOYg3Ps+XgRHp3m1VRCiLVkB7bZlgqVG3UYq4oIIIJaBTMXfNO2IYSLs41vqHEn4/SAnJoTLUBTKKieAdRMGFHMCAABRJXNgFw1ZGdMiCJqRsCTmcXY9H0/pqmWUmtVMzAkNWH8j378by4ofvqn//sDlicP98v20SLgwbKvap99/cHDFU8loQQ1NAXncDZ3U8pSVUQBQBUAzDkyM0AjZjUFMEMgIzRCAgFxLQIYKIOpAgJCbDw6dI6KZAACwixDnPH+UW8NI0Jel7KxOmW/72Jf0gWkNSpaE126sOlcq5p3dP3KESNtd0OqdRwTIKhBqdUEiNB3YFyZPLE1c4diPc7HpNvtVkSZXC1aa1UzUzA1RCQiMwMCE0IDJjA25xHQkI1n1gSF6lOGsCxlzWnjkA3YoMBzn/zLn/joyWf+7qfGIU9ZSFms+sBIWKuYEBqgASAiSXe1FayyUx+aNI7BB0NAKyqMBNRWECxi3R75Fin4zf1sWzduJiTUSgRca8X5bA6K7CH0ROBUVczUjAVyEVNkxQIKytyED33/933Hx3/gn//BN772uZ9UCIxKRKoKAGpKRIigasRmQqqGqqH3IXJOCiCzWczFSlYtaFL1MUBVILLQRFWVWgGBG8MSlbNn13aR0OesuYjkHYkz0FIrMjat98HVajVlQrx6/eT6jcOTK4fHV269+NW7f/Llf4ZiBsrBS8Zlx/O+2Qyp6/GJK/OOeTlvY9+K4i6PDy92712Ml0OqNddYnPMlVRQuU+GeJSsyhI7j0gBMK0rhOhWwEns3P0K/UNfg5T1dve70ApDJsykDEvWeQ+CL89I1jXNu/JYyoqKQV/ZUihkoEioYK5jDp5566vrJnS+99DtWAkVMqzV3QEHLBl0K6Ei7DAayZi1VlIgJqagxGQGCUmUwRFYFFUACH6BpvHN+TClPyRSBmEvOzIzTj/7ET2NZ//yn/seDxp6+elh2l3uelj1U5c+8fH66MxmFzIsZESABEZATNVMxIhJBMyGHzGiqAICIAGAGqgoGwXsLQgzsXRVDVWRTNUQHiNDA0ZX91fl53hXfeWj0ys39abdenda8ASKOkd1e6ZchjbI9LTpxN+s3Z9t8ZkRsprFxHJ1KtaKgbCSqkKuqAhOSs9Czi0HSRAG7LvZN93C9rhvLg6gpqLFzqqZmqgWRiNgUwAzVELGCIYP3ohJjC9RbbE12YuZyFctYMwARIwjrBz75l7/nI9d+8ef+l2FXRAnmQgJlIFNVAHRQBwUl59ERYMB2FghtHKwU8dErFMcOfM4JoZBi9Q3HmXVz9B0NO4RNPX8IOpgiOk+gFfcWSxE0BXaIBIioqlUqk7rgEaimqtUQmdrwwic/+fHv+sHf+8PXv/7ZnwTwBoKIZgbfhI+BmQEAgoihATOSY4Nqhj7A4d4sFxiHosUQoZQiBmYGaCFGUZFSySEFcBWNJbb90cnVWnGzmfI4TbsLEFB4TB8jBO+dGaqYoTQz/8zTt9///ju3br/vxa+++cXf/xUy2m1HZJ8zeJRZ33Lwt47aJ67NHFrbeiIy4Gr14eXwtTfWG9mZSBLhiGkqaGxKbJBzcZG5Qw7mHLngSi054WJfXQD05gILC2YaT2VzN6L67mA8vh2HFWze47wrxDbru3GcUkq1VhUyUCRwgaWqqCAiOTIk5Hrl5MrHP/KDr771x47mr999NY+X7Lnrw7CqNiGAKYOZWhYBJGN4DIsBIxAgKCqJmoEZAiCAEtvBwZ6ZrcfBsoChmqJHRWLOf+2/+Nuri3d+4W//b/vRnrt6WDcXe5EWHRZ1n/n6owcbBTEEZwhECAD2GCoiAgACIamqAAERgD0GCIhIpmCmRAQArneqAoAGwKxNFxQVCRULBdc13W61LaqxjYbZBSyjjqtK4JGgaZ1f6PK4r7k+eDWnlRBT2VQwFFHnyEwUOXiHBgSETEVqrYUf8wigauJc0FqdJ0RYzOePLi/rZIGCmRRTREJDEXWI9hihgoAqIgAiEDITkklFBOVg4SiUbeVMpaoWICA1ASSK/MInf+R7PnLy2X/4qd2QhLQ/2UPNlw+mlDKxhyplMATHjggRnC4PmqbhnHQccs3K5EopzYxzNVVzUQHr4ckiNJDd1PaeM959t4znakjzfiY14eHeiVhuIi0X7Xbn1us1AJgqsYWWvXc51TGZibgufuDP/cD3fOcP/O4fvf61X/6UCQMYACAiAJjB/88QVOwx7yjEMKUBwc3mbm/RTcl221JzYYKcq4CBATl0wZsqmCEBIC4bYu+EYpwtU4acBPIQbMOGglqRStFatIqSKZhTtNl
B8/ydOx/64PtPrtz4+stv/d7vfo6M1qtdEUD0Wk0kx8Y/dWV268Z83sWu856dKRjZxXr809PLlaRhO27XEwKjGSGbqiXVCuAIG2CGVKd2Hjgggnb7IMolM7nEvXVNo1u7/1KN0F/5kB3coIu34f7LVUdYLCOiu7xc5VxVFZFEhR0yoxmoKRL64A0o29S28flnPoaUXnjuO//gxd9767WvQAlICBrQFICkChKwg6JZqoI9pqqMDgABAUlR1R4DAEQFwK5v+75NNeexTGPyjcNOiBgb+Ws/9lP33n3jF3/27+x7ed/JAsfdzMFywea6f/L1Bw8uVLMBgAEQIYABKDpDQkQwVQTWx0DNFAFMEZEQGUHNABHVFBmJmPCbAMUFrlDnez0FrLUwut16UMa2CaaSxqkUtGqIGFrH3ucytX2LQKv31jYxEFDBAgaA8BgKIqgaIgIou4aIainee2RTrSVXMEZEBiylhDayo5QyGqKaMQGCqhgYGZCxmYGnfraPYOM41Jx869B5k6xJfAPdtWZa57KZavYoCACIIADNvH3hkz/8vR+/8iuf+antlBRypV7GPK0VQNXAUq0TqoCRmhGCNJ1DFjI2tZJEBRWMUIC9bzgsxUdqZqwoLrBrs+542Pm8gbzT4GPOIx7u7fvoYggN+/sX25SSY4cGj7VzL1qkGrO2wYW2eebjn/jwh7/v975295Xf+Jk8qck3IaKZIeNj9C21ioqCadvFfhZ323HYFR/qcq9PCWuhmhOZiYgiAYALjMyqyojISuivH3gmd7kzcRFd1CIeh5v7NPekVipAVVR027GcXgy7VVWDKzcPn//gsy/cecH7/stfeeX3v/DrWqRWNeTogwipKjvpWr5x0h4v+1kXFrPORNnjkGyV0ksPLu+9d4GiSJ5RmsCBaShlWBcDFLTYshKYr65DmpyfCzpC1f4Y9m96rbo9xXtfgd66/Y9AoXH1its9muZN63xT6jQMowqaYRUFFIAym3XKmHICsLZr+xAvtlOlqevinWef/1d+8Ie/+vKX/9lv/FMusdYq4qpO5HwXwXkqSlog5WSAuRTJCmyGYGIIzszwWwwKkw/BX712YlrOz7eb3Y5bJqrMbAv86z/6t157+Uu/9Pd/9rjh5446ttyS9nOnrvvF1x88OsOyqQhkgMSGVJlBEQGMCAzARAHA4JsIUaqZISIjVSQyBEAkA1MCM0S1alUt9nGx30+pIup83g3juNmsmdCh04qVChGrVW6Y2Uji5mxEdVaVzRfMDskITQEQzKopARCgOo9AoNVMARTNFFANwIwQjYzhMbbgyZByqqj4GLCBk27RStV0WVARPPUHhwg2rteWEnWz0Ha1XJRNEafNArEi+pg3pU7K5BRMpDTz9iN//q98z0f2PvvLn6rGPsg07jV5tr0YSl1NZUSNu80UYzCsaaqsiEiCAEnJmYKKITnzyCLkGnAMcYbQCDccWwLSzX0thZzheCmgxA7w+OCQfSS2YVdFBL/FzGLQ44N9ER2TeIYmAsawPHny5Ok7q7Ny+sbXJU1JhJFVhQAMkRgZCVUFDVVUgLwd7C2GXXr48GFsXOvDxbDLGaJTMC0Zcs0ASEQKZqCOHkMXG0eEVqogueh8QCOH6YkDiG3DUtmRQvGh7bq9e6fjS2+8Nt/fv3nzxhNPXTs5vpXK8OY3Tn/7879ZdhOb80HJxyoUHC56Oljw1aPZch5C8ES+WmFrN/n8dJ1ffXu9GSYqKugd1OW8Y+bL9W4aTVTJ1zALQugbbFrbroBDbpchZzm+Tf1+HIedk3j3DyXmvr+jm8u8fl27Dk2IifvGrXfjMBZQBq0KhFaPD7qEsE6DIjiz5fJ4N22aHp9/9jvnC/oX/8y/+srLX/7SF34Zq45JUsVhrMg0a/lwOc/DuFbIqVa1Iec8GXtEBFAEJUB5zAyRQcViDMu9WZ5kgh2QkWDmSuD9PPznP/ZTf/iHv/Z//P3P3Fjoh28c2XC57OHk+OCR2q/ePbv/huYxISEAEDokABBkNKiAqIqM8JiYAgASmCoCgjqggo4BEdWYqRYFQzBEtaoQe3QhemIfu37enZ2djuOOgLQaGjtP6KSYgrPYtAZ5vESsYmbo2cRYiX0zTbvYNWI47TYIDKbeMzHmVM0IDRUE/j9oBACIaGY+ICKXVIlIUNAYHR5eW2KV80cXDgIoEoOpCQJ5ZgfIkFO2rGbgIgNrDJy2midzDFrBDLkJH/4LP/KB5/r/83N/p2XCRq2G7X0rOUpNWlLNVTQzNUQqScxMxIgYyIzMeTKppgyPEapq1zvfOOUaWtcvSSVvzm3aUhqTq94EIBJeObkCqLVCKYYAZgYAiMCutm1DxDnlEBoArUhHN5984skX3njt7fW9r4EMCp6ZmLGNgRxVtVQkZ2FiB77UIqCeDRBzys5zoG7IJRf0XL2DnDUnIQAkMELVivCYIhIzmVZTj4Fi0zikcZSWNv2iVRFGBhYAQPSqNuRMvj08Pun7JvqZyphH98orXxs2A4IjL7FrpSIYBKeLNuzv8bWTfj5roIKC9r4TD2+cbV976+E4jg4cewoE874xxfVut9mWVHPoSD1DqRTB73PdqSowsUrxnbUHbK52TfPoq8xju3jCnZ1u6iU0DU2jqFrXuTGlXASNQNWAAPRg2VeTbZmUjAn7LqDj0OD3f+8PTXn77e//7nfe+so3/vg3p6kOScZCu6nscmHQvg2Nd6Io1XZTHYoCmG9QtJA5NhCTMVVVIsRalQi7PtYq1CJ6zFMCthjYzeKP/9jf+5X/6+d+7ec/e+uAP/bUDduczqItF8uHAr/xzsOHp2ZiVhyiErGBqVYiY8cGhsQqxR5DADAQYI/sPCDXMgGhAdBjTJIrVCRAU60GHBTYNbFv2jY4Pn94KmIIaChIpsZAhQKHjgAJicoWZCwUzIBrEQIycJqk6+bFbBrOTZAQiZAjShWpBgaI5JwTEVVFQwBARDNjQgNAInau1KQVHbvQOkBR0eD8NE0K0sWmbdpqmnMqUqUqmRMR8kgM874bdmMajBlKEiPk4D/6L/2V29fwV3/pH0T0GrAU1KnGuK+a8rTLaUKwmghMQNEeA3OeAbTUEgKbiGoULUjAjlwgdgiECNAfaNv7ml0VKBsrg23XCZnx6Gi5XLg0hc0wEjhRMTVA8N4RsZkSgYHGZkFNd/WJ6zdvvu+1V988f/OrdUqCwoTe86xvPWJKdcy1iB2cHJSdbdb31dgAEQEMkIgwGbqS0SwpohCA9CYjoBETgCKAmbRt6GIjIttBZsv25Oqxij16sCLdPXVtURG9EoAhWU5JmX3w/fzAMJqTGOaI+eGD3Uuvvj7tBlAMvd241rGPacqeAJhFp+hcF3tgBIDo/WVGqoXQD7u1d41jk1piaAVpHNOQCnmHDmrJLOgaV0Nl1u2QtWgfG0CnnFxnLrjLd0bI/XwR1hcbqKCgpbIhEIqoGCgDSJVSKjMDARgoqLIxQxObpp93M/+dH/v+XKdvu/PRt978yt2X/9gjTEU2u3y5GTejmEj0TKjo1AeXsg0FRPHg6g0VGd
fnUgZi3o1lmsREVcE5jo1DpAoFAxqCGTq2xcnef/offuqzv/Izv/+533lin567ut+Wbed1Npu9N8mvvnphTbPYd9tH0+5CAQERHkMEMyAmdlRrMRMgMFM0MDAkNmCrFQGAiL1DZ1pEsxEggAlgM3dqSBRKSahqpQI4BDRfw8KBksDE3vvGXKA8cd6WslVmM2VRi22gBoeHA5oDj0BiCUzNEMApAhIgAkoBJBIRAED4f5kZGDITO7bHsGox5hBiUFIAjSGoSCkpOh+dL1KnktDIDJDJqDD7POY+dlPKtaBJlaoYXdc3T377v/zMdf8bv/wLHrrqVJWqTnt7V4fhEiQhF6i2vahswD4Acj/rhmlbkmitqGagRE60KqhzxIGRzJFT5dCZaqkVvceu7bXi+aM1AOHR/sHVk3Ya2rPtGlEfExEAaAIiOyL0DtSI/R43/fH1w+s3X3j91dcevfnHmiZBZCQwbGJMMmmpgM7FePO55zen46PTl0EYsBLDtzCiIvg0gUCKxNkJ83FKl07FENUqGZKzo70FEW62u1x5sT87uXLM0DxcD8HSrVkpJkJISAAguZAjNHGhYT/nkH1YNr2/vJQ/MpXaFwAAIABJREFUevG13ercSjk8ab/tuaPQtiKVQD1zxUSea9YZNyIgpi/fnxYNXtnflzrUgqI25Txkebja5ixqwM4hgWExCLUog4Kin5FIdRLY+SKZHDddL2NB2gsAm93aDGLbijXACCKiRSWZVJGChoA05CJVTQWshugUwSh0M7557blSNkf7N88u7m1P74Xo1WAYp1IqEahAE5hIXXDzWYdIu129mIbDKx+qRS4eviJ5w8wlFzNW1Zyrc9y0zoRSScoKHpAqgY/7/i/9Gz/6m7/28+986bXbx/GwwT2U6PLt20++sR1+682HNo+ugfV7MjyqRIpoZkhGRcwAulkjUlUrkKmJgZqaVZaKBIYKhoCOOVQwMgUwAFMBCC0pIhkrVCYEgVqFyVmAvZvLJuL5+nQ2W6pOIdbdKpRtTWtkAzRfanYdQUv5vGhS7tA8cCUtaoHQvokARUQzwbeYGZgiopkBALtIhGCqUoEshiBg6H30XEolYjCEmtqmAbUxT6LKzAYa54F8MvPDZZHRRCpBIyUTMra+7fjGh77/iX37/O/8qpcu2eS9Uwqz2fE0nZuOFDIJXN6b2BgdV6GjkyupjprTbr1BNTMABCIWU0AEkBAB2eVkzKBibA1CsahNEwmploIH+wc3jxfbic4vL8mzqpkpgMw6H4IHMDAVAWsPHMdbT1+/fvu73njp1bt/+vmctkaAgIQYQiM1p5oZwLXx6pPPbi6m1YM3AFlVEFFVATT6CIhTgmpp3jgKPtli2q7BikFBcggBUZrWE+EwTIR4cDQ/PLhqaNttaoOb4wWpoWN7DAAMnENABDADRaez2ZGIrSu+/Npud35Xarlxrb1z+6DtHAKaMXB1pMGHknU38GaaVN1g84O9p9PuaxGjongqqJZL3Y7DIONmqgwszmrV2Ww/+jANm1RCf6jDdJY3xuigsoL6nvJWyoqqCqgSR2ZQLQBmhm2MQzXUbOTSuDOOnqBWMlRAIwLfRBEhrleu3KiTItSuPXz73ddqCQCGCADKgEVg2XHoYuMkRpcVk7phEGhDngpNGwREkCElNQcmoBGhHB4tS4HNsAFyyMkYyPPezcO/8e/+9D/6zH/98m9/8Wju7lxfyLBpS7ly6/is2Oc3u81mgqIw+Zps1vW11m0aU1JUYMbQ+b1+NuVsBKXmYZcMxMRMycQeQyREJK9AqNUYGVBLFecdOiPmPE1d31ZQSUoO0ZtvOC6D4hhakqIAihMPFzqNgAkIXSkipAgIaqaAgKFh9coNIIoWDH1IW5X1pMCmikQIoGqEDkQkwnJ2PedtLQOiEbnZflehIKAPXEUkg05mWvquq7XUXJRI2VzsF8dSy7Q9L7ZzaVMcezMTEe+9eeW+ffbDf3H/ZPvF3/gcCuSputiwb/rZ9XG6a2OCUHXQtFMwp5jUmqMbV7gVB+ny0Wa4nKyCQyTvilQCVDNukD1KUUJCIRUgcAZG3lygXDLuHRxcOWi3ibfDjkFM0cyQtO0DsyulShVT3bv+bOO7w5PFya0Pvfnqy+9+/Z+TstSCZAbCDsBckeoAIPprt5/fXA6r+99QAFQEQBExsyawD3EYIZWpjcRNk2EvbVeoA7EZEGEE9oDGTFJrYD25st91S0DbJmwc0/BWFzg2nn0lJLDGISoqMSgKQSBPYHC2xdfu5rw5zWm6ca197tnjGIgAS1LFArUadQ838OB0XaWZcr5+531Xjj72tS//UhQPkABT7+Dm8bwPuK1y7/yiilDP5Gdhhteu3149omzmomw2d60+LGMokxhUanK+7KYzqbU4xBCdb3ypkrKYikeXyF+/9szq/J3NxUNB5xEErUI1thDdrFsAMIDeee7D4y43gRfzq1/44m9tL9eI8BgismE1O5zRcm/uvHmHBjhNRbvlzae+Lad8du81HS5mLdcybnfjZoD1phSBtsPQBNPKFFUzISfh4ztX//pf/Z//4T/+H974/B8czf3z1w+jbubE2PLZRF94VERBpBI0KWdHtLdcEOmDi3cc7AU/T3XbuLibtnEejHXYpd1mQvVtmEMtwzA6FxBJMImqSJ41bUEgbpxviVFgIgMkMoaUktTsHDhHoiNGaeYxhlDNGk/rs93qrJQNQkGtaqAIpKbI4IM3rELQzztDLaMdXmtXZ9t8WbSgiCASMwMxgGIt3WHfzJabzUpKJXPoqJk1Shqd56AItFtP28tpEXtDSLkYGLuqbFduPh2XedoMlw93ZaN5KIaAQCLWd/NSzRq99uR3td344K0v97FPYz04OFaKFCjJQ0kKhnks0bdDyjpIKnJ0bY/84F1z9nBdk41DYseqamqeWKS4hpvW5zQkAXIeEaUI5cxMDI7J4bWTKzdOZufberm5RARVNANA7UJw3qVSRKFdLLqDJ2ax9SFfffL5Bw/eefDaV8cVmwoiIFYfzESLFBQVh08888H1+e7y/mtA6JBUrRRRsb7zzodxwjGPkU18VD6wtAUZiFUNvetc3CNiRFMVptr33vuu5pyIScnlB23kpmPnpG2CVvBe1VwqhBRSmtRZ4916cPcfZRsfDsP29q35s88cEBIrprFUo1TNwt57Z3l7dupoMabxiQ89f3zwHS9++XOczOqqdVal3rh5zTOtLu+fDyOgUk+He0+I3/jGed5bHn1wvqjri3vnFy/WXC4faZ4wNJRXls5VpHq02dwt92dTrufnY67CAjaffcd3/+uvfP3XL+6/XdUeI8IKFci6Nh4dHJk6MHj6qQ+crx7O+27WHn/hD39rezYQMgAgkjAaUOdS03gRNBNmllqvPv/8zVvfzt7O7r+zfufFo/2mbSjndHFB33jrvSJxf5/u3F4c7gUtVlR3eXjwUNub1/7qv/3fffoz/+3li3+619HVed9gjqZx0d29kN9946Lv5ujh6o2nNtsxTck563u3TWdQerB2TEN0fipj6INQVaHdekRxw
cfYL2ut+/sHInWzEkNUKdFD6BdEIVdTA7ORkHa7HXqH4EpOYGYqw+40Q1ausXEeIlJSgWGtBqqTSRI1dcQKpgTkCID7ftZ2nQJo0Ukua9K8K2QgKkTExMTOrMi4iXs9z/qaMypZZSzFmJGJmQoJqoFAniw4AyBRQCJT4sB7Rze5TVZ2u9VURikpIzkzZHLOBZFd4dofPBN9rpdnbdvnVPYXB9y6JBM5rpkZA5ot9/bPLi/QTMR1s0Zh14Vm2BbHjYimade0bU7p+pUrD87fLmJm1eqY1/eu3Hi2afZ36+nho7c9YwxQ0g5v3rxxctCsEzy8fIiKKghAiNbFBgiyFh/9fHmt3btONTfteOvJZ9669+bm9MHmvMs5MTiy4rjUcllEyFQd33r6zuZiXD1405BMqxnWqirQRBdiFHXbcd0xJfbqj2hamyUkUUMwH7omhEakAKkCEVbCoFKqBYQO87lIVcyeXdtA14HnbjtM27EoMPkGfdsEEnBpk72sdrv1U0/O3/fsvgOGCpLlcoVvrRN2y3G7rtsdQyhan/y2j145/tiLf/LrvmJ028P50enZaQiuCX57eYYhMuqg27abx25etRKWbu+DzfydNA6r1XsIZVzFNAB7q+uSLlRQguO438dIaUrD1iRP3gAOZt/9ff/+S1/8hdX9NxSoiiQAICSzNjYnV/ZzBjB4+vYHTi/uH+0fNuHgi1/6re35xsxUwXuvgOSCx4wOyKyIGoCU+syHP7Z/9HSF9eWj7b3X3wquOJZcMxQ43+xm/XXCs4++7/rNa7HUtCvTlPIwULh29Ud+6L/69D/+b8ZXX22jLWNcaHKa28PFvXP97TfOJYsRNN0Be19MiQVhqEI1p7aZpWyOAJHQOQAWMzLwpFJH8weiNURXa6nTZSmmIgQlxJhSQU9Vs0OHgEhc1YpkVSNEJkoF21mMDSPWWqgCqJrkChWgchoLICAiEauhIYBBYEcOqykBAKFW1joqICEiEZjVKmhF0xRmndsL3hEbQMVhtS2GvmlNqpnTPJkIsUOPCGQCCKyU2HE/u7IbH9VpMkUCLCUBEAAgovdei2CYLa8/X/JZOn0NvZuG4XC5p42ACSK7YGMSNmRyRWxal6J2cn2vXdTtJWilccgl5RBajn6cJkeMLLU6BAQZIKubLRWdZ8jbnXfRVNs24O0nbx/uMXSLDNGyDmNWkRDdrGnEoIp2bV/NcSCrEONw49az79y9W4ZHlvZrxrHsas0BsOp5KRZIfd/PjvZkKKvzB6A05WLiax40ZwGbLWcAIU0jiCQTCAdld0E2+hgRnIpg7IOfAUcFICREIQrOQTvrzjYXXkBqAauIHkCRCwhq3YqqaARP+1ePY9vvNjldXnC+ABlnh0fP3V70fWW0cYfvvJfvnQ4htABIxkgZjE+ef/bmUx8/v//2cHZhMjSdn3appEyorUNxroql3alYkcgoaqqza88b3dfh/rSpVR1x61vw7bR6N4+PCij4JT3z/J06Xd1sTzcXj7YX73mKfNx+x3f9B1/94s/WTeaWPMapslAya72Dw8P9aRyR6NqtJ3aX27Zz8/7w9ddfHNYyTKsYFxxltR4dhrZDA8cmxAXZDyPsXT2++eTzTPHB3Xdff+UPEBfOdcRkRXbrt+P8KccP9g5vPX1jedjnWlbn49bRbHHrqT//iX/nH/zvf7O8eTpvpAfYD4oMbYhv7vTFR7re7JZ9p25m7ONiybUQPNqVdT7nKzfi9iLtNqXrmlowlQk0iyoSN02/HdaQzbEXBhDNqRI52l8cBDi7f8aUk6PgW0ektRQpRcERGdXQcalQR3AsITiONBZliuPlRtQAMScN1EMdqqIqILJBJWN2AuSQMPh+msZSBgQGIwAwEDQAIh89eWr60O2B5TnV7epSmz4sF5qy7jZ12CQRZUfORREDQCavpNDIvD2c1hfTuANFAlQRQDQzVei6LtXR98fzqy9Ivru790YIuKnwxN7Berrw3iG7OCdWIwex49W6nr2xMehv3DnwB7luwrBZT5uUBiVlP/PjRSVn1FQ1lKEqMFYLPgCBEaWpqFZmmi06fPLJ9wfeQNtXaxAxTblvWu9IsRqCqJiBI0fsiFzf2XLv2v1H921aW21NsaZUSgaPLcJYldB8v3d8fKRJh2HFLgBV03h++t756akPeHT1JDR7U0oXp+9tp8F3R7o7RyzsPRiVVEXENTMXekNWqABE6J+9c2V/8YE/eenFtL5wHtWI0AMgkho5sK2nUSSB882safpZTjCcb2V7b3/RNMuDOO8P5yU6fPfB8OjdVBQN0wc/8P7X3n7QxdZTs//U4bN3/uxbL3/p0d13ahkUimeHaojmHaRcNVugVG2o3dyDlvX9Wx/8ofPNS9PmVXQ2jo0BLPYjhXH1Ttm8JwiuOYCnX3gSdLFZPXr43umwmggonOw9875/7fSdX2nR+VYcUHGY61QLBd8S9LVU73nvcEkKolm1P109SKtkiKGZX7t6/M79e8NlaluOzcLFMt+jWsr9dzeHT9yZ7x+Y6fri4YM33zBzzNEcyZTG9dvN/Kk2ni1vfoi8m0XbcynITk36mze/71/44X/06Z+Ae5eLOfhSFkFj8K3zZ9S9er584+GDg3l05pRid9gzWKl2sX5ZNvnKzeO6Wwy7bTuf+ebAhQh5EilVhV1cX5xtL+5p3hJxqlbSWI3f/4kfyo9efuVPvugjcZwjAYKqllxzEYghANZ+0YmltNbAhOi8p4v1VAuR4pRSv+QitFjcJrmYhmHcTSKmoATYdD16JkAzGMeRWGvJZqhijyETgItNxx7beXBdJTtCS+t13j/oD+dpmtL9dy+GXQlNS8FpZQBEZEJGisplPuvW5/emYVIxUkAAJLJvwhCCIIXZlebgFuh5fvQmedrh8qPPPfvKN36PDAicbw1Ju6Vf7If1Znr3K1u19uozR2EP0m6XBwXhmnMe0VgtSaljCK2B6ZirCSgTICJRcEkYKLcNIigeX3um95v+8HDMXLVatejDrG9Z0QhVINdaLSO3IcDx0aEP86/8xT8q487UvPO1yGMhcDasmhsPojqbLUoq47gFxNhEEx63a/dfMhGGWRvifs5pWJ0LmmsP6+YRs5IjRKoJtGTywTWzqqTqEZE55nrZLvusYKMxeuNKyACISNS0BNY4C1jI94CCjou6cXUmu/cO99r54bWB9+axdsGtNnW1Gp3PXdfced8nvv7yHzngPszCtf7mjW97/eu/m7crIjAjrZnA9pbLKY+ryxVWbiMCibR9wzCu75088z2r9du71SvcmyI6JnJWZNi+Gzf30//DFHyA63qWBaJ+nud936//df2rr7Vb9t7pIWRDQkgINVHqIMXhoAdPPDgoOs4lDLYDA4hlbAgCekbHOiJHUMCBQJAimBACIQkhuyS7r716+/v/lbc9h4nXXJf3TSBVXUzNXY8CI2U2L57JBxqIsdmamX8+uScyGYjYLszOepoUeeUslHngfWmtmurMJXGii3F3b7s7gKErK6OBQiVweW5ha69LECRp5LxE8qTKfDDcXd+XKkwa
mXfWVkU+8QgYxHFYb+qJGXcvNmeuyaKuasyWIMHbpoLE98LILl1/250nXv/Xf/O+qF+16iR0VQtAKoyE3KjE2b4qCEH3Q7a1eHZYqNFoPWkpi+PRjs+mas5Wk56gOF5Yvi4K08JMvNHOM6lIStrfulT0N8mbojS+Kr1QN770p4rNR59+9IGs3VEqYu+YNQCX2hRlQQxRFMRZOBhMXMFZilKis+FwXCqVCqlGw74MLMikPnV00nsKrLHGWS9kGBLaOGo71L6y3rNnEEpaUzprjLaERLEMg8R5q0JIs44XYyWmpbL9rpifb5eDC+PBeNQvAGRWb7OS7AQzEJH3XgjhvZdBNRnvs2HnPBvP3kspmdl7iOPYiThrH+QojZXV3RVU0meHb7nhwBOP/0+0knVMEhkhaSkUhc7t9rmuh3D28HTUCL0xVcnekHdAVok4dOUoLwoBkslzlZOkIq+8tewZCH3QCoKSIEcGnFq4pp0Wx667LoynnLWVsb1ur9JFuxFmWdN5Go9LRhpPtIzs8sIyc/TEax6e+chyWY5atcZGtwtWX3tgYSppbE/204YqDZZAk0Fe5H1jXLtdNyU++ZqHg/dJjOHQ4YNa0+VLF2xeYRQE2Vw53CU2QSSEkMXYstaoVJDWjVfgGUgjhAAkk9gASMfgwTlAAnyGJo6CxNuJL/cAMYqVDEMRpdXQkt5KIxE3ZjipawshyeHOtq7KWpJNz9SZ5PrGLmEVRbXFG66Zaj/r/JP/HHrnSbA3VTUGb2en5yqdDwYDY0QYKxJB1qilUbA/6jZmDrKp9GibYnRgCMl7Apa99e29K+tSiNrMdGP+KhQBF8XG+cdc6a0vw9mlhYXbevtfm6q3GjP16Zk5FTWdnkwmq5s7Q+nVMA+OHH1Ooz594dxDe+tXJkVQhsKCFhQ2UlhqT588e8VZcfDYUlLvKGs9g8snw52N4WCYG+OcR8MWUICnMG7OHSjyYrh3fuHAzRLWrJy24LS16AiG+3MLfMtdL33uDXf/1cd/M+npWuSU1vUIg1CmcbxS8NmBzBqdyd4Vb0dCRJULBJr5peuLMt/ZukgJe2/He84KMTN7tJzovOgGKIBCCCKMQgVUDfeySA8nw+HmwCt788vfPlz55qXHHkvmjnhfsHEIVgVhqWHU3/HaJXEYxmr357rOMINDBKFCa1wQhkhkdKUeVOrhhso6xWgNjCGKgnRKhFFVdmeaBybl3qS/bayXKgGUDM7bwlYlkQga0/VaQ9uRCssgmPawr0TT4VBPptI02ls7qUt2ziMopMAhCqG8c0TEzCgBOQyCotKFQLDaWm0lkhDCOReGsRBiZKnePkK1qBYq3VvXvmws3DLdKlfOnZIkrAYKHDkBkQ8j2F3tFv0qThtpKxBpICgQwpUl50MCx3GtBWZYVcwoVSTdZCwjMR7sg/Ps2bO3NBWHhdU9yQqnF69rpvrY0cP19kELYMrKGF1Wk3YtipvTbNzO3mg06VlBgqKF6djK2hP3PDj10alIKilqW92966aTWw8uz8x1zuz4tfFqlEYjbUNAdrn1FISCffqFWz4t3o/z87Vnnbh1c7N8/LvftGUlopjiTjnYEuSDkMIw1BV4awrj0rheekYWlmUoPBMJFWEcoHbGGfDeCaGUTKSsSm8QpIxAjwSOW/WOSmKZTe1uXObxVqD08ZtvjVrHxnk+6uX9nbX+1nlm26jXneGiKEMSLN3VL7irnhw5/8g/AqaEVkpJDADYmZmVVGxt7+RVFQShQzXVmZ3qzO13B0m7XY7WJqOtyuW+NCiSIEwR/HBvON7dDVWQtqaSmUNx2hjtb+5cftJpC+CHv+JqU4e82YjjpmewXjMLqQQilGWupHSO2EulVFXkTlfaWBRSxCmDC1WQBvF+b0+QUEGk4kRWZW4KCKgWxBKcM1x5KCdlDjY3ZSaEkwBGCGOzNHNgwWeenJbAuQeqFJmrrjs+P7V86skHlfWCNRqTRIGUUim5nbuxU0GcFuNdnbNnx4KSUBpjjfcoTZrUBEbN+kKt0SzK8f7eqoeQUbAzJCR7IUEz61jJ4TDfuX576g3JDXf/fLn2wIXTTzanFyeFB1uoQKAMKajvrp5FawIlQNHoHTn+F+VfpCf35/CM1hsWw0dSN+5Wv2KSDzZLjtDn6GXlR/X2cVsN3ahXP3SzZJj0L+WjLiExBJLZusJajYAiTMM4lUGQZDEKQWysnhTjoaPZVlNtr1wIBBlQ3oNzXgjpEZgZAYgBEBiDKI51OZJSWD1xulKkUHqtvVQBgDPaRo0ONY8sH14ebz01GfTj+eccnuPTT56KVVTmIxmAA4yV0F5Uo1ExGjdnI4uaQEklPXExJJ8770FFsZRiMupLUtaYQBF7ZxwgMiED+Cpeqqn9yd4QCXDu4LMSmS8sLIh4WihjKxuoQJsyVhhkTba2P6i0LcJ6VsumYmFEXHvsngfT38tiFTiNe72+NCPxYjO6o2x8t5WdWphtTdnAzDU6CJVnWbHzJrz/OX9H7+Xp+fbSwePrV7bXLj1FKDGMRNzRo13ESikUQoVBFsaN/d1dQZgdmO8cOXjlyRWzs++ECuIYFIHxQAJZsOA4Ec163Ovm46JQImNbpKFvN6dGZT9Im8NeD4ot9tWR626YOXxDnDX7++Xe5nq+f96akpmr0hhbJSoSUhy746759Mjuqfu8DyemsA5HzuXapEGcpdLaoNQOpfdQhGEYRUlRlI3Fo3q0V45Gjp0pxkAE3lTlcNKvhNNplk08BjKKklox7ufdDesUkR/9qr7+/teSfiJOOhNtCj1GR15MEKT0U0EcJElzOMy10ZUZgDb5ZAJCcRAycxIm043pi5fOduozURgO8v1WfVaCcHluAx02awmmsyI85OnITKffHy5ff40fVpuDnZObl0+NRhvjXqd1nUix0sB54YVXTt/5ihdcf/DYJ//Hb2XWR5DzOJ+ZSuMoSdP4oYu9HV9bOnT11pXvjfd1WVVeokKQEZSgw1grqKOuzyzMNzuLrWZrfe3i1PK1JONhbzeMa4hu++KlavC9Q8nchc3VMz+xmn0wvfZl944vP7SzutbsLAxHE6snSkoVZjKub62ddKUWJIz3xbsK9YF4XPbgfxPfDJv/rgXO6V818e83LSWSDIIQCuL6wnDcDZGj5iI6W05GphoBl1qXYBjAsXcAiKzDKJMqYATHmoi89wxosNPu1MeTMolTBleVlXeslGT0TOidZ+sYvTUuDgNdjtDYqhiBM0IgOq60JSEdo8pajXbbyFazEbMZFP1eNHdzK9pfOXdFEhpdMnkppHdRkLVsuT8Zd+ePXm99zsWGcR4rV4ykBeWMrjVazlldTrxjgYCIhGidA/AInj23rnrpbH3/9CPfUoHDztK1sw3Znmq4sCEprMqSwOblsJ3VMMqI2bF0vqzALcwdIVt4kqdf893wtyOBMOgNXWU2Pn7BvMDCM+aeqN/xrjvGtbgmY291ZQACURX40Is+H3yAap16q3Vwb3tj1N1mjyKKKe5Uw20hbBRJqdRkrAFCp41SlC50akuHt86t2f4eB1IGTZJ
IDjwpIu08socoDNhL7zV44W0hlc7CeFIMgqzuKoZyC4HqM83lIzcbsMPhoFUTraBOgn/AWp4Uw4DlcDTqXHPDTWkHrvwzy/rqqLjcdRuDPqNEa4ybSNVEGYtQoU+AQAUBg5s5cmi4szvaGwCSIAJiBFMVY9DjiMooRg2kkhkg3N1cK/pdAMFgJr9qb/vmj5nemcEwL4233mIwHaZ1FCJQ7DwQCvxfmDkABmd+QDNPynKC5JXwvd7w4PyxTmMqz4eDcnikVKuhWGynr8vmrjPjqRDytZG7bkEKGXZaUFh4/JwCIRuz6xH/VW9lf+GG1pz6ylc+64owUfHdb7jnUGfqMx//cGY5FiXmRbsWxkkURPHXz/XGonHg8HUblx8vh5ZErFlLVhxPStxUbopME7iioObBZ40oTuTc/M1Lh46AyTuz85NB9ZX7PtKxjWXsPbrfffqn9tI/aDznlT+9/uRn8qGptWb6vZ0q10mcKBWWlgc7l61xgqTRtnpPCd/05X0G/jd6kGr3tKw3+v/R8e+3ZNK2k24YBUgURBmDlyoQUiJ7bXKiAJl0UXhXOGectYiE5IkiEqEMAo9TQZyqMLVMk3JnambGU0QyYEGEUBUFAARhHCUJAyshK12Nej1kbtTTynE57Prxvs37Oh8W456S5EF0lm6K0nCsg1Ch07uRwHDu2c1g78KZi94U1lSAIMigaAS1qWK4bkvoHLohzmI9Wpn0t/yk1JXSKCSBUKGSYjIeEiEzIBEzAgOiB++t8XM33tkINk9+65F2o46LR65dbIdT03UdxkGAtkLkYGdnv54alTUFAomIHVbo69lcwI6Jz7z2idZHm1VRjAbj8c3d9Y+vwb/xovc8L3hqCUpfjEctFaM6AAAgAElEQVTWy6hG3kTfeemXwg+o+nTa6Vy9ubky7u+h86ACjKb0aFdKHSdBEqfd/bGrKgcogZgEUoJRK4xSEdU8OCAfUeBljABSCOtKzwatYZ54W3o/EaEMMNBVKaM6V1hNLqMPkoY6dNWzBpPBuD88vNRemj8I5AGBmbSpSPPu3nb9yLEbs2m5+vXcNx9d752b5PW4wc72+zshFqBkXqJULQ8YJHGYxaXVh47duH3pys7qlpAglBRB4BGa9Rb7AMtNyX3Lrt5e9KbYXLlkJ2ONEoGL9/hrvvDazXNf0tp6h2VhPHCQdeLaoqCMFUlJzloEAO8dI4MHsMDCGCelQuJiuJNEolXPOq1mpCnsXj5x7LoXjliWmz7Pk+2idtUhYaEs8xjY1pSbmkovbVedJgzLcHZ6c3H5dwaXy745c/GM1eZNb793oTb1D3/zwZqHRFWYlzO1MKmlXqlvrpT7JV9/0/M2Lj9ejqxFZPToAqoPrd+FKo7kQj6pApmEURQmWRClgFRvN2sJTc/PWg2nvvtAx4oDFd1/5XsXf3Y/+3D9OS/76fUzn41lO6y11648rYsqUEEUhYZtkRfOWW9dVVXjXxz90JM/c/J3HvizRwcvftVZ/pMTb9q1X/rwhh2V+ldN8Lt1Ebd4sOnBICoGH4D0KghDRgGhTKyXHtjZ3BnhfcXOICIjOCelihkFgwahGAMVpWmzVa83ur0xSUkEgZLeGGAPhMZ7550gkqRMWWqtVRwTOG+FZ2LvJYAZb5ii74Hr7XkvPMezy4sL0g5CJftiuZ0O+jvdcXd3sLcLAISAQeBlxPmAnYtrHZnWwygcb1wej7tEIQMLkkmWee90VTB77xlQMiKDAtYAiF5OHb99Ktk6/d1HFQlcPH51p5YeWEygXiclbIUE4fqV7Va9yqbqHkhQCDauLAKFMRatRnbqR06d+MIto+Hk1OXTbjL53sNn4N/44Vffo9KGd2VhnCmKLMnyUfTIyz4X/kbUmWu25hc2NwaD3U2ojBdKZbPVcEsIFyUyy7JBb1xOKkTkf4XAJMJ0LsxaUqY5l6lVGkFECSU+MPXgcBEQu359tNvzriDPCN46LxSx05PhlYhcu1VfWD7eHfTyyaSWhDOdthREAOA9YsCm6g53F2+663hcy8889NRwfLY7iNuzWRT193aGo357qsaOJr0xBC3jMUxUcnDROlycWdq9srJzZQMBw4Ta7U7pi0Z7rqxg0B9KO4Ryq55lxtmdjQ2oNAA4sOP30cG/ef5g/0kKlMn9uF81moLDeNTTQtZFEIJEktJ79OzYAGvH1qAgDgMpQ3Jg9S7GrdAKEePVyehHX/Kyaw9eJdY2/X/7m4QNqUBEUtggatZdKFUtpeaMsBUEsSXE1Q0XJOXhY29bf2SQ51WJ/8fb7k08f/6Tf1wHH6KhajLfyNJIbo7Gp8atMYhbnntn3rvIZYQK0DmBYru7OizXpcg6zauNLZj9cDCxflLPFrr9FeRQxb6VhYk3c/HMud5gdlA8ubt25Z159jvx8974n1Ye/1Q1jGudZHtjm3WppKw1p5wImaUi8MY4B9s/u377yXtfdnX2dvGX7z+x+d5H558YtX/8E2UxGu68dVX+VsMFIU+2kVQQhUkWWVOB8c6aMGhozh1U0koLXnqpldB5ic6w8845RCQij4AgGaSIotljzxFub3NllRkJ0TEQSUZAkoTI3iORByKBbC04ByAYkKT0DJ6COIog71bVdpJM1wIziQ4dv/a6qlxRReGnrlW2i0BTofrWo1/TQxOqRKAr2PtqwjIOVehBBkmmh9vloE8UUixJqLTeMWZsi5wwsM6gEN4hCPDWIggUYTp9vNEoVs8+FZLGQ8eOB1JddbQpGqkF6wxKSrc3+1k6as+0nGfigB1WVWp90I51ayp94lUn73rg+cNRvn7l6bEZD67ur7xpu3uiBIClU62r33NPmjglubK90qpIqvF++O0Xfj76zaRzsD63fHDl3E5vY90bD0EYpLPVeFugiVKVxMl4WBbjAgDwGRVCzGStbs3PV/FyDUs1G7313jf83RfPzam56Ruq33vXLzptf/djjzz42Der7vZwPHFVX4iCSBw81Lh+/rr7/+lPm2k2s3DAWDMZj62x9SwjJEYPzBa1rGg4HCw992XKxBcf/uzGYCCUCuLUVlWV5ySoNbswHo6qfp+ilmDhkYKpJqrg4NHrd1Yu761vIaKI7NLSjEdbVOFkXFFAURjYsjrYkmU+Pn/2KactI3j24/fB4U/eOe6fdcZO+hPQnB1cFpb7G7tBvRF0Zp0AFQTespsUVa51odFboAADhSTRM+e7GDdVmFl0LznUePPL7ow3d9xnvxZvrclWI45SciqqChlE3KiFy7PJLc9xL7gx89Howe/Y3T146jxreHy68Z/PXYAw/qm3vWkyWv/y//c/ZpIQzThBN9eKwiA7vzVcMcsTqa5/1nM2Vx81E0prQbuVzkzNnj5zcmvjIkkfqGY9nVlenmeEwWhgrBrubxAFE1OwHt59eA5ZfefcxflR+Xg12Xpnkf5u/NzX/sfVJz7BZUMmsLuxyx6SJFKKiqoajPaTKKpKYytPH1l88RNvV5n869mPweYJmH/0vuLOd3/+ytap74S/DuEfNAZ7Fn0BQmSNqbQ2ZZwFY4fDXe+hE2Z3LB45by
ep5ZHNN0fD0XAExP3ujrUWAIiICdkDiRBkMH3VTaz3B7tDQGICQCIk5z2rUAjy1klGR6zCwGljtSYvAFEEAdAPIIuAdMVcChHUk2sGfKEhyZv9LGk3r74znqxs7veW2lNPnPyOn1hUNJUmvXFemIkUkkACY6CCYW9LOyNUmCYtIFZRQwYwGQwFETATSesAFHtjEYXx0Ji/RsJ+b30rVoTX37isjVpaTuNWZFXoLIeytr05yOKqNZMVRRnKmpCiKtI8xxRHlOlzb7xy7NOHqkqX5dg4PVgbL+daqeDosUSfmPmfr7/85ftueHQw+Pj52EKuVHO0SY+89MvqN5L54/XZhaWLpzcH6zueGYNIpjNmvCNJR2kgSOZjo4uKiBCRfwA9chDV1OFrbgYZj9jf9YoXvuKmH/rQfffrtRU1z+9924nTV7b/+lOr+9ssxwPrmKti+cC0CBYbM+decdvrP/mFTwy3nlyePxwqtNaUlVEBeWfZOUSorCYT7+z3l579IjdJnnjok07bWKrSaV0UwKyUApWws8IWGNQ8BiQjb9kJd/2tL9m4eK6/syskOYimp+KsBfVae2u7GExGQZxKlUi/X3Q3hntbAMjeM0P5a3jwb2/tbZ/JR6UpDTKy8ro0CmMfZ7X2FIckhDJjbY11FtgzsxNeoiDPjIDgh0HcELUGEf3kgenp/nZ0+nzLl7XmfLA0nZw6S0GKsUxEJFQEtTS9/TnJS5+n5uby+7+en7xIW2tUGQhr7xpdebA/ece7f2l99dQ/f+JvDs51bNGfipTyprS0m5uLeVMruunEXedOP5L3BUqR1fCa48vbO09vXFmZmjoaREq7cae5UGsmpFLCcDLcRfaVleV473Aq9nv93dLUx8Xp/b3+O/Lsw7U7X/fOS49/Yiq7KjeDlfMrJKyUURrXAxXud1ekFPmkrCrb/PNr737sZ4aU39YYvQS/YlZvaF299k9n1975p08+5y8OtT7Y+uaFVXCbBw8fy2oNGSiWYm9zb2d7VakQg6ghVYWUNUIzqSBKTFFdWb1i+rn3np5hvUMGDyJMGvHMQbTDKvcOPUgJAMjsvQ+yelhLTaXdKLdFwYTMIIEcIBCikChEnEYiq5ncel15i2k2q8u1dlpUk4HjYPHEPTN24+L6ejEYjcoejw0rUQ+jSeWqKg+VEkIJJGeq0WSyvLQ4nOw5k2nWU50Fz1UxGWntIyUQsDSWnRNEzAgoOkef74uV7SsXnR3jc196fGfPt+skI68demfAidFA16RozaWF0cCRtY45UrKRshHCXHzL2uJfzuaF1sb5HLY2JzdNwYFpPNy+6hu76r3vuQQAL9qo/dmZ6b9eGUUQ7K36Mz/yreC3s8Wrss784uUzW731fccWw1ilM67YJyiiJGDGcuycMQDAzyBEhyoMy3e+/Z0Xd5Pa1YNXHft3pwbrf/Hx+/bXzqBR5cBi1sV8xtpLiGiBvCmnDzQ7S8+755b2G3/kxz7ztbUH73/v7OLRJEIiW2qrrfXWKsA4CJw2mz17cWPrhtvuGXerpx69P8SUlfLagDPeGcuaNUtJxJqDrHnVMcZosLJmML/hxMs2Vy6M9ruA7CWQF1HoozioMHKWZahkhOTD7uUnMe8yY0VesBj/Fzf9saPjnfO6YguKlaAwowDcYKxkUHgftxtSiLw7QZREAlEwMnsQgpBQBEpAsTS/vFeVmQpv3d6e3tlaBiLp5pJMaal8P8g6gdaIFAahFFLNT8t6RlpYO1HWFs7RuHKB3EmzN+089f53f+D86e987e//bqaZKmE7oWzHPrfhlV6xMm6OSN5+1z3nn/q2nkiKAyKpq4G3l+0oX1w+0ppOnVNMwnMRR9OhCi2PIwVKdHrbV4q1deFYq2iyv3tp2M/fWcUfTp/38p+7eOoTh+dv7o92L529GAgpo8ixCySYUng2WhtmvPoP0+d+9YVnqbV48NpqNPlj/hbMPwoAv/PV7p/cbac/NrUyHszMD7LwcBzO1Gph0khtTrtbO1mt3d3dH5eTuQMHpuuRIUMUQm6+9pWvjnpDRCQiALDWMHskJYJ61DkIZqi1Z8mMQgoiBGctBaFq1Yw2mFcuL6zzKAQxISIJAUIAkmo2GnNzWPnezl6M2Gwnu+unp1pLTCzZq/nrjtbM7v52EiV93W3FWV5MBpPhxnbf9oYMkNZazlkCTVHrJ9/w+tOnH/jKQ6eCWtKqdZ53640Xzj914fJOGopWPeuPB4PceusA0LOIp49E0N1f23auwFtuv743dM6yN+OiLKVAAGALrXqU1kNjvWfjHBsI6rVZNhOqzOrbdhsfDCsrJkNhbeKKcbtuj0/VNzE+3Gl//if6X4fjL4KzD2+2/69/nI7k46bM136qO/WRNG6FWb22dWm4v9oXAp0MksYCmC74igQLgUVe2BKYwRgHQIrAO2Llj994EMOjrcOTF936E2ubVx556Ilh/+kTt9wxKHYvnbxgKkMOnZsYL72ZZM050Vg+cCC+4dk3PfzQN6i3MrW4rGJH5AQrbTQzsmNyUBRu7fzGbr932yvfsL9dXX70856JQCAGSIiAP0A+KN0kBOvAq6n5rJ5WA6PZHbv59sFwq9xzDJYCAcYBlADkpRCI7DyIAirqbZ/1VckIzGxclb8naX2o40a9iXfs0JMQUVvUUrd7BWWAgTx664sHveHOmSe8tzJKgQQjsGNFApCCOK6K7vTsYo5+cVLOra10TLUoeNb6SAQgsRnFalIFWqsoJCECIQWy6TSioYaIKG31IG+PKi+JmjOv3j3z7vd/6OTJrz903xcj0FmgmtIt1HFokouD4dZoqojUbS989YX17/HYMbuoMzsaDIrtJ6rBaObQXLM27Rhrtbo2No4yZjJubAvDwrD2WbcnGDXR/t7wUnd38J+HtT9s3/3aX3rqyb89eugF/e7G6ZPfBlRREHinBYL1xMzO+R84/rH6bZ889gX5rB8/RhMYvXvqfnj0P7z40IkXhY++b+p48qlHs6c/lk4XiqXupTMHO3EqvbfeeilCNtjvDY9ctdCeCgCEN3p9ZePhbzzNmkGQCqMgitkTgp6Mx0hR0F4EnxuPyrMRIooSzUYaK5txlkTGAQQ1kLbYH+TdETEhQvADUaiNVvV6Y6rd2x8GFHldIQzNaDdGdBxhXV1746sa+nyhbdhJ60HCQVjp0hTc7e2df/K7pFoHDh5iZCBWCp5z4pbu2tNPXuo1ItVo1paXZ6pi+PSFlTDEWGbXHr/6e98/mVuZhKHOx1fK+VuOUX9n79zT38ejR5YLGwMqU/W8EQyW2ZOUaUS1WurZRanz7Mc51bMFCZqgf+nebuu3m4OJ7Q2FAGudD9IsbB4AmsEw+eyPnnv+1r+HE3/yyr9sf2cVW1PrWX2y/n/vT/9ZxhYBcLRtB9sjpTLtIchmyfaByyyTQajGI2e1ryptSo2Ajhx5gQgWqihdZun0ZEvJMAqbtbT+qte9Ip9cu969P007pjL7e3url1b0eJ+kgmSqKPs42pfSHr1qKpqdR1kRWWLS1npCQaTLyvXpyvm1yrg7XvPmna3Rp
Uc+5zyAR8Gen0FE2pIKMWRmQiKkMA2T+SoMrrv5JaOdi7qgwudCZOhIBUBCCkJjxq7Ktd4tx4Nxf014sGDBkUU276+1/3B+tHm2AkBPFgDRWgyUBpCxCFTYTBxKXwkhrYhTFCl6Ba6gJGRmYexospVlyyDhwHh9dmNNGqwhLxE3PCcoUyLJHCIrDzJQEMrECZ2qdG9UtgOYGKqFsrK2XW/o6OeC8Vvf9+F/eegz5x94KAvZ2zJlMx3Brg53Srut50wiTjzv7s3dk7o3Rkhy0lK4cn91vNtPW/U4kkGWZO1EkE/UFHtRlXkxmsQNhR5IUOQxnxR7G/vr61vDd43jD9Zf9KpfOHvqb687/uLdnfUzp55QiGEYGFsRAYKy1nrvEWnqN7D3/c9ciK8FgNPNV/1S9/Wj3r0A8PUp+FfJ9367s/mhWlgbbegDh5chDIvcWe2EdI1Gc7DTu/maq689tixVVFbjbzz4jW98/UlgBBRRnKVZTcm6ompzc4WkguayKPpOgA8JjZIkLQE6K4PYWk1htHTsmgM3H1x/+uLG6QtUWlYUJ0kUx1prDGtRInuDrkCMMbJVVQ529HBLgDBR9kOv/5nl2ta3Hn+4HkcmAFEa7cvCUKriy089hUH74JElocB7X+Rhs94sJn2NtcW5lnEmiUKJPNbG2wpBdjqt3e2LK5vu6uX2JK++fXb7xbceSkL57Ye+jD/+oy9b3ylH48lCJyKCSlttsJjYQTGup0EQkKOxZeoOME3nBFUSdzfemie/Hm3ucFlqokhEc1HjoBQCVd1x8o7ghQ82/uyfv7/yK3dXH/iSPHAomp8xT735SuujrbIsmNnmohyXQRBXjlk2fLEbx3zoQK3SenNTe0PMXBRFGIaava8MAiCCbLW9DrjYBpaMJXNiyXSW3avvePXSgZtUIPZ3hp++/77tKxeAIGnPko1JacujI3OBaLQdVJKAGL1BjyCFYsfFxG6tblWVuetH3rKx3r3w8GeZETwBAgLyMxCdEEIByixqNud2di+iI6xl19z62t3LJ0c5ofOWdSQy5yqpFHAA5AUo5exocnEy3iUUAF6w14jm1xrtD3dGmyslO3LgAMEDg/MOWXgfREEUBsl0WjscUoBxwoIj6XRlKUrAgx0Pd/bPdDrXIZTHx2fbVy5ZrwCgwa7tuOZ8naQQHABHHpUUrChaXgifWi06YbQ/5nbLWYOWfb1WY/vHB6Ze867f/8pX/2rtu0/UY18Vo8iaVhJcHFgI0m0/Vyp83m0vu7z6nYxUf1jsTfaM20NdQY6EDQJoL15loBfG5VzrmCAx7Peroozbip2NBSimre6wtzfcW10f/2JR+8PWXa/8ufVznz961a0bGytnTp0W7INAaVOGoXIWjTHMgIjDP7i2+/1/fFHw6Htrf/INfeJ9o/8AAC/ah69Pwb8KNh9pf+PerNbyIyvDSMZpmrS9ddYVQULVqF/0t3/0dT/8rOufJQP/3//iT7/65UcVIgAhEAAaMoSUJo16Y2bshJ704qsOL91+89qD3+pd2lBAwjsPTERho16fm4GIlQrQsylKMP8LADhrHVIQRFLFSFGiXJ5Xo531qr+RqmiI2Rvf+q57njf/5/f99XB9Wzl3UKVD1tvajnfy/Y2dqNWeXmqScN5wvtWPUG13eyTjxcPTTqEnBCQRCK+rUKXal+SVtWkSl1WF5y6bm66bTxM8/dh38VWvfcX+CCajwVXzUZjUhQBCNNqurW82a77drDvBgcf9EbKsa9OLXP/sW7Zu/NtDj3xvuLndU9lhlc0jSvA9ltPzs/CbzZ/6I/r4tx54xJRb73nN9AuOht/b0r9606Xsgyl6CeCtQY82DtNcWweZz3cbDTx8OM7zfGXF2hw8gvEuq9ecFcW4D+wYKGzNeW0g7zMBgvBgveM4xrteeufU3PW5HuoJfPubXxnu7TCKuDNXVaDKnfpMUF8KRCiss0oocODBKxU5DWhh0hX9nV3n7B2vfvPGeu/SI58DEOwA2BERe8/MRAKJ2JlDNx09cd0b7//yH7Mz7cWrF48+H/SVEtxbXvnyh58687UvPjjq7QNDIAIRky4tWu3NoLITYMlgFYiKEH5rqvHBrLd6SaMHy9Z5gyjZMCYkrARwMnQgVRBSnAa15fZ0O0vHa5uF4oRYFGXuqq20fnA8WH9B1Ju+vMveOSnYYc25hLGOHCMlHkJB0tkQ0JAU5CLLrIRCSUnAQmikTlV++qarbv1Pv/mFL/3x9vfPTdepHPanVEBIZ/t5pKIrVQuS7Nk33/nEya9IC15Qoa3DbWuGVES1aLHX22kfOO7KYWu2mp464rzubfekEqLOzpqYqSaz9Y3e3taw39savytPP9x64avevvnUlzrTh3f21q5cWiOBcRiURa4EMqqiKBBRSjX+hf5ubxue8b7an3z6G6e/f8uH4N9IPvM6deFfHAWEzlMoJCGCEgE7BlJCCgSvFN3+gme3mumXv3jf1mpOCMwe2HnvCYARo6zZmV/anwAUexipeDod7XVhUAEJJkZmKZVM4qTdAA0yVIAA1jtbeecJ0Whj2RAgyYDCJAoSB3a0sxlqEwo7lrXX3fsL//6e53/lu5/47Oe/EHeLThTPd1qP7WztX+mNh0Vjsd2cjYBcNfK+FDUZrO10A8TFTAWdrOsrSmKwlc+NLTDKAsZmpyau5FtUBL1hNL9AQWQ2z27hS1/16rK0W9vd2ZrKmnUmQJRe8/b2VpLamc6c4TKUpE1IQZqXGocbF39y5fb7bnjkie31y3sym5PpIokGoijy0e+8/OW33fHSs1H+q//13TvnH5585AA8429Pif/+fe+M09qXxnT3i9I458BBqsdbUx26ainx0D1/OZiMhXcFoErrmTZYjbrghScI61NcyqpaRxSK0RAoEJ49SC8lOucQBHsmHwBZ2VnyhYVic/ZIbeaqGMk5IqOhyjUgSRJlbqqSqx3LpTce7nr9vasXV9Ye/ydAQq+Mt4jA7JlZMJAAo00yN3vs2bdP9gqrh8+/ffH51641W2JSNqcW7ry8Z3e3gq39c6OxjQMFXNe2W2pvPTErQqGdTYTwBN9/5ZNXf+42zksAROEq7QR4pSJByhgThMhASKKsioBCoiBqNrPE5+PKMM4ndnUsq6oIs7g4vY+XvhKePyuMkVJUiMhAjBlC6l3ssMFeIDmPgdCxpUBIxQ5FwEQqVpExHuAbz7/mWW/99b+//4/6py60ElEMe9MRjo0ba5JRsFYuuVp643XP/v4TD3gD3htrmX3u3K6SoRIJsijLPG5GtXpzpjMrQU6qSSgVgC30yGlfU7izn6+vbejxePiuSe3/nX/B3W+78vQnDy7dtLO9fv7sJUIRhaHJ+yBCj8457x1LqQa/0K/9wyv6z/qPzru3jT/w9afypzqvoMsPwMV/0X92W/uXNyajHVtMAAARAUAAAgAiMrMDzwwAiETeAxEBADMLVAzM7OAHnGciARbCWNXmQY/ZIQgiERIhECEqJCRCISQKSRKcBSmkc8ZaUJJAKQRk9kIFxlSCGIgkDItxr+ZUBbqg7O4f
+tE7X3Q9l8PPfO7j26cuCwwPHUov7I/yjaI7Gs8fnktmnLLZxA7VMK116sOBHOfr7TRImuEEGQL0erz7dLeGcZygaiy0Yj/U481hIbKbs3SN/Wj77Dbecfcd3out3UmnnjaaKQMoGZUTu7e9pqSZm5sz3hGw4TipN3v9oR1urL9166ZP3fDk2Z291VUMmyJdCOMZEQTD/vj+H36Hf8vSycu9D/z6fzlRu/DFn2/AM0718Y9ONSDkBKV14rEnt06e23AsQNbNZGtqRh1ZCgEGTItEzUme7+zmeWWdNuV4QEweTNCYcYUDNwIW6MEKJs8OUQJ77xCRiAyR8BKgCjtL3rIodqbna535FAUXpS1KzQ6Z2WibT0prAC0EQKW1L37jvZcvrGyf/BfvPXthvSNCIvTeCyZEdtZTu5k256W0HGT3XLt6203nZjscRBCkSlBL4hGHytkExQQ5dlA5y6yF5YoE6BKyVDjpPrq0es9DSxe35WTsSTAgI3jvpXFQaTcYYX9oKi20ZsuU1pP5A7cW5tyUXLpz+Xt33/Yj//Dpb7VHx//84XP2/FkqNxfNACUEEgNGZDbeEVIgMLaQgleIJeA0eFl5qQSDq3mQaSIVhsPchPKxE1cf+9nf/Yf7PtQ7c3k6k/mgu9hIVwejUCovk83oWT6CxcW59ct9YK2tsdY63dfFFpVDEFGYpIY1eQEiZrBx4uMwqEVZOSmNs95zIyLt6NKlFVuMR+8q4o/OPv8lP7F38Z/mZq7Z2FhZv7wOCpUK9LhLQSyUKIpSkDTGFr88mf3YVDOOnj5y79H9L5pSP915jXrgv4Lz+t1V+Pvz4Ma6GAIAMwOAAMJneO8dMyIxAAMAMyICgPeM4AARGBEJwTkPgSCI6lHWyQfr7BkIkBIkgSIkmYgglkGEIhIqkUIIKQHBs0czMoCJ4K2/eEx8t8PM3jsCUEIhu2qyo4KYpPGQLi4vb+/sghuyB19qq4fHZuYm4/JKXmZAOXEaR7KGZpSXpY3CzIfB9d+6IVOHGypxVXBoJvncVz91QyW6blJIWi/DEzPx0PZWcorCG4L6FXTjzTPbePzmBSXDYU5pmCYZC4IgCJGCfm8k0Xem65ZFgpRrGWWtYV7a4drmW3dv+tQ1T1/qrp2/RKoe1mU4odQAACAASURBVA/KeIFFdDsdygKxc+DJySQ+ffppPd6//2fVC4+nAHDvCK98rR6ITHKlArq0Mrh09mnPRFHb5tuzszQ/I7SZzC5cPd0+SFJcurh/6uTTpba2Kr11rXaoGrPo9HB/rywsQoDKznYaLMkyIAmjNSKAo34/d7aszRypygrz7TgL4ihgZO/YOWAg5y148h4ICclLAgf8glf9xKXzZzeffJCU8I6dYwAmQu+9YBICmcFnaTo77bfzvNpbnKuOTEOqRlMZq8DJCJgpiL1UIpYA6LIaqACspygBZDXs+UbGc/Pw8VvcTz8VaF2xl1EonLNCMgAKSUBesbOOjBHWgfYMKEAeZd7M2rajRVi/+eKvz7mHzq2bta8bcYnHFXALcJo4AouCwLNgQA8KIQCXMI0JW4ip9SjBetcBQimiOAiKqkzSc9csLbzro3/3979WXV5tRzAZDqezdGWvqoVY2nA1viapJXNzM5cu7llXgFeEQnBZTvZtfi6sHRTRtJChI0Sv0OS+6nLUT6RkY51xQRw1I3YQXLq4ytb23zmIP3LwBff82NbFL7Xahy9cONPb6kopglAVgz2UESNIqYy2RGLyi/3FP5puRMkjc29auvzJ4bN+Tj30/mJsnLHFL4/D351DP9HlCJ8BAMQIz3DOeQBmQCIhlTMVIgIAMwM7AATAH2BgBAyksKoRZ+1iuMrGeARkDwAeBGLgUZFUQBHJOFQpKSWCCEnGaaBdccOLl7916wPZR65nkOiBEJhIkeeVb0PjGPMGqqnnPufOixfXHQXgBHI53Hrs2FxdVVzk4ZtPPPtfrjzyzX3x8uuOrGyf/P7FrSyd7r+3d+Ch6/KnrR+bG0+84thc8s37Pn2dK/qZ/u7GWgnLdynR9r37nK7N3RmkF4vd9bXLQ5w71EbvLaQKorA2QdZECILIqiyJkpqzTtbjeFIGzemlYV4Ve+e2f7r77H9YXFm3T53cQlHL2tdQtCRCege9/vf8J+2o8C733n7+Lf07jxoAuNJffMH0E9P/7YCF0pMkhKJX7a5eNowy7rhyZ7qhGk3QhkXUDOKOCHEy0OsrV3RZeu+J4eDBBqWHY6V3N7Z2d7oOYX4+ve3Zx9NWhk4IUsYYElAY/+Tpy+fPX6p3jut8rGhw+PjhOMqsrUxlPFBlbFmNnUFnPIMj9gy20PbWl755/dK5zZMPMhH+AJD3jggBAL2XkrxjlyVX33jXIB+63W2sxe3GzN7mEykWCioAHSgCX4FHQUYpl8SxdxV4FBKArTMuJEzr8Pj/6e/4NBlLQrogYG8EEgvhs0REIWdNVhIROE6EUiwElA5qEdUyChKs1/Xw52/ns2fBQRvHFUZrwKvsd6WceO/YOjZAyChSIAI95dWQIEBoOifRI1EEFHoUoUSwIgy2rpqt/cpHPvl3v1JuDGqKJ+PKVab8/+mC72htz6pA+Hvvq9zlaec8p7e3t5Q3hRQSQyCEEvBTBsEGyghiwTYg1m+NM4MiwyxAx8IsZ1AQdfjEERApAhK6EEhIz5vk7f3085yn3/d9lb2/DGvNWv4zv58LM+1Gp+vP0+TE/J69ew9euPJMkmYGcTR2vvTsyrJzpTFzhFUrxqAAmcSFyhqdUqyG2+D7wCHP04V2vtUZXb28nSRm+23b2R/vv/Mlr1l9+nP11srVq+dHO3002lhb9LZ1kkUBZkEko23vVzeP/83BqcbkNyZfPX3qr7avfWPyrXe7EkMMo98YpO+bEz+oyhF+DwCQABEBQHiOsIggEinFIRIRIooIMgEygAAAAykUQyi1qcbEghutjrp9QC2UIESRiIICEDkAIKJmQtJGdJbWJo/e8Ypf/817fujm235i+7de9fjb1jZ3fdQcGa2UJ09/4OO/e8etv7y280i372+5+YZxSWMvGuywGIzWnlo/fUJjecvy8Z+849j0xPht/+ubi0cWiytXz27vtGfm+7/bmf38NTvf2dG+PHbra37gztv+4bMf2TzxwNx0th3c+iDdw/BqXz6o+am5G6drl6ZjeODEJbz+hushqqJ0VXT7luzkREMweoCtjQ5grDeUNXVDarcaNicOd3f6w82zo7cX+z8yNR7yubMyLJtJc04ntbh4+38Y3/i+8pO+HMZqcPee4h/f3IP/43XrO09/WoNoFzgGiV7K0TCwQdPEuJXpoEiFCEXAcjRkRkTtvVeiWAQRtdW6MWUklKNBVZZK23rDtqcaWZ4mibWJJY1ZlubWrq93nz5xNmvvC84tt931R/eoPAssmWKjzaiEKjqDFFl8RAbhSs5cvrRyzYuvnDuz9eSDgTwjsRBBIKQQSBsxSABgZlpHbnhhORqWvb6uJa32/sunHyn7PYFIhCEyszDz9FTOYl3VRwnMEgEoRsc+q09oKbZ+sdj
z30OsHCBbrZg9KgWAGlmDTyxoQpSoCDUJYTRpK7W9psFIkk9B1kme9+3G3Ea0sRRkAh0RLOuR9iHProyGhYBDKSECJJ68IIigChE0GgQLKh6wMxcdGFNToVzJD/7FC/3VTxEGnejuLiqMIwTjo5MMJNNZmuWNCBnCnFDLlZddsa710I/C1qB9tWPHruwO02GYnJm97s7j9438YGNr+7HHv3Hl6iPNbCEE3trobe10wfnuW7nx50sveNFrr5z+uz0LKzvbV0bjYa8ECLVuf0dkVuvSO1BUSxK5/FfnJx5pZmm+sfVL+uFa9dp/Sq9+W8S5EJC0+YN2KMdSFMwAgCJRFCesnFI6BEjT5aXmxTN91g6YUUgpYAYUERTCBDEylwoS0qiz2ZhnsrvqfCUIxIBoQKUIGkzG7BBFWFAiiAD4bHbh3h//zecdj297ze33nfm5lU/cu7m+VY5L76vIjH5w5ZHHahN7qnHXNleWpqXvquCx8i46mmzOn3nmobpOIEp7ZmY6MVf7u1FUUXampqam2/su/PKTs//zls6FLSX5wZtufv2rXv3kd//5X7716OE9y6cvPbq6U9TnmtecudSs5Inr70qqM2WxdfHCDi4d3YdI8TkSDi3Wp9tTiozzPOwPSj+qtYxSmavCyPskmR0NwvbmxeKtvYUPTZKYnS3u9hI8cO/wNe9/xyl4V/NE/TO/zTGAMDF13rkF/0dj9Ez9j3NiCSLMAIxKizG5ySYm6uMiOhBvjRYQN4IogKhRAQEIo4jxXgLWDLAEBwBaWZNqrRUpk1glAgyinoOESrvSDUKSJdCwnCUWLECExIBC9AyGQpIYRcrYRBk0mFze3NT1w+dPPd155hEmt7i8Mj1trYoKYlVVg150HrqDAqen9h99wbDfrfpDlZuZ5eObl58u+gMkBFDBOwBwVbV/36yiyUtXTyAnpRt5Ya6cKJif37fb3ej8Qq/xR9aPEYRBGBBYhJQS9ghMZBCEY9AKiQIZPHrD89fXn/TVeLpRGmYaYmg4W/KEt5mDdpU+vD0sYNSotRZCWVzjHvhPce5ruPFCiSLzD9CRryT/8jvlvq/Ty95p6qhSrszR6dpW0ai8Ib19/VH5gYeylpXHcfxJO70zjTabvmPXvalXcBHGJoYABGSkLBG0NOoKo6QJNloxCiiCxICOyFqintL2MMegAUbdy0q2BaUsAgghYMnqz4/wqx9Mp6b21O2pNEGtfQwEzvroAuL6Jjx+obW5CRXFEOt+XCZIp9VLvrT/bwEg3XrwrpM/sttV291R2QcXaxIqLocIxBwFomUFBAVVgNnM4jX3vujuT33uz6ueh2gDV1obFC0QhVmEgDyRsRBBEaSzptEebT5NIhJNJAERABZCBSljrtI6GAFNjEhuPDs5u+/Y4Zd+3/71nYv/dNvJQ397uNfti4CEkIRqu3T93vrsRDMEtVX45SkgPQWcs0CWKoijk089BKUzAIJiplaQ1dzC0lrv9OLCwsF91z72sq+E98yxt574h1//5tmpmeWs+fjZk7uXTn7t8c9DrJt2ulQM923hQ/uOFOtPjked0XiId7z2QIiBCFEQSSk05bhixKZNlFFCLkYbXYlktUmrEnc7w+5P7xz82B5k2FkLl8/2yh/7+7By2ztOwTuOQOMTb4Urj2ggYX/X/p3feYVlgPd8qfvFH96uvy8F4ggRCRKjyUZjGxGzdsOBKllQoTakAEKaJdoorbVVBIjjsR8Nebtvs0RJcD54bZLWRMOmidLGUCKAiJqUBgYBcFVY3w1TOSUaWVSUwEAIHEJQRNqqGFlECEEpAKYy+sm5m049+i+DC0/rXB89em17blYDWwWAkctRBL2x3duoqqW9N42GfT8eq3pyYP/1/a2Lve2dygeOvtFIankSAwsiJjIe5qPxTqx8RKqGo0qqlZUbrqyf7f1it/4HOYeiqpwIIJICAuHAUSlFSgGzD0ERCaJK6Jrn3XXp8sPVuDs1tXew09vtdyfMcjlcy9J6XrO3h8tXu27fYq2ZafXw+u+PxvB/8YLfVy95p5rYU9dG17rjfa75rTT5eFb+4LTUzvlkkF6lajqa/VYv1NqQp585fGKXo8TKaC0SY6AokmWsQYwBUyNlOc0xzxWmIVMmz6j0FSH6gECS56hRCCTNoNFQqOHD18SXfwkkAHusT8jMPCLgTi/qoNtN02j5iEFhakwVK+1DFJFfOPvlb/fvge/5yPEX35x9Y2eXO9302bXmybPDrQ71uq4cQvAYUDq7NOgaL5wuTq+08qPXf98jD32hv9vVCSs0Vo2P7lNJFjbWYGMr71ccJVNmupJYayyNt56p3AAERJBEUBSjMdpPtGdMSgvLk3OTC2MADuXq1TXKJ15+733bvnjgzs8ufOi49zwcDBWiCyGHan3jfLM+692Aa4dS3hQpA4fgtSFFXI46q1u/3OF7QFjKu2Ptv7au+Ztbu+PVLJ+450UvuP+Gj/G70lExtDPHfuSVP08T/W9/4xPr69t+qxO4aO45kkS3M1iLO7L3tpdfPfX1/m5fk8NX/dLNhStSm9dMDYVjZM+u4oJQIRALj/oyLMfBsUDJwXrH3Z8aNT5UR4ByO+leLYpb3+pf8BvwPfm7Z1gpk9dbE1mK4wAxIBsyg3/XW/jz6cRY1JG0t1p8HI8rtdPHPC0RXBBJEqXQq0SniVIoeZIqoiiOGZn15kYdYhV8SYrqLcqbSZIioBASkRFQiNpHr1UWCnVhNbYbymQeRRFYDsARFalanipEF6Aqg0KJ3nkfHPjFPbesnTsxvHquINY6j1QRICGFGDFKYEGQbHpi775by2JcDge6ni4sHBh2ro77o6JywDw715hs5gq11rEKvLFdFIWrKs9EIMxKlpePX7zyZOdnNp738UMI0hsMR+OKSJXCijn4WDpGjiHGqnIgIAAmU8duev7VtUe4X00v33L+5Ne5ahtNzm2axnwL9Bsmdrb6o+B9fSG9kKz99T9V8H/3GzZr1vVUKbO1xgddsUbhR158Y/HVZ88WxFjtFdMSmLfpsdoEMjdac7+Dp/oLK2mSbKyf09p6H60uFbtUy/RUOjs5mVoUKXY31r0EFG+RSIgFmWNqIGgGJgWoNCPQQz/BRz+QWOVDENTUmACjxI9RQrAJ1JoYRNcaXhsj7ONYo4Ivtf7j/fX/AN/z16mu1WVikhamxHNUoI3hGIUjiogr8YmTGAW+9CXzdGfhJTeWO1EGRaGCCLIVPLQ/3n5dUY7o0qpe7cr5C+qpi43d0Uxqd5b3gnKuchBDBOG6FUQpfJipS21qseszSGaWFpa3t4YqhIurqzy2I933Q9r9hY36nzQRNKGqqsKi2KwdVGn7fUome7qxPOVHu8MgKUs0iI0kufTGMxtv78K/cuOfzE+/P/Hze6anJ79536NTfzg3KDYh4PEX/ZvR1hO7nbXxzmAM2M6b6dSsd4N+f1vD9PTKzRsX7+/vlpkhfOPb72rX562tj3y1s7PusEeKNaT9aqjFJsoMyyHHaJO6q6Q36BeDsPoTvfT9tehYuo1QKl/1wt2/7pfubD
PP0q4uV6vVaWIyGJo5os9pQURV4gy9yMfeiIIAROsS3KNQZI3BJWwDGmWOQlKwLOUHiTj11vSQCRCoE5djqrYkwCEgsjdF7kYykhBo5RAXnnPMa5tU4ITqEXInPdzPuFkBT66L3VWgkhm+VkPp+UZY6IKYEQRIQxBkAtBEtZAAQBsnedlIIAExJgUlISqRDBu9Z2S0LwIRRFBURCSgSVUvTedV1X5LLrrSBj/RxjsTu/7sBPpt0nn/h8NlhJWXgeigH003Kl7K1kBNs3mmh1PACp2tY+/ppLr/78xQDt8dG9X7t7+cXn7vvaleeia13wxQpuZBuL+aSoj+0e3mq6ZlCUWg5my1uie8XBNSTfJtcRR+DnBcJ07fR77r/5XkydhBAZOfRr97yoWywiqNXVtWa+f+7C/SSMkroejZXUmVFZoU1mYmJpsr/++mPXjz4bQte4PggUAReTmZcxWRJl3S9uFNUGswg+eOeEZg4glN6s1+7M7mZFFhK3Tb++eqxvlzEGrRWIUGT50eRoNB5Pjiau82trG4eTqSnU3e9cjP4glxDW6/X92XRnZ2dxNJssJlKyNhlS3vUTghgIMLLK8hTI2m51Za3rZze+4/C+j56IMXXejuuNjvwbrq1snzqllAgubG0MpBEreaYFFYMBiuFH/urJ3/jSh+isphCSwgSsE9luGaVsWq4ZjSk66D13k3c6T9ny/N767WPq/3jPwTObgmjvzkzUD6vpZ2P4RtYd+p13hpu/Let7YPZnpth8+Zt+5MqTT7n5p/f7V5FouLvs271adRybIEsPLXsR2QNKQAHsCUXiBICJUzVcAeSTL9leOY/eNVpnfd9mufHJP09KISUtF8uirEgI57yOxMgoNKAqM7N/sJuSBODDd83Wfn/ImVwpc/zuH38YhbLJ6gJtl2LXZ6qyrq0GIw4i9t2wHLSIlSxW8v1zG/edWdtRIhcCxyubMUlr+8xkRufpeRy7rpVSoVAmX40pBT+TACQghoQoQmikqgCRiCWqpmvzomKQJMgUhbVWAJflQGSZkAPX2939qxub2wzS+lDlxvWp7w+UUjF07L0Sqm2dot75DhEACJMKsUdKgmSMEZDbtjHGpOSQWMms65tu2UoJdV321ktVAjKnJISuBuNmubDdgpN/nhAShdTGpJRiDIgQYxBAAHE2ncQ0LfVw6RIo6txib9LfXF67ttdllbBxtl4dP1y2Xd9F8FrKFPyoHsyahlA//bbbb/3KI4eHd6jAXI6pKi9f/VpRjJomofQ2Zm27e3J9VNWDW3euv/HRH3z8yqfu3qrttZNgZzLalHoOQRBASpdOveee678jISqKnDxiEhzGZ19kl8369okUeTbZ3TlzdjA4JgTLTOSFUmIAwCiorism+OIzH/3a1Q+sDIeHs2nHcZjV8+ncFHkCv54fv3Tz652Lo9FYkIzJ57qwwa/Xxyazu8swF4hG5c6z8x4wZSZbzOZZbXJtlFKMuDc5zHXOCTvndtY2rn/H0foHautn3iYiURa17a1X0neLE+tbK9nqM3ev5FruT48wsCyUd9b1yShTFObZt9w++6dbUtDcLY8Nt1Tq1kJ54uZwtD4UAo5t5nWmObjV8bheGeVKA4vOyP/0V5/6yuwW+L7BKEBO5/O6WCO207ggt6j1fak8uv03Z3af0Wr9HfHBr9KbDsLiDoxOyz9fF19/88t2nvqF1/x1SOKT3/T/0zd+3N36I+G+LGibw5df8OKX9+lb7kzyeVcmPsDlDVJlVEkd/aZKt2wi4gjQclJIyACIBEQnT56MJI69ROgVu2g9gdbaDIa1D731PaaYUmQh0PcxJBLSx5grsr7PijJEqItR2zUpKuft1bffPf1HG5RJTYDv/rGXBu6EkaooBzRo+10C5aSRDN6K5eLOiY2N4CnY5sVnz+9UWyhhZbQhkMuiQlUwRwZAVCmy1hoRvXcCAU0OKBQAk7GuL4sqJRQCsrxOkRbzuxBdVuQ+BiSlZJYgImH0noBZSaVXlNIcVR9u19lZaWTvZrka9+6AIFsubifXcfRCiJSScz1zUkp7t2AICKCNSUF570LwxujFciYEKFUQJde1HPsYg86rEANzVEohiJCiIC1I+dAjcEpBaJU4ESpgcM4zQ9sd1dUweBQyIegALgTft7ax/XP7dw/jM8Nq+/rtu1qwx6os1cTNvA2UqFm0C3bNsr/+jukrP3sBAU5tDHb73nnLjHkxyMTo+PradGEv3/3rE6v33pzuz7ruVMnOPbz7zFrqD0Vs2YcIDBwExGdPvOe+q7/H1CuMxIkROblMQn3yId/29eqIUIW+q8dr1SCr65EyZZ5VJhNSZkrrkIIh8YXnPvP5y7+TIjXdnCOwAyfESGtRmO85++3vf/pDETpBMkGw7Dj41zzwhi9e/hwj9g4rrU9sbM+m86nrpFZ922sSXeqLLPPOCymPjiZVkRNRSHG59PaHYeMDRQgxhD7XWUzRMYtIgbEu64O9u9qY8Whw+3C30DkDCemX85g4SCGuvWO280dr6+u6qsdN03beLiaz7/PndydLYHPm+PraSEuKda1Pnjohq6EKMQSMyWWqXAbWYIRRv/2F4hf/+ByGWdE/1uff5va/VIyPtUcOIcnwkYDb+sQP9He+osIt90s38ZVXfs4cAMDPfekYAPz8x7/lF594Zzq4TPOncXAuwWRTfmo4ft1saSb9pksEkrD5Oq2/OEz/UsQDnn08E72SXYqaIboYGHl9c2vWNOe/cxQj5Ip1VdrWDeuaU3ABYt+FEMlkKKGdL23fl1UVQpAKjdZZlrXL5dr66tFRV9fVM992677H1pa2jRbxR3/mdTebxjreymVRbl47vLY1PLa0E6NA0PpkvicEauaqzO49dpwDFylsb24P69VMFzqr28VBWRYRiAGVMhw5xSQkBZ+0Fi40RlfW2iwrnPMAschLQGLmrm+abjaotwGClDkRSSlTSp6pKEoUkpnKcj2BUpLmsz0pEQRLKqw94pQwQjM/EsKSyTEyAiCCtQHACyGAJQoLLEOIKQVEL8iEEAE4+F5KKUj6EEJynEBLEYIlFCG6rpsNBus+Ja0zZIo+hhilTs4mABLkiQYMLoXeud45K6TOssr7OGl3d4/uRLJLh12yS984qxbtTA/Vct4ygXfUWPvc2w4e+cRpQFEom2dVlFFFrbTpmrbHkCm9u0zeTu4/95LFMl47umUvPchdI1LLIWDqgZGSPxg+lFLcnD9B0JGQwEEREyQjXbn56KSZDcdjJXXbthsbW9s7ZzvbZ7nRRpd5Kf4OESmTf+qZx67f/YyL0aXYNC3YGAi0lrXOCGDRzfIyZ0mj7Pi0uaFEdmzj+KXL3yAtlVG5WuvcQe8Q2JVULdKyb10X+q16KCh2gL4NpNFkhDE7PDi88z3N6Q+vzyc9JKdzIwhqk3kSMfUmK7uuyU2uDJLgZjZPERklc4IUZh3Mv68d/66SgjZXR3uTpZCaKD1o1k8+1+57Wq1WagVlJjMdT5/drOqyqKreuuRCSDJCAoaVYfaWfz2+HTRlJ+30GQyfoOwHwvK
jMr+Hu9tJl2SfQhzD8X/Idz+TRsdef1H8xXt+/d+85A4A/NyXjn3iueNved8PR8g4SuEPsF3G4YlB8x/XR/d08lhLJ9q0GUUV735I6h0nuZbXxMFfym7S8AGS50THtgY+4vFXVssKy94v/FwARRK5zpioLuV0NhVCh4hrG2uHe3u253pURxt6a1dWaga3XCRBvm/teHV07e0Hp/903TNXhvAdP/Hg2bWHbt99kk2tUixHcr7fVnWFrMpq2Ng2JkHUjnNzUtdnt05IkwPIMq+KspAib5qJkpKBinLFOackxRAZPDMul4umnRZ5PR6vOueZQSnhvQcAY7Ku7bKyIiqkEMzofM/MxhghtVJFiEEq1uWKUCZGqahIsUUNkDQHtP1R206U4OiDFAwcvXMkSYlivjhSSsTIzCHPS+c8QFos9gmFMQaAQ+C2a+pBDcDBsSApBAXfJUwpARELqkLsEJ4XCTlans0PjmZXVscbeb4tNQNL2/XMQSoRE6dETXNw+/atre21zGzcPrwtTLU/3/XeHaa9xknXSlNTb8GC+9LLv3nvR9bXy2E1lN4F27QxuoixzAY26lFezfy8ykac/EG3nH3ztaI9ZF5CCpA8xyA4HgweSBzXpl/REoAhQRIQjeRcUFVwW13wi04Oa61MSinPy7XNrSwvhFR5XkgptNby72hpvnj7Q9+8+lFRqCqqnvsTW+efvvbMyfHZ680dN5snAkGp9c3WaGcy3Wcfy6qoys29gzsRupPHzzG6/aO5s86zl71u/ELrvLPzleGmMSLFMJu3UrES0jM/+drr9//lVrf0PvBgvNJ1LbjoMYxGRde4ui45MAmBSF3vtAEfYvpbgYmvvvXwgY+dXC5akn65tGVRO9cHTe/yG2G32VvGe3Z2NCTfTre2RsOVKitzkgKRgChGySCEbt7802e6TIjph9P2j6bJE9B+jFfeTd1T6DSnHgUmvsR8TtYvE4DdYPiJv//B1+w8C3/n33z2VT//6Vew86RqCJwgITJWa6Puf1krtimeTFLKjTO3D/pR3g4XCxvBcufRLqdFT6HiD92z0zTL+WTzPdvHH+vY+XaidWGqTBO1ru9mNqZgskwIaUopEJeLSFIEZ4mk1lkIIcaAHJxNeZkfff/yng9vt96Pygx/7Kdf1Wt96dZVI3TsRaatyvLRUCVf6kwsG690xt5urA43s3xcD3eGm3W5DoAkohZ52zdCCAQAiYKEJJESM0AMUUqFxMCISF3XA0BmKqWUEDSfz/OclB76EIzOuq5TWhmtvHdCSkSplF40U63yRbfIy7IqakjGpth2i2NbJ5atq4oV17fWHyUXIPaCIDEqIZ3rAdk5K4QUAruuDSEpKTlFBgYAIaT3QWttrVVa5XmxnM+VJh86ITLEpGSBACEGZ5sYAunkY5fJtaaxmfIJVAwA0EhRMCQhZdtZEgXgUqJO7IK1IKtlP0XiL1+7M+lu9k3fxhjTkJV/8k3Pnf/zY+MskkLXN9EiadUFp1mDyMfl2iIeZEoR4aUnH06Hh4oaSClF3qvvfWrz3ZvLrwLz/Td/SwvA5AAFQjKSM5nWh0ZsmduH634RMM+rshZC5Hl5+tyZvndS5SRklmljjBBCKSWp+PRzvy3yw6u3Lh9OpzoXFQ4JQ8BGxfzi6fu/cnjVThYyV8JBnlXzftla2/UHO5s7i6YTQDrLrA9aDh0fuqNw7swLD+e3pa5u3rmcy2JhJ1lexWQFmM663e9uTv3xaLlsSQAa7X1iH6WilXHZ9z5xRCkoEEccrI67dtb1XYzB9i4GuPOu6bkPj723wFnfWmNyJNIEKOX3daPbt+aztt9cWUFvi0IOR3WWCyGJhDSZkSpHaUD1P/LvB3t8HPjLqbkNJ38aFp9J04/SyruDSGr/bzw4CZ6xDXiSVr41haiG51+xfY2aJyEff+yttz99def1H9qMDMqse+mVzX3Zv+ZF4fz9w+lv/9Ln+O2Klt/yrQ9+6Ymn28leG+5r3NDLb6QugxgM/uk73lRcuyJuT18Uzm1urP46iAQuCYMaYdl1zczrzEhJJNEHzrRqloEhxZiGwxJFCsEBYIqhLMYu9u0P+fHvShc5zxW++1++orVNSAGTia45vrp+0ATghdKZFCI5g2S1KjPJ925sKqSza6snt++HSAA9kmQSzjlFuLSNFCo3FSdkjlIJay2R0FozAwICEgJ3XZvlpm0bSUnqMWNPSFpXzIk5xOQJVNfPtM6KfDUyq2xsitLZVuW6qs6BEMii6w4RU0oOAaxrfTtVkIBFiq33XggJwD7YxE5KIYWKnkPsYwqc0NqFFJpIZaaMwEQixZiSY05aZ/PlvlbSeZ9nuRIGGJBFbxshI6LhFFrb57lE1gDC+54JiSQBIKjZbJ8otZ2XWT5bTrUqn9m70tild3YRj0QmlImfeslzL/nshoaVLEml452DFhFmXdgYrU4Xu2W94lgoTMFtXX68MrFjLbh1gtxjL/t9+DsP3Pyd+2/8FqaQSyBkKcFQOLE13h/NnoXDk5OH+zkmpcqiklIKoc6cOwtA2mQmy0hIrTUiKqV0nn34K7/qw10blsGrg2aCkQejOjMr3IWiyKbzfedbZer1eiBQHjSHs3ZWmfpYuXJlersoVbN0ERGCF7FESTVWUz8zQi6bVlYEjRiMy9lsfurYw3fnz1x+6+Ha72XK5LmKXeLe+kxk3bKxsc2qune9QbMz3hxW9dHy6ObBbl5lJsvYpQjwzFtunfzgenBhY310sHeg88LFhBAg5mdXxCO38Ykn7kbrc62khDzPVoe5ItYmVzozOZlymLT4tQ8cfHL3dVAxzP6QAodT/1K7G+7u74r8NXHjnbT3odg8QSJyKmDwKKc5mbMoH0zmFuxeec32K//yDc9+wt59w0dOyn6R4hTrM7JKO/Ljr3vP9/7jP/zXf3H8xb/61AZa+eArzn/uU5eXTLGbQ8xFOUBdQeoH7t/pbEWb87cOXihXTo3vv7w5fF9I/agojqZLn4JSmQ9BaUIghNAs7HBcN8u5MTqESKRJu77pqnqlGGWXXn93/XczFgop4jv+xSOMrRn95AAAIABJREFUOCxjwnTPxituTZ7K1GjS7beLbm2gbRMYaX9uyzw9dOL4C4+fz6uVUmSkQbFMwEQypCSVYiDmRPA89t4KhYKM0QNrl5yCFJRCIggoFZBG1FIaRKGU6PtWSsUoANjbJiF7B+Pxug9eCiMk+hiJJCmjTaF0lhh1NgLWDH0IvRKmd220oWtvSy1lKuezSwKQJIUQCOh5zDybHZbFICYrVEnIXTvPjIosmZOUAhAk5T40iKhk4UPrg4vehuCIgJkFaWYPpIui5r8VkDSwCrH3zgtkSRIJk0jROQThAi2WB5Wue15cOnjmmYMnJLRVfewTL9+9+OeZhqoDITWGxpm8nPcAkTKjovNaEBtoL507ur0kSYDS9uFO+cAXL/40/J2Vydfe+o3//sh3G0boHEqinZ1qd/VwZfXiUTxMT9ezaWIWg+FQGSO12Tl1BhGJyBijlJFKAZLSKtfV733uZwMu+tavr45qhN
XyRWWZujC/tHvThYWNse06YJ8ZZaTuU+Tk+9ZS4qSVtQFJJmCMvLE6mrZLQ6qxXb+YC5OTCOxJ5QOh1bms3E/2y6++vPPBMQP6NuzsbM8PJyc2t6/sXSPKQhehzNDHZn4ohJ41nsABkMnLmFhnOP2+buexEzF2QC6F7NTO6cOD67mpuhCb1G13/gVPNZd2J4LlIENi5SlhSqQVoiyKUiqFUl47mP7nv/luWBmm5kvknkic89o/FrXmK/8x8jZu/jfQT3n+XmKP1SPJ3sRgcfOdEVYo7UK3eNXGoz/7qk8Qp9d99kVClun6p8E/KbKb7/qBf671xitufvJxe+NPnl7nzkzEORsGEBiTj/6AIhB8jpafTcXL/OAHsMiJO5xcMdvywZf8gee9XJpFs0DivMgB2uncaRa6rGJqnPeCyQNU0iTllwuXIq+Miufeurfx/gqEDtDjO//be0s5OHfinluHydvLpso0miY0WpcKihCbhMPlcj/LM536i6eOX1h/pFaaJDrXSqkRUSjNzwNQUqXEKbEUKoQ+xpSZatnOtBIpxRST950QuihqqQ0jx8BE1NtGAEqtpKydbYVERMkgqqpqmjbGWFY1sCCiRAJFZvJC60JQIRV4HwB0iC0yBNuZeoyJjg4vJ2+9PYrBE3KeqRjR+U7KzLlWaxNDAABC8sEBpBhjlmecpPe9MSYGTNwzQwouBBcpZmpMIkmR+9ACc6ZlCi7ESKRjsoQyMTm7JBIsckids45kIVUixtnsqAth4WdEuHv0pY+8cu9tX3zkm4dX0JmqqjoXlj3sHxxO24XUpKp6YEQ7m+/YVxzNF9GiQFbCS2f/+PTP749fAgAbs6+cfObXdrqnjfI7K1k2pi+GS76fHd9+8Kg9PN5dSG3lEw9HQ53lnbVn77lPSqm1NsYoZYSUDCikyMv6z776y7nOl/PDXoRNfaoc5N+48pkGraJhv1hikZEHo2SMfjQczRZTz966ea5y51yRlZAwxaBMWebmzmSfkKINKtdC5jF2iy7lQFKpBImYLn3bwbH3m5Som05kMXQphtRpLaqsWl/b3BmsX18sr1+/sra+upjPQ4zBe6UlUqrz4ddedfXEn1Z5PmBEwQGSN2VOspZ93yBHVhm71z/rjw73a51xSsCUbEhakRREFAFRScn6vzx5/9OLNaItCJ+X7hJS0Y/eRasvSM/9MjGL4m1R3IOz3wK9AoNHefrHxAE2/mmCIfTPClG7fP0Tb/gyB/vGjz8k6pGe0ttee/nxy1//lle//f6HhqkrP/KB9998Am+m+0I0IBM4p1PFZBMfSHg2dU/y8Ic8D4R2qVoT/TIruxe/9FflcNYcTgFjWa5G7NtFJxkjSqMTQ953S1Xq0DadDUooIXA4qp541ZWt99UxYlFqfM9PPBo01TnGjk2xMm3u5rCVDSO61Ns50cj6pja1UjrXxG5x4fjx+7ZPc6iESkaaGCOTQECOUUiplE4MWooQusRJyYxRte2iLAsfoiDgRCEm57osK7O87DubZWo5tTq3gmqXOq0ypbQxhXWp7xullFYqRjBGJkYQKgJIIRLrrDBEmXMRSCAmTKq1d1ZXNnxvIYKPnkNczg+1TgjKZDJ47vslou+tL4oqRe7aBQlg5pSSEAgAQkgi6VwDIDjEmFxRrTG4GBCoTVEoghh9M58rYybTWyH2x7cuMvJksjsYDqUZaEHMyTogkRbzfeA0KDdC9ErLFKv//dQfvOuJh5dLKbLlpTvPHvb6qaefrqW628zXVjdBeKGiDbDVPNjPehdM56GPfW+jwnhQnLvXff7GQcs7L/nqyZ98xzP/5Pz9q18Nd+a+d1Fq4cqyWHenwn7uE9SDYVFVLoR7LzzonBNC5HkuhCApEQUJITPxZ1/+9yHMJKytmzrV4fruc6+//9s/8Pjvl2YtJyWreu/gVp2Vw8Hg2q1rRT2KPqyawaP3vPSPvvy+rrFFloXo22W859TOzf27jDibz4u6Ep1cOTaO/aLhWCitNdg2PfvWg/F7Kc8rF1iZAlPCFJzvQwqiyEuRLbuDzlFh6mNr4/nibvKyykZlUT539yn3w3X5X3ht5eRkMWn7I6REMkshOAgrwxV35AL2JzfGJ24v79mHpGwC6ZctCyaVEZCUKjJ7EF++feIDz4xiEtE8jPYx9K0h7vRb+Mw71KX/26cnWL4C1cNZutH5p2j9DbT7AY9M6z+YxCqlvaRKxeojb/gabHzmf/3ca3/kZ/7e619z/J/9k/88SWK4cX7pehH845/9/WvP5RRzKTKn12I5ErgBmEPkVK1jcrLjyF4xewCWaCpx3wM/o9QRoQNAx46iHtRFa8Owzpp+Mair1ic79z6lvl3kxtgQdr9nufUHVdt4QsB3/OiDiUtKDYgUfJ7VUQVJCmWg3gUSvsjLjvXs6ODcqVMisErdyy9cyHAk1DKXq8ZkPqYsyymi9RYFmsxwgKaZG2OKfMAkEVPveiGkJCQy1toQO0UYAseYUopCpt7ubm2+OCHEwEIggzC6Sin60AtiQlKmEgKZU4qBVOlc6nw/HK4LRUV5PIFLnhMgoQ/OcbIh9YpU8n2z3MeUYrRCGOcbSEoZjSSYQSKF4L13AND2B3lecsKUuO1meVYbpVJyXT8xepBSkkKH6FLywAAohJDG1ClZZiCSmDBxDBRs0yElBiMlapknDlKaxWISvS1K85/O/MU/u/62ZtkeLfsnLz25d7R/p3PHS9HnfmU0mvtu1k6EGIz3dxaHKaZofUiBBdjG88FELpfLFdMUZU6xvfXKn7Dgh82vrKMWuSEtIfIKHwt3Td9DWVYmL0jJU6fPM7P6rwCRhCQhO7//Nzd+c7ZcmnJl6WfdbE6qcHZ+bvPBXvo7t64YM/ZxKZNYX1+/fXAXInpuN+v1s2unv37zqaKs6+HgytVLNtBaWcz7NjJkmU6N++5XfcdfPvWFm4fXLp580VDpx3cfTw6+8fo75z98MsRespYFdLYfDTabZpqSr6XGouDgG48C04nVbBHS0d5CMdmuaVlN3r24+LELrluUiW2prt+9s5oPZSESiOhdz70QRrNgxTWk4tAOBoMNlzaCV33UJESilJKNGFL6yT97kaBJpB3Q98fuNyUnQEnxEXfuB9Xdv/btR4R4KB57nWwej/OrvPZqPHqMgqWtf+DFgLhPCAjq4w/krz55rf2ph2n74d/788//wr/61bWNnRe+YP3ec6/58X/6yN5i8fO/9L5f/41rgM+oZhn5EKBO2XFZHk/mRCxPYSqxY4TAElIUMg8vetmv5NnN4BY2qAheSjblIHZN36fjJ9YnzdygOVy0g0w750JSt96xt/X+mpPsrcP/7hff+oJjD3/86c+vrRWatnabK74/ynUFTMip6x0LxVEK8lm2Pbe7A20urG+c29qa92FnvCFYSpJSU0IiBmbQWQYAStBsNhNIJIWUMjKEEHKjY4wAQERamaadCyGlzPrO+7AcDbcSd8wALGKKMTZ1PQDIeud7O8t0RqQSy6IcBOasGhtTetsJbUgIRgGkNZvG7mlVt+0uJEDgaJ0UglEsp
rckMVKORFqlFDNlqOsaRLDWKaVS8Azsvc/zzLoeIAoiJSQJA4zWtqZQtuukLJE8R83ce++UUiEEIIWEQojgg5ai6xpEZk6EKMgcTm4OBiuJSQnzH4794Y/efoukvLeuD/7m7u5T+8+yxC7s1ZW+dP3auB5MnNuen19OM78UgE0ItL+UGU8L6aSiti+ObPvgiWp2gfbx3hv2hUP84jnxNY4c8lhOB2J/u21aWY7KwTD5/vT5BxFZCKF1RlIRATJkprg6/8JXb34UwwJVxTGVRcaMh5PZsfW60sc8++f2nkzJMIH3PjrnUj/U48i8Oa6OfK9IBZsAxQMbDzx+9cut3zs7Orvw82mKQ69pKKx3Bcsg8z7O2qa78rbDjQ+o1Jhjm2vX79y+cHLLRj9tk9Igojp36sydvbtRpMipKuq+7fq+6YOAsOwD3f57hy/81L2z5Twy5VL0fW8j13lpfVsZI8qcGFDQ4dHBeDg8OFgc29r23uWFGWbDu3euSAgG6GS9MlosfuO92zcXi4i94PuDOoHdnwr2CGjlRTl+d+KeDj4Uhq+RG+fj5Cmafh1Gj2LzN+BupI0fSKhk8lHjhna3vv8Buv2l/qdeuFue/t4f/p+/8cnL+crF3/z1d73pdQ8yJkb4Vz/3+P/5O8kXHTtg31A4QD8jux/b26y1qE9Q/giT8g4FBSI687L337v+hUnXucDMoIQQFHygulZHh53OZNMsvHXGjLIhX3vL/pkPb1jXC1T4j37h1UOGBSP1WBjpheQ0bXsgJbKAMfDMueMrJzc3j9++c+hCCDAd5Sp4eMW5jdHgNHH0Lmhd5lUWnc+yDIVk5hQCIhBgAlZKhcRCCI6xbVuttZIaBaUUlTQxcoxJaylFzhAiP4+klCE67ybOubIca1XGxELqrKxjAuBemzIyeOuEKpXOpSylLoGXjIMEFjkE28boQ7RSUvTBtZ3WIqXQd8vgG60MA4fYpsSIghN7dySFCiFJqWNkZp8ZHUPqbJMb07azvp1XVcmspTRltcpIiML7iEAMseuPhACjayFy5hSjd84ishSm65fep3JgpCz+t2OP/dA3Xw+kg/eew/7k6O78xmw+HY/FlctPs4Jqc3M/cHW0Hm6IRasiL9E6gDBJg8XSDtGS6PT/SxJ8wFt21Yeh/q++djvlnnP7nTtNMyONpFFDBRWQKAJEMchgY/sHxsTYtAC2gxPXJHYcXtxeHFywwQReHAymiI5FNUVCXRp1TdG0O3PrqbutvWpw3vdlBPH+/I0mz+t8mj+C3hpFyWH1EYDmlsMvOvrdiW5Q2puL0owRPL+45JwhhHAuMWUAgSBEKTu6+Zn18XOY+LIOqYh003R7sxs7Q8FcU1WcpWWtPLaEEMGZ1yZv7NLcws50zIF2WvGoKoqyZID3H7z4zNmzZaUmk8Hy8tza+tqRlcN7lg4/ff6xqy65bjTKL9115CtHv3T0xmO3PXoTD+HM9qlhnlOwMk5GY9XqSEEFw14VKk4jpRR4TBl33jge5ZMcYbL1ptHery8QioILYC3GTBmbxdFwOja1xlJyACZEnk/jKIpTiQmdTicIfJy1W1lsG4c8kwxv6s0zj2QnvttgJCFoQ/ZhO+f895hHAetA9gXxSh91GJN2ci8s/BTYURg+QdI9fvQo0SdD/3YfX8bd5JL+6N4f/Iz842Ow+bD+3Yvf9pdP3P23X2+1r/ytP3j9L/3ClRgsQsKA+fXfeuJjnyswNJoEglA7mH/7tpVrr1144MfF17/5+KMP/xgv7rbRIWQQBBwsn9332L49H2uaaadDncHW26bBUUS8R4Rhb7UQtCw0InT9ztH85yLGEEKAXve+qyRL8rLuZaafzda6JsACx2NVtEmMESm9Cc3Ih3RmtptPjcAieKutv/XA7K65g8ijVtoyDqq66Lba1jrCmPdeK+WcpZgARlJKF+AnMEKEEAgYE+ocwTjUakpYIFiGgJ3TkcwCBIyB/CsenDQmJwRBwEbnIk4wptp6jKkQHGGMKAdEAMJ4MoxjgXA3TmfieH5ncDoWTDU1F8QY5a2SInJWB6uds6aqtCmkTABRaywmhFJWTAZRFGNMtTbWKue0YIxRrp0pJnkkqW4qFqWYBON8CMa7BiEmeBTAYyScr703lEqCZACo6wpjTDAG5AkRxiiEuZDiLxY++yunb3cBW208QrVuqrL0xis9wgRO5eNhMVJOd4LIj89qPB5uJFE8VgrikFuEB5A0Y09BJf2FQ7ea2oXG2EiwhzeXn4p/6Ur30SOttcmzPVfbrDvT6vZCgM7MIoDnnEsZA8YIIYoRpfhbx//S2xIxhhD2VgXvEKZMRMYp3dTg6DSvo5SBda0sdc4qpRlGOniGOSOJDhYTFHFR69zXaZqRU+sbWLp86LyoYuKR75aTYvdFPbddqBBH/2HP9M+exJRQxp02gflirJKsa7RCHnEB2njGsDVGadeJxWRYhZjKKOEsfvxFz+3+6my/31Gl8s4xykeT0cr83LHT51pJy0DggldFIaVwIYiA4lbLBd+ostPpeahcsB7AGC8xaoL87ocFCY4AWEQdToJJApwgDlHsgPY8vs4sv4KrDb/zmdB7I6S78WjdJShMnsHjb+POTdB+9Xym3/6WjBL70m8Obty99YmLjr3rD3/UXXnLtL7kjS/xH/+7WwF5CNSAufKl331+Rwql3/sLix9878FshkMAhFzA6EcPjf7zf/rmPfc8AcsvdiwGZQnC7UU1N/e+fhZQcKWCgKDbaSPkptMqjlhRVoIzb8jWz+Vz/xRjYFJm6Offf0sexhZagrjFmW4xnbYSXHvWjaK8ql1wBHnAvG4oj6nT2PsKNDDbtFrixv0H+u1dzgSZUkoYJ9T74EKw1krOAQL40BjNGCOMO+c4594DQsQaX6mdOOo4C51OG2FBSFSrAQJOMC2KASDfbvUrPUWBIECCc4cFAFBElWoAvLNVu9MNRJKfwNwYgzHCjAHBDqJYSgDhjGaYOGuKYgOIh+B97Qs1iFli7ETwFiBUq5IxrFQFCAOAc45z7qxumlJK4a2njAoWOe+dN07X2lQASZTw4ECKFCHU6AIRzmhknQuhQcFjjL33hBDrUQCFUaRtLihz1n1k993vW//potrGQDAWdWPiSJSlmajJzmQyGF9IBR+Ox3GHPP0Ad5UOKKzvuBmZq0AbpVOOCTFMdLpLaGNusBK1zl4Yr8zNFdjYxtxvf+6CO/LzW39bl7VqGo/I0squpb2HrdWEkChKECEIYQweE/+5J/7bUruLJVfTQYNJ0CqRsihLhxBn3Z3hJqYNQVESRRgBj2VRFjQwEzxYa5FNME/ipHK2m7WKaTXJJ1k7K5pqV3v12Oa5mw+84ofHv2hLa0li9LTO6803Ta/40b6qrhuLE0Ic1zLEKAq2NlaT2YWZfJqniXQhTGvlVbF34aqzg9OYm4gnT7701IG7lwWljXHb29txEoWgMyobTGzjGqvjKHJGY4SUabpx5jlNsrTKx97Rup468IUy/YSz1kqXlF/+aHAFsi6mUEKQAZABB84TqD31ke/W2dW4/Upo
Chj+VUB91Ls9kD0+jEhToNEnPenhxXdYz7CvAqXfvYS/6PrP/+Gj9/z1A28ZsMsv7VX33/86TELwGIG/5iU/OFmIl+1tPvep2wJ1GBMEJIQAXjtABODHRzd/832fePTCxXpmASsPyPU6zy7s/RPqNJFZmgDyFBGT51ZKarRk3MRcnLhje/mLLUoIoRy96dduiVKWl3Yh4aVWRT7JMkY8Q1xq29RVMTvTTTnHhIiISRI3lp8fDTCUu9L24eWLeknUTpYrU0ohtIOIikAQdt57Y62llGPMADnvPaVM64pShhB2zmOMrXXeKynjgDjBlBCutUHEUhobW6EQYWQRRsZaKSUgQEEwxkMwjW2CA2s1oyDjPmJAaFYW25xJIRMPHBBRqnahohQRz4hoe2u8xSHk2ENVbxMSCGJaWyHhX3nhrLFWO+caO+IkRshp7aIoDeARAu8DY7IuRtY6xrlxjiBLgBFKjdOCJ9prjCgKNSKxc54ywjkJFjVNDRDgJwj+ib9c/vK7z73WKoMQ4pxrY5qmqusizdKqrEvXaFNhSICOHvx+PhnXSpUQYH3AsZ9w4RHBIaSE285cl+wd16po8npxflE5NxiOori1pvc+F3+gN3r6qq3P+oBEkh245EpAiAvJOBdSGmNQAM2Lbz30NzxBnaiztrVmEepEUWN0Fs8pNwnBFWXFZJxQ7C2qnd6ztLQ1zcFr7xymtBxNpYyU0T74lV0rw8EEI2pNzoWkKGjl4wSmuU/TbDrd8Q4DwU+/fOvg19OgcUGrpgiMxxnltVaynTJKXaXiOIq4nJRl6S1Ye6S7cGqyE9J0qdf68fUn9n9jfloMYsR2ioJRQZkMyFfVNGZZbszyTDaZTuJOp5gUlIfGoNlub2e8JSVyjQvQEEiM80kWlUVlJr2H7xpSD4Ek1lGMMHbCodIEhH0glAdkPTsAM28OgaDpA6g5FkDBzEuDOITtwE6+zapn7PzbMNuNXO45vIgu/dGNn/qX6WD15//oyGXRJfs6QCAA8Q7u/Jnvf+9EeMUl5Av/+yaPsMeAQ0BAbdAUmEaW+FA26MW3fuiZ8nrPCHLI03T3vk+stL6jRRU8xj5NIjvMS0bwtA5LnTSR4sTr8pUvJ64hyDv0+vddvTDTHuVmph9ZLYOrdzarbjfGCGmvABmCQ0SIMlZIJmhSO1PkBoJJo/iipdkMs9lErM4drpqy351VjbXGcMEBnPc+BOx9JUXLGFPVoyydFYJXVUkIxpiq2lAGBHMgzjmPEIEQjA4OphFbtGEoRAdj7Jy1zlKGMGKci6ouKGWcS1UbRggm2AHmvO2t9qGRcYtHHSAi+JLSxFnstBaJrMqSRyE4kZfnJe56bUMo8umEUNvoupX1ymLMmMiSflGNoiit6xJj9BMADuNQ1xVC2LlaiFjKLHjS6FI30+BdnPbAMxExpYyqp4wFhIi1VusaIxBC6p8wCgUipPib1a+/Z+01xpgoipxzqlHgaQjWWMMYrxvnnGY0Nn5y3z1bF9bGSitVq4DqGre3d5wI4LCyKG73OtGecyJqtHNxnHilHSHWj/bMHfBn4n+BO9fEoSu2P3dJNLj4yA1aN4iQKI4IIxCgLhVph/ue+HNftVyLcu8n+WSu27MkmhYDyrnRajAeyDTDQcdRjDgzZUkQnTR1p9OJqBxNtryzxhiKqUwSBDgEA8gI3JoWo10rF40GG1HWRgDOqXxcWdSsvaGa/xToQkVZUpfaYMDB9zv9UVlQwSEAZtRUSspoUlXQ6IO79qwXY0q4lOTBG08c+uaSC1YALuyUi2j3woFJOUx559jW8VQA9m3tKlOplf7qI88/Nr8w70tHWWZtUQJleijTGTCGcnvZgZevrT9w8lFy5lixuJ+Ibm5sMlonkzOx3iQGFxAYChGn3gC3/Z9GaE+oTwUSUHWGWOP7N1Pa1s0JvPVPqHUzZC9FqHKU/3R/8IFDR699z1448nbnSCCKEUqQ/Y3fOfrfv2jmyOQtd87c+MLsVTftQXH8j1958uMf33z7W+d+9tUHWBQBhJNr6vrr/0ex6+XBVgBM4Pjqm3+dNUNEq0im03KkSixjaqY5TtpMosFPq/5neavVn06m6Gc+eN1cK60MLe2EeAGGY1JiTtrxvAOtTZlIFgzkleKSxbJrdJnITgAXR1meT6h3M6K5aHafDXbf7GrgglPmHcEkTKfTJGlZUzGaAAqNngaPOefkXyFCKMYsBIOAOO+tNd6Dd54yLNhcqc5QnFFBnbOUYKM1ZQklLAQPEIzTnAtGI+vQdLqWJH1CGSOSMh8QozLFLCK0h4gIwTd6SoMjlI3zYau1bJrRcOt4LChjWT49hxAWvIUhCr4mFDvnRpPzWdYjmFhrAAdvgVLknCWEBk98sN6b4C0ikdO5ECxggYACaAwcsKcYQ0DeB2MbAI8Q1Y22TsdSVnX516tf+8VnbkmTLv2/iqJgHAE4511ZVN3ufNOoEHyj65PPF489crpSjXWuUchVDY7sUMtSwx6p2r32xnLFUOVRcM4i7SxFRheLi3G32TM9Femm+ca+/zxfPveeKyoA5AMQyinlrXa6ODc3dcX/+vJbJ77uRZ3hxKzOr+ajxglWqkJY6PW7kzIPhIyK4Uy303hny6bT7u6MxkCRUw2NqPfGNSYEHEWJa7zxIxbFpvYyxZ1sAYwdVqNerz/c2XQWYyyPv+rCdfccGu9s+eAFoZvTISG4HQntPJVSW9vNkjKvKKH4//IET8uiHaU8lo/ecmrlC1mjnYxpU1SxnLnjytufu/DYqXzrwPIBKXaf2Xrw9M55m1cIYa30zMzyUqeLWXVhOCSkn4+fsTQKPtJ6xAhPkjSKccTjypfVKOU4iTpGFcXZo8nZH4vglKHCW80QMhjh6DrovCI0Ew9VYIbm2scdxDPkURj+A4GOW/hZDOXF/S9dU770f763DFdbdcXbwFAD/oHHjr/3988ePxtTxgGI0yXKmlv22F4/++qjXOd4N53+4NvXLs5m4MiXHlz72TfeDbtf4M2UVEXcF7fd8u9D4Dvb26LdXcqWzgzPtCg0Fqbj0fobi5lPx61OVlYaveQdi60sZnLG60Kwbr+VtuNLHnnuX2QcW6NJ8LsW5jEKxiNlm7ryMsK1CZyFBLM4ThEC0sj5FpvtRhd1dk2c60Sp0wZQkFISwqxBCLumUZxLhIL3IKVUqsEYCGHGFlLGunEhBEppCAEQCcFrhQjXUrYxAu+0tw4IeGe1bihjBPMAEIAiLAkuKesANuVUp1lWVnVnZhYQchCiNAuAjXIBTJR0omixcarYOaPyTckl4Z1ifN55m8QxIAShaRpFqVTNmBGOEbPOEwyE0RBB8PE1AAAgAElEQVS89wFj5n0NHgimwQfjtNM2gMMMptMxBp1EbaDYOQAgnAtrTK3yTnuOEBbAgKch+A+vfOl9519jbRMAjNaMscmkarfTplEAWIgIY6R1FTxZv5B/5ztHtQlK6+CcQ7iqgKCaU1+iNqI9Mbc1xVvdJCMEjcvp6q7VaTVAtBU7bk7GznkPFB16xcfCm39t5hsXyxEX6ZVXXTo
Ybnjrpm7891/+jf27rh6OTl938dVnNp7uZ3tO7zwn+fLWzql2pzWYTlu93nB7FElWWeNKXQY/n80AmDoob3DdTGf6fd0AMuHQvktPnHlEecJIXJltidNL9158fPOUbqwURPKWM/rHNzx+8bf2CA7DSRlhAoyoWi32OgFQwHR7ZxBCE8WxajSllAWI2l3kAuO0qAYn79jZ97UZVXmCHcEi903HWh5lQzPRAa8kWRroOee8NlrUCYk9iJaIBqOtXOl3veydn/7hxxpPt4ebiUiayipT87iFvE6ylCGwoopxu5g2RMjzz7QG9/MGcWfBg8NQUjCW7Qrdn8FswbsBBIe89zymTBiSiuJuNNn2i2+8cvXHbvTMA3/3D/Dw3+Olo/bIW83sdecuDF9y54+3ncQNa3xDZcfzRmTOWeymDscERv6pr79gdTdHjPjAbnzF3U/vUEOjoGrm5a4rv71v+Yv5sCTpzCLh3ZnemeEFrcel8mdfOZr/3AymTVNb9Jp3XsYiMdius8h7H80uzNqqmp2fBSM0mSZx5tUUE4Z9U2lf17XgiQEHyDOf5dVOu5vmIyOk3T8zu9ppd7KF2ZkFDth405Q1ipxkmbPWOssYCwFhhL3TxjacRUncVY0lHBEg2tRSRFoHjJx2yPlaUCKkBABrPUbYe1c0ueSt6fjC7OJBBBgAhYDqOpciBgAfLCFMW9XrzRqHMYkQNNr4OJuztjDWRjIjWKpqEMCouorjpM7HjZlaY4M3EetWzU4AF4uuC5YghlCAoMumYYQxhpxtgkNK1UmSaGMQAOOSMtGYhmHaKE0ZYCyMLjEVCAFBvihrSok1hhDqQm1t+OTFP/zlE6/QWrdaLYwIwphQqrVjTBitgleUUmOhKMe6IXd9+RFba+2caoJ1mlKiaqMDI4BoRGb3dbU4F2dZvnWh3e1VzkwLF7VoO+nAM9FI625neXD+yUNX3/q/xNsxIR+65NEzz28kSUql7PT79x37CG7c1FUHd13+6FNfITQ7v7He7gpm+MxMT5EyH5fjYn22vaBQ6KAUIb/U2vWjE/cPJ4PZbEGjSsZZHMWTfGuls3Bqc9RqZ7quIUbeIG3s6vLq2vpGr98lYLWDp29b2/e1/lzS2iom3rnBYFtyxillgufTsjuzoHQjKarKKgB1BAShGIFMoiRJ7n/Bkwe+ubxvdd+FC8cRiVW1rSCJOOVYcFxPtDB1gTBJGG2grA26cnavbtUnTlwIWLa6aTnOD++99NT6+qgeqCp3LvRmFifb2xqUcRQFzRnGLFto91d60V2fmuyc5Jhw5GPffYlBmG5/29m10Lkdt1/svQoIC1X/m7fvK3M92MSTwckL6w+e2xQtag9c/qL/9KoXvPz7O+H3H7LzR+zsNXfc+bUfHwvO2MBjaBLAAc3SF8yJ9/1i78OfPL92aufJ+19NsbMYOJLX3Pnd42sOscipimGqA33xbb83VKMrVxafWju3b3HhwvqGNihtR8+95Fz/n6K6KL2l6M3vvy6OE+TQbCeZ1lWt65TzuJUmDBuiq0YjH5xnMUU+UKOqKsheKxuPt0oFrYhMa8qE70czG/lw70xneWZxpdvvJe0aaT2t26mU8cyF9XMywt6bKGohxMChqqqiBBPMrHXGGYCQpR2tDRd0NCq6/RZBnboaEgxVVXW7HWt1XeUyjhHilCBMKKOZ996FCiByzgMgSinGhlJZVSqKmXEEgqU/wdOmMjKJEWNKW2wbxhkABABvjHclDiwA6KbGBDunCcXBIw9a1TVBDBGAQLwLlBHrLGMUY1B1jcFZFxBClBEUAsY0BOMdNqahnHvvJefGeowBgq/rggsJAB9e/soHNl5PibTOIoSrqoqkD0CMc8oownzwPqJpWY6Njj5714Oucdo5b0mjG+vBOjAmACdRLJaXO/sONoNpSVmIefLE6VOYxbVTRVke8NeN65qrnHdXL7n55oO7D37r2OS/XrjlVy5Z/70XyeFoNBgO/vmhT2yOLnQI3hycR3Gqi5pzsTkezHVabenTdnd1+cj62VMni3WCnMQN8DnabPvAL1688bn86XMnn15ZWtnJh/PduaYwnsXaTinwjfF2KxOeNVGY0d5ap2JBC62fve38tfcdyDrt6XRqrfM2OB+QtxihMi86nRnr7c72mFLmsQMLnHMfnEcQc3z6NaPlL2WciLKwSYtwufTCi2/4xr2fRLwnRTDOShLlVcGxyJsgGaJOQxZRYx3ouBUzyiaD6e751V5v+fjzxwDKjarkliuXQzD9/uJ4uC3ibjMtPPe1nZz57lWjM7lnnPtpgMR0X078ii9/CN6S2Vf7pHN5a/yj77/eGxJwwJgdP/7EK172QY9vLudu/sCdrf/ymiP4D57wv/+Qnr/6prdvP36WsiAMbQJso9APDbt1v/ja3Tcwb0rDYmINAlejohntvvFe32777VOY9EKWhdztve6RPbOfQYCcqylF1tmmCZ2Z1rGXbfc+G0VSUiLQuz/06nycS4YTGZVVhSlpx0mDfOJ04HRjuLU4u2s0WovitLLcNCZmMuFC8KB0CNgZlBXVKEYiSlMJTVvG+2d7ActTa8euO3wN9ogSGjxyLiCEuGCcMWtq62pGs6rOGaN1pZK0rxtvjI5jjiXRFTBhg48xNN4hIfmpU8eXl+ZRIFxGhCbGlNrkhGDdYBmnURTpxiBEjTHT6VaSRLHsYiq8dwAheBdAY8qjNAsICR5p7a1HQsjgiVEDHLB1DqFgjQcIXNDgwXvLGarK0gUt45aziHNujNVNwzlvdNM0Jae8qoskEaZpGJVFOU7TlnFBMAoAmDKjPSAH3pTl1DqTZelfLH75V0+9nFNprSUEc851rWWcTIscUdQY4JTFPBOC1Np99GPfChYa66xR1oJ2zgOYxgOnMhK7FtsveCF+5Pjx1YVZbfzJ8xPnvQGoTLGqD05LV1T6ihe9ocXq6fRsK7vookNXfPLc/ANb/FcODW9ega/e97nz5ZN2shOn/bzctspP8qIz29m8cH7/8u5CjaejMWf8lgOX5pKc3XyegSasg1GTEXFyZ60zs8s3lPt8vS4pRxmLjS4NhtWly04894jCKBMy11pKYYpp48Pzd2zfevRI5by3NQXqXWhCAAJJnAwHgyyJVTUpCk0oRST4hgQcEMZlmSPMLrx+vPjFTDWm31sqpuM0JdcuXfnY5nGLQVeVdSgESwUNWuOId0W/9mCwFRZV1SQgA9hQimno9tv83HBjJpkfVBe6YmlaFr1W+9TGqb3LB61HFy48k/JubSfP3y+G5/bYYtOSeeGmDtWONjy6zEW3obqAdv9VV8/846deq7xCBjNmv/jP3/7D3/5K4Ksnq8vf/8aL/vj/uQw9CPi/POZ+54E3/z585eyeK5ZPPvvoU8oACtjPHUlCd+f4nTYYAiwEE3xQiH3hC4++679O3NYpJINlC8ADamK8SG+95L05j/bMLE2KC6nkw2Ep4uipl5y66JtLqtFGG/RLv3MbQbRWQwgs4azT7tR50RCQlE9rE7eYqXGnnTIEzoEvRjhKWBQ7U0vgk7rkkua5w6
TuZUvW1XVT9SSd76wu9ZKldB7LRDcNIVAUYxFxTtPgEaPMmAYT1DTTYjrpdvs2RACWEBwCMtY29UTKxHhDMRc8dS5U1dSZnZnuvIMmai0K2kYo+ODKogBoAHmCOUYMUYpCqMpCcGY9ElFktAq2DoAj2SJENlbTOCM0JTQilDOW5aPnKbiAgzN1CBZjXOQ1horTaDzeNn5nbu4KIZIAuKoro8s4TjCizoUQHADopvJBMYIwimo15Vxoiwj2lFLVOEqpcwYjXxZFkkQYk79c/vK7zr4WBYcQUk1NKSFEMMatNVrXXAhCCASmTZW1Z//qb75WFco4MM4b7a01AN5YhBHmkejMRkuXqHODbU59J8rGhUoSgoms3ZBvHWi2G750UT9Dj9z7w5/5xd/lXV+Vemlu9emm/9Fn2y9cMu+5wp46c+x7z3x+sv3MQn/mzJaKW23b7FSFqhtEEmAW176sx8XsbOv0+XO3XvXix9e+PxvvPrR84NnNM6YZS8qnxeS2F7zmU9/+xMrS0ta4SGnLN8UVB688s/38YDpsLGRpWxDayeQDNx27+O4eQSKO4lacVLVCUgyHO2na8SEU1TQEPM6H3vmEZRpsqWoEnmMKgZx+9ebCF+LhNH/NTa9+9OSjnXRpfXzSg22JmV7cOz9+XsRJPrErs0tbk/NJAN4Wo6FbWGxt7YwETVtJr9WaGVZnEs92TNVrJf3OrmJcjMvBytzSsfPrEqmdSV4VNRWCleQHnz1P+DUY7/IUXPksQIy99eAwnA1yL0le84F3X/Xb/+56itlDR+9/9P6Tf/GR+7IUZrL95za33/nL17zjHbcnaYoewOhrRx9jf/C1Y3tv+uXfuuH63g/vPfPd7z/zmX/40vm1c//2PS/9tX/3tuX5GY8c8cyh8OGPPvvBj5tw7GN08aUwOOfaF3lX0SZc+cq7cPPA4UOHA9dba2fn+iu5Vo/e8tzcZ2KKOCUM/eLvvLQTCzWZzM8vbIw3ggskSyLgzo1BLKmdDQUh4pGg2DsXAEQkqOeCW0KyiBoIlBAYV4Ti2jqY1jt7Z1du3Lc/ax+o6lEwuUxaBDGjDQTtQdRNwRgiKJtMHu/0LlOlD3ZMJbfWcCq9g8ZUURQhRDDiVb3d6i4Y4ykGBJENyjsjiPRQeQCKE6CB0xgAleWUMkwIx5hjjL3XlIiimHrvkzgltGVsw6WQMtHOAThEKBWpUxMm+97ZanquKY31FWfMWAOOAS6McoRZguJWa6ZqcutM8OC8MaZoxbMuYNVUjBHvEcZAKWMsRsCdVyF4HIx12rmAENK6BmTASx2aT19+/9uevkZGM1VdCJZiQpWaEsbz6dgblaYRwRzAF/lItma/9MXjW8PC6sp6bIyx1oYQrMWIYcbZ4mz3+hvlufH42bUz2A6vu/Sq59Y3juy/eCsvx6fUZJMF0d05+8Ttb3yP9rlz8QuvfcG4nnY6vWkx+fzWvh+dQe+/srx5Dz67fuGJY/c+fOz7lTqfRSKN40I1TV3H7ZlTZ0502t2Vfn8wVg1TqjTelPOzs20ZFWVZq4ZSFoyayWa683OPPfQQaiW62CQYGk2irLs+PH/Hta9dG57yZnrvzc/fcM8BKRljNIV42Ixs8Jxl43qSYDZRVa2bLOlOpluronWi3OSBBsarvOaSP/Py7eV/bNem2b1/fmPj7HLv8JWXXHn3g5+vctvlfL2uVudWNduxjct3dNQVqUzy6QAg4xkB1cRxDyO9tHj4xLkndi0esmrj3FbebotOnNSNj8Ti9nRLOt00VaD+3s9sm6EP7DIwpY3mWPpTvjrnq5MheIRTwI7i4ykXr/6pPfc/evzciREnHLwFWnbal1TqtG1QJBbf9JbL/8MHfy3uJI9eZ/0Vf3XzOxfg2ndorAVgg+rZ3d+r1QNQr33w3a//ww+9ziONg/il37znc/ckZucbQXUJpBCotYbm0/QwvuKFf9fLZqpmqGrIJNIubL9ZX3733pOD9RRx9O7fu40QVGvFBGfAUkmGVd2N5nI9monReilsNazranXXsnOgPUTCctJ2FtVN6XVldGAc9ZLO9mC00G9jIhZaGUd479KiVnWczXKIhKRVWXgfREyCFwg5KaOghaeVscpUVrZSozwCaJqCEIoRcMHqWhHqPVBKJSNIcm4D9QGD185oRFDwTJlaCG6MS5LU2obRtCjGQkpKhLENJshog3+CCsa4c0FbL6OEM4pphFlim0CEpxj5ypVmxzSKEeSMw4Q6mIIDFKTWYxeU4F1nPWEYA9e6YIxRhGpVcsEgAGWcs2Q0HnIOQqRVPXXGF+Wo1eoBQAheqTKN+9aFv9131ztP3JFXlYyB0cyYilJSK0MJYQRbB4Qi0zScc0DuO987e/zUwBqljXfOWWudc97jQJCUfHVp5o7XHDj6zOPfvee+5ZnF6687tHt+f0qyh87ft3baD9b7rjgvuysKJhftu6G/0Ln8yBWjXAFg50OWZfeto384vXjdgnnXkbyVJJLSx04+84/f/fTW9j3dLIsQGZaDfNIQylsJF0nHugkO6fZwbc+ug1M1Yph650ajUa8T0Sg5eer0jZcdmRqXj7cZIoBlIEF7pbaGnX4aEL7/Racv/9YiIXj3yv4zOwMcdOCMOaOdZoAa6622M1kv15MQALRdHw/2dOdGVlWjavPn6qXPZmXtewv99bUL0OTXXX7DM8On98zsO3ri2EV7L1nu9OZaK0fXHljbOYsDoyxYF3sx5KGrqymN0wBVBMn6oOSJyuh8mpCiVFY1lvh9c6vDfLJ3afV4vrGkD33qz78UHEZsP6A+DqV1JWnd5Nv7udly+cPMO4NRbMrSnGW4e9XL3uDL9fOnHx1sbdIgZXvXJVfsOvrog6Fe1PjYe979lv/x2cu+3nrti3/10+5qHS5/BzAPVv7Bn93/J3/5TNwjL7lc/emf3tGNeiyC//mV0W/+8bFGn0THj5LZIy5X3pqAOoi2b/35DxszObDYOXbmvPfeItx9f3vmrrQxdaEa9N7/9rp8PG0nmWtU1XCJG89MN8k2q3EG1ASdshZCmjPMeJKXLtDAhVS1BlxEfIb4SgOmOrTi1NQjkrSFZNaZPpb7+6tRxKKkp7VPs8wH1WiFIUbYN40SglOSOucwsQESrYeEADjpvKKEVFXR6Lo7sxAAEUKrvNjcOba0fNAHxGjAngEOnKXGOwQuTdtFXvhgVGMwsd77LOk5TzgXjNEQAgAhlBvtHAQEEwRSRnM06gDTgJmqFadUlUOKYq1G4LEDncruNB8gcNYajLF14+AYpZ5ggZAJPiDKtLZSRiF4q6vgUVUXmLjgEaMiIEswZ4TVqmKUMcYrVRq78dG9D73h4V3799xgXTOZjCjlkWxN84pgHAtem5pgGrzjnGBA9z18/oGH1oJ33gdjjLU2hGCdB0KiiF20Z/aOl11W10Pjw3y3S0S8Mdop1JauYVDbB39UkWaocNrqLQc9onQ6GoeLjtzQ6/cOHDxkbWA0IYx+4mT/8XF20y7/7kNjIlm7QzfODD7+7U9sTh4MulbaWo+CVZ5S6oTxk7rB3X4yn
ZatOIbgtDPeuThrDyeTWR4bUiYiHQ/GQsqsJZG33AtB2ciVD996+vK7F9J2i+gwrMtWS+ZKQ/ARZx6AIxYYoybMttIzm9tZmmzlo8WolSMTE/ytq5697YHLhvl2O10ZqwvWxWW9PdvepQtTYmXycrk/t5Lio6OcYkKE9cpJYSOyd328Dk6Pm6ZUg+sOXXt+6+mDK9dCPX5m8yyXSTktCtPMtsVS++Iz07OtGJ97kB+/50TjRWdmloZdg3wdQ01BGNQGN0Z+HQJBiBno4fm3Q/nwa99w8xUvvPip4xsJrjdOX7j0yhcMy43Tz587++S31WBhVAkKz0Hy6i/OvuOWd30+XI3d1b+EwTeO/H93PfexP3nsq3e/6sL5CZd0piv/+0ee+PMveaK3/ROfxvtfZs4/SfFhxyXhrYuufmz+wD+DqQ24cqvcv7r3qdduXP29lQvjbcli9B//9E1MxnWtKAqlL+fiGeWaoMuVbP7Z7fXrFnd969gDh5dW01gszu5Wim2M13mE8rFiAa2pwWVLK6dHpQuGONg7u6hN0ckWnFeJxCv9vXU5jYSo6oZQbozrzsxUZS0EpSSyXgXvAMD5wCizRgMEwSLAfjIpokh6bwFhzChGyDcOc+RC0I1hCBtbBeSzZD5gyhEJAXwwzhkqW85Z7x1CpFFj54D8BAWCBWPCWAvIIct8cA4HgxyDmFCetHuIxMVgO2lZ7OYH02dN7UIAQivOIkCBM2GaAKBVMcQkDHYuzPZWR5OTrWyWYIEQZrKtmryVdYqpwcQjjABR57x3ylrNmMCIEx50pf7+wPd+9fmfIgJv76zN9pfzfIIQpowLTq2uCI2LclyVRTvtWNOMK/7FrxwNHpyz/z/nHCBwAEnErrliz4tunXd1QDRtylzTkASxVYzvf+6ehd7hJ+6bbAzRrj6xzRAH5PBie3WlKs2efSsIwWiSpzHrL8zPLSyfCctHh31C4NqF+paVEAhKY35+c+cjd31I5cdmesvEO4uwdXZaThjtWIvG5QbHKGtF29OBlLEqGoex0ypOWFN7ijBnfurQbJphpJijBrsnbz+/9/PpwsrqhXNry90eRDCa1lRwV5QsTYRnlSs1Amq0pYJBAOPSTocgXKvi8Zecu/zu2e3BaHHXymg8FSIdD8Yry7NnhxMJmMi4K7BnlHozLBuCOQp2NK6IBI5Y0NqgaFJs2yo0WCvjOnGcRrGMWJOrQa5XFvdneLA+Hmzm9eDhy/zgxEgVS7P94ShlwWtnQ9DIT0IAh5YsBMyuwb0r/eQR2PUL5MSvk/T1onPFiy6xuw9ll17aPnxZb6Imb337V4utu1pyTma3FYOjhLmvLn7omnd+J1wL/sp3YAAgfmtYzWbZVOuNwQAZ9eLXfiPXApZ2u/s+gFZ/HZ37gsd7AtuN5P50aeuia/7cGx+E5SZZmus/cP2T++9ZYDog0UJ/9Tc/3zi7U6luFmlttMGj4bCdJhwazth4NJJSrC7tK82OJDHm4qlT56/ZP5+Kuee3zy12k/PjxuGqG6XKCs7Ewf7eiPiYZeN8u9VuEZp4UyOEvPdVXXFB4qhbViUXAUBijCll1lpCA4bYWG3shEDCGKpVRQlFhAbnq3pIMaWCEMwarSiOfcAoGCGYB2HdCJCgWI7yKUGBUmAsEiwNyENAEIBQhrCwrg7BJ3HbONw0U0p40prP8yHnEWDayro8SY49+8SB/QcH4/NWuyiSgkaTcUlpBU6OJkdb8WqtS8HbxjTW5eCpMQ3BvqqKJOUICKUMIUIwq1XeNLqqp7O93VEc1XUFIThvgvN/t//uXz5+ExVRLNsAVGuFMVeqAgDOhQ+Ic+ycds6Q4PIKfeIfHwzeawvGaOcU8iiA9Y6nmbzhisXrrl/EmFAiq3qaZJ3xeCJlTLCgzP79p08ifQZVTR0S55EJ1HoIGO/Zu8cGJKO03ZsTIvbBYwJZKj+/c/Hi4i4P7n1HyigSxrk0Sb/4wy/k6/+iynOOdUoThtNJzKLc5t3W3qdPPjvf7wVGXD2JWykTDPvYhUqXtfFubTAAr9vpbF5sh8BmkvTpl5858oP9CUNl3RjryjJvz7Q01GBTrQvOhGDOWolIQ4BWjV3oZ1rXgTLkyeMvO3Xke3uOP3/m4OrSpCkl0FFTxAhXAWcC1wZlWTYdjTGAcTbNOmkqts9tdnork/Fm1I61xcaPJiMkE8QocjyC2uJazVAx9Io39Izekihy01U/ae9L9RNPPa3qbDLtOLNmQ+O99Dihye0mfw517wAWGwCuTlpX0YWb8Mk/1vFh2r0NiTmrGNbWEROShBWP253PSbyTLb3OTkGbk19f+aO5V/7vuauORTf9BqxcjYjFgXvvjIM3vOlPfvCjksytmihBT/2um3s/DI9R6GssEUjcPvySn/1wozfmOvL8tplrh3uve+7S716UJCx3DH3gt6+/5tDhqjBWq4lDZbWx1O9kbH6ht3s0GUyrUVFPra3m0myu15vi7mg8RrAtWVaVdms8SdLGWt3r7K+butN1B/uXBlfMzx22xtdKGauSpMUoK4oCYQQOERqstQQL4yqEcF2rTqfjHUE4IByMacAHIYT3iJJoXKwLnkBwjEqt1TTPWy1pDar1WPKWNuOZ7qp33ljPuWisSeVsrQaUIQTcmGkAhAiTMrHWIgQQMGMRjbjVNnjHBIBPEUaUQ1FNMUmLYhRFPZFwV+rxZMAoYoxao6VgGEg+3TDOpWmbEE4wt95hhAAAAfLaYOKregzIIUScMxTHjJG6yTFixnhECEaqLPNPHPrBu8+8FlOHAuOcYxIm00GSdJ11RTkWPIYAGMBZZ6zjUfyxT97jvXTeqAKMrwLy1HtAVsYze3epN975SmOCh9K7YJTXpk6TBBMSQjWY4q996Yc+JN6Ddd4BDoh4EwIETKgPKOp2ujNz7V6/0+tlSYQhfrrsHXf9qi5++tqFm5axUpohPHXw7LOPb1z45rB4Ggm2NRjWOJ3P5MZwC7TvzM7lhRqMdma6kmE5LIpOmgGCxmjwkWl0t9PSdUDQPP2qtfmv9GXElW7QSOEs2dnaTCQSUcQ4GexMV+YWLaqLAhq7tTC3R2A+HG4jGWxAz96+fsV3dmnrEA6ICLAKCMdWG8BalYKLxkB/ZmY0Hud1NdPtN81kNkvLuinrqYxndciLvOYMsMwEklrVIkqV3lYjGlq0mIxno1maFZNjV649ec/2+qQxMN/fq/R4NCHBC+TnPPUk2uPad0LxlBcdbGKfErb5Rde+HqLVcOH/RW4JWN+1DpHkEGIJs74xDeGZNVt0+k8RTOL4xa6e3LXwK/+S/fXhl57eddMd+294a5J5TGBtfbr/4N1YnMBuhOb3+tN/Br332OkYoRp8O0AnyPmrb7+3wV/bO7eQdeWpUxemvzjpfi64usM4oL/+219uSzEsdK2b8ej5Tkuu9A/229FiZ7cLAJhd2Nia6LNRiDnBp0e5ZGWazvY6i5PBMEpbMaPFdLywtDLNIWkrN5ZJmys7jUTLe5emEpB0zhFCvPPeBg86SRLdBKWG3gdCiLOe0MSHJgSPMXUu
IOScbzjnznrGYu8dxhKjgBDSpnQWijInlPRm5rUum8pEqdRaBYytGUo+ZxPr2cUAACAASURBVIwPUDAcE0qBsEbrJE5CAIx9Xowkj5Sy08kAQjW/cjFnSQgU4UAFR2AQyqiIrKsJFQiotc7WO5y0qmoTQuAsq9W4rKZJklICGJBSDUaYcIIxKYoijpNGlT4oCCifbBHk4rhrHfgQaqW5YH9/0fd+fevNzmpKpA/OuYYxQYjwDkKwdTMIPsRR4q3zuGmnKx/5+A93JgNVBEKKfNy2dIcjh0FgZpd6nX/zjhtqVXoTK6XWzh/dvXvPeFy00r5uXCsV47zzz9/8oTEOY2qMD4CD+wmLCXYeWe8DoVQm88u7erMrWTeNov9DE3zA3XaWBaJ/ylvWWrt87Xynn5xUEhKqBDAhGEJHehERUGSAQRlBAcerIipl+AkiI4qooFxQARGZhKbIICGUECIgJoGQfpLTz9f33muv9bbnuREv/39TN4ufOnP+t077R+3tX37ufXsP7h35UVY8tdmduOf0WvvhsNPPcadNpg2z3Haubs5srjfVcHlptLE+XVxZySqn1tb27913/OS91vqmXpjNdoyvb3z0LZf86/mjZjwccsry/Xvvvuy8Bx5ZO1lMHgxHfdtWzpxZ2961urpxZrMa+PFoaTLZagZ+3sV7nnnyAZ85uLp7dX3jTM4hFEYtzuSNza0Dh84+fuSug2efr5Lm3Xxn1i5XA/LM1q23ncFo/Xgetio73NmcrR48a+vECYaw2YbB0uDMkXv2771ge2dzYa8n3f/1v/z3Q4v26FoChLpa7QNEHJIuiDsP42mVk3TO/9a1G8VXxqxk3UG7l0++X3a/jOtd5fjVkB3gvcIHYelSGh3U0oALnA17kzdvotlXx24Idt/nVl7x5aW/wdV/Pf/xl++6+JmPfNQjbvjOl1/7Hrr9zgXevoXKmbD59zR8MenduQXEkep+Gi4v7tu67Omf62dzks2tGa3/zPbBzx5mOjPbIrzpi+8TzG64XOYxFj115vR4kVCgrqq6bmLsS8mlhxYnEnA8HJIbGqB5u720skfyPEkF2JUYrSFlZR0kQ7XxqgVEYshsSozRWktEgCVncc51XWuYRITZxJiqpiJwxtpSEqG31uRcmFzXb4eQgbOxjgFDiFXNUoyU7Kqlze3jw8qnSNazMQbZSHSiM+cY1AnofD4bj4Yh9UXKaLiqYlIO1vis4irXzeYld009rupBSDNjF9vZSW8HogXJKUBMva3Q0LgEyXIC8hCwM6YRUcSSVZmolEyAClBKARAi3NlunRFS3NlZCznu23s2WSdYZptb3tcfuuhLv3DrY0O3tbKyr+t6YyAldR4Me1CfJTOTlCwiliqgyVe/tXHDDfcxh9iKmnbSNiWlYQ3GutXF8NIXXRZjXFgYqdqux6o2xhARt93Es8/WaTv54Z1rt3z/9hAElENUBREpTIbEFpCsmhK6plrZc9bK3gOj5YWm9pUfX7314Jumo7N1425YyTk/7YGD5+79tzf91mue/ZyLN8KswYVU5hVlYjg5m/Xz3jeyPZucP957ppth3dx777HRcNV6EB12oR0Nx9997B3nfv5QSgTSHVrc3QzqE93msHKzOI+d7FlxzWD/9s6WocCKyQy8p52dncoPXENffvB3H/m1C9tuujpagQwbuatsvTNbN0JntqMrHVTV6spoe+sMkC1aWY+VATW4fXq258AiqO/DzmzeW6JzD57Vxrhizv3KD/5Zeh3Uw7W141hzvO3Sk0c3dHqyk2xhlmE3qhYdEOyj/f9NNz+h8TQsPwf9WbJzl4wOoFiSuVLCjWvkwOuxZJ0eoeK1/waUIONHmOWL1S6AdIUMxkjWmbxO7ZG6O/7YxQc/5qxjsvrdb699/7rbpzubyLQX7HI68DPYXAgnP1DCBdLdQalT2i14Lte7xI+f+II/7qRrAIDp7mce3/uJesnv3Qk93vHNjzJjTPO+n9fedV2sBj5Gcg4I2Vrfth0TZe1C6GpThxhsNRTIUEyRdjDa1Yc5Si4lGcN9CM5aEamroaiIJkM+xlhVFSIWBYCiqgAEoF3XAygzGLL3y1JE1BhWVRHZ2dkZDQdknTWupCianaFcQMCEMKurBpS7LjCrMY4IkChmQcgMBiHfd+oHZx96eOxBoHWuBmXnbYg7BU2OBQErZ5wfZCUAjX1WCKPRgYx9Xe8TEs3ZWMfG99FiWW9nAcuWYc4lb5y4a2X3qpS8sXnSe9/Ui6GTorNmMIwxYVE03bzV0WiIXDNBH3YsLcbcicgHzvvir5/52W6+ORzuFoUiMfXzUhIaYsOIBhGZjCqoYB92vvu97a99+740i9ZJChOVmNRuy1JVDXf5jd9749NnMc/ablTXbDjEgKDEZI3JisbQZGtaOV8UNjZn/3HT7WeOnbKMWVTUKUAugGRSKipaRNGY8a6VhV17xrsOLa+sXLP5oL8/dT78GF/31gO3/u282zz/gaMHPnJVZbprac++vQcGmY5unDJV9jQcjJtT6ycffM6DN09P7p2frKrBtOu84IH95179kG+c/YWlXc3q0mBhbfOM9bZr28Ru3749d99x06GDh2d9p8I7bZ7lqW3qfh4uOfsBxzc3ZX7m5iceu/gr5+5fPoywde+p7V0r+yqUKF23k+2u6syxbfTMpYu99ABL46EjDP1UrK+AT4eNvfVgFqIYNmCytMgeZqWNeT4PWEQHI9zUb356x/lF6WchbRntEu4C7ZEOFDW48jMEG7L5deQKDv86rP8H2JG4hgSBGaffkbiB+16m/RqVIKaGzZu1vwmaS2j/5QK1YrGsJRNBq1lUpdn42pOqQ49+YLjiJz/7jVvbN3+hVzZWZkWD6gRGT5fpEZHzEFVxoLAMZhf5Aw9/1uer5qY6phm6O59027lfvHDSb6w2C3jHN/+2bae5xKap29lEgZxHVWbgmEJVVUSEBApcSj594t49excRR4DqrGfiPszX12dLS5WCKyUhgmhJsQyHQwUABEMGABCRmYsW1dL3/WAwAuDNrdPG8HCwmENRFO8rVUXE2WzmnEPCbr65vLKvm+faN1mmIQRVAGRrXSkZFKyzQA4REFABCO1kuj6sh9vb65XbxS7mlCyPyVHR+XQy37V8VskRqM+500KAphkO+z5NZvctLRwisoPhvlBmBAWkiIKxrtiIsUJnuEDot10znm9vztrQz44tL+1GMJPJpO1OLy3uY6MiQjhU6L2ruq5r6iqmHGJ03qYYVeVPD3zmlXc+BaCMBmZj82RVWWuWEKiuBrPprB7ZEFJVDUCpYLTcHDs5/8TVt2x1fTdPikYFTErebA6WD513tnvpsx426ztjDakADELaLqUMmmVR9JWbTltnKebWmoHByjl35Mjxe+89Mp31W9td7LoioEpFIABCUhAqosjN+MB+jvH/bV5z3F8AP1aduGH1mmeLkBafMK2uumc++8A5Bw/vP3Te5PQamHx8q12th4Kyb1ftbENQdWl25PTRpWbpzNbRrz/p9E9+9eIYQ+Vhe0eXVvYUnaeoS4t7j52
6ex4mUWQ8du0sGnaHm6W7u/U2zo3nNoU7n3Dm4deeD1Rt7pwe2MHKytLJjaOjZteocVs7ncFU1836fLp3974qddNZVOOGNdaEJ9v+TDunKM3Qd2FnebxntrPRNMsb89nQN0dP3qcODg2Xr//c9j13xuHSAyfHryNWTVr4EMoO4llFp9A8QfZdgff8oYVZ2fcOoFand6M/pBa0iLOunPoHGD9ZF84t0ylVAWhEO0dh+4t5+Eje8/DCI4YgMaF3drKZjcfBIoZTT5mcAcVHX/U5Gzd/90tjs3Bl2r6e44T0CBQT+cpMLZoxlBXDy5KGey+/d2H3hy88dPYU5Lan3veoLzzM1pu3nejxrhs/UkoxxoQQrK36LgImEEUCVYkxqJZB0xTwImWyc8Y7qeyiddx2PVEBdYDBuQGRgx+JMTpfiRQFUADLBhFzziJSBJkZAIzhUjSXsLiwVArknFSFCEpOxDalZIwREQYoKq4y7XQr9tFY3wwHOScpnHOqa6+gUhAJDLuYgvMDpBD7jAgxzrd3Tu/atcfblQTJcrO5fcbY3pHtOhzUYzLQtuvjhYMhzkGIObFZAEqqBAo5dFVVzWNkyJUddtJ5a9tJHC6t5r5lothPh8Nme7Lehx1nGxXvvFFV5wcl5xB2vPMpprpeiXk674qzogp/cc4XfvnIkxHsbLq9MFqUwn7gpSTQtLF5RoHG4wVCZ63LApZsMeU97/tcbuc5cxbOpViL0p/8wR1nfu11z7/0IRdab0TydHt7aXVXDFLXPuVOlaSkqqq3J5vDZlkFRGMIk6raZZyvmmEMebbTH7nnyC0339p3SUVykiAQirKiChRHW8uXfWLp1+HHdn3qOXzkm8yAHAk8Ms1Dvbxv/ksvuvKKR1183lkPPb11EpSSBKMDNWpQ77z3Hho2kvI8x78979onfvvyUydv3mm7XSuDBT/cv3DO8VO3Ht5/fj3k42eOjerVo2eOFFPZqi5dN2/sfNqNmtHJ6eYtT7z3sV+74O4Td/HiaNkuTCezbGB5tAi6gLLl/Hgy70qaGDKDQUMEGmGtP3PQNZua1iehqQeVBeMrLpYkVH50ZOOuCqu1M5uD8XBhYfjpD5yM2yZJBxEKtFpi4f1QMuFIQJT2weHX4M4XYOsGHfwU7n+6bPw70Qp4gwUzFDYWT31IDrxOSLjtxZLaCib30to1sOvFuHRAjbFFojXYbonpTdiFtgfvrzALg7XrH/WYawdwz6//8zI/8F3V+m93R78LOgR+msJxwJHiQeUVrFbsOD/qKe+Mcztclo3nbB/85ytXht0dd92Ct1z3fmYGIFUAEgJXcm/ZJBWRQkQiUhsfRZz3oe+ZTOymaIp1S0iqOUqBpDnHmTWOyNb1IKTMjMYaUWTElBIiqiobZnYxxpwDmczUlCzEWbViwtDPEcFVtYhYY7u+62br1tdVVWvOhIjsQgqMguTv1/d9O28bZ9ggKBNjKqXIXMUyuZhz7VeQyqw7xrhinDb1YLLTpW42WhgiIZApIc7CTtMsDKpxDDCPJxnHxNiMlnPs5vOZrwdSTC5r44Wz+3ZWDRe5bmS+kdOESjWdrfnKGq7n85ZNdm5AyF1oGRxA28+jaRopKfT96vKheZiEPn3owi/993ueENq4sLRSBEAhlymzRfZMFlUBkZljTCKBQcgP/vxDX/jAB/6hcnVdG+M09mV7zkX79/3Ry6+8/DEEklOytsqZBGKMoWnGs8nGsBqG0NmqkRIBgUxl3RCkUyZA0FJE1dmKyRw9cuz2O+699+775tNe1aQCUUEy0/Bgzm1/1iM3t09e+8F3MlpmAgQEtAaZjEDydjjrQxfjxZcsvvKFj3va4x/bJyXQTuEb//a97//gJFSj88/Zf+rU0X9/7ukn3PiUxz30LFs20Oh8tmONTnraM14qYYrkY55O5v1otKebb/3b7TeNl8bddKcM7AKOPvrQG1/8w6u6MD21tb08Gs1n29bzxmwidqiR5hohy/Li0sbGOrqeeCCWTN9GoZNnzowXFjPI6nBhWkj6WJnOEB/bancmk/MOHbIlDcbjD77zP7SHnEG1iHaYkvC+LJagBTokZRv2vpGajHf9VTGg57yd2pNFZoYbYSC1pFjkGG7frWc9TydrwApgwFo8+QVJ27z7p/P4gJFSDFHsuO1lgARjmdwHZz/oClh9+qnvyqV/HPKpd36hi3nGCElq9C8s6V6AbaQLlQ6gW6Fq7yVP+L1qzEvG3f6sIxd//qw9dXWKDf7wG3+NiNbalFIR+C8iYljlR5g5hlykJ0ZrKkVmIAAiaxBEVWNMMe94qgeDZmdnEygNR7vm0xzzGUO1NZWiNc4z4rydGGeY7WzWK0VLrvI+p954Xwo45wVISnTMMURBNNaWkvE/ESoCqDEmhN5VlQigqkhWDFAgxa6UYKslBiw5TbvZaLxiiEUkx2CqYYgzgeJdUxuTChg7QDLHj926e/eh9fVjde0H9e56MMolK2QitKZOKYuo824eApnB8uphdMuIJrTbuV1TrCrXbG/dkfqTRRIASw4GgY0JWZkNgaKWosyMXZxWbre1+N59n3r9iecXKobrXHqkDqARUZCiUEpJqGSNJSAwEkICyTd89+T/+I0/csggqoSgKKoK+V1v+cUnXPkow5wTAEbrfclELEw+C/i6KrkHCUhYijCZUkSRRURV8X5krLEiRVRY0Bh/8sTGD2+98467jq5vzsAMlnYtTHYmAPzRj/9jUsuoJEpMBcEgAQMRixIAmB/puu7sCw5Xzeqjr3jQzTcf6ycnULEnv7R8uIat7z335ncvv+aclX1jF0EtQG7bUNWESgSQU4wxWEOh74jUOTeb98a4rg/rW+t/ceEXX3zbY1dcM+3bImZhvDIeju88etutZ+5ZXFrdUy9sTE/vzKbWV0rOG0CRzclOKwWk8pU9evLUYHGxYjvZOmFtVS8sTNswGC7aCibTsOrx7/7k9jiVkOeoDKoptywLgQ6DnGTaLTrFxZ+BlUt1/W9o+07Y94rizsF0H6kXdioKgGwMbF4HS49VYs0R0RRUinNd+7j6y3HfI8BYQMulldgjkGIwPeSFFa38k+zKmzv86nk/owpv+fwx0iKwlP0vAhYM3wKzC/QBygPkhYc875srzbdHI3fbT2+s/v3SaNhomeAdN3w4/4i1ltjKjzAzggCAiMB/IsRyPyYLLFKESBXuR6UUw75Ih8qq4H01nU0q10xnbSnbg3okCs4PEFFEtBTrbMqF2Ip0oHi/GIMoEpJxDhQNm5wjIrClFJUIvPc5lxgLESBpSrHr170bDZtxCIm4HgwGoe9ES9tPLbFzPqRSuSqX5K3tuo4wej8Sxa2d9bpydTMURQVmKgQDKaWuh1mmXReNrRCx7/umGQAqouYsSGqczwkRZn6w5Hwz3d4O0irFpcEFjiDGvut7z7S58R9kdjd13c9bg0DWpBx8NUT1pez0Ifz52f/yhjMvCCE45ySbko1tKKcyqKoiiYwvuUfM29vr3lYx9t7yifX4jJ97swUC0QKKiFKUmH73N17w9CdfYb3xbpRSS2
xAqesnVdWQMQogRWI3Z3ZVVakqIhWZM/sUBcEItAAIgIgmlkhIMaamqUMn7TTecfuR2++4Yz6d3XjLke/cfJchIyKqqEjA6IAEFQBF8X70I8w8HONo+fATn/Gol7zoVS996S9y3w1X9j/88qeeuPOGrz3xew/9zLmvedlTXTx10YUP6sNsOFiQgjmL9b5oRsW+aw2TSFFR5zwgp5yJzfsOfuq1x18gReJ8liQjikpIgiHnabcmmWIpk27SpbA5mdjaGaQCuLa9M2wW6sad2Twp0Cx6O415Ol9fGNi1Lg4Hg9hHQ8qmuv6f1u/83lRVCmQtSSBoNpkfrXrS4LyUobplOvhLkOd49N1iH4Tnvly2TwNmQCY2Ino/JtSN79KBq8p0XYkNBnErcuafeH5Mdz0HF/eLClov3RTmrTehmKFghdVIvbnSnf226d1ffsDPP+7C4ds+N/nynXP0b0Xf5HwC2uvBPAzAOfTjBy5f9eQ/PHIsbL4oPeCLe4HYIuFt1/8VAJRSrLWiTD+SUiJUACilEBES4Y+oopZSVJhRpCioakGwIklKcc7lrISmaPbO5RwQQAgd21IECSRmtoxEiKyaAMFZr4olpSJChmNKCEkEra0NecDc9x0zeV8rKBGG0CMiCKoAkIgmIp9yVBXnqqoeb2+u1fUA2QNEyYUIpBQ0BOCLADFAjtbatp07XxWdq7i6rkNXUt4ZjseqXDJaa4lBtczn86qqQ+iHw8F0OsOUydX1YFGVJRb0M5Rma+t4XfFgsDrdPrm9sb7n8AWpb1PfETNiU7SbTLea4cgCsnV/euCzrzv5HJLx6bW7rJeFhQWgsbNec46pJy7MFoABWWKXIc42J+Srxzzt140iIRZQAFVFRH79Lz39Rc9/IjsirqyzOfVMrusng8EoJmBmQmLFrG3O2VorUgBcDB2SOmeksKp4X4FaNcpEIkWkWD+MfQRNoHLkrq3v3Hr3kRMbX7/u+tvuvI+JSIE0CZAgg6KiIiAzA6BzLqdw+IKH7Tt74TP/55rnveQXj37/lq6Eq572lFv//cazP3Lxk2984uT4Ny+75KKFxZW+n3pXi4aUiquqlKMqVJWPXY8IhIREAJBFAOR9e69+zZGnJwRDoGiYPYggmhRmCI58ttBkTcdPn+hTXBksjIfNvOusre5bO5XyTs4y61vvdG1S2tAuD4f39QFNmG1h5drNzQ2Jw09++G7NViWJ9KoqpUP3jJjvwXIf8/kiZ+DwHyJCOf13pvuhHn6HaNLcAhIggQAgglozuTUtn0edKClKp3YR80k4fbU2l8LqY6GucD5lS5BRwkRQDQ/E1sV7o3ql23MlHP9G/PnHPKD62g/xyzvvFJxAZpx+C6BSXTDVXvUHfv6X/nHfYrrmwq/8xLVnbc/a2nu87fq/KqUQETMjuVIKADBzyT0zqyoAxJyssSJARIwgCEjMikWRjcQgzvN8sgWgVVWDshoEkBSUCNFZFpJSsiYLlEo01qacSRQInG9SVoldXdeC0HVzZyohVc2GEMCWkvo+jEbjFGZELIJMNsQta+sQs3HOmwZIAFGVd6Zbw8YSI5uqiEEQLSKaUZgtKCChF0mAaphn7Ww4WgXFor0ISAoxheF4kcmJQEo9IjpXSU5FKWUZDkcxtcQ5pUBsCJ2IZU6p6xj8LEyklBTDoBlIDqpSDYYxdqPhMiCT0dgHLfCnB69+9V1P4so09Tj0SbEQkSqE+dxY1CwiYJ1ja0tCNYWzVZZLLnu5UWSirKIqRFYVX/eqn37FLzyjT+14cU/KRSVaU4vGlMBao5BBBQsREyIoiIqkOBcBw84Ym5AAIgAQ+iIBFEHZ2uruO288cPA864YAVdFkWQCwbpY317avveH7H/n4p//9P77vFQSMKgApCBIRABhjiHmwsvorv/Ka//lrr/zE57/4/7zuzQXwwIGln7rsiu3fvvPxX/3pRZleeO6eUhSwJ3IlZ0aLqimFLNk7N5u2zlpFQERiFlBN8r7DV//aqecTQIrRNwORIjkalj6RqmLG0WjQhbmqxliUiFhBC4pmJcumC9P7Ttx9YNcFk3a7kJ5ePwaagtQn1+/Yu7IHRHud/sHbb2rnLKJSghQCmKl9bNTOhhOFB6o9Lr8Ili7l6b3p9Adx/AxdvRTnM7QkRRFJRFDU0qxMCFYabaMab/oNGKzKyWsANsrCz9Hu/ZhAy1z7zkKfwSqRGk++RtfQyVtkV/+WGVw7fNeVF42vu81+5b5nKC6KcWZ6jw4PWVuHetdzn3W1Hxy55Umnzr3mkLHoXcE7bvgwKCgoIYkKM+ecEAGAmDnGnplFCiLmnPk/mVIyMSERiCnSEylTpQoi0nVdMxgAqEgmRGOMJT9tT5ZsAXsBtrZRta5yErNCNuwUc07J2BqAsyYDpFLms25xsYmSrKkAKKVe1DAjIhDZrAlFYphnzYN6VSSpFlVUyHU9KFlLVsMFyShhzEVzQABmzjkT2S5sA8BwsBxS0pzrxqdSDNbWmrbdsc4YVxdRABRVzb11VTufGUcWvGXXh976hsh2KVZGlG3upWpIhAA1zLeNsTlna23fTZ3zWdD7CjKm3P7pgatfd/x5s3bbuaquGwAsKQiCCHg2aKuSS0wRrbFFkWHSbq4sHLjwspd4rYMGFTCIguwAXvGKp7z2NS8J7Y6UbL1nW6WUEJGIIItCcna8056y7HzVAFDK0SDOu9b7itCBYWM4hl4ko6p3DgBSSsCDECbDQZMKgYq1vhQxbLOEkqZ1vXTsZPfuP/ubf/mnrxFgLrmgMBECF9GF5b3j1dFHP/aZ4TgNhtXjH/vcIunw4QMve+mzPn/p5x/3pQOPeejlvhKLWFVNAUbIAGSdkyKCKLmkELSIsaCqzLYUYcY/2v3xXz7yzKpqRFLKybmq5LK9s8VkhsMREYtK2+5YJkQDyFXlc06lZM3RsEuiOZVY5oN6vL5+uqpwUC/MZ23I+dTGhuTJzqRd35G3/vG3TO4FWBVF5iLny+I5drIZ4G6UvWhX6Jw36Oy4dt/F7RvxgnfL9B5hAgAE1iKgidGU6Qm7fFZuZ+iodDNTjcvsDpz9i/qnwL5HYuWp62S+DRC1WdLYsRodLKJlc/qErFZ69Ld/d+GVX3lG9VMXVl/9wDuvu/v8x11y0e889Z7rf8i/c91VV170dbzkh3ke1144P/fzB+aQWBBvu/6DqgCgIupcVUrx3qmKKgKASBYRY4yIIGIpJedExAqqAIYoxLlhJ4KqMhg0uUgu2XCVUkixT7k/cs9dlzzoHM27AYPlSkGMxaIBwRL6LC2hQyDjMMYowqDZmAoR2FnJSRUAVKEQWCQppRjji0DJeVAPQgxFOyIGQBUopRg2bLjrOgQAxHowKKKohdDkLKWIakgJqqoxhhWBQDc3N1d2r4ZuxsSqgIiqmnJi6wCQwFrnJtNJVftSOkcLgr11S12are45u5tsCSurmU3Xm2oAUFRMKSnEzlqrJSBx1/fOejaYS/jTff/nDWsv3t7ZGAyaFGPTDDc3NldX98XUOeOBoG/nimCsBRVJXNdelB/4mJ810
RTKkAEIBMAR/NyLH/dbb3x1380Q1FirP0JEOefYd2TY2aGxmPoAaJyvELVoIoSUkmEvClKycyanzIR93xtjUkpEHEOLCL4elpy8r/o+EFHWZMAyc8bgB+MTp/o3vOHdN918C4ErqgplaWGMfuU5L33mVVc9+8jt111w8ZUf/MD7Tx87XXt8ycue+Zb+re9Iv/jgh160NF7RHJBECEARUefzuXOWFEspzjtEDH303nddR8Qi+oFzP/9LR56OyMYik2nbeV0PVPoQIhGpKpG3zrazmfe+5DblVEpOKTbVKJVSVY0CEkoMeVD7Y8fvzVGWl5eLKhomIGI3r9KObwAAIABJREFUHNUv+Pm33nk0pZxVEgkEPc815xWY6fzGgueptObg72dm7k/Bxt/K6iuMc7mQCqgCKiCKilI/kWYRYydMzKSzLahR1/6B6LAsP4kW9pZ+m0uHRUqzpKVgVvUepfjBQpIO730TXvbSN+15/Vce9J3fG33gq//3O7/7dPgvH742POxxV/zC1Z+pBE6/Kl30xf0nTraHdy/gXTf+LSHdL5dcSgYARBApzBb/k5YfMcaICACoCiIRG1VUDSklZytmzjkxswggk+G65FBKVCjeDnMi5BZ0oDBHwJyEgIEllc7ZEUBSsW23PRjUln0X+toPi86BaskZSREBEQk4lyilqKJScdbNZrOmahSJ2cZQjLExTivvQuwRgIyJMY5GCylnUckpV3UNCgiF0Cim9c1je3c9oOvaokKmqr2NIRLbEGLlbMqJ2MaUnOU+BF9V1lYpppJ6ZKoGrpt1bMejykfNuahhATWxD0iECNZyjD2ARQSV4r1r59F7957Vj71x7SVIpJpDCIjsvSGgeTf1VbO9tTkajxEx9r2SLgwXQm4Hi0vnPeiF0qpQJkEkFiiO6aonP/jP3/U7k3aGCISQY7TOAoCKsoFSgJgAHGongkBUSgY0qllylqy+qXPOUBQRASMzz+fzxcVFEZzt7BCSIBljQwh1XU2nk6YZGVOkgJbG1xxKO1hc/OK/fP+Nv/3OthdmXhrXzttfef1vU7Xzvv/9kXe860+u+ew/3H3b3RrD2vqRhT9o3jp/bhL7kIdeCFmsrbOglOicJaaUgmfqQwBC45wkESnWWkRU1T9a/eQb135WJBfp27ZbWlrKOaeQSylEZC2nFI1zpUjXzQd+WEpxzobYx5SNtc47KYqIJRcQSSlYw9O2HQxHUoTYKND2xrEu2ee+/B2ahyl3rDbRT1ii4JKJm6kYKMEsPS+uXME7R1i3U3sdnfWrMF3T/4SAiCqC2YSciZmpxDk3Y5ieFu9h518prZf68bT7wQb6PDlF/aSMD0M91qjswPQmnLOL9+6zs8/2l70WAB63AV9ZAd13KfzYDAa3bpz/r/NjX11Lx59/z1O/8+iN6cZslvHOb324lExEqorIAKAqzCQCzJxzTCl5X4sIMyNizsEYK4oAhKoiolBURVVUsa6HRZQIYgyEkFIy97MUo3rnybiUWkJIIXdhOmjGSKpC3lcp5W62Y70C1n2XprP79u85NyuIZhVAZAAQzaUUa7wo2soaRhIRBAQLgMSQQkSkUjKzKYLMthQxxnQhDEdNKTGmQGKIiNmmqCKBScg5doN2sm2tRSRjTN93zleiap1P3dx6rqomJt3eOjKslgGlaAVYRotL88mm8YPQbrTddHlpPyCjQt/3RCQirqlVM6GmFFTYGP6TfZ967YlnIVaqAVCdaWJKqLnvYjX0JaGAqqhFQuaSYy/8+Kf+93nIlEympIKIxIgMdOElq5/56B9uT2dkjGOUIqoKCAiY4pzYpdIaM0ZI1jVFlAgQsZ1NvLUgit6haslKSMRIRCEEEUFCg6yKaEwOoFpUxTojmBCMakxx27vdKsJs2na7Xr3ocVe9QIWohIc/+mFPvurxn/zsNc3iIz76sfe+7Q/e881vfgV6PHL7HYc/vPAH+ZWH9u8uCJICItiqcgw551IEAJG8qAJAztkQEqFIIYK2nf31BV96zX3PZgZRscaJaErZGl9KAZTZbOYdATASe+9yLjkna03XzSvXZMn3M84RWZFimLVoSl0oadAMSbEAFKCa3Kzf+Py1N739bR9DwpRKsZdT2SqQcXBQ2u9ROSc7zwfeqOEYSOKtz8mBV5bpBhIJICCgKJjMHRbMZAfcr6dqmXMnaQrlqM6/Se5RyAtZa9v49NAHgR2jHyKySoKcaJr06PTQzv+cr+5fe/E/AcDj3HeuXXk1/NhaRzevn33c9Sca/PJVd131tXNuvmfNL4zw1q//VSkFEUWKMTal3nsfY3HOAUAuhYgQVET0vwAqpMpXJTOz3o8IAdCwV8giRbUIABHHmLyrUIkIkUBVECGlpKoAIKBMSEg5yaydlBSssSXBYOyqqrLGxRh3tk4xM1lPthYgVOjnc8tiqqEWFuims43lpYMpReco9h0CxhCQMlFVe+cHQwFGYGSKXVQg530KM2ObnIsSOeuJUFFKKYasSGFDIfTOjGKeODM+ceYH+1fPFU2hT009TlKYDbMJIc37yfLyrlI05cLkiEBKkJLJcCmxrmsRVIlSJMTsnFMF0PTu5Y/9xvqLJ936eLSb0CkU1JwTxRR9RVIysSUm0aQCJdIXvn7DW976DyHMRAQR4X5KSAXV79pL3/jnv267CSJbpgKSS7TeIzothYm6dmaMibGv67ECAmJOmY0rpTCDahYBa61IIi0pi6hU9UBLLkWziDFWpCCiChCypMQGcy7WVBEFs1rOx07c9dY/+Ktbb9thqGax/9WXPzE684+fPvIv1/7z33303bt2n/uud3/4/PPPObjC8hunfmPn5x1XsbREDMAIZLiAYilQshbN1lpmzjmTIdJcciY2AOaPdv/9G8+8ULWIFkQoRa2pkxbvaxABKELkcmn7VtRXlcnoXZ5lqrLMHTcxzREMQDTGxFgM+axZBQ1TDB0ylpKZR2ykrt1jn/b69Z0ZdCh8VsQVTq02e2R+K+B+wBb3vlrr8+nMd9h3KWzC0qOh7wsRZwHWkgGMcJd1MIK4gxFl2MBSDft3abkHcR9wrWAISb76SYQGmkPkF6XM/TyG5d3GmJXy6frMpxa8gfMe7e+98Ya3H4Yf+/7x6oezpcHC8Jt03xd+8ugDrt63OF4cN4h3fuvDIgKAW1tbS8sLsQfnHGAkohijsaaIaAEiYmYikgJdP22aBpRVFQlKSTmnqhrknA1bUQUqKgCIhESIRAig5UdU1VoLADEkNsRsFYip5FKIaTqbD5sRqAJS3wVXW02RibOoMZhSZmMMcx+mhg3hsOsn3iKyLaIqAJpDHwYjXzIaU8WU6rrqZvMgXeUcG4NEpLZAstaxHYKkXIooOFfFFL2vU0zMLFmRxPAAqCsFS0kh9oZtSn3laxEppRjnjHHEBpGsG8cwV42lRGcdgAKAiEoJOedSkkiu6mGK3fsPfvpVdz7RcIPIRQIbQfSGPSJ2/cz7hpBSjiBF1VdV+bXf+ovr/+3u6WQb/n8IioodYY2YrvvS21cXDoWZAZ5wVXlbh3kLlBE45Wyc
TykhMiI7X/eht0ac9X0fiKFktdbnnBDBIOdSiCkXIS1EnESYjZScY6i9LznPQ26aSrSAQgl9SPPR4sH3//k1H//UZx5y3v7lXUvf+u5tb/iVp7z/7771pt9/bzPauPb/nr7yyRf98i+/4XnPfu762nH4zXvfFn7VmJyzGGNKKYAgJZcsdT0oWZQQAIwxfd97V0tObCClBGz+7ODVrzn2HEIkJSJJKSOSsoWUwQFJjAXqDmbxyL76nHZkOtDcb29tTc895zDzoJ1NkZyL2IZpYdme7OxMQztrlxaXkADRWAbIcTik8Xg46ff/1u//yb9+7Rsgw4yXajkOUIsY5C3OFkaXwPLTy7Q31pT1D/LZvxvXbrXGJmQARSR41MMhJ7BDKC2UBG4M99xOR35QwteQLgS7rFgzWi1rUEDrXWoW2Q5RJYOnFT/Qrf33vtoW3rNrOI/pJw43/+0Jo9q67R347vHh8kplV5e/38yvf9x9V33jrNFwPNsUvOOGvyZiAMpJjPFF5sycIxsniJhyRkIoqKrMrKoi0Vre2toYj8eATlUB1BguRVSRyJQsxrFIMcaoKqgSIRGqCgCnlJgZESWL9RaQRDDG4JxPJfvK5iwqICLMVkAlJ1Q11pXUGWsAgMh08xmhb7u1umoYURTZWCIT49yaOpeOmI2vw3zOBMwAUHXziaq6qrFsu9g65xUtg5ZSfN3kLEh4vyJKyIgZgUpRUUGy1pgQA4AQkDEuhFDXro8JAJrBQFVLScZ4Is/sU9ohMiklAEE1KQctuZQ0HI5zzu9Z/djrTjyHPKoiqkUkzXNCzjmzoT6IMWyMPX3qzJmdHz7kgVc85Zmv25z182kQARVAJEQQTQCIYIHzhz7wa4948IMFo9UKFELfxjAfjZdUNZZkrWVTiQoRAxKKhtiLCJNVyNa4lLO1LELeufm8rWqvOfV9sFUFgIZkPp+raEm5aoaqKlKIAExFIqFrXeWPrpW3vPldV16k19+8c+jcw6enC7//7t/8/Te95yMf/fj/eP0Lb/zaqWc844o77j7mfnfyJ/yKolalY+ZSCjPnhMaYUoSIBBIiqioRoVBI0Tru5rNqMHzv3k++Ye3nSimOfCmp62fMlp10oQzNsIshDeJdH/ncm//82quq+qUXne9na/uPHXnXVD+zb//yrvGrX/MLi4sH1wMgNV//+vey1OOVQTeNd9157OSZne35FEQtxq6dAuslZ5efvOSsix+wevTE/G8+eeO9R1nFs0jUDcZdPCDIq2n/qzhNZP6dcsUzuTdARZQBFLTAv92E3TotnC1xR8Mmul0qEfOG9N9GPqS8C6ix7FM8iVmgWlS7iNSQryEWXB7KYN/FJ15G/fYCx7rWWafe8Lm7F89baeqqbmpnzj206xEP/Nyjb3z+t6+IKXpPeOvX/1IVjPFSlBkQGYlyjghGRJAQiQiAiHLOdD+AnJJ1HGKvBNY4AG5nbTPwXddVVU1EgAYA+n5eNzWDESmiRUQAQFWttUQECKVkFbXsEyiRYUSVKKpEjEiqWlAdsyiIYimCmFCKFjWWu76tq2FKKRcxhpwxOQUBSQEUYtPUYAaogqAhR9JccvCuYjtMKap6QCjYW3BFirFGQHKI1to+dMxYNYt9l3xlYijWoQoDIIAYg4g255TK3PsFvB+BqGBhNixakBCRCU2RAlCyqCFgpOlkQizeDd6z++9ff+ZnQ9cba4iolFxSJCIAQCQ0WERCxlt+cFczWCn57le9+s+QPQKqYow5JxFNIBapAKobLrzjTc9+6pVPBquaIJXtIsVXS1BUNRFKCn0zWoypR6ZSwBgnkhExZ6lrn7NKEYVM7FJKTACqkrMxNqt4V4f5lgD6epyLptgyeed8TH1lfQp9EbHegYhvqk9cc+M7/td7zz13+bLLL7/n2Ppv/s6f/d3H/9cjL33W29/+ziuueNjp9fniu+Zv23wRN5BCT8hEhtmq9vfz3scYCY2I4H/RQr4qAgwiqu/dd/Vrjz3XGJuTIIn3lQiEmLy1RaJIGP7hX86+fVf3kMsX2lODtfuuPbLxoQOP2X3Z/0cSfABqepWFon7LKt///WW32bOnz6QXSAFCQigRkGKUHgQPoIDHhgTkAOq9tmM/4lEOSFFEUFDhICChBCkSAiYkJCEhBTKZSSbT6549e++/fd9a633fO3Cf59rXv/GVx49Ppm1TLKOlkts6VpJEWD1at9dNQCq8d8+xA0ceZ+p+6z/v6y/q2pmY2jOWpsMzuxF7Zeoa7dGOJ8uOJyACorN4MdKcOQ83/w6f+4vl4D0Y51FELXG9USZHIcwheUjHwSLEyppTNr0XeRHddqMKgVRPoKJBn6sZtYwSrULnl2Tb0nVHfx3xZFBREOe8JKi5LM0OdmwepOKv+6U3Lm2b+T+b/u3XD73Eed/mgo/c/jHnnIiYmUhm9kRkIESWc+504mQyJmPnvSERs+W2qLJzhmBi3sU2Nc6hKQMwEZmVTqdOKSFiKYW9L6U4x0SApCUrs8ul8ehayd5FMjA2MGeGRKYmTM4MzABRSimg4pmBHRG1bRtCSLkgopkxs9kYtTp96si4ObJ165XOeVNG5GzJsTdTJEXzk+kkVlFVwSBE3xZ0Juxiyrmq4nQ8qfv1ZH08Hee650MVUinEjIiMaIhFlZhMybT4EJRcRMptrqpekyYhUFLxBm1pvOuxg7MQWLVV8KYFinEVmfxfDD7yzjOvRWVRDcGbScpZTcGMmQDY0vS3f/9Pp02+f++h6SR6ZDaZ2TBz/MgxIMoQsPiSp4pSxQoo/e7vvfPVz7/8zHTYrweICGClZFFlQiYE06wCgCHEtkkuOAZNubDvcJ4kQHJBRQ0kMrVtIt9xzEWyD85MQaltp4yGaOS7epYkIjMhkRxjnE7y7kcfLrm5+dY7vvS572fw520ZXHPdK5967cLhIxuveubiO3/jr7Zv7+84/0nLb3zgf6dXtlb69ULbThFNtUynjY9nVSAGkFVRzYiR0CNi27aIyMjv2fKptxx+qXd1KoWIAWw8Xuv1F7CMyuqJ4e//4+z6+FtzS7PjvHM0fPApL1l73os3z8O6WTMaVr6fZErORyjkJ5I7BpkR0zQN5hZG7fTQgYNPuOTCo6cOu3b27vsfHraTR3YfO3n+T8cgp06vVbE7HR2ZHWxc/t59dOjbpEDkk3TC0i8XSYirkgmqeTr9mAy2+OlpCT0tCQvDzBwOj4Mm6W7E4VFI32eak87Fjl0G4/aUYQgADS8RK6RjSpWnvs4u/MrVX28ff3B06nAW6joBEkUs4jfNDRYvuvZ173zZ+rT9263/9qsHXzqdTOYGM7jvzo/bj6lqUQshFEnMgOhUS8652621aCoZkUoRQvAhSlERBSwA6L1HNMIAwGaGqMRoZqpqZgQACN6HZpra0tZ1V9VUS3AIzKUUEECHZhhDpSpJDAAImYhUlBmJKKdMRKpKRGYmkrz3zjkzyykWXYmBGeoCBRFFFX4EvQ+lFGLIk7H3kdgVEee7bTOKvX4zWYs+FFNECsxNakSg24+ldECmgICAoGcRgjKBamH
XlzJFMmBumhyqgJycdZOqWA4KWQr7Ti6JCFXNB0b0pqIyRlczur+e/5e3HHplZ9AvpVVVM5Ayrjv9nKwUYEzO18cn0zvvuWe4vPKBD3+l36UnX/uiTbNdmaw8+P3dex57bDVNousRAlFa2jT7izf+2kVb8NJd56opEakKEaoRM+WcHZ9luWQwQCIpwsz4I5Qn6y4GMwQkcmyqAGBGUibsoxl7X6XcBO9yTiKSm1G3N2/AZlYkh+hySu6sUD3y6L1f/8+Da6urJ1flW9/68nv+7K8+/Nmb/vGj73/Vf/u54VrcuDDz/Fe+6rGXfPMPTr2U2HIGYlQVACs5+yqCUWnTmbWjczMLVd1JKTFXIkJEzFxyfv/2f3/r0ZeD8bRZC75CBB8Y0AWPN734xrmfffvxL32GJ0cevvZpO/qX8KVL+w82m2a749F6qMOZ3kVXzJ0WjXUd+rEtubuaCJC6Xdy//8D2bRfffiCvz1526syJE0dWL7x4x+GDj2/ddk654+OPPfxQkxE1Hz90165tFwZvP/y+tX7oAFC0jZuIn4LVLj3zLdzyczQ+LIaa110uNrNka8d1dhdOT2JzCuqdVk5g812wBexdbm2yTh/GBwAdIQotUnfG1h8DmAsOS33ozTfw7/7GC86cGJ5aX9XCzfro8J4Dd9/2vdjv3fjb/31dDtRu0wd2/vuvH3olM+U24aPf+UczK6UAALIPIRRJKU2Dr0XEeSbCpmmdY5FSxWjAAJBzCs4BoaogIgAZFMfezESyKgEAIpoZmDrnANAUfRVHo1GMgYjTdMzBq0J0Pokwsxk654GQiFTFzBDQDJBI1RyJqsKPqeH/r23bUDkpDGaAGTMQEyKcZc6rive+bVPsdBAptZmIsmrEAuQcC2rysQaz8WitmUwAx4TdYmBMseqE0MGzDEPwOScmNITxcNSr55Ci4SQJOK5Pn350tr9JhTQ1R1YO5NEo+hg7od+vQ2djcC76BdEWbArk37XwyV/Zf4MPnklAVQpQiKriAzvGjGomX/36g//1nW9+85t7OoNOWS8Wq+DWLr9o5+bFOB3r0VPT9bVESPMLvVe+7rWfufnmhV74n297rSoCQNs2AFZ1au+95NI0LQCxIzMLIQCIquWcY/Dk3HQ67NR10xRJiZ1XVQQNrmqaMQc0VDDPzM55AxyvnvSh9r4GwLZMAMg5BwACk8cfH3/3+7f9wg1v+Pcvf/3wsZU7bv3aL/zqH97yX5+64/Z9c/OL87OLP/vKpz/48u+949gNyMzI+CNgZmiAjto2WRF2TEQmZobAehYiAoCV8p4tn37bsZ917CbTaQwdEWnbaez17nj3nx28Ww5f3b3hoZV7N+xa3upecud9H3vK6zdcODdZK/tx8xdnfhEAdtqhN/AXTZuub78382Ihb4Alw3B9tGHDYOP00f6pR7deMPcv//ClP/rjd37qXz/+jOuuW17X4ejwpz95j6/WhqenqPnic+eGo+VH99nyWhOACBs1EL4wh5mQqjJ3Fawfo8G5unI39a8so0dp9iJNp3l60vyiUQujWwAXsH4SNMvY3WTTo4ZsJgQDnj83r50AaMg54tPv/n8vfu6zslfIY7SoiMTse/VAWE+cXOl1elbW37/jK28/8epcChDi3js+ZmaIqKbMLMXO8p5FFBDATE1ElYkYQaSQ75qkM2dOLm2cV+sUyWZAGIhUDRARQBGImUUEEdl7EUEEAEUkteKcK1nJCJgAyAElG+fSMrOjSBRKycyUUhNCUDVAIudMyTmXUnLO5ZxijACQcw6OENGMcilAhYkBsJQCasQQfJSCahI9O+dA5cTKYeTuez/2xYOn1kipKOecq0jPfsqlM71ep3ILi0uz1Ma6X9T5WBM1qehDj+xFDmztEy69qBN5fTia687t3XcYUXdt2xmixa4/fqJ5z4e/sDZRMxTLpUy7bnrFFVtOHF3jAOMWti4N7r7+ob/tvb1kXTl9fGnjYq/qk2dEAiNVoDD40s2fu+2BR//zlj1dCuzchZduf8H1b/juPY9+9UsfLdPluYWZLfPVwoaFujeY37Dp0aMHvn/Pgadfe+XvveMldeiHEFTFTBFyycXUHHuOlYiolqqqVKAoOO+ayVp0TJCG68OZucVpa8AuRp+bMSEQsxoSO9A2ZykK3lfeYU4J0cAyUgDzpahz/rHHH77osid/+COf/tXX/sxwKrFbPe/Fv/72X33dH/31h5G75194flURUsQ/WP1g9duMragxMwCoKrNrcxuCLykhuZQaj4zgXcUigojwY+/Z9Kk3H3oFM0tuYqhVLed2fObwva/+01t27liUctme40c3z1x04UL3rkdvOff80ROv1oo+t/iW5c4l8GO74MA2OITAP925q6Qc/QR9Nb+w5aH7dz/1mc/5+Ec/+cs3vvJP/59/+KO/eMvtt9975Ni+Fzzn+nFa/48vP7Tv4EM523S4vjTX2zDjIE9Pnx49fHCtmRaCSk0dX9PYA+yeV5g9LYmug+tYWme/zdzERqeBncU5XPuqYYc6V0O7R3nJkRRNZslZx/rnYTtVaJQ48oHbb7pq4Ltik8n0JGM0JGIGMszRV4LaExn/7Tk3v2nfi81MEfCH//X3YOSCd4FQgqRWrXBwhE7PMmViEVUFg9a7aGjMXrKVIj541UyMiCSlJWQDInJqxoSEltqJonXibJGhFGJHqlpKYWZCBwBmJiKmZsbsqJSJ98GFCgEcQZsmoBlEVMxVdUpNVpyZ22J57D0qsoiQJciyOlxpsfUWgvc+VKHqTRvJSVwAz71HD+/93n0P+tA/euL0pJ3tdru7tm184vlbp2al6OxcdI56sXPw0JG2Hafp6u33Peo9axltXhyE0N297/Ch422ni9tm61C5Th3JbKbf2bapf8H55wzXmkkag7E4vO3O47uPtgxO1RQ0MIgCATqVQR96vd53nn/XVTdfPpoYaEaYahmaoaqB5Y3zgwP7f3D4JJw4oY78uRcs/uSL33zbf93zyL03Pes5l1xw3lXfvvWHd959KwA859onRo+33fOgiVvcMLNhqfe773zdtsUl55wIePaFClgxNcIAgEQkKYuIc6yqSGimQJEsEVEWcpxy2yARhRqNQZKpgPOooirsMOfWsZeiIVTj8bSYMmO37sZQf+HrX3nRi1/+9S/cfM0zroohIMl73v3PB48evfeBI+Z81R3Mzc//8Z/+yV/Xf/7cm/GGl7zByAGAWWFmMwaws0QNDYiA0NAIHJ6lqkQkJb1n6dPvWH5NKaK5+BhLTq4XbnrTb40fp8vmwlcPrm4dLVfdukM+NcM9rR162rXQDXu3vP7hc14DP/a747eh7+fifIhVxNpNWqhz4btvv/Pnf/Xn//njX/6517z8b9/795ddevlFT7rws//3U7/0i284eGRfdAv/+JnPkcU8HZZ2ffPG+UFk0uyBxnl8z0PHphM0GwAuFjD1T0ASjpfr+BGsFyFziRWldWxPa72Dx1/R0oP6CkhHkZ26GcirYI1hj+NORSNiS+vOrXzvi5eYCTEQshZEMpFSVR1AAFQpmpJ+cP
tnbzzyCgBUAXz4tn8gcggAZN5FBDCDlBOimJmqmhk7ZgqltIie0FJqQ3Deu7YtIThVEdFp04YQnfNqEHylUogQCVTVFEVTCB0VFREAcM5lKcwMAMxswEjFOVeyVSDZDKiMJ2tzg81FtckFkEsa5zwRkWOnTt9y1x7I5dnXXtHhWnzn8b2Pel8Lhh1bO1s3LzkiIF8FUkGkAha8r3xg0UxkaqktdmZ17dSxo9+644HV8crKWhrMbJrr04mTZ7yPIbgz45nhZC36aCVkaNGiysQjU11N2iYEryUnpeB8Lq1qMaAI0WSCWBdZZ/Jm2bRFdoRMiPOzMwsb5lTljhfe/exbnz5qp6O1cduUtskBrLEkGEqJj97/kBTdet7CTzzvtd+/++GDj33xhS98xuzCVV+5+ev7D3z3+S94WjuNwzOnT6+c2fPIXkI3mJvdtm3+8JHxm3/lRS985rZUcDCzATFITmbO+aCQUykI6OhHAEOsQtu2pWTvaDJaIyIfOzHUKTVmgOzMDLQgWpOyIz4LER27aRp5FwwMESTrWbFTpZQ+/6VvZGtedv31nbpPWCZTveu++//gjz8YmVtlQb9p0+bf/503/tMHyaUeAAAgAElEQVS5n3hX89vsk6iYmYoxO9GcSyZ2RAGVAdJkuharwOSZuWmaXq8ruXn/ti+86dArnPMqmRyiZBf9n771L5/XyuLeA3910U9sXhpc/+V//6DE88pIKRzpmF50uQAevPhXTsxdtjj+wdNWvzS/sUN+RiB67+bqCXNcG9Pufacuu/C8+3/w0LZdiz98aP/ozOqfvOv3/+SP3nfOjsVX/+yrP/+f38iT8X27j5TRqXErSzOD4Du9WsjcsCEEfPTAD1ZOj61sNhobXAA4I34zyUDrBRyegP7ALNp4L3bPs+EtqMrVk0pZJ1pQOIk2BACFGfCbHfrCwN6dP79+y2cvHQ7XATAnYQcAmrMg8LQZV1WkH3Hv3/qZt5141XQ6UTXce8c/qSKY+cCq0rStcx4MmJ2ZOedUFTmrIKIhOi1KbLm0iMbYAxADZXIKGkJoU0IkE3DOEZGCShaDwlSpJiTHzKpaSnHeNU2zvr6ec96970jKo7ruiGCTeZps374j8/MLM332hOfv2kqWvO/16k6v3+30ugNfFxsSebUmYYewIIKIgpqm1nuH5BQFzCEpkdecHAdEh8Dr66e6gzkgh+Q1e3By7MT4O9/9fnfDnFPZsjDbrSuk9uDR0b7Dayvr6zPeudicWaEzo2VEbqeTrYuzl5y/TfPk23c+cGrYTKUEIwQSLMVKN9TKVdu2MUBgPzvT7/d73V5PJXnv77j+3mf951Wnjp3KkgAhl6IcFuY3V9G34zP7T60/fO+Dzfpk87bqGc9/+vzsU779jfsPH/ryT/zEdSXDPXfs7s/Kkf3H1sbZwAPo1q07ZmY3XP70K2ZnO69+2ra14Wqn1xnMDiwnVSPnDRQwMLGKEJEROedGo/W6rkHRJK+unel06yrWqeROVaVm6n1HwQAQtJjx2tra/Pz8eDz2VZeZEaFtW03jWHWQWNE817kwahEYMSpodWp95VU//7t1qEZTYed37Nzy0usv/fpzH35feLuqGKCqmgGTByyj8XoIUQQYlQhz1l5vDrAAKDOl3Abmv9746d88/dqmaUOosooTnTQTqOWhl/3h3Nrk9Bt/qaTHr/nkTTM4+88vefPqBec8/MOT99x2+8pwxLJvx0xM1/3mhXv+KVQhDmaKg96gEznMDKIQrLV45+27n/e8Z9171x019QyanTsXD51cC3r6uT/9M1/80q3bdm1dW63ufeihjQvzh44eP2f7BpGwfXY4mXn+hRcM/+bPPwlGTK0WtDBQCcoXc9gubiNN1qyeAzeH40cgLGrzfSpDcU8hjwik5TDIGNEbdND1FTvkfaUnP/qBS668oIMIORfmiJDb1HaqjnNRrQBg27aA9nfbP/emgy8dj0fMDvfe+TEGZ2pNM3IuoCe1wgAKYAYi4pxDJATOZQJAMfZySQgCaIS+TdOqis55xqAqalpEPLKYAQISkTFQMq0AkoLSj4mIIjFzKSXGGBBzBueoTaNeHdukzMFKSWXkfT2dNnWvboto1hAqUTUSACyaAHxgBXDNuO11q2nK3nHJKshSGu+7ziEhK5iPcTJtev2+FDErYGqmBNZMC5JUMeamIFhbkhBUPgDkQOANx+1aaVa15FgNVC2LU2CD0unMI/mcSsd77Iacm3Zso/Xpg7sPfPf+R0atRs+z8/352R4xJikddmd99Zm3Xfsfl01HuWmn2WxubmOo+kmaaTOtQqed5t17Tz7yyIMOuB2vDebkqddcw9i9+aYvxA43TTb0jhwBclUtbt26aceWQWybSfO86y555pOeWMVKxdSA2RuMcyqVmzUnYGCqBqbFnGMzRYSs6hCJUaGAkpghGEoBRAoxpbbypOLMxECZ0cgZIAAx+Tw+k4WqeoCEqCVZcVqMQYtaKZ3BzCv+2zunJTSt7Ni+tHXzzGVP2Pqt6x/7w/Hr5voLqRREMNOcEyClZuTIJMv6etPphMHsgNiRq5wjA4nRE8V3zX7sN468HM3UhDsda6XTqf/1gx9u/+MHaVPcvXnrhTWu3be28YUv8zs3L85sfGjvnls+dJfj/h4ZTORMfvIzL9/9J5dsmEkcsvfGTpmqOszODIJ3e/Ycn9kwf3J5LUbf9ZrHpdTVANJsv3t4FXZt8NBf3L88XFkZS9NobruDmZ6fPmHX0ie+duvqyRnAFqSHPBadBJAWuuY6hBdY6Ut3OwkSVkWV4KBNTmvvCd6mpd2N6M1qJDITQmVOYmcqv/eTH/qDSy6YUS1EAEAAiKhqCoZtm1WhqmLK07/b8bk3H3pZzsUMce/tH3XOqYKZiQliSHnkvSciZpdzCT5MxitEZqreV4BUpHGOO3EgYikl5xwAUKjMTIp557MWUyEwRDAzMFMVMwuh07QT71wRcy4SARCKmAMxEDMwBWA2MynmvTcDM0O0UooPkYhUDYCB1TEDYFL1oDkLEYom4j47hwDEZ1kpgggiSs5KymTSNCPvKgZAcsbRtOScEcAxE3sDIUIzVVFEYOZSsqAxeBMkQi2KpIA55UbzBMEjqQh2qq4hAAKxA2ldiES9U6emjx9bGQ5Ha2vrxM4xiukXr/32c2+9aria1ocrGxe3r01O1vUMorILTYK1aTtam959551gKbVJiqY2E5MkQQdmxRNrYV/rwvzSzEx68pMv+trn77riiic+9xlXPOO6KwRqsQlT5NjRpozWjnukwcxgfbraHwzWT532IUCcjQzkwGlr3NGUk46cR6czk/GEApuvtB17wtSWQowshMFZnLanPDIqV52QDdUKByeCgKbFpEi3jqWdxno2W37jm/9w/UyZprH3vadfe/mzrnnSJ6/4xm+v3LBhYV4EzERVRcTKqE1Jinnv1SJD8o5jZ+DrCKDeswmsLh//6KW3vvngK4CiyjigY8+d2e5fvPH35o+WK2T43tPloO8+8YmXbj13+8ITr1z85sd3ffN7PeTgaDbOHhjR/
p1Xf77New9//ZztW/rd/jR4IyImH4NaKcU1Wb2PBEAEnsAAz4zbeW9bLrnw8N7HpLQvvOF137jzsRP7954a5aU+hE51/gb61Jf3qmDWxBzNhLBn4olzsYMAFxj2jXYqibOuYAYZmkyg2sjtUbSDigMkb3Ym2kgskyuWe8/+6ee+/sXnPfVJi6JaVJxzpkVESy4+RCZmJlU56wPbPvu2YzeYak4t7rntI4hoZqVICKTqEDMimoKZOudEiuPIwQ3Ho07dbcYj77GUYkoxxpwzIna73dFkzMyIwOyzZOc8mJ6FRKZgAIhEwACKiKWoaqsgdV0bECqVkkTEDKtOJSJgZGaqgoRghgTImFo1s6ombTMYknMK5n1UQQBwjgUEESeTSV3XkkoRIcS6rqetMLKUzM5KyYGpaVsXA6MXFRVzziEUImyahoic7+SSwJQZ0bEpOHZmxYcqp5YRJ+Ox8xUYqxZC16YxookWclTxDHtLktj51LQ+VL7qNE1qU/au/pPeR37qW8/cd+RwO50uL683pSGX+h2vUtbWxqktpcSTJ9ZzllEphKilRB8c8Xg6RcNep25QVFAT9/tp15aFZz39kn37H3r+sy5PoNu2bTt6eD2X9cFsxyYSa9p3LJ9ePvqcqy/t1Z0f7j9+5NixGKt2ot/9/pHT07S0sdOr6PGD1q6eetZPXOz9MIZ5Z7ZpaWbLlsVOrzvTqY/uP3zuuZckcrKa6sW6oMtjHE/PMAGTSS7BRzVkH9s2uTBz/903sbZfve34bfftz21eWly4/vqn7dy++InLvv0X6fXj6XqsasduNJq2bZuzVVXV6XQQicikTFVSt9djmEl6Yjwebt50iTH81cK/3nj4FZ5jASIzRK1nqv/5qt86T4b5WB7s6H9qaEu7dnVmL17a/dWdB0+fB1ErXBiNKueiYnc8+eCzfuc1d/3Vu9rxI93Z7qaNIURmRueQ2QiBnIgyueg9SELEAp5RQq8/HY4908zMzOYrrt28tLC2Nh0n+N7XPjs7O3dkffLgA17yfNYjiH2wiQEReU/bcnko68R0E3sGmzEiKuugIjzD9hjJutLUoxYcYzHgGniwbcuW1/zadVtQnvOMzSnnuu60qXUciEjEzko5m1kIkcm9Z+kTbzl8Q5bsnMMf3vqhGIOZIUIumTCqNSJahYiIqlJKRiQFACZitqTOkYgg8lmICAClFLPivDNTRDQLMcQsoqqAwMwi6lwAa0pW730uredOLi07p4qI4AOJCBiVUpqm8YGrqgJzZ7Vti0jOWU4Sq5DSBInV2MBUW8lGyHXdmUxGMTozSyl57yeT6WAwaNuWiNSAnScEVZEkwERoJbXehTbnUHVUwbMXESJidrkkRDUpObdAGGJHpDjPo/XhYDCTc0Ek56lkDBVrQQRA0JyTWomhyqUYEgA5F5EIEVNOIAKI793y2XeeenUuOh0nNTmz1jx+8PgF520NjKur68ujnIu7+94fPviDRzbMLTbtaDpZ6w86CH4wN9+kqWnKowSuGHtwXcyc86RQXFs+I4Ziw6WlneujlTZlQNy8ccuZk4dSqTsxz892F+e2jwruf/zAhRdsXZ6stqfWL911zstecMnOCwf/8e1Dx4+dvvLJVxzad/jk+jh4PnHizKOPHw00nZ1b6nXylRcvbLtg+333Htiz+6G52XTlJefPDHpLi4uVp6WlHUa6srbMgQf9pRMHj/3d+/7lZIP7j002zM5f89SLL7lkZ6ff/fxV37lx7zNcDADBDAAw+Fis5R9DNCYvZqmdqLSmo8pvC3HgYwpVfPfGf3vVXVdt3rxFCUCZwCPTZ/73h3oPn3zW2vK/SnWLj5deecUkT8/51u0bg/Y7m05Nxk8ZrfaweSjUvbr60tXv2HD3R75j1zy28t0WptvmYH6uFyNFH8gFJsqlcAgO2DtnquxcI8VTSEU70bEKzixe/KQnl6Tbt4X/8/e3zvDw9b/w3+954KFvfOPek6fOydyELGJTZF9kAkyBl+ZmAGB8/Phps82MPwTMyJdaOQJ6Bvn0wuKuM8ubTO4G6If+4Keef90zX3BZZ23Ps595iWpBAEAdjobeB+e8Y9e2Tayq6bQJPvztzs/dePgGBTAzfOyOfwIwMwU0Fzqp1RABAEFKzlmKOucMzIwRIOfGB6dqAECEqsjMiGhmSAwApSRVIUAffC5KzhFKzgnRmFmyVrHOORsIAE+mw1hVauh9TzQxMZEjBERUK2c5B7kUJidipDJt1mKM3vUlJxe7agUgGVREBFjUkoNgPwYAAuacAwAzI0JVdM6pCogih5Izm4glAxJAHyKzK0W8D03TdmLIeQpaguNiBubEkJicqaiAY2RXSuNdZVAQnYlKLmCGBgXlrOAcExo6laS5Nc2+U/lQ/eXCZ9525GUGSuZGk5W6vxidG68v59w4H7vVQAFc9M57tGIQhiM9tbyerXnw4YMP7T6I6M4MR5j1nJ3bxpPm6PrKXLXh0ksXnnDO4nlbBkjYn5/71L9959xds5s2zIeKRi3Mzvdm+n3UJrVTMhu2ZRCjFcVOv0BrqR2Npp2gotXy8pE6Un9mSbV1vlP15hFtMhyzJ1d1MOd2etxVvUPH3Qf+5Za9ex41MdUUO8V5v3Jm7F13oTPNpZlzfef0yLqcu2PpKVft2rRpp68Xv3T1137tB5cpDxAyEU2n0/F4HcH1+11mIkKkWPfn2qbJzXo71c2btxa1ut81k3ctfOLtx14xnbYzC0tShFBUmgfvfGj3//rYz8TZDX/z1v84Uu5+8OEjd9z9jIMHJOXTV1+/n+SyW28+XFU3UoAqneD+p6+88YW7P/vW9snrJ7+RrO36tHkmzve6sY6ejJ1TZOZARMF5UFHnYsGWzZEhmBl3N23edu6ufY/t37Z5154j6ze8/EWHDu9fPrJ81z237t8Xj61uYPRmU8CM4MHWfvI5m573vItCvfU7391z4JHhoSO7zbobNm6+4sr5q645L+VDv/fOQ8JNofBLr71cyqM7ztv+nEv6S1v6pmcVxwjopEjKOfhgpVRVzDkhwXu2/PvbTvwcAUku+OgdHyFkMzBTEWFm731KCZkcOzVDQFMFQFUgIjBr2pF3ZMZAaooxdqbTMROE4Ns2MYeiiZBEpNvrTcaTEGIp6pxDBAA0QwRONiQM3kUzEVPHLEVUNOfGMQPoaDyqY2UAiA4JiRgRTDMRNG3LDCF0UmtZJlXsEVIubRVrw6LYYfY5FUIkBmIAKwCsagAKogKgxTqxIqej4amqU08aNpsQUQxVbtNovD63sJnAMWqbC3sickVMS2LPIorkRckhg2aRpKwMSugYvRgAGBIQgiKAgWPKOZsqO/c3W276jcOvIKK2bYAAAc2UEYlYAVM6rUJV1QUw8hWA6Y8UVxAD192u5OI4tKnJua2qSIFUdTxqtThXoWVFpNDpiFJqG5FCAKaFYwBE71hKKSkH79q2AXQxhlIKokc2kKKmhhQYmvGq917Rm5UQBkhskHI7nk6yc945Xl6X792/97EDy7ff/aAHTimpGXsPLNY2HXLn7VqcNvnqp13s
3Whutt646fy/3/mFdxx9CTMTwnC4hojed5IkhKIinmsg7yMDmPOhJJud6xMhIoPh+3be/BtHX8rmgFmLqIiUQqH+yKveHFanW8btRRXphZt/b4VfkNe6zPdXtuVlLzr1rYee//hj86V41LlCH3rG7z/vm3++dYY/Ktd+7PQeR8lkMtMrG2e7G/q1ZwQXvDmKHhk9/QiCQwBAM0JvBh7CzGJvbma8OqFYX3rNU72fWRsOR2eWH3vwwdPHjj54eOb0aMFLq1TYJu98508eeeDbFnqZ0+xstzfoq6bR2sqxQyelhIWFzZ/8as+Inv/s+pqnLH7vvj1XXLr5DS86b5INAYMLJRd0ZKYq5lwQzUyu5GSa/+7cm9+45wUppbru4r47PyoCzoWcW++9/hgiqmQAKKWEENQwBK9a8EdcKUCgyGUybWKoUm5DYAI6cvTQ/PyC48BUq2XvWVWd96batFPn3KQZBR+ZPZgBsnPBDM9ybComgCIafByNRnXdYabyIxJiFFXQ1kQREQyn01R1XJuaXrdvZFLUeZ9zYuO2TGPVU1BGh4Q5ZyZHhDkLMxtkp1FACmR2UJJEX2kRYgRTFWtT0qJG1unNuRBzaUHZBwdEzlXC3jkGJGZvQEQODMxEkyMsWSYpNQytSi7tlBCkjBHAzLxzjutpM33v9pvefPTF3vW6nTgdrxOoiJSccmlEU+XnECiX5L0zMBHp9/uqVkSzlrquc0rsu6oFEZwnkGhW1BIimAEYAFCb8vpoxTNGz8Fx3Z0TVRd82yZir6reOykZhVJuqxhyyYamOZlZUXDeIUrOpep0JRfv0VSllMm0AaOq6sToVWU8Sb3+3IlTy1u2bhwO0/LKdHl59PAP9lcdSuXM4oaNM7UHyM7h7GB2sLD44fO+8Mu7n922DaKk1Hp3llcN4/FqjM77mCSrFOcCk+t0uuwAQBEdWP6bHV+58eAL686MGnvnEHE8HseKP/auf9326O6VfdMjMVWh7g3TSnChHe9p3bZLN52793Dt/RyzZ+0VfHj7tbWPO458FybjNDj/fyzLgTw2axlGG2fd1oVZTyF6T468Z0IiQkeOkZjMALyBjzIt6Kteq8repzZd/pM/jRRKkjJpjx7ct3z44HBy5vEjgyOn+j/1wpmLLpo7efRAOr1KlYdcRMQQCEPTtFU3mh/c+8D8lVf7LZv6vu7vfuD+N7zy+U+8sDLU4HxKLYAIeGZidoSe2KSYqTDjX8x9/B3LryGi08uruOe2fyByORfvGZEAABHNDAFUBQmIUEVVBRC8Z7UCwDG4+x+456ILryxFmCmlSXD90Xjdex9jRYhqCckAUAoyY0rJeQZUBDZDYgDzKuKcM4PlUyf6g4HznpgRlNiJiKmpiJiEGJ13IEmKAlLTprqaAdRSBMCcDyKZmQ2AAFNuvKuQBICJCBFzyWqA4JjdcLi6cub+nbuuVqmLNKraqQc5C5EpEbED4BAqE/M+AKKgMYJoQWJVRAQDQyIAJjRRVDAiAMxmBODYOVVCM88IqqDOzMbDoakZjErT/M3S/33r/pcQrZMPYphVoaRuPcscR8N1inWIjAiEqMr2IxpjKKU1Fe+5aVswcM6piqoCCoJLqQQfneci6n1AQzUCs9xOc25DxUQ8nTbILpcyNzcnJQOYQ23bBGg5NalgdGxnEVtbgIyZY6jAe0IhMkIyQxEzU9EMBjlnQsglYzMVtaruAhGYrK2vT9vp8qnxtF0bzMxu2rQj+JpD9YGdn/v1/dfnJiGV1KacWgYzaHM27yrEoAAqRU2ArNPp+0Aihch5tL+74JYbD17vXH99uDozOzsejepeF3M1Ho6+8vrf7AXdP419R2udpreuy2WjlmOd+f7setv3UBP1AHuifcGPP+Mdv3XXu0tdHzkz3MjwQej981oHEMTW69juWJrp+MozB8feOWL05D2hIwOKgYiwYR+LcmZQABYzpp0XPbF1FXU6iNycWl45ubLW2OYNOD2zf3b2vElaGa0tc6xk0hoYuwjoBVRkqqU997LrV0ar9dxAx6cP7Tvy3j97NTIjQG4lBj+erDYtDGYGKWVCR2RqAECA+v5tn3nr0VebmnMO99z2D4iOkA3UzBBRRLz3ZsyMoqmU7MjMwMy8j6LCDrQoYkDMInCWiOScY/Qi2fuQmmJQ1tfWNi5uVcwAYIbTaUMM3Xo2pxawAFDKU+ccoc+5DSG0zUQlsw+qVlW1Y5emE2RMqUGmGAcAICZnMStRIAyq6JjUsnNUsjZpGkLILYQAzsfRaIgETTPpzczUnUHJhohMYTQ9w446oTaqQl2hq2IYKJSihuhVoVghADATAI9sqKoAyGiiJkQMwAhKRICKpFg8umAKhgqQESm3xTkvmACAAEWFKKrKu2c/nCbj4doKE3rHZuqdR/YlSwzclkQIBgaGzgUzRQTRklNhQueImFUMAQzAzBBlPJr2en1VEVUfo4qaKLISOTMyIySxomrgYzQtqsqEpRRmUEUAQ0RD01Kcc2JG3CXHZqZFGLMKAAKSEqAZnCWSiNAMEBgMxdoYYzNpiVkkiRayqNaurCyzj3XdDaEyo3vm9v7Tg+9gc00allRQJacJanbBxcpP24mWsfMzne6Cobei7KBtp3NzC03T/q+5T771yEsMu/06sndINJ5OunVnMs2P3XPfkb//9NGVaX395fC1HxSAznXnrH39sa7Y5bEaVWFtMh7E6KB0FP/t6t980/f+spvKwLuDULn1tQOLG950KLfQTZYZh4sDmOv2IpEP3jkOzkem4CASRQqxAjUUhwSczYwYIKs51+nFuo7d2e7SjvH4RF4+tHrysOfQFPRYwKhRc4rIJmJAVqRYMQXh+fMX5hfW1w5RKedccvmNr3/qcJimbao7AzT0HggxlyRFQ6jadmpAxK5I/tCuz//a/lc4R0USPn7nJ3JJTCyCEDSCLyUnFO9cySk6JrRSRFUQwUwRvRQL0Zui6BSAgw9ACoY5t4B2lhQr0jBzFXvjyXrwtRkAqPcRMYsqQlAtbdt6H0oRH5wqSFFmZyZmIpLrugPmc2mZ0EwYzJAAWcTIEbMrpQBYkTaGnnNu5cyJuppFBAAhIuZQcgnBI6JAYxYJXS6T1KSqU6mpqKYEQGRYmom46EXEzLz3zhkAIRGyMQAghNAB8AgOpHEOkV3bNGhFDbLypE1VcAgsQAaI2kYPaspUqSmSY+fEJHoP6A1dLi2i71Z1Ox6fPrl/afOmnIsV7VRdkaImRbJnK6qEsYgiFmYuRZmcQco5xxhFREtxjkrJzjsCP0mTbq9WscnaiIjYu1JK3RkAQM7ZOa+WpRgR+cAmqWmHpoJIIc45xyklROQQclJABJIqVqUkAGNmMVVlRJRGFDIC5ZQ6dQcMzTSXYgoqTV13iyQACVSLJtFC7AE8O9e20zOrp2b7m7hyzpHlnPJU1LWTYa872PfY3rle7M30qsHAUdWmNsaQUwsA79vx5f9x7CVmWakCA1MLPpbSRuSJk/s/dtPjN925XqZd5UnHwzQReGjLWNpt5p+0NH/MipskX4W9s1cT5Z98/I4
OlOALdLfulfb0aPSRUTmaSyQ8k201lkGcxSoE5zue+sShijOkIboQ2DsCJmBXAAwJDNREQQBNkHzojCftZNp6LM4xOzBTB2iICihqzB0OYTQeNaMJ9uY3bdp0+vTj25cuPrp++H1/9pZmfaW4+P+xBB/gu99VgeDPOd/2K2/5t9tLkpuQYiQCQUAfCzoKiIisYl9hd1XEtbEyj7uu47ProyO6OmNHRxFmhmV0RZoKItIkIkjoSHpucu9Nbvv3931/5VvOORt45vOpA8VYEMk5m0tp6rrrV87ZYZWqygMgkf+dE295zdXvVwV8yoVP/EXhaK0BxZgS6GAtka1ELaiAinDhkoxBACBCtIWgPf/4F2684fahEyQ1hhUSYlDBUiQEb6yoWAAC4JQYEYnQGAAggIKIKTJDJqKUCxEZhVJK27aqAN6BoBYoORtLRKDCAEJkFUBVkFAKEeFTAJBFCkfnXMkAkK11pRQiAjIAKsLMJaeECG07iWNBKWhsHJOiaafWmbVld9hOGlXBLwMAQiqCAAhEXBA0GVQiFBXhbIgEwBmb86gAxtXOhzz2ACqIIdRakjIDkopa50R0GGPlAxksnK1zLKpAmRkBrDCjAmHwJsdsrGVmIhKBpxAiARUtfd8bYwHAOysiVVWXnEvJznrvQymsaDhHZw2zGAOqaqxV1ZQHIhJhJFwtu+l0LsKlZOcMqKgoZxHCum5zygBYNW0aE6KAikIRQSLDzK2vqVKgQuCGIQmXK08+ceb0qZ3u8L77H2LFfownTx6bVvbcDTekLhs3QURrbU7S1BCjFUW0mYSFgZCKRIGcMzlHnDGVvTbUwyAMYX3eDGNkBVfV15989D/f+ZGfu/Siw/3t2doJ64MArWbSplkAACAASURBVFb9bK01SopG2+qvXvELdhj22yOLC1c2TzR2d8WAMmp6zrP1n//5rrXNs+fO7l/f9X3/357+M6/57K/5qME7j35/czrdu96k+o8Oh38x5mruezEHCsVjsBVZu64l5KXO58c2NichtFUwTtGQIgCRAIg+RRRAUYqwIrCoiBVlVQFQFVRQUbDeac4xJyKDAB6w1GXr+LMeePi+P/nN1zo7OHSJDwVraz0oAOpquRNChV9CoVon0pRjjOnPnva+n3j8Zc75XBgfuudPRDOiiiCoCZ7G2JMxgMSlsLB3Too670rJpeRQtYhqKIiys9aQTTGtlodVOxER52hMC+YiTNZalhzMxAebUrLWpKzWmqeoAoATYeZiDJWSEUFUzJdZ44i08JiTWGc551KKtS7lpKAhhJKLsZhSqqrG2zblHgCMCfplIuK9Y2EEJGNK5nE4rCoPACoIGnf2Dra2jqqCInpvl6v9zfUzLJJiQkIAUImFEcmEqmIdvanikBzZmFfOVzGmuq4JcMwRCUGAjHJJIjzGtLZ1DFVSzMYFkKyAoqAAUMAFx1pEi2FyzirAmNnRaP1U2AgzQBbRzFLXE1FUFVD11pYyAoD3bowjgFhT7e3tb2yssSREijEhoK9rVO66ZU5lfeNYjBFUEZGIS8nGUkpxOtnImZkFSQGKsHpX5ZzIUozJWts2bWFJcTREKqrATz5x9dy5m1Men7y+/8b/961DqWLEBas1RktRZk2haSepqHW2T0tgdJTW5xoqOnnyKFGazarPfebaqRPhq591y3rT1vNmrarTECfTdWdtEW8slMj9uKoqR6hVXd3/+X8+ffIGV03Y+rbd/N2Tb/vJx16Y4th1uaprH4K1xrF2BHmV2qbdvXz5Xf/uT6uNfPW+vbpdT2k4Sph1kNbbAxxZT5T4jV/39GsHq786/sof+dR/mLIF2n/bs7/3rve+5Wxwj9V1OrJxU7Px0+cv33/gE4Fo53V0Ro+V/hRpgPBkBbMzx9fJTia1D44MIKIQKoA+BZRZEFFU9CnAoAqgCqBAiACgoiqAzIyI8BQcr+xOl+POrTeeet2v/QhCo7pKnTEembWqgio7a7a3dyaTKQCmFFVVpLST+j8ee9tPXXpZ1TSlMD70kTdYa0S075L1iai21oqOnME6Z4hKKWTJOQsIqpzTqOIAk4oVZS7ZYFEdQzu3NBmGxJxBbeG+aUOK7B3mHEvhup6CrYjMMAzeOYeFOYmOMQ5aFED7vkOEqvFtewSoiikaA8xi0Fhrl4f7s/laYc2ZDfnDxe7Ro0eHLoHNhJ6ZFXJVNd1qyVq8MwRsTSAKhC7lQ2ca5lJkLMUZa3IpRITGTds5YBJmQEdEKSXvfE7JumCdE2QUHYdIRskIiYAJIsglGzTkLRka+86HSR5HfIoJhCmNQ9XUuUiOozHW+acEVckCSIZAC3OwJuVsQ5PjCEAEeez2jA9NO112o/OVRQFDzAJIksRaK1KQcLHYnTQb3lfMCdBYa2IcnXMIrmgmS6qoClIYAQwRKjBnhZJzMo68D8xsrQGlnAVUraUYJVQ+l0iEIAUN5cw+tJyToQoJyBQk/Zv3fu7SVX388hP7i2Eck7W+Ck2fu65f1bUPVcX9aK0XKdZiKQxKaRgIRSlUjeu6ZUx5a3bkNa968c1nJ6rmcOciuaademsJoOZSVIpIAVNyz9a1fjrncfi9M+/46YsvTYmr4JhTSr1KSTk7svX6GioYY+/7zAP/8vr/1j+yMz9zbHs35xRPV8pMgr2w7ZK0ib75WTfXrXtz8wOv/tTrHrrlltXzzrz1oePPf+CfX3Zmdu3U5uyTD71huPb6i+R0XbGosOL+RLugUAFHoMl8vWnc0Uk1mdRVMEQASECoKKosoPAlqKJC8GUIAKTIUoxBFWFEo1BKQWtL5gevHF6+Gl05/Iqn3/zGP/l5Hihq9r5VhVKKsVoYQMV5m1K0WqythUWUf//0O37y4sv2Dw+n8zk++rH/gqCIUqQYYw25wiycVVE0O4uEVlXhS4iZjTGISASlFCVD2hReGOM4HUrJZA252rk6jZzSsvZuFdkao6opReHkLLS1K2noV6O1BgBKKd1wgCBN0+7tLY5sHs8lAVlyNG3XlDwLaikAkktk5rpqmdS5QGhVAUwm8DmJcGHO1lgyhIg5ZQAWLU0TYkoAZDA4N/n8v/7jDWduQbREBAaJIISqZDAGmXUyWcupiGYii2BSHpEUQYWTM0bRiqhxYmxlDbDYnAQVWDMi5FQAoG4aLkKEZABpsji47h2O3WgqaZspgo0slgDU9F1nHAZfw5c552I/hLoa44hooHAqcdkdGgez6ayuJzEWQIMmeGtEUs6JyAqDtQ5AUkHvQLVwEWUAk6tqa0xLjn3KMYSgYkRkOp2VwsJaQFBLv9xrKm/rqUGbYlKQyXSW0liKVFXbdauqrUB17PtYaG3egErTNKvVeHAwTiZOOHLpV3ntz//6oxevHayWq5IUTWFRjHHewLd/2zdxtudu2Dp6tN6+PmBc3nDz6b5fhQrJqIEKUa5cuSgcq+nmxsYRBCOCoCXFxMzWWRD+w3Pv+Ynz3+aCQ7DMSUVUYRw6BGQudVMpx9nmqY+958Of/oO/hOA3rLm6OxY/MXG1ZkwqYxGpf+il5S3v+frjR97zktf94B
d+718v7b4rHHvJM48+59H7INy+fUruun75R++1H7a3uP7q8bB9a1PNof/Gdnow7D0m+W8vXzpy4li/WJbKrE/DeuMqZ9BYa4kIRNgaZREgBABVxwhWRIhEwBkyBFkLMIiDBtOjlw+fePRw89ixq9uroSQL+oxn3vJ7f/jaMkZgBgQwNieoaw+AOQsCIUnOUUEB9M9u+Yf/+f5/E+qGjMVHPvaGxeLafD5jtsYEYwwAGwPCFlBzjEQkwohIRMwCoNZaUVYVUEKMzrRSyliSJSJjU2aRQQVVB1S13hJZUBJAAF4uVyQGwSBEa62oMJe+2+cMoEAWNzaOdH02XkEra9H6sOq6+Xw+RrUGrbXOVzFG52yMsaoqUK+QY+r64TCNfdNMqzCxtqrqtjAbwv2DfYtEBo2hlAoZA4DWWuesMLFkEfY+lMKIkPNY1Y6LEjlmcc4iZi2ZyxjHvqiUDMwx2HosyYVmPl8Dlr4frTXGoLWoriay4zgSkbcVoAKiKuTYj/2AREBo0IhoVVXMRRWMMaUU7x0SMaMLIeUeFAnJ26Ci/bDs+xVidhZjTsw4nW1OJ2spjc6FUpi5GLKIgGSGmOpgckkIFgisCTknIqOKqmkYx9l01g99qLwBFc6r1SLUwRo/9n1V+TEWJABAZ4Pz1A+jd1XJuZ5MCFQBuKhBG3OyVg73lqGyw3A4bSplee3r3i5QM6g1ftUPJ+b2//jZlzaKWgeOBbxaNYxsjCGyi8Nl04ZSuAqtMoTal5yHYaybpggQIZEppVTe/Naxt73m8sv0KWIAJOfkrR+HWDh770rJVVPnbkWz2cfe/J57/+J9J0+tHV5atmuTR7f5mI9Bx0rrBTDm1Pr2+tazb77l1E3//Nadp5389q6/vHl6+xXfeeSNb9sMqx//aPnhE3AT24O1sLLyTXo0L8fW1edPZBNnb5bDdzzx8EG3Ozf26HxSNyFYMKSG1DlEdAzinBMVZUQZKEy0sJgc0BGgIVOke/jycO3yysXowJ48vfXkleXaxukuD4vFE9/xypf+/Ktesrvo2nbSL7vgarWSU67r9vBwOZvMhnFwzhnC3zn19p+79vJueVhXDT58z1tEhpRS064xF0AmUlURYURy1gJo33fGGGZWlRBqRFAQIkzpcHWQYrq8MT0J3rKoiiKit6Zb9VVFqIaMy1nQBAUSkar2CAIoBmtVsdawlPyUIRLSWDIAW4cqta9IFRBAVEPVlFKsM1wKkWMuzKVtW1UtOSuoKnpf55Sdt6olpbEINE2tqt47ZQLQGEfAwoVEMyGQIRE0ZAGVSEXEkFssuio0iqMwNk0LoIRFclIpfbck59t6Y7VcqGjVNlmUmfM4bGxtlZIP93er4EVBlb33ImoIAE1mJWtDmBtE51wRzjlZawBAFfjLQnAxjv3y8nR6pBSjghqkCs04xNWi2zo677vRGTcMaTqZxTSOadzc2kRAAFAFZg3BjEMBRDSWiEEASVIqSCQi3vsYR+cCIepTQFGBiwAAGcOSQdR7xyWiQWElQpaiDLlA207S2AsUZ4MiqSpCzswxriwElJ4MjENm9r//Z+/cW5mCLhegYLn0pR9uva157Y+8nJelmFK3M2tnSOXgcKdtJ4aaYVgZI+PYeRcAgAylGKfNJMWkqsyMzv7+2ff87KWXqIqiDcGkGI2xKoiEzAVAK9MMJldqxNd//aYPn3/fuzbBcz86MNcWMoJpKW+QjGhgzE176lPPfdWvXXvTRg6X29Q1py58RXvnR+6/fuXivyzUm/Ub52t33Hq6n9Kxxdbs2HoaL1wiYw+f2H3R857xHd/18Gc+ds/n7//8Fy9ce3xnHDoVttYSQnCTuq3bSVO39fnHPlsloGPH+uvXG6cjy06fri5XewduqvkIolUWoI31ZkyB6jUfqr1upx8O3vXe37YgiJYUnLWA0ve9977r+hDMMAz2S9zv3/D3r732Pd3yYNJO8ZGP/SctRGSKRqKQ06igzlYKBZSssaUkZwNzMZYAgIsAKhENQ2+srWy9ff0C4cGqz0eOnTg8WGhKY1y17XQY+vnsqA3B+kDWIRlUElFrLBKhQREWzaUka1uE2K366exoydEYp1KGIZIdUcX7Bm21v7MzX5s5a5m1H7vpdAZKqiAFjAUyBtCUPCIaRMuFEY11BMD9sEJUQ6GqaubMqQAyEfbdQJ4InYhaS8zWGFTNiAqoXCCEClAAGBUkF8nS8zCfHT9c7CsIKRjnSklDv9w4ejb4ILlIYQCyTsbYIWLOKVQTRFIVE4ImPtjf3z9cnDt3SymxlEJoFSQEn3NKedy+su8r3Tq6geiBDauQpbquUxTnnYKmnAGysNahKVlSHp231hhmXA3L2WQOqswJrVW2Me1BsVUdDDkiKJxUEREQIcaRoKiiD00RAUDvvXDJZRCmqqpFJKbekmZGY8hgAYbDw5V1LnNS0PX1U8ZIGXsgK8qGCLB85nPbH//chUcuXd3fX2bmEKZNW3Lnnv+c9hXf+w2ssyGCwgqAiZzBxloYhkG4HO7vzTePMUvTtNa4cdhHxJSSMcZb+L2z733lZ58rzLOjZxCklFz7SgCRQFVzjgSKFESYeWgm9d41+ds/+PPxkUen8+mTDzxx8szN3chP7l075ZRYFh4feekvvvgtv/KCr73jgOCDD1x92umTN9ntP7p3+yvmG6ccbd7wtKYlTe6GjUne2UnGxcNVC063Tu6/aPOylc0TR4CKNxwms7qeuTBZdnHeChmMcThcHvztRx5606+/+d/86P+oOxc//YHP7SyHPdZI3iqvoT0BA1kvjABomvWVls07brX7i0cev++XfvnHnvPsW2az9T6OLCkYn3N23oGqMZaLqKJz7ndOv+PHHnqBDV4Y8eF73pjLWIVm2e0611hjRIsCOztTyC64cUwlFmuZMDhjySIADMOoIr6qnLX7u3tcyubWjZ/59D233XpC1BjnVdQYl3IBAESD2BmaFCmhpjQYC51tqzxWir2hYGwdu8O6sotuackCirHWh7kIP8V7F2MEyMLonAcQKJGFjA8s4JtKS2QuxgejRjgWjkYNugoAEQkAiRBAmUtK0QiDBRElwQQiRYALaHZtg2pLyiUnQrLeqqoLLg2ruq7HGIkIqQYSZ2esQxxGZBCGIZdjp4+XlJmLITI2CHclgq1Ykk88oCHnGyS3Orw6mWyAUSiBCEULM5OpS94zUsZu23sbtVZspKgY6wmIgEWbeeMplMLoKEchUJDCJSlaa50q1HWdS6eC1gZrQiydMVYEc2LrgABFcimjtVUuBQGMtSwWRECjShIFawOokoHFYvfJJ6+cPXNjXbexLGOftza3+n7pfOjHvq5bArtcdKRjjkMqMt+YqBjvmpJVKHqLoZovV7ap9CP/cv6/vv2errCBZTBlawrPf+7T7n7WM9YnJ9F25CuIpMSpZIOenFktdyaTjazqAVMuzlhmApXfvemtP3Pxhyn0tkCRoqhjHKowBUlig3LRVFIZp5ONYVyVsdTrE6tyzzs+9ej7Pri4vjp5ZD3LeAdP77+8t2e6qhgm+fzX/t8v+
chv3H68/ZALN+j+/iqW6elb9lawsXlk5gHcuWZd+w7S8Fifzh2/dXJutuyC5W51eG34qa9q5jOF1lXVMCwNKSoraN8v6qY+2F+26/MXv/gX0bf/8dd/8lf/3evlIF7C3IIh5E0lVMhIlijP19wydnV17M6TsV/LD9/3sh/+hu//3uctkxiBSRtyYe9DYSFjQaCokjEI9Aen3vaaKy+PMaEhfOCf/iwOHQqVvPLBIBlfzQGsyIE101LAeSpFkYRMBSCgJedEBpkzEhpyUiTG1I+X27BljUkjV7UfxtVyddi21d7OpRvP3Xm4QNdopW41rKpmZgxBEVX2vs6lq5pmHAYVtDagAUTHwjGugjeqgGAQDVqKQzJEpCXlQ+f9YjlsHTmasnUGEQGN48Ip9d7blJL3jaoaY0RUgRFRVRARUgKDAEgKxYgBF/vIpXSHT4R6Uk/nDEjomIuzBhEljgDAzIYMEIamZnFFs3NVGkZDsFruz9c2AZCcLcwIUuLgbTXmoa59LhbRKwihEUlEFhGGtAyhGfoUQnO4/WDl6sVyb+xx1XenbryFjAveGD9Tyc5aAYypD84b44DQIKrCU8YYnQ8AgIAAMI5RgZ1DkVyHOTMjgjEm5syFQ/AAJY7ig1dRJHQ+pFhQRaXk0gM643yR0vo652ws5Zz7frTWO+fNU6xNabVc7HhLFoPIiKAsUEoBwLpuRNTVjQEskhInS3OkrBI5Df/6ML71nf9wee+aCfUGlu/9vnNPf9oty52FnU3XJrWInayf6A871Dyseh+w6DiWISatGn9w2L/76y/85GMvVqHgsjG0f7C/t7tTCDenk3aysd60A1gyPHaCYPeGMZhoxUg2Qxne+stv7J68cmzidBVvsuuHuVxORfv44W/5v777k3987SvP+S9+yvTL9dkdR6qDs+fuJMQxLU5sc+j2ZazGCQ4a4Nwtsxd+y/FXvJDdoWbc/eS9y/FTYbYZqE4pCakCl7GszdcODg6JXD0p3/bC162gvO99v/57r3vLR9/30GAWBGFdcCqQUUeEvGbOPe34+V1JV0dUOHL3OblSXv2qb3j67ac0OM2jdSLFhBAKMyggOTRGSjEE/+nm9/7Ig99C5kvwkY/9F5BsyR3sX+NojFdypqrb61e2faU+ODJB81A3oRQkq3Fg56wxlFKsg9s/WBC5p1TtlAvGvL9abbf1lnc1kQM1LIMBs7//xXmzGavN9Xa6t/vQvDmGHghDStnghIGRkAyoZGucQRvjiFjA2jjmum4BKHGqQ2vIxDiAYGEBIBH1tTGgAChARJYLE8lycTiZTYkwxlhVVcqjiHARMgS5FNVQVf1yYYM15INvmEtKCTTvXrvS1lW7vp5jnLRNTjGzCrO1NpdiDIlSO2tiBLK0WnbeWUld286dd0pYSrHOa1aVzIrMYl3oh1XdmByHZTdOprUUNcjB19b6cUysNHQrX8uqhxNHTrMKEeQ0kHMIQmQADQCWlLmwIBFqjKlpJ4pkkIkw56QKVd3mxEReGFQiGRApIuJcKwIi6jwRylOIqJSc5cAaLwKowsWEqo2ptLOWY0KEcexzyUQueF+4gKqzUHLSopIEHa6GbrFYbKxv5MKTtgUEkcIcyVSGrCUY+uvNZM3aSR4t854L0wsXx8eeuPzhT1+6dqVLeXl4sK2Ozxw7Oq4O1jfr5z3zaIr95trmmVObs3o+mUwK08baDPr4ujPv/IHPPuf/++sPXVr61SLt7w3z+ZGD5aGnFOqwMXXoRxjgzJHpi19059Ej0+Nrty7jzucfeejI9NzZ45N3/unfX/6He5s1CtFf2ll8zzOetXri2m/c9j/dvvO5jb0P9tvD5XDkj+9/931/8aHlO95+cmPCn3t8/SCTb8DFLxy/+Wn/6w8efeFXpO6h/r++a+22bx2/+IV45YvxB76zkquLuD+M1E6PTKbzXDpC61y1WCw+/dDHf/l/fw85+3dv+/eHw/Xv+e7fnKG5LjJB3lJyRFlErcnr9sytx3fv3a0JIx6Zz2fH7mh+9t9+s60aLJhFCW3XrTbW11erZdtOhrE4Z1XKn9z6/u/75HPX19fHYcQv/MPr20k19BEwaRm2dy7P5+uonmWcTDYAXcoRyI7D4EwtOjrXMrNzFgByHCfTqaKO47g2mw6xACkU6ytiLiwl5yRJx1FSikeOrjmExx//5JHN24bU9xHqthw7cuNitdf6KqshE1DRVbZb7VehSiMrgfkyAERNZKtc1HpfSm+MISQRJUuaswIBOdCIQKUkHwikEmEAQISco3MekQAo5VhYQ/DKrCKEBASAGUS7vqvqiQ9TBVZRyYlLAYuESETGWiKSLNe3LxzZPBUqM0TgInHoZvM1QC152Nvdnm7MHdVclmRqQJ/ziGAJKyk9YOuCaEa0tLu3PZ3WztPBYbl88YGvvPNutZ4sSkkGSVSkFGAm5zNrqMLB/s7G+paCySWziA+ulKJJAdVaI1yKFHqKMSpAxiChMJMhyZEZnHdAAJL7vvfeI6Aha00ouSgJc1LBqgqL5QGaqq5r5hK8VwApw7A6kFymGzcUjkhQiiDn0LQxF2csc1FQQ6ZwwczL8aCuLA/FOL9Y9esbbeyXrGDQuqplaGoHviFEP66G3Z3xAx+/8OGPf6Hr9voh+TrUfpaGZcfRoh47io2DW85tvPtrHvqG99/22X8diAqSVTRdP6KUyrlqts5lBNUi0VAVUx77NGv42FY9rZu+PzjoS3c4vvqV3/qJ33lXs7+3o+bI3N+S9Ne/420Pnnk+APzKJ35u67bd4fz5ta88d8c911aXHrnxsO4wDyF3d3/dTb/54472r//7113+rKle8a2TO0/Mj84v/NX7C/NXfddzFntXY4S6bRHZhTolJiJjYfuqfuDD97Jrf+Blt4Sw/r0/+ovpweV5yBuC64qWIAM04EuVRe16CYRwQAFnm2Zy5A9++1uyltpYU7lU8CmgUnIizmqbup0VTq+/8T0/deElJWUiwgf+6U1p7IPzzACgTdPmUmLKofI5JxA2RGiAS3bOKiPLCrQqXARG0OKDZwYiZ60b+xUAuKoxZLxtc+lXi65qZ21Tr1aHwgUNGfIpdYBUhYrLsqSIbNy0jTG27RzVGwuEJuWSc/YeWUhhyD2V3Lm6apr1FAfyBhVzGhQSEljTHB4cztcrBLdaLTfXjl248OjxE0cXy25tbasURUEwUDgSYUwxhADaMsTglcB03dIZZ2xrbej6HUDmbCdt0y0OQEtoJwg4DgUog4z1ZL3rRhC9+MhnT589UwDn61v9waGxQVS8ny0OH19bPzmOaUir9fnGOA7CWtcT8tb4AKUANlIWIjaX5Hzrg1WFlKJ1BELGoojkJD6EnLNzzns/DgOgWh+YkdOi71btpHHOZLbWaooR1ZOVUoolGuPSugYRc84AUFIKoUKwxjhRNBZzTgDCrMb7ECpgBWARcc7xl2RjDDMDQMqDtU4YUs5V5dNYqsopJ4VAFkTV+QoUS8lIqMoolgxyzkQ09tdyghBqa8mEZugjANatV7GqgiT7B9cn1dSYohCeuNz3
LPfee6mAuXj52pNXnwAi4cobGeJ48ccunfyjLY4D2KBMikUUVZBQVBjVAoGIcGFEZI5VUykiM0/IdxIrcDuHy7DWPmN7WborT1+/af3ZL/+Jp/8qfNnR9Lm/+uQv2dX+FKb3X3vk2Ycbh/Vu3usPf/wHz/7g167Of/zaa//zzte/eOeu6zvvvPfM19zYj7u43+7dn1/xGy+OvGFMYC1oSAgxxaJorSLNfPDkZNUtD3aHj376C2/8rfftwugFp0AVABGsKTWEhtwoxtxww+SOM6k2w8Hyp/+HZ954Jow5rk+2lssrfZerpl0uuhNnz5aUOXsM+oZb/+lVD38zAKfI+IX3/1rq42J5sLYx8X6zH3rv66quVIy1JsdCaIYyWiPDsCRUUhriYn3tCHPDeQWGnPUi5IM3hP2wss4hWlVOo1gn9aQpuQCAinIpIjyfbRlDsfR5zMLFkPqwGdPKWBFNKWNV1SWXum44FzJThayaRC0QFM7WOORMBhUxRfGOlsvDpplY44sMIH7VXW/rLURDBowlRZWSxphCaOq6iXG0luKQQqC9/WtNMw/VrBRUvrJc4mS6qdoY1z/22AM33Xg2JzbGMeecIcZVU8/I2pRLCK6Mo6rd29+dzBxKRSTWNtYhEfV910ymhtyyGxByzl1TV56mDPLgfZ88c9ONdb3GjHXjVE3Xr5q6JbJ9v6rrdhw7H5wxBgARoBQmIuecKA9jdL4qvTpHQLy/v1PXQcVUTS2QSWG16tq22dm+1rSzEIKqAgCpLzz6QF2/qJuZCiOBKKM46zyLoIKollJEpGmalHvnnKqO49hW05IjS0EE45wKGkOlZAQDJPlLxFo/nU67rjMGnfMpJZWiqqhkrTNEqURrNKesqtZSzGNKXIXaGAPqVBhRhrGrqRLY2d7Z2d0ZP/FA84WHHt8fdlnJUHXxfzl/6vXHOTI6IONT6Qkdlwwqhqhkds4xs4gYY4oqgcZxMAaiahhytOjAutqMUL1ydvqHNrfftws/+u2fhC97yeFf/sxHf+d4NXno4qWvfMZXyec+dW2/+F//t5Pbqkf+8S8Wf/Lo4jUvf+ZXrb/lF/6o68gd7MwHfHLgu1/5PX9+z0e++tatZ969NW+mX/1Vd2XwBLsZHOSt5bhd19W4kqoKGyhbHQAAHlNJREFUdVVdy9d/6Dt+SwiXrFPEKaIDmSpVBt3GdPOum6rAu+e3d548EAwnn3n7n/7mj169/GBozeJQRIa+l7qdVtYdDgf1ZMNieNNdH/ixB5+fEgwd471v/1Utigi2NkCORaazeSmFDBpyhDgO0QSQgipYV9X5h+87fvwEWo1l6WkmoETeucY7XiwXTdMw537snDV1mAKkvf3DjY2NwsIC3lLKsQrTq1cvr61veV9ZYw4OdsGM03YTJIgY61VEjKFh6ONwaCs3nRzJCRTVoEEz5mQIoogY63IWa3HVLdfm64gGRFbDtnfrRCqarfXMZIxD8oa08CiSVLwxZIjHIe/vL0+fOXlweOArj8WFEBKv0HrI1WRa9cMKhBR4jMu2WRvHzvkKQXPJrgqSkzGeOYtGUIsgpTDzOGk3Y8rGmFhyFRrVDFCcdUOMVbPmQJZDclSs8cO4IHRkEZRE1HsfU6mqatUtvLcIwMzOBhEZx6FpaxZRMEQxpdJUM0SLkhVMSinUVlJCsgBUOOmXiQgijrFr6kYERaBp2lJyThkRS0mhDqpiyZCxzFxKcc6J5r7r8SmEpMYZZE5xHEI7ERFrPTMQGkBJMTV1y1CYmb7EpjQaYwjBGBIA5qyqAKhFvDecU06ZmBBBNIuO4MjaRooa4JHA0RyxL8kQ9u3G8Te/+VNZdBiGv7z7g1t/cCzDOK/qWACIORXlUlhU1VqTUzLGACKXAojCQgQgWjgXABBlI2vj7OeftV4/fvH9q9kLbr/hi+bmt5z64efRhddeeuPFVZft7Nz1Jw7MQW6+4ehvfh9Uw/2feMMXfuPzl+/aOndDu3jw/ic+Ie65VXdF8GL39P/zuyfj9jveuXv+4IrJ3sbhu77zpmfdeebs6c3DjsfFwWy25Zt6/dipoWRK5hMPfOyXfuHdItgpOOEW0ILMyWyebm678Uje3tl5rHuSzbaxUlVHT5/+s996GaYUi7WkOaZQmYNubCfe00Yu7Kz+8S0fePUj31K4gCG8/wO/rcz7B4uNo6esRVUgMrlwYbbWCBcVsc5LkVC5kobgZzHvs4hKTW40xqpaFjMsLoxZjXGtr3xdg4bgdbnoCdF4Wwpb74QDIQ3xcH19XdiMaVHXNUAQKYimlIgkUqBtm+Vy4ZwlDIpasmxff/DYyaN71/eZD6bT00pm0m6m2A/j9mx2g6VquTpk6cZeQ/CbR9a4uFKioqqKDVaF9ClFUk5EmajO3IewYbCIsDW25GzrNo7RWAD1OV73blJKSqVr6i3RmJNWdVUSj0MXqqooNb7O3BM54aC4LElz7r2zwAqURBUxcFo2k5mxbcrkXGZ1HAfytlsdIlhLgGBtbQFQhI0FpNq5kGLywXKKiKYUtcY5h/3QGeetrVgEVAAFQMZ+dFYBQIvJZeVdbWxIOVqLAEBEi8WirltrLSIS4YULF48c2arrRkRKUXLGWkOgcYylFGstEamqiKiqMdZ5SDGiiDIrAhpD5AC9MYVQS0xENosQGQBUQV+FYRiEMxGJsjHUrYa1+VYqxXuUnEouRSOhr6oqxmjBMDASjKtuurY5jr0xCOqrqhYtRMSlt6H6D8fe/uqHvu38Y3t//aH7P/W5hxlSbU0RFiYBRRIQTSmJCABYEgDMRVkULfIqjVRuKO5VXzm9um+e++yvqRc7B08eUo71rB3DuHP52u27cXHLZhrOHf/fvn9y93D/vf9y5ZF7PvmmJ5Zn1laPbK/dVC2vpnpQLaN1YdXZ8QTcfdfNd37T086dOLXdTd/1d5/+0Ac+HRycOO4WC/m65500Os4ms6fdcubZz3rW4dh/6EMf/5X/5+8jYm0sl+KQDOLW1B494lfbC1z5RUM7yyio1uHtt97xu7/xcijY9zvzzWO5G5iu3/PR8+/9p/Mv+ro7Xvytz9hfljfcds9Pn39BYnXO42ff/Su2mjOAAQJlYWEuuWRv2bqptUZ1Zf0kRhaJjqCozxy1KMhuTpRSLjGrXvbUMsFk/Xg7PS3CAETGiAIKIEJMY1VVWrRwrus5S6FAKIiqMXZ1e1RlABVUq2gQCQBTSoqdcwERx2613DswYdpO131dbV+7vL6+FlyVExivOUciDKERyMvD3hhhLpPpWkoJwQiD4hhCXUr0blJKEVbRgsTO1VLAWEOkfYzT6XQcovdOJZWSQSpVGYb9ECpjvQiAQU65bnzJA0AFVImKs1LSSEQxrqrGKDZSiFT6ftdj2N2/XNXemckY++ArVIxpgSCKxocmsyrU83mL6JaLVTNpyYIC5QzekzNuGFZxOKjqjVARMxMGkUyEhQuAMmtdTZizQkJwQ1xVfuo8xr4ngr7vQ3BoaxUphUNVG2O4sAiAAiAAqKqQQRWy1iiIcMmZS05NE/q+67p+bWPTkJO
CgBkAEMFam2MGgBACM6eSAEBEQgir5d50st4PHfMwnW6oEJFBBAUgKMMwVM1MuACKgHgf9q5em05nqoiAWQWwqBRvwrJfNpOJMQYVxmH409s+9FPnX6Ca6qpdLocHH92+97OXPvTRh2Je1ZWB0ilQKQKKpQgAMSiRch64SFYfrP1+5aOnbtw83nQPPxyTI6u2pb2FzIufbmysPf9Ft/3gN5rpxc98+O3Xzz966cGr5XB89BKajWb49HW5vXYjLTu+Go62HhclZpbVEqwt3TCuz6dnz1br9eTzj2yrQHBBBTLncQRkvekcf9PX3fH5h3f++u8eEDHOMBCeOn0GCRo7+84X333iWHX85KkLj1658OSl7f2yt1jeeLJ55Xfcct+D10HHveV44dqTf/P+PWva2bRepnj2iH3Vd9/9gZdefM3FF3JmLYgPfvj3M6cSwTlYHg5kh6p2B/v9tGXEevf6nqTezeHE8dvTWIlsu2orlw7EaBHBlfCkaoymmeqSNTfthMVbrzmz8x4AiRSAAJCLeO9T6Y2pMo+oaq093N+pKzfE4oNvJ2uIFaCsFl1dVwBKCMa6lDKBKFY+RC4upRKaaUoDoSIokRctALC/tzh+9JhCTCkLHK5NquvX+/nasYPlMrgwjtFXCmpACRFVBVABwJDNuRCBNTblFIIrJYkE7ylnNAaGfmmtVYCmaUpRa10ch+vXrx4/Mhki58JcRkJt26m1th9WKK7rd9q6AnZhNs1JU8pj7Cft2v7ho+trR0Hqg51rLFHBnDh+I3hfN0GEq6qOMRuqcik+mJyUeazrKiUlhFxGVTXk9w92t7a2xnGw1hq0gKXrVlWY9vH6bLr++OPn23a+Nt+KMTpnicj5uhQBJVEmKoXZGsdPkWRNUDXehVy6lKK11jkHYhRhuVzWTROsiTk550vKzhv471QBEMEYHMfeUDDGEBEzx9gxk/eeCFIeEYwqWGtYcrdcbG0cE/VFBpHifDg4WG0c2WQuLAUALACzEJAqemvGHIlIMlvvfvf03/zUYy9JaTABy5ipFO/AtrPPf/H6Rz/+6Nrmse3re09e2r7voftsEEApic6ePHXnbWevXNv/4Mc+86IKn3Xbnc31y3t7PfV7YPM1pDU3PS3u9NmnuRfcvW2vX3viY7v9blpJAv7Xz/MNR9zuE3n+FdUXn4CqSwN31dFjZ/3qYO3IZ+7dBo/kISVgcMbGQGvTup3U/vjJoyV1G5vTu+6848gp2L5SpMBsMj15bHl885gL5lNffOKO2884HJzBeh6eePTayWM3ZJUYV0Ljw49fvL4zfPCfLp47fequuyYf+sdrT1xdddIEPeyLtz6X0gDmPJarr77wnHffTphLZnzgI384rkYuwzjk2XoDyDkaS2tqxqqu07BMw+LwsN/Y2hrTQrOfbGhlN0tJh4s9g3ayNinsAVJdrcVxGWOcTObMogo+BGYFcABZIYXKErRD/P87gpNf27L7IMC/bjX77HPuubfq1Xuv7KqyA0mIUEgcBRhEAgmJEUNmzJjABGb8BUiMiTKMDBJzRngQRiA6S4YACo0t4iaVqnJ1r7v3nmbvvdb6NTz7+y4RjBxVps8+++TZ86emCpEAHRmWdSmJRKa2DWLImV7fP3z44YfX84lrRvWanvRYLBxBXMNUc6HWVkQE4PP55fH4Tu/24x8+fPrxz377d7/94a+814Yj9JKPvTfhSVVZSHVMdbdsjyIEAO5gQyO81uxhZpQymqIkBCd4C3GMQZQg7HQ63d3e9uXEuQzV0OijIQIiMyUQH42EOVdbWzvM746huzn3ph4doLLEy5f3T5/dMYsqjOv9+XLa76f7Nw/f+PZfyGlSHapbzrNaN7OUdqMv5pZSAkAIYuZtayKSBLZ2JaLeghkhOFfum0kCRHR3M08lMeUIVG3M1cy2bUtJiJJqZw4WREhjjJTKtm3znMfQ3TSNoa21lDNAjN5rLWYeESLiIaqakvgvbO4R4cxMSES8ba3koraKFFXbtoWo9P745G5/3TaiFBHny/LkyfvaYYzOTGpKLCKwXB+QzYfs9rO7h/knn3/6vd/74T/8yd/ezVX4oLEgAnge1q3Ffq4AzVHnw/zpp49//vH14aH/zne++d67GK60390/nF//n/Wdo6itP/5/X5/uTwE2X+DVx5+fv3PzzkfPf+1bTz759NP/8C//+P76SP3ht/4S//wlvo7tpPLzLxcuu9YDcErp9LDQ0+PTw00tBe5ubv/q737z937zN6d93pXds/fx1BoGAtC2bozZRgy/blunipX2gNR87Lw46O4wdTUkvl573QlnLZj+949e/9G/+9EnX14u3r0NhSQcUbA4XGCbVUwK2CVx8dg+/wevPvgX30QAN8KffP+7GNaasuTH01fTdNjVw7I0ZgyMJBweqitQIt4RmulWuFzWVykf23nd/D7AbqanZT+fzo83hyNSxlDzYBZAdN1EUu/6xRdf2rh+61e+hbBDRA+XRDpcuDxev5zqPoy2pZ3uf5TL7c3dMykCWkiypOQ+2hoPb/689eWjX/11sKKqfZxzDvS9u3q4abAAsRDmZkvN8/V6yumt4o5upH5BjKkeArS3nlJFJEBr25bzjonHGK31aVcRyEMJ67qep4kR+bqspUzgDtjNhlvoiLqfPLCmedtWAA1wQk4iAV1NdXCdZmZYt7XkkkTMzZwAwXUlYkRo7SppNrNSinBVdUR1UzdMWfpwQCQxCjBzYo7wlHhZ1qnuAKD3DSGbecrkRiwgwkM3Hb4sy35/EwFIQwflXMwaErw1TXVZriKybW2/P5jG0CXn4h4I2MaibbuZd6++/vru2UfDVCT1rRERIpZSW2ssYe6mkXMFDBFxd0QMR7XBTPALeL2s0zSJ8NY0bL08PByPd9u6mOtuPphj1+Xm5ni9LgBYahndGXm3myJiG93fUkul/vPn//qffPl3mWMdHSgJkoB1KugDvEckcBLxYb1tSkyEoepBfFvo4Rq1kAEw8VzrpZ0y7TGpBeaYIWsiEuao+NM/vf+P//7/6vbw4mzzzd13/vKzH3z/f7xxLOzzYf8bz3/tb/2Np9v2Yp/q/fkEnI63d29Or4Dm4+2u8A2ibssCEBC+hrZLvznsgWlcN5nmQPPuMqEZI4LpmqB0X3IuNmRZTinbfvd0nsv96/ufv354R+Y//pM/+aMffP3ytNYkmFm3bpACYlvay3/88/f/8MOUE0DHn3z/D0fzWnYP96/mmVI9dt0gkEnGGKWUiEAu4eo+INxsY+IIVHX3LiLnhxjx6p133gFkoElkDl2EU4SShAf3DViAGQHQI9S01GldOxMkhm29ztN7fayBRoLnxyBeslQdmnaHkvB8ekNIN8d33QcALNcNsbNUNRJJ18ub3e4QjsRALEL2+OaUapGUSpncZOg1YjCLqpYyaei2qkgWwRiOGVW1JOzNN12TFA6TlHuLlJGIAcwdzGG3P/TLCQiHNgCSWjEIHJhyoHoAMYrAWK8iNYA0HL1dL8t+fxBJZiPCPYiECKi1LZeMCGHS+uXmcLu1cxu6m/d9mI
iEY2K8nO/nqQbhcl1FSkoZkLat73bVvYeTea+ljKEakdNuXUeqKTRMzxiSJOUq67qmlNZ1BdIsZbS+3809mvDOzNQ2Is4pa+/uHoDMSQ0IuY1zmCVJqppyCgckJHI1FhGzAQCMrDYAsUzTaI2IzCznbFjBF21rylXdkhRV6OOM5tPhqE2lZgRRVQCICCIgBDdv60iFzCwJAQQG/cFH/+Yfffx3Hh5Oz569v27nnDMRByTVDREhmETbptO0730DQETvfUz1aLoSYesbM0dYa8okxEAcoytzLrkO65KotTHvbppec0qE2LfeG5SJw4ebb9s1SbUID0NIl8vp+fNn69rNAiBySqrDPQAhJXE3N8wlt74BxLasUy3qVqbJLQGu22a1Tuv1UsuM6IHdg6Ypr2u7nNf9fjaznHOtdaz9f/3wiz/95OHHn3zxePHPPv+CstVp99nf//iD737Ytq7u+N++909Tnvqw58/ff/nZ14PeHI/vE+yQEAnG6MtyPdzcuXtKjAB92BhNSJGM6EZytJUDmjhc26VMZZ73NmjbHnMtlPZj1VQgyzRUw/u6tuPtO9d1E2IRfHx8JRi7+QaR+uiAiJGGrSnt1DSnjBhmmktR3RDIFEV2qk1E1DqgCadt6znnlGj0ltPeovUhLBAQrV8Ic81o6hEMQUGdUMzMw0M3JKqlCst1u7qx+7rLN+pXgFKyjOEdPLMAgAivpxeppvuH/vTZUzcafaTCGsO67/c3EdHbOsa6bV1SEkGRwiwRYea9X6a6M9OhjoA5Z0IGRCAmdrPwGD685DLMAsC0lTydHpfDzW3vS63VHUxdphQezAwYwslsEHIEBRgGhikRDOtuUfKh23k0EEHznjObujts2zrP87qdw2Web8boCDHGqNPEImbGxK1tAMGYAICEkQhCVT0cPJzCkcTc1JxY9vM0RtvWZTfv3V1VEVFy2ZZTFlrWdjy+t7XHCEkpC0wup68+/dmz59/GtEPC3joxUTAxmfvQCNDdbtdbIwDV+Hu/8c/+Zv/tnMtyvTJT7wMAWbjWej6dzDwXIWIAEuYAB0CIAAwEcXczzSl76BhDOHmYqr9VSkFAZHTXbdtKqUSoYxBhhCOR2eitl7dyFSlDfVs3RJDEZkpEAD6GikhElFLdfdu2WouqiwgAqDkhmSkAqJuHpcQRVGv2YRBITL2vahHhpRREDjfzX2AiAI5QMz/s70D8v/7gz756eXlzejgeDn/te7/zsz/76eN14E//03dZBJB6t8CHKX/zup5DWuXJTAGilBq/1IfmXE7nr28OT0azCNcwYSYUJFwvS6k8dCPidTsTpMTSox/mJ0NXwuxhEIFASALIKZsbIiSivCwvPSDnSpwSYx8NoUgmNyeibVsBgSBPu3RdzrVMyKDdmUrrK4AyJwA8nx9r2htudXdYt9fWH/b1LwKg+oll1/uWMiG6DslZRLi1HhDaVndlLnNN59NXNmwbD9PNs8R71cacy3wM8zADAAoZQxWvETDG4+3N04fH6wcffng+n0XquvZS8ul8Ot7cta1NNWt47z0liQhXcwdEF07mvZRyXdaImAp99tnH777z/m5OxLt1XSN8v9+rivmI0DrliBh9EFFK6bqpu+/3+9ZayaWPVTirAhEQBZiu25pzYhYAaf3krlPdR7BIMdsiAjEQ0a2O0S7Xh1JkN+17b5Kz5NRaEEZidPOtXVLKKaeXr1/fHo5mnnM2AyYkwtabiKhFTmzamWlrCgC11vP5XGterhcIOByPQnUbj0LVPUZ0SceK6bo9AOhbKSUiCgcWQuKUJ/OhqjnlMEeS3jcAiMCpUIRvWydMgdZaPxwOY/QIBAgRAaB1vdS6dx8W63rG/X4/emeh6/I4zzsdHkG1JnMF8LeQEiJGhFlHh5RTax0JRWYbI+eM4Mt6TpIM1GOEJ0T3GMzSOxJRTiUiUqLeW+89JVm3CyKVOiOQeXN1ScmJUtiL15/0Ft/+6NeRZF22XJK7D3Uziwgz3e8P67pO09RaEyzDH0gyxCw0euuStKTMcmu2IJUXrzb8n//290WACMGgL/bZ5//9r/zWX1e/c1gQCYGJ5HJ92M3zsjSSfD198c7dNyw41QS+aC/MiDTWtQMEITFlxK1vb/q2Jtmdt8s8PS2Fzdda33WI1tt+f2idPSzlREQYllLpagB4frwXhl09IDqV0rce1tyalMlUp2m/rt3RCSAn7P0CKLXM7vRWoA9lYXr5xYvdfk45iKS1LSV0xyQ1gpjBvQP69bKm6ZhJTa9Iadm2m93RTQ0oorlxLmjqZqPUOoYRp2FNmFx9jF5rJqjrcv3yq0+fPv/Gze27kud1tJpjNE9SdAwAQiIAB4gAFMG2dfBR58OyLACQS+prP9zMyzJyIjVjBIR49fLFzbtPswi46xgkDADrugLAbt5v21LKlKSa9TE2ZhTJw9B9ELMFCqaIi6sxoqTdtm0eHhHAnFNazidhThXdAVEgmHOyMXSMnBIV0be2LaekIe5ufdRSgILI8Bc4ZBc+QpswmjpASMrLtmbJzKyqKSULaEtLqaAwgsdbzixjXHukQeaYd0J5DEWEiBjRGcEtCIWFzDynbBpAAWARgcgQej4/Hg6zqk+7eduGmaXM4KmPdZ5n1Rhdc8Exesm78+mxlEKM7upGxIBIphDhRKSq0zSdLve1zq11ABcmkTw0WHLKaGqnxzf7fe6rEiYgJuJaq1rrfTAlCyNCRBijhdH1enn//eetbcwFEHvXnHPE/enxOs93gczEbqqDJAlxW9eVmXPKDm8hIiD56AEAImJm7iFcmHCMi6MQCnFohz7aPFU3W3vDn/zn7/Z1XK+v9vNe8k2q04sXX7337o0aMQsiBAz34q7ICACCZehgBrUOPoIJqYK3KZd1vQKAA62nS57y7d2zZbsCidj49Iuvbw7Tiy8/ff7BB/Ocl/vHw7tPL5ftcJwv53VXqg5CeNjdfuPVi8+neiMiW7v2pse7gxmL0PX82gNte7VsYz/z8e5XH073u3oT6Pev39zd3opIoCFJ722MDVB382R9hxiSd5fzQ9g43h0cACJGGzmJeetdS511mNtQD+QtwW54TPNNayMxJYmH06mUkrl0J+Fws5KndXk02FLa57xTA3QYeqnzfD2tj4+P3/zgfTPzSEjOTGM4YDDl0UcuxXSICCC4WyrT6I0QKKKbRnjOmZlVhzsQEiGpbmpap8kh1sfl8ORJ5riuLXEKX/s6+rZhgkxVsjgV7QsCE7F7IPnoWqfiPhCZmVzVHREk0B1cUu691TKFueoaZqqjjw4QN8d3HSCVJAS9rehk5g5BqPcPb9578tGyXlPORHI6nUSIUJmllHo+P+6mG/+liEBORB6ePEyEwtHdhzYAkF8yDffNgadd0s2HLrnUdW2Hw6ErZEH3IYxDPXFpbSOOdW37+WbdzlO9tXARBLd1W0S4bUbsCNz7mOfZzFQVWZiS2yqpALjZEM4ATAlcU8QazhGqZnXambq54i+Aqk5l18YiIo/3l3l/IA53M4uScO1UKtgGKWNX92gUabebTpdLShKuj
Di0uTsi5ZSamnnPUoXqdX1d8oFTjDHW04u2tVTvdCzzdGwa++Nx6yeKLBIAHpDB4C1HSBRm5u6qij/+L/+KQAB0dODMkpK7M7GNlX7JbCBXs7Zc713Xw+2ziHB3AAzzXOTxYZ12eYxWUkGkbdjtcXp8fEAo05QVYmIajqMv92/unzx9cr3AdjmXPYQCgt3dvtv5reTrQ7N1V59dLx0R9ocy3tJl2t2oWm92ON708xsNT5TuL69uj89ZWIHmeb4uiwjjUDN3Nw8lqsQgnIe2cCd0HS2VrMMDHAAQKAkuywqEOReCsEDTES5ll82MCYliXRogIRIBcqa3WtvCo3e9u9uF566KCBgS0AOAaVIbTMRCD4+X43FPRGMMRFDDnApEBKi7MwMijq0hEacMSODd1HPOZuaRwxuitnYFSKOPMuUxek0SkgW9a9R6O/o1cTXtUhJBvi4Ppe4JcYwhkojYIQjJ3VmgqyNEuLsGEZt1gAhAEgk3Ny1Z3DNCAMC2LeFdak05P7x5NU14fVzNfL6Zl2t/9vSjy/LVfn5ibgDUe8tFGNPQTghmzoy994gQkQBgIfC0tWudJh2RUgI0ABhdkZAICHMArdtpyru1rTmnWnIAmikRmmrJSXWEg/lb1rYt5SoZwyjXpG91rbUuy9Wd533qzUul3kYEEolGz2nn2lQxJWnt4qGtbTkfUtpFdCYGcBYZagBIROuy5Zzdo5TkpmOMlOsYgwVEGJFHW0utZoOgBOiyjlwwDN2NRIgQwsyiFjk9PnpXqUy88zDhmnNZtwfCFOHDPWMaowPjaEYwmCEovv7yenu8y9ncw1x9tDEG51yn6fXr19M05Zz/P0ezVIcqRzLfAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from mmpose.apis import (inference_top_down_pose_model, init_pose_model,\n",
+ " vis_pose_result, process_mmdet_results)\n",
+ "from mmdet.apis import inference_detector, init_detector\n",
+ "local_runtime = False\n",
+ "\n",
+ "try:\n",
+ " from google.colab.patches import cv2_imshow # for image visualization in colab\n",
+ "except:\n",
+ " local_runtime = True\n",
+ "\n",
+ "\n",
+ "pose_checkpoint = 'work_dirs/hrnet_w32_coco_tiny_256x192/latest.pth'\n",
+ "det_config = 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'\n",
+ "det_checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'\n",
+ "\n",
+ "# initialize pose model\n",
+ "pose_model = init_pose_model(cfg, pose_checkpoint)\n",
+ "# initialize detector\n",
+ "det_model = init_detector(det_config, det_checkpoint)\n",
+ "\n",
+ "img = 'tests/data/coco/000000196141.jpg'\n",
+ "\n",
+ "# inference detection\n",
+ "mmdet_results = inference_detector(det_model, img)\n",
+ "\n",
+ "# extract person (COCO_ID=1) bounding boxes from the detection results\n",
+ "person_results = process_mmdet_results(mmdet_results, cat_id=1)\n",
+ "\n",
+ "# inference pose\n",
+ "pose_results, returned_outputs = inference_top_down_pose_model(pose_model,\n",
+ " img,\n",
+ " person_results,\n",
+ " bbox_thr=0.3,\n",
+ " format='xyxy',\n",
+ " dataset='TopDownCocoDataset')\n",
+ "\n",
+ "# show pose estimation results\n",
+ "vis_result = vis_pose_result(pose_model,\n",
+ " img,\n",
+ " pose_results,\n",
+ " kpt_score_thr=0.,\n",
+ " dataset='TopDownCocoDataset',\n",
+ " show=False)\n",
+ "\n",
+ "# reduce image size\n",
+ "vis_result = cv2.resize(vis_result, dsize=None, fx=0.5, fy=0.5)\n",
+ "\n",
+ "if local_runtime:\n",
+ " from IPython.display import Image, display\n",
+ " import tempfile\n",
+ " import os.path as osp\n",
+ " import cv2\n",
+ " with tempfile.TemporaryDirectory() as tmpdir:\n",
+ " file_name = osp.join(tmpdir, 'pose_results.png')\n",
+ " cv2.imwrite(file_name, vis_result)\n",
+ " display(Image(file_name))\n",
+ "else:\n",
+ " cv2_imshow(vis_result)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [],
+ "name": "MMPose_Tutorial.ipynb",
+ "provenance": []
+ },
+ "interpreter": {
+ "hash": "46cabf725503616575ee9df11fae44e77863ccc5fe9a7400abcc9d5976385eac"
+ },
+ "kernelspec": {
+ "display_name": "Python 3.9.6 64-bit ('pt1.9': conda)",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "1d31e1f7256d42669d76f54a8a844b79": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "210e7151c2ad44a3ba79d477f91d8b26": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "43ef0a1859c342dab6f6cd620ae78ba7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "864769e1e83c4b5d89baaa373c181f07": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9035c6e9fddd41d8b7dae395c93410a2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "90e3675160374766b5387ddb078fa3c5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a0bf65a0401e465393ef8720ef3328ac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9035c6e9fddd41d8b7dae395c93410a2",
+ "max": 132594821,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1d31e1f7256d42669d76f54a8a844b79",
+ "value": 132594821
+ }
+ },
+ "a3dc245089464b159bbdd5fc71afa1bc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a724d84941224553b1fab6c0b489213d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43ef0a1859c342dab6f6cd620ae78ba7",
+ "placeholder": "",
+ "style": "IPY_MODEL_90e3675160374766b5387ddb078fa3c5",
+ "value": " 126M/126M [00:11<00:00, 9.14MB/s]"
+ }
+ },
+ "ae33a61272f84a7981bc1f3008458688": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a3dc245089464b159bbdd5fc71afa1bc",
+ "placeholder": "",
+ "style": "IPY_MODEL_864769e1e83c4b5d89baaa373c181f07",
+ "value": "100%"
+ }
+ },
+ "c50b2c7b3d58486d9941509548a877e4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ae33a61272f84a7981bc1f3008458688",
+ "IPY_MODEL_a0bf65a0401e465393ef8720ef3328ac",
+ "IPY_MODEL_a724d84941224553b1fab6c0b489213d"
+ ],
+ "layout": "IPY_MODEL_210e7151c2ad44a3ba79d477f91d8b26"
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
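The notebook cell above assumes that `cfg` (the pose config loaded in an earlier tutorial cell) and `cv2` are already defined. For readers skimming the diff, a rough self-contained sketch of the same top-down pipeline is given below; the config and checkpoint paths are placeholders, not files shipped in this repository.

```python
# Minimal sketch of the top-down 2D pose pipeline from the tutorial cell.
# All paths are placeholders; substitute your own configs and checkpoints.
import cv2
from mmdet.apis import inference_detector, init_detector
from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         process_mmdet_results, vis_pose_result)

det_config = 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'       # placeholder
det_checkpoint = 'checkpoints/faster_rcnn_r50_fpn_1x_coco.pth'        # placeholder
pose_config = 'configs/hrnet_w32_coco_256x192.py'                     # placeholder
pose_checkpoint = 'work_dirs/hrnet_w32_coco_tiny_256x192/latest.pth'  # placeholder

det_model = init_detector(det_config, det_checkpoint)
pose_model = init_pose_model(pose_config, pose_checkpoint)

img = 'tests/data/coco/000000196141.jpg'

# 1) detect people, 2) keep person boxes (cat_id=1), 3) estimate keypoints per box
mmdet_results = inference_detector(det_model, img)
person_results = process_mmdet_results(mmdet_results, cat_id=1)
pose_results, _ = inference_top_down_pose_model(
    pose_model, img, person_results,
    bbox_thr=0.3, format='xyxy', dataset='TopDownCocoDataset')

# draw keypoints, downscale, and save to disk instead of displaying inline
vis = vis_pose_result(pose_model, img, pose_results,
                      kpt_score_thr=0., dataset='TopDownCocoDataset', show=False)
cv2.imwrite('pose_results.png', cv2.resize(vis, dsize=None, fx=0.5, fy=0.5))
```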
diff --git a/vendor/ViTPose/demo/README.md b/vendor/ViTPose/demo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..60ecbc33987a0e8fa77c9deaa99918c0e9373d0a
--- /dev/null
+++ b/vendor/ViTPose/demo/README.md
@@ -0,0 +1,75 @@
+# Demo
+
+This page provides tutorials about running demos. Please click the caption for more information.
+
+- [2D human pose demo](docs/2d_human_pose_demo.md)
+- [2D human whole-body pose demo](docs/2d_wholebody_pose_demo.md)
+- [2D hand pose demo](docs/2d_hand_demo.md)
+- [2D face keypoint demo](docs/2d_face_demo.md)
+- [3D human pose demo](docs/3d_human_pose_demo.md)
+- [2D pose tracking demo](docs/2d_pose_tracking_demo.md)
+- [2D animal_pose demo](docs/2d_animal_demo.md)
+- [3D hand_pose demo](docs/3d_hand_demo.md)
+- [Webcam demo](docs/webcam_demo.md)
+
diff --git a/vendor/ViTPose/demo/body3d_two_stage_img_demo.py b/vendor/ViTPose/demo/body3d_two_stage_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cc6b0d8923cb8130b61a1b10b61179b79d01424
--- /dev/null
+++ b/vendor/ViTPose/demo/body3d_two_stage_img_demo.py
@@ -0,0 +1,296 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import warnings
+from argparse import ArgumentParser
+
+import mmcv
+import numpy as np
+from xtcocotools.coco import COCO
+
+from mmpose.apis import (inference_pose_lifter_model,
+ inference_top_down_pose_model, vis_3d_pose_result)
+from mmpose.apis.inference import init_pose_model
+from mmpose.core import SimpleCamera
+from mmpose.datasets import DatasetInfo
+
+
+def _keypoint_camera_to_world(keypoints,
+ camera_params,
+ image_name=None,
+ dataset='Body3DH36MDataset'):
+ """Project 3D keypoints from the camera space to the world space.
+
+ Args:
+ keypoints (np.ndarray): 3D keypoints in shape [..., 3]
+ camera_params (dict): Parameters for all cameras.
+ image_name (str): The image name to specify the camera.
+ dataset (str): The dataset type, e.g. Body3DH36MDataset.
+ """
+ cam_key = None
+ if dataset == 'Body3DH36MDataset':
+ subj, rest = osp.basename(image_name).split('_', 1)
+ _, rest = rest.split('.', 1)
+ camera, rest = rest.split('_', 1)
+ cam_key = (subj, camera)
+ else:
+ raise NotImplementedError
+
+ camera = SimpleCamera(camera_params[cam_key])
+ keypoints_world = keypoints.copy()
+ keypoints_world[..., :3] = camera.camera_to_world(keypoints[..., :3])
+
+ return keypoints_world
+
+
+def main():
+ parser = ArgumentParser()
+ parser.add_argument(
+ 'pose_lifter_config',
+ help='Config file for the 2nd stage pose lifter model')
+ parser.add_argument(
+ 'pose_lifter_checkpoint',
+ help='Checkpoint file for the 2nd stage pose lifter model')
+ parser.add_argument(
+ '--pose-detector-config',
+ type=str,
+ default=None,
+ help='Config file for the 1st stage 2D pose detector')
+ parser.add_argument(
+ '--pose-detector-checkpoint',
+ type=str,
+ default=None,
+ help='Checkpoint file for the 1st stage 2D pose detector')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument(
+ '--json-file',
+ type=str,
+ default=None,
+        help='Json file containing image and bbox information. Optionally, '
+        'the Json file can also contain 2D pose information. See '
+        '"--only-second-stage".')
+ parser.add_argument(
+ '--camera-param-file',
+ type=str,
+ default=None,
+ help='Camera parameter file for converting 3D pose predictions from '
+        'the camera space to the world space. If None, no conversion will be '
+ 'applied.')
+ parser.add_argument(
+ '--only-second-stage',
+ action='store_true',
+ help='If true, load 2D pose detection result from the Json file and '
+ 'skip the 1st stage. The pose detection model will be ignored.')
+ parser.add_argument(
+ '--rebase-keypoint-height',
+ action='store_true',
+ help='Rebase the predicted 3D pose so its lowest keypoint has a '
+ 'height of 0 (landing on the ground). This is useful for '
+        'visualization when the model does not predict the global position '
+ 'of the 3D pose.')
+ parser.add_argument(
+ '--show-ground-truth',
+ action='store_true',
+ help='If True, show ground truth if it is available. The ground truth '
+ 'should be contained in the annotations in the Json file with the key '
+ '"keypoints_3d" for each instance.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default=None,
+ help='Root of the output visualization images. '
+        'By default, the visualization images are not saved.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device for inference')
+ parser.add_argument('--kpt-thr', type=float, default=0.3)
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+ assert args.show or (args.out_img_root != '')
+
+ coco = COCO(args.json_file)
+
+ # First stage: 2D pose detection
+ pose_det_results_list = []
+ if args.only_second_stage:
+ from mmpose.apis.inference import _xywh2xyxy
+
+ print('Stage 1: load 2D pose results from Json file.')
+ for image_id, image in coco.imgs.items():
+ image_name = osp.join(args.img_root, image['file_name'])
+ ann_ids = coco.getAnnIds(image_id)
+ pose_det_results = []
+ for ann_id in ann_ids:
+ ann = coco.anns[ann_id]
+ keypoints = np.array(ann['keypoints']).reshape(-1, 3)
+ keypoints[..., 2] = keypoints[..., 2] >= 1
+ keypoints_3d = np.array(ann['keypoints_3d']).reshape(-1, 4)
+ keypoints_3d[..., 3] = keypoints_3d[..., 3] >= 1
+ bbox = np.array(ann['bbox']).reshape(1, -1)
+
+ pose_det_result = {
+ 'image_name': image_name,
+ 'bbox': _xywh2xyxy(bbox),
+ 'keypoints': keypoints,
+ 'keypoints_3d': keypoints_3d
+ }
+ pose_det_results.append(pose_det_result)
+ pose_det_results_list.append(pose_det_results)
+
+ else:
+ print('Stage 1: 2D pose detection.')
+
+ pose_det_model = init_pose_model(
+ args.pose_detector_config,
+ args.pose_detector_checkpoint,
+ device=args.device.lower())
+
+    assert pose_det_model.cfg.model.type == 'TopDown', 'Only "TopDown" ' \
+ 'model is supported for the 1st stage (2D pose detection)'
+
+ dataset = pose_det_model.cfg.data['test']['type']
+ dataset_info = pose_det_model.cfg.data['test'].get(
+ 'dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+                'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ img_keys = list(coco.imgs.keys())
+
+ for i in mmcv.track_iter_progress(range(len(img_keys))):
+ # get bounding box annotations
+ image_id = img_keys[i]
+ image = coco.loadImgs(image_id)[0]
+ image_name = osp.join(args.img_root, image['file_name'])
+ ann_ids = coco.getAnnIds(image_id)
+
+ # make person results for single image
+ person_results = []
+ for ann_id in ann_ids:
+ person = {}
+ ann = coco.anns[ann_id]
+ person['bbox'] = ann['bbox']
+ person_results.append(person)
+
+ pose_det_results, _ = inference_top_down_pose_model(
+ pose_det_model,
+ image_name,
+ person_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=False,
+ outputs=None)
+
+ for res in pose_det_results:
+ res['image_name'] = image_name
+ pose_det_results_list.append(pose_det_results)
+
+ # Second stage: Pose lifting
+ print('Stage 2: 2D-to-3D pose lifting.')
+
+ pose_lift_model = init_pose_model(
+ args.pose_lifter_config,
+ args.pose_lifter_checkpoint,
+ device=args.device.lower())
+
+    assert pose_lift_model.cfg.model.type == 'PoseLifter', 'Only ' \
+ '"PoseLifter" model is supported for the 2nd stage ' \
+ '(2D-to-3D lifting)'
+ dataset = pose_lift_model.cfg.data['test']['type']
+ dataset_info = pose_lift_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ camera_params = None
+ if args.camera_param_file is not None:
+ camera_params = mmcv.load(args.camera_param_file)
+
+ for i, pose_det_results in enumerate(
+ mmcv.track_iter_progress(pose_det_results_list)):
+ # 2D-to-3D pose lifting
+ # Note that the pose_det_results are regarded as a single-frame pose
+ # sequence
+ pose_lift_results = inference_pose_lifter_model(
+ pose_lift_model,
+ pose_results_2d=[pose_det_results],
+ dataset=dataset,
+ dataset_info=dataset_info,
+ with_track_id=False)
+
+ image_name = pose_det_results[0]['image_name']
+
+ # Pose processing
+ pose_lift_results_vis = []
+ for idx, res in enumerate(pose_lift_results):
+ keypoints_3d = res['keypoints_3d']
+ # project to world space
+ if camera_params is not None:
+ keypoints_3d = _keypoint_camera_to_world(
+ keypoints_3d,
+ camera_params=camera_params,
+ image_name=image_name,
+ dataset=dataset)
+ # rebase height (z-axis)
+ if args.rebase_keypoint_height:
+ keypoints_3d[..., 2] -= np.min(
+ keypoints_3d[..., 2], axis=-1, keepdims=True)
+ res['keypoints_3d'] = keypoints_3d
+ # Add title
+ det_res = pose_det_results[idx]
+ instance_id = det_res.get('track_id', idx)
+ res['title'] = f'Prediction ({instance_id})'
+ pose_lift_results_vis.append(res)
+ # Add ground truth
+ if args.show_ground_truth:
+ if 'keypoints_3d' not in det_res:
+ print('Fail to show ground truth. Please make sure that'
+ ' the instance annotations from the Json file'
+ ' contain "keypoints_3d".')
+ else:
+ gt = res.copy()
+ gt['keypoints_3d'] = det_res['keypoints_3d']
+ gt['title'] = f'Ground truth ({instance_id})'
+ pose_lift_results_vis.append(gt)
+
+ # Visualization
+ if args.out_img_root is None:
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = osp.join(args.out_img_root, f'vis_{i}.jpg')
+
+ vis_3d_pose_result(
+ pose_lift_model,
+ result=pose_lift_results_vis,
+ img=image_name,
+ dataset_info=dataset_info,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
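The script above runs in two stages: stage 1 produces per-person 2D keypoints (either with a TopDown pose model, or loaded directly from the Json file when `--only-second-stage` is set), and stage 2 lifts them to 3D with a PoseLifter model, optionally converting the result from camera space to world space via `--camera-param-file`. A minimal sketch of the lifting stage alone is shown below; the config and checkpoint paths are placeholders and the 2D keypoints are dummy zeros, used only to show the expected input structure.

```python
# Sketch of the second stage only (2D -> 3D lifting), mirroring the calls above.
# Paths are placeholders; each 2D result is a per-person dict, as built by the script.
import numpy as np
from mmpose.apis import inference_pose_lifter_model, init_pose_model

lifter = init_pose_model(
    'configs/simplebaseline3d_h36m.py',   # placeholder pose-lifter config
    'checkpoints/simplebaseline3d.pth',   # placeholder pose-lifter checkpoint
    device='cuda:0')

# One frame, one person: 17 H36M-style joints as (x, y, score) rows (dummy values).
pose_det_results = [{'keypoints': np.zeros((17, 3)), 'bbox': np.zeros((1, 4))}]

pose_lift_results = inference_pose_lifter_model(
    lifter,
    pose_results_2d=[pose_det_results],   # treated as a single-frame sequence
    dataset=lifter.cfg.data['test']['type'],
    with_track_id=False)

for res in pose_lift_results:
    kpts_3d = res['keypoints_3d']  # per-joint 3D coordinates in camera space
    print(kpts_3d.shape)
```

For the full pipeline, including detection, visualization, and the camera-to-world conversion, run the script itself with the arguments defined in its parser (`pose_lifter_config`, `pose_lifter_checkpoint`, `--json-file`, `--img-root`, `--out-img-root`, and so on).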
diff --git a/vendor/ViTPose/demo/body3d_two_stage_video_demo.py b/vendor/ViTPose/demo/body3d_two_stage_video_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f47f62aeb8f4b65f340c46f6b9580e773f9100f
--- /dev/null
+++ b/vendor/ViTPose/demo/body3d_two_stage_video_demo.py
@@ -0,0 +1,307 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os
+import os.path as osp
+from argparse import ArgumentParser
+
+import cv2
+import mmcv
+import numpy as np
+
+from mmpose.apis import (extract_pose_sequence, get_track_id,
+ inference_pose_lifter_model,
+ inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_3d_pose_result)
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+
+def covert_keypoint_definition(keypoints, pose_det_dataset, pose_lift_dataset):
+ """Convert pose det dataset keypoints definition to pose lifter dataset
+ keypoints definition.
+
+ Args:
+ keypoints (ndarray[K, 2 or 3]): 2D keypoints to be transformed.
+        pose_det_dataset (str): Name of the dataset for 2D pose detector.
+ pose_lift_dataset (str): Name of the dataset for pose lifter model.
+ """
+ if pose_det_dataset == 'TopDownH36MDataset' and \
+ pose_lift_dataset == 'Body3DH36MDataset':
+ return keypoints
+ elif pose_det_dataset == 'TopDownCocoDataset' and \
+ pose_lift_dataset == 'Body3DH36MDataset':
+ keypoints_new = np.zeros((17, keypoints.shape[1]))
+ # pelvis is in the middle of l_hip and r_hip
+ keypoints_new[0] = (keypoints[11] + keypoints[12]) / 2
+ # thorax is in the middle of l_shoulder and r_shoulder
+ keypoints_new[8] = (keypoints[5] + keypoints[6]) / 2
+ # head is in the middle of l_eye and r_eye
+ keypoints_new[10] = (keypoints[1] + keypoints[2]) / 2
+ # spine is in the middle of thorax and pelvis
+ keypoints_new[7] = (keypoints_new[0] + keypoints_new[8]) / 2
+ # rearrange other keypoints
+ keypoints_new[[1, 2, 3, 4, 5, 6, 9, 11, 12, 13, 14, 15, 16]] = \
+ keypoints[[12, 14, 16, 11, 13, 15, 0, 5, 7, 9, 6, 8, 10]]
+ return keypoints_new
+ else:
+ raise NotImplementedError
+
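+# Example (illustrative only): the mapping above can be sanity-checked with a
+# dummy COCO-format array, e.g.
+#   coco_kpts = np.arange(17 * 3, dtype=float).reshape(17, 3)
+#   h36m_kpts = covert_keypoint_definition(
+#       coco_kpts, 'TopDownCocoDataset', 'Body3DH36MDataset')
+#   # pelvis (index 0) is the midpoint of the COCO hips (indices 11 and 12)
+#   assert np.allclose(h36m_kpts[0], (coco_kpts[11] + coco_kpts[12]) / 2)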
+
+def main():
+ parser = ArgumentParser()
+ parser.add_argument('det_config', help='Config file for detection')
+ parser.add_argument('det_checkpoint', help='Checkpoint file for detection')
+ parser.add_argument(
+ 'pose_detector_config',
+ type=str,
+ default=None,
+ help='Config file for the 1st stage 2D pose detector')
+ parser.add_argument(
+ 'pose_detector_checkpoint',
+ type=str,
+ default=None,
+ help='Checkpoint file for the 1st stage 2D pose detector')
+ parser.add_argument(
+ 'pose_lifter_config',
+ help='Config file for the 2nd stage pose lifter model')
+ parser.add_argument(
+ 'pose_lifter_checkpoint',
+ help='Checkpoint file for the 2nd stage pose lifter model')
+ parser.add_argument(
+ '--video-path', type=str, default='', help='Video path')
+ parser.add_argument(
+ '--rebase-keypoint-height',
+ action='store_true',
+ help='Rebase the predicted 3D pose so its lowest keypoint has a '
+ 'height of 0 (landing on the ground). This is useful for '
+        'visualization when the model does not predict the global position '
+ 'of the 3D pose.')
+ parser.add_argument(
+ '--norm-pose-2d',
+ action='store_true',
+ help='Scale the bbox (along with the 2D pose) to the average bbox '
+ 'scale of the dataset, and move the bbox (along with the 2D pose) to '
+ 'the average bbox center of the dataset. This is useful when bbox '
+ 'is small, especially in multi-person scenarios.')
+ parser.add_argument(
+ '--num-instances',
+ type=int,
+ default=-1,
+ help='The number of 3D poses to be visualized in every frame. If '
+ 'less than 0, it will be set to the number of pose results in the '
+ 'first frame.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ type=str,
+ default=None,
+ help='Root of the output video file. '
+        'By default, the visualization video is not saved.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device for inference')
+ parser.add_argument(
+ '--det-cat-id',
+ type=int,
+ default=1,
+ help='Category id for bounding box detection model')
+ parser.add_argument(
+ '--bbox-thr',
+ type=float,
+ default=0.9,
+ help='Bounding box score threshold')
+ parser.add_argument('--kpt-thr', type=float, default=0.3)
+ parser.add_argument(
+ '--use-oks-tracking', action='store_true', help='Using OKS tracking')
+ parser.add_argument(
+ '--tracking-thr', type=float, default=0.3, help='Tracking threshold')
+ parser.add_argument(
+ '--euro',
+ action='store_true',
+ help='Using One_Euro_Filter for smoothing')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=8,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=2,
+ help='Link thickness for visualization')
+
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+
+ args = parser.parse_args()
+ assert args.show or (args.out_video_root != '')
+ assert args.det_config is not None
+ assert args.det_checkpoint is not None
+
+ video = mmcv.VideoReader(args.video_path)
+ assert video.opened, f'Failed to load video file {args.video_path}'
+
+ # First stage: 2D pose detection
+ print('Stage 1: 2D pose detection.')
+
+ person_det_model = init_detector(
+ args.det_config, args.det_checkpoint, device=args.device.lower())
+
+ pose_det_model = init_pose_model(
+ args.pose_detector_config,
+ args.pose_detector_checkpoint,
+ device=args.device.lower())
+
+    assert pose_det_model.cfg.model.type == 'TopDown', 'Only "TopDown" ' \
+        'model is supported for the 1st stage (2D pose detection)'
+
+ pose_det_dataset = pose_det_model.cfg.data['test']['type']
+
+ pose_det_results_list = []
+ next_id = 0
+ pose_det_results = []
+ for frame in video:
+ pose_det_results_last = pose_det_results
+
+ # test a single image, the resulting box is (x1, y1, x2, y2)
+ mmdet_results = inference_detector(person_det_model, frame)
+
+ # keep the person class bounding boxes.
+ person_det_results = process_mmdet_results(mmdet_results,
+ args.det_cat_id)
+
+ # make person results for single image
+ pose_det_results, _ = inference_top_down_pose_model(
+ pose_det_model,
+ frame,
+ person_det_results,
+ bbox_thr=args.bbox_thr,
+ format='xyxy',
+ dataset=pose_det_dataset,
+ return_heatmap=False,
+ outputs=None)
+
+ # get track id for each person instance
+ pose_det_results, next_id = get_track_id(
+ pose_det_results,
+ pose_det_results_last,
+ next_id,
+ use_oks=args.use_oks_tracking,
+ tracking_thr=args.tracking_thr,
+ use_one_euro=args.euro,
+ fps=video.fps)
+
+ pose_det_results_list.append(copy.deepcopy(pose_det_results))
+
+ # Second stage: Pose lifting
+ print('Stage 2: 2D-to-3D pose lifting.')
+
+ pose_lift_model = init_pose_model(
+ args.pose_lifter_config,
+ args.pose_lifter_checkpoint,
+ device=args.device.lower())
+
+ assert pose_lift_model.cfg.model.type == 'PoseLifter', \
+ 'Only "PoseLifter" model is supported for the 2nd stage ' \
+ '(2D-to-3D lifting)'
+ pose_lift_dataset = pose_lift_model.cfg.data['test']['type']
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ fps = video.fps
+ writer = None
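+        # the cv2.VideoWriter is created lazily on the first visualized frame,
+        # once the size of the output image is known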
+
+ # convert keypoint definition
+ for pose_det_results in pose_det_results_list:
+ for res in pose_det_results:
+ keypoints = res['keypoints']
+ res['keypoints'] = covert_keypoint_definition(
+ keypoints, pose_det_dataset, pose_lift_dataset)
+
+    # load temporal padding config from the model config (test_data_cfg if present, otherwise data_cfg)
+ if hasattr(pose_lift_model.cfg, 'test_data_cfg'):
+ data_cfg = pose_lift_model.cfg.test_data_cfg
+ else:
+ data_cfg = pose_lift_model.cfg.data_cfg
+
+ num_instances = args.num_instances
+ for i, pose_det_results in enumerate(
+ mmcv.track_iter_progress(pose_det_results_list)):
+ # extract and pad input pose2d sequence
+ pose_results_2d = extract_pose_sequence(
+ pose_det_results_list,
+ frame_idx=i,
+ causal=data_cfg.causal,
+ seq_len=data_cfg.seq_len,
+ step=data_cfg.seq_frame_interval)
+ # 2D-to-3D pose lifting
+ pose_lift_results = inference_pose_lifter_model(
+ pose_lift_model,
+ pose_results_2d=pose_results_2d,
+ dataset=pose_lift_dataset,
+ with_track_id=True,
+ image_size=video.resolution,
+ norm_pose_2d=args.norm_pose_2d)
+
+ # Pose processing
+ pose_lift_results_vis = []
+ for idx, res in enumerate(pose_lift_results):
+ keypoints_3d = res['keypoints_3d']
+ # exchange y,z-axis, and then reverse the direction of x,z-axis
+ keypoints_3d = keypoints_3d[..., [0, 2, 1]]
+ keypoints_3d[..., 0] = -keypoints_3d[..., 0]
+ keypoints_3d[..., 2] = -keypoints_3d[..., 2]
+ # rebase height (z-axis)
+ if args.rebase_keypoint_height:
+ keypoints_3d[..., 2] -= np.min(
+ keypoints_3d[..., 2], axis=-1, keepdims=True)
+ res['keypoints_3d'] = keypoints_3d
+ # add title
+ det_res = pose_det_results[idx]
+ instance_id = det_res['track_id']
+ res['title'] = f'Prediction ({instance_id})'
+ # only visualize the target frame
+ res['keypoints'] = det_res['keypoints']
+ res['bbox'] = det_res['bbox']
+ res['track_id'] = instance_id
+ pose_lift_results_vis.append(res)
+
+ # Visualization
+ if num_instances < 0:
+ num_instances = len(pose_lift_results_vis)
+ img_vis = vis_3d_pose_result(
+ pose_lift_model,
+ result=pose_lift_results_vis,
+ img=video[i],
+ out_file=None,
+ radius=args.radius,
+ thickness=args.thickness,
+ num_instances=num_instances)
+
+ if save_out_video:
+ if writer is None:
+ writer = cv2.VideoWriter(
+ osp.join(args.out_video_root,
+ f'vis_{osp.basename(args.video_path)}'), fourcc,
+ fps, (img_vis.shape[1], img_vis.shape[0]))
+ writer.write(img_vis)
+
+ if save_out_video:
+ writer.release()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/bottom_up_img_demo.py b/vendor/ViTPose/demo/bottom_up_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae343acd69458925f160937dd805a87e50d9d25b
--- /dev/null
+++ b/vendor/ViTPose/demo/bottom_up_img_demo.py
@@ -0,0 +1,127 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import warnings
+from argparse import ArgumentParser
+
+import mmcv
+
+from mmpose.apis import (inference_bottom_up_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+
+def main():
+ """Visualize the demo images."""
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for detection')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file')
+ parser.add_argument(
+ '--img-path',
+ type=str,
+        help='Path to an image file or an image folder.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default='',
+ help='Root of the output img file. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--pose-nms-thr',
+ type=float,
+ default=0.9,
+ help='OKS threshold for pose NMS')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_img_root != '')
+
+ # prepare image list
+ if osp.isfile(args.img_path):
+ image_list = [args.img_path]
+ elif osp.isdir(args.img_path):
+ image_list = [
+ osp.join(args.img_path, fn) for fn in os.listdir(args.img_path)
+ if fn.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp'))
+ ]
+ else:
+        raise ValueError('Image path should be an image or an image folder. '
+                         f'Got invalid image path: {args.img_path}')
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ assert (dataset == 'BottomUpCocoDataset')
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ # process each image
+ for image_name in mmcv.track_iter_progress(image_list):
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_bottom_up_pose_model(
+ pose_model,
+ image_name,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ pose_nms_thr=args.pose_nms_thr,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ if args.out_img_root == '':
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = os.path.join(
+ args.out_img_root,
+ f'vis_{osp.splitext(osp.basename(image_name))[0]}.jpg')
+
+ # show the results
+ vis_pose_result(
+ pose_model,
+ image_name,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=args.show,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/bottom_up_pose_tracking_demo.py b/vendor/ViTPose/demo/bottom_up_pose_tracking_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79e1f40de85995815048123c452a022e676d0e6
--- /dev/null
+++ b/vendor/ViTPose/demo/bottom_up_pose_tracking_demo.py
@@ -0,0 +1,158 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (get_track_id, inference_bottom_up_pose_model,
+ init_pose_model, vis_pose_tracking_result)
+from mmpose.datasets import DatasetInfo
+
+
+def main():
+ """Visualize the demo images."""
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.5, help='Keypoint score threshold')
+ parser.add_argument(
+ '--pose-nms-thr',
+ type=float,
+ default=0.9,
+ help='OKS threshold for pose NMS')
+ parser.add_argument(
+ '--use-oks-tracking', action='store_true', help='Using OKS tracking')
+ parser.add_argument(
+ '--tracking-thr', type=float, default=0.3, help='Tracking threshold')
+ parser.add_argument(
+ '--euro',
+ action='store_true',
+ help='Using One_Euro_Filter for smoothing')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ assert (dataset == 'BottomUpCocoDataset')
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+ fps = None
+
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+ next_id = 0
+ pose_results = []
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+ pose_results_last = pose_results
+
+ pose_results, returned_outputs = inference_bottom_up_pose_model(
+ pose_model,
+ img,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ pose_nms_thr=args.pose_nms_thr,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # get track id for each person instance
+ pose_results, next_id = get_track_id(
+ pose_results,
+ pose_results_last,
+ next_id,
+ use_oks=args.use_oks_tracking,
+ tracking_thr=args.tracking_thr,
+ use_one_euro=args.euro,
+ fps=fps)
+
+ # show the results
+ vis_img = vis_pose_tracking_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/bottom_up_video_demo.py b/vendor/ViTPose/demo/bottom_up_video_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..14785a0c031412f96fd09027e5a995d297c31e2e
--- /dev/null
+++ b/vendor/ViTPose/demo/bottom_up_video_demo.py
@@ -0,0 +1,135 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (inference_bottom_up_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+
+def main():
+ """Visualize the demo images."""
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--pose-nms-thr',
+ type=float,
+ default=0.9,
+ help='OKS threshold for pose NMS')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ assert (dataset == 'BottomUpCocoDataset')
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+
+ pose_results, returned_outputs = inference_bottom_up_pose_model(
+ pose_model,
+ img,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ pose_nms_thr=args.pose_nms_thr,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # show the results
+ vis_img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/docs/2d_animal_demo.md b/vendor/ViTPose/demo/docs/2d_animal_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb994e8b49f650e608672c306950a18c799f02f0
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_animal_demo.md
@@ -0,0 +1,148 @@
+## 2D Animal Pose Demo
+
+### 2D Animal Pose Image Demo
+
+#### Using gt animal bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+*Pose Model Preparation:*
+The pre-trained pose estimation model can be downloaded from [model zoo](https://mmpose.readthedocs.io/en/latest/topics/animal.html).
+Take [macaque model](https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth) as an example:
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py \
+ https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth \
+ --img-root tests/data/macaque/ --json-file tests/data/macaque/test_macaque.json \
+ --out-img-root vis_results
+```
+
+To run demos on CPU:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res50_macaque_256x192.py \
+ https://download.openmmlab.com/mmpose/animal/resnet/res50_macaque_256x192-98f1dd3a_20210407.pth \
+ --img-root tests/data/macaque/ --json-file tests/data/macaque/test_macaque.json \
+ --out-img-root vis_results \
+ --device=cpu
+```
+
+### 2D Animal Pose Video Demo
+
+We also provide video demos to illustrate the results.
+
+#### Using the full image as input
+
+If the video is cropped with the object centered in the screen, we can simply use the full image as the model input (without object detection).
+
+```shell
+python demo/top_down_video_demo_full_frame_without_det.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_full_frame_without_det.py \
+ configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py \
+ https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192-fcafbd5a_20210407.pth \
+ --video-path demo/resources/ \
+ --out-video-root vis_results
+```
+
+
+
+#### Using MMDetection to detect animals
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+**COCO-animals**
+
+In the COCO dataset, there are 80 object categories, including 10 common `animal` categories (15: 'bird', 16: 'cat', 17: 'dog', 18: 'horse', 19: 'sheep', 20: 'cow', 21: 'elephant', 22: 'bear', 23: 'zebra', 24: 'giraffe').
+For these COCO-animals, please download the COCO pre-trained detection model from [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ --det-cat-id ${CATEGORY_ID}
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
+ configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py \
+ https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1-3a3dc37e_20210405.pth \
+ --video-path demo/resources/ \
+ --out-video-root vis_results \
+ --bbox-thr 0.1 \
+ --kpt-thr 0.4 \
+ --det-cat-id 18
+```
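+
+For programmatic use, the demo above is a thin wrapper around the MMDetection and MMPose Python APIs. Below is a minimal sketch of the same top-down pipeline on a single frame (the checkpoint paths and the input image are placeholders for the downloads above; importing `process_mmdet_results` from `mmpose.apis` follows the demo scripts in this folder):
+
+```python
+from mmdet.apis import inference_detector, init_detector
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         process_mmdet_results, vis_pose_result)
+
+det_model = init_detector(
+    'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py',
+    'path/to/faster_rcnn_r50_fpn_2x_coco.pth')  # detector checkpoint (URL above)
+pose_model = init_pose_model(
+    'configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py',
+    'path/to/res50_horse10_256x256_split1.pth')  # pose checkpoint (URL above)
+dataset = pose_model.cfg.data['test']['type']
+
+img = 'path/to/frame.jpg'  # hypothetical input image
+mmdet_results = inference_detector(det_model, img)
+# keep only the 'horse' boxes; 18 matches the --det-cat-id used in the example above
+animal_results = process_mmdet_results(mmdet_results, cat_id=18)
+
+pose_results, _ = inference_top_down_pose_model(
+    pose_model, img, animal_results, bbox_thr=0.1, format='xyxy', dataset=dataset)
+vis_pose_result(pose_model, img, pose_results, dataset=dataset,
+                kpt_score_thr=0.4, out_file='vis_results/frame.jpg')
+```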
+
+
+
+**Other Animals**
+
+For other animals, we have also provided some pre-trained animal detection models (1-class models). Supported models can be found in [det model zoo](/demo/docs/mmdet_modelzoo.md).
+The pre-trained animal pose estimation model can be found in [pose model zoo](https://mmpose.readthedocs.io/en/latest/topics/animal.html).
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--det-cat-id ${CATEGORY_ID}]
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py \
+ https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_macaque-e45e36f5_20210409.pth \
+ configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/res152_macaque_256x192.py \
+ https://download.openmmlab.com/mmpose/animal/resnet/res152_macaque_256x192-c42abc02_20210407.pth \
+ --video-path demo/resources/ \
+ --out-video-root vis_results \
+ --bbox-thr 0.5 \
+ --kpt-thr 0.3 \
+```
+
+
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For 2D animal pose estimation models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [macaque-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/animal/resnet/macaque/res50_macaque_256x192.py#L51).
+1. set `post_process='default'` in [macaque-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/animal/resnet/macaque/res50_macaque_256x192.py#L52).
diff --git a/vendor/ViTPose/demo/docs/2d_face_demo.md b/vendor/ViTPose/demo/docs/2d_face_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..a3b0f8397ce1d185e20b9bac9dc19f719e266411
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_face_demo.md
@@ -0,0 +1,103 @@
+## 2D Face Keypoint Demo
+
+
+
+### 2D Face Image Demo
+
+#### Using gt face bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+*Face Keypoint Model Preparation:*
+The pre-trained face keypoint estimation model can be found from [model zoo](https://mmpose.readthedocs.io/en/latest/topics/face.html).
+Take [aflw model](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth) as an example:
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py \
+ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
+ --img-root tests/data/aflw/ --json-file tests/data/aflw/test_aflw.json \
+ --out-img-root vis_results
+```
+
+To run demos on CPU:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py \
+ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
+ --img-root tests/data/aflw/ --json-file tests/data/aflw/test_aflw.json \
+ --out-img-root vis_results \
+ --device=cpu
+```
+
+#### Using face bounding box detectors
+
+We provide a demo script to run face detection and face keypoint estimation.
+
+Please install `face_recognition` before running the demo, e.g. via `pip install face_recognition`.
+For more details, please refer to https://github.com/ageitgey/face_recognition.
+
+```shell
+python demo/face_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --img ${IMG_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+```shell
+python demo/face_img_demo.py \
+ configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py \
+ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
+ --img-root tests/data/aflw/ \
+ --img image04476.jpg \
+ --out-img-root vis_results
+```
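+
+Under the hood, `demo/face_img_demo.py` detects faces with `face_recognition`, converts its `(top, right, bottom, left)` boxes into xyxy bounding boxes, and runs the top-down keypoint model. A rough sketch of that flow (the checkpoint path is a placeholder for the download above):
+
+```python
+import face_recognition
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+pose_model = init_pose_model(
+    'configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py',
+    'path/to/hrnetv2_w18_aflw_256x256.pth')  # checkpoint from the URL above
+dataset = pose_model.cfg.data['test']['type']
+
+img_path = 'tests/data/aflw/image04476.jpg'
+# face_recognition expects an RGB image and returns (top, right, bottom, left) boxes
+image = face_recognition.load_image_file(img_path)
+face_results = [{'bbox': [left, top, right, bottom]}
+                for top, right, bottom, left in face_recognition.face_locations(image)]
+
+pose_results, _ = inference_top_down_pose_model(
+    pose_model, img_path, face_results, format='xyxy', dataset=dataset)
+vis_pose_result(pose_model, img_path, pose_results, dataset=dataset,
+                out_file='vis_results/vis_image04476.jpg')
+```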
+
+### 2D Face Video Demo
+
+We also provide a video demo to illustrate the results.
+
+Please install `face_recognition` before running the demo, e.g. via `pip install face_recognition`.
+For more details, please refer to https://github.com/ageitgey/face_recognition.
+
+```shell
+python demo/face_video_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/face_video_demo.py \
+ configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_w18_aflw_256x256.py \
+ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
+ --video-path https://user-images.githubusercontent.com/87690686/137441355-ec4da09c-3a8f-421b-bee9-b8b26f8c2dd0.mp4 \
+ --out-video-root vis_results
+```
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For 2D face keypoint estimation models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [face-hrnetv2_w18](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/face/hrnetv2/aflw/hrnetv2_w18_aflw_256x256.py#L83).
+1. set `post_process='default'` in [face-hrnetv2_w18](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/face/hrnetv2/aflw/hrnetv2_w18_aflw_256x256.py#L84).
diff --git a/vendor/ViTPose/demo/docs/2d_hand_demo.md b/vendor/ViTPose/demo/docs/2d_hand_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..14b30f749a1818a5b85309e3c1818a7b44d89aa3
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_hand_demo.md
@@ -0,0 +1,113 @@
+## 2D Hand Keypoint Demo
+
+
+
+### 2D Hand Image Demo
+
+#### Using gt hand bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+*Hand Pose Model Preparation:*
+The pre-trained hand pose estimation model can be downloaded from [model zoo](https://mmpose.readthedocs.io/en/latest/topics/hand%282d%29.html).
+Take [onehand10k model](https://download.openmmlab.com/mmpose/top_down/resnet/res50_onehand10k_256x256-e67998f6_20200813.pth) as an example:
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_onehand10k_256x256-e67998f6_20200813.pth \
+ --img-root tests/data/onehand10k/ --json-file tests/data/onehand10k/test_onehand10k.json \
+ --out-img-root vis_results
+```
+
+To run demos on CPU:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_onehand10k_256x256-e67998f6_20200813.pth \
+ --img-root tests/data/onehand10k/ --json-file tests/data/onehand10k/test_onehand10k.json \
+ --out-img-root vis_results \
+ --device=cpu
+```
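+
+For programmatic use, the gt-bbox workflow above amounts to reading the COCO-style json and passing its `bbox` entries (stored in xywh format) to the pose model. A rough sketch, assuming the onehand10k files from the example and a placeholder checkpoint path:
+
+```python
+import json
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+pose_model = init_pose_model(
+    'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py',
+    'path/to/res50_onehand10k_256x256.pth')  # checkpoint from the URL above
+dataset = pose_model.cfg.data['test']['type']
+
+with open('tests/data/onehand10k/test_onehand10k.json') as f:
+    coco = json.load(f)
+image_info = coco['images'][0]
+img_path = 'tests/data/onehand10k/' + image_info['file_name']
+# gt boxes in a COCO json are stored as xywh, so pass format='xywh'
+hand_results = [{'bbox': ann['bbox']} for ann in coco['annotations']
+                if ann['image_id'] == image_info['id']]
+
+pose_results, _ = inference_top_down_pose_model(
+    pose_model, img_path, hand_results, format='xywh', dataset=dataset)
+vis_pose_result(pose_model, img_path, pose_results, dataset=dataset,
+                out_file='vis_results/vis_' + image_info['file_name'])
+```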
+
+#### Using mmdet for hand bounding box detection
+
+We provide a demo script to run mmdet for hand detection, and mmpose for hand pose estimation.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+*Hand Box Model Preparation:* The pre-trained hand box estimation model can be found in [det model zoo](/demo/docs/mmdet_modelzoo.md).
+
+*Hand Pose Model Preparation:* The pre-trained hand pose estimation model can be downloaded from [pose model zoo](https://mmpose.readthedocs.io/en/latest/topics/hand%282d%29.html).
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --img ${IMG_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py \
+ https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth \
+ configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_onehand10k_256x256-e67998f6_20200813.pth \
+ --img-root tests/data/onehand10k/ \
+ --img 9.jpg \
+ --out-img-root vis_results
+```
+
+### 2D Hand Video Demo
+
+We also provide a video demo to illustrate the results.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+*Hand Box Model Preparation:* The pre-trained hand box estimation model can be found in [det model zoo](/demo/docs/mmdet_modelzoo.md).
+
+*Hand Pose Model Preparation:* The pre-trained hand pose estimation model can be found in [pose model zoo](https://mmpose.readthedocs.io/en/latest/topics/hand%282d%29.html).
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py \
+ https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth \
+ configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/res50_onehand10k_256x256.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_onehand10k_256x256-e67998f6_20200813.pth \
+ --video-path https://user-images.githubusercontent.com/87690686/137441388-3ea93d26-5445-4184-829e-bf7011def9e4.mp4 \
+ --out-video-root vis_results
+```
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For 2D hand pose estimation models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [hand-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/hand/resnet/onehand10k/res50_onehand10k_256x256.py#L56).
+1. set `post_process='default'` in [hand-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/hand/resnet/onehand10k/res50_onehand10k_256x256.py#L57).
diff --git a/vendor/ViTPose/demo/docs/2d_human_pose_demo.md b/vendor/ViTPose/demo/docs/2d_human_pose_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc264a34da3e5917a33b5282e35fd2c7aaa5066d
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_human_pose_demo.md
@@ -0,0 +1,159 @@
+## 2D Human Pose Demo
+
+
+
+### 2D Human Pose Top-Down Image Demo
+
+#### Using gt human bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results
+```
+
+To run demos on CPU:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results \
+ --device=cpu
+```
+
+#### Using mmdet for human bounding box detection
+
+We provide a demo script to run mmdet for human detection, and mmpose for pose estimation.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --img ${IMG_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --img-root tests/data/coco/ \
+ --img 000000196141.jpg \
+ --out-img-root vis_results
+```
+
+### 2D Human Pose Top-Down Video Demo
+
+We also provide a video demo to illustrate the results.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --video-path demo/resources/demo.mp4 \
+ --out-video-root vis_results
+```
+
+### 2D Human Pose Bottom-Up Image Demo
+
+We provide a demo script to test a single image.
+
+```shell
+python demo/bottom_up_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-path ${IMG_PATH}\
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR} --pose-nms-thr ${POSE_NMS_THR}]
+```
+
+Examples:
+
+```shell
+python demo/bottom_up_img_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py \
+ https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth \
+ --img-path tests/data/coco/ \
+ --out-img-root vis_results
+```
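+
+Bottom-up models need no detector: the network predicts keypoints for the whole image and groups them into person instances. A minimal sketch of the underlying API call (the checkpoint path is a placeholder for the download above):
+
+```python
+from mmpose.apis import (inference_bottom_up_pose_model, init_pose_model,
+                         vis_pose_result)
+
+pose_model = init_pose_model(
+    'configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py',
+    'path/to/hrnet_w32_coco_512x512.pth')  # checkpoint from the URL above
+dataset = pose_model.cfg.data['test']['type']
+
+img = 'tests/data/coco/000000196141.jpg'
+# each result carries a (num_keypoints, 3) 'keypoints' array plus an instance 'score'
+pose_results, _ = inference_bottom_up_pose_model(
+    pose_model, img, dataset=dataset, pose_nms_thr=0.9)
+vis_pose_result(pose_model, img, pose_results, dataset=dataset,
+                kpt_score_thr=0.3, out_file='vis_results/vis_000000196141.jpg')
+```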
+
+### 2D Human Pose Bottom-Up Video Demo
+
+We also provide a video demo to illustrate the results.
+
+```shell
+python demo/bottom_up_video_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR} --pose-nms-thr ${POSE_NMS_THR}]
+```
+
+Examples:
+
+```shell
+python demo/bottom_up_video_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py \
+ https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth \
+ --video-path demo/resources/demo.mp4 \
+ --out-video-root vis_results
+```
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For top-down models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L51).
+1. set `post_process='default'` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L52).
+1. use faster human bounding box detector, see [MMDetection](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
+
+For bottom-up models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L80).
+1. set `adjust=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L78).
+1. set `refine=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L79).
+1. use smaller input image size in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L39).
diff --git a/vendor/ViTPose/demo/docs/2d_pose_tracking_demo.md b/vendor/ViTPose/demo/docs/2d_pose_tracking_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b299413b2dfebde22c0d7024b32e8c0b880ba8d
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_pose_tracking_demo.md
@@ -0,0 +1,101 @@
+## 2D Pose Tracking Demo
+
+
+
+### 2D Top-Down Video Human Pose Tracking Demo
+
+We provide a video demo to illustrate the pose tracking results.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/top_down_pose_tracking_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+ [--use-oks-tracking --tracking-thr ${TRACKING_THR} --euro]
+```
+
+Examples:
+
+```shell
+python demo/top_down_pose_tracking_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth \
+ --video-path demo/resources/demo.mp4 \
+ --out-video-root vis_results
+```
+
+### 2D Top-Down Video Human Pose Tracking Demo with MMTracking
+
+MMTracking is an open source video perception toolbox based on PyTorch for tracking-related tasks.
+Here we show how to utilize MMTracking and MMPose to achieve human pose tracking.
+
+Assume that you have already installed [mmtracking](https://github.com/open-mmlab/mmtracking).
+
+```shell
+python demo/top_down_pose_tracking_demo_with_mmtracking.py \
+ ${MMTRACKING_CONFIG_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_pose_tracking_demo_with_mmtracking.py \
+ demo/mmtracking_cfg/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth \
+ --video-path demo/resources/demo.mp4 \
+ --out-video-root vis_results
+```
+
+### 2D Bottom-Up Video Human Pose Tracking Demo
+
+We also provide a pose tracking demo with bottom-up pose estimation methods.
+
+```shell
+python demo/bottom_up_pose_tracking_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR} --pose-nms-thr ${POSE_NMS_THR}]
+ [--use-oks-tracking --tracking-thr ${TRACKING_THR} --euro]
+```
+
+Examples:
+
+```shell
+python demo/bottom_up_pose_tracking_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py \
+ https://download.openmmlab.com/mmpose/bottom_up/hrnet_w32_coco_512x512-bcb8c247_20200816.pth \
+ --video-path demo/resources/demo.mp4 \
+ --out-video-root vis_results
+```
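+
+Tracking adds one call per frame: `get_track_id` matches the current poses against the previous frame (by IoU, or OKS when `--use-oks-tracking` is set) and attaches a persistent `track_id` to each instance. A condensed sketch of the loop used by the demos above (the checkpoint path is a placeholder):
+
+```python
+import mmcv
+from mmpose.apis import (get_track_id, inference_bottom_up_pose_model,
+                         init_pose_model)
+
+pose_model = init_pose_model(
+    'configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py',
+    'path/to/hrnet_w32_coco_512x512.pth')  # checkpoint from the URL above
+
+video = mmcv.VideoReader('demo/resources/demo.mp4')
+next_id = 0
+pose_results_last = []
+for frame in video:
+    pose_results, _ = inference_bottom_up_pose_model(pose_model, frame)
+    # attach a 'track_id' to every pose, reusing ids from the previous frame when matched
+    pose_results, next_id = get_track_id(
+        pose_results, pose_results_last, next_id,
+        use_oks=False, tracking_thr=0.3, fps=video.fps)
+    pose_results_last = pose_results
+```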
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For top-down models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L51).
+1. set `post_process='default'` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L52).
+1. use faster human detector or human tracker, see [MMDetection](https://mmdetection.readthedocs.io/en/latest/model_zoo.html) or [MMTracking](https://mmtracking.readthedocs.io/en/latest/model_zoo.html).
+
+For bottom-up models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L80).
+1. set `adjust=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L78).
+1. set `refine=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L79).
+1. use smaller input image size in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L39).
diff --git a/vendor/ViTPose/demo/docs/2d_wholebody_pose_demo.md b/vendor/ViTPose/demo/docs/2d_wholebody_pose_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..a2050eae1d6a1cbb4870292563239369922df629
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/2d_wholebody_pose_demo.md
@@ -0,0 +1,106 @@
+## 2D Human Whole-Body Pose Demo
+
+
+
+### 2D Human Whole-Body Pose Top-Down Image Demo
+
+#### Using gt human bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results
+```
+
+To run demos on CPU:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results \
+ --device=cpu
+```
+
+#### Using mmdet for human bounding box detection
+
+We provide a demo script to run mmdet for human detection, and mmpose for pose estimation.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --img ${IMG_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth \
+ --img-root tests/data/coco/ \
+ --img 000000196141.jpg \
+ --out-img-root vis_results
+```
+
+### 2D Human Whole-Body Pose Top-Down Video Demo
+
+We also provide a video demo to illustrate the results.
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --video-path ${VIDEO_FILE} \
+ --out-video-root ${OUTPUT_VIDEO_ROOT} \
+ [--show --device ${GPU_ID or CPU}] \
+ [--bbox-thr ${BBOX_SCORE_THR} --kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_video_demo_with_mmdet.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth \
+ --video-path https://user-images.githubusercontent.com/87690686/137440639-fb08603d-9a35-474e-b65f-46b5c06b68d6.mp4 \
+ --out-video-root vis_results
+```
+
+### Speed Up Inference
+
+Some tips to speed up MMPose inference:
+
+For top-down models, try to edit the config file. For example,
+
+1. set `flip_test=False` in [pose_hrnet_w48_dark+](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/wholebody/darkpose/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py#L80).
+1. set `post_process='default'` in [pose_hrnet_w48_dark+](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/wholebody/darkpose/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py#L81).
+1. use faster human bounding box detector, see [MMDetection](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
diff --git a/vendor/ViTPose/demo/docs/3d_body_mesh_demo.md b/vendor/ViTPose/demo/docs/3d_body_mesh_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..b1e93db7791ebdaf8fced2ef6637740f76bdccd7
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/3d_body_mesh_demo.md
@@ -0,0 +1,28 @@
+## 3D Mesh Demo
+
+
+
+### 3D Mesh Recovery Demo
+
+We provide a demo script to recover a human 3D mesh from a single image.
+
+```shell
+python demo/mesh_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --json-file ${JSON_FILE} \
+ --img-root ${IMG_ROOT} \
+ [--show] \
+ [--device ${GPU_ID or CPU}] \
+ [--out-img-root ${OUTPUT_DIR}]
+```
+
+Example:
+
+```shell
+python demo/mesh_img_demo.py \
+ configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py \
+ https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224-c21e8229_20201015.pth \
+ --json-file tests/data/h36m/h36m_coco.json \
+ --img-root tests/data/h36m \
+ --out-img-root vis_results
+```
diff --git a/vendor/ViTPose/demo/docs/3d_hand_demo.md b/vendor/ViTPose/demo/docs/3d_hand_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..a3204b7d54d50df4c0f447f074784342787bdef2
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/3d_hand_demo.md
@@ -0,0 +1,50 @@
+## 3D Hand Demo
+
+
+
+### 3D Hand Estimation Image Demo
+
+#### Using gt hand bounding boxes as input
+
+We provide a demo script to test a single image, given a gt json file.
+
+```shell
+python demo/interhand3d_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --json-file ${JSON_FILE} \
+ --img-root ${IMG_ROOT} \
+ [--camera-param-file ${CAMERA_PARAM_FILE}] \
+ [--gt-joints-file ${GT_JOINTS_FILE}]\
+ [--show] \
+ [--device ${GPU_ID or CPU}] \
+ [--out-img-root ${OUTPUT_DIR}] \
+ [--rebase-keypoint-height] \
+ [--show-ground-truth]
+```
+
+Example with gt keypoints and camera parameters:
+
+```shell
+python demo/interhand3d_img_demo.py \
+ configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py \
+ https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3d_all_256x256-b9c1cf4c_20210506.pth \
+ --json-file tests/data/interhand2.6m/test_interhand2.6m_data.json \
+ --img-root tests/data/interhand2.6m \
+ --camera-param-file tests/data/interhand2.6m/test_interhand2.6m_camera.json \
+ --gt-joints-file tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json \
+ --out-img-root vis_results \
+ --rebase-keypoint-height \
+ --show-ground-truth
+```
+
+Example without gt keypoints and camera parameters:
+
+```shell
+python demo/interhand3d_img_demo.py \
+ configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py \
+ https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3d_all_256x256-b9c1cf4c_20210506.pth \
+ --json-file tests/data/interhand2.6m/test_interhand2.6m_data.json \
+ --img-root tests/data/interhand2.6m \
+ --out-img-root vis_results \
+ --rebase-keypoint-height
+```
diff --git a/vendor/ViTPose/demo/docs/3d_human_pose_demo.md b/vendor/ViTPose/demo/docs/3d_human_pose_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..4771c691e62567ec3c5214f38932e020ef6b4213
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/3d_human_pose_demo.md
@@ -0,0 +1,84 @@
+## 3D Human Pose Demo
+
+
+
+### 3D Human Pose Two-stage Estimation Image Demo
+
+#### Using ground truth 2D poses as the 1st stage (pose detection) result, and running the 2nd stage (2D-to-3D lifting)
+
+We provide a demo script to test on single images with a given ground-truth JSON file.
+
+```shell
+python demo/body3d_two_stage_img_demo.py \
+ ${MMPOSE_CONFIG_FILE_3D} \
+ ${MMPOSE_CHECKPOINT_FILE_3D} \
+ --json-file ${JSON_FILE} \
+ --img-root ${IMG_ROOT} \
+ --only-second-stage \
+ [--show] \
+ [--device ${GPU_ID or CPU}] \
+ [--out-img-root ${OUTPUT_DIR}] \
+ [--rebase-keypoint-height] \
+ [--show-ground-truth]
+```
+
+Example:
+
+```shell
+python demo/body3d_two_stage_img_demo.py \
+ configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py \
+ https://download.openmmlab.com/mmpose/body3d/simple_baseline/simple3Dbaseline_h36m-f0ad73a4_20210419.pth \
+ --json-file tests/data/h36m/h36m_coco.json \
+ --img-root tests/data/h36m \
+ --camera-param-file tests/data/h36m/cameras.pkl \
+ --only-second-stage \
+ --out-img-root vis_results \
+ --rebase-keypoint-height \
+ --show-ground-truth
+```
+
+### 3D Human Pose Two-stage Estimation Video Demo
+
+#### Using mmdet for human bounding box detection and a top-down model for the 1st stage (2D pose detection), and running the 2nd stage (2D-to-3D lifting)
+
+Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection).
+
+```shell
+python demo/body3d_two_stage_video_demo.py \
+ ${MMDET_CONFIG_FILE} \
+ ${MMDET_CHECKPOINT_FILE} \
+ ${MMPOSE_CONFIG_FILE_2D} \
+ ${MMPOSE_CHECKPOINT_FILE_2D} \
+ ${MMPOSE_CONFIG_FILE_3D} \
+ ${MMPOSE_CHECKPOINT_FILE_3D} \
+ --video-path ${VIDEO_PATH} \
+ [--rebase-keypoint-height] \
+ [--norm-pose-2d] \
+ [--num-poses-vis NUM_POSES_VIS] \
+ [--show] \
+ [--out-video-root ${OUT_VIDEO_ROOT}] \
+ [--device ${GPU_ID or CPU}] \
+ [--det-cat-id DET_CAT_ID] \
+ [--bbox-thr BBOX_THR] \
+ [--kpt-thr KPT_THR] \
+ [--use-oks-tracking] \
+ [--tracking-thr TRACKING_THR] \
+ [--euro] \
+ [--radius RADIUS] \
+ [--thickness THICKNESS]
+```
+
+Example:
+
+```shell
+python demo/body3d_two_stage_video_demo.py \
+ demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
+ https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py \
+ https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth \
+ --video-path demo/resources/.mp4 \
+ --out-video-root vis_results \
+ --rebase-keypoint-height
+```
diff --git a/vendor/ViTPose/demo/docs/mmdet_modelzoo.md b/vendor/ViTPose/demo/docs/mmdet_modelzoo.md
new file mode 100644
index 0000000000000000000000000000000000000000..6017fcdb8d8f054ede2fcce19a1804e420ea5390
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/mmdet_modelzoo.md
@@ -0,0 +1,30 @@
+## Pre-trained Detection Models
+
+### Human Bounding Box Detection Models
+
+For human bounding box detection models, please download from [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
+MMDetection provides 80-class COCO-pretrained models, which already include the `person` category.
+
+### Hand Bounding Box Detection Models
+
+For hand bounding box detection, we simply train our hand box models on the OneHand10K dataset using MMDetection.
+
+#### Hand detection results on OneHand10K test set
+
+| Arch | Box AP | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: |
+| [Cascade_R-CNN X-101-64x4d-FPN-1class](/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py) | 0.817 | [ckpt](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth) | [log](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k_20201030.log.json) |
+
+### Animal Bounding Box Detection Models
+
+#### COCO animals
+
+In the COCO dataset, there are 80 object categories, including 10 common `animal` categories (16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe').
+For animals in these categories, please download detection models from the [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
+
+#### Macaque detection results on MacaquePose test set
+
+| Arch | Box AP | ckpt | log |
+| :-------------- | :-----------: | :------: | :------: |
+| [Faster_R-CNN_Res50-FPN-1class](/demo/mmdetection_cfg/faster_rcnn_r50_fpn_1class.py) | 0.840 | [ckpt](https://download.openmmlab.com/mmpose/mmdet_pretrained/faster_rcnn_r50_fpn_1x_macaque-f64f2812_20210409.pth) | [log](https://download.openmmlab.com/mmpose/mmdet_pretrained/faster_rcnn_r50_fpn_1x_macaque_20210409.log.json) |
+| [Cascade_R-CNN X-101-64x4d-FPN-1class](/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py) | 0.879 | [ckpt](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_macaque-e45e36f5_20210409.pth) | [log](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_macaque_20210409.log.json) |
diff --git a/vendor/ViTPose/demo/docs/webcam_demo.md b/vendor/ViTPose/demo/docs/webcam_demo.md
new file mode 100644
index 0000000000000000000000000000000000000000..a8a82a89d7144a91ee33cc4902e3672971293159
--- /dev/null
+++ b/vendor/ViTPose/demo/docs/webcam_demo.md
@@ -0,0 +1,49 @@
+## Webcam Demo
+
+We provide a webcam demo tool which integrates detection and 2D pose estimation for humans and animals. You can simply run the following command:
+
+```shell
+python demo/webcam_demo.py
+```
+
+It will launch a window to display the webcam video stream with detection and pose estimation results:
+
+### Usage Tips
+
+- **Which model is used in the demo tool?**
+
+ Please check the following default arguments in the script. You can also choose other models from the [MMDetection Model Zoo](https://github.com/open-mmlab/mmdetection/blob/master/docs/model_zoo.md) and [MMPose Model Zoo](https://mmpose.readthedocs.io/en/latest/modelzoo.html#) or use your own models.
+
+ | Model | Arguments |
+ | :--: | :-- |
+ | Detection | `--det-config`, `--det-checkpoint` |
+ | Human Pose | `--human-pose-config`, `--human-pose-checkpoint` |
+ | Animal Pose | `--animal-pose-config`, `--animal-pose-checkpoint` |
+
+- **Can this tool run without GPU?**
+
+  Yes, you can set `--device=cpu` so that model inference runs on the CPU. Of course, this may result in a lower inference FPS than running on a GPU.
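+
+  The same option exists at the API level. A minimal sketch, assuming a placeholder checkpoint path:
+
+  ```python
+  from mmpose.apis import init_pose_model
+
+  # Passing device='cpu' mirrors the --device=cpu flag of the demo tool.
+  pose_model = init_pose_model(
+      'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py',
+      'checkpoints/hrnet_w48_coco_256x192.pth',  # placeholder checkpoint path
+      device='cpu')
+  ```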
+
+- **Why is there a time delay between the pose visualization and the video?**
+
+  The video I/O and the model inference run asynchronously, and the latter usually takes more time per frame. To alleviate the time delay, you can:
+
+ 1. set `--display-delay=MILLISECONDS` to defer the video stream, according to the inference delay shown at the top left corner. Or,
+
+  2. set `--synchronous-mode` to force the video stream to stay aligned with the inference results. This may reduce the video display FPS.
+
+- **Can this tool process video files?**
+
+ Yes. You can set `--cam-id=VIDEO_FILE_PATH` to run the demo tool in offline mode on a video file. Note that `--synchronous-mode` should be set in this case.
+
+- **How to enable/disable the special effects?**
+
+  The special effects can be enabled/disabled at launch time by setting arguments like `--bugeye`, `--sunglasses`, *etc*. You can also toggle the effects with keyboard shortcuts such as `b` and `s` while the tool is running.
+
+- **What if my computer doesn't have a camera?**
+
+ You can use a smart phone as a webcam with apps like [Camo](https://reincubate.com/camo/) or [DroidCam](https://www.dev47apps.com/).
diff --git a/vendor/ViTPose/demo/face_img_demo.py b/vendor/ViTPose/demo/face_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..e94eb08cdbba139b1104b5fe16b4648b1d03b8c4
--- /dev/null
+++ b/vendor/ViTPose/demo/face_img_demo.py
@@ -0,0 +1,140 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ import face_recognition
+ has_face_det = True
+except (ImportError, ModuleNotFoundError):
+ has_face_det = False
+
+
+def process_face_det_results(face_det_results):
+ """Process det results, and return a list of bboxes.
+
+    :param face_det_results: a list of boxes in (top, right, bottom, left)
+        format, as returned by ``face_recognition.face_locations``
+    :return: a list of detected bounding boxes in (x1, y1, x2, y2) format
+ """
+
+ person_results = []
+ for bbox in face_det_results:
+ person = {}
+ # left, top, right, bottom
+ person['bbox'] = [bbox[3], bbox[0], bbox[1], bbox[2]]
+ person_results.append(person)
+
+ return person_results
+
+
+def main():
+ """Visualize the demo images.
+
+    Using the face_recognition package to detect faces.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument('--img', type=str, default='', help='Image file')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default='',
+ help='root of the output img file. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_face_det, 'Please install face_recognition to run the demo. ' \
+ '"pip install face_recognition", For more details, ' \
+ 'see https://github.com/ageitgey/face_recognition'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_img_root != '')
+ assert args.img != ''
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ image_name = os.path.join(args.img_root, args.img)
+
+ # test a single image, the resulting box is (top, right, bottom and left)
+ image = face_recognition.load_image_file(image_name)
+ face_det_results = face_recognition.face_locations(image)
+
+ # keep the person class bounding boxes.
+ face_results = process_face_det_results(face_det_results)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ face_results,
+ bbox_thr=None,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ if args.out_img_root == '':
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = os.path.join(args.out_img_root, f'vis_{args.img}')
+
+ # show the results
+ vis_pose_result(
+ pose_model,
+ image_name,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=args.show,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/face_video_demo.py b/vendor/ViTPose/demo/face_video_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..cebe262eb61b5ade18ff065eddbcc2415b7c137c
--- /dev/null
+++ b/vendor/ViTPose/demo/face_video_demo.py
@@ -0,0 +1,167 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ import face_recognition
+ has_face_det = True
+except (ImportError, ModuleNotFoundError):
+ has_face_det = False
+
+
+def process_face_det_results(face_det_results):
+ """Process det results, and return a list of bboxes.
+
+    :param face_det_results: a list of boxes in (top, right, bottom, left)
+        format, as returned by ``face_recognition.face_locations``
+    :return: a list of detected bounding boxes in (x1, y1, x2, y2) format
+ """
+
+ person_results = []
+ for bbox in face_det_results:
+ person = {}
+ # left, top, right, bottom
+ person['bbox'] = [bbox[3], bbox[0], bbox[1], bbox[2]]
+ person_results.append(person)
+
+ return person_results
+
+
+def main():
+    """Visualize the demo video.
+
+    Using the face_recognition package to detect faces.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_face_det, 'Please install face_recognition to run the demo. '\
+ '"pip install face_recognition", For more details, '\
+ 'see https://github.com/ageitgey/face_recognition'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+ 'Please set `dataset_info` in the config.'
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+
+ face_det_results = face_recognition.face_locations(
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+ face_results = process_face_det_results(face_det_results)
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ img,
+ face_results,
+ bbox_thr=None,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # show the results
+ vis_img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/interhand3d_img_demo.py b/vendor/ViTPose/demo/interhand3d_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6dbeff3b9cba6eb95b8ec0ce98d4ac8ae48cb0a
--- /dev/null
+++ b/vendor/ViTPose/demo/interhand3d_img_demo.py
@@ -0,0 +1,258 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+from argparse import ArgumentParser
+
+import mmcv
+import numpy as np
+from xtcocotools.coco import COCO
+
+from mmpose.apis import inference_interhand_3d_model, vis_3d_pose_result
+from mmpose.apis.inference import init_pose_model
+from mmpose.core import SimpleCamera
+
+
+def _transform_interhand_camera_param(interhand_camera_param):
+ """Transform the camera parameters in interhand2.6m dataset to the format
+ of SimpleCamera.
+
+ Args:
+ interhand_camera_param (dict): camera parameters including:
+ - camrot: 3x3, camera rotation matrix (world-to-camera)
+ - campos: 3x1, camera location in world space
+ - focal: 2x1, camera focal length
+ - princpt: 2x1, camera center
+
+ Returns:
+ param (dict): camera parameters including:
+ - R: 3x3, camera rotation matrix (camera-to-world)
+ - T: 3x1, camera translation (camera-to-world)
+ - f: 2x1, camera focal length
+ - c: 2x1, camera center
+ """
+ camera_param = {}
+ camera_param['R'] = np.array(interhand_camera_param['camrot']).T
+ camera_param['T'] = np.array(interhand_camera_param['campos'])[:, None]
+ camera_param['f'] = np.array(interhand_camera_param['focal'])[:, None]
+ camera_param['c'] = np.array(interhand_camera_param['princpt'])[:, None]
+ return camera_param
+
+
+def main():
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose network')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument(
+ '--json-file',
+ type=str,
+ default='',
+ help='Json file containing image info.')
+ parser.add_argument(
+ '--camera-param-file',
+ type=str,
+ default=None,
+        help='Camera parameter file for converting 3D pose predictions from '
+        'the pixel space to the camera space. If None, keypoints in the '
+        'pixel space will be visualized.')
+ parser.add_argument(
+ '--gt-joints-file',
+ type=str,
+ default=None,
+ help='Optional argument. Ground truth 3D keypoint parameter file. '
+ 'If None, gt keypoints will not be shown and keypoints in pixel '
+ 'space will be visualized.')
+ parser.add_argument(
+ '--rebase-keypoint-height',
+ action='store_true',
+ help='Rebase the predicted 3D pose so its lowest keypoint has a '
+ 'height of 0 (landing on the ground). This is useful for '
+        'visualization when the model does not predict the global position '
+ 'of the 3D pose.')
+ parser.add_argument(
+ '--show-ground-truth',
+ action='store_true',
+ help='If True, show ground truth keypoint if it is available.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default=None,
+ help='Root of the output visualization images. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+ assert args.show or (args.out_img_root != '')
+
+ coco = COCO(args.json_file)
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+ dataset = pose_model.cfg.data['test']['type']
+
+ # load camera parameters
+ camera_params = None
+ if args.camera_param_file is not None:
+ camera_params = mmcv.load(args.camera_param_file)
+ # load ground truth joints parameters
+ gt_joint_params = None
+ if args.gt_joints_file is not None:
+ gt_joint_params = mmcv.load(args.gt_joints_file)
+
+ # load hand bounding boxes
+ det_results_list = []
+ for image_id, image in coco.imgs.items():
+ image_name = osp.join(args.img_root, image['file_name'])
+
+ ann_ids = coco.getAnnIds(image_id)
+ det_results = []
+
+ capture_key = str(image['capture'])
+ camera_key = image['camera']
+ frame_idx = image['frame_idx']
+
+ for ann_id in ann_ids:
+ ann = coco.anns[ann_id]
+ if camera_params is not None:
+ camera_param = {
+ key: camera_params[capture_key][key][camera_key]
+ for key in camera_params[capture_key].keys()
+ }
+ camera_param = _transform_interhand_camera_param(camera_param)
+ else:
+ camera_param = None
+ if gt_joint_params is not None:
+ joint_param = gt_joint_params[capture_key][str(frame_idx)]
+ gt_joint = np.concatenate([
+ np.array(joint_param['world_coord']),
+ np.array(joint_param['joint_valid'])
+ ],
+ axis=-1)
+ else:
+ gt_joint = None
+
+ det_result = {
+ 'image_name': image_name,
+ 'bbox': ann['bbox'], # bbox format is 'xywh'
+ 'camera_param': camera_param,
+ 'keypoints_3d_gt': gt_joint
+ }
+ det_results.append(det_result)
+ det_results_list.append(det_results)
+
+ for i, det_results in enumerate(
+ mmcv.track_iter_progress(det_results_list)):
+
+ image_name = det_results[0]['image_name']
+
+ pose_results = inference_interhand_3d_model(
+ pose_model, image_name, det_results, dataset=dataset)
+
+ # Post processing
+ pose_results_vis = []
+ for idx, res in enumerate(pose_results):
+ keypoints_3d = res['keypoints_3d']
+ # normalize kpt score
+ if keypoints_3d[:, 3].max() > 1:
+ keypoints_3d[:, 3] /= 255
+ # get 2D keypoints in pixel space
+ res['keypoints'] = keypoints_3d[:, [0, 1, 3]]
+
+ # For model-predicted keypoints, channel 0 and 1 are coordinates
+ # in pixel space, and channel 2 is the depth (in mm) relative
+ # to root joints.
+ # If both camera parameter and absolute depth of root joints are
+ # provided, we can transform keypoint to camera space for better
+ # visualization.
+ camera_param = res['camera_param']
+ keypoints_3d_gt = res['keypoints_3d_gt']
+ if camera_param is not None and keypoints_3d_gt is not None:
+ # build camera model
+ camera = SimpleCamera(camera_param)
+ # transform gt joints from world space to camera space
+ keypoints_3d_gt[:, :3] = camera.world_to_camera(
+ keypoints_3d_gt[:, :3])
+
+ # transform relative depth to absolute depth
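+                # (indices 20 and 41 are treated as the root joints of the
+                # two hands: the ground-truth absolute depth of each root is
+                # added to the relative depths of its 21-joint hand)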
+ keypoints_3d[:21, 2] += keypoints_3d_gt[20, 2]
+ keypoints_3d[21:, 2] += keypoints_3d_gt[41, 2]
+
+ # transform keypoints from pixel space to camera space
+ keypoints_3d[:, :3] = camera.pixel_to_camera(
+ keypoints_3d[:, :3])
+
+ # rotate the keypoint to make z-axis correspondent to height
+ # for better visualization
+ vis_R = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
+ keypoints_3d[:, :3] = keypoints_3d[:, :3] @ vis_R
+ if keypoints_3d_gt is not None:
+ keypoints_3d_gt[:, :3] = keypoints_3d_gt[:, :3] @ vis_R
+
+ # rebase height (z-axis)
+ if args.rebase_keypoint_height:
+ valid = keypoints_3d[..., 3] > 0
+ keypoints_3d[..., 2] -= np.min(
+ keypoints_3d[valid, 2], axis=-1, keepdims=True)
+ res['keypoints_3d'] = keypoints_3d
+ res['keypoints_3d_gt'] = keypoints_3d_gt
+
+ # Add title
+ instance_id = res.get('track_id', idx)
+ res['title'] = f'Prediction ({instance_id})'
+ pose_results_vis.append(res)
+ # Add ground truth
+ if args.show_ground_truth:
+ if keypoints_3d_gt is None:
+                    print('Failed to show ground truth. Please make sure that'
+ ' gt-joints-file is provided.')
+ else:
+ gt = res.copy()
+ if args.rebase_keypoint_height:
+ valid = keypoints_3d_gt[..., 3] > 0
+ keypoints_3d_gt[..., 2] -= np.min(
+ keypoints_3d_gt[valid, 2], axis=-1, keepdims=True)
+ gt['keypoints_3d'] = keypoints_3d_gt
+ gt['title'] = f'Ground truth ({instance_id})'
+ pose_results_vis.append(gt)
+
+ # Visualization
+ if args.out_img_root is None:
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = osp.join(args.out_img_root, f'vis_{i}.jpg')
+
+ vis_3d_pose_result(
+ pose_model,
+ result=pose_results_vis,
+ img=det_results[0]['image_name'],
+ out_file=out_file,
+ dataset=dataset,
+ show=args.show,
+ kpt_score_thr=args.kpt_thr,
+ radius=args.radius,
+ thickness=args.thickness,
+ )
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/mesh_img_demo.py b/vendor/ViTPose/demo/mesh_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..127ebad3b79c19d8dffae0afd489bcc0212cba8f
--- /dev/null
+++ b/vendor/ViTPose/demo/mesh_img_demo.py
@@ -0,0 +1,93 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from argparse import ArgumentParser
+
+from xtcocotools.coco import COCO
+
+from mmpose.apis import (inference_mesh_model, init_pose_model,
+ vis_3d_mesh_result)
+
+
+def main():
+ """Visualize the demo images.
+
+ Require the json_file containing boxes.
+ """
+ parser = ArgumentParser()
+    parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument(
+ '--json-file',
+ type=str,
+ default='',
+ help='Json file containing image info.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default='',
+ help='Root of the output img file. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_img_root != '')
+
+ coco = COCO(args.json_file)
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+
+ img_keys = list(coco.imgs.keys())
+
+ # process each image
+ for i in range(len(img_keys)):
+ # get bounding box annotations
+ image_id = img_keys[i]
+ image = coco.loadImgs(image_id)[0]
+ image_name = os.path.join(args.img_root, image['file_name'])
+ ann_ids = coco.getAnnIds(image_id)
+
+ # make person bounding boxes
+ person_results = []
+ for ann_id in ann_ids:
+ person = {}
+ ann = coco.anns[ann_id]
+ # bbox format is 'xywh'
+ person['bbox'] = ann['bbox']
+ person_results.append(person)
+
+ # test a single image, with a list of bboxes
+ pose_results = inference_mesh_model(
+ pose_model,
+ image_name,
+ person_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset=dataset)
+
+ if args.out_img_root == '':
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg')
+
+ vis_3d_mesh_result(
+ pose_model,
+ pose_results,
+ image_name,
+ show=args.show,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py b/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e60b6b73971d598e40efdcc408d9385b3140b71
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py
@@ -0,0 +1,255 @@
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[16, 19])
+total_epochs = 20
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ pretrained='open-mmlab://resnext101_64x4d',
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
+
+dataset_type = 'CocoDataset'
+data_root = 'data/coco'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_coco.py b/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..f91bd0d105b9394c514ffb82d54117dba347680a
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_coco.py
@@ -0,0 +1,256 @@
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[16, 19])
+total_epochs = 20
+
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ pretrained='open-mmlab://resnext101_64x4d',
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
+
+dataset_type = 'CocoDataset'
+data_root = 'data/coco'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_1class.py b/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_1class.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee54f5b66bd216c485db0a56a68bf2793428d123
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_1class.py
@@ -0,0 +1,182 @@
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 11])
+total_epochs = 12
+
+model = dict(
+ type='FasterRCNN',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
+
+dataset_type = 'CocoDataset'
+data_root = 'data/coco'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py b/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9ad9528b22163ae7ce1390375b69227fd6eafd9
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py
@@ -0,0 +1,182 @@
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[8, 11])
+total_epochs = 12
+
+model = dict(
+ type='FasterRCNN',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
+
+dataset_type = 'CocoDataset'
+data_root = 'data/coco'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py b/vendor/ViTPose/demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..05d39fa9a87a0200f9b9d29cd19acd28c155d126
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py
@@ -0,0 +1,242 @@
+model = dict(
+ type='MaskRCNN',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ mask_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ mask_head=dict(
+ type='FCNMaskHead',
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=80,
+ loss_mask=dict(
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100,
+ mask_thr_binary=0.5)))
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type='CocoDataset',
+ ann_file='data/coco/annotations/instances_train2017.json',
+ img_prefix='data/coco/train2017/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(
+ type='Collect',
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
+ ]),
+ val=dict(
+ type='CocoDataset',
+ ann_file='data/coco/annotations/instances_val2017.json',
+ img_prefix='data/coco/val2017/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='CocoDataset',
+ ann_file='data/coco/annotations/instances_val2017.json',
+ img_prefix='data/coco/val2017/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]))
+evaluation = dict(metric=['bbox', 'segm'])
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[16, 22])
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+checkpoint_config = dict(interval=1)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+custom_hooks = [dict(type='NumClassCheckHook')]
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/ssdlite_mobilenetv2_scratch_600e_coco.py b/vendor/ViTPose/demo/mmdetection_cfg/ssdlite_mobilenetv2_scratch_600e_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..91b9e593817cdb25899fdd664fadc10f3c0060d0
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/ssdlite_mobilenetv2_scratch_600e_coco.py
@@ -0,0 +1,216 @@
+# =========================================================
+# from 'mmdetection/configs/_base_/default_runtime.py'
+# =========================================================
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+custom_hooks = [dict(type='NumClassCheckHook')]
+# =========================================================
+
+# =========================================================
+# from 'mmdetection/configs/_base_/datasets/coco_detection.py'
+# =========================================================
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
+# =========================================================
+
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+
+model = dict(
+ type='SingleStageDetector',
+ backbone=dict(
+ type='MobileNetV2',
+ out_indices=(4, 7),
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
+ init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
+ neck=dict(
+ type='SSDNeck',
+ in_channels=(96, 1280),
+ out_channels=(96, 1280, 512, 256, 256, 128),
+ level_strides=(2, 2, 2, 2),
+ level_paddings=(1, 1, 1, 1),
+ l2_norm_scale=None,
+ use_depthwise=True,
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
+ act_cfg=dict(type='ReLU6'),
+ init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
+ bbox_head=dict(
+ type='SSDHead',
+ in_channels=(96, 1280, 512, 256, 256, 128),
+ num_classes=80,
+ use_depthwise=True,
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
+ act_cfg=dict(type='ReLU6'),
+ init_cfg=dict(type='Normal', layer='Conv2d', std=0.001),
+
+ # set anchor size manually instead of using the predefined
+ # SSD300 setting.
+ anchor_generator=dict(
+ type='SSDAnchorGenerator',
+ scale_major=False,
+ strides=[16, 32, 64, 107, 160, 320],
+ ratios=[[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]],
+ min_sizes=[48, 100, 150, 202, 253, 304],
+ max_sizes=[100, 150, 202, 253, 304, 320]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2])),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.,
+ ignore_iof_thr=-1,
+ gt_max_assign_all=False),
+ smoothl1_beta=1.,
+ allowed_border=-1,
+ pos_weight=-1,
+ neg_pos_ratio=3,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ nms=dict(type='nms', iou_threshold=0.45),
+ min_bbox_size=0,
+ score_thr=0.02,
+ max_per_img=200))
+cudnn_benchmark = True
+
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile', to_float32=True),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(
+ type='Expand',
+ mean=img_norm_cfg['mean'],
+ to_rgb=img_norm_cfg['to_rgb'],
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Pad', size_divisor=320),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(320, 320),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=False),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=320),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=24,
+ workers_per_gpu=4,
+ train=dict(
+ _delete_=True,
+ type='RepeatDataset', # use RepeatDataset to speed up training
+ times=5,
+ dataset=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline)),
+ val=dict(pipeline=test_pipeline),
+ test=dict(pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=4.0e-5)
+optimizer_config = dict(grad_clip=None)
+
+# learning policy
+lr_config = dict(
+ policy='CosineAnnealing',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ min_lr=0)
+runner = dict(type='EpochBasedRunner', max_epochs=120)
+
+# Avoid evaluation and saving weights too frequently
+evaluation = dict(interval=5, metric='bbox')
+checkpoint_config = dict(interval=5)
+custom_hooks = [
+ dict(type='NumClassCheckHook'),
+ dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
+]
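+
+# Illustrative note (editor addition, not part of the upstream config): this
+# SSDLite/MobileNetV2 detection config is the kind of file the demo scripts
+# hand to mmdet (webcam_demo.py points at an SSDLite config like this one by
+# default, together with a downloadable checkpoint). A minimal sketch, with
+# the checkpoint path left as a placeholder:
+#
+#     from mmdet.apis import inference_detector, init_detector
+#
+#     det_model = init_detector('<this_config>.py', '<ssdlite_ckpt>.pth',
+#                               device='cuda:0')
+#     det_results = inference_detector(det_model, 'demo.jpg')
+#     # det_results is a list with one (N, 5) array of xyxy+score per class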
diff --git a/vendor/ViTPose/demo/mmdetection_cfg/yolov3_d53_320_273e_coco.py b/vendor/ViTPose/demo/mmdetection_cfg/yolov3_d53_320_273e_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7e9cca1eb34f9935a9eaf74b4cae18d1efaa248
--- /dev/null
+++ b/vendor/ViTPose/demo/mmdetection_cfg/yolov3_d53_320_273e_coco.py
@@ -0,0 +1,140 @@
+# model settings
+model = dict(
+ type='YOLOV3',
+ pretrained='open-mmlab://darknet53',
+ backbone=dict(type='Darknet', depth=53, out_indices=(3, 4, 5)),
+ neck=dict(
+ type='YOLOV3Neck',
+ num_scales=3,
+ in_channels=[1024, 512, 256],
+ out_channels=[512, 256, 128]),
+ bbox_head=dict(
+ type='YOLOV3Head',
+ num_classes=80,
+ in_channels=[512, 256, 128],
+ out_channels=[1024, 512, 256],
+ anchor_generator=dict(
+ type='YOLOAnchorGenerator',
+ base_sizes=[[(116, 90), (156, 198), (373, 326)],
+ [(30, 61), (62, 45), (59, 119)],
+ [(10, 13), (16, 30), (33, 23)]],
+ strides=[32, 16, 8]),
+ bbox_coder=dict(type='YOLOBBoxCoder'),
+ featmap_strides=[32, 16, 8],
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0,
+ reduction='sum'),
+ loss_conf=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0,
+ reduction='sum'),
+ loss_xy=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=2.0,
+ reduction='sum'),
+ loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='GridAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0)),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ conf_thr=0.005,
+ nms=dict(type='nms', iou_threshold=0.45),
+ max_per_img=100))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco'
+img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile', to_float32=True),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='PhotoMetricDistortion'),
+ dict(
+ type='Expand',
+ mean=img_norm_cfg['mean'],
+ to_rgb=img_norm_cfg['to_rgb'],
+ ratio_range=(1, 2)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', img_scale=(320, 320), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(320, 320),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img'])
+ ])
+]
+data = dict(
+ samples_per_gpu=8,
+ workers_per_gpu=4,
+ train=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_train2017.json',
+ img_prefix=f'{data_root}/train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=f'{data_root}/annotations/instances_val2017.json',
+ img_prefix=f'{data_root}/val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=2000, # same as burn-in in darknet
+ warmup_ratio=0.1,
+ step=[218, 246])
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=273)
+evaluation = dict(interval=1, metric=['bbox'])
+
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+custom_hooks = [dict(type='NumClassCheckHook')]
+
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
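+
+# Illustrative note (editor addition, not part of the upstream config): the
+# demo scripts pass a config like this one to mmdet together with a matching
+# checkpoint. A minimal sketch; the checkpoint path is a placeholder:
+#
+#     from mmcv import Config
+#     from mmdet.apis import init_detector
+#
+#     cfg = Config.fromfile('demo/mmdetection_cfg/yolov3_d53_320_273e_coco.py')
+#     print(cfg.model.bbox_head.num_classes)   # 80 COCO classes
+#     model = init_detector(cfg, '<yolov3_checkpoint>.pth')  # placeholder path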
diff --git a/vendor/ViTPose/demo/mmtracking_cfg/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py b/vendor/ViTPose/demo/mmtracking_cfg/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d7fccf0cbe9929618274218274726eb28577273
--- /dev/null
+++ b/vendor/ViTPose/demo/mmtracking_cfg/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py
@@ -0,0 +1,321 @@
+model = dict(
+ detector=dict(
+ type='FasterRCNN',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0],
+ clip_border=False),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(
+ type='SmoothL1Loss', beta=0.1111111111111111,
+ loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2],
+ clip_border=False),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', loss_weight=1.0))),
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmtracking/'
+ 'mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth')),
+ type='DeepSORT',
+ motion=dict(type='KalmanFilter', center_only=False),
+ reid=dict(
+ type='BaseReID',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(3, ),
+ style='pytorch'),
+ neck=dict(type='GlobalAveragePooling', kernel_size=(8, 4), stride=1),
+ head=dict(
+ type='LinearReIDHead',
+ num_fcs=1,
+ in_channels=2048,
+ fc_channels=1024,
+ out_channels=128,
+ num_classes=380,
+ loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
+ loss_pairwise=dict(
+ type='TripletLoss', margin=0.3, loss_weight=1.0),
+ norm_cfg=dict(type='BN1d'),
+ act_cfg=dict(type='ReLU')),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmtracking/'
+ 'mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth')),
+ tracker=dict(
+ type='SortTracker',
+ obj_score_thr=0.5,
+ reid=dict(
+ num_samples=10,
+ img_scale=(256, 128),
+ img_norm_cfg=None,
+ match_score_thr=2.0),
+ match_iou_thr=0.5,
+ momentums=None,
+ num_tentatives=2,
+ num_frames_retain=100))
+dataset_type = 'MOTChallengeDataset'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadMultiImagesFromFile', to_float32=True),
+ dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
+ dict(
+ type='SeqResize',
+ img_scale=(1088, 1088),
+ share_params=True,
+ ratio_range=(0.8, 1.2),
+ keep_ratio=True,
+ bbox_clip_border=False),
+ dict(type='SeqPhotoMetricDistortion', share_params=True),
+ dict(
+ type='SeqRandomCrop',
+ share_params=False,
+ crop_size=(1088, 1088),
+ bbox_clip_border=False),
+ dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
+ dict(
+ type='SeqNormalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='SeqPad', size_divisor=32),
+ dict(type='MatchInstances', skip_nomatch=True),
+ dict(
+ type='VideoCollect',
+ keys=[
+ 'img', 'gt_bboxes', 'gt_labels', 'gt_match_indices',
+ 'gt_instance_ids'
+ ]),
+ dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+]
+data_root = 'data/MOT17/'
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type='MOTChallengeDataset',
+ visibility_thr=-1,
+ ann_file='data/MOT17/annotations/half-train_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=dict(
+ num_ref_imgs=1,
+ frame_range=10,
+ filter_key_img=True,
+ method='uniform'),
+ pipeline=[
+ dict(type='LoadMultiImagesFromFile', to_float32=True),
+ dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
+ dict(
+ type='SeqResize',
+ img_scale=(1088, 1088),
+ share_params=True,
+ ratio_range=(0.8, 1.2),
+ keep_ratio=True,
+ bbox_clip_border=False),
+ dict(type='SeqPhotoMetricDistortion', share_params=True),
+ dict(
+ type='SeqRandomCrop',
+ share_params=False,
+ crop_size=(1088, 1088),
+ bbox_clip_border=False),
+ dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
+ dict(
+ type='SeqNormalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='SeqPad', size_divisor=32),
+ dict(type='MatchInstances', skip_nomatch=True),
+ dict(
+ type='VideoCollect',
+ keys=[
+ 'img', 'gt_bboxes', 'gt_labels', 'gt_match_indices',
+ 'gt_instance_ids'
+ ]),
+ dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
+ ]),
+ val=dict(
+ type='MOTChallengeDataset',
+ ann_file='data/MOT17/annotations/half-val_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=None,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='MOTChallengeDataset',
+ ann_file='data/MOT17/annotations/half-val_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=None,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+ ]))
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+checkpoint_config = dict(interval=1)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=100,
+ warmup_ratio=0.01,
+ step=[3])
+total_epochs = 4
+evaluation = dict(metric=['bbox', 'track'], interval=1)
+search_metrics = ['MOTA', 'IDF1', 'FN', 'FP', 'IDs', 'MT', 'ML']
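+
+# Illustrative note (editor addition): this DeepSORT config is meant to be
+# passed as `tracking_config` to top_down_pose_tracking_demo_with_mmtracking.py.
+# Detector and ReID weights come from the URLs inside the config, so the demo
+# loads it with `checkpoint=None`. A minimal per-frame sketch (`frame` is a
+# BGR image array):
+#
+#     from mmtrack.apis import inference_mot, init_model
+#
+#     tracker = init_model('<this_config>.py', None, device='cuda:0')
+#     results = inference_mot(tracker, frame, frame_id=0)
+#     # recent mmtracking: results['track_bboxes'][0] rows are
+#     # [track_id, x1, y1, x2, y2, score]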
diff --git a/vendor/ViTPose/demo/mmtracking_cfg/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py b/vendor/ViTPose/demo/mmtracking_cfg/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py
new file mode 100644
index 0000000000000000000000000000000000000000..9736269bd9ca1f950eadaa7a4933656db3130ca8
--- /dev/null
+++ b/vendor/ViTPose/demo/mmtracking_cfg/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py
@@ -0,0 +1,325 @@
+model = dict(
+ detector=dict(
+ type='FasterRCNN',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0],
+ clip_border=False),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(
+ type='SmoothL1Loss', beta=0.1111111111111111,
+ loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2],
+ clip_border=False),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', loss_weight=1.0))),
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))),
+ type='Tracktor',
+ pretrains=dict(
+ detector='https://download.openmmlab.com/mmtracking/'
+ 'mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth',
+ reid='https://download.openmmlab.com/mmtracking/mot/'
+ 'reid/reid_r50_6e_mot17-4bf6b63d.pth'),
+ reid=dict(
+ type='BaseReID',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(3, ),
+ style='pytorch'),
+ neck=dict(type='GlobalAveragePooling', kernel_size=(8, 4), stride=1),
+ head=dict(
+ type='LinearReIDHead',
+ num_fcs=1,
+ in_channels=2048,
+ fc_channels=1024,
+ out_channels=128,
+ num_classes=378,
+ loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
+ loss_pairwise=dict(
+ type='TripletLoss', margin=0.3, loss_weight=1.0),
+ norm_cfg=dict(type='BN1d'),
+ act_cfg=dict(type='ReLU'))),
+ motion=dict(
+ type='CameraMotionCompensation',
+ warp_mode='cv2.MOTION_EUCLIDEAN',
+ num_iters=100,
+ stop_eps=1e-05),
+ tracker=dict(
+ type='TracktorTracker',
+ obj_score_thr=0.5,
+ regression=dict(
+ obj_score_thr=0.5,
+ nms=dict(type='nms', iou_threshold=0.6),
+ match_iou_thr=0.3),
+ reid=dict(
+ num_samples=10,
+ img_scale=(256, 128),
+ img_norm_cfg=None,
+ match_score_thr=2.0,
+ match_iou_thr=0.2),
+ momentums=None,
+ num_frames_retain=10))
+dataset_type = 'MOTChallengeDataset'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadMultiImagesFromFile', to_float32=True),
+ dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
+ dict(
+ type='SeqResize',
+ img_scale=(1088, 1088),
+ share_params=True,
+ ratio_range=(0.8, 1.2),
+ keep_ratio=True,
+ bbox_clip_border=False),
+ dict(type='SeqPhotoMetricDistortion', share_params=True),
+ dict(
+ type='SeqRandomCrop',
+ share_params=False,
+ crop_size=(1088, 1088),
+ bbox_clip_border=False),
+ dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
+ dict(
+ type='SeqNormalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='SeqPad', size_divisor=32),
+ dict(type='MatchInstances', skip_nomatch=True),
+ dict(
+ type='VideoCollect',
+ keys=[
+ 'img', 'gt_bboxes', 'gt_labels', 'gt_match_indices',
+ 'gt_instance_ids'
+ ]),
+ dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+]
+data_root = 'data/MOT17/'
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type='MOTChallengeDataset',
+ visibility_thr=-1,
+ ann_file='data/MOT17/annotations/train_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=dict(
+ num_ref_imgs=1,
+ frame_range=10,
+ filter_key_img=True,
+ method='uniform'),
+ pipeline=[
+ dict(type='LoadMultiImagesFromFile', to_float32=True),
+ dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
+ dict(
+ type='SeqResize',
+ img_scale=(1088, 1088),
+ share_params=True,
+ ratio_range=(0.8, 1.2),
+ keep_ratio=True,
+ bbox_clip_border=False),
+ dict(type='SeqPhotoMetricDistortion', share_params=True),
+ dict(
+ type='SeqRandomCrop',
+ share_params=False,
+ crop_size=(1088, 1088),
+ bbox_clip_border=False),
+ dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
+ dict(
+ type='SeqNormalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='SeqPad', size_divisor=32),
+ dict(type='MatchInstances', skip_nomatch=True),
+ dict(
+ type='VideoCollect',
+ keys=[
+ 'img', 'gt_bboxes', 'gt_labels', 'gt_match_indices',
+ 'gt_instance_ids'
+ ]),
+ dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
+ ]),
+ val=dict(
+ type='MOTChallengeDataset',
+ ann_file='data/MOT17/annotations/train_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=None,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='MOTChallengeDataset',
+ ann_file='data/MOT17/annotations/train_cocoformat.json',
+ img_prefix='data/MOT17/train',
+ ref_img_sampler=None,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1088, 1088),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='VideoCollect', keys=['img'])
+ ])
+ ]))
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+checkpoint_config = dict(interval=1)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=100,
+ warmup_ratio=0.01,
+ step=[3])
+total_epochs = 4
+evaluation = dict(metric=['bbox', 'track'], interval=1)
+search_metrics = ['MOTA', 'IDF1', 'FN', 'FP', 'IDs', 'MT', 'ML']
+test_set = 'train'
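+
+# Illustrative note (editor addition): unlike the DeepSORT config above, this
+# Tracktor variant lists its detector and ReID weights under `pretrains`, so
+# it can also be handed to the tracking demo with `checkpoint=None`. A quick
+# inspection sketch with mmcv:
+#
+#     from mmcv import Config
+#
+#     cfg = Config.fromfile('<this_config>.py')
+#     print(cfg.model.type)             # 'Tracktor'
+#     print(list(cfg.model.pretrains))  # ['detector', 'reid']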
diff --git a/vendor/ViTPose/demo/resources/demo.mp4 b/vendor/ViTPose/demo/resources/demo.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..2ba10c2a68726ccb398163e4505cfd190ec4dba1
Binary files /dev/null and b/vendor/ViTPose/demo/resources/demo.mp4 differ
diff --git a/vendor/ViTPose/demo/resources/sunglasses.jpg b/vendor/ViTPose/demo/resources/sunglasses.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5d3cee870232cb35415e3ae71ea07e9fbb45dfdf
Binary files /dev/null and b/vendor/ViTPose/demo/resources/sunglasses.jpg differ
diff --git a/vendor/ViTPose/demo/top_down_img_demo.py b/vendor/ViTPose/demo/top_down_img_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..da1697814f02708475b6ee83fae8ea81360d9c4b
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_img_demo.py
@@ -0,0 +1,129 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+from xtcocotools.coco import COCO
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+
+def main():
+ """Visualize the demo images.
+
+    Requires a COCO-format json file containing the person bounding boxes.
+ """
+ parser = ArgumentParser()
+    parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument(
+ '--json-file',
+ type=str,
+ default='',
+ help='Json file containing image info.')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default='',
+ help='Root of the output img file. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_img_root != '')
+
+ coco = COCO(args.json_file)
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ img_keys = list(coco.imgs.keys())
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ # process each image
+ for i in range(len(img_keys)):
+ # get bounding box annotations
+ image_id = img_keys[i]
+ image = coco.loadImgs(image_id)[0]
+ image_name = os.path.join(args.img_root, image['file_name'])
+ ann_ids = coco.getAnnIds(image_id)
+
+ # make person bounding boxes
+ person_results = []
+ for ann_id in ann_ids:
+ person = {}
+ ann = coco.anns[ann_id]
+ # bbox format is 'xywh'
+ person['bbox'] = ann['bbox']
+ person_results.append(person)
+
+ # test a single image, with a list of bboxes
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ if args.out_img_root == '':
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg')
+
+ vis_pose_result(
+ pose_model,
+ image_name,
+ pose_results,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ radius=args.radius,
+ thickness=args.thickness,
+ show=args.show,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
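+
+# Illustrative usage sketch (editor note, not part of the upstream script).
+# The core flow above, distilled: COCO-style annotations provide xywh boxes
+# that are fed straight to the top-down pose model. Paths and `my_boxes` are
+# placeholders.
+#
+#     from mmpose.apis import inference_top_down_pose_model, init_pose_model
+#
+#     pose_model = init_pose_model('<pose_config>.py', '<pose_ckpt>.pth')
+#     person_results = [{'bbox': [x, y, w, h]} for (x, y, w, h) in my_boxes]
+#     pose_results, _ = inference_top_down_pose_model(
+#         pose_model, 'image.jpg', person_results, format='xywh')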
diff --git a/vendor/ViTPose/demo/top_down_img_demo_with_mmdet.py b/vendor/ViTPose/demo/top_down_img_demo_with_mmdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..227f44b2cfdcaa66e60ed2e1a13074bc292a1893
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_img_demo_with_mmdet.py
@@ -0,0 +1,138 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+
+def main():
+ """Visualize the demo images.
+
+    Using mmdet to detect humans.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('det_config', help='Config file for detection')
+ parser.add_argument('det_checkpoint', help='Checkpoint file for detection')
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument('--img', type=str, default='', help='Image file')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--out-img-root',
+ type=str,
+ default='',
+ help='root of the output img file. '
+ 'Default not saving the visualization images.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--det-cat-id',
+ type=int,
+ default=1,
+ help='Category id for bounding box detection model')
+ parser.add_argument(
+ '--bbox-thr',
+ type=float,
+ default=0.3,
+ help='Bounding box score threshold')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_img_root != '')
+ assert args.img != ''
+ assert args.det_config is not None
+ assert args.det_checkpoint is not None
+
+ det_model = init_detector(
+ args.det_config, args.det_checkpoint, device=args.device.lower())
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ image_name = os.path.join(args.img_root, args.img)
+
+ # test a single image, the resulting box is (x1, y1, x2, y2)
+ mmdet_results = inference_detector(det_model, image_name)
+
+ # keep the person class bounding boxes.
+ person_results = process_mmdet_results(mmdet_results, args.det_cat_id)
+
+ # test a single image, with a list of bboxes.
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_results,
+ bbox_thr=args.bbox_thr,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ if args.out_img_root == '':
+ out_file = None
+ else:
+ os.makedirs(args.out_img_root, exist_ok=True)
+ out_file = os.path.join(args.out_img_root, f'vis_{args.img}')
+
+ # show the results
+ vis_pose_result(
+ pose_model,
+ image_name,
+ pose_results,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ radius=args.radius,
+ thickness=args.thickness,
+ show=args.show,
+ out_file=out_file)
+
+
+if __name__ == '__main__':
+ main()
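+
+# Illustrative sketch (editor note): the detector-to-pose handoff used above,
+# reduced to its essentials. `process_mmdet_results` keeps only the requested
+# category (1 = person) and returns xyxy boxes. Paths are placeholders.
+#
+#     from mmdet.apis import inference_detector, init_detector
+#     from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+#                              process_mmdet_results)
+#
+#     det_model = init_detector('<det_config>.py', '<det_ckpt>.pth')
+#     pose_model = init_pose_model('<pose_config>.py', '<pose_ckpt>.pth')
+#     person_results = process_mmdet_results(
+#         inference_detector(det_model, 'image.jpg'), cat_id=1)
+#     pose_results, _ = inference_top_down_pose_model(
+#         pose_model, 'image.jpg', person_results, bbox_thr=0.3, format='xyxy')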
diff --git a/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmdet.py b/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ddcd934ee3cc28d627d7620186512175391a96f
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmdet.py
@@ -0,0 +1,190 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (get_track_id, inference_top_down_pose_model,
+ init_pose_model, process_mmdet_results,
+ vis_pose_tracking_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+
+def main():
+ """Visualize the demo images.
+
+ Using mmdet to detect the human.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('det_config', help='Config file for detection')
+ parser.add_argument('det_checkpoint', help='Checkpoint file for detection')
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--det-cat-id',
+ type=int,
+ default=1,
+ help='Category id for bounding box detection model')
+ parser.add_argument(
+ '--bbox-thr',
+ type=float,
+ default=0.3,
+ help='Bounding box score threshold')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--use-oks-tracking', action='store_true', help='Using OKS tracking')
+ parser.add_argument(
+ '--tracking-thr', type=float, default=0.3, help='Tracking threshold')
+ parser.add_argument(
+ '--euro',
+ action='store_true',
+ help='Using One_Euro_Filter for smoothing')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+ assert args.det_config is not None
+ assert args.det_checkpoint is not None
+
+ det_model = init_detector(
+ args.det_config, args.det_checkpoint, device=args.device.lower())
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+ fps = None
+
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ next_id = 0
+ pose_results = []
+ while (cap.isOpened()):
+ pose_results_last = pose_results
+
+ flag, img = cap.read()
+ if not flag:
+ break
+ # test a single image, the resulting box is (x1, y1, x2, y2)
+ mmdet_results = inference_detector(det_model, img)
+
+ # keep the person class bounding boxes.
+ person_results = process_mmdet_results(mmdet_results, args.det_cat_id)
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ img,
+ person_results,
+ bbox_thr=args.bbox_thr,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # get track id for each person instance
+ pose_results, next_id = get_track_id(
+ pose_results,
+ pose_results_last,
+ next_id,
+ use_oks=args.use_oks_tracking,
+ tracking_thr=args.tracking_thr,
+ use_one_euro=args.euro,
+ fps=fps)
+
+ # show the results
+ vis_img = vis_pose_tracking_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
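+
+# Illustrative sketch (editor note): the tracking step above only needs the
+# pose results of the previous frame plus a running id counter.
+# `per_frame_pose_results` is a placeholder iterable of the per-frame outputs
+# of inference_top_down_pose_model:
+#
+#     from mmpose.apis import get_track_id
+#
+#     pose_results_last, next_id = [], 0
+#     for frame_results in per_frame_pose_results:
+#         frame_results, next_id = get_track_id(
+#             frame_results, pose_results_last, next_id,
+#             use_oks=False, tracking_thr=0.3)
+#         pose_results_last = frame_results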
diff --git a/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmtracking.py b/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmtracking.py
new file mode 100644
index 0000000000000000000000000000000000000000..9902e0674ecd070ba96e86a6672420cfe8ebbedf
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_pose_tracking_demo_with_mmtracking.py
@@ -0,0 +1,185 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ vis_pose_tracking_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ from mmtrack.apis import inference_mot
+ from mmtrack.apis import init_model as init_tracking_model
+ has_mmtrack = True
+except (ImportError, ModuleNotFoundError):
+ has_mmtrack = False
+
+
+def process_mmtracking_results(mmtracking_results):
+ """Process mmtracking results.
+
+ :param mmtracking_results:
+ :return: a list of tracked bounding boxes
+ """
+ person_results = []
+ # 'track_results' is changed to 'track_bboxes'
+ # in https://github.com/open-mmlab/mmtracking/pull/300
+ if 'track_bboxes' in mmtracking_results:
+ tracking_results = mmtracking_results['track_bboxes'][0]
+ elif 'track_results' in mmtracking_results:
+ tracking_results = mmtracking_results['track_results'][0]
+
+ for track in tracking_results:
+ person = {}
+ person['track_id'] = int(track[0])
+ person['bbox'] = track[1:]
+ person_results.append(person)
+ return person_results
+
+
+def main():
+ """Visualize the demo images.
+
+ Using mmdet to detect the human.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('tracking_config', help='Config file for tracking')
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--bbox-thr',
+ type=float,
+ default=0.3,
+ help='Bounding box score threshold')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_mmtrack, 'Please install mmtrack to run the demo.'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+ assert args.tracking_config is not None
+
+ tracking_model = init_tracking_model(
+ args.tracking_config, None, device=args.device.lower())
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ frame_id = 0
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+
+ mmtracking_results = inference_mot(
+ tracking_model, img, frame_id=frame_id)
+
+        # convert the tracked bounding boxes to mmpose person results.
+ person_results = process_mmtracking_results(mmtracking_results)
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ img,
+ person_results,
+ bbox_thr=args.bbox_thr,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # show the results
+ vis_img = vis_pose_tracking_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ frame_id += 1
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
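+
+# Illustrative sketch (editor note): because mmtracking already assigns track
+# ids, this demo skips `get_track_id` entirely. The per-frame core, with
+# config/checkpoint paths as placeholders and `frame` a BGR image array;
+# `process_mmtracking_results` is defined above in this file:
+#
+#     from mmtrack.apis import inference_mot, init_model
+#     from mmpose.apis import inference_top_down_pose_model, init_pose_model
+#
+#     tracker = init_model('<tracking_config>.py', None)
+#     pose_model = init_pose_model('<pose_config>.py', '<pose_ckpt>.pth')
+#     mot_results = inference_mot(tracker, frame, frame_id=0)
+#     person_results = process_mmtracking_results(mot_results)
+#     pose_results, _ = inference_top_down_pose_model(
+#         pose_model, frame, person_results, bbox_thr=0.3, format='xyxy')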
diff --git a/vendor/ViTPose/demo/top_down_video_demo_full_frame_without_det.py b/vendor/ViTPose/demo/top_down_video_demo_full_frame_without_det.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d81810899578c504330c2bda6163cd4496e78b2
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_video_demo_full_frame_without_det.py
@@ -0,0 +1,139 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+import numpy as np
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+
+def main():
+ """Visualize the demo images.
+
+ Using mmdet to detect the human.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+
+        # use the full frame as a single person bounding box.
+ person_results = [{'bbox': np.array([0, 0, size[0], size[1]])}]
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ img,
+ person_results,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # show the results
+ vis_img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=args.radius,
+ thickness=args.thickness,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
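+
+# Illustrative sketch (editor note): the "no detector" trick above is simply a
+# single pseudo-detection covering the whole frame, which works when one
+# subject fills the image (e.g. pre-cropped clips). `img` is a BGR frame from
+# cv2:
+#
+#     import numpy as np
+#
+#     h, w = img.shape[:2]
+#     person_results = [{'bbox': np.array([0, 0, w, h])}]  # xyxy full frame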
diff --git a/vendor/ViTPose/demo/top_down_video_demo_with_mmdet.py b/vendor/ViTPose/demo/top_down_video_demo_with_mmdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ba32322cd7941ef21b14545656fc72a077b5e71
--- /dev/null
+++ b/vendor/ViTPose/demo/top_down_video_demo_with_mmdet.py
@@ -0,0 +1,165 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+from argparse import ArgumentParser
+
+import cv2
+
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+
+def main():
+ """Visualize the demo images.
+
+ Using mmdet to detect the human.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('det_config', help='Config file for detection')
+ parser.add_argument('det_checkpoint', help='Checkpoint file for detection')
+ parser.add_argument('pose_config', help='Config file for pose')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
+ parser.add_argument('--video-path', type=str, help='Video path')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show visualizations.')
+ parser.add_argument(
+ '--out-video-root',
+ default='',
+ help='Root of the output video file. '
+ 'Default not saving the visualization video.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--det-cat-id',
+ type=int,
+ default=1,
+ help='Category id for bounding box detection model')
+ parser.add_argument(
+ '--bbox-thr',
+ type=float,
+ default=0.3,
+ help='Bounding box score threshold')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+ parser.add_argument(
+ '--radius',
+ type=int,
+ default=4,
+ help='Keypoint radius for visualization')
+ parser.add_argument(
+ '--thickness',
+ type=int,
+ default=1,
+ help='Link thickness for visualization')
+
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+
+ args = parser.parse_args()
+
+ assert args.show or (args.out_video_root != '')
+ assert args.det_config is not None
+ assert args.det_checkpoint is not None
+
+ det_model = init_detector(
+ args.det_config, args.det_checkpoint, device=args.device.lower())
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
+ if dataset_info is None:
+ warnings.warn(
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ else:
+ dataset_info = DatasetInfo(dataset_info)
+
+ cap = cv2.VideoCapture(args.video_path)
+    assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+ if args.out_video_root == '':
+ save_out_video = False
+ else:
+ os.makedirs(args.out_video_root, exist_ok=True)
+ save_out_video = True
+
+ if save_out_video:
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ videoWriter = cv2.VideoWriter(
+ os.path.join(args.out_video_root,
+ f'vis_{os.path.basename(args.video_path)}'), fourcc,
+ fps, size)
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ while (cap.isOpened()):
+ flag, img = cap.read()
+ if not flag:
+ break
+ # test a single image, the resulting box is (x1, y1, x2, y2)
+ mmdet_results = inference_detector(det_model, img)
+
+ # keep the person class bounding boxes.
+ person_results = process_mmdet_results(mmdet_results, args.det_cat_id)
+
+ # test a single image, with a list of bboxes.
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ img,
+ person_results,
+ bbox_thr=args.bbox_thr,
+ format='xyxy',
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # show the results
+ vis_img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ kpt_score_thr=args.kpt_thr,
+ radius=args.radius,
+ thickness=args.thickness,
+ show=False)
+
+ if args.show:
+ cv2.imshow('Image', vis_img)
+
+ if save_out_video:
+ videoWriter.write(vis_img)
+
+ if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+ cap.release()
+ if save_out_video:
+ videoWriter.release()
+ if args.show:
+ cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+ main()
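+
+# Illustrative note (editor addition): `--det-cat-id` defaults to 1 because
+# mmpose's `process_mmdet_results` indexes detector outputs with a 1-based
+# COCO-style category id (1 = person). A minimal check, assuming
+# `mmdet_results` comes from `inference_detector` as in the loop above:
+#
+#     from mmpose.apis import process_mmdet_results
+#
+#     person_results = process_mmdet_results(mmdet_results, cat_id=1)
+#     # each entry: {'bbox': array([x1, y1, x2, y2, score])}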
diff --git a/vendor/ViTPose/demo/webcam_demo.py b/vendor/ViTPose/demo/webcam_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..bff300121d8d3e5d1cbfaa445dbb6dbd50ad1c20
--- /dev/null
+++ b/vendor/ViTPose/demo/webcam_demo.py
@@ -0,0 +1,585 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import time
+from collections import deque
+from queue import Queue
+from threading import Event, Lock, Thread
+
+import cv2
+import numpy as np
+
+from mmpose.apis import (get_track_id, inference_top_down_pose_model,
+ init_pose_model, vis_pose_result)
+from mmpose.core import apply_bugeye_effect, apply_sunglasses_effect
+from mmpose.utils import StopWatch
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+try:
+ import psutil
+ psutil_proc = psutil.Process()
+except (ImportError, ModuleNotFoundError):
+ psutil_proc = None
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--cam-id', type=str, default='0')
+ parser.add_argument(
+ '--det-config',
+ type=str,
+ default='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ help='Config file for detection')
+ parser.add_argument(
+ '--det-checkpoint',
+ type=str,
+ default='https://download.openmmlab.com/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ help='Checkpoint file for detection')
+ parser.add_argument(
+ '--enable-human-pose',
+ type=int,
+ default=1,
+ help='Enable human pose estimation')
+ parser.add_argument(
+ '--enable-animal-pose',
+ type=int,
+ default=0,
+ help='Enable animal pose estimation')
+ parser.add_argument(
+ '--human-pose-config',
+ type=str,
+ default='configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'coco-wholebody/vipnas_res50_coco_wholebody_256x192_dark.py',
+ help='Config file for human pose')
+ parser.add_argument(
+ '--human-pose-checkpoint',
+ type=str,
+ default='https://download.openmmlab.com/'
+ 'mmpose/top_down/vipnas/'
+ 'vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth',
+ help='Checkpoint file for human pose')
+ parser.add_argument(
+ '--human-det-ids',
+ type=int,
+ default=[1],
+ nargs='+',
+        help='Object category label of human in detection results. '
+ 'Default is [1(person)], following COCO definition.')
+ parser.add_argument(
+ '--animal-pose-config',
+ type=str,
+ default='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'animalpose/hrnet_w32_animalpose_256x256.py',
+ help='Config file for animal pose')
+ parser.add_argument(
+ '--animal-pose-checkpoint',
+ type=str,
+ default='https://download.openmmlab.com/mmpose/animal/hrnet/'
+ 'hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
+ help='Checkpoint file for animal pose')
+ parser.add_argument(
+ '--animal-det-ids',
+ type=int,
+ default=[16, 17, 18, 19, 20],
+ nargs='+',
+        help='Object category label of animals in detection results. '
+ 'Default is [16(cat), 17(dog), 18(horse), 19(sheep), 20(cow)], '
+ 'following COCO definition.')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--det-score-thr',
+ type=float,
+ default=0.5,
+ help='bbox score threshold')
+ parser.add_argument(
+        '--kpt-thr', type=float, default=0.3, help='keypoint score threshold')
+ parser.add_argument(
+ '--vis-mode',
+ type=int,
+ default=2,
+ help='0-none. 1-detection only. 2-detection and pose.')
+ parser.add_argument(
+ '--sunglasses', action='store_true', help='Apply `sunglasses` effect.')
+ parser.add_argument(
+ '--bugeye', action='store_true', help='Apply `bug-eye` effect.')
+
+ parser.add_argument(
+ '--out-video-file',
+ type=str,
+ default=None,
+ help='Record the video into a file. This may reduce the frame rate')
+
+ parser.add_argument(
+ '--out-video-fps',
+ type=int,
+ default=20,
+ help='Set the FPS of the output video file.')
+
+ parser.add_argument(
+ '--buffer-size',
+ type=int,
+ default=-1,
+        help='Frame buffer size. If set to -1, the buffer size will be '
+ 'automatically inferred from the display delay time. Default: -1')
+
+ parser.add_argument(
+ '--inference-fps',
+ type=int,
+ default=10,
+        help='Maximum inference FPS. This limits resource consumption, '
+        'especially when the detection and pose models are lightweight and '
+ 'very fast. Default: 10.')
+
+ parser.add_argument(
+ '--display-delay',
+ type=int,
+ default=0,
+ help='Delay the output video in milliseconds. This can be used to '
+ 'align the output video and inference results. The delay can be '
+ 'disabled by setting a non-positive delay time. Default: 0')
+
+ parser.add_argument(
+ '--synchronous-mode',
+ action='store_true',
+        help='Enable synchronous mode so that video I/O and inference are '
+ 'temporally aligned. Note that this will reduce the display FPS.')
+
+ return parser.parse_args()
+
+
+def process_mmdet_results(mmdet_results, class_names=None, cat_ids=1):
+ """Process mmdet results to mmpose input format.
+
+ Args:
+ mmdet_results: raw output of mmdet model
+ class_names: class names of mmdet model
+ cat_ids (int or List[int]): category id list that will be preserved
+ Returns:
+ List[Dict]: detection results for mmpose input
+ """
+ if isinstance(mmdet_results, tuple):
+ mmdet_results = mmdet_results[0]
+
+ if not isinstance(cat_ids, (list, tuple)):
+ cat_ids = [cat_ids]
+
+ # only keep bboxes of interested classes
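+    # cat_ids follow the 1-based COCO category convention, while the mmdet
+    # result list is 0-indexed, hence the `i - 1` below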
+ bbox_results = [mmdet_results[i - 1] for i in cat_ids]
+ bboxes = np.vstack(bbox_results)
+
+ # get textual labels of classes
+ labels = np.concatenate([
+ np.full(bbox.shape[0], i - 1, dtype=np.int32)
+ for i, bbox in zip(cat_ids, bbox_results)
+ ])
+ if class_names is None:
+ labels = [f'class: {i}' for i in labels]
+ else:
+ labels = [class_names[i] for i in labels]
+
+ det_results = []
+ for bbox, label in zip(bboxes, labels):
+ det_result = dict(bbox=bbox, label=label)
+ det_results.append(det_result)
+ return det_results
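+
+
+# Illustrative usage of `process_mmdet_results` (values are placeholders and
+# assume a detector whose category 1 is 'person', as in COCO):
+#   mmdet_results = [np.array([[x1, y1, x2, y2, score]])]  # one array per class
+#   det_results = process_mmdet_results(
+#       mmdet_results, class_names=('person',), cat_ids=1)
+#   # -> [{'bbox': array([x1, y1, x2, y2, score]), 'label': 'person'}]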
+
+
+def read_camera():
+ # init video reader
+ print('Thread "input" started')
+ cam_id = args.cam_id
+ if cam_id.isdigit():
+ cam_id = int(cam_id)
+ vid_cap = cv2.VideoCapture(cam_id)
+ if not vid_cap.isOpened():
+ print(f'Cannot open camera (ID={cam_id})')
+ exit()
+
+ while not event_exit.is_set():
+ # capture a camera frame
+ ret_val, frame = vid_cap.read()
+ if ret_val:
+ ts_input = time.time()
+
+ event_inference_done.clear()
+ with input_queue_mutex:
+ input_queue.append((ts_input, frame))
+
+ if args.synchronous_mode:
+ event_inference_done.wait()
+
+ frame_buffer.put((ts_input, frame))
+ else:
+ # input ending signal
+ frame_buffer.put((None, None))
+ break
+
+ vid_cap.release()
+
+
+def inference_detection():
+ print('Thread "det" started')
+ stop_watch = StopWatch(window=10)
+ min_interval = 1.0 / args.inference_fps
+ _ts_last = None # timestamp when last inference was done
+
+ while True:
+ while len(input_queue) < 1:
+ time.sleep(0.001)
+ with input_queue_mutex:
+ ts_input, frame = input_queue.popleft()
+ # inference detection
+ with stop_watch.timeit('Det'):
+ mmdet_results = inference_detector(det_model, frame)
+
+ t_info = stop_watch.report_strings()
+ with det_result_queue_mutex:
+ det_result_queue.append((ts_input, frame, t_info, mmdet_results))
+
+ # limit the inference FPS
+ _ts = time.time()
+ if _ts_last is not None and _ts - _ts_last < min_interval:
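+            # sleep just long enough that consecutive detections are at
+            # least `min_interval` seconds apart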
+ time.sleep(min_interval - _ts + _ts_last)
+ _ts_last = time.time()
+
+
+def inference_pose():
+ print('Thread "pose" started')
+ stop_watch = StopWatch(window=10)
+
+ while True:
+ while len(det_result_queue) < 1:
+ time.sleep(0.001)
+ with det_result_queue_mutex:
+ ts_input, frame, t_info, mmdet_results = det_result_queue.popleft()
+
+ pose_results_list = []
+ for model_info, pose_history in zip(pose_model_list,
+ pose_history_list):
+ model_name = model_info['name']
+ pose_model = model_info['model']
+ cat_ids = model_info['cat_ids']
+ pose_results_last = pose_history['pose_results_last']
+ next_id = pose_history['next_id']
+
+ with stop_watch.timeit(model_name):
+ # process mmdet results
+ det_results = process_mmdet_results(
+ mmdet_results,
+ class_names=det_model.CLASSES,
+ cat_ids=cat_ids)
+
+ # inference pose model
+ dataset_name = pose_model.cfg.data['test']['type']
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ frame,
+ det_results,
+ bbox_thr=args.det_score_thr,
+ format='xyxy',
+ dataset=dataset_name)
+
+ pose_results, next_id = get_track_id(
+ pose_results,
+ pose_results_last,
+ next_id,
+ use_oks=False,
+ tracking_thr=0.3,
+ use_one_euro=True,
+ fps=None)
+
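+                # each entry of `pose_results` is expected to be a dict with
+                # 'bbox', 'keypoints' (Kx3 array of x, y, score) and, after
+                # get_track_id, a 'track_id' field (mmpose convention)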
+ pose_results_list.append(pose_results)
+
+ # update pose history
+ pose_history['pose_results_last'] = pose_results
+ pose_history['next_id'] = next_id
+
+ t_info += stop_watch.report_strings()
+ with pose_result_queue_mutex:
+ pose_result_queue.append((ts_input, t_info, pose_results_list))
+
+ event_inference_done.set()
+
+
+def display():
+ print('Thread "display" started')
+ stop_watch = StopWatch(window=10)
+
+ # initialize result status
+ ts_inference = None # timestamp of the latest inference result
+    fps_inference = 0.  # inference FPS
+ t_delay_inference = 0. # inference result time delay
+ pose_results_list = None # latest inference result
+ t_info = [] # upstream time information (list[str])
+
+ # initialize visualization and output
+ sunglasses_img = None # resource image for sunglasses effect
+ text_color = (228, 183, 61) # text color to show time/system information
+ vid_out = None # video writer
+
+ # show instructions
+ print('Keyboard shortcuts: ')
+ print('"v": Toggle the visualization of bounding boxes and poses.')
+ print('"s": Toggle the sunglasses effect.')
+ print('"b": Toggle the bug-eye effect.')
+ print('"Q", "q" or Esc: Exit.')
+
+ while True:
+ with stop_watch.timeit('_FPS_'):
+ # acquire a frame from buffer
+ ts_input, frame = frame_buffer.get()
+ # input ending signal
+ if ts_input is None:
+ break
+
+ img = frame
+
+ # get pose estimation results
+ if len(pose_result_queue) > 0:
+ with pose_result_queue_mutex:
+ _result = pose_result_queue.popleft()
+ _ts_input, t_info, pose_results_list = _result
+
+ _ts = time.time()
+ if ts_inference is not None:
+ fps_inference = 1.0 / (_ts - ts_inference)
+ ts_inference = _ts
+ t_delay_inference = (_ts - _ts_input) * 1000
+
+ # visualize detection and pose results
+ if pose_results_list is not None:
+ for model_info, pose_results in zip(pose_model_list,
+ pose_results_list):
+ pose_model = model_info['model']
+ bbox_color = model_info['bbox_color']
+
+ dataset_name = pose_model.cfg.data['test']['type']
+
+ # show pose results
+ if args.vis_mode == 1:
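+                        # a huge kpt_score_thr suppresses all keypoints, so
+                        # only the detection bboxes are drawn in this mode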
+ img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=4,
+ thickness=2,
+ dataset=dataset_name,
+ kpt_score_thr=1e7,
+ bbox_color=bbox_color)
+ elif args.vis_mode == 2:
+ img = vis_pose_result(
+ pose_model,
+ img,
+ pose_results,
+ radius=4,
+ thickness=2,
+ dataset=dataset_name,
+ kpt_score_thr=args.kpt_thr,
+ bbox_color=bbox_color)
+
+ # sunglasses effect
+ if args.sunglasses:
+ if dataset_name in {
+ 'TopDownCocoDataset',
+ 'TopDownCocoWholeBodyDataset'
+ }:
+ left_eye_idx = 1
+ right_eye_idx = 2
+ elif dataset_name == 'AnimalPoseDataset':
+ left_eye_idx = 0
+ right_eye_idx = 1
+ else:
+ raise ValueError(
+                                'Sunglasses effect does not support '
+ f'{dataset_name}')
+ if sunglasses_img is None:
+                            # The image is attributed to:
+ # https://www.vecteezy.com/free-vector/glass
+ # Glass Vectors by Vecteezy
+ sunglasses_img = cv2.imread(
+ 'demo/resources/sunglasses.jpg')
+ img = apply_sunglasses_effect(img, pose_results,
+ sunglasses_img,
+ left_eye_idx,
+ right_eye_idx)
+ # bug-eye effect
+ if args.bugeye:
+ if dataset_name in {
+ 'TopDownCocoDataset',
+ 'TopDownCocoWholeBodyDataset'
+ }:
+ left_eye_idx = 1
+ right_eye_idx = 2
+ elif dataset_name == 'AnimalPoseDataset':
+ left_eye_idx = 0
+ right_eye_idx = 1
+ else:
+                            raise ValueError('Bug-eye effect does not support '
+ f'{dataset_name}')
+ img = apply_bugeye_effect(img, pose_results,
+ left_eye_idx, right_eye_idx)
+
+ # delay control
+ if args.display_delay > 0:
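+                # hold the frame until it is roughly `display_delay` ms old,
+                # giving slower inference results time to catch up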
+ t_sleep = args.display_delay * 0.001 - (time.time() - ts_input)
+ if t_sleep > 0:
+ time.sleep(t_sleep)
+ t_delay = (time.time() - ts_input) * 1000
+
+ # show time information
+ t_info_display = stop_watch.report_strings() # display fps
+ t_info_display.append(f'Inference FPS: {fps_inference:>5.1f}')
+ t_info_display.append(f'Delay: {t_delay:>3.0f}')
+ t_info_display.append(
+ f'Inference Delay: {t_delay_inference:>3.0f}')
+ t_info_str = ' | '.join(t_info_display + t_info)
+ cv2.putText(img, t_info_str, (20, 20), cv2.FONT_HERSHEY_DUPLEX,
+ 0.3, text_color, 1)
+ # collect system information
+ sys_info = [
+ f'RES: {img.shape[1]}x{img.shape[0]}',
+ f'Buffer: {frame_buffer.qsize()}/{frame_buffer.maxsize}'
+ ]
+ if psutil_proc is not None:
+ sys_info += [
+ f'CPU: {psutil_proc.cpu_percent():.1f}%',
+ f'MEM: {psutil_proc.memory_percent():.1f}%'
+ ]
+ sys_info_str = ' | '.join(sys_info)
+ cv2.putText(img, sys_info_str, (20, 40), cv2.FONT_HERSHEY_DUPLEX,
+ 0.3, text_color, 1)
+
+ # save the output video frame
+ if args.out_video_file is not None:
+ if vid_out is None:
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+ fps = args.out_video_fps
+ frame_size = (img.shape[1], img.shape[0])
+ vid_out = cv2.VideoWriter(args.out_video_file, fourcc, fps,
+ frame_size)
+
+ vid_out.write(img)
+
+ # display
+ cv2.imshow('mmpose webcam demo', img)
+ keyboard_input = cv2.waitKey(1)
+ if keyboard_input in (27, ord('q'), ord('Q')):
+ break
+ elif keyboard_input == ord('s'):
+ args.sunglasses = not args.sunglasses
+ elif keyboard_input == ord('b'):
+ args.bugeye = not args.bugeye
+ elif keyboard_input == ord('v'):
+ args.vis_mode = (args.vis_mode + 1) % 3
+
+ cv2.destroyAllWindows()
+ if vid_out is not None:
+ vid_out.release()
+ event_exit.set()
+
+
+def main():
+ global args
+ global frame_buffer
+ global input_queue, input_queue_mutex
+ global det_result_queue, det_result_queue_mutex
+ global pose_result_queue, pose_result_queue_mutex
+ global det_model, pose_model_list, pose_history_list
+ global event_exit, event_inference_done
+
+ args = parse_args()
+
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+ assert args.det_config is not None
+ assert args.det_checkpoint is not None
+
+ # build detection model
+ det_model = init_detector(
+ args.det_config, args.det_checkpoint, device=args.device.lower())
+
+ # build pose models
+ pose_model_list = []
+ if args.enable_human_pose:
+ pose_model = init_pose_model(
+ args.human_pose_config,
+ args.human_pose_checkpoint,
+ device=args.device.lower())
+ model_info = {
+ 'name': 'HumanPose',
+ 'model': pose_model,
+ 'cat_ids': args.human_det_ids,
+ 'bbox_color': (148, 139, 255),
+ }
+ pose_model_list.append(model_info)
+ if args.enable_animal_pose:
+ pose_model = init_pose_model(
+ args.animal_pose_config,
+ args.animal_pose_checkpoint,
+ device=args.device.lower())
+ model_info = {
+ 'name': 'AnimalPose',
+ 'model': pose_model,
+ 'cat_ids': args.animal_det_ids,
+ 'bbox_color': 'cyan',
+ }
+ pose_model_list.append(model_info)
+
+ # store pose history for pose tracking
+ pose_history_list = []
+ for _ in range(len(pose_model_list)):
+ pose_history_list.append({'pose_results_last': [], 'next_id': 0})
+
+ # frame buffer
+ if args.buffer_size > 0:
+ buffer_size = args.buffer_size
+ else:
+ # infer buffer size from the display delay time
+ # assume that the maximum video fps is 30
+ buffer_size = round(30 * (1 + max(args.display_delay, 0) / 1000.))
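+        # e.g. a display delay of 500 ms gives round(30 * 1.5) = 45 frames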
+ frame_buffer = Queue(maxsize=buffer_size)
+
+ # queue of input frames
+ # element: (timestamp, frame)
+ input_queue = deque(maxlen=1)
+ input_queue_mutex = Lock()
+
+ # queue of detection results
+ # element: tuple(timestamp, frame, time_info, det_results)
+ det_result_queue = deque(maxlen=1)
+ det_result_queue_mutex = Lock()
+
+ # queue of detection/pose results
+ # element: (timestamp, time_info, pose_results_list)
+ pose_result_queue = deque(maxlen=1)
+ pose_result_queue_mutex = Lock()
+
+ try:
+ event_exit = Event()
+ event_inference_done = Event()
+ t_input = Thread(target=read_camera, args=())
+ t_det = Thread(target=inference_detection, args=(), daemon=True)
+ t_pose = Thread(target=inference_pose, args=(), daemon=True)
+
+ t_input.start()
+ t_det.start()
+ t_pose.start()
+
+ # run display in the main thread
+ display()
+ # join the input thread (non-daemon)
+ t_input.join()
+
+ except KeyboardInterrupt:
+ pass
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/docker/Dockerfile b/vendor/ViTPose/docker/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..f7d6192910fa2401218c67a7e9e01634d83f364e
--- /dev/null
+++ b/vendor/ViTPose/docker/Dockerfile
@@ -0,0 +1,29 @@
+ARG PYTORCH="1.6.0"
+ARG CUDA="10.1"
+ARG CUDNN="7"
+
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
+ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
+
+RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install xtcocotools
+RUN pip install cython
+RUN pip install xtcocotools
+
+# Install MMCV
+RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html
+
+# Install MMPose
+RUN conda clean --all
+RUN git clone https://github.com/open-mmlab/mmpose.git /mmpose
+WORKDIR /mmpose
+RUN mkdir -p /mmpose/data
+ENV FORCE_CUDA="1"
+RUN pip install -r requirements/build.txt
+RUN pip install --no-cache-dir -e .
diff --git a/vendor/ViTPose/docker/serve/Dockerfile b/vendor/ViTPose/docker/serve/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..74a31044b09c0f50fdedeaf4c1ba6138f5c9823a
--- /dev/null
+++ b/vendor/ViTPose/docker/serve/Dockerfile
@@ -0,0 +1,47 @@
+ARG PYTORCH="1.6.0"
+ARG CUDA="10.1"
+ARG CUDNN="7"
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ENV PYTHONUNBUFFERED TRUE
+
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+ ca-certificates \
+ g++ \
+ openjdk-11-jre-headless \
+ # MMDet Requirements
+    ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+ENV PATH="/opt/conda/bin:$PATH"
+ENV FORCE_CUDA="1"
+
+
+# MMLAB
+ARG PYTORCH
+ARG CUDA
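+# note: ${CUDA//./} strips the dot from the CUDA version (e.g. 10.1 -> 101)
+# to match the cuXXX naming used by the mmcv wheel index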
+RUN ["/bin/bash", "-c", "pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
+RUN pip install mmpose
+
+# TORCHSERVE
+RUN pip install torchserve torch-model-archiver
+
+RUN useradd -m model-server \
+ && mkdir -p /home/model-server/tmp
+
+COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+
+RUN chmod +x /usr/local/bin/entrypoint.sh \
+ && chown -R model-server /home/model-server
+
+COPY config.properties /home/model-server/config.properties
+RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
+
+EXPOSE 8080 8081 8082
+
+USER model-server
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+CMD ["serve"]
diff --git a/vendor/ViTPose/docker/serve/Dockerfile_mmcls b/vendor/ViTPose/docker/serve/Dockerfile_mmcls
new file mode 100644
index 0000000000000000000000000000000000000000..7f63170176b9e810f343197ad8cafd95dbda7752
--- /dev/null
+++ b/vendor/ViTPose/docker/serve/Dockerfile_mmcls
@@ -0,0 +1,49 @@
+ARG PYTORCH="1.6.0"
+ARG CUDA="10.1"
+ARG CUDNN="7"
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ARG MMCV="1.3.8"
+ARG MMCLS="0.16.0"
+
+ENV PYTHONUNBUFFERED TRUE
+
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+ ca-certificates \
+ g++ \
+ openjdk-11-jre-headless \
+ # MMDet Requirements
+    ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+ENV PATH="/opt/conda/bin:$PATH"
+ENV FORCE_CUDA="1"
+
+# TORCHSERVE
+RUN pip install torchserve torch-model-archiver
+
+# MMLAB
+ARG PYTORCH
+ARG CUDA
+RUN ["/bin/bash", "-c", "pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
+RUN pip install mmcls==${MMCLS}
+
+RUN useradd -m model-server \
+ && mkdir -p /home/model-server/tmp
+
+COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+
+RUN chmod +x /usr/local/bin/entrypoint.sh \
+ && chown -R model-server /home/model-server
+
+COPY config.properties /home/model-server/config.properties
+RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
+
+EXPOSE 8080 8081 8082
+
+USER model-server
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+CMD ["serve"]
diff --git a/vendor/ViTPose/docker/serve/config.properties b/vendor/ViTPose/docker/serve/config.properties
new file mode 100644
index 0000000000000000000000000000000000000000..efb9c47e40ab550bac765611e6c6c6f2a7152f11
--- /dev/null
+++ b/vendor/ViTPose/docker/serve/config.properties
@@ -0,0 +1,5 @@
+inference_address=http://0.0.0.0:8080
+management_address=http://0.0.0.0:8081
+metrics_address=http://0.0.0.0:8082
+model_store=/home/model-server/model-store
+load_models=all
diff --git a/vendor/ViTPose/docker/serve/entrypoint.sh b/vendor/ViTPose/docker/serve/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..41ba00b048aed84b45c5a8015a016ff148e97d86
--- /dev/null
+++ b/vendor/ViTPose/docker/serve/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+
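+# with the default "serve" argument, start TorchServe using the baked-in
+# config; any other arguments are executed as a shell command instead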
+if [[ "$1" = "serve" ]]; then
+ shift 1
+ torchserve --start --ts-config /home/model-server/config.properties
+else
+ eval "$@"
+fi
+
+# prevent docker exit
+tail -f /dev/null
diff --git a/vendor/ViTPose/docs/en/Makefile b/vendor/ViTPose/docs/en/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919
--- /dev/null
+++ b/vendor/ViTPose/docs/en/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/vendor/ViTPose/docs/en/_static/css/readthedocs.css b/vendor/ViTPose/docs/en/_static/css/readthedocs.css
new file mode 100644
index 0000000000000000000000000000000000000000..efc4b986a5348c645842a135883d4713986a7169
--- /dev/null
+++ b/vendor/ViTPose/docs/en/_static/css/readthedocs.css
@@ -0,0 +1,6 @@
+.header-logo {
+ background-image: url("../images/mmpose-logo.png");
+ background-size: 120px 50px;
+ height: 50px;
+ width: 120px;
+}
diff --git a/vendor/ViTPose/docs/en/_static/images/mmpose-logo.png b/vendor/ViTPose/docs/en/_static/images/mmpose-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..128e1714f0933d0dfe0ab82d6f8780c48e0edc21
Binary files /dev/null and b/vendor/ViTPose/docs/en/_static/images/mmpose-logo.png differ
diff --git a/vendor/ViTPose/docs/en/api.rst b/vendor/ViTPose/docs/en/api.rst
new file mode 100644
index 0000000000000000000000000000000000000000..af0ec96bb7104ef8829c657ee9f2fe032bad69a7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/api.rst
@@ -0,0 +1,111 @@
+mmpose.apis
+-------------
+.. automodule:: mmpose.apis
+ :members:
+
+
+mmpose.core
+-------------
+evaluation
+^^^^^^^^^^^
+.. automodule:: mmpose.core.evaluation
+ :members:
+
+fp16
+^^^^^^^^^^^
+.. automodule:: mmpose.core.fp16
+ :members:
+
+
+utils
+^^^^^^^^^^^
+.. automodule:: mmpose.core.utils
+ :members:
+
+
+post_processing
+^^^^^^^^^^^^^^^^
+.. automodule:: mmpose.core.post_processing
+ :members:
+
+
+mmpose.models
+---------------
+backbones
+^^^^^^^^^^^
+.. automodule:: mmpose.models.backbones
+ :members:
+
+necks
+^^^^^^^^^^^
+.. automodule:: mmpose.models.necks
+ :members:
+
+detectors
+^^^^^^^^^^^
+.. automodule:: mmpose.models.detectors
+ :members:
+
+heads
+^^^^^^^^^^^^^^^
+.. automodule:: mmpose.models.heads
+ :members:
+
+losses
+^^^^^^^^^^^
+.. automodule:: mmpose.models.losses
+ :members:
+
+misc
+^^^^^^^^^^^
+.. automodule:: mmpose.models.misc
+ :members:
+
+mmpose.datasets
+-----------------
+.. automodule:: mmpose.datasets
+ :members:
+
+datasets
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.datasets.top_down
+ :members:
+ :noindex:
+
+.. automodule:: mmpose.datasets.datasets.bottom_up
+ :members:
+ :noindex:
+
+pipelines
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.pipelines
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.loading
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.shared_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.top_down_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.bottom_up_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.mesh_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.pose3d_transform
+ :members:
+
+samplers
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.samplers
+ :members:
+ :noindex:
+
+mmpose.utils
+---------------
+.. automodule:: mmpose.utils
+ :members:
diff --git a/vendor/ViTPose/docs/en/benchmark.md b/vendor/ViTPose/docs/en/benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e9b56d6f38ffc8fe129428cee7659f55f5c5961
--- /dev/null
+++ b/vendor/ViTPose/docs/en/benchmark.md
@@ -0,0 +1,46 @@
+# Benchmark
+
+We compare our results with some popular frameworks and official releases in terms of speed and accuracy.
+
+## Comparison Rules
+
+Here we compare our MMPose repo with other pose estimation toolboxes under the same data and model settings.
+
+To ensure a fair comparison, all experiments were conducted in the same hardware environment and on the same dataset.
+For each model setting, we kept the same data pre-processing so that every toolbox receives identical input features.
+In addition, we used Memcached, a distributed memory-caching system, to load the data in all the compared toolboxes.
+This minimizes the I/O time during the benchmark.
+
+The time we measure is the average training time per iteration, including data processing and model training.
+Training speed is reported in s/iter; the lower, the better.
+
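+As a rough illustration (not part of the benchmark tooling), the s/iter number can be obtained by timing a training-step callable and averaging over iterations, excluding a few warm-up steps; `train_one_iter`, `num_iters` and `warmup` below are hypothetical names:
+
+```python
+import time
+
+
+def average_sec_per_iter(train_one_iter, num_iters=50, warmup=5):
+    """Return the average wall-clock time (s/iter) of a training-step callable."""
+    # warm-up iterations are excluded so that one-off costs (e.g. CUDA context
+    # creation, data prefetching) do not skew the average
+    for _ in range(warmup):
+        train_one_iter()
+    start = time.time()
+    for _ in range(num_iters):
+        train_one_iter()
+    return (time.time() - start) / num_iters
+```
+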
+### Results on COCO val2017, using a detector with human AP of 56.4 on COCO val2017
+
+We demonstrate the superiority of our MMPose framework in terms of speed and accuracy on the standard COCO keypoint detection benchmark.
+mAP (mean average precision) is used as the evaluation metric.
+
+| Model | Input size | MMPose (s/iter) | HRNet (s/iter) | MMPose (mAP) | HRNet (mAP) |
+| :--- | :---------------: | :---------------: |:--------------------: | :----------------------------: | :-----------------: |
+| resnet_50 | 256x192 | **0.28** | 0.64 | **0.718** | 0.704 |
+| resnet_50 | 384x288 | **0.81** | 1.24 | **0.731** | 0.722 |
+| resnet_101 | 256x192 | **0.36** | 0.84 | **0.726** | 0.714 |
+| resnet_101 | 384x288 | **0.79** | 1.53 | **0.748** | 0.736 |
+| resnet_152 | 256x192 | **0.49** | 1.00 | **0.735** | 0.720 |
+| resnet_152 | 384x288 | **0.96** | 1.65 | **0.750** | 0.743 |
+| hrnet_w32 | 256x192 | **0.54** | 1.31 | **0.746** | 0.744 |
+| hrnet_w32 | 384x288 | **0.76** | 2.00 | **0.760** | 0.758 |
+| hrnet_w48 | 256x192 | **0.66** | 1.55 | **0.756** | 0.751 |
+| hrnet_w48 | 384x288 | **1.23** | 2.20 | **0.767** | 0.763 |
+
+## Hardware
+
+- 8 NVIDIA Tesla V100 (32G) GPUs
+- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+
+## Software Environment
+
+- Python 3.7
+- PyTorch 1.4
+- CUDA 10.1
+- CUDNN 7.6.03
+- NCCL 2.4.08
diff --git a/vendor/ViTPose/docs/en/changelog.md b/vendor/ViTPose/docs/en/changelog.md
new file mode 100644
index 0000000000000000000000000000000000000000..37f6b3cce511c58be0be3f9d82a92944f5bf8631
--- /dev/null
+++ b/vendor/ViTPose/docs/en/changelog.md
@@ -0,0 +1,665 @@
+# Changelog
+
+## v0.24.0 (07/03/2022)
+
+**Highlights**
+
+- Support HRFormer ["HRFormer: High-Resolution Transformer for Dense Prediction"](https://proceedings.neurips.cc/paper/2021/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html), NeurIPS'2021 ([\#1203](https://github.com/open-mmlab/mmpose/pull/1203)) @zengwang430521
+- Support Windows installation with pip ([\#1213](https://github.com/open-mmlab/mmpose/pull/1213)) @jin-s13, @ly015
+- Add WebcamAPI documents ([\#1187](https://github.com/open-mmlab/mmpose/pull/1187)) @ly015
+
+**New Features**
+
+- Support HRFormer ["HRFormer: High-Resolution Transformer for Dense Prediction"](https://proceedings.neurips.cc/paper/2021/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html), NeurIPS'2021 ([\#1203](https://github.com/open-mmlab/mmpose/pull/1203)) @zengwang430521
+- Support Windows installation with pip ([\#1213](https://github.com/open-mmlab/mmpose/pull/1213)) @jin-s13, @ly015
+- Support CPU training with mmcv < v1.4.4 ([\#1161](https://github.com/open-mmlab/mmpose/pull/1161)) @EasonQYS, @ly015
+- Add "Valentine Magic" demo with WebcamAPI ([\#1189](https://github.com/open-mmlab/mmpose/pull/1189), [\#1191](https://github.com/open-mmlab/mmpose/pull/1191)) @liqikai9
+
+**Improvements**
+
+- Refactor multi-view 3D pose estimation framework towards better modularization and expansibility ([\#1196](https://github.com/open-mmlab/mmpose/pull/1196)) @wusize
+- Add WebcamAPI documents and tutorials ([\#1187](https://github.com/open-mmlab/mmpose/pull/1187)) @ly015
+- Refactor dataset evaluation interface to align with other OpenMMLab codebases ([\#1209](https://github.com/open-mmlab/mmpose/pull/1209)) @ly015
+- Add deprecation message for deploy tools since [MMDeploy](https://github.com/open-mmlab/mmdeploy) has supported MMPose ([\#1207](https://github.com/open-mmlab/mmpose/pull/1207)) @QwQ2000
+- Improve documentation quality ([\#1206](https://github.com/open-mmlab/mmpose/pull/1206), [\#1161](https://github.com/open-mmlab/mmpose/pull/1161)) @ly015
+- Switch to OpenMMLab official pre-commit-hook for copyright check ([\#1214](https://github.com/open-mmlab/mmpose/pull/1214)) @ly015
+
+**Bug Fixes**
+
+- Fix hard-coded data collating and scattering in inference ([\#1175](https://github.com/open-mmlab/mmpose/pull/1175)) @ly015
+- Fix model configs on JHMDB dataset ([\#1188](https://github.com/open-mmlab/mmpose/pull/1188)) @jin-s13
+- Fix area calculation in pose tracking inference ([\#1197](https://github.com/open-mmlab/mmpose/pull/1197)) @pallgeuer
+- Fix registry scope conflict of module wrapper ([\#1204](https://github.com/open-mmlab/mmpose/pull/1204)) @ly015
+- Update MMCV installation in CI and documents ([\#1205](https://github.com/open-mmlab/mmpose/pull/1205))
+- Fix incorrect color channel order in visualization functions ([\#1212](https://github.com/open-mmlab/mmpose/pull/1212)) @ly015
+
+## v0.23.0 (11/02/2022)
+
+**Highlights**
+
+- Add [MMPose Webcam API](https://github.com/open-mmlab/mmpose/tree/master/tools/webcam): A simple yet powerful tool for developing interactive webcam applications with MMPose functions. ([\#1178](https://github.com/open-mmlab/mmpose/pull/1178), [\#1173](https://github.com/open-mmlab/mmpose/pull/1173), [\#1143](https://github.com/open-mmlab/mmpose/pull/1143), [\#1094](https://github.com/open-mmlab/mmpose/pull/1094), [\#1133](https://github.com/open-mmlab/mmpose/pull/1133), [\#1098](https://github.com/open-mmlab/mmpose/pull/1098), [\#1160](https://github.com/open-mmlab/mmpose/pull/1160)) @ly015, @jin-s13, @liqikai9, @wusize, @luminxu, @zengwang430521, @mzr1996
+
+**New Features**
+
+- Add [MMPose Webcam API](https://github.com/open-mmlab/mmpose/tree/master/tools/webcam): A simple yet powerful tool for developing interactive webcam applications with MMPose functions. ([\#1178](https://github.com/open-mmlab/mmpose/pull/1178), [\#1173](https://github.com/open-mmlab/mmpose/pull/1173), [\#1143](https://github.com/open-mmlab/mmpose/pull/1143), [\#1094](https://github.com/open-mmlab/mmpose/pull/1094), [\#1133](https://github.com/open-mmlab/mmpose/pull/1133), [\#1098](https://github.com/open-mmlab/mmpose/pull/1098), [\#1160](https://github.com/open-mmlab/mmpose/pull/1160)) @ly015, @jin-s13, @liqikai9, @wusize, @luminxu, @zengwang430521, @mzr1996
+- Support ConcatDataset ([\#1139](https://github.com/open-mmlab/mmpose/pull/1139)) @Canwang-sjtu
+- Support CPU training and testing ([\#1157](https://github.com/open-mmlab/mmpose/pull/1157)) @ly015
+
+**Improvements**
+
+- Add multi-processing configurations to speed up distributed training and testing ([\#1146](https://github.com/open-mmlab/mmpose/pull/1146)) @ly015
+- Add default runtime config ([\#1145](https://github.com/open-mmlab/mmpose/pull/1145))
+
+- Upgrade isort in pre-commit hook ([\#1179](https://github.com/open-mmlab/mmpose/pull/1179)) @liqikai9
+- Update README and documents ([\#1171](https://github.com/open-mmlab/mmpose/pull/1171), [\#1167](https://github.com/open-mmlab/mmpose/pull/1167), [\#1153](https://github.com/open-mmlab/mmpose/pull/1153), [\#1149](https://github.com/open-mmlab/mmpose/pull/1149), [\#1148](https://github.com/open-mmlab/mmpose/pull/1148), [\#1147](https://github.com/open-mmlab/mmpose/pull/1147), [\#1140](https://github.com/open-mmlab/mmpose/pull/1140)) @jin-s13, @wusize, @TommyZihao, @ly015
+
+**Bug Fixes**
+
+- Fix undeterministic behavior in pre-commit hooks ([\#1136](https://github.com/open-mmlab/mmpose/pull/1136)) @jin-s13
+- Deprecate the support for "python setup.py test" ([\#1179](https://github.com/open-mmlab/mmpose/pull/1179)) @ly015
+- Fix incompatible settings with MMCV on HSigmoid default parameters ([\#1132](https://github.com/open-mmlab/mmpose/pull/1132)) @ly015
+- Fix albumentation installation ([\#1184](https://github.com/open-mmlab/mmpose/pull/1184)) @BIGWangYuDong
+
+## v0.22.0 (04/01/2022)
+
+**Highlights**
+
+- Support VoxelPose ["VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment"](https://arxiv.org/abs/2004.06239), ECCV'2020 ([\#1050](https://github.com/open-mmlab/mmpose/pull/1050)) @wusize
+- Support Soft Wing loss ["Structure-Coherent Deep Feature Learning for Robust Face Alignment"](https://linchunze.github.io/papers/TIP21_Structure_coherent_FA.pdf), TIP'2021 ([\#1077](https://github.com/open-mmlab/mmpose/pull/1077)) @jin-s13
+- Support Adaptive Wing loss ["Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression"](https://arxiv.org/abs/1904.07399), ICCV'2019 ([\#1072](https://github.com/open-mmlab/mmpose/pull/1072)) @jin-s13
+
+**New Features**
+
+- Support VoxelPose ["VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment"](https://arxiv.org/abs/2004.06239), ECCV'2020 ([\#1050](https://github.com/open-mmlab/mmpose/pull/1050)) @wusize
+- Support Soft Wing loss ["Structure-Coherent Deep Feature Learning for Robust Face Alignment"](https://linchunze.github.io/papers/TIP21_Structure_coherent_FA.pdf), TIP'2021 ([\#1077](https://github.com/open-mmlab/mmpose/pull/1077)) @jin-s13
+- Support Adaptive Wing loss ["Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression"](https://arxiv.org/abs/1904.07399), ICCV'2019 ([\#1072](https://github.com/open-mmlab/mmpose/pull/1072)) @jin-s13
+- Add LiteHRNet-18 Checkpoints trained on COCO. ([\#1120](https://github.com/open-mmlab/mmpose/pull/1120)) @jin-s13
+
+**Improvements**
+
+- Improve documentation quality ([\#1115](https://github.com/open-mmlab/mmpose/pull/1115), [\#1111](https://github.com/open-mmlab/mmpose/pull/1111), [\#1105](https://github.com/open-mmlab/mmpose/pull/1105), [\#1087](https://github.com/open-mmlab/mmpose/pull/1087), [\#1086](https://github.com/open-mmlab/mmpose/pull/1086), [\#1085](https://github.com/open-mmlab/mmpose/pull/1085), [\#1084](https://github.com/open-mmlab/mmpose/pull/1084), [\#1083](https://github.com/open-mmlab/mmpose/pull/1083), [\#1124](https://github.com/open-mmlab/mmpose/pull/1124), [\#1070](https://github.com/open-mmlab/mmpose/pull/1070), [\#1068](https://github.com/open-mmlab/mmpose/pull/1068)) @jin-s13, @liqikai9, @ly015
+- Support CircleCI ([\#1074](https://github.com/open-mmlab/mmpose/pull/1074)) @ly015
+- Skip unit tests in CI when only document files were changed ([\#1074](https://github.com/open-mmlab/mmpose/pull/1074), [\#1041](https://github.com/open-mmlab/mmpose/pull/1041)) @QwQ2000, @ly015
+- Support file_client_args in LoadImageFromFile ([\#1076](https://github.com/open-mmlab/mmpose/pull/1076)) @jin-s13
+
+**Bug Fixes**
+
+- Fix a bug in Dark UDP postprocessing that causes error when the channel number is large. ([\#1079](https://github.com/open-mmlab/mmpose/pull/1079), [\#1116](https://github.com/open-mmlab/mmpose/pull/1116)) @X00123, @jin-s13
+- Fix hard-coded `sigmas` in bottom-up image demo ([\#1107](https://github.com/open-mmlab/mmpose/pull/1107), [\#1101](https://github.com/open-mmlab/mmpose/pull/1101)) @chenxinfeng4, @liqikai9
+- Fix unstable checks in unit tests ([\#1112](https://github.com/open-mmlab/mmpose/pull/1112)) @ly015
+- Do not destroy NULL windows if `args.show==False` in demo scripts ([\#1104](https://github.com/open-mmlab/mmpose/pull/1104)) @bladrome
+
+## v0.21.0 (06/12/2021)
+
+**Highlights**
+
+- Support ["Learning Temporal Pose Estimation from Sparsely-Labeled Videos"](https://arxiv.org/abs/1906.04016), NeurIPS'2019 ([\#932](https://github.com/open-mmlab/mmpose/pull/932), [\#1006](https://github.com/open-mmlab/mmpose/pull/1006), [\#1036](https://github.com/open-mmlab/mmpose/pull/1036), [\#1060](https://github.com/open-mmlab/mmpose/pull/1060)) @liqikai9
+- Add ViPNAS-MobileNetV3 models ([\#1025](https://github.com/open-mmlab/mmpose/pull/1025)) @luminxu, @jin-s13
+- Add [inference speed benchmark](/docs/en/inference_speed_summary.md) ([\#1028](https://github.com/open-mmlab/mmpose/pull/1028), [\#1034](https://github.com/open-mmlab/mmpose/pull/1034), [\#1044](https://github.com/open-mmlab/mmpose/pull/1044)) @liqikai9
+
+**New Features**
+
+- Support ["Learning Temporal Pose Estimation from Sparsely-Labeled Videos"](https://arxiv.org/abs/1906.04016), NeurIPS'2019 ([\#932](https://github.com/open-mmlab/mmpose/pull/932), [\#1006](https://github.com/open-mmlab/mmpose/pull/1006), [\#1036](https://github.com/open-mmlab/mmpose/pull/1036)) @liqikai9
+- Add ViPNAS-MobileNetV3 models ([\#1025](https://github.com/open-mmlab/mmpose/pull/1025)) @luminxu, @jin-s13
+- Add light-weight top-down models for whole-body keypoint detection ([\#1009](https://github.com/open-mmlab/mmpose/pull/1009), [\#1020](https://github.com/open-mmlab/mmpose/pull/1020), [\#1055](https://github.com/open-mmlab/mmpose/pull/1055)) @luminxu, @ly015
+- Add HRNet checkpoints with various settings on PoseTrack18 ([\#1035](https://github.com/open-mmlab/mmpose/pull/1035)) @liqikai9
+
+**Improvements**
+
+- Add [inference speed benchmark](/docs/en/inference_speed_summary.md) ([\#1028](https://github.com/open-mmlab/mmpose/pull/1028), [\#1034](https://github.com/open-mmlab/mmpose/pull/1034), [\#1044](https://github.com/open-mmlab/mmpose/pull/1044)) @liqikai9
+- Update model metafile format ([\#1001](https://github.com/open-mmlab/mmpose/pull/1001)) @ly015
+- Support minus output feature index in mobilenet_v3 ([\#1005](https://github.com/open-mmlab/mmpose/pull/1005)) @luminxu
+- Improve documentation quality ([\#1018](https://github.com/open-mmlab/mmpose/pull/1018), [\#1026](https://github.com/open-mmlab/mmpose/pull/1026), [\#1027](https://github.com/open-mmlab/mmpose/pull/1027), [\#1031](https://github.com/open-mmlab/mmpose/pull/1031), [\#1038](https://github.com/open-mmlab/mmpose/pull/1038), [\#1046](https://github.com/open-mmlab/mmpose/pull/1046), [\#1056](https://github.com/open-mmlab/mmpose/pull/1056), [\#1057](https://github.com/open-mmlab/mmpose/pull/1057)) @edybk, @luminxu, @ly015, @jin-s13
+- Set default random seed in training initialization ([\#1030](https://github.com/open-mmlab/mmpose/pull/1030)) @ly015
+- Skip CI when only specific files changed ([\#1041](https://github.com/open-mmlab/mmpose/pull/1041), [\#1059](https://github.com/open-mmlab/mmpose/pull/1059)) @QwQ2000, @ly015
+- Automatically cancel uncompleted action runs when new commit arrives ([\#1053](https://github.com/open-mmlab/mmpose/pull/1053)) @ly015
+
+**Bug Fixes**
+
+- Update pose tracking demo to be compatible with latest mmtracking ([\#1014](https://github.com/open-mmlab/mmpose/pull/1014)) @jin-s13
+- Fix symlink creation failure when installed in Windows environments ([\#1039](https://github.com/open-mmlab/mmpose/pull/1039)) @QwQ2000
+- Fix AP-10K dataset sigmas ([\#1040](https://github.com/open-mmlab/mmpose/pull/1040)) @jin-s13
+
+## v0.20.0 (01/11/2021)
+
+**Highlights**
+
+- Add AP-10K dataset for animal pose estimation ([\#987](https://github.com/open-mmlab/mmpose/pull/987)) @Annbless, @AlexTheBad, @jin-s13, @ly015
+- Support TorchServe ([\#979](https://github.com/open-mmlab/mmpose/pull/979)) @ly015
+
+**New Features**
+
+- Add AP-10K dataset for animal pose estimation ([\#987](https://github.com/open-mmlab/mmpose/pull/987)) @Annbless, @AlexTheBad, @jin-s13, @ly015
+- Add HRNetv2 checkpoints on 300W and COFW datasets ([\#980](https://github.com/open-mmlab/mmpose/pull/980)) @jin-s13
+- Support TorchServe ([\#979](https://github.com/open-mmlab/mmpose/pull/979)) @ly015
+
+**Bug Fixes**
+
+- Fix some deprecated or risky settings in configs ([\#963](https://github.com/open-mmlab/mmpose/pull/963), [\#976](https://github.com/open-mmlab/mmpose/pull/976), [\#992](https://github.com/open-mmlab/mmpose/pull/992)) @jin-s13, @wusize
+- Fix issues of default arguments of training and testing scripts ([\#970](https://github.com/open-mmlab/mmpose/pull/970), [\#985](https://github.com/open-mmlab/mmpose/pull/985)) @liqikai9, @wusize
+- Fix heatmap and tag size mismatch in bottom-up with UDP ([\#994](https://github.com/open-mmlab/mmpose/pull/994)) @wusize
+- Fix python3.9 installation in CI ([\#983](https://github.com/open-mmlab/mmpose/pull/983)) @ly015
+- Fix model zoo document integrity issue ([\#990](https://github.com/open-mmlab/mmpose/pull/990)) @jin-s13
+
+**Improvements**
+
+- Support non-square input shape for bottom-up ([\#991](https://github.com/open-mmlab/mmpose/pull/991)) @wusize
+- Add image and video resources for demo ([\#971](https://github.com/open-mmlab/mmpose/pull/971)) @liqikai9
+- Use CUDA docker images to accelerate CI ([\#973](https://github.com/open-mmlab/mmpose/pull/973)) @ly015
+- Add codespell hook and fix detected typos ([\#977](https://github.com/open-mmlab/mmpose/pull/977)) @ly015
+
+## v0.19.0 (08/10/2021)
+
+**Highlights**
+
+- Add models for Associative Embedding with Hourglass network backbone ([\#906](https://github.com/open-mmlab/mmpose/pull/906), [\#955](https://github.com/open-mmlab/mmpose/pull/955)) @jin-s13, @luminxu
+- Support COCO-Wholebody-Face and COCO-Wholebody-Hand datasets ([\#813](https://github.com/open-mmlab/mmpose/pull/813)) @jin-s13, @innerlee, @luminxu
+- Upgrade dataset interface ([\#901](https://github.com/open-mmlab/mmpose/pull/901), [\#924](https://github.com/open-mmlab/mmpose/pull/924)) @jin-s13, @innerlee, @ly015, @liqikai9
+- New style of documentation ([\#945](https://github.com/open-mmlab/mmpose/pull/945)) @ly015
+
+**New Features**
+
+- Add models for Associative Embedding with Hourglass network backbone ([\#906](https://github.com/open-mmlab/mmpose/pull/906), [\#955](https://github.com/open-mmlab/mmpose/pull/955)) @jin-s13, @luminxu
+- Support COCO-Wholebody-Face and COCO-Wholebody-Hand datasets ([\#813](https://github.com/open-mmlab/mmpose/pull/813)) @jin-s13, @innerlee, @luminxu
+- Add pseudo-labeling tool to generate COCO style keypoint annotations with given bounding boxes ([\#928](https://github.com/open-mmlab/mmpose/pull/928)) @soltkreig
+- New style of documentation ([\#945](https://github.com/open-mmlab/mmpose/pull/945)) @ly015
+
+**Bug Fixes**
+
+- Fix segmentation parsing in Macaque dataset preprocessing ([\#948](https://github.com/open-mmlab/mmpose/pull/948)) @jin-s13
+- Fix dependencies that may lead to CI failure in downstream projects ([\#936](https://github.com/open-mmlab/mmpose/pull/936), [\#953](https://github.com/open-mmlab/mmpose/pull/953)) @RangiLyu, @ly015
+- Fix keypoint order in Human3.6M dataset ([\#940](https://github.com/open-mmlab/mmpose/pull/940)) @ttxskk
+- Fix unstable image loading for Interhand2.6M ([\#913](https://github.com/open-mmlab/mmpose/pull/913)) @zengwang430521
+
+**Improvements**
+
+- Upgrade dataset interface ([\#901](https://github.com/open-mmlab/mmpose/pull/901), [\#924](https://github.com/open-mmlab/mmpose/pull/924)) @jin-s13, @innerlee, @ly015, @liqikai9
+- Improve demo usability and stability ([\#908](https://github.com/open-mmlab/mmpose/pull/908), [\#934](https://github.com/open-mmlab/mmpose/pull/934)) @ly015
+- Standardize model metafile format ([\#941](https://github.com/open-mmlab/mmpose/pull/941)) @ly015
+- Support `persistent_worker` and several other arguments in configs ([\#946](https://github.com/open-mmlab/mmpose/pull/946)) @jin-s13
+- Use MMCV root model registry to enable cross-project module building ([\#935](https://github.com/open-mmlab/mmpose/pull/935)) @RangiLyu
+- Improve the document quality ([\#916](https://github.com/open-mmlab/mmpose/pull/916), [\#909](https://github.com/open-mmlab/mmpose/pull/909), [\#942](https://github.com/open-mmlab/mmpose/pull/942), [\#913](https://github.com/open-mmlab/mmpose/pull/913), [\#956](https://github.com/open-mmlab/mmpose/pull/956)) @jin-s13, @ly015, @bit-scientist, @zengwang430521
+- Improve pull request template ([\#952](https://github.com/open-mmlab/mmpose/pull/952), [\#954](https://github.com/open-mmlab/mmpose/pull/954)) @ly015
+
+**Breaking Changes**
+
+- Upgrade dataset interface ([\#901](https://github.com/open-mmlab/mmpose/pull/901)) @jin-s13, @innerlee, @ly015
+
+## v0.18.0 (01/09/2021)
+
+**Bug Fixes**
+
+- Fix redundant model weight loading in pytorch-to-onnx conversion ([\#850](https://github.com/open-mmlab/mmpose/pull/850)) @ly015
+- Fix a bug in update_model_index.py that may cause pre-commit hook failure([\#866](https://github.com/open-mmlab/mmpose/pull/866)) @ly015
+- Fix a bug in interhand_3d_head ([\#890](https://github.com/open-mmlab/mmpose/pull/890)) @zengwang430521
+- Fix pose tracking demo failure caused by out-of-date configs ([\#891](https://github.com/open-mmlab/mmpose/pull/891))
+
+**Improvements**
+
+- Add automatic benchmark regression tools ([\#849](https://github.com/open-mmlab/mmpose/pull/849), [\#880](https://github.com/open-mmlab/mmpose/pull/880), [\#885](https://github.com/open-mmlab/mmpose/pull/885)) @liqikai9, @ly015
+- Add copyright information and checking hook ([\#872](https://github.com/open-mmlab/mmpose/pull/872))
+- Add PR template ([\#875](https://github.com/open-mmlab/mmpose/pull/875)) @ly015
+- Add citation information ([\#876](https://github.com/open-mmlab/mmpose/pull/876)) @ly015
+- Add python3.9 in CI ([\#877](https://github.com/open-mmlab/mmpose/pull/877), [\#883](https://github.com/open-mmlab/mmpose/pull/883)) @ly015
+- Improve the quality of the documents ([\#845](https://github.com/open-mmlab/mmpose/pull/845), [\#845](https://github.com/open-mmlab/mmpose/pull/845), [\#848](https://github.com/open-mmlab/mmpose/pull/848), [\#867](https://github.com/open-mmlab/mmpose/pull/867), [\#870](https://github.com/open-mmlab/mmpose/pull/870), [\#873](https://github.com/open-mmlab/mmpose/pull/873), [\#896](https://github.com/open-mmlab/mmpose/pull/896)) @jin-s13, @ly015, @zhiqwang
+
+## v0.17.0 (06/08/2021)
+
+**Highlights**
+
+1. Support ["Lite-HRNet: A Lightweight High-Resolution Network"](https://arxiv.org/abs/2104.06403) CVPR'2021 ([\#733](https://github.com/open-mmlab/mmpose/pull/733),[\#800](https://github.com/open-mmlab/mmpose/pull/800)) @jin-s13
+2. Add 3d body mesh demo ([\#771](https://github.com/open-mmlab/mmpose/pull/771)) @zengwang430521
+3. Add Chinese documentation ([\#787](https://github.com/open-mmlab/mmpose/pull/787), [\#798](https://github.com/open-mmlab/mmpose/pull/798), [\#799](https://github.com/open-mmlab/mmpose/pull/799), [\#802](https://github.com/open-mmlab/mmpose/pull/802), [\#804](https://github.com/open-mmlab/mmpose/pull/804), [\#805](https://github.com/open-mmlab/mmpose/pull/805), [\#815](https://github.com/open-mmlab/mmpose/pull/815), [\#816](https://github.com/open-mmlab/mmpose/pull/816), [\#817](https://github.com/open-mmlab/mmpose/pull/817), [\#819](https://github.com/open-mmlab/mmpose/pull/819), [\#839](https://github.com/open-mmlab/mmpose/pull/839)) @ly015, @luminxu, @jin-s13, @liqikai9, @zengwang430521
+4. Add Colab Tutorial ([\#834](https://github.com/open-mmlab/mmpose/pull/834)) @ly015
+
+**New Features**
+
+- Support ["Lite-HRNet: A Lightweight High-Resolution Network"](https://arxiv.org/abs/2104.06403) CVPR'2021 ([\#733](https://github.com/open-mmlab/mmpose/pull/733),[\#800](https://github.com/open-mmlab/mmpose/pull/800)) @jin-s13
+- Add 3d body mesh demo ([\#771](https://github.com/open-mmlab/mmpose/pull/771)) @zengwang430521
+- Add Chinese documentation ([\#787](https://github.com/open-mmlab/mmpose/pull/787), [\#798](https://github.com/open-mmlab/mmpose/pull/798), [\#799](https://github.com/open-mmlab/mmpose/pull/799), [\#802](https://github.com/open-mmlab/mmpose/pull/802), [\#804](https://github.com/open-mmlab/mmpose/pull/804), [\#805](https://github.com/open-mmlab/mmpose/pull/805), [\#815](https://github.com/open-mmlab/mmpose/pull/815), [\#816](https://github.com/open-mmlab/mmpose/pull/816), [\#817](https://github.com/open-mmlab/mmpose/pull/817), [\#819](https://github.com/open-mmlab/mmpose/pull/819), [\#839](https://github.com/open-mmlab/mmpose/pull/839)) @ly015, @luminxu, @jin-s13, @liqikai9, @zengwang430521
+- Add Colab Tutorial ([\#834](https://github.com/open-mmlab/mmpose/pull/834)) @ly015
+- Support training for InterHand v1.0 dataset ([\#761](https://github.com/open-mmlab/mmpose/pull/761)) @zengwang430521
+
+**Bug Fixes**
+
+- Fix mpii pckh@0.1 index ([\#773](https://github.com/open-mmlab/mmpose/pull/773)) @jin-s13
+- Fix multi-node distributed test ([\#818](https://github.com/open-mmlab/mmpose/pull/818)) @ly015
+- Fix docstring and init_weights error of ShuffleNetV1 ([\#814](https://github.com/open-mmlab/mmpose/pull/814)) @Junjun2016
+- Fix imshow_bbox error when input bboxes is empty ([\#796](https://github.com/open-mmlab/mmpose/pull/796)) @ly015
+- Fix model zoo doc generation ([\#778](https://github.com/open-mmlab/mmpose/pull/778)) @ly015
+- Fix typos ([\#767](https://github.com/open-mmlab/mmpose/pull/767), [\#780](https://github.com/open-mmlab/mmpose/pull/780), [\#782](https://github.com/open-mmlab/mmpose/pull/782)) @ly015, @jin-s13
+
+**Breaking Changes**
+
+- Use MMCV EvalHook ([\#686](https://github.com/open-mmlab/mmpose/pull/686)) @ly015
+
+**Improvements**
+
+- Add pytest.ini and fix docstring ([\#812](https://github.com/open-mmlab/mmpose/pull/812)) @jin-s13
+- Update MSELoss ([\#829](https://github.com/open-mmlab/mmpose/pull/829)) @Ezra-Yu
+- Move process_mmdet_results into inference.py ([\#831](https://github.com/open-mmlab/mmpose/pull/831)) @ly015
+- Update resource limit ([\#783](https://github.com/open-mmlab/mmpose/pull/783)) @jin-s13
+- Use COCO 2D pose model in 3D demo examples ([\#785](https://github.com/open-mmlab/mmpose/pull/785)) @ly015
+- Change model zoo titles in the doc from center-aligned to left-aligned ([\#792](https://github.com/open-mmlab/mmpose/pull/792), [\#797](https://github.com/open-mmlab/mmpose/pull/797)) @ly015
+- Support MIM ([\#706](https://github.com/open-mmlab/mmpose/pull/706), [\#794](https://github.com/open-mmlab/mmpose/pull/794)) @ly015
+- Update out-of-date configs ([\#827](https://github.com/open-mmlab/mmpose/pull/827)) @jin-s13
+- Remove opencv-python-headless dependency by albumentations ([\#833](https://github.com/open-mmlab/mmpose/pull/833)) @ly015
+- Update QQ QR code in README_CN.md ([\#832](https://github.com/open-mmlab/mmpose/pull/832)) @ly015
+
+## v0.16.0 (02/07/2021)
+
+**Highlights**
+
+1. Support ["ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"](https://arxiv.org/abs/2105.10154) CVPR'2021 ([\#742](https://github.com/open-mmlab/mmpose/pull/742),[\#755](https://github.com/open-mmlab/mmpose/pull/755)).
+1. Support MPI-INF-3DHP dataset ([\#683](https://github.com/open-mmlab/mmpose/pull/683),[\#746](https://github.com/open-mmlab/mmpose/pull/746),[\#751](https://github.com/open-mmlab/mmpose/pull/751)).
+1. Add webcam demo tool ([\#729](https://github.com/open-mmlab/mmpose/pull/729))
+1. Add 3d body and hand pose estimation demo ([\#704](https://github.com/open-mmlab/mmpose/pull/704), [\#727](https://github.com/open-mmlab/mmpose/pull/727)).
+
+**New Features**
+
+- Support ["ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"](https://arxiv.org/abs/2105.10154) CVPR'2021 ([\#742](https://github.com/open-mmlab/mmpose/pull/742),[\#755](https://github.com/open-mmlab/mmpose/pull/755))
+- Support MPI-INF-3DHP dataset ([\#683](https://github.com/open-mmlab/mmpose/pull/683),[\#746](https://github.com/open-mmlab/mmpose/pull/746),[\#751](https://github.com/open-mmlab/mmpose/pull/751))
+- Support Webcam demo ([\#729](https://github.com/open-mmlab/mmpose/pull/729))
+- Support Interhand 3d demo ([\#704](https://github.com/open-mmlab/mmpose/pull/704))
+- Support 3d pose video demo ([\#727](https://github.com/open-mmlab/mmpose/pull/727))
+- Support H36m dataset for 2d pose estimation ([\#709](https://github.com/open-mmlab/mmpose/pull/709), [\#735](https://github.com/open-mmlab/mmpose/pull/735))
+- Add scripts to generate mim metafile ([\#749](https://github.com/open-mmlab/mmpose/pull/749))
+
+**Bug Fixes**
+
+- Fix typos ([\#692](https://github.com/open-mmlab/mmpose/pull/692),[\#696](https://github.com/open-mmlab/mmpose/pull/696),[\#697](https://github.com/open-mmlab/mmpose/pull/697),[\#698](https://github.com/open-mmlab/mmpose/pull/698),[\#712](https://github.com/open-mmlab/mmpose/pull/712),[\#718](https://github.com/open-mmlab/mmpose/pull/718),[\#728](https://github.com/open-mmlab/mmpose/pull/728))
+- Change model download links from `http` to `https` ([\#716](https://github.com/open-mmlab/mmpose/pull/716))
+
+**Breaking Changes**
+
+- Switch to MMCV MODEL_REGISTRY ([\#669](https://github.com/open-mmlab/mmpose/pull/669))
+
+**Improvements**
+
+- Refactor MeshMixDataset ([\#752](https://github.com/open-mmlab/mmpose/pull/752))
+- Rename 'GaussianHeatMap' to 'GaussianHeatmap' ([\#745](https://github.com/open-mmlab/mmpose/pull/745))
+- Update out-of-date configs ([\#734](https://github.com/open-mmlab/mmpose/pull/734))
+- Improve compatibility for breaking changes ([\#731](https://github.com/open-mmlab/mmpose/pull/731))
+- Enable to control radius and thickness in visualization ([\#722](https://github.com/open-mmlab/mmpose/pull/722))
+- Add regex dependency ([\#720](https://github.com/open-mmlab/mmpose/pull/720))
+
+## v0.15.0 (02/06/2021)
+
+**Highlights**
+
+1. Support 3d video pose estimation (VideoPose3D).
+1. Support 3d hand pose estimation (InterNet).
+1. Improve the presentation of the model zoo.
+
+**New Features**
+
+- Support "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image" (ECCV‘20) ([\#624](https://github.com/open-mmlab/mmpose/pull/624))
+- Support "3D human pose estimation in video with temporal convolutions and semi-supervised training" (CVPR'19) ([\#602](https://github.com/open-mmlab/mmpose/pull/602), [\#681](https://github.com/open-mmlab/mmpose/pull/681))
+- Support 3d pose estimation demo ([\#653](https://github.com/open-mmlab/mmpose/pull/653), [\#670](https://github.com/open-mmlab/mmpose/pull/670))
+- Support bottom-up whole-body pose estimation ([\#689](https://github.com/open-mmlab/mmpose/pull/689))
+- Support mmcli ([\#634](https://github.com/open-mmlab/mmpose/pull/634))
+
+**Bug Fixes**
+
+- Fix opencv compatibility ([\#635](https://github.com/open-mmlab/mmpose/pull/635))
+- Fix demo with UDP ([\#637](https://github.com/open-mmlab/mmpose/pull/637))
+- Fix bottom-up model onnx conversion ([\#680](https://github.com/open-mmlab/mmpose/pull/680))
+- Fix `GPU_IDS` in distributed training ([\#668](https://github.com/open-mmlab/mmpose/pull/668))
+- Fix MANIFEST.in ([\#641](https://github.com/open-mmlab/mmpose/pull/641), [\#657](https://github.com/open-mmlab/mmpose/pull/657))
+- Fix docs ([\#643](https://github.com/open-mmlab/mmpose/pull/643),[\#684](https://github.com/open-mmlab/mmpose/pull/684),[\#688](https://github.com/open-mmlab/mmpose/pull/688),[\#690](https://github.com/open-mmlab/mmpose/pull/690),[\#692](https://github.com/open-mmlab/mmpose/pull/692))
+
+**Breaking Changes**
+
+- Reorganize configs by tasks, algorithms, datasets, and techniques ([\#647](https://github.com/open-mmlab/mmpose/pull/647))
+- Rename heads and detectors ([\#667](https://github.com/open-mmlab/mmpose/pull/667))
+
+**Improvements**
+
+- Add `radius` and `thickness` parameters in visualization ([\#638](https://github.com/open-mmlab/mmpose/pull/638))
+- Add `trans_prob` parameter in `TopDownRandomTranslation` ([\#650](https://github.com/open-mmlab/mmpose/pull/650))
+- Switch to `MMCV MODEL_REGISTRY` ([\#669](https://github.com/open-mmlab/mmpose/pull/669))
+- Update dependencies ([\#674](https://github.com/open-mmlab/mmpose/pull/674), [\#676](https://github.com/open-mmlab/mmpose/pull/676))
+
+## v0.14.0 (06/05/2021)
+
+**Highlights**
+
+1. Support animal pose estimation with 7 popular datasets.
+1. Support "A simple yet effective baseline for 3d human pose estimation" (ICCV'17).
+
+**New Features**
+
+- Support "A simple yet effective baseline for 3d human pose estimation" (ICCV'17) ([\#554](https://github.com/open-mmlab/mmpose/pull/554),[\#558](https://github.com/open-mmlab/mmpose/pull/558),[\#566](https://github.com/open-mmlab/mmpose/pull/566),[\#570](https://github.com/open-mmlab/mmpose/pull/570),[\#589](https://github.com/open-mmlab/mmpose/pull/589))
+- Support animal pose estimation ([\#559](https://github.com/open-mmlab/mmpose/pull/559),[\#561](https://github.com/open-mmlab/mmpose/pull/561),[\#563](https://github.com/open-mmlab/mmpose/pull/563),[\#571](https://github.com/open-mmlab/mmpose/pull/571),[\#603](https://github.com/open-mmlab/mmpose/pull/603),[\#605](https://github.com/open-mmlab/mmpose/pull/605))
+- Support Horse-10 dataset ([\#561](https://github.com/open-mmlab/mmpose/pull/561)), MacaquePose dataset ([\#561](https://github.com/open-mmlab/mmpose/pull/561)), Vinegar Fly dataset ([\#561](https://github.com/open-mmlab/mmpose/pull/561)), Desert Locust dataset ([\#561](https://github.com/open-mmlab/mmpose/pull/561)), Grevy's Zebra dataset ([\#561](https://github.com/open-mmlab/mmpose/pull/561)), ATRW dataset ([\#571](https://github.com/open-mmlab/mmpose/pull/571)), and Animal-Pose dataset ([\#603](https://github.com/open-mmlab/mmpose/pull/603))
+- Support bottom-up pose tracking demo ([\#574](https://github.com/open-mmlab/mmpose/pull/574))
+- Support FP16 training ([\#584](https://github.com/open-mmlab/mmpose/pull/584),[\#616](https://github.com/open-mmlab/mmpose/pull/616),[\#626](https://github.com/open-mmlab/mmpose/pull/626))
+- Support NMS for bottom-up ([\#609](https://github.com/open-mmlab/mmpose/pull/609))
+
+**Bug Fixes**
+
+- Fix bugs in the top-down demo when there are no people in the images ([\#569](https://github.com/open-mmlab/mmpose/pull/569)).
+- Fix the links in the doc ([\#612](https://github.com/open-mmlab/mmpose/pull/612))
+
+**Improvements**
+
+- Speed up top-down inference ([\#560](https://github.com/open-mmlab/mmpose/pull/560))
+- Update github CI ([\#562](https://github.com/open-mmlab/mmpose/pull/562), [\#564](https://github.com/open-mmlab/mmpose/pull/564))
+- Update Readme ([\#578](https://github.com/open-mmlab/mmpose/pull/578),[\#579](https://github.com/open-mmlab/mmpose/pull/579),[\#580](https://github.com/open-mmlab/mmpose/pull/580),[\#592](https://github.com/open-mmlab/mmpose/pull/592),[\#599](https://github.com/open-mmlab/mmpose/pull/599),[\#600](https://github.com/open-mmlab/mmpose/pull/600),[\#607](https://github.com/open-mmlab/mmpose/pull/607))
+- Update FAQ ([\#587](https://github.com/open-mmlab/mmpose/pull/587), [\#610](https://github.com/open-mmlab/mmpose/pull/610))
+
+## v0.13.0 (31/03/2021)
+
+**Highlights**
+
+1. Support Wingloss.
+1. Support RHD hand dataset.
+
+**New Features**
+
+- Support Wingloss ([\#482](https://github.com/open-mmlab/mmpose/pull/482))
+- Support RHD hand dataset ([\#523](https://github.com/open-mmlab/mmpose/pull/523), [\#551](https://github.com/open-mmlab/mmpose/pull/551))
+- Support Human3.6m dataset for 3d keypoint detection ([\#518](https://github.com/open-mmlab/mmpose/pull/518), [\#527](https://github.com/open-mmlab/mmpose/pull/527))
+- Support TCN model for 3d keypoint detection ([\#521](https://github.com/open-mmlab/mmpose/pull/521), [\#522](https://github.com/open-mmlab/mmpose/pull/522))
+- Support Interhand3D model for 3d hand detection ([\#536](https://github.com/open-mmlab/mmpose/pull/536))
+- Support Multi-task detector ([\#480](https://github.com/open-mmlab/mmpose/pull/480))
+
+**Bug Fixes**
+
+- Fix PCKh@0.1 calculation ([\#516](https://github.com/open-mmlab/mmpose/pull/516))
+- Fix unittest ([\#529](https://github.com/open-mmlab/mmpose/pull/529))
+- Fix circular importing ([\#542](https://github.com/open-mmlab/mmpose/pull/542))
+- Fix bugs in bottom-up keypoint score ([\#548](https://github.com/open-mmlab/mmpose/pull/548))
+
+**Improvements**
+
+- Update config & checkpoints ([\#525](https://github.com/open-mmlab/mmpose/pull/525), [\#546](https://github.com/open-mmlab/mmpose/pull/546))
+- Fix typos ([\#514](https://github.com/open-mmlab/mmpose/pull/514), [\#519](https://github.com/open-mmlab/mmpose/pull/519), [\#532](https://github.com/open-mmlab/mmpose/pull/532), [\#537](https://github.com/open-mmlab/mmpose/pull/537))
+- Speed up post processing ([\#535](https://github.com/open-mmlab/mmpose/pull/535))
+- Update mmcv version dependency ([\#544](https://github.com/open-mmlab/mmpose/pull/544))
+
+## v0.12.0 (28/02/2021)
+
+**Highlights**
+
+1. Support DeepPose algorithm.
+
+**New Features**
+
+- Support DeepPose algorithm ([\#446](https://github.com/open-mmlab/mmpose/pull/446), [\#461](https://github.com/open-mmlab/mmpose/pull/461))
+- Support interhand3d dataset ([\#468](https://github.com/open-mmlab/mmpose/pull/468))
+- Support Albumentation pipeline ([\#469](https://github.com/open-mmlab/mmpose/pull/469))
+- Support PhotometricDistortion pipeline ([\#485](https://github.com/open-mmlab/mmpose/pull/485))
+- Set seed option for training ([\#493](https://github.com/open-mmlab/mmpose/pull/493))
+- Add demos for face keypoint detection ([\#502](https://github.com/open-mmlab/mmpose/pull/502))
+
+**Bug Fixes**
+
+- Change channel order according to configs ([\#504](https://github.com/open-mmlab/mmpose/pull/504))
+- Fix `num_factors` in UDP encoding ([\#495](https://github.com/open-mmlab/mmpose/pull/495))
+- Fix configs ([\#456](https://github.com/open-mmlab/mmpose/pull/456))
+
+**Breaking Changes**
+
+- Refactor configs for wholebody pose estimation ([\#487](https://github.com/open-mmlab/mmpose/pull/487), [\#491](https://github.com/open-mmlab/mmpose/pull/491))
+- Rename `decode` function for heads ([\#481](https://github.com/open-mmlab/mmpose/pull/481))
+
+**Improvements**
+
+- Update config & checkpoints ([\#453](https://github.com/open-mmlab/mmpose/pull/453),[\#484](https://github.com/open-mmlab/mmpose/pull/484),[\#487](https://github.com/open-mmlab/mmpose/pull/487))
+- Add README in Chinese ([\#462](https://github.com/open-mmlab/mmpose/pull/462))
+- Add tutorials about configs ([\#465](https://github.com/open-mmlab/mmpose/pull/465))
+- Add demo videos for various tasks ([\#499](https://github.com/open-mmlab/mmpose/pull/499), [\#503](https://github.com/open-mmlab/mmpose/pull/503))
+- Update docs about MMPose installation ([\#467](https://github.com/open-mmlab/mmpose/pull/467), [\#505](https://github.com/open-mmlab/mmpose/pull/505))
+- Rename `stat.py` to `stats.py` ([\#483](https://github.com/open-mmlab/mmpose/pull/483))
+- Fix typos ([\#463](https://github.com/open-mmlab/mmpose/pull/463), [\#464](https://github.com/open-mmlab/mmpose/pull/464), [\#477](https://github.com/open-mmlab/mmpose/pull/477), [\#481](https://github.com/open-mmlab/mmpose/pull/481))
+- Change LaTeX citations to BibTeX ([\#471](https://github.com/open-mmlab/mmpose/pull/471))
+- Update FAQ ([\#466](https://github.com/open-mmlab/mmpose/pull/466))
+
+## v0.11.0 (31/01/2021)
+
+**Highlights**
+
+1. Support fashion landmark detection.
+1. Support face keypoint detection.
+1. Support pose tracking with MMTracking.
+
+**New Features**
+
+- Support fashion landmark detection (DeepFashion) ([\#413](https://github.com/open-mmlab/mmpose/pull/413))
+- Support face keypoint detection (300W, AFLW, COFW, WFLW) ([\#367](https://github.com/open-mmlab/mmpose/pull/367))
+- Support pose tracking demo with MMTracking ([\#427](https://github.com/open-mmlab/mmpose/pull/427))
+- Support face demo ([\#443](https://github.com/open-mmlab/mmpose/pull/443))
+- Support AIC dataset for bottom-up methods ([\#438](https://github.com/open-mmlab/mmpose/pull/438), [\#449](https://github.com/open-mmlab/mmpose/pull/449))
+
+**Bug Fixes**
+
+- Fix multi-batch training ([\#434](https://github.com/open-mmlab/mmpose/pull/434))
+- Fix sigmas in AIC dataset ([\#441](https://github.com/open-mmlab/mmpose/pull/441))
+- Fix config file ([\#420](https://github.com/open-mmlab/mmpose/pull/420))
+
+**Breaking Changes**
+
+- Refactor Heads ([\#382](https://github.com/open-mmlab/mmpose/pull/382))
+
+**Improvements**
+
+- Update readme ([\#409](https://github.com/open-mmlab/mmpose/pull/409), [\#412](https://github.com/open-mmlab/mmpose/pull/412), [\#415](https://github.com/open-mmlab/mmpose/pull/415), [\#416](https://github.com/open-mmlab/mmpose/pull/416), [\#419](https://github.com/open-mmlab/mmpose/pull/419), [\#421](https://github.com/open-mmlab/mmpose/pull/421), [\#422](https://github.com/open-mmlab/mmpose/pull/422), [\#424](https://github.com/open-mmlab/mmpose/pull/424), [\#425](https://github.com/open-mmlab/mmpose/pull/425), [\#435](https://github.com/open-mmlab/mmpose/pull/435), [\#436](https://github.com/open-mmlab/mmpose/pull/436), [\#437](https://github.com/open-mmlab/mmpose/pull/437), [\#444](https://github.com/open-mmlab/mmpose/pull/444), [\#445](https://github.com/open-mmlab/mmpose/pull/445))
+- Add GAP (global average pooling) neck ([\#414](https://github.com/open-mmlab/mmpose/pull/414))
+- Speed up ([\#411](https://github.com/open-mmlab/mmpose/pull/411), [\#423](https://github.com/open-mmlab/mmpose/pull/423))
+- Support COCO test-dev test ([\#433](https://github.com/open-mmlab/mmpose/pull/433))
+
+## v0.10.0 (31/12/2020)
+
+**Highlights**
+
+1. Support more human pose estimation methods.
+ - [UDP](https://arxiv.org/abs/1911.07524)
+1. Support pose tracking.
+1. Support multi-batch inference.
+1. Add some useful tools, including `analyze_logs`, `get_flops`, `print_config`.
+1. Support more backbone networks.
+ - [ResNest](https://arxiv.org/pdf/2004.08955.pdf)
+ - [VGG](https://arxiv.org/abs/1409.1556)
+
+**New Features**
+
+- Support UDP ([\#353](https://github.com/open-mmlab/mmpose/pull/353), [\#371](https://github.com/open-mmlab/mmpose/pull/371), [\#402](https://github.com/open-mmlab/mmpose/pull/402))
+- Support multi-batch inference ([\#390](https://github.com/open-mmlab/mmpose/pull/390))
+- Support MHP dataset ([\#386](https://github.com/open-mmlab/mmpose/pull/386))
+- Support pose tracking demo ([\#380](https://github.com/open-mmlab/mmpose/pull/380))
+- Support mpii-trb demo ([\#372](https://github.com/open-mmlab/mmpose/pull/372))
+- Support mobilenet for hand pose estimation ([\#377](https://github.com/open-mmlab/mmpose/pull/377))
+- Support ResNest backbone ([\#370](https://github.com/open-mmlab/mmpose/pull/370))
+- Support VGG backbone ([\#370](https://github.com/open-mmlab/mmpose/pull/370))
+- Add some useful tools, including `analyze_logs`, `get_flops`, `print_config` ([\#324](https://github.com/open-mmlab/mmpose/pull/324))
+
+**Bug Fixes**
+
+- Fix bugs in pck evaluation ([\#328](https://github.com/open-mmlab/mmpose/pull/328))
+- Fix model download links in README ([\#396](https://github.com/open-mmlab/mmpose/pull/396), [\#397](https://github.com/open-mmlab/mmpose/pull/397))
+- Fix CrowdPose annotations and update benchmarks ([\#384](https://github.com/open-mmlab/mmpose/pull/384))
+- Fix modelzoo stat ([\#354](https://github.com/open-mmlab/mmpose/pull/354), [\#360](https://github.com/open-mmlab/mmpose/pull/360), [\#362](https://github.com/open-mmlab/mmpose/pull/362))
+- Fix config files for aic datasets ([\#340](https://github.com/open-mmlab/mmpose/pull/340))
+
+**Breaking Changes**
+
+- Rename `image_thr` to `det_bbox_thr` for top-down methods.
+
+**Improvements**
+
+- Organize the readme files ([\#398](https://github.com/open-mmlab/mmpose/pull/398), [\#399](https://github.com/open-mmlab/mmpose/pull/399), [\#400](https://github.com/open-mmlab/mmpose/pull/400))
+- Check linting for markdown ([\#379](https://github.com/open-mmlab/mmpose/pull/379))
+- Add faq.md ([\#350](https://github.com/open-mmlab/mmpose/pull/350))
+- Remove PyTorch 1.4 in CI ([\#338](https://github.com/open-mmlab/mmpose/pull/338))
+- Add pypi badge in readme ([\#329](https://github.com/open-mmlab/mmpose/pull/329))
+
+## v0.9.0 (30/11/2020)
+
+**Highlights**
+
+1. Support more human pose estimation methods.
+ - [MSPN](https://arxiv.org/abs/1901.00148)
+ - [RSN](https://arxiv.org/abs/2003.04030)
+1. Support video pose estimation datasets.
+ - [sub-JHMDB](http://jhmdb.is.tue.mpg.de/dataset)
+1. Support Onnx model conversion.
+
+**New Features**
+
+- Support MSPN ([\#278](https://github.com/open-mmlab/mmpose/pull/278))
+- Support RSN ([\#221](https://github.com/open-mmlab/mmpose/pull/221), [\#318](https://github.com/open-mmlab/mmpose/pull/318))
+- Support new post-processing method for MSPN & RSN ([\#288](https://github.com/open-mmlab/mmpose/pull/288))
+- Support sub-JHMDB dataset ([\#292](https://github.com/open-mmlab/mmpose/pull/292))
+- Support urls for pre-trained models in config files ([\#232](https://github.com/open-mmlab/mmpose/pull/232))
+- Support Onnx ([\#305](https://github.com/open-mmlab/mmpose/pull/305))
+
+**Bug Fixes**
+
+- Fix model download links in README ([\#255](https://github.com/open-mmlab/mmpose/pull/255), [\#315](https://github.com/open-mmlab/mmpose/pull/315))
+
+**Breaking Changes**
+
+- `post_process=True|False` and `unbiased_decoding=True|False` are deprecated, use `post_process=None|default|unbiased` etc. instead ([\#288](https://github.com/open-mmlab/mmpose/pull/288))
+
+**Improvements**
+
+- Enrich the model zoo ([\#256](https://github.com/open-mmlab/mmpose/pull/256), [\#320](https://github.com/open-mmlab/mmpose/pull/320))
+- Set the default map_location as 'cpu' to reduce gpu memory cost ([\#227](https://github.com/open-mmlab/mmpose/pull/227))
+- Support return heatmaps and backbone features for bottom-up models ([\#229](https://github.com/open-mmlab/mmpose/pull/229))
+- Upgrade mmcv maximum & minimum version ([\#269](https://github.com/open-mmlab/mmpose/pull/269), [\#313](https://github.com/open-mmlab/mmpose/pull/313))
+- Automatically add modelzoo statistics to readthedocs ([\#252](https://github.com/open-mmlab/mmpose/pull/252))
+- Fix Pylint issues ([\#258](https://github.com/open-mmlab/mmpose/pull/258), [\#259](https://github.com/open-mmlab/mmpose/pull/259), [\#260](https://github.com/open-mmlab/mmpose/pull/260), [\#262](https://github.com/open-mmlab/mmpose/pull/262), [\#265](https://github.com/open-mmlab/mmpose/pull/265), [\#267](https://github.com/open-mmlab/mmpose/pull/267), [\#268](https://github.com/open-mmlab/mmpose/pull/268), [\#270](https://github.com/open-mmlab/mmpose/pull/270), [\#271](https://github.com/open-mmlab/mmpose/pull/271), [\#272](https://github.com/open-mmlab/mmpose/pull/272), [\#273](https://github.com/open-mmlab/mmpose/pull/273), [\#275](https://github.com/open-mmlab/mmpose/pull/275), [\#276](https://github.com/open-mmlab/mmpose/pull/276), [\#283](https://github.com/open-mmlab/mmpose/pull/283), [\#285](https://github.com/open-mmlab/mmpose/pull/285), [\#293](https://github.com/open-mmlab/mmpose/pull/293), [\#294](https://github.com/open-mmlab/mmpose/pull/294), [\#295](https://github.com/open-mmlab/mmpose/pull/295))
+- Improve README ([\#226](https://github.com/open-mmlab/mmpose/pull/226), [\#257](https://github.com/open-mmlab/mmpose/pull/257), [\#264](https://github.com/open-mmlab/mmpose/pull/264), [\#280](https://github.com/open-mmlab/mmpose/pull/280), [\#296](https://github.com/open-mmlab/mmpose/pull/296))
+- Support PyTorch 1.7 in CI ([\#274](https://github.com/open-mmlab/mmpose/pull/274))
+- Add docs/tutorials for running demos ([\#263](https://github.com/open-mmlab/mmpose/pull/263))
+
+## v0.8.0 (31/10/2020)
+
+**Highlights**
+
+1. Support more human pose estimation datasets.
+ - [CrowdPose](https://github.com/Jeff-sjtu/CrowdPose)
+ - [PoseTrack18](https://posetrack.net/)
+1. Support more 2D hand keypoint estimation datasets.
+ - [InterHand2.6M](https://github.com/facebookresearch/InterHand2.6M)
+1. Support adversarial training for 3D human shape recovery.
+1. Support multi-stage losses.
+1. Support mpii demo.
+
+**New Features**
+
+- Support [CrowdPose](https://github.com/Jeff-sjtu/CrowdPose) dataset ([\#195](https://github.com/open-mmlab/mmpose/pull/195))
+- Support [PoseTrack18](https://posetrack.net/) dataset ([\#220](https://github.com/open-mmlab/mmpose/pull/220))
+- Support [InterHand2.6M](https://github.com/facebookresearch/InterHand2.6M) dataset ([\#202](https://github.com/open-mmlab/mmpose/pull/202))
+- Support adversarial training for 3D human shape recovery ([\#192](https://github.com/open-mmlab/mmpose/pull/192))
+- Support multi-stage losses ([\#204](https://github.com/open-mmlab/mmpose/pull/204))
+
+**Bug Fixes**
+
+- Fix config files ([\#190](https://github.com/open-mmlab/mmpose/pull/190))
+
+**Improvements**
+
+- Add mpii demo ([\#216](https://github.com/open-mmlab/mmpose/pull/216))
+- Improve README ([\#181](https://github.com/open-mmlab/mmpose/pull/181), [\#183](https://github.com/open-mmlab/mmpose/pull/183), [\#208](https://github.com/open-mmlab/mmpose/pull/208))
+- Support return heatmaps and backbone features ([\#196](https://github.com/open-mmlab/mmpose/pull/196), [\#212](https://github.com/open-mmlab/mmpose/pull/212))
+- Support different return formats of mmdetection models ([\#217](https://github.com/open-mmlab/mmpose/pull/217))
+
+## v0.7.0 (30/9/2020)
+
+**Highlights**
+
+1. Support HMR for 3D human shape recovery.
+1. Support WholeBody human pose estimation.
+ - [COCO-WholeBody](https://github.com/jin-s13/COCO-WholeBody)
+1. Support more 2D hand keypoint estimation datasets.
+ - [Frei-hand](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+ - [CMU Panoptic HandDB](http://domedb.perception.cs.cmu.edu/handdb.html)
+1. Add more popular backbones & enrich the [modelzoo](https://mmpose.readthedocs.io/en/latest/model_zoo.html)
+ - ShuffleNetv2
+1. Support hand demo and whole-body demo.
+
+**New Features**
+
+- Support HMR for 3D human shape recovery ([\#157](https://github.com/open-mmlab/mmpose/pull/157), [\#160](https://github.com/open-mmlab/mmpose/pull/160), [\#161](https://github.com/open-mmlab/mmpose/pull/161), [\#162](https://github.com/open-mmlab/mmpose/pull/162))
+- Support [COCO-WholeBody](https://github.com/jin-s13/COCO-WholeBody) dataset ([\#133](https://github.com/open-mmlab/mmpose/pull/133))
+- Support [Frei-hand](https://lmb.informatik.uni-freiburg.de/projects/freihand/) dataset ([\#125](https://github.com/open-mmlab/mmpose/pull/125))
+- Support [CMU Panoptic HandDB](http://domedb.perception.cs.cmu.edu/handdb.html) dataset ([\#144](https://github.com/open-mmlab/mmpose/pull/144))
+- Support H36M dataset ([\#159](https://github.com/open-mmlab/mmpose/pull/159))
+- Support ShuffleNetv2 ([\#139](https://github.com/open-mmlab/mmpose/pull/139))
+- Support saving best models based on key indicator ([\#127](https://github.com/open-mmlab/mmpose/pull/127))
+
+**Bug Fixes**
+
+- Fix typos in docs ([\#121](https://github.com/open-mmlab/mmpose/pull/121))
+- Fix assertion ([\#142](https://github.com/open-mmlab/mmpose/pull/142))
+
+**Improvements**
+
+- Add tools to transform .mat format to .json format ([\#126](https://github.com/open-mmlab/mmpose/pull/126))
+- Add hand demo ([\#115](https://github.com/open-mmlab/mmpose/pull/115))
+- Add whole-body demo ([\#163](https://github.com/open-mmlab/mmpose/pull/163))
+- Reuse mmcv utility function and update version files ([\#135](https://github.com/open-mmlab/mmpose/pull/135), [\#137](https://github.com/open-mmlab/mmpose/pull/137))
+- Enrich the modelzoo ([\#147](https://github.com/open-mmlab/mmpose/pull/147), [\#169](https://github.com/open-mmlab/mmpose/pull/169))
+- Improve docs ([\#174](https://github.com/open-mmlab/mmpose/pull/174), [\#175](https://github.com/open-mmlab/mmpose/pull/175), [\#178](https://github.com/open-mmlab/mmpose/pull/178))
+- Improve README ([\#176](https://github.com/open-mmlab/mmpose/pull/176))
+- Improve version.py ([\#173](https://github.com/open-mmlab/mmpose/pull/173))
+
+## v0.6.0 (31/8/2020)
+
+**Highlights**
+
+1. Add more popular backbones & enrich the [modelzoo](https://mmpose.readthedocs.io/en/latest/model_zoo.html)
+ - ResNext
+ - SEResNet
+ - ResNetV1D
+ - MobileNetv2
+ - ShuffleNetv1
+ - CPM (Convolutional Pose Machine)
+1. Add more popular datasets:
+ - [AIChallenger](https://arxiv.org/abs/1711.06475?context=cs.CV)
+ - [MPII](http://human-pose.mpi-inf.mpg.de/)
+ - [MPII-TRB](https://github.com/kennymckormick/Triplet-Representation-of-human-Body)
+ - [OCHuman](http://www.liruilong.cn/projects/pose2seg/index.html)
+1. Support 2d hand keypoint estimation.
+ - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+1. Support bottom-up inference.
+
+**New Features**
+
+- Support [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html) dataset ([\#52](https://github.com/open-mmlab/mmpose/pull/52))
+- Support [MPII](http://human-pose.mpi-inf.mpg.de/) dataset ([\#55](https://github.com/open-mmlab/mmpose/pull/55))
+- Support [MPII-TRB](https://github.com/kennymckormick/Triplet-Representation-of-human-Body) dataset ([\#19](https://github.com/open-mmlab/mmpose/pull/19), [\#47](https://github.com/open-mmlab/mmpose/pull/47), [\#48](https://github.com/open-mmlab/mmpose/pull/48))
+- Support [OCHuman](http://www.liruilong.cn/projects/pose2seg/index.html) dataset ([\#70](https://github.com/open-mmlab/mmpose/pull/70))
+- Support [AIChallenger](https://arxiv.org/abs/1711.06475?context=cs.CV) dataset ([\#87](https://github.com/open-mmlab/mmpose/pull/87))
+- Support multiple backbones ([\#26](https://github.com/open-mmlab/mmpose/pull/26))
+- Support CPM model ([\#56](https://github.com/open-mmlab/mmpose/pull/56))
+
+**Bug Fixes**
+
+- Fix configs for MPII & MPII-TRB datasets ([\#93](https://github.com/open-mmlab/mmpose/pull/93))
+- Fix the bug of missing `test_pipeline` in configs ([\#14](https://github.com/open-mmlab/mmpose/pull/14))
+- Fix typos ([\#27](https://github.com/open-mmlab/mmpose/pull/27), [\#28](https://github.com/open-mmlab/mmpose/pull/28), [\#50](https://github.com/open-mmlab/mmpose/pull/50), [\#53](https://github.com/open-mmlab/mmpose/pull/53), [\#63](https://github.com/open-mmlab/mmpose/pull/63))
+
+**Improvements**
+
+- Update benchmark ([\#93](https://github.com/open-mmlab/mmpose/pull/93))
+- Add Dockerfile ([\#44](https://github.com/open-mmlab/mmpose/pull/44))
+- Improve unittest coverage and minor fix ([\#18](https://github.com/open-mmlab/mmpose/pull/18))
+- Support CPUs for train/val/demo ([\#34](https://github.com/open-mmlab/mmpose/pull/34))
+- Support bottom-up demo ([\#69](https://github.com/open-mmlab/mmpose/pull/69))
+- Add tools to publish model ([\#62](https://github.com/open-mmlab/mmpose/pull/62))
+- Enrich the modelzoo ([\#64](https://github.com/open-mmlab/mmpose/pull/64), [\#68](https://github.com/open-mmlab/mmpose/pull/68), [\#82](https://github.com/open-mmlab/mmpose/pull/82))
+
+## v0.5.0 (21/7/2020)
+
+**Highlights**
+
+- MMPose is released.
+
+**Main Features**
+
+- Support both top-down and bottom-up pose estimation approaches.
+- Achieve higher training efficiency and higher accuracy than other popular codebases (e.g. AlphaPose, HRNet).
+- Support various backbone models: ResNet, HRNet, SCNet, Hourglass and HigherHRNet.
diff --git a/vendor/ViTPose/docs/en/collect.py b/vendor/ViTPose/docs/en/collect.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f8aedee0616d0bcf61d325feeced3738d524218
--- /dev/null
+++ b/vendor/ViTPose/docs/en/collect.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import re
+from glob import glob
+
+from titlecase import titlecase
+
+os.makedirs('topics', exist_ok=True)
+os.makedirs('papers', exist_ok=True)
+
+# Step 1: get subtopics: a mix of topic and task
+minisections = [
+ x.split('/')[-2:] for x in glob('../../configs/*/*') if '_base_' not in x
+]
+alltopics = sorted(list(set(x[0] for x in minisections)))
+subtopics = []
+for t in alltopics:
+ data = [x[1].split('_') for x in minisections if x[0] == t]
+ valid_ids = []
+ for i in range(len(data[0])):
+ if len(set(x[i] for x in data)) > 1:
+ valid_ids.append(i)
+ if len(valid_ids) > 0:
+ subtopics.extend([
+ f"{titlecase(t)}({','.join([d[i].title() for i in valid_ids])})",
+ t, '_'.join(d)
+ ] for d in data)
+ else:
+ subtopics.append([titlecase(t), t, '_'.join(data[0])])
+
+contents = {}
+for subtopic, topic, task in sorted(subtopics):
+ # Step 2: get all datasets
+ datasets = sorted(
+ list(
+ set(
+ x.split('/')[-2]
+ for x in glob(f'../../configs/{topic}/{task}/*/*/'))))
+ contents[subtopic] = {d: {} for d in datasets}
+ for dataset in datasets:
+ # Step 3: get all settings: algorithm + backbone + trick
+ for file in glob(f'../../configs/{topic}/{task}/*/{dataset}/*.md'):
+ keywords = (file.split('/')[-3],
+ *file.split('/')[-1].split('_')[:-1])
+ with open(file, 'r') as f:
+ contents[subtopic][dataset][keywords] = f.read()
+
+# Step 4: write files by topic
+for subtopic, datasets in contents.items():
+ lines = [f'# {subtopic}', '']
+ for dataset, keywords in datasets.items():
+ if len(keywords) == 0:
+ continue
+ lines += [
+            '<hr/>', '<br/><br/>', '', f'## {titlecase(dataset)} Dataset', ''
+ ]
+ for keyword, info in keywords.items():
+ keyword_strs = [titlecase(x.replace('_', ' ')) for x in keyword]
+ lines += [
+                '<br/>', '',
+ (f'### {" + ".join(keyword_strs)}'
+ f' on {titlecase(dataset)}'), '', info, ''
+ ]
+
+ with open(f'topics/{subtopic.lower()}.md', 'w') as f:
+ f.write('\n'.join(lines))
+
+# Step 5: write files by paper
+allfiles = [x.split('/')[-2:] for x in glob('../en/papers/*/*.md')]
+sections = sorted(list(set(x[0] for x in allfiles)))
+for section in sections:
+ lines = [f'# {titlecase(section)}', '']
+ files = [f for s, f in allfiles if s == section]
+ for file in files:
+ with open(f'../en/papers/{section}/{file}', 'r') as f:
+            keyline = [
+                line for line in f.readlines() if line.startswith('<summary')
+            ][0]
+        papername = re.sub(r'<.*?>', '', keyline).strip()
+ paperlines = []
+ for subtopic, datasets in contents.items():
+ for dataset, keywords in datasets.items():
+ keywords = {k: v for k, v in keywords.items() if keyline in v}
+ if len(keywords) == 0:
+ continue
+ for keyword, info in keywords.items():
+ keyword_strs = [
+ titlecase(x.replace('_', ' ')) for x in keyword
+ ]
+ paperlines += [
+                    '<br/>', '',
+ (f'### {" + ".join(keyword_strs)}'
+ f' on {titlecase(dataset)}'), '', info, ''
+ ]
+ if len(paperlines) > 0:
+            lines += ['<hr/>', '<br/><br/>', '', f'## {papername}', '']
+ lines += paperlines
+
+ with open(f'papers/{section}.md', 'w') as f:
+ f.write('\n'.join(lines))
diff --git a/vendor/ViTPose/docs/en/conf.py b/vendor/ViTPose/docs/en/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..10efef64d6ae6818bc6a2b85715265fe5ad4a017
--- /dev/null
+++ b/vendor/ViTPose/docs/en/conf.py
@@ -0,0 +1,116 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import subprocess
+import sys
+
+import pytorch_sphinx_theme
+
+sys.path.insert(0, os.path.abspath('../..'))
+
+# -- Project information -----------------------------------------------------
+
+project = 'MMPose'
+copyright = '2020-2021, OpenMMLab'
+author = 'MMPose Authors'
+
+# The full version, including alpha/beta/rc tags
+version_file = '../../mmpose/version.py'
+
+
+def get_version():
+ with open(version_file, 'r') as f:
+ exec(compile(f.read(), version_file, 'exec'))
+ return locals()['__version__']
+
+
+release = get_version()
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode',
+ 'sphinx_markdown_tables', 'sphinx_copybutton', 'myst_parser'
+]
+
+autodoc_mock_imports = ['json_tricks', 'mmpose.version']
+
+# Ignore >>> when copying code
+copybutton_prompt_text = r'>>> |\.\.\. '
+copybutton_prompt_is_regexp = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# -- Options for HTML output -------------------------------------------------
+source_suffix = {
+ '.rst': 'restructuredtext',
+ '.md': 'markdown',
+}
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'pytorch_sphinx_theme'
+html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+html_theme_options = {
+ 'menu': [
+ {
+ 'name':
+ 'Tutorial',
+ 'url':
+ 'https://colab.research.google.com/github/'
+ 'open-mmlab/mmpose/blob/master/demo/MMPose_Tutorial.ipynb'
+ },
+ {
+ 'name': 'GitHub',
+ 'url': 'https://github.com/open-mmlab/mmpose'
+ },
+ ],
+ # Specify the language of the shared menu
+ 'menu_lang':
+ 'en'
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+
+language = 'en'
+
+html_static_path = ['_static']
+html_css_files = ['css/readthedocs.css']
+
+# Enable ::: fences for myst_parser
+myst_enable_extensions = ['colon_fence']
+
+master_doc = 'index'
+
+
+def builder_inited_handler(app):
+ subprocess.run(['./collect.py'])
+ subprocess.run(['./merge_docs.sh'])
+ subprocess.run(['./stats.py'])
+
+
+def setup(app):
+ app.connect('builder-inited', builder_inited_handler)
diff --git a/vendor/ViTPose/docs/en/data_preparation.md b/vendor/ViTPose/docs/en/data_preparation.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c691f532d504eecb24f566feaf0a1eaeb7a9f24
--- /dev/null
+++ b/vendor/ViTPose/docs/en/data_preparation.md
@@ -0,0 +1,13 @@
+# Prepare Datasets
+
+MMPose supports multiple tasks. Please follow the corresponding guidelines for data preparation.
+
+- [2D Body Keypoint](tasks/2d_body_keypoint.md)
+- [3D Body Keypoint](tasks/3d_body_keypoint.md)
+- [3D Body Mesh Recovery](tasks/3d_body_mesh.md)
+- [2D Hand Keypoint](tasks/2d_hand_keypoint.md)
+- [3D Hand Keypoint](tasks/3d_hand_keypoint.md)
+- [2D Face Keypoint](tasks/2d_face_keypoint.md)
+- [2D WholeBody Keypoint](tasks/2d_wholebody_keypoint.md)
+- [2D Fashion Landmark](tasks/2d_fashion_landmark.md)
+- [2D Animal Keypoint](tasks/2d_animal_keypoint.md)
diff --git a/vendor/ViTPose/docs/en/faq.md b/vendor/ViTPose/docs/en/faq.md
new file mode 100644
index 0000000000000000000000000000000000000000..277885f3787b361c73980d23f71fe0436fee9834
--- /dev/null
+++ b/vendor/ViTPose/docs/en/faq.md
@@ -0,0 +1,135 @@
+# FAQ
+
+We list some common issues faced by many users and their corresponding solutions here.
+Feel free to enrich the list if you find any frequent issues and have ways to help others solve them.
+If the contents here do not cover your issue, please create an issue using the [provided templates](/.github/ISSUE_TEMPLATE/error-report.md) and make sure you fill in all required information in the template.
+
+## Installation
+
+- **Unable to install xtcocotools**
+
+  1. Try to install it manually from PyPI: `pip install xtcocotools`.
+  1. If step 1 does not work, try to install it from [source](https://github.com/jin-s13/xtcocoapi).
+
+   ```shell
+ git clone https://github.com/jin-s13/xtcocoapi
+ cd xtcocoapi
+ python setup.py install
+ ```
+
+- **No matching distribution found for xtcocotools>=1.6**
+
+ 1. Install cython by `pip install cython`.
+ 1. Install xtcocotools from [source](https://github.com/jin-s13/xtcocoapi).
+
+   ```shell
+ git clone https://github.com/jin-s13/xtcocoapi
+ cd xtcocoapi
+ python setup.py install
+ ```
+
+- **"No module named 'mmcv.ops'"; "No module named 'mmcv._ext'"**
+
+ 1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`.
+ 1. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/#installation).
+
+## Data
+
+- **How to convert my 2d keypoint dataset to coco-type?**
+
+  You may refer to this conversion [tool](https://github.com/open-mmlab/mmpose/blob/master/tools/dataset/parse_macaquepose_dataset.py) to prepare your data.
+  Here is an [example](https://github.com/open-mmlab/mmpose/blob/master/tests/data/macaque/test_macaque.json) of the coco-type json.
+  A coco-type json needs "categories", "images" and "annotations". "categories" contains basic information about the dataset, e.g. the class name and keypoint names.
+  "images" contains image-level information. It requires "id", "file_name", "height" and "width". Other fields are optional.
+  Note: (1) It is okay if the "id"s are not continuous or not sorted (e.g. 1000, 40, 352, 333 ...).
+
+  "annotations" contains instance-level information. It requires "image_id", "id", "keypoints", "num_keypoints", "bbox", "iscrowd", "area" and "category_id". Other fields are optional.
+  Note: (1) "num_keypoints" is the number of visible keypoints. (2) By default, set "iscrowd": 0. (3) "area" can be calculated from the bbox (area = w * h). (4) Simply set "category_id": 1. (5) The "image_id" in "annotations" should match an "id" in "images".
+
+- **What if my custom dataset does not have bounding box label?**
+
+ We can estimate the bounding box of a person as the minimal box that tightly bounds all the keypoints.
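+
+  As a sketch (not code from MMPose), such a pseudo bbox can be computed from an (N, 3) keypoint array of (x, y, visibility):
+
+  ```python
+  import numpy as np
+
+  def keypoints_to_bbox(keypoints):
+      """Estimate an (x, y, w, h) bbox that tightly bounds the visible keypoints."""
+      visible = keypoints[keypoints[:, 2] > 0, :2]
+      (x1, y1), (x2, y2) = visible.min(axis=0), visible.max(axis=0)
+      return [float(x1), float(y1), float(x2 - x1), float(y2 - y1)]
+  ```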
+
+- **What if my custom dataset does not have segmentation label?**
+
+  Just set the `area` of the person to the area of its bounding box. During evaluation, please set `use_area=False` as in this [example](https://github.com/open-mmlab/mmpose/blob/a82dd486853a8a471522ac06b8b9356db61f8547/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py#L113).
+
+- **What is `COCO_val2017_detections_AP_H_56_person.json`? Can I train pose models without it?**
+
+ "COCO_val2017_detections_AP_H_56_person.json" contains the "detected" human bounding boxes for COCO validation set, which are generated by FasterRCNN.
+ One can choose to use gt bounding boxes to evaluate models, by setting `use_gt_bbox=True` and `bbox_file=''`. Or one can use detected boxes to evaluate
+ the generalizability of models, by setting `use_gt_bbox=False` and `bbox_file='COCO_val2017_detections_AP_H_56_person.json'`.
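+
+  As a sketch, the corresponding entries in a top-down data config look roughly like this (surrounding keys omitted; the exact file layout depends on the config):
+
+  ```python
+  # evaluate with ground-truth boxes
+  data_cfg = dict(use_gt_bbox=True, bbox_file='')
+
+  # evaluate with detected boxes
+  data_cfg = dict(
+      use_gt_bbox=False,
+      bbox_file='data/coco/person_detection_results/'
+      'COCO_val2017_detections_AP_H_56_person.json')
+  ```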
+
+## Training
+
+- **RuntimeError: Address already in use**
+
+  Set the environment variable `MASTER_PORT=XXX`. For example,
+ `MASTER_PORT=29517 GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh Test res50 configs/body/2D_Kpt_SV_RGB_Img/topdown_hm/coco/res50_coco_256x192.py work_dirs/res50_coco_256x192`
+
+- **"Unexpected keys in source state dict" when loading pre-trained weights**
+
+  It is normal that some layers in the pretrained model are not used in the pose model. An ImageNet-pretrained classification network and the pose network may have different architectures (e.g. the pose network has no classification head), so some unexpected keys in the source state dict are actually expected.
+
+- **How to use trained models for backbone pre-training?**
+
+  Refer to [Use Pre-Trained Model](/docs/en/tutorials/1_finetune.md#use-pre-trained-model).
+  To use a pre-trained model for the whole network (backbone + head), the new config should point `load_from` to the pre-trained checkpoint.
+
+  To use only the backbone for pre-training, change the `pretrained` value in the backbone dict of the config file to the checkpoint path / url.
+  When training, the unexpected keys will be ignored.
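+
+  A minimal config sketch of the two options (paths are placeholders):
+
+  ```python
+  # Option 1: initialize the whole network (backbone + head) from a pose checkpoint
+  load_from = 'work_dirs/some_experiment/latest.pth'  # hypothetical path
+
+  # Option 2: initialize only the backbone from a pre-trained checkpoint
+  # (depending on the config, `pretrained` sits on the model or inside the backbone dict)
+  model = dict(
+      pretrained='checkpoints/my_backbone_pretrain.pth',  # hypothetical path
+      backbone=dict(type='ResNet', depth=50))
+  ```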
+
+- **How to visualize the training accuracy/loss curves in real-time?**
+
+ Use `TensorboardLoggerHook` in `log_config` like
+
+ ```python
+ log_config=dict(interval=20, hooks=[dict(type='TensorboardLoggerHook')])
+ ```
+
+ You can refer to [tutorials/6_customize_runtime.md](/tutorials/6_customize_runtime.md#log-config) and the example [config](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L26).
+
+- **Log info is NOT printed**
+
+ Use smaller log interval. For example, change `interval=50` to `interval=1` in the [config](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L23).
+
+- **How to freeze the backbone stages when finetuning a model?**
+
+  You can refer to [`def _freeze_stages()`](https://github.com/open-mmlab/mmpose/blob/d026725554f9dc08e8708bd9da8678f794a7c9a6/mmpose/models/backbones/resnet.py#L618) and [`frozen_stages`](https://github.com/open-mmlab/mmpose/blob/d026725554f9dc08e8708bd9da8678f794a7c9a6/mmpose/models/backbones/resnet.py#L498).
+  Remember to set `find_unused_parameters = True` in the config file for distributed training or testing.
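+
+  A config sketch (values are illustrative):
+
+  ```python
+  model = dict(
+      backbone=dict(
+          type='ResNet',
+          depth=50,
+          frozen_stages=1))  # freeze the stem and the first stage
+
+  # needed for distributed training/testing when part of the model is frozen
+  find_unused_parameters = True
+  ```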
+
+## Evaluation
+
+- **How to evaluate on MPII test dataset?**
+  Since we do not have the ground truth for the test dataset, we cannot evaluate it 'locally'.
+  If you would like to evaluate performance on the test set, you have to upload the `pred.mat` (generated during testing) to the official server via email, according to [the MPII guideline](http://human-pose.mpi-inf.mpg.de/#evaluation).
+
+- **For top-down 2d pose estimation, why can predicted joint coordinates fall outside the bounding box (bbox)?**
+  We do not directly use the bbox to crop the image. The bbox is first transformed to a center & scale, and the scale is multiplied by a factor (1.25) to include some context. If the width/height ratio is different from that of the model input (e.g. 192/256), the bbox is adjusted to match it.
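+
+  A rough sketch of this transform (the real implementation lives in the dataset code; the numbers follow the description above):
+
+  ```python
+  import numpy as np
+
+  def bbox_xywh_to_center_scale(bbox, aspect_ratio=192 / 256,
+                                padding=1.25, pixel_std=200.0):
+      """Convert an (x, y, w, h) bbox into the center & scale representation."""
+      x, y, w, h = bbox
+      center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+      # expand the box so its aspect ratio matches the model input
+      if w > aspect_ratio * h:
+          h = w / aspect_ratio
+      else:
+          w = h * aspect_ratio
+      scale = np.array([w, h], dtype=np.float32) / pixel_std * padding
+      return center, scale
+  ```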
+
+## Inference
+
+- **How to run mmpose on CPU?**
+
+ Run demos with `--device=cpu`.
+
+- **How to speed up inference?**
+
+ For top-down models, try to edit the config file. For example,
+
+ 1. set `flip_test=False` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L51).
+ 1. set `post_process='default'` in [topdown-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py#L54).
+ 1. use faster human bounding box detector, see [MMDetection](https://mmdetection.readthedocs.io/en/latest/model_zoo.html).
+
+ For bottom-up models, try to edit the config file. For example,
+
+ 1. set `flip_test=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L91).
+ 1. set `adjust=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L89).
+ 1. set `refine=False` in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L90).
+ 1. use smaller input image size in [AE-res50](https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/bottom_up/resnet/coco/res50_coco_512x512.py#L39).
+
+## Deployment
+
+- **Why does the onnx model converted by mmpose throw errors when being converted to other frameworks such as TensorRT?**
+
+  For now, we can only make sure that models in mmpose are onnx-compatible. However, some operations in onnx may be unsupported by your target framework for deployment, e.g. TensorRT in [this issue](https://github.com/open-mmlab/mmaction2/issues/414). When such a situation occurs, we suggest raising an issue and asking the community for help, as long as `pytorch2onnx.py` works well and its output is verified numerically.
diff --git a/vendor/ViTPose/docs/en/getting_started.md b/vendor/ViTPose/docs/en/getting_started.md
new file mode 100644
index 0000000000000000000000000000000000000000..d7cfea3d2745dbdf61e464673d5eaac4d8253385
--- /dev/null
+++ b/vendor/ViTPose/docs/en/getting_started.md
@@ -0,0 +1,283 @@
+# Getting Started
+
+This page provides basic tutorials about the usage of MMPose.
+For installation instructions, please see [install.md](install.md).
+
+
+
+- [Prepare Datasets](#prepare-datasets)
+- [Inference with Pre-Trained Models](#inference-with-pre-trained-models)
+ - [Test a dataset](#test-a-dataset)
+ - [Run demos](#run-demos)
+- [Train a Model](#train-a-model)
+ - [Train with a single GPU](#train-with-a-single-gpu)
+ - [Train with CPU](#train-with-cpu)
+ - [Train with multiple GPUs](#train-with-multiple-gpus)
+ - [Train with multiple machines](#train-with-multiple-machines)
+ - [Launch multiple jobs on a single machine](#launch-multiple-jobs-on-a-single-machine)
+- [Benchmark](#benchmark)
+- [Tutorials](#tutorials)
+
+
+
+## Prepare Datasets
+
+MMPose supports multiple tasks. Please follow the corresponding guidelines for data preparation.
+
+- [2D Body Keypoint Detection](/docs/en/tasks/2d_body_keypoint.md)
+- [3D Body Keypoint Detection](/docs/en/tasks/3d_body_keypoint.md)
+- [3D Body Mesh Recovery](/docs/en/tasks/3d_body_mesh.md)
+- [2D Hand Keypoint Detection](/docs/en/tasks/2d_hand_keypoint.md)
+- [3D Hand Keypoint Detection](/docs/en/tasks/3d_hand_keypoint.md)
+- [2D Face Keypoint Detection](/docs/en/tasks/2d_face_keypoint.md)
+- [2D WholeBody Keypoint Detection](/docs/en/tasks/2d_wholebody_keypoint.md)
+- [2D Fashion Landmark Detection](/docs/en/tasks/2d_fashion_landmark.md)
+- [2D Animal Keypoint Detection](/docs/en/tasks/2d_animal_keypoint.md)
+
+## Inference with Pre-trained Models
+
+We provide testing scripts to evaluate a whole dataset (COCO, MPII etc.),
+and provide some high-level apis for easier integration to other OpenMMLab projects.
+
+### Test a dataset
+
+- [x] single GPU
+- [x] CPU
+- [x] single node multiple GPUs
+- [x] multiple nodes
+
+You can use the following commands to test a dataset.
+
+```shell
+# single-gpu testing
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--fuse-conv-bn] \
+ [--eval ${EVAL_METRICS}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--cfg-options ${CFG_OPTIONS}] \
+ [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
+
+# CPU: disable GPUs and run single-gpu testing script
+export CUDA_VISIBLE_DEVICES=-1
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] \
+ [--eval ${EVAL_METRICS}]
+
+# multi-gpu testing
+./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--fuse-conv-bn] \
+ [--eval ${EVAL_METRIC}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--cfg-options ${CFG_OPTIONS}] \
+ [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
+```
+
+Note that the provided `CHECKPOINT_FILE` is either the path to the model checkpoint file downloaded in advance, or the url link to the model checkpoint.
+
+Optional arguments:
+
+- `RESULT_FILE`: Filename of the output results. If not specified, the results will not be saved to a file.
+- `--fuse-conv-bn`: Whether to fuse conv and bn layers; this will slightly increase the inference speed.
+- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values depend on the dataset.
+- `--gpu_collect`: If specified, results will be collected using gpu communication. Otherwise, results on different gpus will be saved to `TMPDIR` and collected by the rank 0 worker.
+- `TMPDIR`: Temporary directory used for collecting results from multiple workers, available when `--gpu_collect` is not specified.
+- `CFG_OPTIONS`: Override some settings in the config used; key-value pairs in xxx=yyy format will be merged into the config file. For example, '--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'.
+- `JOB_LAUNCHER`: Launcher for distributed job initialization. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. If set to `none`, the test runs in non-distributed mode.
+- `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0.
+
+Examples:
+
+Assume that you have already downloaded the checkpoints to the directory `checkpoints/`.
+
+1. Test ResNet50 on COCO (without saving the test results) and evaluate the mAP.
+
+ ```shell
+ ./tools/dist_test.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ checkpoints/SOME_CHECKPOINT.pth 1 \
+ --eval mAP
+ ```
+
+1. Test ResNet50 on COCO with 8 GPUs, downloading the checkpoint via url, and evaluate the mAP.
+
+ ```shell
+ ./tools/dist_test.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth 8 \
+ --eval mAP
+ ```
+
+1. Test ResNet50 on COCO in a slurm environment and evaluate the mAP.
+
+ ```shell
+ ./tools/slurm_test.sh slurm_partition test_job \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ checkpoints/SOME_CHECKPOINT.pth \
+ --eval mAP
+ ```
+
+### Run demos
+
+We also provide scripts to run demos.
+Here is an example of running top-down human pose demos using ground-truth bounding boxes.
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+Examples:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results
+```
+
+More examples and details can be found in the [demo folder](/demo) and the [demo docs](https://mmpose.readthedocs.io/en/latest/demo.html).
+
+## Train a model
+
+MMPose implements distributed training and non-distributed training,
+which uses `MMDistributedDataParallel` and `MMDataParallel` respectively.
+
+We adopt distributed training for both single machine and multiple machines. Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
+
+Each process keeps an isolated model, data loader, and optimizer. Model parameters are only synchronized once at the beginning. After a forward and backward pass, gradients will be allreduced among all GPUs, and the optimizer will update model parameters. Since the gradients are allreduced, the model parameter stays the same for all processes after the iteration.
+
+### Training setting
+
+All outputs (log files and checkpoints) will be saved to the working directory,
+which is specified by `work_dir` in the config file.
+
+By default, we evaluate the model on the validation set after each epoch. You can change the evaluation interval by modifying the `interval` argument in the training config.
+
+```python
+evaluation = dict(interval=5)  # This evaluates the model every 5 epochs.
+```
+
+According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to set the learning rate proportional to the total batch size if you use a different number of GPUs or samples per GPU, e.g., lr=0.01 for 4 GPUs x 2 samples/gpu and lr=0.08 for 16 GPUs x 4 samples/gpu.
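+
+Concretely, the rule from the example above can be written as a small sketch:
+
+```python
+base_lr = 0.01  # learning rate tuned for 4 GPUs x 2 samples per GPU
+base_batch_size = 4 * 2
+
+num_gpus, samples_per_gpu = 16, 4
+lr = base_lr * (num_gpus * samples_per_gpu) / base_batch_size  # -> 0.08
+```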
+
+### Train with a single GPU
+
+```shell
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+If you want to specify the working directory in the command, you can add an argument `--work-dir ${YOUR_WORK_DIR}`.
+
+### Train with CPU
+
+The process of training on the CPU is consistent with single GPU training. We just need to disable GPUs before the training process.
+
+```shell
+export CUDA_VISIBLE_DEVICES=-1
+```
+
+And then run the script [above](#train-with-a-single-gpu).
+
+**Note**:
+
+We do not recommend using the CPU for training because it is too slow. We support this feature to let users debug on machines without a GPU.
+
+### Train with multiple GPUs
+
+```shell
+./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
+```
+
+Optional arguments are:
+
+- `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file.
+- `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file.
+- `--no-validate`: Do not evaluate the checkpoint during training.
+- `--gpus ${GPU_NUM}`: Number of gpus to use, which is only applicable to non-distributed training.
+- `--gpu-ids ${GPU_IDS}`: IDs of gpus to use, which is only applicable to non-distributed training.
+- `--seed ${SEED}`: Seed id for random state in python, numpy and pytorch to generate random numbers.
+- `--deterministic`: If specified, it will set deterministic options for CUDNN backend.
+- `--cfg-options CFG_OPTIONS`: Override some settings in the config used; key-value pairs in xxx=yyy format will be merged into the config file. For example, '--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'.
+- `--launcher ${JOB_LAUNCHER}`: Launcher for distributed job initialization. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. If set to `none`, training runs in non-distributed mode.
+- `--autoscale-lr`: If specified, it will automatically scale lr with the number of gpus by [Linear Scaling Rule](https://arxiv.org/abs/1706.02677).
+- `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0.
+
+Difference between `resume-from` and `load-from`:
+`resume-from` loads both the model weights and the optimizer status, and the epoch is also inherited from the specified checkpoint. It is usually used to resume a training process that was interrupted accidentally.
+`load-from` only loads the model weights and the training epoch starts from 0. It is usually used for finetuning.
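+
+For finetuning, `load_from` is typically set directly in the config (the path below is a placeholder):
+
+```python
+load_from = 'checkpoints/res50_coco_256x192.pth'  # hypothetical checkpoint path
+```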
+
+Here is an example of using 8 GPUs to resume training from a ResNet50 checkpoint.
+
+```shell
+./tools/dist_train.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py 8 --resume-from work_dirs/res50_coco_256x192/latest.pth
+```
+
+### Train with multiple machines
+
+If you run MMPose on a cluster managed with [slurm](https://slurm.schedmd.com/), you can use the script `slurm_train.sh`. (This script also supports single machine training.)
+
+```shell
+./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${WORK_DIR}
+```
+
+Here is an example of using 16 GPUs to train ResNet50 on the dev partition in a slurm cluster.
+(Use `GPUS_PER_NODE=8` to specify a single slurm cluster node with 8 GPUs, `CPUS_PER_TASK=2` to use 2 cpus per task.
+Assume that `Test` is a valid ${PARTITION} name.)
+
+```shell
+GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh Test res50 configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py work_dirs/res50_coco_256x192
+```
+
+You can check [slurm_train.sh](/tools/slurm_train.sh) for full arguments and environment variables.
+
+If you have multiple machines connected only with Ethernet, you can refer to the
+PyTorch [launch utility](https://pytorch.org/docs/en/stable/distributed_deprecated.html#launch-utility).
+It is usually slow if you do not have high-speed networking like InfiniBand.
+
+### Launch multiple jobs on a single machine
+
+If you launch multiple jobs on a single machine, e.g., 2 jobs of 4-GPU training on a machine with 8 GPUs,
+you need to specify different ports (29500 by default) for each job to avoid communication conflict.
+
+If you use `dist_train.sh` to launch training jobs, you can set the port in commands.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG_FILE} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG_FILE} 4
+```
+
+If you launch training jobs with slurm, you need to modify the config files (usually the 4th line of the config file) to set different communication ports.
+
+In `config1.py`,
+
+```python
+dist_params = dict(backend='nccl', port=29500)
+```
+
+In `config2.py`,
+
+```python
+dist_params = dict(backend='nccl', port=29501)
+```
+
+Then you can launch two jobs with `config1.py` and `config2.py`.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py ${WORK_DIR} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py ${WORK_DIR} 4
+```
+
+## Benchmark
+
+You can get the average inference speed using the following script. Note that it does not include the IO time or the pre-processing time.
+
+```shell
+python tools/analysis/benchmark_inference.py ${MMPOSE_CONFIG_FILE}
+```
+
+## Tutorials
+
+We provide some tutorials for users:
+
+- [learn about configs](tutorials/0_config.md)
+- [finetune model](tutorials/1_finetune.md)
+- [add new dataset](tutorials/2_new_dataset.md)
+- [customize data pipelines](tutorials/3_data_pipeline.md)
+- [add new modules](tutorials/4_new_modules.md)
+- [export a model to ONNX](tutorials/5_export_model.md)
+- [customize runtime settings](tutorials/6_customize_runtime.md)
diff --git a/vendor/ViTPose/docs/en/index.rst b/vendor/ViTPose/docs/en/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a56282236fecdc30f97add4a22d5b6b2537b64cd
--- /dev/null
+++ b/vendor/ViTPose/docs/en/index.rst
@@ -0,0 +1,99 @@
+Welcome to MMPose's documentation!
+==================================
+
+You can change the documentation language at the lower-left corner of the page.
+
+您可以在页面左下角切换文档语言。
+
+.. toctree::
+ :maxdepth: 2
+
+ install.md
+ getting_started.md
+ demo.md
+ benchmark.md
+ inference_speed_summary.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Datasets
+
+ datasets.md
+ tasks/2d_body_keypoint.md
+ tasks/2d_wholebody_keypoint.md
+ tasks/2d_face_keypoint.md
+ tasks/2d_hand_keypoint.md
+ tasks/2d_fashion_landmark.md
+ tasks/2d_animal_keypoint.md
+ tasks/3d_body_keypoint.md
+ tasks/3d_body_mesh.md
+ tasks/3d_hand_keypoint.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Model Zoo
+
+ modelzoo.md
+ topics/animal.md
+ topics/body(2d,kpt,sview,img).md
+ topics/body(2d,kpt,sview,vid).md
+ topics/body(3d,kpt,sview,img).md
+ topics/body(3d,kpt,sview,vid).md
+ topics/body(3d,kpt,mview,img).md
+ topics/body(3d,mesh,sview,img).md
+ topics/face.md
+ topics/fashion.md
+ topics/hand(2d).md
+ topics/hand(3d).md
+ topics/wholebody.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Model Zoo (by paper)
+
+ papers/algorithms.md
+ papers/backbones.md
+ papers/datasets.md
+ papers/techniques.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Tutorials
+
+ tutorials/0_config.md
+ tutorials/1_finetune.md
+ tutorials/2_new_dataset.md
+ tutorials/3_data_pipeline.md
+ tutorials/4_new_modules.md
+ tutorials/5_export_model.md
+ tutorials/6_customize_runtime.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Useful Tools and Scripts
+
+ useful_tools.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Notes
+
+ changelog.md
+ faq.md
+
+.. toctree::
+ :caption: API Reference
+
+ api.rst
+
+.. toctree::
+ :caption: Languages
+
+ language.md
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/vendor/ViTPose/docs/en/inference_speed_summary.md b/vendor/ViTPose/docs/en/inference_speed_summary.md
new file mode 100644
index 0000000000000000000000000000000000000000..9d165ec2cccef81e3fe15690320ff40f12c27aca
--- /dev/null
+++ b/vendor/ViTPose/docs/en/inference_speed_summary.md
@@ -0,0 +1,114 @@
+# Inference Speed
+
+We summarize the model complexity and inference speed of major models in MMPose, including FLOPs, parameter counts and inference speeds on both CPU and GPU devices with different batch sizes. We also compare the mAP of different models on COCO human keypoint dataset, showing the trade-off between model performance and model complexity.
+
+## Comparison Rules
+
+To ensure the fairness of the comparison, the comparison experiments are conducted under the same hardware and software environment using the same dataset. We also list the mAP (mean average precision) on COCO human keypoint dataset of the models along with the corresponding config files.
+
+For model complexity information, we calculate the FLOPs and parameter counts of a model with the corresponding input shape. Note that some layers or ops are currently not supported, for example, `DeformConv2d`, so you may need to check whether all ops are supported and verify that the FLOPs and parameter count computation is correct.
+
+For inference speed, we omit the time for data pre-processing and only measure the time for model forwarding and data post-processing. For each model setting, we keep the same data pre-processing methods to ensure the same input. We measure the inference speed on both CPU and GPU devices. For topdown heatmap models, we also test the case with a larger batch size, e.g., 10, to evaluate model performance in crowded scenes.
+
+The inference speed is measured in frames per second (FPS), namely the average number of iterations per second, which shows how fast the model can handle an input. The higher the FPS, the faster the model.
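+
+As a rough illustration (not the benchmark script itself), the FPS of a plain `torch.nn.Module` at batch size 1 can be estimated like this:
+
+```python
+import time
+
+import torch
+
+
+def measure_fps(model, input_shape=(1, 3, 256, 192), n_iters=100, device='cuda'):
+    """Average number of forward passes per second for a dummy input."""
+    model = model.to(device).eval()
+    dummy = torch.randn(*input_shape, device=device)
+    with torch.no_grad():
+        for _ in range(10):  # warm-up iterations
+            model(dummy)
+        if device == 'cuda':
+            torch.cuda.synchronize()
+        start = time.time()
+        for _ in range(n_iters):
+            model(dummy)
+        if device == 'cuda':
+            torch.cuda.synchronize()
+    return n_iters / (time.time() - start)
+```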
+
+### Hardware
+
+- GPU: GeForce GTX 1660 SUPER
+- CPU: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
+
+### Software Environment
+
+- Ubuntu 16.04
+- Python 3.8
+- PyTorch 1.10
+- CUDA 10.2
+- mmcv-full 1.3.17
+- mmpose 0.20.0
+
+## Model complexity information and inference speed results of major models in MMPose
+
+| Algorithm | Model | config | Input size | mAP | Flops (GFLOPs) | Params (M) | GPU Inference Speed (FPS)<sup>1</sup> | GPU Inference Speed (FPS, bs=10)<sup>2</sup> | CPU Inference Speed (FPS) | CPU Inference Speed (FPS, bs=10) |
+| :--- | :---------------: | :-----------------: |:--------------------: | :----------------------------: | :-----------------: | :---------------: |:--------------------: | :----------------------------: | :-----------------: | :-----------------: |
+| topdown_heatmap | Alexnet | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco_256x192.py) | (3, 192, 256) | 0.397 | 1.42 | 5.62 | 229.21 ± 16.91 | 33.52 ± 1.14 | 13.92 ± 0.60 | 1.38 ± 0.02 |
+| topdown_heatmap | CPM | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py) | (3, 192, 256) | 0.623 | 63.81 | 31.3 | 11.35 ± 0.22 | 3.87 ± 0.07 | 0.31 ± 0.01 | 0.03 ± 0.00 |
+| topdown_heatmap | CPM | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py) | (3, 288, 384) | 0.65 | 143.57 | 31.3 | 7.09 ± 0.14 | 2.10 ± 0.05 | 0.14 ± 0.00 | 0.01 ± 0.00 |
+| topdown_heatmap | Hourglass-52 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py) | (3, 256, 256) | 0.726 | 28.67 | 94.85 | 25.50 ± 1.68 | 3.99 ± 0.07 | 0.92 ± 0.03 | 0.09 ± 0.00 |
+| topdown_heatmap | Hourglass-52 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_384x384.py) | (3, 384, 384) | 0.746 | 64.5 | 94.85 | 14.74 ± 0.8 | 1.86 ± 0.06 | 0.43 ± 0.03 | 0.04 ± 0.00 |
+| topdown_heatmap | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py) | (3, 192, 256) | 0.746 | 7.7 | 28.54 | 22.73 ± 1.12 | 6.60 ± 0.14 | 2.73 ± 0.11 | 0.32 ± 0.00 |
+| topdown_heatmap | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288.py) | (3, 288, 384) | 0.76 | 17.33 | 28.54 | 22.78 ± 1.21 | 3.28 ± 0.08 | 1.35 ± 0.05 | 0.14 ± 0.00 |
+| topdown_heatmap | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py) | (3, 192, 256) | 0.756 | 15.77 | 63.6 | 22.01 ± 1.10 | 3.74 ± 0.10 | 1.46 ± 0.05 | 0.16 ± 0.00 |
+| topdown_heatmap | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288.py) | (3, 288, 384) | 0.767 | 35.48 | 63.6 | 15.03 ± 1.03 | 1.80 ± 0.03 | 0.68 ± 0.02 | 0.07 ± 0.00 |
+| topdown_heatmap | LiteHRNet-30 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_256x192.py) | (3, 192, 256) | 0.675 | 0.42 | 1.76 | 11.86 ± 0.38 | 9.77 ± 0.23 | 5.84 ± 0.39 | 0.80 ± 0.00 |
+| topdown_heatmap | LiteHRNet-30 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_384x288.py) | (3, 288, 384) | 0.7 | 0.95 | 1.76 | 11.52 ± 0.39 | 5.18 ± 0.11 | 3.45 ± 0.22 | 0.37 ± 0.00 |
+| topdown_heatmap | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py) | (3, 192, 256) | 0.646 | 1.59 | 9.57 | 91.82 ± 10.98 | 17.85 ± 0.32 | 10.44 ± 0.80 | 1.05 ± 0.01 |
+| topdown_heatmap | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_384x288.py) | (3, 288, 384) | 0.673 | 3.57 | 9.57 | 71.27 ± 6.82 | 8.00 ± 0.15 | 5.01 ± 0.32 | 0.46 ± 0.00 |
+| topdown_heatmap | MSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py) | (3, 192, 256) | 0.723 | 5.11 | 25.11 | 59.65 ± 3.74 | 9.51 ± 0.15 | 3.98 ± 0.21 | 0.43 ± 0.00 |
+| topdown_heatmap | 2xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py) | (3, 192, 256) | 0.754 | 11.35 | 56.8 | 30.64 ± 2.61 | 4.74 ± 0.12 | 1.85 ± 0.08 | 0.20 ± 0.00 |
+| topdown_heatmap | 3xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py) | (3, 192, 256) | 0.758 | 17.59 | 88.49 | 20.90 ± 1.82 | 3.22 ± 0.08 | 1.23 ± 0.04 | 0.13 ± 0.00 |
+| topdown_heatmap | 4xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py) | (3, 192, 256) | 0.764 | 23.82 | 120.18 | 15.79 ± 1.14 | 2.45 ± 0.05 | 0.90 ± 0.03 | 0.10 ± 0.00 |
+| topdown_heatmap | ResNest-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_256x192.py) | (3, 192, 256) | 0.721 | 6.73 | 35.93 | 48.36 ± 4.12 | 7.48 ± 0.13 | 3.00 ± 0.13 | 0.33 ± 0.00 |
+| topdown_heatmap | ResNest-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_384x288.py) | (3, 288, 384) | 0.737 | 15.14 | 35.93 | 30.30 ± 2.30 | 3.62 ± 0.09 | 1.43 ± 0.05 | 0.13 ± 0.00 |
+| topdown_heatmap | ResNest-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py) | (3, 192, 256) | 0.725 | 10.38 | 56.61 | 29.21 ± 1.98 | 5.30 ± 0.12 | 2.01 ± 0.08 | 0.22 ± 0.00 |
+| topdown_heatmap | ResNest-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py) | (3, 288, 384) | 0.746 | 23.36 | 56.61 | 19.02 ± 1.40 | 2.59 ± 0.05 | 0.97 ± 0.03 | 0.09 ± 0.00 |
+| topdown_heatmap | ResNest-200 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py) | (3, 192, 256) | 0.732 | 17.5 | 78.54 | 16.11 ± 0.71 | 3.29 ± 0.07 | 1.33 ± 0.02 | 0.14 ± 0.00 |
+| topdown_heatmap | ResNest-200 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py) | (3, 288, 384) | 0.754 | 39.37 | 78.54 | 11.48 ± 0.68 | 1.58 ± 0.02 | 0.63 ± 0.01 | 0.06 ± 0.00 |
+| topdown_heatmap | ResNest-269 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_256x192.py) | (3, 192, 256) | 0.738 | 22.45 | 119.27 | 12.02 ± 0.47 | 2.60 ± 0.05 | 1.03 ± 0.01 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNest-269 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_384x288.py) | (3, 288, 384) | 0.755 | 50.5 | 119.27 | 8.82 ± 0.42 | 1.24 ± 0.02 | 0.49 ± 0.01 | 0.05 ± 0.00 |
+| topdown_heatmap | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py) | (3, 192, 256) | 0.718 | 5.46 | 34 | 64.23 ± 6.05 | 9.33 ± 0.21 | 4.00 ± 0.10 | 0.41 ± 0.00 |
+| topdown_heatmap | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py) | (3, 288, 384) | 0.731 | 12.29 | 34 | 36.78 ± 3.05 | 4.48 ± 0.12 | 1.92 ± 0.04 | 0.19 ± 0.00 |
+| topdown_heatmap | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py) | (3, 192, 256) | 0.726 | 9.11 | 52.99 | 43.35 ± 4.36 | 6.44 ± 0.14 | 2.57 ± 0.05 | 0.27 ± 0.00 |
+| topdown_heatmap | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py) | (3, 288, 384) | 0.748 | 20.5 | 52.99 | 23.29 ± 1.83 | 3.12 ± 0.09 | 1.23 ± 0.03 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py) | (3, 192, 256) | 0.735 | 12.77 | 68.64 | 32.31 ± 2.84 | 4.88 ± 0.17 | 1.89 ± 0.03 | 0.20 ± 0.00 |
+| topdown_heatmap | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py) | (3, 288, 384) | 0.75 | 28.73 | 68.64 | 17.32 ± 1.17 | 2.40 ± 0.04 | 0.91 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ResNetV1d-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py) | (3, 192, 256) | 0.722 | 5.7 | 34.02 | 63.44 ± 6.09 | 9.09 ± 0.10 | 3.82 ± 0.10 | 0.39 ± 0.00 |
+| topdown_heatmap | ResNetV1d-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py) | (3, 288, 384) | 0.73 | 12.82 | 34.02 | 36.21 ± 3.10 | 4.30 ± 0.12 | 1.82 ± 0.04 | 0.16 ± 0.00 |
+| topdown_heatmap | ResNetV1d-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py) | (3, 192, 256) | 0.731 | 9.35 | 53.01 | 41.48 ± 3.76 | 6.33 ± 0.15 | 2.48 ± 0.05 | 0.26 ± 0.00 |
+| topdown_heatmap | ResNetV1d-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py) | (3, 288, 384) | 0.748 | 21.04 | 53.01 | 23.49 ± 1.76 | 3.07 ± 0.07 | 1.19 ± 0.02 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNetV1d-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_256x192.py) | (3, 192, 256) | 0.737 | 13.01 | 68.65 | 31.96 ± 2.87 | 4.69 ± 0.18 | 1.87 ± 0.02 | 0.19 ± 0.00 |
+| topdown_heatmap | ResNetV1d-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py) | (3, 288, 384) | 0.752 | 29.26 | 68.65 | 17.31 ± 1.13 | 2.32 ± 0.04 | 0.88 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ResNext-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_256x192.py) | (3, 192, 256) | 0.714 | 5.61 | 33.47 | 48.34 ± 3.85 | 7.66 ± 0.13 | 3.71 ± 0.10 | 0.37 ± 0.00 |
+| topdown_heatmap | ResNext-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py) | (3, 288, 384) | 0.724 | 12.62 | 33.47 | 30.66 ± 2.38 | 3.64 ± 0.11 | 1.73 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | ResNext-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py) | (3, 192, 256) | 0.726 | 9.29 | 52.62 | 27.33 ± 2.35 | 5.09 ± 0.13 | 2.45 ± 0.04 | 0.25 ± 0.00 |
+| topdown_heatmap | ResNext-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py) | (3, 288, 384) | 0.743 | 20.91 | 52.62 | 18.19 ± 1.38 | 2.42 ± 0.04 | 1.15 ± 0.01 | 0.10 ± 0.00 |
+| topdown_heatmap | ResNext-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_256x192.py) | (3, 192, 256) | 0.73 | 12.98 | 68.39 | 19.61 ± 1.61 | 3.80 ± 0.13 | 1.83 ± 0.02 | 0.18 ± 0.00 |
+| topdown_heatmap | ResNext-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_384x288.py) | (3, 288, 384) | 0.742 | 29.21 | 68.39 | 13.14 ± 0.75 | 1.82 ± 0.03 | 0.85 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | RSN-18 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py) | (3, 192, 256) | 0.704 | 2.27 | 9.14 | 47.80 ± 4.50 | 13.68 ± 0.25 | 6.70 ± 0.28 | 0.70 ± 0.00 |
+| topdown_heatmap | RSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py) | (3, 192, 256) | 0.723 | 4.11 | 19.33 | 27.22 ± 1.61 | 8.81 ± 0.13 | 3.98 ± 0.12 | 0.45 ± 0.00 |
+| topdown_heatmap | 2xRSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xrsn50_coco_256x192.py) | (3, 192, 256) | 0.745 | 8.29 | 39.26 | 13.88 ± 0.64 | 4.78 ± 0.13 | 2.02 ± 0.04 | 0.23 ± 0.00 |
+| topdown_heatmap | 3xRSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py) | (3, 192, 256) | 0.75 | 12.47 | 59.2 | 9.40 ± 0.32 | 3.37 ± 0.09 | 1.34 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | SCNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_256x192.py) | (3, 192, 256) | 0.728 | 5.31 | 34.01 | 40.76 ± 3.08 | 8.35 ± 0.19 | 3.82 ± 0.08 | 0.40 ± 0.00 |
+| topdown_heatmap | SCNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py) | (3, 288, 384) | 0.751 | 11.94 | 34.01 | 32.61 ± 2.97 | 4.19 ± 0.10 | 1.85 ± 0.03 | 0.17 ± 0.00 |
+| topdown_heatmap | SCNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py) | (3, 192, 256) | 0.733 | 8.51 | 53.01 | 24.28 ± 1.19 | 5.80 ± 0.13 | 2.49 ± 0.05 | 0.27 ± 0.00 |
+| topdown_heatmap | SCNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_384x288.py) | (3, 288, 384) | 0.752 | 19.14 | 53.01 | 20.43 ± 1.76 | 2.91 ± 0.06 | 1.23 ± 0.02 | 0.12 ± 0.00 |
+| topdown_heatmap | SeresNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py) | (3, 192, 256) | 0.728 | 5.47 | 36.53 | 54.83 ± 4.94 | 8.80 ± 0.12 | 3.85 ± 0.10 | 0.40 ± 0.00 |
+| topdown_heatmap | SeresNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py) | (3, 288, 384) | 0.748 | 12.3 | 36.53 | 33.00 ± 2.67 | 4.26 ± 0.12 | 1.86 ± 0.04 | 0.17 ± 0.00 |
+| topdown_heatmap | SeresNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py) | (3, 192, 256) | 0.734 | 9.13 | 57.77 | 33.90 ± 2.65 | 6.01 ± 0.13 | 2.48 ± 0.05 | 0.26 ± 0.00 |
+| topdown_heatmap | SeresNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py) | (3, 288, 384) | 0.753 | 20.53 | 57.77 | 20.57 ± 1.57 | 2.96 ± 0.07 | 1.20 ± 0.02 | 0.11 ± 0.00 |
+| topdown_heatmap | SeresNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_256x192.py) | (3, 192, 256) | 0.73 | 12.79 | 75.26 | 24.25 ± 1.95 | 4.45 ± 0.10 | 1.82 ± 0.02 | 0.19 ± 0.00 |
+| topdown_heatmap | SeresNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py) | (3, 288, 384) | 0.753 | 28.76 | 75.26 | 15.11 ± 0.99 | 2.25 ± 0.04 | 0.88 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ShuffleNetV1 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_256x192.py) | (3, 192, 256) | 0.585 | 1.35 | 6.94 | 80.79 ± 8.95 | 21.91 ± 0.46 | 11.84 ± 0.59 | 1.25 ± 0.01 |
+| topdown_heatmap | ShuffleNetV1 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_384x288.py) | (3, 288, 384) | 0.622 | 3.05 | 6.94 | 63.45 ± 5.21 | 9.84 ± 0.10 | 6.01 ± 0.31 | 0.57 ± 0.00 |
+| topdown_heatmap | ShuffleNetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py) | (3, 192, 256) | 0.599 | 1.37 | 7.55 | 82.36 ± 7.30 | 22.68 ± 0.53 | 12.40 ± 0.66 | 1.34 ± 0.02 |
+| topdown_heatmap | ShuffleNetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py) | (3, 288, 384) | 0.636 | 3.08 | 7.55 | 63.63 ± 5.72 | 10.47 ± 0.16 | 6.32 ± 0.28 | 0.63 ± 0.01 |
+| topdown_heatmap | VGG16 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg16_bn_coco_256x192.py) | (3, 192, 256) | 0.698 | 16.22 | 18.92 | 51.91 ± 2.98 | 6.18 ± 0.13 | 1.64 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | VIPNAS + ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py) | (3, 192, 256) | 0.711 | 1.49 | 7.29 | 34.88 ± 2.45 | 10.29 ± 0.13 | 6.51 ± 0.17 | 0.65 ± 0.00 |
+| topdown_heatmap | VIPNAS + MobileNetV3 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py) | (3, 192, 256) | 0.7 | 0.76 | 5.9 | 53.62 ± 6.59 | 11.54 ± 0.18 | 1.26 ± 0.02 | 0.13 ± 0.00 |
+| Associative Embedding | HigherHRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | (3, 512, 512) | 0.677 | 46.58 | 28.65 | 7.80 ± 0.67 | / | 0.28 ± 0.02 | / |
+| Associative Embedding | HigherHRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | (3, 640, 640) | 0.686 | 72.77 | 28.65 | 5.30 ± 0.37 | / | 0.17 ± 0.01 | / |
+| Associative Embedding | HigherHRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | (3, 512, 512) | 0.686 | 96.17 | 63.83 | 4.55 ± 0.35 | / | 0.15 ± 0.01 | / |
+| Associative Embedding | Hourglass-AE | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py) | (3, 512, 512) | 0.613 | 221.58 | 138.86 | 3.55 ± 0.24 | / | 0.08 ± 0.00 | / |
+| Associative Embedding | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py) | (3, 512, 512) | 0.654 | 41.1 | 28.54 | 8.93 ± 0.76 | / | 0.33 ± 0.02 | / |
+| Associative Embedding | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py) | (3, 512, 512) | 0.665 | 84.12 | 63.6 | 5.27 ± 0.43 | / | 0.18 ± 0.01 | / |
+| Associative Embedding | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py) | (3, 512, 512) | 0.38 | 8.54 | 9.57 | 21.24 ± 1.34 | / | 0.81 ± 0.06 | / |
+| Associative Embedding | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py) | (3, 512, 512) | 0.466 | 29.2 | 34 | 11.71 ± 0.97 | / | 0.41 ± 0.02 | / |
+| Associative Embedding | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py) | (3, 640, 640) | 0.479 | 45.62 | 34 | 8.20 ± 0.58 | / | 0.26 ± 0.02 | / |
+| Associative Embedding | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py) | (3, 512, 512) | 0.554 | 48.67 | 53 | 8.26 ± 0.68 | / | 0.28 ± 0.02 | / |
+| Associative Embedding | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py) | (3, 512, 512) | 0.595 | 68.17 | 68.64 | 6.25 ± 0.53 | / | 0.21 ± 0.01 | / |
+| DeepPose | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py) | (3, 192, 256) | 0.526 | 4.04 | 23.58 | 82.20 ± 7.54 | / | 5.50 ± 0.18 | / |
+| DeepPose | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py) | (3, 192, 256) | 0.56 | 7.69 | 42.57 | 48.93 ± 4.02 | / | 3.10 ± 0.07 | / |
+| DeepPose | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py) | (3, 192, 256) | 0.583 | 11.34 | 58.21 | 35.06 ± 3.50 | / | 2.19 ± 0.04 | / |
+
+<sup>1</sup> We run multiple iterations and record the time of each iteration; both the mean and the standard deviation of the resulting FPS are reported.
+
+<sup>2</sup> FPS is defined as the average number of iterations per second, regardless of the batch size used in each iteration.
diff --git a/vendor/ViTPose/docs/en/install.md b/vendor/ViTPose/docs/en/install.md
new file mode 100644
index 0000000000000000000000000000000000000000..a668b232b063b7028d9d4bd7d5c5650f57b3c89a
--- /dev/null
+++ b/vendor/ViTPose/docs/en/install.md
@@ -0,0 +1,202 @@
+# Installation
+
+
+
+- [Requirements](#requirements)
+- [Prepare Environment](#prepare-environment)
+- [Install MMPose](#install-mmpose)
+- [Install with CPU only](#install-with-cpu-only)
+- [A from-scratch setup script](#a-from-scratch-setup-script)
+- [Another option: Docker Image](#another-option-docker-image)
+- [Developing with multiple MMPose versions](#developing-with-multiple-mmpose-versions)
+
+
+
+## Requirements
+
+- Linux (Windows is not officially supported)
+- Python 3.6+
+- PyTorch 1.3+
+- CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible)
+- GCC 5+
+- [mmcv](https://github.com/open-mmlab/mmcv) (Please install the latest version of mmcv-full)
+- Numpy
+- cv2
+- json_tricks
+- [xtcocotools](https://github.com/jin-s13/xtcocoapi)
+
+Optional:
+
+- [mmdet](https://github.com/open-mmlab/mmdetection) (to run pose demos)
+- [mmtrack](https://github.com/open-mmlab/mmtracking) (to run pose tracking demos)
+- [pyrender](https://pyrender.readthedocs.io/en/latest/install/index.html) (to run 3d mesh demos)
+- [smplx](https://github.com/vchoutas/smplx) (to run 3d mesh demos)
+
+## Prepare environment
+
+a. Create a conda virtual environment and activate it.
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+```
+
+b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/), e.g.,
+
+```shell
+conda install pytorch torchvision -c pytorch
+```
+
+```{note}
+Make sure that your compilation CUDA version and runtime CUDA version match.
+```
+
+You can check the supported CUDA version for precompiled packages on the [PyTorch website](https://pytorch.org/).
+
+`E.g.1` If you have CUDA 10.2 installed under `/usr/local/cuda` and would like to install PyTorch 1.8.0,
+you need to install the prebuilt PyTorch with CUDA 10.2.
+
+```shell
+conda install pytorch==1.8.0 torchvision==0.9.0 cudatoolkit=10.2 -c pytorch
+```
+
+`E.g.2` If you have CUDA 9.2 installed under `/usr/local/cuda` and would like to install PyTorch 1.7.0,
+you need to install the prebuilt PyTorch with CUDA 9.2.
+
+```shell
+conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=9.2 -c pytorch
+```
+
+If you build PyTorch from source instead of installing the pre-built package, you can use more CUDA versions such as 9.0.
+
+## Install MMPose
+
+a. Install mmcv. We recommend installing the pre-built mmcv-full package as below.
+
+```shell
+# pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
+# We can ignore the micro version of PyTorch
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9/index.html
+```
+
+mmcv-full is only compiled against PyTorch 1.x.0 because compatibility usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you can install the mmcv-full build compiled with PyTorch 1.x.0, and it usually works well.
+
+See [here](https://github.com/open-mmlab/mmcv#installation) for the MMCV versions compatible with different PyTorch and CUDA versions.
+
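+If you are unsure which pre-built wheel matches your environment, a small helper such as the sketch below (an illustration, not part of MMPose) can derive the `{cu_version}` and `{torch_version}` placeholders from the installed PyTorch:
+
+```python
+import torch
+
+
+def mmcv_find_links_url():
+    """Build the mmcv-full find-links URL from the installed PyTorch."""
+    # e.g. '1.9.1+cu102' -> 'torch1.9' (the micro version can be ignored)
+    major, minor = torch.__version__.split('+')[0].split('.')[:2]
+    torch_version = f'torch{major}.{minor}'
+    # e.g. '10.2' -> 'cu102'; 'cpu' is an assumed fallback for CPU-only builds
+    cuda = torch.version.cuda
+    cu_version = f'cu{cuda.replace(".", "")}' if cuda else 'cpu'
+    return ('https://download.openmmlab.com/mmcv/dist/'
+            f'{cu_version}/{torch_version}/index.html')
+
+
+print(mmcv_find_links_url())
+# then: pip install mmcv-full -f <printed URL>
+```
+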
+Optionally, you can compile mmcv from source with the following commands:
+
+```shell
+git clone https://github.com/open-mmlab/mmcv.git
+cd mmcv
+MMCV_WITH_OPS=1 pip install -e . # package mmcv-full, which contains cuda ops, will be installed after this step
+# OR pip install -e . # package mmcv, which contains no cuda ops, will be installed after this step
+cd ..
+```
+
+**Important:** You need to run `pip uninstall mmcv` first if you already have mmcv installed. If both mmcv and mmcv-full are installed, a `ModuleNotFoundError` will be raised.
+
+b. Clone the mmpose repository
+
+```shell
+git clone git@github.com:open-mmlab/mmpose.git # or git clone https://github.com/open-mmlab/mmpose
+cd mmpose
+```
+
+c. Install build requirements and then install mmpose
+
+```shell
+pip install -r requirements.txt
+pip install -v -e . # or "python setup.py develop"
+```
+
+If you build MMPose on macOS, replace the last command with
+
+```shell
+CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' pip install -e .
+```
+
+d. Install optional modules
+
+- [mmdet](https://github.com/open-mmlab/mmdetection) (to run pose demos)
+- [mmtrack](https://github.com/open-mmlab/mmtracking) (to run pose tracking demos)
+- [pyrender](https://pyrender.readthedocs.io/en/latest/install/index.html) (to run 3d mesh demos)
+- [smplx](https://github.com/vchoutas/smplx) (to run 3d mesh demos)
+
+```{note}
+1. The git commit id will be written to the version number in step c, e.g. 0.6.0+2e7045c. The version will also be saved in trained models.
+   It is recommended that you re-run step c each time you pull updates from GitHub. If C++/CUDA code has been modified, this step is compulsory.
+
+1. Following the above instructions, mmpose is installed in `dev` mode, so any local modification to the code takes effect without reinstalling it (unless you submit some commits and want to update the version number).
+
+1. If you would like to use `opencv-python-headless` instead of `opencv-python`,
+ you can install it before installing MMCV.
+
+1. If you have `mmcv` installed, you need to uninstall `mmcv` first, and then install `mmcv-full`.
+
+1. Some dependencies are optional. Running `python setup.py develop` will only install the minimum runtime requirements.
+   To use optional dependencies like `smplx`, either install them with `pip install -r requirements/optional.txt`
+   or specify the desired extras when calling `pip`, e.g. `pip install -v -e .[optional]`.
+   Valid keys for the extras field are `all`, `tests`, `build`, and `optional`; multiple keys can be combined, e.g. `pip install -v -e .[tests,build]`.
+```
+
+## Install with CPU only
+
+The code can be built for a CPU-only environment (where CUDA is not available).
+
+In CPU mode you can run `demo/demo.py`, for example.
+
+## A from-scratch setup script
+
+Here is a full script for setting up mmpose with conda and linking the dataset path (assuming that your COCO dataset path is $COCO_ROOT).
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+
+# install latest pytorch prebuilt with the default prebuilt CUDA version (usually the latest)
+conda install -c pytorch pytorch torchvision -y
+
+# install the latest mmcv-full
+# Please replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired one.
+# See [here](https://github.com/open-mmlab/mmcv#installation) for different versions of MMCV compatible to different PyTorch and CUDA versions.
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
+
+# install mmpose
+git clone https://github.com/open-mmlab/mmpose.git
+cd mmpose
+pip install -r requirements.txt
+pip install -v -e .
+
+mkdir data
+ln -s $COCO_ROOT data/coco
+```
+
+## Another option: Docker Image
+
+We provide a [Dockerfile](/docker/Dockerfile) to build an image.
+
+```shell
+# build an image with PyTorch 1.6.0, CUDA 10.1, CUDNN 7.
+docker build -f ./docker/Dockerfile --rm -t mmpose .
+```
+
+**Important:** Make sure you've installed the [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker).
+
+Run it with the following command:
+
+```shell
+docker run --gpus all \
+ --shm-size=8g \
+ -it -v {DATA_DIR}:/mmpose/data mmpose
+```
+
+## Developing with multiple MMPose versions
+
+The train and test scripts already modify the `PYTHONPATH` to ensure that they use the MMPose in the current directory.
+
+To use the default MMPose installed in the environment rather than the one you are working with, you can remove the following line from those scripts:
+
+```shell
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
+```
diff --git a/vendor/ViTPose/docs/en/language.md b/vendor/ViTPose/docs/en/language.md
new file mode 100644
index 0000000000000000000000000000000000000000..a0a6259bee27121ca837c85141ebca0307d617b4
--- /dev/null
+++ b/vendor/ViTPose/docs/en/language.md
@@ -0,0 +1,3 @@
+## English
+
+## 简体中文
diff --git a/vendor/ViTPose/docs/en/make.bat b/vendor/ViTPose/docs/en/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..922152e96a04a242e6fc40f124261d74890617d8
--- /dev/null
+++ b/vendor/ViTPose/docs/en/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/vendor/ViTPose/docs/en/merge_docs.sh b/vendor/ViTPose/docs/en/merge_docs.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6484b78f4355558f546053fd2869898100178001
--- /dev/null
+++ b/vendor/ViTPose/docs/en/merge_docs.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
+sed -i '$a\\n' ../../demo/docs/*_demo.md
+cat ../../demo/docs/*_demo.md | sed "s/#/#&/" | sed "s/md###t/html#t/g" | sed '1i\# Demo' | sed 's=](/docs/en/=](/=g' | sed 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' >demo.md
+
+# remove the /docs/ prefix from links used in the doc site
+sed -i 's=](/docs/en/=](=g' ./tutorials/*.md
+sed -i 's=](/docs/en/=](=g' ./tasks/*.md
+sed -i 's=](/docs/en/=](=g' ./papers/*.md
+sed -i 's=](/docs/en/=](=g' ./topics/*.md
+sed -i 's=](/docs/en/=](=g' data_preparation.md
+sed -i 's=](/docs/en/=](=g' getting_started.md
+sed -i 's=](/docs/en/=](=g' install.md
+sed -i 's=](/docs/en/=](=g' benchmark.md
+sed -i 's=](/docs/en/=](=g' changelog.md
+sed -i 's=](/docs/en/=](=g' faq.md
+
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./tutorials/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./tasks/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./papers/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./topics/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' data_preparation.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' getting_started.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' install.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' benchmark.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' changelog.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' faq.md
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/associative_embedding.md b/vendor/ViTPose/docs/en/papers/algorithms/associative_embedding.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a27267ae9f822e0609bc8513835dbcef7ef343a
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/associative_embedding.md
@@ -0,0 +1,30 @@
+# Associative embedding: End-to-end learning for joint detection and grouping (AE)
+
+
+
+
+Associative Embedding (NIPS'2017)
+
+```bibtex
+@inproceedings{newell2017associative,
+ title={Associative embedding: End-to-end learning for joint detection and grouping},
+ author={Newell, Alejandro and Huang, Zhiao and Deng, Jia},
+ booktitle={Advances in neural information processing systems},
+ pages={2277--2287},
+ year={2017}
+}
+```
+
+
+
+## Abstract
+
+
+
+We introduce associative embedding, a novel method for supervising convolutional neural networks for the task of detection and grouping. A number of computer vision problems can be framed in this manner including multi-person pose estimation, instance segmentation, and multi-object tracking. Usually the grouping of detections is achieved with multi-stage pipelines, instead we propose an approach that teaches a network to simultaneously output detections and group assignments. This technique can be easily integrated into any state-of-the-art network architecture that produces pixel-wise predictions. We show how to apply this method to both multi-person pose estimation and instance segmentation and report state-of-the-art performance for multi-person pose on the MPII and MS-COCO datasets.
+
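+As a rough sketch of the grouping idea (not the paper's or MMPose's implementation; the 1-D tags, shapes and pull/push form below are simplifying assumptions), each detected keypoint carries an embedding "tag"; tags of one person are pulled toward that person's mean tag, while the mean tags of different people are pushed apart:
+
+```python
+import torch
+
+
+def associative_embedding_loss(tags, sigma=1.0):
+    """Toy pull/push grouping loss over per-person keypoint tags.
+
+    tags: list with one tensor per person, each of shape (num_joints,),
+          holding the predicted 1-D tag value at that person's keypoints.
+    """
+    means = torch.stack([t.mean() for t in tags])  # reference tag per person
+    # pull: a person's keypoint tags should be close to that person's mean tag
+    pull = torch.stack([((t - m) ** 2).mean() for t, m in zip(tags, means)]).mean()
+    # push: mean tags of different people should be far apart
+    diff = means[:, None] - means[None, :]
+    push = torch.exp(-diff ** 2 / (2 * sigma ** 2)).mean()
+    return pull + push
+
+
+# example: two people, three annotated joints each
+loss = associative_embedding_loss([torch.randn(3), torch.randn(3)])
+```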
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/awingloss.md b/vendor/ViTPose/docs/en/papers/algorithms/awingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d4b93a87c622b6b965cab31ac402b8445934a9a
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/awingloss.md
@@ -0,0 +1,31 @@
+# Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression
+
+
+
+
+AdaptiveWingloss (ICCV'2019)
+
+```bibtex
+@inproceedings{wang2019adaptive,
+ title={Adaptive wing loss for robust face alignment via heatmap regression},
+ author={Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
+ booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
+ pages={6971--6981},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+Heatmap regression with a deep network has become one of the mainstream approaches to localize facial landmarks. However, the loss function for heatmap regression is rarely studied. In this paper, we analyze the ideal loss function properties for heatmap regression in face alignment problems. Then we propose a novel loss function, named Adaptive Wing loss, that is able to adapt its shape to different types of ground truth heatmap pixels. This adaptability penalizes loss more on foreground pixels while less on background pixels. To address the imbalance between foreground and background pixels, we also propose Weighted Loss Map, which assigns high weights on foreground and difficult background pixels to help training process focus more on pixels that are crucial to landmark localization. To further improve face alignment accuracy, we introduce boundary prediction and CoordConv with boundary coordinates. Extensive experiments on different benchmarks, including COFW, 300W and WFLW, show our approach outperforms the state-of-the-art by a significant margin on
+various evaluation metrics. Besides, the Adaptive Wing loss also helps other heatmap regression tasks.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/cpm.md b/vendor/ViTPose/docs/en/papers/algorithms/cpm.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb5dbfacec909f86b58d1ed4b24e75cad039c49e
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/cpm.md
@@ -0,0 +1,30 @@
+# Convolutional pose machines
+
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+## Abstract
+
+
+
+Pose Machines provide a sequential prediction framework for learning rich implicit spatial models. In this work we show a systematic design for how convolutional networks can be incorporated into the pose machine framework for learning image features and image-dependent spatial models for the task of pose estimation. The contribution of this paper is to implicitly model long-range dependencies between variables in structured prediction tasks such as articulated pose estimation. We achieve this by designing a sequential architecture composed of convolutional networks that directly operate on belief maps from previous stages, producing increasingly refined estimates for part locations, without the need for explicit graphical model-style inference. Our approach addresses the characteristic difficulty of vanishing gradients during training by providing a natural learning objective function that enforces intermediate supervision, thereby replenishing back-propagated gradients and conditioning the learning procedure. We demonstrate state-of-the-art performance and outperform competing methods on standard benchmarks including the MPII, LSP, and FLIC datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/dark.md b/vendor/ViTPose/docs/en/papers/algorithms/dark.md
new file mode 100644
index 0000000000000000000000000000000000000000..083b7596ab1e7aadb3f154eea58a170b7b22fb54
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/dark.md
@@ -0,0 +1,30 @@
+# Distribution-aware coordinate representation for human pose estimation
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+While being the de facto standard coordinate representation for human pose estimation, heatmap has not been investigated in-depth. This work fills this gap. For the first time, we find that the process of decoding the predicted heatmaps into the final joint coordinates in the original image space is surprisingly significant for the performance. We further probe the design limitations of the standard coordinate decoding method, and propose a more principled distributionaware decoding method. Also, we improve the standard coordinate encoding process (i.e. transforming ground-truth coordinates to heatmaps) by generating unbiased/accurate heatmaps. Taking the two together, we formulate a novel Distribution-Aware coordinate Representation of Keypoints (DARK) method. Serving as a model-agnostic plug-in, DARK brings about significant performance boost to existing human pose estimation models. Extensive experiments show that DARK yields the best results on two common benchmarks, MPII and COCO. Besides, DARK achieves the 2nd place entry in the ICCV 2019 COCO Keypoints Challenge. The code is available online.
+
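+As a sketch of the distribution-aware decoding step (a simplified Taylor-expansion refinement; the finite-difference details below are assumptions, and the paper's heatmap modulation step is omitted), the integer peak of the log-heatmap is refined by one Newton step, x = m - H^{-1} g:
+
+```python
+import numpy as np
+
+
+def dark_refine(heatmap, eps=1e-10):
+    """Refine the argmax of a 2-D heatmap with a second-order Taylor step."""
+    h = np.log(np.maximum(heatmap, eps))  # work on the log-heatmap
+    y, x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
+    if 1 <= x < h.shape[1] - 1 and 1 <= y < h.shape[0] - 1:
+        # finite-difference gradient and Hessian at the integer peak
+        dx = 0.5 * (h[y, x + 1] - h[y, x - 1])
+        dy = 0.5 * (h[y + 1, x] - h[y - 1, x])
+        dxx = h[y, x + 1] - 2 * h[y, x] + h[y, x - 1]
+        dyy = h[y + 1, x] - 2 * h[y, x] + h[y - 1, x]
+        dxy = 0.25 * (h[y + 1, x + 1] - h[y + 1, x - 1]
+                      - h[y - 1, x + 1] + h[y - 1, x - 1])
+        hess = np.array([[dxx, dxy], [dxy, dyy]])
+        if np.linalg.det(hess) != 0:
+            offset = -np.linalg.solve(hess, np.array([dx, dy]))
+            return np.array([x, y], dtype=float) + offset
+    return np.array([x, y], dtype=float)
+
+
+# example on a synthetic Gaussian blob centred at (20.3, 30.7)
+yy, xx = np.mgrid[0:64, 0:48]
+hm = np.exp(-((xx - 20.3) ** 2 + (yy - 30.7) ** 2) / (2 * 2.0 ** 2))
+print(dark_refine(hm))  # approximately [20.3, 30.7]
+```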
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/deeppose.md b/vendor/ViTPose/docs/en/papers/algorithms/deeppose.md
new file mode 100644
index 0000000000000000000000000000000000000000..24778ba9db6ecfa35ea2dfabc68cadfeb3b24d7c
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/deeppose.md
@@ -0,0 +1,30 @@
+# DeepPose: Human pose estimation via deep neural networks
+
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+## Abstract
+
+
+
+We propose a method for human pose estimation based on Deep Neural Networks (DNNs). The pose estimation is formulated as a DNN-based regression problem towards body joints. We present a cascade of such DNN regressors which results in high precision pose estimates. The approach has the advantage of reasoning about pose in a holistic fashion and has a simple but yet powerful formulation which capitalizes on recent advances in Deep Learning. We present a detailed empirical analysis with state-of-art or better performance on four academic benchmarks of diverse real-world images.
+
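+To make the formulation concrete, here is a minimal single-stage sketch of direct coordinate regression in the spirit of the paper (the backbone, joint count and normalized-coordinate targets are assumptions, and the paper's cascade of refinement stages is omitted):
+
+```python
+import torch
+import torch.nn as nn
+import torchvision
+
+
+class CoordRegressor(nn.Module):
+    """Regress K normalized (x, y) joint coordinates directly from an image."""
+
+    def __init__(self, num_joints=17):
+        super().__init__()
+        backbone = torchvision.models.resnet50()
+        backbone.fc = nn.Linear(backbone.fc.in_features, 2 * num_joints)
+        self.net = backbone
+        self.num_joints = num_joints
+
+    def forward(self, images):
+        return self.net(images).view(-1, self.num_joints, 2)
+
+
+model = CoordRegressor()
+images = torch.randn(2, 3, 256, 192)
+targets = torch.rand(2, 17, 2)  # coordinates normalized to [0, 1]
+loss = nn.functional.mse_loss(model(images), targets)  # L2 regression objective
+```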
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/higherhrnet.md b/vendor/ViTPose/docs/en/papers/algorithms/higherhrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..c1d61c992a1f41e986d785560de0709407578dee
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/higherhrnet.md
@@ -0,0 +1,30 @@
+# HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Bottom-up human pose estimation methods have difficulties in predicting the correct pose for small persons due to challenges in scale variation. In this paper, we present HigherHRNet: a novel bottom-up human pose estimation method for learning scale-aware representations using high-resolution feature pyramids. Equipped with multi-resolution supervision for training and multi-resolution aggregation for inference, the proposed approach is able to solve the scale variation challenge in bottom-up multi-person pose estimation and localize keypoints more precisely, especially for small person. The feature pyramid in HigherHRNet consists of feature map outputs from HRNet and upsampled higher-resolution outputs through a transposed convolution. HigherHRNet outperforms the previous best bottom-up method by 2.5% AP for medium person on COCO test-dev, showing its effectiveness in handling scale variation. Furthermore, HigherHRNet achieves new state-of-the-art result on COCO test-dev (70.5% AP) without using refinement or other post-processing techniques, surpassing all existing bottom-up methods. HigherHRNet even surpasses all top-down methods on CrowdPose test (67.6% AP), suggesting its robustness in crowded scene.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/hmr.md b/vendor/ViTPose/docs/en/papers/algorithms/hmr.md
new file mode 100644
index 0000000000000000000000000000000000000000..5c90aa45218fcab1cd1f03d22af5c3c802b26be5
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/hmr.md
@@ -0,0 +1,32 @@
+# End-to-end Recovery of Human Shape and Pose
+
+
+
+
+HMR (CVPR'2018)
+
+```bibtex
+@inProceedings{kanazawaHMR18,
+ title={End-to-end Recovery of Human Shape and Pose},
+ author = {Angjoo Kanazawa
+ and Michael J. Black
+ and David W. Jacobs
+ and Jitendra Malik},
+ booktitle={Computer Vision and Pattern Recognition (CVPR)},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+We describe Human Mesh Recovery (HMR), an end-to-end framework for reconstructing a full 3D mesh of a human body from a single RGB image. In contrast to most current methods that compute 2D or 3D joint locations, we produce a richer and more useful mesh representation that is parameterized by shape and 3D joint angles. The main objective is to minimize the reprojection loss of keypoints, which allows our model to be trained using in-the-wild images that only have ground truth 2D annotations. However, the reprojection loss alone is highly underconstrained. In this work we address this problem by introducing an adversary trained to tell whether human body shape and pose are real or not using a large database of 3D human meshes. We show that HMR can be trained with and without using any paired 2D-to-3D supervision. We do not rely on intermediate 2D keypoint detections and infer 3D pose and shape parameters directly from image pixels. Our model runs in real-time given a bounding box containing the person. We demonstrate our approach on various images in-the-wild and out-perform previous optimization-based methods that output 3D meshes and show competitive results on tasks such as 3D joint location estimation and part segmentation.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/hourglass.md b/vendor/ViTPose/docs/en/papers/algorithms/hourglass.md
new file mode 100644
index 0000000000000000000000000000000000000000..7782484a31fc01d7daed19536328e653e317bda0
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/hourglass.md
@@ -0,0 +1,31 @@
+# Stacked hourglass networks for human pose estimation
+
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+## Abstract
+
+
+
+This work introduces a novel convolutional network architecture for the task of human pose estimation. Features are processed across all scales and consolidated to best capture the various spatial relationships associated with the body. We show how repeated bottom-up, top-down processing used in conjunction with intermediate supervision is critical to improving the performance of the network. We refer to the architecture as a “stacked hourglass” network based on the successive steps of pooling and upsampling that are done to produce a final set of predictions. State-of-the-art results are achieved on the FLIC and MPII benchmarks outcompeting all recent methods.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/hrnet.md b/vendor/ViTPose/docs/en/papers/algorithms/hrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..05a46f543ef25de847c5fcb4704f56e5cea2bd42
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/hrnet.md
@@ -0,0 +1,32 @@
+# Deep high-resolution representation learning for human pose estimation
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we are interested in the human pose estimation problem with a focus on learning reliable high-resolution representations. Most existing methods recover high-resolution representations from low-resolution representations produced by a high-to-low resolution network. Instead, our proposed network maintains high-resolution representations through the whole process. We start from a high-resolution subnetwork as the first stage, gradually add high-to-low resolution subnetworks one by one to form more stages, and connect the multi-resolution subnetworks in parallel. We conduct repeated multi-scale fusions such that each of the high-to-low resolution representations receives information from other parallel representations over and over, leading to rich high-resolution representations. As a result, the predicted keypoint heatmap is potentially more accurate and spatially more precise. We empirically demonstrate the effectiveness
+of our network through the superior pose estimation results over two benchmark datasets: the COCO keypoint detection
+dataset and the MPII Human Pose dataset. In addition, we show the superiority of our network in pose tracking on the PoseTrack dataset.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/hrnetv2.md b/vendor/ViTPose/docs/en/papers/algorithms/hrnetv2.md
new file mode 100644
index 0000000000000000000000000000000000000000..f2ed2a9c0c8797a842e73c980e1868cdbfbf8cc8
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/hrnetv2.md
@@ -0,0 +1,31 @@
+# Deep high-resolution representation learning for visual recognition
+
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions in series (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams in parallel and (ii) repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/internet.md b/vendor/ViTPose/docs/en/papers/algorithms/internet.md
new file mode 100644
index 0000000000000000000000000000000000000000..e37ea72cea85da8b1fd6bf143b6958ff18972377
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/internet.md
@@ -0,0 +1,29 @@
+# InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image
+
+
+
+
+InterNet (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Analysis of hand-hand interactions is a crucial step towards better understanding human behavior. However, most researches in 3D hand pose estimation have focused on the isolated single hand case. Therefore, we firstly propose (1) a large-scale dataset, InterHand2.6M, and (2) a baseline network, InterNet, for 3D interacting hand pose estimation from a single RGB image. The proposed InterHand2.6M consists of 2.6 M labeled single and interacting hand frames under various poses from multiple subjects. Our InterNet simultaneously performs 3D single and interacting hand pose estimation. In our experiments, we demonstrate big gains in 3D interacting hand pose estimation accuracy when leveraging the interacting hand data in InterHand2.6M. We also report the accuracy of InterNet on InterHand2.6M, which serves as a strong baseline for this new dataset. Finally, we show 3D interacting hand pose estimation results from general images.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/litehrnet.md b/vendor/ViTPose/docs/en/papers/algorithms/litehrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..f446062caf6b5a88d1206c1cb412bf74006da6f2
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/litehrnet.md
@@ -0,0 +1,30 @@
+# Lite-HRNet: A Lightweight High-Resolution Network
+
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present an efficient high-resolution network, Lite-HRNet, for human pose estimation. We start by simply applying the efficient shuffle block in ShuffleNet to HRNet (high-resolution network), yielding stronger performance over popular lightweight networks, such as MobileNet, ShuffleNet, and Small HRNet.
+We find that the heavily-used pointwise (1x1) convolutions in shuffle blocks become the computational bottleneck. We introduce a lightweight unit, conditional channel weighting, to replace costly pointwise (1x1) convolutions in shuffle blocks. The complexity of channel weighting is linear w.r.t the number of channels and lower than the quadratic time complexity for pointwise convolutions. Our solution learns the weights from all the channels and over multiple resolutions that are readily available in the parallel branches in HRNet. It uses the weights as the bridge to exchange information across channels and resolutions, compensating the role played by the pointwise (1x1) convolution. Lite-HRNet demonstrates superior results on human pose estimation over popular lightweight networks. Moreover, Lite-HRNet can be easily applied to semantic segmentation task in the same lightweight manner.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/mspn.md b/vendor/ViTPose/docs/en/papers/algorithms/mspn.md
new file mode 100644
index 0000000000000000000000000000000000000000..1915cd3915fe6d0457ce6f8c02dbe4b306a6941b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/mspn.md
@@ -0,0 +1,29 @@
+# Rethinking on multi-stage networks for human pose estimation
+
+
+
+
+MSPN (ArXiv'2019)
+
+```bibtex
+@article{li2019rethinking,
+ title={Rethinking on Multi-Stage Networks for Human Pose Estimation},
+ author={Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
+ journal={arXiv preprint arXiv:1901.00148},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+Existing pose estimation approaches fall into two categories: single-stage and multi-stage methods. While multi-stage methods are seemingly more suited for the task, their performance in current practice is not as good as single-stage methods. This work studies this issue. We argue that the current multi-stage methods' unsatisfactory performance comes from the insufficiency in various design choices. We propose several improvements, including the single-stage module design, cross stage feature aggregation, and coarse-to-fine supervision. The resulting method establishes the new state-of-the-art on both MS COCO and MPII Human Pose dataset, justifying the effectiveness of a multi-stage architecture. The source code is publicly available for further research.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/posewarper.md b/vendor/ViTPose/docs/en/papers/algorithms/posewarper.md
new file mode 100644
index 0000000000000000000000000000000000000000..285a36c582bc831667216d24d5bc20480e66e933
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/posewarper.md
@@ -0,0 +1,29 @@
+# Learning Temporal Pose Estimation from Sparsely-Labeled Videos
+
+
+
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph, and Tran, Du and Shi, Jianbo, and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 33},
+year = {2019},
+}
+```
+
+
+
+## Abstract
+
+
+
+Modern approaches for multi-person pose estimation in video require large amounts of dense annotations. However, labeling every frame in a video is costly and labor intensive. To reduce the need for dense annotations, we propose a PoseWarper network that leverages training videos with sparse annotations (every k frames) to learn to perform dense temporal pose propagation and estimation. Given a pair of video frames---a labeled Frame A and an unlabeled Frame B---we train our model to predict human pose in Frame A using the features from Frame B by means of deformable convolutions to implicitly learn the pose warping between A and B. We demonstrate that we can leverage our trained PoseWarper for several applications. First, at inference time we can reverse the application direction of our network in order to propagate pose information from manually annotated frames to unlabeled frames. This makes it possible to generate pose annotations for the entire video given only a few manually-labeled frames. Compared to modern label propagation methods based on optical flow, our warping mechanism is much more compact (6M vs 39M parameters), and also more accurate (88.7% mAP vs 83.8% mAP). We also show that we can improve the accuracy of a pose estimator by training it on an augmented dataset obtained by adding our propagated poses to the original manual labels. Lastly, we can use our PoseWarper to aggregate temporal pose information from neighboring frames during inference. This allows our system to achieve state-of-the-art pose detection results on the PoseTrack2017 and PoseTrack2018 datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/rsn.md b/vendor/ViTPose/docs/en/papers/algorithms/rsn.md
new file mode 100644
index 0000000000000000000000000000000000000000..b1fb1ea9131d0b55828123211a8f8625c377f085
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/rsn.md
@@ -0,0 +1,31 @@
+# Learning delicate local representations for multi-person pose estimation
+
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we propose a novel method called Residual Steps Network (RSN). RSN aggregates features with the same spatial size (Intra-level features) efficiently to obtain delicate local representations, which retain rich low-level spatial information and result in precise keypoint localization. Additionally, we observe the output features contribute differently to final performance. To tackle this problem, we propose an efficient attention mechanism - Pose Refine Machine (PRM) to make a trade-off between local and global representations in output features and further refine the keypoint locations. Our approach won the 1st place of COCO Keypoint Challenge 2019 and achieves state-of-the-art results on both COCO and MPII benchmarks, without using extra training data and pretrained model. Our single model achieves 78.6 on COCO test-dev, 93.0 on MPII test dataset. Ensembled models achieve 79.2 on COCO test-dev, 77.1 on COCO test-challenge dataset.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/scnet.md b/vendor/ViTPose/docs/en/papers/algorithms/scnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..043c144111789880f4f1d8b6ee5059518e185e8f
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/scnet.md
@@ -0,0 +1,30 @@
+# Improving Convolutional Networks with Self-Calibrated Convolutions
+
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Recent advances on CNNs are mostly devoted to designing more complex architectures to enhance their representation learning capacity. In this paper, we consider how to improve the basic convolutional feature transformation process of CNNs without tuning the model architectures. To this end, we present a novel self-calibrated convolutions that explicitly expand fields-of-view of each convolutional layers through internal communications and hence enrich the output features. In particular, unlike the standard convolutions that fuse spatial and channel-wise information using small kernels (e.g., 3x3), self-calibrated convolutions adaptively build long-range spatial and inter-channel dependencies around each spatial location through a novel self-calibration operation. Thus, it can help CNNs generate more discriminative representations by explicitly incorporating richer information. Our self-calibrated convolution design is simple and generic, and can be easily applied to augment standard convolutional layers without introducing extra parameters and complexity. Extensive experiments demonstrate that when applying self-calibrated convolutions into different backbones, our networks can significantly improve the baseline models in a variety of vision tasks, including image recognition, object detection, instance segmentation, and keypoint detection, with no need to change the network architectures. We hope this work could provide a promising way for future research in designing novel convolutional feature transformations for improving convolutional networks. Code is available on the project page.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline2d.md b/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..026ef92afc5a89bdede8bbada21f56cbfc18fc32
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline2d.md
@@ -0,0 +1,31 @@
+# Simple baselines for human pose estimation and tracking
+
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+There has been significant progress on pose estimation and increasing interest in pose tracking in recent years. At the same time, the overall algorithm and system complexity has increased as well, making algorithm analysis and comparison more difficult. This work provides simple and effective baseline methods. They are helpful for inspiring and evaluating new ideas for the field. State-of-the-art results are achieved on challenging benchmarks.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline3d.md b/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee3c58368a5f71bda3199d385707336215086aaa
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/simplebaseline3d.md
@@ -0,0 +1,29 @@
+# A simple yet effective baseline for 3d human pose estimation
+
+
+
+
+SimpleBaseline3D (ICCV'2017)
+
+```bibtex
+@inproceedings{martinez_2017_3dbaseline,
+ title={A simple yet effective baseline for 3d human pose estimation},
+ author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
+ booktitle={ICCV},
+ year={2017}
+}
+```
+
+
+
+## Abstract
+
+
+
+Following the success of deep convolutional networks, state-of-the-art methods for 3d human pose estimation have focused on deep end-to-end systems that predict 3d joint locations given raw image pixels. Despite their excellent performance, it is often not easy to understand whether their remaining error stems from a limited 2d pose (visual) understanding, or from a failure to map 2d poses into 3-dimensional positions. With the goal of understanding these sources of error, we set out to build a system that given 2d joint locations predicts 3d positions. Much to our surprise, we have found that, with current technology, "lifting" ground truth 2d joint locations to 3d space is a task that can be solved with a remarkably low error rate: a relatively simple deep feed-forward network outperforms the best reported result by about 30% on Human3.6M, the largest publicly available 3d pose estimation benchmark. Furthermore, training our system on the output of an off-the-shelf state-of-the-art 2d detector (i.e., using images as input) yields state of the art results -- this includes an array of systems that have been trained end-to-end specifically for this task. Our results indicate that a large portion of the error of modern deep 3d pose estimation systems stems from their visual analysis, and suggests directions to further advance the state of the art in 3d human pose estimation.
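+
+As a rough, hypothetical sketch of the "lifting" idea described above (not the authors' released code; the joint count, layer width, and residual structure are illustrative assumptions), such a 2D-to-3D network can be a small feed-forward model:
+
+```python
+# Hypothetical 2D-to-3D lifting network in the spirit of the paper:
+# a small feed-forward net mapping 2D joint coordinates to 3D positions.
+import torch
+import torch.nn as nn
+
+class LiftingBlock(nn.Module):
+    def __init__(self, dim=1024, p_drop=0.5):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(dim, dim), nn.BatchNorm1d(dim), nn.ReLU(), nn.Dropout(p_drop),
+            nn.Linear(dim, dim), nn.BatchNorm1d(dim), nn.ReLU(), nn.Dropout(p_drop),
+        )
+
+    def forward(self, x):
+        return x + self.net(x)  # residual connection
+
+class Lifter2Dto3D(nn.Module):
+    def __init__(self, num_joints=17, dim=1024, num_blocks=2):
+        super().__init__()
+        self.inp = nn.Linear(num_joints * 2, dim)
+        self.blocks = nn.Sequential(*[LiftingBlock(dim) for _ in range(num_blocks)])
+        self.out = nn.Linear(dim, num_joints * 3)
+
+    def forward(self, kpts_2d):                            # (B, J, 2)
+        x = self.blocks(self.inp(kpts_2d.flatten(1)))
+        return self.out(x).view(-1, kpts_2d.shape[1], 3)   # (B, J, 3)
+
+print(Lifter2Dto3D()(torch.randn(4, 17, 2)).shape)         # torch.Size([4, 17, 3])
+```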
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/softwingloss.md b/vendor/ViTPose/docs/en/papers/algorithms/softwingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..524a6089ffee69e109a0a721fa14b820df88ae8b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/softwingloss.md
@@ -0,0 +1,30 @@
+# Structure-Coherent Deep Feature Learning for Robust Face Alignment
+
+
+
+
+SoftWingloss (TIP'2021)
+
+```bibtex
+@article{lin2021structure,
+ title={Structure-Coherent Deep Feature Learning for Robust Face Alignment},
+ author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
+ journal={IEEE Transactions on Image Processing},
+ year={2021},
+ publisher={IEEE}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we propose a structure-coherent deep feature learning method for face alignment. Unlike most existing face alignment methods, which overlook facial structure cues, we explicitly exploit the relations among facial landmarks to make the detector robust to hard cases such as occlusion and large pose. Specifically, we leverage a landmark-graph relational network to enforce the structural relationships among landmarks. We consider the facial landmarks as structural graph nodes and carefully design the neighborhood to pass features among the most related nodes. Our method dynamically adapts the weights of node neighborhoods to eliminate distracting information from noisy nodes, such as occluded landmark points. Moreover, unlike most previous works, which only penalize the absolute positions of landmarks during training, we propose a relative location loss to enhance the information about the relative locations of landmarks. This relative location supervision further regularizes the facial structure. Our approach considers the interactions among facial landmarks and can be easily implemented on top of any convolutional backbone to boost performance. Extensive experiments on three popular benchmarks, including WFLW, COFW and 300W, demonstrate the effectiveness of the proposed method. In particular, thanks to explicit structure modeling, our approach is especially robust to challenging cases, resulting in impressively low failure rates on the COFW and WFLW datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/udp.md b/vendor/ViTPose/docs/en/papers/algorithms/udp.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb4acebfbc9474312e992a67e2a19ef2df12be85
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/udp.md
@@ -0,0 +1,30 @@
+# The Devil is in the Details: Delving into Unbiased Data Processing for Human Pose Estimation
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Recently, the leading performance in human pose estimation has been dominated by top-down methods. Although data processing is a fundamental component of both training and inference, it has not, to the best of our knowledge, been systematically considered in the pose estimation community. In this paper, we focus on this problem and find that the devil of top-down pose estimators lies in biased data processing. Specifically, by investigating the standard data processing in state-of-the-art approaches, mainly data transformation and encoding-decoding, we find that the results obtained with the common flipping strategy are unaligned with the original ones at inference time. Moreover, there is a statistical error in the standard encoding-decoding during both training and inference. The two problems couple together and significantly degrade pose estimation performance. Based on quantitative analyses, we then formulate a principled way to tackle this dilemma. Data is processed in continuous space based on unit length (the intervals between pixels) instead of in discrete pixel space, and a combined classification and regression approach is adopted to perform encoding-decoding. Unbiased Data Processing (UDP) for human pose estimation is achieved by combining the two. UDP not only boosts the performance of existing methods by a large margin but also plays an important role in result reproduction and future exploration. As a model-agnostic approach, UDP promotes SimpleBaseline-ResNet50-256x192 by 1.5 AP (70.2 to 71.7) and HRNet-W32-256x192 by 1.7 AP (73.5 to 75.2) on the COCO test-dev set. HRNet-W48-384x288 equipped with UDP achieves 76.5 AP and sets a new state of the art for human pose estimation. The source code is publicly available for further research.
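+
+As a concrete illustration of the unit-length idea (a minimal sketch, not the official UDP implementation; the function names and sizes are made up for the example), compare a scale computed from pixel counts with one computed from the number of intervals between pixels:
+
+```python
+# Illustrative only: mapping a keypoint x-coordinate from an input crop to
+# the heatmap grid. The "biased" variant scales by pixel counts; the
+# "unbiased" variant scales by unit length (size - 1), keeping border
+# pixels aligned between the two grids.
+def to_heatmap_biased(x, in_size, hm_size):
+    return x * hm_size / in_size
+
+def to_heatmap_unbiased(x, in_size, hm_size):
+    return x * (hm_size - 1) / (in_size - 1)
+
+# A keypoint on the last pixel column of a 192-wide crop, 48-wide heatmap:
+print(to_heatmap_biased(191, 192, 48))    # 47.75 -> past the last heatmap column (47)
+print(to_heatmap_unbiased(191, 192, 48))  # 47.0  -> exactly the last heatmap column
+```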
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/videopose3d.md b/vendor/ViTPose/docs/en/papers/algorithms/videopose3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..f8647e0ee8a67666f352454aa40c256f07bd4c30
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/videopose3d.md
@@ -0,0 +1,30 @@
+# 3D human pose estimation in video with temporal convolutions and semi-supervised training
+
+
+
+
+VideoPose3D (CVPR'2019)
+
+```bibtex
+@inproceedings{pavllo20193d,
+ title={3d human pose estimation in video with temporal convolutions and semi-supervised training},
+ author={Pavllo, Dario and Feichtenhofer, Christoph and Grangier, David and Auli, Michael},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7753--7762},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this work, we demonstrate that 3D poses in video can be effectively estimated with a fully convolutional model based on dilated temporal convolutions over 2D keypoints. We also introduce back-projection, a simple and effective semi-supervised training method that leverages unlabeled video data. We start with predicted 2D keypoints for unlabeled video, then estimate 3D poses and finally back-project to the input 2D keypoints. In the supervised setting, our fully-convolutional model outperforms the previous best result from the literature by 6 mm mean per-joint position error on Human3.6M, corresponding to an error reduction of 11%, and the model also shows significant improvements on HumanEva-I. Moreover, experiments with back-projection show that it comfortably outperforms previous state-of-the-art results in semi-supervised settings where labeled data is scarce.
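+
+As a minimal sketch of the temporal-convolution idea above (channel widths, layer count, and dilation schedule are assumptions, not the released model), a dilated 1D convolutional stack over 2D keypoint sequences could look like this:
+
+```python
+# Illustrative fully convolutional temporal model: 1D convolutions with
+# growing dilation widen the temporal receptive field (3 -> 9 -> 27 frames)
+# without any pooling or recurrence.
+import torch
+import torch.nn as nn
+
+class TemporalLifter(nn.Module):
+    def __init__(self, num_joints=17, channels=256):
+        super().__init__()
+        layers, in_ch, dilation = [], num_joints * 2, 1
+        for _ in range(3):
+            layers += [nn.Conv1d(in_ch, channels, kernel_size=3, dilation=dilation),
+                       nn.BatchNorm1d(channels), nn.ReLU()]
+            in_ch, dilation = channels, dilation * 3
+        self.body = nn.Sequential(*layers)
+        self.head = nn.Conv1d(channels, num_joints * 3, kernel_size=1)
+
+    def forward(self, kpts_2d):                            # (B, T, J, 2)
+        b, t, j, _ = kpts_2d.shape
+        x = kpts_2d.reshape(b, t, j * 2).permute(0, 2, 1)  # (B, J*2, T)
+        x = self.head(self.body(x))                        # (B, J*3, T')
+        return x.permute(0, 2, 1).reshape(b, -1, j, 3)     # one 3D pose per valid frame
+
+print(TemporalLifter()(torch.randn(2, 81, 17, 2)).shape)   # torch.Size([2, 55, 17, 3])
+```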
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/vipnas.md b/vendor/ViTPose/docs/en/papers/algorithms/vipnas.md
new file mode 100644
index 0000000000000000000000000000000000000000..5f52a8cac04cf48cb2e330afe176d835588034c6
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/vipnas.md
@@ -0,0 +1,29 @@
+# ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search
+
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+## Abstract
+
+
+
+Human pose estimation has achieved significant progress in recent years. However, most recent methods focus on improving accuracy with complicated models while ignoring real-time efficiency. To achieve a better trade-off between accuracy and efficiency, we propose a novel neural architecture search (NAS) method, termed ViPNAS, to search networks at both the spatial and temporal levels for fast online video pose estimation. At the spatial level, we carefully design the search space with five different dimensions including network depth, width, kernel size, group number, and attentions. At the temporal level, we search over a series of temporal feature fusions to optimize the total accuracy and speed across multiple video frames. To the best of our knowledge, we are the first to search for temporal feature fusion and automatic computation allocation in videos. Extensive experiments demonstrate the effectiveness of our approach on the challenging COCO2017 and PoseTrack2018 datasets. Our discovered model family, S-ViPNAS and T-ViPNAS, achieves significantly higher inference speed (CPU real-time) without sacrificing accuracy compared to previous state-of-the-art methods.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/voxelpose.md b/vendor/ViTPose/docs/en/papers/algorithms/voxelpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..384f4ca1e57c1ad51ef79557f661b891f08173e7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/voxelpose.md
@@ -0,0 +1,29 @@
+# VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment
+
+
+
+
+VoxelPose (ECCV'2020)
+
+```bibtex
+@inproceedings{tumultipose,
+ title={VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment},
+ author={Tu, Hanyue and Wang, Chunyu and Zeng, Wenjun},
+ booktitle={ECCV},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present VoxelPose to estimate 3D poses of multiple people from multiple camera views. In contrast to previous efforts, which require establishing cross-view correspondences based on noisy and incomplete 2D pose estimates, VoxelPose directly operates in 3D space and therefore avoids making incorrect decisions in each camera view. To achieve this goal, features from all camera views are aggregated in a 3D voxel space and fed into a Cuboid Proposal Network (CPN) to localize all people. We then propose a Pose Regression Network (PRN) to estimate a detailed 3D pose for each proposal. The approach is robust to occlusion, which occurs frequently in practice. Without bells and whistles, it outperforms previous methods on several public datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/algorithms/wingloss.md b/vendor/ViTPose/docs/en/papers/algorithms/wingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..2aaa05722eda24201cd35e1028349994d1f0fd6b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/algorithms/wingloss.md
@@ -0,0 +1,31 @@
+# Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks
+
+
+
+
+Wingloss (CVPR'2018)
+
+```bibtex
+@inproceedings{feng2018wing,
+ title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
+ author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
+ booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
+ year={2018},
+ pages ={2235-2245},
+ organization={IEEE}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present a new loss function, namely Wing loss, for robust facial landmark localisation with Convolutional Neural Networks (CNNs). We first compare and analyse different loss functions including L2, L1 and smooth L1. The analysis of these loss functions suggests that, for the training of a CNN-based localisation model, more attention should be paid to small and medium range errors. To this end, we design a piece-wise loss function. The new loss amplifies the impact of errors from the interval (-w, w) by switching from L1 loss to a modified logarithm function. To address the problem of under-representation of samples with large out-of-plane head rotations in the training set, we propose a simple but effective boosting strategy, referred to as pose-based data balancing. In particular, we deal with the data imbalance problem by duplicating the minority training samples and perturbing them by injecting random image rotation, bounding box translation and other data augmentation approaches. Last, the proposed approach is extended to create a two-stage framework for robust facial landmark localisation. The experimental results obtained on AFLW and 300W demonstrate the merits of the Wing loss function, and prove the superiority of the proposed method over the state-of-the-art approaches.
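+
+The piece-wise loss described above is short enough to sketch directly; the following is a straightforward, unofficial implementation, with the w and epsilon values chosen only for illustration:
+
+```python
+# Wing loss sketch: logarithmic for small/medium errors, L1 for large ones,
+# with the constant c chosen so that the two pieces join continuously at w.
+import torch
+
+def wing_loss(pred, target, w=10.0, epsilon=2.0):
+    diff = (pred - target).abs()
+    c = w - w * torch.log(torch.tensor(1.0 + w / epsilon))
+    return torch.where(diff < w,
+                       w * torch.log(1.0 + diff / epsilon),  # amplified small/medium errors
+                       diff - c).mean()                       # plain L1 beyond w
+
+print(wing_loss(torch.zeros(5, 68, 2), torch.full((5, 68, 2), 0.5)))
+```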
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/alexnet.md b/vendor/ViTPose/docs/en/papers/backbones/alexnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d0bb87d25ff64384d674ff3a8fab88c3ce21f
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/alexnet.md
@@ -0,0 +1,30 @@
+# Imagenet classification with deep convolutional neural networks
+
+
+
+
+AlexNet (NeurIPS'2012)
+
+```bibtex
+@inproceedings{krizhevsky2012imagenet,
+ title={Imagenet classification with deep convolutional neural networks},
+ author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
+ booktitle={Advances in neural information processing systems},
+ pages={1097--1105},
+ year={2012}
+}
+```
+
+
+
+## Abstract
+
+
+
+We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5% and 17.0%, which is considerably better than the previous state of the art. The neural network, which has 60 million parameters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully-connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolution operation. To reduce overfitting in the fully-connected layers we employed a recently-developed regularization method called “dropout” that proved to be very effective. We also entered a variant of this model in the ILSVRC-2012 competition and achieved a winning top-5 test error rate of 15.3%, compared to 26.2% achieved by the second-best entry.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/cpm.md b/vendor/ViTPose/docs/en/papers/backbones/cpm.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb5dbfacec909f86b58d1ed4b24e75cad039c49e
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/cpm.md
@@ -0,0 +1,30 @@
+# Convolutional pose machines
+
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+## Abstract
+
+
+
+Pose Machines provide a sequential prediction framework for learning rich implicit spatial models. In this work we show a systematic design for how convolutional networks can be incorporated into the pose machine framework for learning image features and image-dependent spatial models for the task of pose estimation. The contribution of this paper is to implicitly model long-range dependencies between variables in structured prediction tasks such as articulated pose estimation. We achieve this by designing a sequential architecture composed of convolutional networks that directly operate on belief maps from previous stages, producing increasingly refined estimates for part locations, without the need for explicit graphical model-style inference. Our approach addresses the characteristic difficulty of vanishing gradients during training by providing a natural learning objective function that enforces intermediate supervision, thereby replenishing back-propagated gradients and conditioning the learning procedure. We demonstrate state-of-the-art performance and outperform competing methods on standard benchmarks including the MPII, LSP, and FLIC datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/higherhrnet.md b/vendor/ViTPose/docs/en/papers/backbones/higherhrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..c1d61c992a1f41e986d785560de0709407578dee
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/higherhrnet.md
@@ -0,0 +1,30 @@
+# HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation
+
+
+
+
+HigherHRNet (CVPR'2020)
+
+```bibtex
+@inproceedings{cheng2020higherhrnet,
+ title={HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
+ author={Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={5386--5395},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Bottom-up human pose estimation methods have difficulty predicting the correct pose for small persons due to challenges in scale variation. In this paper, we present HigherHRNet: a novel bottom-up human pose estimation method for learning scale-aware representations using high-resolution feature pyramids. Equipped with multi-resolution supervision for training and multi-resolution aggregation for inference, the proposed approach is able to solve the scale variation challenge in bottom-up multi-person pose estimation and to localize keypoints more precisely, especially for small persons. The feature pyramid in HigherHRNet consists of feature map outputs from HRNet and higher-resolution outputs upsampled through a transposed convolution. HigherHRNet outperforms the previous best bottom-up method by 2.5% AP for medium persons on COCO test-dev, showing its effectiveness in handling scale variation. Furthermore, HigherHRNet achieves a new state-of-the-art result on COCO test-dev (70.5% AP) without using refinement or other post-processing techniques, surpassing all existing bottom-up methods. HigherHRNet even surpasses all top-down methods on the CrowdPose test set (67.6% AP), suggesting its robustness in crowded scenes.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/hourglass.md b/vendor/ViTPose/docs/en/papers/backbones/hourglass.md
new file mode 100644
index 0000000000000000000000000000000000000000..7782484a31fc01d7daed19536328e653e317bda0
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/hourglass.md
@@ -0,0 +1,31 @@
+# Stacked hourglass networks for human pose estimation
+
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+## Abstract
+
+
+
+This work introduces a novel convolutional network architecture for the task of human pose estimation. Features are processed across all scales and consolidated to best capture the various spatial relationships associated with the body. We show how repeated bottom-up, top-down processing used in conjunction with intermediate supervision is critical to improving the performance of the network. We refer to the architecture as a “stacked hourglass” network based on the successive steps of pooling and upsampling that are done to produce a final set of predictions. State-of-the-art results are achieved on the FLIC and MPII benchmarks outcompeting all recent methods.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/hrformer.md b/vendor/ViTPose/docs/en/papers/backbones/hrformer.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfa7a13f6b368b64669eb6acfa0d6d637fcb3496
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/hrformer.md
@@ -0,0 +1,39 @@
+# HRFormer: High-Resolution Vision Transformer for Dense Prediction
+
+
+
+
+HRFormer (NeurIPS'2021)
+
+```bibtex
+@article{yuan2021hrformer,
+ title={HRFormer: High-Resolution Vision Transformer for Dense Prediction},
+ author={Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
+ journal={Advances in Neural Information Processing Systems},
+ volume={34},
+ year={2021}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present a High-Resolution Transformer (HRFormer) that learns high-resolution representations for dense
+prediction tasks, in contrast to the original Vision Transformer that produces low-resolution representations
+and has high memory and computational cost. We take advantage of the multi-resolution parallel design
+introduced in high-resolution convolutional networks (HRNet), along with local-window self-attention
+that performs self-attention over small non-overlapping image windows, for improving the memory and
+computation efficiency. In addition, we introduce a convolution into the FFN to exchange information
+across the disconnected image windows. We demonstrate the effectiveness of the High-Resolution Transformer
+on both human pose estimation and semantic segmentation tasks; e.g., HRFormer outperforms the Swin
+Transformer by 1.3 AP on COCO pose estimation with 50% fewer parameters and 30% fewer FLOPs.
+Code is available at: https://github.com/HRNet/HRFormer
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/hrnet.md b/vendor/ViTPose/docs/en/papers/backbones/hrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..05a46f543ef25de847c5fcb4704f56e5cea2bd42
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/hrnet.md
@@ -0,0 +1,32 @@
+# Deep high-resolution representation learning for human pose estimation
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we are interested in the human pose estimation problem with a focus on learning reliable high-resolution representations. Most existing methods recover high-resolution representations from low-resolution representations produced by a high-to-low resolution network. Instead, our proposed network maintains high-resolution representations through the whole process. We start from a high-resolution subnetwork as the first stage, gradually add high-to-low resolution subnetworks one by one to form more stages, and connect the multi-resolution subnetworks in parallel. We conduct repeated multi-scale fusions such that each of the high-to-low resolution representations receives information from other parallel representations over and over, leading to rich high-resolution representations. As a result, the predicted keypoint heatmap is potentially more accurate and spatially more precise. We empirically demonstrate the effectiveness of our network through superior pose estimation results on two benchmark datasets: the COCO keypoint detection dataset and the MPII Human Pose dataset. In addition, we show the superiority of our network in pose tracking on the PoseTrack dataset.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/hrnetv2.md b/vendor/ViTPose/docs/en/papers/backbones/hrnetv2.md
new file mode 100644
index 0000000000000000000000000000000000000000..f2ed2a9c0c8797a842e73c980e1868cdbfbf8cc8
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/hrnetv2.md
@@ -0,0 +1,31 @@
+# Deep high-resolution representation learning for visual recognition
+
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions in series (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams in parallel and (ii) repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/litehrnet.md b/vendor/ViTPose/docs/en/papers/backbones/litehrnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..f446062caf6b5a88d1206c1cb412bf74006da6f2
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/litehrnet.md
@@ -0,0 +1,30 @@
+# Lite-HRNet: A Lightweight High-Resolution Network
+
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present an efficient high-resolution network, Lite-HRNet, for human pose estimation. We start by simply applying the efficient shuffle block in ShuffleNet to HRNet (high-resolution network), yielding stronger performance over popular lightweight networks, such as MobileNet, ShuffleNet, and Small HRNet.
+We find that the heavily-used pointwise (1x1) convolutions in shuffle blocks become the computational bottleneck. We introduce a lightweight unit, conditional channel weighting, to replace the costly pointwise (1x1) convolutions in shuffle blocks. The complexity of channel weighting is linear w.r.t. the number of channels and lower than the quadratic time complexity of pointwise convolutions. Our solution learns the weights from all the channels and over the multiple resolutions that are readily available in the parallel branches of HRNet. It uses the weights as a bridge to exchange information across channels and resolutions, compensating for the role played by the pointwise (1x1) convolution. Lite-HRNet demonstrates superior results on human pose estimation over popular lightweight networks. Moreover, Lite-HRNet can be easily applied to the semantic segmentation task in the same lightweight manner.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/mobilenetv2.md b/vendor/ViTPose/docs/en/papers/backbones/mobilenetv2.md
new file mode 100644
index 0000000000000000000000000000000000000000..9456520d46399060f00531a93e8612bff7625550
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/mobilenetv2.md
@@ -0,0 +1,30 @@
+# Mobilenetv2: Inverted residuals and linear bottlenecks
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper we describe a new mobile architecture, MobileNetV2, that improves the state-of-the-art performance of mobile models on multiple tasks and benchmarks as well as across a spectrum of different model sizes. We also describe efficient ways of applying these mobile models to object detection in a novel framework we call SSDLite. Additionally, we demonstrate how to build mobile semantic segmentation models through a reduced form of DeepLabv3, which we call Mobile DeepLabv3. MobileNetV2 is based on an inverted residual structure where the shortcut connections are between the thin bottleneck layers. The intermediate expansion layer uses lightweight depthwise convolutions to filter features as a source of non-linearity. Additionally, we find that it is important to remove non-linearities in the narrow layers in order to maintain representational power. We demonstrate that this improves performance and provide the intuition that led to this design. Finally, our approach allows decoupling of the input/output domains from the expressiveness of the transformation, which provides a convenient framework for further analysis. We measure our performance on ImageNet classification, COCO object detection, and VOC image segmentation. We evaluate the trade-offs between accuracy, number of operations measured by multiply-adds (MAdds), actual latency, and the number of parameters.
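+
+A minimal sketch of the inverted residual block described above (expansion, depthwise convolution, linear projection, shortcut between the thin bottlenecks); the channel sizes and expansion factor are illustrative assumptions, not the reference implementation:
+
+```python
+import torch
+import torch.nn as nn
+
+class InvertedResidual(nn.Module):
+    def __init__(self, in_ch, out_ch, stride=1, expand=6):
+        super().__init__()
+        hidden = in_ch * expand
+        self.use_skip = stride == 1 and in_ch == out_ch
+        self.block = nn.Sequential(
+            nn.Conv2d(in_ch, hidden, 1, bias=False), nn.BatchNorm2d(hidden), nn.ReLU6(),  # expand
+            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),           # depthwise
+            nn.BatchNorm2d(hidden), nn.ReLU6(),
+            nn.Conv2d(hidden, out_ch, 1, bias=False), nn.BatchNorm2d(out_ch),             # linear bottleneck
+        )
+
+    def forward(self, x):
+        y = self.block(x)
+        return x + y if self.use_skip else y  # shortcut connects the thin bottleneck layers
+
+print(InvertedResidual(32, 32)(torch.randn(1, 32, 56, 56)).shape)  # torch.Size([1, 32, 56, 56])
+```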
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/mspn.md b/vendor/ViTPose/docs/en/papers/backbones/mspn.md
new file mode 100644
index 0000000000000000000000000000000000000000..1915cd3915fe6d0457ce6f8c02dbe4b306a6941b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/mspn.md
@@ -0,0 +1,29 @@
+# Rethinking on multi-stage networks for human pose estimation
+
+
+
+
+MSPN (ArXiv'2019)
+
+```bibtex
+@article{li2019rethinking,
+ title={Rethinking on Multi-Stage Networks for Human Pose Estimation},
+ author={Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
+ journal={arXiv preprint arXiv:1901.00148},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+Existing pose estimation approaches fall into two categories: single-stage and multi-stage methods. While multi-stage methods are seemingly more suited for the task, their performance in current practice is not as good as single-stage methods. This work studies this issue. We argue that the current multi-stage methods' unsatisfactory performance comes from the insufficiency in various design choices. We propose several improvements, including the single-stage module design, cross stage feature aggregation, and coarse-to-fine supervision. The resulting method establishes the new state-of-the-art on both MS COCO and MPII Human Pose dataset, justifying the effectiveness of a multi-stage architecture. The source code is publicly available for further research.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/resnest.md b/vendor/ViTPose/docs/en/papers/backbones/resnest.md
new file mode 100644
index 0000000000000000000000000000000000000000..748c94737a4ebc96ec50a5520e1fa5c547651d42
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/resnest.md
@@ -0,0 +1,29 @@
+# ResNeSt: Split-Attention Networks
+
+
+
+
+ResNeSt (ArXiv'2020)
+
+```bibtex
+@article{zhang2020resnest,
+ title={ResNeSt: Split-Attention Networks},
+ author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+ journal={arXiv preprint arXiv:2004.08955},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+It is well known that featuremap attention and multi-path representation are important for visual recognition. In this paper, we present a modularized architecture, which applies the channel-wise attention on different network branches to leverage their success in capturing cross-feature interactions and learning diverse representations. Our design results in a simple and unified computation block, which can be parameterized using only a few variables. Our model, named ResNeSt, outperforms EfficientNet in accuracy and latency trade-off on image classification. In addition, ResNeSt has achieved superior transfer learning results on several public benchmarks serving as the backbone, and has been adopted by the winning entries of COCO-LVIS challenge. The source code for complete system and pretrained models are publicly available.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/resnet.md b/vendor/ViTPose/docs/en/papers/backbones/resnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..86b91ffc38623af6f4fd8614371cb3f3db2d6fe2
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/resnet.md
@@ -0,0 +1,32 @@
+# Deep residual learning for image recognition
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+## Abstract
+
+
+
+Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers, 8× deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57% error on the ImageNet test set. This result won 1st place in the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28% relative improvement on the COCO object detection dataset. Deep residual nets are the foundation of our submissions to the ILSVRC & COCO 2015 competitions, where we also won 1st place on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.
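+
+A minimal sketch of the residual idea (a basic two-convolution block with an identity shortcut; the channel count is illustrative, not a full ResNet):
+
+```python
+import torch
+import torch.nn as nn
+
+class BasicBlock(nn.Module):
+    def __init__(self, channels):
+        super().__init__()
+        self.body = nn.Sequential(
+            nn.Conv2d(channels, channels, 3, padding=1, bias=False),
+            nn.BatchNorm2d(channels), nn.ReLU(),
+            nn.Conv2d(channels, channels, 3, padding=1, bias=False),
+            nn.BatchNorm2d(channels),
+        )
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        return self.relu(x + self.body(x))  # layers learn the residual F(x); output is F(x) + x
+
+print(BasicBlock(64)(torch.randn(1, 64, 56, 56)).shape)  # torch.Size([1, 64, 56, 56])
+```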
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/resnetv1d.md b/vendor/ViTPose/docs/en/papers/backbones/resnetv1d.md
new file mode 100644
index 0000000000000000000000000000000000000000..ebde55454e4750dfce018e1f13cb7a464380b5ae
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/resnetv1d.md
@@ -0,0 +1,31 @@
+# Bag of tricks for image classification with convolutional neural networks
+
+
+
+
+ResNetV1D (CVPR'2019)
+
+```bibtex
+@inproceedings{he2019bag,
+ title={Bag of tricks for image classification with convolutional neural networks},
+ author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={558--567},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+Much of the recent progress made in image classification research can be credited to training procedure refinements, such as changes in data augmentations and optimization methods. In the literature, however, most refinements are either briefly mentioned as implementation details or only visible in source code. In this paper, we will examine a collection of such refinements and empirically evaluate their impact on the final model accuracy through ablation study. We will show that, by combining these refinements together, we are able to improve various CNN models significantly. For example, we raise ResNet-50’s top-1 validation accuracy from 75.3% to 79.29% on ImageNet. We will also demonstrate that improvement on image classification accuracy leads to better transfer learning performance in other application domains such as object detection and semantic segmentation.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/resnext.md b/vendor/ViTPose/docs/en/papers/backbones/resnext.md
new file mode 100644
index 0000000000000000000000000000000000000000..9803ee9bcd578c6a34369750a1b39e5ffa497797
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/resnext.md
@@ -0,0 +1,30 @@
+# Aggregated residual transformations for deep neural networks
+
+
+
+
+ResNext (CVPR'2017)
+
+```bibtex
+@inproceedings{xie2017aggregated,
+ title={Aggregated residual transformations for deep neural networks},
+ author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1492--1500},
+ year={2017}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present a simple, highly modularized network architecture for image classification. Our network is constructed by repeating a building block that aggregates a set of transformations with the same topology. Our simple design results in a homogeneous, multi-branch architecture that has only a few hyper-parameters to set. This strategy exposes a new dimension, which we call "cardinality" (the size of the set of transformations), as an essential factor in addition to the dimensions of depth and width. On the ImageNet-1K dataset, we empirically show that even under the restricted condition of maintaining complexity, increasing cardinality is able to improve classification accuracy. Moreover, increasing cardinality is more effective than going deeper or wider when we increase the capacity. Our models, named ResNeXt, are the foundations of our entry to the ILSVRC 2016 classification task in which we secured 2nd place. We further investigate ResNeXt on an ImageNet-5K set and the COCO detection set, also showing better results than its ResNet counterpart. The code and models are publicly available online.
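+
+A minimal sketch of how cardinality is typically realized (a grouped 3x3 convolution inside a bottleneck block; the widths below are illustrative assumptions, not the exact paper configuration):
+
+```python
+import torch
+import torch.nn as nn
+
+class ResNeXtBottleneck(nn.Module):
+    def __init__(self, channels=256, width=128, cardinality=32):
+        super().__init__()
+        self.body = nn.Sequential(
+            nn.Conv2d(channels, width, 1, bias=False), nn.BatchNorm2d(width), nn.ReLU(),
+            nn.Conv2d(width, width, 3, padding=1, groups=cardinality, bias=False),  # 32 parallel transformations
+            nn.BatchNorm2d(width), nn.ReLU(),
+            nn.Conv2d(width, channels, 1, bias=False), nn.BatchNorm2d(channels),
+        )
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        return self.relu(x + self.body(x))  # aggregated transformations plus identity shortcut
+
+print(ResNeXtBottleneck()(torch.randn(1, 256, 28, 28)).shape)  # torch.Size([1, 256, 28, 28])
+```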
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/rsn.md b/vendor/ViTPose/docs/en/papers/backbones/rsn.md
new file mode 100644
index 0000000000000000000000000000000000000000..b1fb1ea9131d0b55828123211a8f8625c377f085
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/rsn.md
@@ -0,0 +1,31 @@
+# Learning delicate local representations for multi-person pose estimation
+
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we propose a novel method called Residual Steps Network (RSN). RSN efficiently aggregates features with the same spatial size (intra-level features) to obtain delicate local representations, which retain rich low-level spatial information and result in precise keypoint localization. Additionally, we observe that the output features contribute differently to the final performance. To address this, we propose an efficient attention mechanism, the Pose Refine Machine (PRM), which trades off local and global representations in the output features and further refines the keypoint locations. Our approach won 1st place in the COCO Keypoint Challenge 2019 and achieves state-of-the-art results on both the COCO and MPII benchmarks, without extra training data or pretrained models. Our single model achieves 78.6 on COCO test-dev and 93.0 on the MPII test set; ensembled models achieve 79.2 on COCO test-dev and 77.1 on the COCO test-challenge set.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/scnet.md b/vendor/ViTPose/docs/en/papers/backbones/scnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..043c144111789880f4f1d8b6ee5059518e185e8f
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/scnet.md
@@ -0,0 +1,30 @@
+# Improving Convolutional Networks with Self-Calibrated Convolutions
+
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Recent advances on CNNs are mostly devoted to designing more complex architectures to enhance their representation learning capacity. In this paper, we consider how to improve the basic convolutional feature transformation process of CNNs without tuning the model architectures. To this end, we present novel self-calibrated convolutions that explicitly expand the field-of-view of each convolutional layer through internal communications and hence enrich the output features. In particular, unlike standard convolutions, which fuse spatial and channel-wise information using small kernels (e.g., 3x3), self-calibrated convolutions adaptively build long-range spatial and inter-channel dependencies around each spatial location through a novel self-calibration operation. They can therefore help CNNs generate more discriminative representations by explicitly incorporating richer information. Our self-calibrated convolution design is simple and generic, and can be easily applied to augment standard convolutional layers without introducing extra parameters and complexity. Extensive experiments demonstrate that when self-calibrated convolutions are applied to different backbones, our networks significantly improve the baseline models in a variety of vision tasks, including image recognition, object detection, instance segmentation, and keypoint detection, with no need to change the network architectures. We hope this work provides a promising way for future research in designing novel convolutional feature transformations for improving convolutional networks. Code is available on the project page.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/seresnet.md b/vendor/ViTPose/docs/en/papers/backbones/seresnet.md
new file mode 100644
index 0000000000000000000000000000000000000000..52178e5cf0b68e9512888bcfaaeb3d0c2b7a81b5
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/seresnet.md
@@ -0,0 +1,30 @@
+# Squeeze-and-excitation networks
+
+
+
+
+SEResNet (CVPR'2018)
+
+```bibtex
+@inproceedings{hu2018squeeze,
+ title={Squeeze-and-excitation networks},
+ author={Hu, Jie and Shen, Li and Sun, Gang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={7132--7141},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+Convolutional neural networks are built upon the convolution operation, which extracts informative features by fusing spatial and channel-wise information together within local receptive fields. In order to boost the representational power of a network, several recent approaches have shown the benefit of enhancing spatial encoding. In this work, we focus on the channel relationship and propose a novel architectural unit, which we term the “Squeeze-and-Excitation” (SE) block, that adaptively recalibrates channel-wise feature responses by explicitly modelling interdependencies between channels. We demonstrate that by stacking these blocks together, we can construct SENet architectures that generalise extremely well across challenging datasets. Crucially, we find that SE blocks produce significant performance improvements for existing state-of-the-art deep architectures at minimal additional computational cost. SENets formed the foundation of our ILSVRC 2017 classification submission which won first place and significantly reduced the top-5 error to 2.251%, achieving a ∼25% relative improvement over the winning entry of 2016. Code and models are available at https://github.com/hujie-frank/SENet.
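+
+A minimal sketch of the SE block itself (global average pooling as the "squeeze", a two-layer gate as the "excitation"; the reduction ratio is an assumed default):
+
+```python
+import torch
+import torch.nn as nn
+
+class SEBlock(nn.Module):
+    def __init__(self, channels, reduction=16):
+        super().__init__()
+        self.gate = nn.Sequential(
+            nn.Linear(channels, channels // reduction), nn.ReLU(),
+            nn.Linear(channels // reduction, channels), nn.Sigmoid(),
+        )
+
+    def forward(self, x):                   # (B, C, H, W)
+        w = self.gate(x.mean(dim=(2, 3)))   # squeeze to (B, C), then excite to per-channel weights
+        return x * w[:, :, None, None]      # channel-wise recalibration of the feature map
+
+print(SEBlock(64)(torch.randn(2, 64, 32, 32)).shape)  # torch.Size([2, 64, 32, 32])
+```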
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/shufflenetv1.md b/vendor/ViTPose/docs/en/papers/backbones/shufflenetv1.md
new file mode 100644
index 0000000000000000000000000000000000000000..a314c9b709ca8aaf6f7c47138fe3eee2aabd4bb9
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/shufflenetv1.md
@@ -0,0 +1,30 @@
+# Shufflenet: An extremely efficient convolutional neural network for mobile devices
+
+
+
+
+ShufflenetV1 (CVPR'2018)
+
+```bibtex
+@inproceedings{zhang2018shufflenet,
+ title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
+ author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={6848--6856},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+We introduce an extremely computation-efficient CNN architecture named ShuffleNet, which is designed specially for mobile devices with very limited computing power (e.g., 10-150 MFLOPs). The new architecture utilizes two new operations, pointwise group convolution and channel shuffle, to greatly reduce computation cost while maintaining accuracy. Experiments on ImageNet classification and MS COCO object detection demonstrate the superior performance of ShuffleNet over other structures, e.g. lower top-1 error (absolute 7.8%) than the recent MobileNet on the ImageNet classification task, under the computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet achieves ∼13× actual speedup over AlexNet while maintaining comparable accuracy.
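+
+The channel shuffle operation mentioned above is a simple reshape-and-transpose; a minimal sketch (not the official code):
+
+```python
+# After a grouped convolution, channels are regrouped so that the next
+# grouped convolution receives inputs from every group.
+import torch
+
+def channel_shuffle(x, groups):
+    b, c, h, w = x.shape
+    return (x.view(b, groups, c // groups, h, w)
+             .transpose(1, 2)
+             .reshape(b, c, h, w))
+
+x = torch.arange(8.0).view(1, 8, 1, 1)
+print(channel_shuffle(x, groups=2).flatten().tolist())  # [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0]
+```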
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/shufflenetv2.md b/vendor/ViTPose/docs/en/papers/backbones/shufflenetv2.md
new file mode 100644
index 0000000000000000000000000000000000000000..834ee38bc0deb814d7c3f911c919a8696764b415
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/shufflenetv2.md
@@ -0,0 +1,30 @@
+# Shufflenet v2: Practical guidelines for efficient cnn architecture design
+
+
+
+
+ShufflenetV2 (ECCV'2018)
+
+```bibtex
+@inproceedings{ma2018shufflenet,
+ title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},
+ author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={116--131},
+ year={2018}
+}
+```
+
+
+
+## Abstract
+
+
+
+Current network architecture design is mostly guided by the indirect metric of computation complexity, i.e., FLOPs. However, the direct metric, such as speed, also depends on other factors such as memory access cost and platform characteristics. Taking these factors into account, this work proposes practical guidelines for efficient network design. Accordingly, a new architecture called ShuffleNet V2 is presented. Comprehensive experiments verify that it is the state of the art in both speed and accuracy.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/vgg.md b/vendor/ViTPose/docs/en/papers/backbones/vgg.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a92a46b986a9a8907a74333bcee6acff6d01891
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/vgg.md
@@ -0,0 +1,29 @@
+# Very Deep Convolutional Networks for Large-Scale Image Recognition
+
+
+
+
+VGG (ICLR'2015)
+
+```bibtex
+@article{simonyan2014very,
+ title={Very deep convolutional networks for large-scale image recognition},
+ author={Simonyan, Karen and Zisserman, Andrew},
+ journal={arXiv preprint arXiv:1409.1556},
+ year={2014}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/backbones/vipnas.md b/vendor/ViTPose/docs/en/papers/backbones/vipnas.md
new file mode 100644
index 0000000000000000000000000000000000000000..5f52a8cac04cf48cb2e330afe176d835588034c6
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/backbones/vipnas.md
@@ -0,0 +1,29 @@
+# ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search
+
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+## Abstract
+
+
+
+Human pose estimation has achieved significant progress in recent years. However, most recent methods focus on improving accuracy with complicated models while ignoring real-time efficiency. To achieve a better trade-off between accuracy and efficiency, we propose a novel neural architecture search (NAS) method, termed ViPNAS, to search networks at both the spatial and temporal levels for fast online video pose estimation. At the spatial level, we carefully design the search space with five different dimensions including network depth, width, kernel size, group number, and attentions. At the temporal level, we search over a series of temporal feature fusions to optimize the total accuracy and speed across multiple video frames. To the best of our knowledge, we are the first to search for temporal feature fusion and automatic computation allocation in videos. Extensive experiments demonstrate the effectiveness of our approach on the challenging COCO2017 and PoseTrack2018 datasets. Our discovered model family, S-ViPNAS and T-ViPNAS, achieves significantly higher inference speed (CPU real-time) without sacrificing accuracy compared to previous state-of-the-art methods.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/300w.md b/vendor/ViTPose/docs/en/papers/datasets/300w.md
new file mode 100644
index 0000000000000000000000000000000000000000..7af778ee6d821ec5817ae55e1729ebef43867668
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/300w.md
@@ -0,0 +1,20 @@
+# 300 faces in-the-wild challenge: Database and results
+
+
+
+
+300W (IMAVIS'2016)
+
+```bibtex
+@article{sagonas2016300,
+ title={300 faces in-the-wild challenge: Database and results},
+ author={Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
+ journal={Image and vision computing},
+ volume={47},
+ pages={3--18},
+ year={2016},
+ publisher={Elsevier}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/aflw.md b/vendor/ViTPose/docs/en/papers/datasets/aflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..f04f265c836a3fcccbd4869d22291db3235c672d
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/aflw.md
@@ -0,0 +1,19 @@
+# Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/aic.md b/vendor/ViTPose/docs/en/papers/datasets/aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..5054609a394ac3fe6f621caa86f60d7e0186c79c
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/aic.md
@@ -0,0 +1,17 @@
+# Ai challenger: A large-scale dataset for going deeper in image understanding
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/animalpose.md b/vendor/ViTPose/docs/en/papers/datasets/animalpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..58303b8ee27c58d3e262359f25578b10657a2729
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/animalpose.md
@@ -0,0 +1,18 @@
+# Cross-Domain Adaptation for Animal Pose Estimation
+
+
+
+
+Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/ap10k.md b/vendor/ViTPose/docs/en/papers/datasets/ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..e36988d833ae41efafa7408830b19bbeb8494f2b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/ap10k.md
@@ -0,0 +1,19 @@
+# AP-10K: A Benchmark for Animal Pose Estimation in the Wild
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/atrw.md b/vendor/ViTPose/docs/en/papers/datasets/atrw.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe83ac0e94ab3c513c30d1b016ab4a87d200807b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/atrw.md
@@ -0,0 +1,18 @@
+# ATRW: A Benchmark for Amur Tiger Re-identification in the Wild
+
+
+
+
+ATRW (ACM MM'2020)
+
+```bibtex
+@inproceedings{li2020atrw,
+ title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
+ author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
+ booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
+ pages={2590--2598},
+ year={2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/coco.md b/vendor/ViTPose/docs/en/papers/datasets/coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..8051dc756b0124816ed4db8e4cf5f31d363f6fa5
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/coco.md
@@ -0,0 +1,19 @@
+# Microsoft coco: Common objects in context
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody.md b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..69cb2b98d14b9cf426775944607c8b6d08674736
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody.md
@@ -0,0 +1,17 @@
+# Whole-Body Human Pose Estimation in the Wild
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_face.md b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e1d3d45011546273f93ba3a131824b7fb70994a
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_face.md
@@ -0,0 +1,17 @@
+# Whole-Body Human Pose Estimation in the Wild
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_hand.md b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..51e21693639d21d8541a102fe9a4fd16ceb9adef
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/coco_wholebody_hand.md
@@ -0,0 +1,17 @@
+# Whole-Body Human Pose Estimation in the Wild
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/cofw.md b/vendor/ViTPose/docs/en/papers/datasets/cofw.md
new file mode 100644
index 0000000000000000000000000000000000000000..20d29acdc704eed6c716eff9fcb4a347aa51c8a7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/cofw.md
@@ -0,0 +1,18 @@
+# Robust face landmark estimation under occlusion
+
+
+
+
+COFW (ICCV'2013)
+
+```bibtex
+@inproceedings{burgos2013robust,
+ title={Robust face landmark estimation under occlusion},
+ author={Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr},
+ booktitle={Proceedings of the IEEE international conference on computer vision},
+ pages={1513--1520},
+ year={2013}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/crowdpose.md b/vendor/ViTPose/docs/en/papers/datasets/crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee678aa74f90c5891846832a1343a6e685d37913
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/crowdpose.md
@@ -0,0 +1,17 @@
+# CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/deepfashion.md b/vendor/ViTPose/docs/en/papers/datasets/deepfashion.md
new file mode 100644
index 0000000000000000000000000000000000000000..3955cf30923693f1faa6d7bc335fb7079a5f0dad
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/deepfashion.md
@@ -0,0 +1,35 @@
+# DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations
+
+
+
+
+DeepFashion (CVPR'2016)
+
+```bibtex
+@inproceedings{liuLQWTcvpr16DeepFashion,
+ author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
+ title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
+ booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2016}
+}
+```
+
+
+
+
+
+
+DeepFashion (ECCV'2016)
+
+```bibtex
+@inproceedings{liuYLWTeccv16FashionLandmark,
+ author = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
+ title = {Fashion Landmark Detection in the Wild},
+ booktitle = {European Conference on Computer Vision (ECCV)},
+ month = {October},
+ year = {2016}
+ }
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/fly.md b/vendor/ViTPose/docs/en/papers/datasets/fly.md
new file mode 100644
index 0000000000000000000000000000000000000000..ed1a9c148ec748d89cf18d26d7ea4a8021fc1b30
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/fly.md
@@ -0,0 +1,21 @@
+# Fast animal pose estimation using deep neural networks
+
+
+
+
+Vinegar Fly (Nature Methods'2019)
+
+```bibtex
+@article{pereira2019fast,
+ title={Fast animal pose estimation using deep neural networks},
+ author={Pereira, Talmo D and Aldarondo, Diego E and Willmore, Lindsay and Kislin, Mikhail and Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W},
+ journal={Nature methods},
+ volume={16},
+ number={1},
+ pages={117--125},
+ year={2019},
+ publisher={Nature Publishing Group}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/freihand.md b/vendor/ViTPose/docs/en/papers/datasets/freihand.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee086020691f4f3c36d08759fa4b603209da2dd5
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/freihand.md
@@ -0,0 +1,18 @@
+# Freihand: A dataset for markerless capture of hand pose and shape from single rgb images
+
+
+
+
+FreiHand (ICCV'2019)
+
+```bibtex
+@inproceedings{zimmermann2019freihand,
+ title={Freihand: A dataset for markerless capture of hand pose and shape from single rgb images},
+ author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={813--822},
+ year={2019}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/h36m.md b/vendor/ViTPose/docs/en/papers/datasets/h36m.md
new file mode 100644
index 0000000000000000000000000000000000000000..143e15417cba0b6bce2d9454c8b15506326ed1ae
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/h36m.md
@@ -0,0 +1,22 @@
+# Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/halpe.md b/vendor/ViTPose/docs/en/papers/datasets/halpe.md
new file mode 100644
index 0000000000000000000000000000000000000000..f71793fdbd5f1658a10f493eb1ff6fb598d4fb05
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/halpe.md
@@ -0,0 +1,17 @@
+# PaStaNet: Toward Human Activity Knowledge Engine
+
+
+
+
+Halpe (CVPR'2020)
+
+```bibtex
+@inproceedings{li2020pastanet,
+ title={PaStaNet: Toward Human Activity Knowledge Engine},
+ author={Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
+ booktitle={CVPR},
+ year={2020}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/horse10.md b/vendor/ViTPose/docs/en/papers/datasets/horse10.md
new file mode 100644
index 0000000000000000000000000000000000000000..94e559db5146dd932469ad35b16b8eb4a0f3d4e3
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/horse10.md
@@ -0,0 +1,18 @@
+# Pretraining boosts out-of-domain robustness for pose estimation
+
+
+
+
+Horse-10 (WACV'2021)
+
+```bibtex
+@inproceedings{mathis2021pretraining,
+ title={Pretraining boosts out-of-domain robustness for pose estimation},
+ author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
+ booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
+ pages={1859--1868},
+ year={2021}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/interhand.md b/vendor/ViTPose/docs/en/papers/datasets/interhand.md
new file mode 100644
index 0000000000000000000000000000000000000000..6b4458a01e0ed1394b7258de15e10a56c5c63432
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/interhand.md
@@ -0,0 +1,18 @@
+# InterHand2.6M: A dataset and baseline for 3D interacting hand pose estimation from a single RGB image
+
+
+
+
+InterHand2.6M (ECCV'2020)
+
+```bibtex
+@article{moon2020interhand2,
+ title={InterHand2.6M: A dataset and baseline for 3D interacting hand pose estimation from a single RGB image},
+ author={Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+ journal={arXiv preprint arXiv:2008.09309},
+ year={2020},
+ publisher={Springer}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/jhmdb.md b/vendor/ViTPose/docs/en/papers/datasets/jhmdb.md
new file mode 100644
index 0000000000000000000000000000000000000000..890d788ab2e2ef3e727d08aa897eff9a32b41926
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/jhmdb.md
@@ -0,0 +1,19 @@
+# Towards understanding action recognition
+
+
+
+
+JHMDB (ICCV'2013)
+
+```bibtex
+@inproceedings{Jhuang:ICCV:2013,
+ title = {Towards understanding action recognition},
+ author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
+ booktitle = {International Conf. on Computer Vision (ICCV)},
+ month = Dec,
+ pages = {3192-3199},
+ year = {2013}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/locust.md b/vendor/ViTPose/docs/en/papers/datasets/locust.md
new file mode 100644
index 0000000000000000000000000000000000000000..896ee03b8310543f1b336ed54419a6262a0d181c
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/locust.md
@@ -0,0 +1,20 @@
+# DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning
+
+
+
+
+Desert Locust (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/macaque.md b/vendor/ViTPose/docs/en/papers/datasets/macaque.md
new file mode 100644
index 0000000000000000000000000000000000000000..be4bec1131bc251d1c6983dd342d91769c09a467
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/macaque.md
@@ -0,0 +1,18 @@
+# MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset for markerless motion capture
+
+
+
+
+MacaquePose (bioRxiv'2020)
+
+```bibtex
+@article{labuguen2020macaquepose,
+  title={MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset for markerless motion capture},
+ author={Labuguen, Rollyn and Matsumoto, Jumpei and Negrete, Salvador and Nishimaru, Hiroshi and Nishijo, Hisao and Takada, Masahiko and Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro},
+ journal={bioRxiv},
+ year={2020},
+ publisher={Cold Spring Harbor Laboratory}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/mhp.md b/vendor/ViTPose/docs/en/papers/datasets/mhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..6dc5b17cccf192d0ec634b787bc38cdea911802c
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/mhp.md
@@ -0,0 +1,18 @@
+# Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing
+
+
+
+
+MHP (ACM MM'2018)
+
+```bibtex
+@inproceedings{zhao2018understanding,
+ title={Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
+ author={Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
+ booktitle={Proceedings of the 26th ACM international conference on Multimedia},
+ pages={792--800},
+ year={2018}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/mpi_inf_3dhp.md b/vendor/ViTPose/docs/en/papers/datasets/mpi_inf_3dhp.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a26d49fd5bf532aa265ce159d4380e774be5a1e
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/mpi_inf_3dhp.md
@@ -0,0 +1,20 @@
+# Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision
+
+
+
+
+MPI-INF-3DHP (3DV'2017)
+
+```bibtex
+@inproceedings{mono-3dhp2017,
+ author = {Mehta, Dushyant and Rhodin, Helge and Casas, Dan and Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and Theobalt, Christian},
+ title = {Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision},
+ booktitle = {3D Vision (3DV), 2017 Fifth International Conference on},
+ url = {http://gvv.mpi-inf.mpg.de/3dhp_dataset},
+ year = {2017},
+ organization={IEEE},
+ doi={10.1109/3dv.2017.00064},
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/mpii.md b/vendor/ViTPose/docs/en/papers/datasets/mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..e2df7cfd7d181f02802486667866cf663c442bc0
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/mpii.md
@@ -0,0 +1,18 @@
+# 2D Human Pose Estimation: New Benchmark and State of the Art Analysis
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/mpii_trb.md b/vendor/ViTPose/docs/en/papers/datasets/mpii_trb.md
new file mode 100644
index 0000000000000000000000000000000000000000..b3e96a77d2522851c27ba1301c609ba794a522a4
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/mpii_trb.md
@@ -0,0 +1,18 @@
+# TRB: A Novel Triplet Representation for Understanding 2D Human Body
+
+
+
+
+MPII-TRB (ICCV'2019)
+
+```bibtex
+@inproceedings{duan2019trb,
+ title={TRB: A Novel Triplet Representation for Understanding 2D Human Body},
+ author={Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and Liu, Wentao and Qian, Chen and Ouyang, Wanli},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={9479--9488},
+ year={2019}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/ochuman.md b/vendor/ViTPose/docs/en/papers/datasets/ochuman.md
new file mode 100644
index 0000000000000000000000000000000000000000..5211c341e42937a2ca5a22dac2d62901390720c2
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/ochuman.md
@@ -0,0 +1,18 @@
+# Pose2seg: Detection free human instance segmentation
+
+
+
+
+OCHuman (CVPR'2019)
+
+```bibtex
+@inproceedings{zhang2019pose2seg,
+ title={Pose2seg: Detection free human instance segmentation},
+ author={Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={889--898},
+ year={2019}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/onehand10k.md b/vendor/ViTPose/docs/en/papers/datasets/onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..5710fda4771e79182341b358a173d618f823c2e3
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/onehand10k.md
@@ -0,0 +1,21 @@
+# Mask-pose cascaded cnn for 2d hand pose estimation from single color image
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/panoptic.md b/vendor/ViTPose/docs/en/papers/datasets/panoptic.md
new file mode 100644
index 0000000000000000000000000000000000000000..60719c4df9df2e93756cf43c82cbf0edd8149f1f
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/panoptic.md
@@ -0,0 +1,18 @@
+# Hand keypoint detection in single images using multiview bootstrapping
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/panoptic_body3d.md b/vendor/ViTPose/docs/en/papers/datasets/panoptic_body3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..b7f45c8beb9400101b16c956f26845a1f01c27d7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/panoptic_body3d.md
@@ -0,0 +1,17 @@
+# Panoptic Studio: A Massively Multiview System for Social Motion Capture
+
+
+
+
+CMU Panoptic (ICCV'2015)
+
+```bibtex
+@inproceedings{joo_iccv_2015,
+author = {Hanbyul Joo and Hao Liu and Lei Tan and Lin Gui and Bart Nabbe and Iain Matthews and Takeo Kanade and Shohei Nobuhara and Yaser Sheikh},
+title = {Panoptic Studio: A Massively Multiview System for Social Motion Capture},
+booktitle = {ICCV},
+year = {2015}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/posetrack18.md b/vendor/ViTPose/docs/en/papers/datasets/posetrack18.md
new file mode 100644
index 0000000000000000000000000000000000000000..90cfcb54f82aab26851417b2e976da5bc3556c50
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/posetrack18.md
@@ -0,0 +1,18 @@
+# Posetrack: A benchmark for human pose estimation and tracking
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/rhd.md b/vendor/ViTPose/docs/en/papers/datasets/rhd.md
new file mode 100644
index 0000000000000000000000000000000000000000..1855037bdceb07024c192c40b19e8efb599b0cbf
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/rhd.md
@@ -0,0 +1,19 @@
+# Learning to Estimate 3D Hand Pose from Single RGB Images
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/wflw.md b/vendor/ViTPose/docs/en/papers/datasets/wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..08c3ccced32535c12ee487a3b3f99c6d3d696679
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/wflw.md
@@ -0,0 +1,18 @@
+# Look at boundary: A boundary-aware face alignment algorithm
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/datasets/zebra.md b/vendor/ViTPose/docs/en/papers/datasets/zebra.md
new file mode 100644
index 0000000000000000000000000000000000000000..2727e595fc1a037b84eecb7381d7a5f7de15e90c
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/datasets/zebra.md
@@ -0,0 +1,20 @@
+# DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning
+
+
+
+
+Grévy’s Zebra (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/albumentations.md b/vendor/ViTPose/docs/en/papers/techniques/albumentations.md
new file mode 100644
index 0000000000000000000000000000000000000000..9d09a7a3448cceca73c95003ee262bcea6473bcd
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/albumentations.md
@@ -0,0 +1,21 @@
+# Albumentations: fast and flexible image augmentations
+
+
+
+
+Albumentations (Information'2020)
+
+```bibtex
+@article{buslaev2020albumentations,
+ title={Albumentations: fast and flexible image augmentations},
+ author={Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A},
+ journal={Information},
+ volume={11},
+ number={2},
+ pages={125},
+ year={2020},
+ publisher={Multidisciplinary Digital Publishing Institute}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/awingloss.md b/vendor/ViTPose/docs/en/papers/techniques/awingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d4b93a87c622b6b965cab31ac402b8445934a9a
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/awingloss.md
@@ -0,0 +1,31 @@
+# Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression
+
+
+
+
+AdaptiveWingloss (ICCV'2019)
+
+```bibtex
+@inproceedings{wang2019adaptive,
+ title={Adaptive wing loss for robust face alignment via heatmap regression},
+ author={Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
+ booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
+ pages={6971--6981},
+ year={2019}
+}
+```
+
+
+
+## Abstract
+
+
+
+Heatmap regression with a deep network has become one of the mainstream approaches to localize facial landmarks. However, the loss function for heatmap regression is rarely studied. In this paper, we analyze the ideal loss function properties for heatmap regression in face alignment problems. Then we propose a novel loss function, named Adaptive Wing loss, that is able to adapt its shape to different types of ground truth heatmap pixels. This adaptability penalizes loss more on foreground pixels while less on background pixels. To address the imbalance between foreground and background pixels, we also propose Weighted Loss Map, which assigns high weights on foreground and difficult background pixels to help training process focus more on pixels that are crucial to landmark localization. To further improve face alignment accuracy, we introduce boundary prediction and CoordConv with boundary coordinates. Extensive experiments on different benchmarks, including COFW, 300W and WFLW, show our approach outperforms the state-of-the-art by a significant margin on
+various evaluation metrics. Besides, the Adaptive Wing loss also helps other heatmap regression tasks.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/dark.md b/vendor/ViTPose/docs/en/papers/techniques/dark.md
new file mode 100644
index 0000000000000000000000000000000000000000..083b7596ab1e7aadb3f154eea58a170b7b22fb54
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/dark.md
@@ -0,0 +1,30 @@
+# Distribution-aware coordinate representation for human pose estimation
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+While being the de facto standard coordinate representation for human pose estimation, heatmap has not been investigated in-depth. This work fills this gap. For the first time, we find that the process of decoding the predicted heatmaps into the final joint coordinates in the original image space is surprisingly significant for the performance. We further probe the design limitations of the standard coordinate decoding method, and propose a more principled distributionaware decoding method. Also, we improve the standard coordinate encoding process (i.e. transforming ground-truth coordinates to heatmaps) by generating unbiased/accurate heatmaps. Taking the two together, we formulate a novel Distribution-Aware coordinate Representation of Keypoints (DARK) method. Serving as a model-agnostic plug-in, DARK brings about significant performance boost to existing human pose estimation models. Extensive experiments show that DARK yields the best results on two common benchmarks, MPII and COCO. Besides, DARK achieves the 2nd place entry in the ICCV 2019 COCO Keypoints Challenge. The code is available online.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/fp16.md b/vendor/ViTPose/docs/en/papers/techniques/fp16.md
new file mode 100644
index 0000000000000000000000000000000000000000..7fd7ee0011a5946ed55119bac3d262b67b52d2d5
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/fp16.md
@@ -0,0 +1,17 @@
+# Mixed Precision Training
+
+
+
+
+FP16 (ArXiv'2017)
+
+```bibtex
+@article{micikevicius2017mixed,
+ title={Mixed precision training},
+ author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
+ journal={arXiv preprint arXiv:1710.03740},
+ year={2017}
+}
+```
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/softwingloss.md b/vendor/ViTPose/docs/en/papers/techniques/softwingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..524a6089ffee69e109a0a721fa14b820df88ae8b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/softwingloss.md
@@ -0,0 +1,30 @@
+# Structure-Coherent Deep Feature Learning for Robust Face Alignment
+
+
+
+
+SoftWingloss (TIP'2021)
+
+```bibtex
+@article{lin2021structure,
+ title={Structure-Coherent Deep Feature Learning for Robust Face Alignment},
+ author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
+ journal={IEEE Transactions on Image Processing},
+ year={2021},
+ publisher={IEEE}
+}
+```
+
+
+
+## Abstract
+
+
+
+In this paper, we propose a structure-coherent deep feature learning method for face alignment. Unlike most existing face alignment methods which overlook the facial structure cues, we explicitly exploit the relation among facial landmarks to make the detector robust to hard cases such as occlusion and large pose. Specifically, we leverage a landmark-graph relational network to enforce the structural relationships among landmarks. We consider the facial landmarks as structural graph nodes and carefully design the neighborhood to passing features among the most related nodes. Our method dynamically adapts the weights of node neighborhood to eliminate distracted information from noisy nodes, such as occluded landmark point. Moreover, different from most previous works which only tend to penalize the landmarks absolute position during the training, we propose a relative location loss to enhance the information of relative location of landmarks. This relative location supervision further regularizes the facial structure. Our approach considers the interactions among facial landmarks and can be easily implemented on top of any convolutional backbone to boost the performance. Extensive experiments on three popular benchmarks, including WFLW, COFW and 300W, demonstrate the effectiveness of the proposed method. In particular, due to explicit structure modeling, our approach is especially robust to challenging cases resulting in impressive low failure rate on COFW and WFLW datasets.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/udp.md b/vendor/ViTPose/docs/en/papers/techniques/udp.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb4acebfbc9474312e992a67e2a19ef2df12be85
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/udp.md
@@ -0,0 +1,30 @@
+# The Devil is in the Details: Delving into Unbiased Data Processing for Human Pose Estimation
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+## Abstract
+
+
+
+Recently, the leading performance of human pose estimation is dominated by top-down methods. Being a fundamental component in training and inference, data processing has not been systematically considered in pose estimation community, to the best of our knowledge. In this paper, we focus on this problem and find that the devil of top-down pose estimator is in the biased data processing. Specifically, by investigating the standard data processing in state-of-the-art approaches mainly including data transformation and encoding-decoding, we find that the results obtained by common flipping strategy are unaligned with the original ones in inference. Moreover, there is statistical error in standard encoding-decoding during both training and inference. Two problems couple together and significantly degrade the pose estimation performance. Based on quantitative analyses, we then formulate a principled way to tackle this dilemma. Data is processed in continuous space based on unit length (the intervals between pixels) instead of in discrete space with pixel, and a combined classification and regression approach is adopted to perform encoding-decoding. The Unbiased Data Processing (UDP) for human pose estimation can be achieved by combining the two together. UDP not only boosts the performance of existing methods by a large margin but also plays a important role in result reproducing and future exploration. As a model-agnostic approach, UDP promotes SimpleBaseline-ResNet50-256x192 by 1.5 AP (70.2 to 71.7) and HRNet-W32-256x192 by 1.7 AP (73.5 to 75.2) on COCO test-dev set. The HRNet-W48-384x288 equipped with UDP achieves 76.5 AP and sets a new state-of-the-art for human pose estimation. The source code is publicly available for further research.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/papers/techniques/wingloss.md b/vendor/ViTPose/docs/en/papers/techniques/wingloss.md
new file mode 100644
index 0000000000000000000000000000000000000000..2aaa05722eda24201cd35e1028349994d1f0fd6b
--- /dev/null
+++ b/vendor/ViTPose/docs/en/papers/techniques/wingloss.md
@@ -0,0 +1,31 @@
+# Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks
+
+
+
+
+Wingloss (CVPR'2018)
+
+```bibtex
+@inproceedings{feng2018wing,
+ title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
+ author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
+ booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
+ year={2018},
+ pages ={2235-2245},
+ organization={IEEE}
+}
+```
+
+
+
+## Abstract
+
+
+
+We present a new loss function, namely Wing loss, for robust facial landmark localisation with Convolutional Neural Networks (CNNs). We first compare and analyse different loss functions including L2, L1 and smooth L1. The analysis of these loss functions suggests that, for the training of a CNN-based localisation model, more attention should be paid to small and medium range errors. To this end, we design a piece-wise loss function. The new loss amplifies the impact of errors from the interval (-w, w) by switching from L1 loss to a modified logarithm function. To address the problem of under-representation of samples with large out-of-plane head rotations in the training set, we propose a simple but effective boosting strategy, referred to as pose-based data balancing. In particular, we deal with the data imbalance problem by duplicating the minority training samples and perturbing them by injecting random image rotation, bounding box translation and other data augmentation approaches. Last, the proposed approach is extended to create a two-stage framework for robust facial landmark localisation. The experimental results obtained on AFLW and 300W demonstrate the merits of the Wing loss function, and prove the superiority of the proposed method over the state-of-the-art approaches.
+
+
+
+
+
+
diff --git a/vendor/ViTPose/docs/en/stats.py b/vendor/ViTPose/docs/en/stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..10ce3ab40f45e07c5c38ee4d8f7225670dc75f04
--- /dev/null
+++ b/vendor/ViTPose/docs/en/stats.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools as func
+import glob
+import re
+from os.path import basename, splitext
+
+import numpy as np
+import titlecase
+
+
+def anchor(name):
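+    # e.g. anchor("COCO (ECCV'2014)") -> "coco-eccv-2014"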
+ return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
+ name.strip().lower())).strip('-')
+
+
+# Count algorithms
+
+files = sorted(glob.glob('topics/*.md'))
+
+stats = []
+
+for f in files:
+ with open(f, 'r') as content_file:
+ content = content_file.read()
+
+ # title
+ title = content.split('\n')[0].replace('#', '')
+
+ # count papers
+ papers = set(
+ (papertype, titlecase.titlecase(paper.lower().strip()))
+ for (papertype, paper) in re.findall(
+            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
+ content, re.DOTALL))
+ # paper links
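+    # lines are reversed so that, scanning forward from each bibtex title,
+    # the first section heading matched is the one just above that paper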
+ revcontent = '\n'.join(list(reversed(content.splitlines())))
+ paperlinks = {}
+ for _, p in papers:
+ print(p)
+ paperlinks[p] = ', '.join(
+ ((f'[{paperlink} ⇨]'
+ f'(topics/{splitext(basename(f))[0]}.html#{anchor(paperlink)})')
+ for paperlink in re.findall(
+ rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n### (.*?)\s*[,;]?\s*\n',
+ revcontent, re.DOTALL | re.IGNORECASE)))
+ print(' ', paperlinks[p])
+ paperlist = '\n'.join(
+ sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
+ # count configs
+ configs = set(x.lower().strip()
+ for x in re.findall(r'.*configs/.*\.py', content))
+
+ # count ckpts
+ ckpts = set(x.lower().strip()
+ for x in re.findall(r'https://download.*\.pth', content)
+ if 'mmpose' in x)
+
+ statsmsg = f"""
+## [{title}]({f})
+
+* Number of checkpoints: {len(ckpts)}
+* Number of configs: {len(configs)}
+* Number of papers: {len(papers)}
+{paperlist}
+
+ """
+
+ stats.append((papers, configs, ckpts, statsmsg))
+
+allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
+allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
+allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
+
+# Summarize
+
+msglist = '\n'.join(x for _, _, _, x in stats)
+papertypes, papercounts = np.unique([t for t, _ in allpapers],
+ return_counts=True)
+countstr = '\n'.join(
+ [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# Overview
+
+* Number of checkpoints: {len(allckpts)}
+* Number of configs: {len(allconfigs)}
+* Number of papers: {len(allpapers)}
+{countstr}
+
+For supported datasets, see [datasets overview](datasets.md).
+
+{msglist}
+
+"""
+
+with open('modelzoo.md', 'w') as f:
+ f.write(modelzoo)
+
+# Count datasets
+
+files = sorted(glob.glob('tasks/*.md'))
+# files = sorted(glob.glob('docs/tasks/*.md'))
+
+datastats = []
+
+for f in files:
+ with open(f, 'r') as content_file:
+ content = content_file.read()
+
+ # title
+ title = content.split('\n')[0].replace('#', '')
+
+ # count papers
+ papers = set(
+ (papertype, titlecase.titlecase(paper.lower().strip()))
+ for (papertype, paper) in re.findall(
+            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
+ content, re.DOTALL))
+ # paper links
+ revcontent = '\n'.join(list(reversed(content.splitlines())))
+ paperlinks = {}
+ for _, p in papers:
+ print(p)
+ paperlinks[p] = ', '.join(
+ (f'[{p} ⇨](tasks/{splitext(basename(f))[0]}.html#{anchor(p)})'
+ for p in re.findall(
+ rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
+ revcontent, re.DOTALL | re.IGNORECASE)))
+ print(' ', paperlinks[p])
+ paperlist = '\n'.join(
+ sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
+ # count configs
+ configs = set(x.lower().strip()
+ for x in re.findall(r'https.*configs/.*\.py', content))
+
+ # count ckpts
+ ckpts = set(x.lower().strip()
+ for x in re.findall(r'https://download.*\.pth', content)
+ if 'mmpose' in x)
+
+ statsmsg = f"""
+## [{title}]({f})
+
+* Number of papers: {len(papers)}
+{paperlist}
+
+ """
+
+ datastats.append((papers, configs, ckpts, statsmsg))
+
+alldatapapers = func.reduce(lambda a, b: a.union(b),
+ [p for p, _, _, _ in datastats])
+
+# Summarize
+
+msglist = '\n'.join(x for _, _, _, x in stats)
+datamsglist = '\n'.join(x for _, _, _, x in datastats)
+papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
+ return_counts=True)
+countstr = '\n'.join(
+ [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# Overview
+
+* Number of papers: {len(alldatapapers)}
+{countstr}
+
+For supported pose algorithms, see [modelzoo overview](modelzoo.md).
+
+{datamsglist}
+"""
+
+with open('datasets.md', 'w') as f:
+ f.write(modelzoo)
diff --git a/vendor/ViTPose/docs/en/tasks/2d_animal_keypoint.md b/vendor/ViTPose/docs/en/tasks/2d_animal_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..c33ebb8074684a9997927a43d7accfc7ce9b1547
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_animal_keypoint.md
@@ -0,0 +1,448 @@
+# 2D Animal Keypoint Dataset
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [Animal-Pose](#animal-pose) \[ [Homepage](https://sites.google.com/view/animal-pose/) \]
+- [AP-10K](#ap-10k) \[ [Homepage](https://github.com/AlexTheBad/AP-10K/) \]
+- [Horse-10](#horse-10) \[ [Homepage](http://www.mackenziemathislab.org/horse10) \]
+- [MacaquePose](#macaquepose) \[ [Homepage](http://www.pri.kyoto-u.ac.jp/datasets/macaquepose/index.html) \]
+- [Vinegar Fly](#vinegar-fly) \[ [Homepage](https://github.com/jgraving/DeepPoseKit-Data) \]
+- [Desert Locust](#desert-locust) \[ [Homepage](https://github.com/jgraving/DeepPoseKit-Data) \]
+- [Grévy’s Zebra](#grvys-zebra) \[ [Homepage](https://github.com/jgraving/DeepPoseKit-Data) \]
+- [ATRW](#atrw) \[ [Homepage](https://cvwc2019.github.io/challenge.html) \]
+
+## Animal-Pose
+
+
+
+
+Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
+
+For [Animal-Pose](https://sites.google.com/view/animal-pose/) dataset, we prepare the dataset as follows:
+
+1. Download the images of [PASCAL2011](http://www.google.com/url?q=http%3A%2F%2Fhost.robots.ox.ac.uk%2Fpascal%2FVOC%2Fvoc2011%2Findex.html&sa=D&sntz=1&usg=AFQjCNGmiJGkhSSWtShDe7NwRPyyyBUYSQ), specifically the five categories (dog, cat, sheep, cow, horse), which we use as the trainval set.
+1. Download the [test-set](https://drive.google.com/drive/folders/1DwhQobZlGntOXxdm7vQsE4bqbFmN3b9y?usp=sharing) images with raw annotations (1000 images, 5 categories).
+1. We have pre-processed the annotations to make them compatible with MMPose. Please download the annotation files from [annotations](https://download.openmmlab.com/mmpose/datasets/animalpose_annotations.tar). If you would like to generate the annotations yourself, please check our dataset parsing [code](/tools/dataset/parse_animalpose_dataset.py).
+
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── animalpose
+ │
+ │-- VOC2011
+ │ │-- Annotations
+ │ │-- ImageSets
+ │ │-- JPEGImages
+ │ │-- SegmentationClass
+ │ │-- SegmentationObject
+ │
+ │-- animalpose_image_part2
+ │ │-- cat
+ │ │-- cow
+ │ │-- dog
+ │ │-- horse
+ │ │-- sheep
+ │
+ │-- annotations
+ │ │-- animalpose_train.json
+ │ |-- animalpose_val.json
+ │ |-- animalpose_trainval.json
+ │ │-- animalpose_test.json
+ │
+ │-- PASCAL2011_animal_annotation
+ │ │-- cat
+ │ │ |-- 2007_000528_1.xml
+ │ │ |-- 2007_000549_1.xml
+ │ │ │-- ...
+ │ │-- cow
+ │ │-- dog
+ │ │-- horse
+ │ │-- sheep
+ │
+ │-- annimalpose_anno2
+ │ │-- cat
+ │ │ |-- ca1.xml
+ │ │ |-- ca2.xml
+ │ │ │-- ...
+ │ │-- cow
+ │ │-- dog
+ │ │-- horse
+ │ │-- sheep
+
+```
+
+The official dataset does not provide an official train/val/test split.
+We use the images from PASCAL VOC for training and validation. In total, we have 3608 images and 5117 annotations for train+val, where
+2798 images with 4000 annotations are used for training, and 810 images with 1117 annotations are used for validation.
+The images from other sources (1000 images with 1000 annotations) are used for testing.
+
+## AP-10K
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+For [AP-10K](https://github.com/AlexTheBad/AP-10K/) dataset, images and annotations can be downloaded from [download](https://drive.google.com/file/d/1-FNNGcdtAQRehYYkGY1y4wzFNg4iWNad/view?usp=sharing).
+Note that the images and annotations are for non-commercial use only.
+
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── ap10k
+ │-- annotations
+ │ │-- ap10k-train-split1.json
+ │ |-- ap10k-train-split2.json
+ │ |-- ap10k-train-split3.json
+ │ │-- ap10k-val-split1.json
+ │ |-- ap10k-val-split2.json
+ │ |-- ap10k-val-split3.json
+ │ |-- ap10k-test-split1.json
+ │ |-- ap10k-test-split2.json
+ │ |-- ap10k-test-split3.json
+ │-- data
+ │ │-- 000000000001.jpg
+ │ │-- 000000000002.jpg
+ │ │-- ...
+
+```
+
+The annotation files in the 'annotations' folder cover 50 labeled animal species. In total, the AP-10K dataset contains 10,015 labeled images with 13,028 instances. We randomly split them into train, val, and test sets with a ratio of 7:1:2.
+
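+As a quick sanity check of these numbers, the split files can be loaded directly. The short sketch below is not part of MMPose; it assumes the layout above and that the files use the COCO-style 'images' and 'annotations' lists.
+
+```python
+# Count images and instances per AP-10K split (a sketch; assumes the
+# COCO-style 'images'/'annotations' keys and the paths shown above).
+import json
+
+for split in ('train', 'val', 'test'):
+    with open(f'data/ap10k/annotations/ap10k-{split}-split1.json') as f:
+        ann = json.load(f)
+    print(split, len(ann['images']), 'images,', len(ann['annotations']), 'instances')
+```
+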
+## Horse-10
+
+
+
+
+Horse-10 (WACV'2021)
+
+```bibtex
+@inproceedings{mathis2021pretraining,
+ title={Pretraining boosts out-of-domain robustness for pose estimation},
+ author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
+ booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
+ pages={1859--1868},
+ year={2021}
+}
+```
+
+
+
+For [Horse-10](http://www.mackenziemathislab.org/horse10) dataset, images can be downloaded from [download](http://www.mackenziemathislab.org/horse10).
+Please download the annotation files from [horse10_annotations](https://download.openmmlab.com/mmpose/datasets/horse10_annotations.tar). Note that the images and annotations are for non-commercial use only, per the authors (see http://horse10.deeplabcut.org for more information).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── horse10
+ │-- annotations
+ │ │-- horse10-train-split1.json
+ │ |-- horse10-train-split2.json
+ │ |-- horse10-train-split3.json
+ │ │-- horse10-test-split1.json
+ │ |-- horse10-test-split2.json
+ │ |-- horse10-test-split3.json
+ │-- labeled-data
+ │ │-- BrownHorseinShadow
+ │ │-- BrownHorseintoshadow
+ │ │-- ...
+
+```
+
+## MacaquePose
+
+
+
+
+MacaquePose (bioRxiv'2020)
+
+```bibtex
+@article{labuguen2020macaquepose,
+  title={MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset for markerless motion capture},
+ author={Labuguen, Rollyn and Matsumoto, Jumpei and Negrete, Salvador and Nishimaru, Hiroshi and Nishijo, Hisao and Takada, Masahiko and Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro},
+ journal={bioRxiv},
+ year={2020},
+ publisher={Cold Spring Harbor Laboratory}
+}
+```
+
+
+
+For [MacaquePose](http://www.pri.kyoto-u.ac.jp/datasets/macaquepose/index.html) dataset, images can be downloaded from [download](http://www.pri.kyoto-u.ac.jp/datasets/macaquepose/index.html).
+Please download the annotation files from [macaque_annotations](https://download.openmmlab.com/mmpose/datasets/macaque_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── macaque
+ │-- annotations
+ │ │-- macaque_train.json
+ │ |-- macaque_test.json
+ │-- images
+ │ │-- 01418849d54b3005.jpg
+ │ │-- 0142d1d1a6904a70.jpg
+ │ │-- 01ef2c4c260321b7.jpg
+ │ │-- 020a1c75c8c85238.jpg
+ │ │-- 020b1506eef2557d.jpg
+ │ │-- ...
+
+```
+
+Since the official dataset does not provide a test set, we randomly select 12,500 images for training and the rest for evaluation (see [code](/tools/dataset/parse_macaquepose_dataset.py)).
+
+## Vinegar Fly
+
+
+
+
+Vinegar Fly (Nature Methods'2019)
+
+```bibtex
+@article{pereira2019fast,
+ title={Fast animal pose estimation using deep neural networks},
+ author={Pereira, Talmo D and Aldarondo, Diego E and Willmore, Lindsay and Kislin, Mikhail and Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W},
+ journal={Nature methods},
+ volume={16},
+ number={1},
+ pages={117--125},
+ year={2019},
+ publisher={Nature Publishing Group}
+}
+```
+
+
+
+For [Vinegar Fly](https://github.com/jgraving/DeepPoseKit-Data) dataset, images can be downloaded from [vinegar_fly_images](https://download.openmmlab.com/mmpose/datasets/vinegar_fly_images.tar).
+Please download the annotation files from [vinegar_fly_annotations](https://download.openmmlab.com/mmpose/datasets/vinegar_fly_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── fly
+ │-- annotations
+ │ │-- fly_train.json
+ │ |-- fly_test.json
+ │-- images
+ │ │-- 0.jpg
+ │ │-- 1.jpg
+ │ │-- 2.jpg
+ │ │-- 3.jpg
+ │ │-- ...
+
+```
+
+Since the official dataset does not provide a test set, we randomly select 90\% of the images for training and the rest (10\%) for evaluation (see [code](/tools/dataset/parse_deepposekit_dataset.py)).
+
+## Desert Locust
+
+
+
+
+Desert Locust (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+For [Desert Locust](https://github.com/jgraving/DeepPoseKit-Data) dataset, images can be downloaded from [locust_images](https://download.openmmlab.com/mmpose/datasets/locust_images.tar).
+Please download the annotation files from [locust_annotations](https://download.openmmlab.com/mmpose/datasets/locust_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── locust
+ │-- annotations
+ │ │-- locust_train.json
+ │ |-- locust_test.json
+ │-- images
+ │ │-- 0.jpg
+ │ │-- 1.jpg
+ │ │-- 2.jpg
+ │ │-- 3.jpg
+ │ │-- ...
+
+```
+
+Since the official dataset does not provide a test set, we randomly select 90\% of the images for training and the rest (10\%) for evaluation (see [code](/tools/dataset/parse_deepposekit_dataset.py)).
+
+## Grévy’s Zebra
+
+
+
+
+Grévy’s Zebra (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+For [Grévy’s Zebra](https://github.com/jgraving/DeepPoseKit-Data) dataset, images can be downloaded from [zebra_images](https://download.openmmlab.com/mmpose/datasets/zebra_images.tar).
+Please download the annotation files from [zebra_annotations](https://download.openmmlab.com/mmpose/datasets/zebra_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── zebra
+ │-- annotations
+ │ │-- zebra_train.json
+ │ |-- zebra_test.json
+ │-- images
+ │ │-- 0.jpg
+ │ │-- 1.jpg
+ │ │-- 2.jpg
+ │ │-- 3.jpg
+ │ │-- ...
+
+```
+
+Since the official dataset does not provide a test set, we randomly select 90\% of the images for training and the rest (10\%) for evaluation (see [code](/tools/dataset/parse_deepposekit_dataset.py)).
+
+## ATRW
+
+
+
+
+ATRW (ACM MM'2020)
+
+```bibtex
+@inproceedings{li2020atrw,
+ title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
+ author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
+ booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
+ pages={2590--2598},
+ year={2020}
+}
+```
+
+
+
+ATRW captures images of the Amur tiger (also known as the Siberian tiger or Northeast China tiger) in the wild.
+For [ATRW](https://cvwc2019.github.io/challenge.html) dataset, please download images from
+[Pose_train](https://lilablobssc.blob.core.windows.net/cvwc2019/train/atrw_pose_train.tar.gz),
+[Pose_val](https://lilablobssc.blob.core.windows.net/cvwc2019/train/atrw_pose_val.tar.gz), and
+[Pose_test](https://lilablobssc.blob.core.windows.net/cvwc2019/test/atrw_pose_test.tar.gz).
+Note that in the official ATRW annotation files, the key "file_name" is written as "filename". To make them compatible
+with other COCO-style JSON files, we have renamed this key.
+Please download the modified annotation files from [atrw_annotations](https://download.openmmlab.com/mmpose/datasets/atrw_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── atrw
+ │-- annotations
+ │ │-- keypoint_train.json
+ │ │-- keypoint_val.json
+ │ │-- keypoint_trainval.json
+ │-- images
+ │ │-- train
+ │ │ │-- 000002.jpg
+ │ │ │-- 000003.jpg
+ │ │ │-- ...
+ │ │-- val
+ │ │ │-- 000001.jpg
+ │ │ │-- 000013.jpg
+ │ │ │-- ...
+ │ │-- test
+ │ │ │-- 000000.jpg
+ │ │ │-- 000004.jpg
+ │ │ │-- ...
+
+```
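+
+For reference, the renaming applied to the official files amounts to something like the sketch below. It is illustrative only: the pre-converted annotations linked above already include this fix, and the input path is hypothetical.
+
+```python
+# Rename the official ATRW "filename" key to the COCO-style "file_name".
+# A sketch only; the annotations linked above are already converted.
+import json
+
+with open('atrw_pose_train_official.json') as f:  # hypothetical input file
+    ann = json.load(f)
+for img in ann.get('images', []):
+    if 'filename' in img:
+        img['file_name'] = img.pop('filename')
+with open('data/atrw/annotations/keypoint_train.json', 'w') as f:
+    json.dump(ann, f)
+```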
diff --git a/vendor/ViTPose/docs/en/tasks/2d_body_keypoint.md b/vendor/ViTPose/docs/en/tasks/2d_body_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..625e4d57147c164f1495b8a4ac2c461075a467e7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_body_keypoint.md
@@ -0,0 +1,500 @@
+# 2D Body Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- Images
+ - [COCO](#coco) \[ [Homepage](http://cocodataset.org/) \]
+ - [MPII](#mpii) \[ [Homepage](http://human-pose.mpi-inf.mpg.de/) \]
+ - [MPII-TRB](#mpii-trb) \[ [Homepage](https://github.com/kennymckormick/Triplet-Representation-of-human-Body) \]
+ - [AI Challenger](#aic) \[ [Homepage](https://github.com/AIChallenger/AI_Challenger_2017) \]
+ - [CrowdPose](#crowdpose) \[ [Homepage](https://github.com/Jeff-sjtu/CrowdPose) \]
+ - [OCHuman](#ochuman) \[ [Homepage](https://github.com/liruilong940607/OCHumanApi) \]
+ - [MHP](#mhp) \[ [Homepage](https://lv-mhp.github.io/dataset) \]
+- Videos
+ - [PoseTrack18](#posetrack18) \[ [Homepage](https://posetrack.net/users/download.php) \]
+ - [sub-JHMDB](#sub-jhmdb-dataset) \[ [Homepage](http://jhmdb.is.tue.mpg.de/dataset) \]
+
+## COCO
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+For [COCO](http://cocodataset.org/) data, please download from [COCO download](http://cocodataset.org/#download). The 2017 Train/Val images are needed for COCO keypoints training and validation.
+[HRNet-Human-Pose-Estimation](https://github.com/HRNet/HRNet-Human-Pose-Estimation) provides the person detection results of COCO val2017 to reproduce our multi-person pose estimation results.
+Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
+Optionally, to evaluate on COCO'2017 test-dev, please download the [image-info](https://download.openmmlab.com/mmpose/datasets/person_keypoints_test-dev-2017.json).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- annotations
+ │ │-- person_keypoints_train2017.json
+ │ |-- person_keypoints_val2017.json
+ │ |-- person_keypoints_test-dev-2017.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ | |-- COCO_test-dev2017_detections_AP_H_609_person.json
+ │-- train2017
+ │ │-- 000000000009.jpg
+ │ │-- 000000000025.jpg
+ │ │-- 000000000030.jpg
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+
+```
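+
+If you prefer the command line, a download sketch like the following should fetch the COCO images and keypoint annotations (the URLs are the standard COCO links and should be verified against the download page; the person detection results still need to be fetched manually from the OneDrive/GoogleDrive links above):
+
+```shell
+# Sketch: download COCO 2017 images and keypoint annotations into data/coco.
+mkdir -p data/coco && cd data/coco
+wget http://images.cocodataset.org/zips/train2017.zip
+wget http://images.cocodataset.org/zips/val2017.zip
+wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
+unzip -q train2017.zip && unzip -q val2017.zip && unzip -q annotations_trainval2017.zip
+```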
+
+## MPII
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+For [MPII](http://human-pose.mpi-inf.mpg.de/) data, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/).
+We have converted the original annotation files into json format; please download them from [mpii_annotations](https://download.openmmlab.com/mmpose/datasets/mpii_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mpii
+ |── annotations
+ | |── mpii_gt_val.mat
+ | |── mpii_test.json
+ | |── mpii_train.json
+ | |── mpii_trainval.json
+ | `── mpii_val.json
+ `── images
+ |── 000001163.jpg
+ |── 000003072.jpg
+
+```
+
+During training and inference, the prediction results are saved in `.mat` format by default. We also provide a tool to convert these `.mat` files into a more readable `.json` format.
+
+```shell
+python tools/dataset/mat2json ${PRED_MAT_FILE} ${GT_JSON_FILE} ${OUTPUT_PRED_JSON_FILE}
+```
+
+For example,
+
+```shell
+python tools/dataset/mat2json work_dirs/res50_mpii_256x256/pred.mat data/mpii/annotations/mpii_val.json pred.json
+```
+
+## MPII-TRB
+
+
+
+
+MPII-TRB (ICCV'2019)
+
+```bibtex
+@inproceedings{duan2019trb,
+ title={TRB: A Novel Triplet Representation for Understanding 2D Human Body},
+ author={Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and Liu, Wentao and Qian, Chen and Ouyang, Wanli},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={9479--9488},
+ year={2019}
+}
+```
+
+
+
+For [MPII-TRB](https://github.com/kennymckormick/Triplet-Representation-of-human-Body) data, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/).
+Please download the annotation files from [mpii_trb_annotations](https://download.openmmlab.com/mmpose/datasets/mpii_trb_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mpii
+ |── annotations
+ | |── mpii_trb_train.json
+ | |── mpii_trb_val.json
+ `── images
+ |── 000001163.jpg
+ |── 000003072.jpg
+
+```
+
+## AIC
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+For [AIC](https://github.com/AIChallenger/AI_Challenger_2017) data, please download from [AI Challenger 2017](https://github.com/AIChallenger/AI_Challenger_2017); 2017 Train/Val is needed for keypoints training and validation.
+Please download the annotation files from [aic_annotations](https://download.openmmlab.com/mmpose/datasets/aic_annotations.tar).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── aic
+ │-- annotations
+ │ │-- aic_train.json
+ │ |-- aic_val.json
+ │-- ai_challenger_keypoint_train_20170902
+ │ │-- keypoint_train_images_20170902
+ │ │ │-- 0000252aea98840a550dac9a78c476ecb9f47ffa.jpg
+ │ │ │-- 000050f770985ac9653198495ef9b5c82435d49c.jpg
+ │ │ │-- ...
+ `-- ai_challenger_keypoint_validation_20170911
+ │-- keypoint_validation_images_20170911
+ │-- 0002605c53fb92109a3f2de4fc3ce06425c3b61f.jpg
+ │-- 0003b55a2c991223e6d8b4b820045bd49507bf6d.jpg
+ │-- ...
+```
+
+## CrowdPose
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+For [CrowdPose](https://github.com/Jeff-sjtu/CrowdPose) data, please download from [CrowdPose](https://github.com/Jeff-sjtu/CrowdPose).
+Please download the annotation files and human detection results from [crowdpose_annotations](https://download.openmmlab.com/mmpose/datasets/crowdpose_annotations.tar).
+For top-down approaches, we follow [CrowdPose](https://arxiv.org/abs/1812.00324) to use the [pre-trained weights](https://pjreddie.com/media/files/yolov3.weights) of [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) to generate the detected human bounding boxes.
+For model training, we follow [HigherHRNet](https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation) to train models on CrowdPose train/val dataset, and evaluate models on CrowdPose test dataset.
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── crowdpose
+ │-- annotations
+ │ │-- mmpose_crowdpose_train.json
+ │ │-- mmpose_crowdpose_val.json
+ │ │-- mmpose_crowdpose_trainval.json
+ │ │-- mmpose_crowdpose_test.json
+ │ │-- det_for_crowd_test_0.1_0.5.json
+ │-- images
+ │-- 100000.jpg
+ │-- 100001.jpg
+ │-- 100002.jpg
+ │-- ...
+```
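+
+A fetch sketch for the annotation/detection files and the detector weights might look like this (where the archive unpacks to, and where your detector expects `yolov3.weights`, are assumptions to adjust):
+
+```shell
+# Sketch: fetch the CrowdPose annotation/detection files and the YOLOv3 weights.
+mkdir -p data/crowdpose
+wget https://download.openmmlab.com/mmpose/datasets/crowdpose_annotations.tar
+tar -xf crowdpose_annotations.tar -C data/crowdpose
+# Detector weights used to generate the human bounding boxes; placement depends on your detector setup.
+wget https://pjreddie.com/media/files/yolov3.weights
+# The CrowdPose images themselves are downloaded from the official repository linked above.
+```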
+
+## OCHuman
+
+
+
+
+OCHuman (CVPR'2019)
+
+```bibtex
+@inproceedings{zhang2019pose2seg,
+ title={Pose2seg: Detection free human instance segmentation},
+ author={Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={889--898},
+ year={2019}
+}
+```
+
+
+
+For [OCHuman](https://github.com/liruilong940607/OCHumanApi) data, please download the images and annotations from [OCHuman](https://github.com/liruilong940607/OCHumanApi).
+Move them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── ochuman
+ │-- annotations
+ │ │-- ochuman_coco_format_val_range_0.00_1.00.json
+ │ |-- ochuman_coco_format_test_range_0.00_1.00.json
+ |-- images
+ │-- 000001.jpg
+ │-- 000002.jpg
+ │-- 000003.jpg
+ │-- ...
+
+```
+
+## MHP
+
+
+
+
+MHP (ACM MM'2018)
+
+```bibtex
+@inproceedings{zhao2018understanding,
+ title={Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
+ author={Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
+ booktitle={Proceedings of the 26th ACM international conference on Multimedia},
+ pages={792--800},
+ year={2018}
+}
+```
+
+
+
+For [MHP](https://lv-mhp.github.io/dataset) data, please download from [MHP](https://lv-mhp.github.io/dataset).
+Please download the annotation files from [mhp_annotations](https://download.openmmlab.com/mmpose/datasets/mhp_annotations.tar.gz).
+Please download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mhp
+ │-- annotations
+ │ │-- mhp_train.json
+ │ │-- mhp_val.json
+ │
+ `-- train
+ │ │-- images
+ │ │ │-- 1004.jpg
+ │ │ │-- 10050.jpg
+ │ │ │-- ...
+ │
+ `-- val
+ │ │-- images
+ │ │ │-- 10059.jpg
+ │ │ │-- 10068.jpg
+ │ │ │-- ...
+ │
+ `-- test
+ │ │-- images
+ │ │ │-- 1005.jpg
+ │ │ │-- 10052.jpg
+ │ │ │-- ...
+```
+
+## PoseTrack18
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+For [PoseTrack18](https://posetrack.net/users/download.php) data, please download from [PoseTrack18](https://posetrack.net/users/download.php).
+Please download the annotation files from [posetrack18_annotations](https://download.openmmlab.com/mmpose/datasets/posetrack18_annotations.tar).
+We have merged the video-wise separated official annotation files into two json files (posetrack18_train.json & posetrack18_val.json). We also provide the generated [mask files](https://download.openmmlab.com/mmpose/datasets/posetrack18_mask.tar) to speed up training.
+For top-down approaches, we use [MMDetection](https://github.com/open-mmlab/mmdetection) pre-trained [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) to generate the detected human bounding boxes.
+Please download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── posetrack18
+ │-- annotations
+ │ │-- posetrack18_train.json
+ │ │-- posetrack18_val.json
+ │ │-- posetrack18_val_human_detections.json
+ │ │-- train
+ │ │ │-- 000001_bonn_train.json
+ │ │ │-- 000002_bonn_train.json
+ │ │ │-- ...
+ │ │-- val
+ │ │ │-- 000342_mpii_test.json
+ │ │ │-- 000522_mpii_test.json
+ │ │ │-- ...
+ │ `-- test
+ │ │-- 000001_mpiinew_test.json
+ │ │-- 000002_mpiinew_test.json
+ │ │-- ...
+ │
+ `-- images
+ │ │-- train
+ │ │ │-- 000001_bonn_train
+ │ │ │ │-- 000000.jpg
+ │ │ │ │-- 000001.jpg
+ │ │ │ │-- ...
+ │ │ │-- ...
+ │ │-- val
+ │ │ │-- 000342_mpii_test
+ │ │ │ │-- 000000.jpg
+ │ │ │ │-- 000001.jpg
+ │ │ │ │-- ...
+ │ │ │-- ...
+ │ `-- test
+ │ │-- 000001_mpiinew_test
+ │ │ │-- 000000.jpg
+ │ │ │-- 000001.jpg
+ │ │ │-- ...
+ │ │-- ...
+ `-- mask
+ │-- train
+ │ │-- 000002_bonn_train
+ │ │ │-- 000000.jpg
+ │ │ │-- 000001.jpg
+ │ │ │-- ...
+ │ │-- ...
+ `-- val
+ │-- 000522_mpii_test
+ │ │-- 000000.jpg
+ │ │-- 000001.jpg
+ │ │-- ...
+ │-- ...
+```
+
+The official evaluation tool for PoseTrack should be installed from GitHub.
+
+```shell
+pip install git+https://github.com/svenkreiss/poseval.git
+```
+
+## sub-JHMDB dataset
+
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+For [sub-JHMDB](http://jhmdb.is.tue.mpg.de/dataset) data, please download the [images](http://files.is.tue.mpg.de/jhmdb/Rename_Images.tar.gz) from [JHMDB](http://jhmdb.is.tue.mpg.de/dataset).
+Please download the annotation files from [jhmdb_annotations](https://download.openmmlab.com/mmpose/datasets/jhmdb_annotations.tar).
+Move them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── jhmdb
+ │-- annotations
+ │ │-- Sub1_train.json
+ │ |-- Sub1_test.json
+ │ │-- Sub2_train.json
+ │ |-- Sub2_test.json
+ │ │-- Sub3_train.json
+ │ |-- Sub3_test.json
+ |-- Rename_Images
+ │-- brush_hair
+ │ │--April_09_brush_hair_u_nm_np1_ba_goo_0
+ | │ │--00001.png
+ | │ │--00002.png
+ │-- catch
+ │-- ...
+
+```
diff --git a/vendor/ViTPose/docs/en/tasks/2d_face_keypoint.md b/vendor/ViTPose/docs/en/tasks/2d_face_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe715003b3458fb75cfc81b823415cc42d7904e3
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_face_keypoint.md
@@ -0,0 +1,306 @@
+# 2D Face Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [300W](#300w-dataset) \[ [Homepage](https://ibug.doc.ic.ac.uk/resources/300-W/) \]
+- [WFLW](#wflw-dataset) \[ [Homepage](https://wywu.github.io/projects/LAB/WFLW.html) \]
+- [AFLW](#aflw-dataset) \[ [Homepage](https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/) \]
+- [COFW](#cofw-dataset) \[ [Homepage](http://www.vision.caltech.edu/xpburgos/ICCV13/) \]
+- [COCO-WholeBody-Face](#coco-wholebody-face) \[ [Homepage](https://github.com/jin-s13/COCO-WholeBody/) \]
+
+## 300W Dataset
+
+
+
+
+300W (IMAVIS'2016)
+
+```bibtex
+@article{sagonas2016300,
+ title={300 faces in-the-wild challenge: Database and results},
+ author={Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
+ journal={Image and vision computing},
+ volume={47},
+ pages={3--18},
+ year={2016},
+ publisher={Elsevier}
+}
+```
+
+
+
+For 300W data, please download images from [300W Dataset](https://ibug.doc.ic.ac.uk/resources/300-W/).
+Please download the annotation files from [300w_annotations](https://download.openmmlab.com/mmpose/datasets/300w_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── 300w
+ |── annotations
+ | |── face_landmarks_300w_train.json
+ | |── face_landmarks_300w_valid.json
+ | |── face_landmarks_300w_valid_common.json
+ | |── face_landmarks_300w_valid_challenge.json
+ | |── face_landmarks_300w_test.json
+ `── images
+ |── afw
+ | |── 1051618982_1.jpg
+ | |── 111076519_1.jpg
+ | ...
+ |── helen
+ | |── trainset
+ | | |── 100032540_1.jpg
+ | | |── 100040721_1.jpg
+ | | ...
+ | |── testset
+ | | |── 296814969_3.jpg
+ | | |── 2968560214_1.jpg
+ | | ...
+ |── ibug
+ | |── image_003_1.jpg
+ | |── image_004_1.jpg
+ | ...
+ |── lfpw
+ | |── trainset
+ | | |── image_0001.png
+ | | |── image_0002.png
+ | | ...
+ | |── testset
+ | | |── image_0001.png
+ | | |── image_0002.png
+ | | ...
+ `── Test
+ |── 01_Indoor
+ | |── indoor_001.png
+ | |── indoor_002.png
+ | ...
+ `── 02_Outdoor
+ |── outdoor_001.png
+ |── outdoor_002.png
+ ...
+```
+
+## WFLW Dataset
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+For WFLW data, please download images from [WFLW Dataset](https://wywu.github.io/projects/LAB/WFLW.html).
+Please download the annotation files from [wflw_annotations](https://download.openmmlab.com/mmpose/datasets/wflw_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── wflw
+ |── annotations
+ | |── face_landmarks_wflw_train.json
+ | |── face_landmarks_wflw_test.json
+ | |── face_landmarks_wflw_test_blur.json
+ | |── face_landmarks_wflw_test_occlusion.json
+ | |── face_landmarks_wflw_test_expression.json
+ | |── face_landmarks_wflw_test_largepose.json
+ | |── face_landmarks_wflw_test_illumination.json
+ | |── face_landmarks_wflw_test_makeup.json
+ |
+ `── images
+ |── 0--Parade
+ | |── 0_Parade_marchingband_1_1015.jpg
+ | |── 0_Parade_marchingband_1_1031.jpg
+ | ...
+ |── 1--Handshaking
+ | |── 1_Handshaking_Handshaking_1_105.jpg
+ | |── 1_Handshaking_Handshaking_1_107.jpg
+ | ...
+ ...
+```
+
+## AFLW Dataset
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
+
+For AFLW data, please download images from [AFLW Dataset](https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/).
+Please download the annotation files from [aflw_annotations](https://download.openmmlab.com/mmpose/datasets/aflw_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── aflw
+ |── annotations
+ | |── face_landmarks_aflw_train.json
+ | |── face_landmarks_aflw_test_frontal.json
+ | |── face_landmarks_aflw_test.json
+ `── images
+ |── flickr
+ |── 0
+ | |── image00002.jpg
+ | |── image00013.jpg
+ | ...
+ |── 2
+ | |── image00004.jpg
+ | |── image00006.jpg
+ | ...
+ `── 3
+ |── image00032.jpg
+ |── image00035.jpg
+ ...
+```
+
+## COFW Dataset
+
+
+
+
+COFW (ICCV'2013)
+
+```bibtex
+@inproceedings{burgos2013robust,
+ title={Robust face landmark estimation under occlusion},
+ author={Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr},
+ booktitle={Proceedings of the IEEE international conference on computer vision},
+ pages={1513--1520},
+ year={2013}
+}
+```
+
+
+
+For COFW data, please download from [COFW Dataset (Color Images)](http://www.vision.caltech.edu/xpburgos/ICCV13/Data/COFW_color.zip).
+Move `COFW_train_color.mat` and `COFW_test_color.mat` to `data/cofw/` and make them look like:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── cofw
+ |── COFW_train_color.mat
+ |── COFW_test_color.mat
+```
+
+Run the following script under `{MMPose}/data`:
+
+`python tools/dataset/parse_cofw_dataset.py`
+
+This will produce:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── cofw
+ |── COFW_train_color.mat
+ |── COFW_test_color.mat
+ |── annotations
+ | |── cofw_train.json
+ | |── cofw_test.json
+ |── images
+ |── 000001.jpg
+ |── 000002.jpg
+```
+
+## COCO-WholeBody (Face)
+
+[DATASET]
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+For the [COCO-WholeBody](https://github.com/jin-s13/COCO-WholeBody/) dataset, images can be downloaded from [COCO download](http://cocodataset.org/#download); 2017 Train/Val is needed for COCO keypoints training and validation.
+Download the COCO-WholeBody annotations for [Train](https://drive.google.com/file/d/1thErEToRbmM9uLNi1JXXfOsaS5VK2FXf/view?usp=sharing) / [Validation](https://drive.google.com/file/d/1N6VgwKnj8DeyGXCvp1eYgNbRmw6jdfrb/view?usp=sharing) (Google Drive).
+Download the person detection results of COCO val2017 from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- annotations
+ │ │-- coco_wholebody_train_v1.0.json
+ │ |-- coco_wholebody_val_v1.0.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ │-- train2017
+ │ │-- 000000000009.jpg
+ │ │-- 000000000025.jpg
+ │ │-- 000000000030.jpg
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+
+```
+
+Please also install the latest version of [Extended COCO API](https://github.com/jin-s13/xtcocoapi) to support COCO-WholeBody evaluation:
+
+`pip install xtcocotools`
diff --git a/vendor/ViTPose/docs/en/tasks/2d_fashion_landmark.md b/vendor/ViTPose/docs/en/tasks/2d_fashion_landmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..c0eb2c8435b34d0df29070fdd4e09b643bc15efe
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_fashion_landmark.md
@@ -0,0 +1,76 @@
+# 2D Fashion Landmark Dataset
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [DeepFashion](#deepfashion) \[ [Homepage](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/LandmarkDetection.html) \]
+
+## DeepFashion (Fashion Landmark Detection, FLD)
+
+
+
+
+DeepFashion (CVPR'2016)
+
+```bibtex
+@inproceedings{liuLQWTcvpr16DeepFashion,
+ author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
+ title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
+ booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2016}
+}
+```
+
+
+
+
+
+
+DeepFashion (ECCV'2016)
+
+```bibtex
+@inproceedings{liuYLWTeccv16FashionLandmark,
+ author = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
+ title = {Fashion Landmark Detection in the Wild},
+ booktitle = {European Conference on Computer Vision (ECCV)},
+ month = {October},
+ year = {2016}
+ }
+```
+
+
+
+For the [DeepFashion](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/LandmarkDetection.html) dataset, images can be downloaded from the [download page](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/LandmarkDetection.html).
+Please download the annotation files from [fld_annotations](https://download.openmmlab.com/mmpose/datasets/fld_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── fld
+ │-- annotations
+ │ │-- fld_upper_train.json
+ │ |-- fld_upper_val.json
+ │ |-- fld_upper_test.json
+ │ │-- fld_lower_train.json
+ │ |-- fld_lower_val.json
+ │ |-- fld_lower_test.json
+ │ │-- fld_full_train.json
+ │ |-- fld_full_val.json
+ │ |-- fld_full_test.json
+ │-- img
+ │ │-- img_00000001.jpg
+ │ │-- img_00000002.jpg
+ │ │-- img_00000003.jpg
+ │ │-- img_00000004.jpg
+ │ │-- img_00000005.jpg
+ │ │-- ...
+```
diff --git a/vendor/ViTPose/docs/en/tasks/2d_hand_keypoint.md b/vendor/ViTPose/docs/en/tasks/2d_hand_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..20f93d4c21c40697460ca91be7005eb087ffdd12
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_hand_keypoint.md
@@ -0,0 +1,319 @@
+# 2D Hand Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [OneHand10K](#onehand10k) \[ [Homepage](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html) \]
+- [FreiHand](#freihand-dataset) \[ [Homepage](https://lmb.informatik.uni-freiburg.de/projects/freihand/) \]
+- [CMU Panoptic HandDB](#cmu-panoptic-handdb) \[ [Homepage](http://domedb.perception.cs.cmu.edu/handdb.html) \]
+- [InterHand2.6M](#interhand26m) \[ [Homepage](https://mks0601.github.io/InterHand2.6M/) \]
+- [RHD](#rhd-dataset) \[ [Homepage](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html) \]
+- [COCO-WholeBody-Hand](#coco-wholebody-hand) \[ [Homepage](https://github.com/jin-s13/COCO-WholeBody/) \]
+
+## OneHand10K
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+For [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html) data, please download from [OneHand10K Dataset](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html).
+Please download the annotation files from [onehand10k_annotations](https://download.openmmlab.com/mmpose/datasets/onehand10k_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── onehand10k
+ |── annotations
+ | |── onehand10k_train.json
+ | |── onehand10k_test.json
+ `── Train
+ | |── source
+ | |── 0.jpg
+ | |── 1.jpg
+ | ...
+ `── Test
+ |── source
+ |── 0.jpg
+ |── 1.jpg
+
+```
+
+## FreiHAND Dataset
+
+
+
+
+FreiHand (ICCV'2019)
+
+```bibtex
+@inproceedings{zimmermann2019freihand,
+ title={Freihand: A dataset for markerless capture of hand pose and shape from single rgb images},
+ author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={813--822},
+ year={2019}
+}
+```
+
+
+
+For [FreiHAND](https://lmb.informatik.uni-freiburg.de/projects/freihand/) data, please download from [FreiHand Dataset](https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html).
+Since the official dataset does not provide a validation set, we randomly split the training data into train/val/test sets with an 8:1:1 ratio.
+Please download the annotation files from [freihand_annotations](https://download.openmmlab.com/mmpose/datasets/frei_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── freihand
+ |── annotations
+ | |── freihand_train.json
+ | |── freihand_val.json
+ | |── freihand_test.json
+ `── training
+ |── rgb
+ | |── 00000000.jpg
+ | |── 00000001.jpg
+ | ...
+ |── mask
+ |── 00000000.jpg
+ |── 00000001.jpg
+ ...
+```
+
+## CMU Panoptic HandDB
+
+
+
+
+CMU Panoptic HandDB (CVPR'2017)
+
+```bibtex
+@inproceedings{simon2017hand,
+ title={Hand keypoint detection in single images using multiview bootstrapping},
+ author={Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={1145--1153},
+ year={2017}
+}
+```
+
+
+
+For [CMU Panoptic HandDB](http://domedb.perception.cs.cmu.edu/handdb.html), please download from [CMU Panoptic HandDB](http://domedb.perception.cs.cmu.edu/handdb.html).
+Following [Simon et al.](https://arxiv.org/abs/1704.07809), the panoptic images (hand143_panopticdb) and the MPII & NZSL training sets (manual_train) are used for training, while the MPII & NZSL test set (manual_test) is used for testing.
+Please download the annotation files from [panoptic_annotations](https://download.openmmlab.com/mmpose/datasets/panoptic_annotations.tar).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── panoptic
+ |── annotations
+ | |── panoptic_train.json
+ | |── panoptic_test.json
+ |
+ `── hand143_panopticdb
+ | |── imgs
+ | | |── 00000000.jpg
+ | | |── 00000001.jpg
+ | | ...
+ |
+ `── hand_labels
+ |── manual_train
+ | |── 000015774_01_l.jpg
+ | |── 000015774_01_r.jpg
+ | ...
+ |
+ `── manual_test
+ |── 000648952_02_l.jpg
+ |── 000835470_01_l.jpg
+ ...
+```
+
+## InterHand2.6M
+
+
+
+
+InterHand2.6M (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+For [InterHand2.6M](https://mks0601.github.io/InterHand2.6M/), please download from [InterHand2.6M](https://mks0601.github.io/InterHand2.6M/).
+Please download the annotation files from [annotations](https://drive.google.com/drive/folders/1pWXhdfaka-J0fSAze0MsajN0VpZ8e8tO).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── interhand2.6m
+ |── annotations
+ | |── all
+ | |── human_annot
+ | |── machine_annot
+ | |── skeleton.txt
+ | |── subject.txt
+ |
+ `── images
+ | |── train
+ | | |-- Capture0 ~ Capture26
+ | |── val
+ | | |-- Capture0
+ | |── test
+ | | |-- Capture0 ~ Capture7
+```
+
+## RHD Dataset
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+For [RHD Dataset](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html), please download from [RHD Dataset](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html).
+Please download the annotation files from [rhd_annotations](https://download.openmmlab.com/mmpose/datasets/rhd_annotations.zip).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── rhd
+ |── annotations
+ | |── rhd_train.json
+ | |── rhd_test.json
+ `── training
+ | |── color
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+ | |── depth
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+ | |── mask
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+ `── evaluation
+ | |── color
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+ | |── depth
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+ | |── mask
+ | | |── 00000.jpg
+ | | |── 00001.jpg
+```
+
+## COCO-WholeBody (Hand)
+
+[DATASET]
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+For the [COCO-WholeBody](https://github.com/jin-s13/COCO-WholeBody/) dataset, images can be downloaded from [COCO download](http://cocodataset.org/#download); 2017 Train/Val is needed for COCO keypoints training and validation.
+Download the COCO-WholeBody annotations for [Train](https://drive.google.com/file/d/1thErEToRbmM9uLNi1JXXfOsaS5VK2FXf/view?usp=sharing) / [Validation](https://drive.google.com/file/d/1N6VgwKnj8DeyGXCvp1eYgNbRmw6jdfrb/view?usp=sharing) (Google Drive).
+Download the person detection results of COCO val2017 from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- annotations
+ │ │-- coco_wholebody_train_v1.0.json
+ │ |-- coco_wholebody_val_v1.0.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ │-- train2017
+ │ │-- 000000000009.jpg
+ │ │-- 000000000025.jpg
+ │ │-- 000000000030.jpg
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+```
+
+Please also install the latest version of [Extended COCO API](https://github.com/jin-s13/xtcocoapi) to support COCO-WholeBody evaluation:
+
+`pip install xtcocotools`
diff --git a/vendor/ViTPose/docs/en/tasks/2d_wholebody_keypoint.md b/vendor/ViTPose/docs/en/tasks/2d_wholebody_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..e3d573ffbdd62302035ddbd1747e36dc1da8f4cd
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/2d_wholebody_keypoint.md
@@ -0,0 +1,125 @@
+# 2D Wholebody Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [COCO-WholeBody](#coco-wholebody) \[ [Homepage](https://github.com/jin-s13/COCO-WholeBody/) \]
+- [Halpe](#halpe) \[ [Homepage](https://github.com/Fang-Haoshu/Halpe-FullBody/) \]
+
+## COCO-WholeBody
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+For the [COCO-WholeBody](https://github.com/jin-s13/COCO-WholeBody/) dataset, images can be downloaded from [COCO download](http://cocodataset.org/#download); 2017 Train/Val is needed for COCO keypoints training and validation.
+Download the COCO-WholeBody annotations for [Train](https://drive.google.com/file/d/1thErEToRbmM9uLNi1JXXfOsaS5VK2FXf/view?usp=sharing) / [Validation](https://drive.google.com/file/d/1N6VgwKnj8DeyGXCvp1eYgNbRmw6jdfrb/view?usp=sharing) (Google Drive).
+Download the person detection results of COCO val2017 from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- annotations
+ │ │-- coco_wholebody_train_v1.0.json
+ │ |-- coco_wholebody_val_v1.0.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ │-- train2017
+ │ │-- 000000000009.jpg
+ │ │-- 000000000025.jpg
+ │ │-- 000000000030.jpg
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+
+```
+
+Please also install the latest version of [Extended COCO API](https://github.com/jin-s13/xtcocoapi) (version>=1.5) to support COCO-WholeBody evaluation:
+
+`pip install xtcocotools`
+
+## Halpe
+
+
+
+
+Halpe (CVPR'2020)
+
+```bibtex
+@inproceedings{li2020pastanet,
+ title={PaStaNet: Toward Human Activity Knowledge Engine},
+ author={Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
+ booktitle={CVPR},
+ year={2020}
+}
+```
+
+
+
+For [Halpe](https://github.com/Fang-Haoshu/Halpe-FullBody/) dataset, please download images and annotations from [Halpe download](https://github.com/Fang-Haoshu/Halpe-FullBody).
+The images of the training set are from [HICO-Det](https://drive.google.com/open?id=1QZcJmGVlF9f4h-XLWe9Gkmnmj2z1gSnk) and those of the validation set are from [COCO](http://images.cocodataset.org/zips/val2017.zip).
+Download person detection result of COCO val2017 from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── halpe
+ │-- annotations
+ │ │-- halpe_train_v1.json
+ │ |-- halpe_val_v1.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ │-- hico_20160224_det
+ │ │-- anno_bbox.mat
+ │ │-- anno.mat
+ │ │-- README
+ │ │-- images
+ │ │ │-- train2015
+ │ │ │ │-- HICO_train2015_00000001.jpg
+ │ │ │ │-- HICO_train2015_00000002.jpg
+ │ │ │ │-- HICO_train2015_00000003.jpg
+ │ │ │ │-- ...
+ │ │ │-- test2015
+ │ │-- tools
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+
+```
+
+Please also install the latest version of [Extended COCO API](https://github.com/jin-s13/xtcocoapi) (version>=1.5) to support Halpe evaluation:
+
+`pip install xtcocotools`
diff --git a/vendor/ViTPose/docs/en/tasks/3d_body_keypoint.md b/vendor/ViTPose/docs/en/tasks/3d_body_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..c5ca2a1dba1a0d82730621a85cfa1eef164fbbea
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/3d_body_keypoint.md
@@ -0,0 +1,120 @@
+# 3D Body Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [Human3.6M](#human36m) \[ [Homepage](http://vision.imar.ro/human3.6m/description.php) \]
+- [CMU Panoptic](#cmu-panoptic) \[ [Homepage](http://domedb.perception.cs.cmu.edu/) \]
+
+## Human3.6M
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+For [Human3.6M](http://vision.imar.ro/human3.6m/description.php), please download from the official website and run the [preprocessing script](/tools/dataset/preprocess_h36m.py), which will extract camera parameters and pose annotations at full framerate (50 FPS) and downsampled framerate (10 FPS). The processed data should have the following structure:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ ├── h36m
+ ├── annotation_body3d
+ | ├── cameras.pkl
+ | ├── fps50
+ | | ├── h36m_test.npz
+ | | ├── h36m_train.npz
+ | | ├── joint2d_rel_stats.pkl
+ | | ├── joint2d_stats.pkl
+ | | ├── joint3d_rel_stats.pkl
+ | | `── joint3d_stats.pkl
+ | `── fps10
+ | ├── h36m_test.npz
+ | ├── h36m_train.npz
+ | ├── joint2d_rel_stats.pkl
+ | ├── joint2d_stats.pkl
+ | ├── joint3d_rel_stats.pkl
+ | `── joint3d_stats.pkl
+ `── images
+ ├── S1
+ | ├── S1_Directions_1.54138969
+ | | ├── S1_Directions_1.54138969_00001.jpg
+ | | ├── S1_Directions_1.54138969_00002.jpg
+ | | ├── ...
+ | ├── ...
+ ├── S5
+ ├── S6
+ ├── S7
+ ├── S8
+ ├── S9
+ `── S11
+```
+
+Please note that the Human3.6M dataset is also used in the [3D_body_mesh](/docs/en/tasks/3d_body_mesh.md) task, where different schemes for data preprocessing and organization are adopted.
+
+## CMU Panoptic
+
+
+CMU Panoptic (ICCV'2015)
+
+```bibtex
+@inproceedings{joo_iccv_2015,
+author = {Hanbyul Joo, Hao Liu, Lei Tan, Lin Gui, Bart Nabbe, Iain Matthews, Takeo Kanade, Shohei Nobuhara, and Yaser Sheikh},
+title = {Panoptic Studio: A Massively Multiview System for Social Motion Capture},
+booktitle = {ICCV},
+year = {2015}
+}
+```
+
+
+
+Please follow [voxelpose-pytorch](https://github.com/microsoft/voxelpose-pytorch) to prepare this dataset.
+
+1. Download the dataset by following the instructions in [panoptic-toolbox](https://github.com/CMU-Perceptual-Computing-Lab/panoptic-toolbox) and extract them under `$MMPOSE/data/panoptic`.
+
+2. Only download the sequences that are needed. You can also download just a subset of camera views by specifying the number of views (HD_Video_Number) and changing the camera order in `./scripts/getData.sh` (an example invocation is shown after this list). The used sequences and camera views can be found in [VoxelPose](https://arxiv.org/abs/2004.06239). Note that the sequence "160906_band3" might not be available due to errors on the server of CMU Panoptic.
+
+3. Note that we only use the HD videos, calibration data, and 3D body keypoints in the code. You can comment out other irrelevant parts, such as downloading the 3D face data, in `./scripts/getData.sh`.
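+
+As an illustration, downloading a single sequence used by VoxelPose with no VGA views and a few HD views might look like the following (the argument order of `getData.sh` should be double-checked against the panoptic-toolbox README):
+
+```shell
+# Sketch: run inside the panoptic-toolbox checkout; arguments are (sequence, VGA video count, HD video count).
+./scripts/getData.sh 160224_haggling1 0 5
+```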
+
+The directory tree should be like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ ├── panoptic
+ ├── 16060224_haggling1
+ | | ├── hdImgs
+ | | ├── hdvideos
+ | | ├── hdPose3d_stage1_coco19
+ | | ├── calibration_160224_haggling1.json
+ ├── 160226_haggling1
+ ├── ...
+```
diff --git a/vendor/ViTPose/docs/en/tasks/3d_body_mesh.md b/vendor/ViTPose/docs/en/tasks/3d_body_mesh.md
new file mode 100644
index 0000000000000000000000000000000000000000..aced63c802c20f0d7b07277393076f2e03f87afc
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/3d_body_mesh.md
@@ -0,0 +1,342 @@
+# 3D Body Mesh Recovery Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+To achieve high-quality human mesh estimation, we use multiple datasets for training.
+The following items should be prepared for human mesh training:
+
+
+
+- [3D Body Mesh Recovery Datasets](#3d-body-mesh-recovery-datasets)
+ - [Notes](#notes)
+ - [Annotation Files for Human Mesh Estimation](#annotation-files-for-human-mesh-estimation)
+ - [SMPL Model](#smpl-model)
+ - [COCO](#coco)
+ - [Human3.6M](#human36m)
+ - [MPI-INF-3DHP](#mpi-inf-3dhp)
+ - [LSP](#lsp)
+ - [LSPET](#lspet)
+ - [CMU MoShed Data](#cmu-moshed-data)
+
+
+
+## Notes
+
+### Annotation Files for Human Mesh Estimation
+
+For human mesh estimation, we use multiple datasets for training.
+The annotations of different datasets are preprocessed into the same format. Please
+follow the [preprocess procedure](https://github.com/nkolot/SPIN/tree/master/datasets/preprocess)
+of SPIN to generate the annotation files or download the processed files from
+[here](https://download.openmmlab.com/mmpose/datasets/mesh_annotation_files.zip),
+and make it look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mesh_annotation_files
+ ├── coco_2014_train.npz
+ ├── h36m_valid_protocol1.npz
+ ├── h36m_valid_protocol2.npz
+ ├── hr-lspet_train.npz
+ ├── lsp_dataset_original_train.npz
+ ├── mpi_inf_3dhp_train.npz
+ └── mpii_train.npz
+```
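+
+If you use the processed files, a fetch sketch could be (the archive is assumed to unpack into the `mesh_annotation_files` folder shown above; adjust otherwise):
+
+```shell
+# Sketch: download the preprocessed annotation files and unpack them under data/.
+wget https://download.openmmlab.com/mmpose/datasets/mesh_annotation_files.zip
+unzip -q mesh_annotation_files.zip -d data/
+```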
+
+### SMPL Model
+
+```bibtex
+@article{loper2015smpl,
+ title={SMPL: A skinned multi-person linear model},
+ author={Loper, Matthew and Mahmood, Naureen and Romero, Javier and Pons-Moll, Gerard and Black, Michael J},
+ journal={ACM transactions on graphics (TOG)},
+ volume={34},
+ number={6},
+ pages={1--16},
+ year={2015},
+ publisher={ACM New York, NY, USA}
+}
+```
+
+For human mesh estimation, the SMPL model is used to generate the human mesh.
+Please download the [gender neutral SMPL model](http://smplify.is.tue.mpg.de/),
+[joints regressor](https://download.openmmlab.com/mmpose/datasets/joints_regressor_cmr.npy)
+and [mean parameters](https://download.openmmlab.com/mmpose/datasets/smpl_mean_params.npz)
+under `$MMPOSE/models/smpl`, and make it look like this:
+
+```text
+mmpose
+├── mmpose
+├── ...
+├── models
+ │── smpl
+ ├── joints_regressor_cmr.npy
+ ├── smpl_mean_params.npz
+ └── SMPL_NEUTRAL.pkl
+```
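+
+The two OpenMMLab-hosted files can be fetched directly; the SMPL model itself requires registration on the SMPLify website, so it is left as a manual step in this sketch:
+
+```shell
+# Sketch: place the joints regressor and mean parameters under models/smpl.
+mkdir -p models/smpl
+wget -P models/smpl https://download.openmmlab.com/mmpose/datasets/joints_regressor_cmr.npy
+wget -P models/smpl https://download.openmmlab.com/mmpose/datasets/smpl_mean_params.npz
+# SMPL_NEUTRAL.pkl must be downloaded manually from http://smplify.is.tue.mpg.de/ after registration.
+```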
+
+## COCO
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+For [COCO](http://cocodataset.org/) data, please download from [COCO download](http://cocodataset.org/#download). COCO'2014 Train is needed for human mesh estimation training.
+Download and extract them under $MMPOSE/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- train2014
+ │ ├── COCO_train2014_000000000009.jpg
+ │ ├── COCO_train2014_000000000025.jpg
+ │ ├── COCO_train2014_000000000030.jpg
+ | │-- ...
+
+```
+
+## Human3.6M
+
+
+
+
+Human3.6M (TPAMI'2014)
+
+```bibtex
+@article{h36m_pami,
+ author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
+ title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments},
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher = {IEEE Computer Society},
+ volume = {36},
+ number = {7},
+ pages = {1325-1339},
+ month = {jul},
+ year = {2014}
+}
+```
+
+
+
+For [Human3.6M](http://vision.imar.ro/human3.6m/description.php), we use the MoShed data provided in [HMR](https://github.com/akanazawa/hmr) for training.
+However, due to license limitations, we are not allowed to redistribute the MoShed data.
+
+For the evaluation on Human3.6M dataset, please follow the
+[preprocess procedure](https://github.com/nkolot/SPIN/tree/master/datasets/preprocess)
+of SPIN to extract test images from
+[Human3.6M](http://vision.imar.ro/human3.6m/description.php) original videos,
+and make it look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── Human3.6M
+ ├── images
+ ├── S11_Directions_1.54138969_000001.jpg
+ ├── S11_Directions_1.54138969_000006.jpg
+ ├── S11_Directions_1.54138969_000011.jpg
+ ├── ...
+```
+
+The download of the Human3.6M dataset is quite difficult, so you can alternatively download the
+[zip file](https://drive.google.com/file/d/1WnRJD9FS3NUf7MllwgLRJJC-JgYFr8oi/view?usp=sharing)
+of the test images. However, due to the license limitations, we are not allowed to
+redistribute the images either, so users need to download the original videos and
+extract the images by themselves.
+
+## MPI-INF-3DHP
+
+
+
+```bibtex
+@inproceedings{mono-3dhp2017,
+ author = {Mehta, Dushyant and Rhodin, Helge and Casas, Dan and Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and Theobalt, Christian},
+ title = {Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision},
+ booktitle = {3D Vision (3DV), 2017 Fifth International Conference on},
+ url = {http://gvv.mpi-inf.mpg.de/3dhp_dataset},
+ year = {2017},
+ organization={IEEE},
+ doi={10.1109/3dv.2017.00064},
+}
+```
+
+For [MPI-INF-3DHP](http://gvv.mpi-inf.mpg.de/3dhp-dataset/), please follow the
+[preprocess procedure](https://github.com/nkolot/SPIN/tree/master/datasets/preprocess)
+of SPIN to sample images, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ ├── mpi_inf_3dhp_test_set
+ │ ├── TS1
+ │ ├── TS2
+ │ ├── TS3
+ │ ├── TS4
+ │ ├── TS5
+ │ └── TS6
+ ├── S1
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S2
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S3
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S4
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S5
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S6
+ │ ├── Seq1
+ │ └── Seq2
+ ├── S7
+ │ ├── Seq1
+ │ └── Seq2
+ └── S8
+ ├── Seq1
+ └── Seq2
+```
+
+## LSP
+
+
+
+```bibtex
+@inproceedings{johnson2010clustered,
+ title={Clustered Pose and Nonlinear Appearance Models for Human Pose Estimation.},
+ author={Johnson, Sam and Everingham, Mark},
+ booktitle={bmvc},
+ volume={2},
+ number={4},
+ pages={5},
+ year={2010},
+ organization={Citeseer}
+}
+```
+
+For [LSP](https://sam.johnson.io/research/lsp.html), please download the high resolution version
+[LSP dataset original](http://sam.johnson.io/research/lsp_dataset_original.zip).
+Extract them under `$MMPOSE/data`, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── lsp_dataset_original
+ ├── images
+ ├── im0001.jpg
+ ├── im0002.jpg
+ └── ...
+```
+
+## LSPET
+
+
+
+```bibtex
+@inproceedings{johnson2011learning,
+ title={Learning effective human pose estimation from inaccurate annotation},
+ author={Johnson, Sam and Everingham, Mark},
+ booktitle={CVPR 2011},
+ pages={1465--1472},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+For [LSPET](https://sam.johnson.io/research/lspet.html), please download its high resolution form
+[HR-LSPET](http://datasets.d2.mpi-inf.mpg.de/hr-lspet/hr-lspet.zip).
+Extract them under `$MMPOSE/data`, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── lspet_dataset
+ ├── images
+ │ ├── im00001.jpg
+ │ ├── im00002.jpg
+ │ ├── im00003.jpg
+ │ └── ...
+ └── joints.mat
+```
+
+## CMU MoShed Data
+
+
+
+```bibtex
+@inproceedings{kanazawa2018end,
+ title={End-to-end recovery of human shape and pose},
+ author={Kanazawa, Angjoo and Black, Michael J and Jacobs, David W and Malik, Jitendra},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={7122--7131},
+ year={2018}
+}
+```
+
+Real-world SMPL parameters are used for the adversarial training in human mesh estimation.
+The MoShed data provided in [HMR](https://github.com/akanazawa/hmr) is included in this
+[zip file](https://download.openmmlab.com/mmpose/datasets/mesh_annotation_files.zip).
+Please download and extract it under `$MMPOSE/data`, and make it look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mesh_annotation_files
+ ├── CMU_mosh.npz
+ └── ...
+```
diff --git a/vendor/ViTPose/docs/en/tasks/3d_hand_keypoint.md b/vendor/ViTPose/docs/en/tasks/3d_hand_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..17537e44767a6af0ca3412054ea1b5c492a9bfff
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tasks/3d_hand_keypoint.md
@@ -0,0 +1,55 @@
+# 3D Hand Keypoint Datasets
+
+It is recommended to symlink the dataset root to `$MMPOSE/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+MMPose supported datasets:
+
+- [InterHand2.6M](#interhand26m) \[ [Homepage](https://mks0601.github.io/InterHand2.6M/) \]
+
+## InterHand2.6M
+
+
+
+
+InterHand2.6M (ECCV'2020)
+
+```bibtex
+@InProceedings{Moon_2020_ECCV_InterHand2.6M,
+author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
+title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
+booktitle = {European Conference on Computer Vision (ECCV)},
+year = {2020}
+}
+```
+
+
+
+For [InterHand2.6M](https://mks0601.github.io/InterHand2.6M/), please download from [InterHand2.6M](https://mks0601.github.io/InterHand2.6M/).
+Please download the annotation files from [annotations](https://drive.google.com/drive/folders/1pWXhdfaka-J0fSAze0MsajN0VpZ8e8tO).
+Extract them under {MMPose}/data, and make them look like this:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── interhand2.6m
+ |── annotations
+ | |── all
+ | |── human_annot
+ | |── machine_annot
+ | |── skeleton.txt
+ | |── subject.txt
+ |
+ `── images
+ | |── train
+ | | |-- Capture0 ~ Capture26
+ | |── val
+ | | |-- Capture0
+ | |── test
+ | | |-- Capture0 ~ Capture7
+```
diff --git a/vendor/ViTPose/docs/en/tutorials/0_config.md b/vendor/ViTPose/docs/en/tutorials/0_config.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ca07805a46fda3b6adc860da958eeb9f7c77cf1
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/0_config.md
@@ -0,0 +1,235 @@
+# Tutorial 0: Learn about Configs
+
+We use Python files as configs and incorporate modular and inheritance design into our config system, which makes it convenient to conduct various experiments.
+You can find all the provided configs under `$MMPose/configs`. If you wish to inspect the config file,
+you may run `python tools/analysis/print_config.py /PATH/TO/CONFIG` to see the complete config.
+
+
+
+- [Modify config through script arguments](#modify-config-through-script-arguments)
+- [Config File Naming Convention](#config-file-naming-convention)
+ - [Config System Example](#config-system-example)
+- [FAQ](#faq)
+ - [Use intermediate variables in configs](#use-intermediate-variables-in-configs)
+
+
+
+## Modify config through script arguments
+
+When submitting jobs using "tools/train.py" or "tools/test.py", you may specify `--cfg-options` to modify the config in place; a combined example is given after the list below.
+
+- Update config keys of dict chains.
+
+ The config options can be specified following the order of the dict keys in the original config.
+ For example, `--cfg-options model.backbone.norm_eval=False` changes all the BN modules in the model backbone to `train` mode.
+
+- Update keys inside a list of configs.
+
+ Some config dicts are composed as a list in your config. For example, the training pipeline `data.train.pipeline` is normally a list
+ e.g. `[dict(type='LoadImageFromFile'), dict(type='TopDownRandomFlip', flip_prob=0.5), ...]`. If you want to change `'flip_prob=0.5'` to `'flip_prob=0.0'` in the pipeline,
+ you may specify `--cfg-options data.train.pipeline.1.flip_prob=0.0`.
+
+- Update values of list/tuples.
+
+ Sometimes the value to be updated is a list or a tuple. For example, the config file normally sets `workflow=[('train', 1)]`. If you want to
+ change this key, you may specify `--cfg-options workflow="[(train,1),(val,1)]"`. Note that the quotation mark \" is necessary to
+ support list/tuple data types, and that **NO** white space is allowed inside the quotation marks in the specified value.
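+
+Putting these together, a training command with several in-place overrides might look like the following (the config path and override values are illustrative only):
+
+```shell
+# Sketch: override config values from the command line when launching training.
+python tools/train.py configs/top_down/resnet/coco/res50_coco_256x192.py \
+    --cfg-options model.backbone.norm_eval=False \
+                  data.train.pipeline.1.flip_prob=0.0 \
+                  workflow="[(train,1),(val,1)]"
+```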
+
+## Config File Naming Convention
+
+We follow the style below to name config files. Contributors are advised to follow the same style.
+
+```
+configs/{topic}/{task}/{algorithm}/{dataset}/{backbone}_[model_setting]_{dataset}_[input_size]_[technique].py
+```
+
+`{xxx}` is a required field and `[yyy]` is optional; a worked example follows the field list below.
+
+- `{topic}`: topic type, e.g. `body`, `face`, `hand`, `animal`, etc.
+- `{task}`: task type, `[2d | 3d]_[kpt | mesh]_[sview | mview]_[rgb | rgbd]_[img | vid]`. The task is categorized along 5 dimensions: (1) 2D or 3D pose estimation; (2) representation type: keypoint (kpt), mesh, or DensePose (dense); (3) single-view (sview) or multi-view (mview); (4) RGB or RGBD; and (5) image (img) or video (vid), e.g. `2d_kpt_sview_rgb_img`, `3d_kpt_sview_rgb_vid`, etc.
+- `{algorithm}`: algorithm type, e.g. `associative_embedding`, `deeppose`, etc.
+- `{dataset}`: dataset name, e.g. `coco`, etc.
+- `{backbone}`: backbone type, e.g. `res50` (ResNet-50), etc.
+- `[model setting]`: specific setting for some models.
+- `[input_size]`: input size of the model.
+- `[technique]`: some specific techniques, including losses, augmentation and tricks, e.g. `wingloss`, `udp`, `fp16`.
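+
+As an illustrative decomposition (the exact file may differ between MMPose versions):
+
+```
+configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py
+# topic: body | task: 2d_kpt_sview_rgb_img | algorithm: deeppose
+# dataset: coco | backbone: res50 | input_size: 256x192
+```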
+
+### Config System Example
+
+- An Example of 2D Top-down Heatmap-based Human Pose Estimation
+
+ To help the users have a basic idea of a complete config structure and the modules in the config system,
+ we make brief comments on 'https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py' as follows.
+ For more detailed usage and the alternatives for each parameter in each module, please refer to the API documentation.
+
+ ```python
+ # runtime settings
+ log_level = 'INFO' # The level of logging
+ load_from = None # load models as a pre-trained model from a given path. This will not resume training
+ resume_from = None # Resume checkpoints from a given path; the training will be resumed from the epoch at which the checkpoint was saved
+ dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set
+ workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
+ checkpoint_config = dict( # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
+ interval=10) # Interval to save checkpoint
+ evaluation = dict( # Config of evaluation during training
+ interval=10, # Interval to perform evaluation
+ metric='mAP', # Metrics to be performed
+ save_best='AP') # set `AP` as key indicator to save best checkpoint
+ # optimizer
+ optimizer = dict(
+ # Config used to build optimizer, support (1). All the optimizers in PyTorch
+ # whose arguments are also the same as those in PyTorch. (2). Custom optimizers
+ # which are built on `constructor`, referring to "tutorials/4_new_modules.md"
+ # for implementation.
+ type='Adam', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
+ lr=5e-4, # Learning rate, see detail usages of the parameters in the documentation of PyTorch
+ )
+ optimizer_config = dict(grad_clip=None) # Do not use gradient clip
+ # learning policy
+ lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
+ policy='step', # Policy of scheduler, also support CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
+ warmup='linear', # Type of warmup used. It can be None(use no warmup), 'constant', 'linear' or 'exp'.
+ warmup_iters=500, # The number of iterations or epochs that warmup
+ warmup_ratio=0.001, # LR used at the beginning of warmup equals to warmup_ratio * initial_lr
+ step=[170, 200]) # Steps to decay the learning rate
+ total_epochs = 210 # Total epochs to train the model
+ log_config = dict( # Config to register logger hook
+ interval=50, # Interval to print the log
+ hooks=[
+ dict(type='TextLoggerHook'), # The logger used to record the training process
+ # dict(type='TensorboardLoggerHook') # The Tensorboard logger is also supported
+ ])
+
+ channel_cfg = dict(
+ num_output_channels=17, # The output channels of keypoint head
+ dataset_joints=17, # Number of joints in the dataset
+ dataset_channel=[ # Dataset supported channels
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[ # Channels to output
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ # model settings
+ model = dict( # Config of the model
+ type='TopDown', # Type of the model
+ pretrained='torchvision://resnet50', # The url/site of the pretrained model
+ backbone=dict( # Dict for backbone
+ type='ResNet', # Name of the backbone
+ depth=50), # Depth of ResNet model
+ keypoint_head=dict( # Dict for keypoint head
+ type='TopdownHeatmapSimpleHead', # Name of keypoint head
+ in_channels=2048, # The input channels of keypoint head
+ out_channels=channel_cfg['num_output_channels'], # The output channels of keypoint head
+ loss_keypoint=dict( # Dict for keypoint loss
+ type='JointsMSELoss', # Name of keypoint loss
+ use_target_weight=True)), # Whether to consider target_weight during loss calculation
+ train_cfg=dict(), # Config of training hyper-parameters
+ test_cfg=dict( # Config of testing hyper-parameters
+ flip_test=True, # Whether to use flip-test during inference
+ post_process='default', # Use 'default' post-processing approach.
+ shift_heatmap=True, # Shift and align the flipped heatmap to achieve higher performance
+ modulate_kernel=11)) # Gaussian kernel size for modulation. Only used for "post_process='unbiased'"
+
+ data_cfg = dict(
+ image_size=[192, 256], # Size of model input resolution
+ heatmap_size=[48, 64], # Size of the output heatmap
+ num_output_channels=channel_cfg['num_output_channels'], # Number of output channels
+ num_joints=channel_cfg['dataset_joints'], # Number of joints
+ dataset_channel=channel_cfg['dataset_channel'], # Dataset supported channels
+ inference_channel=channel_cfg['inference_channel'], # Channels to output
+ soft_nms=False, # Whether to perform soft-nms during inference
+ nms_thr=1.0, # Threshold for non maximum suppression.
+ oks_thr=0.9, # Threshold of oks (object keypoint similarity) score during nms
+ vis_thr=0.2, # Threshold of keypoint visibility
+ use_gt_bbox=False, # Whether to use ground-truth bounding box during testing
+ det_bbox_thr=0.0, # Threshold of detected bounding box score. Used when `use_gt_bbox=False`
+ bbox_file='data/coco/person_detection_results/' # Path to the bounding box detection file
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ )
+
+ train_pipeline = [
+ dict(type='LoadImageFromFile'), # Loading image from file
+ dict(type='TopDownRandomFlip', # Perform random flip augmentation
+ flip_prob=0.5), # Probability of implementing flip
+ dict(
+ type='TopDownHalfBodyTransform', # Config of TopDownHalfBodyTransform data-augmentation
+ num_joints_half_body=8, # Threshold of performing half-body transform.
+ prob_half_body=0.3), # Probability of implementing half-body transform
+ dict(
+ type='TopDownGetRandomScaleRotation', # Config of TopDownGetRandomScaleRotation
+ rot_factor=40, # Rotating to ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor=0.5), # Scaling to ``[1-scale_factor, 1+scale_factor]``.
+ dict(type='TopDownAffine', # Affine transform the image to make input.
+ use_udp=False), # Do not use unbiased data processing.
+ dict(type='ToTensor'), # Convert other types to tensor type pipeline
+ dict(
+ type='NormalizeTensor', # Normalize input tensors
+ mean=[0.485, 0.456, 0.406], # Mean values of different channels to normalize
+ std=[0.229, 0.224, 0.225]), # Std values of different channels to normalize
+ dict(type='TopDownGenerateTarget', # Generate heatmap target. Different encoding types supported.
+ sigma=2), # Sigma of heatmap gaussian
+ dict(
+ type='Collect', # Collect pipeline that decides which keys in the data should be passed to the detector
+ keys=['img', 'target', 'target_weight'], # Keys of input
+ meta_keys=[ # Meta keys of input
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+ ]
+
+ val_pipeline = [
+ dict(type='LoadImageFromFile'), # Loading image from file
+ dict(type='TopDownAffine'), # Affine transform the image to make input.
+ dict(type='ToTensor'), # Config of ToTensor
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406], # Mean values of different channels to normalize
+ std=[0.229, 0.224, 0.225]), # Std values of different channels to normalize
+ dict(
+ type='Collect', # Collect pipeline that decides which keys in the data should be passed to the detector
+ keys=['img'], # Keys of input
+ meta_keys=[ # Meta keys of input
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+ ]
+
+ test_pipeline = val_pipeline
+
+ data_root = 'data/coco' # Root of the dataset
+ data = dict( # Config of data
+ samples_per_gpu=64, # Batch size of each single GPU during training
+ workers_per_gpu=2, # Workers to pre-fetch data for each single GPU
+ val_dataloader=dict(samples_per_gpu=32), # Batch size of each single GPU during validation
+ test_dataloader=dict(samples_per_gpu=32), # Batch size of each single GPU during testing
+ train=dict( # Training dataset config
+ type='TopDownCocoDataset', # Name of dataset
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', # Path to annotation file
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict( # Validation dataset config
+ type='TopDownCocoDataset', # Name of dataset
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', # Path to annotation file
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict( # Testing dataset config
+ type='TopDownCocoDataset', # Name of dataset
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', # Path to annotation file
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ )
+
+ ```
+
+## FAQ
+
+### Use intermediate variables in configs
+
+Some intermediate variables are used in the config files, like `train_pipeline`/`val_pipeline`/`test_pipeline` etc.
+
+For example, we first define `train_pipeline`/`val_pipeline`/`test_pipeline` and then pass them into `data`.
+Thus, `train_pipeline`/`val_pipeline`/`test_pipeline` are intermediate variables.
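+
+As a minimal sketch (reusing pipeline step names from the example above), the pattern looks like this:
+
+```python
+# Define the pipelines once as intermediate variables ...
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='ToTensor'),
+]
+val_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='ToTensor'),
+]
+test_pipeline = val_pipeline
+
+# ... and then reference them inside `data`.
+data = dict(
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=val_pipeline),
+    test=dict(pipeline=test_pipeline),
+)
+```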
diff --git a/vendor/ViTPose/docs/en/tutorials/1_finetune.md b/vendor/ViTPose/docs/en/tutorials/1_finetune.md
new file mode 100644
index 0000000000000000000000000000000000000000..7f8ea097e16f58b261714e36829096848720d9b6
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/1_finetune.md
@@ -0,0 +1,153 @@
+# Tutorial 1: Finetuning Models
+
+Models pre-trained on the COCO dataset can serve as a good initialization for other datasets, e.g., the COCO-WholeBody dataset.
+This tutorial provides instructions on how to use the models provided in the [Model Zoo](https://mmpose.readthedocs.io/en/latest/modelzoo.html) on other datasets to obtain better performance.
+
+
+
+- [Outline](#outline)
+- [Modify Head](#modify-head)
+- [Modify Dataset](#modify-dataset)
+- [Modify Training Schedule](#modify-training-schedule)
+- [Use Pre-Trained Model](#use-pre-trained-model)
+
+
+
+## Outline
+
+There are two steps to finetune a model on a new dataset.
+
+- Add support for the new dataset following [Tutorial 2: Adding New Dataset](2_new_dataset.md).
+- Modify the configs as will be discussed in this tutorial.
+
+To finetune on a custom dataset, the users need to modify four parts of the config.
+
+## Modify Head
+
+The new config needs to adapt the head to the number of keypoints of the new dataset, which only requires changing `out_channels` in the keypoint_head.
+For example, COCO-WholeBody has 133 keypoints, while COCO has 17.
+
+```python
+channel_cfg = dict(
+ num_output_channels=133, # changing from 17 to 133
+ dataset_joints=133, # changing from 17 to 133
+ dataset_channel=[
+ list(range(133)), # changing from 17 to 133
+ ],
+ inference_channel=list(range(133))) # changing from 17 to 133
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'], # modify this
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+```
+
+Note that the `pretrained='https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w48-8ef0771d.pth'` setting only initializes the backbone.
+It is what you need if you are training a new model from ImageNet-pretrained weights.
+However, it is not related to finetuning. What we need is `load_from`, which will be discussed later.
+
+## Modify Dataset
+
+The users may also need to prepare the dataset and write the configs about the dataset.
+MMPose supports multiple (10+) datasets, including COCO, COCO-WholeBody and MPII-TRB.
+
+```python
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset', # modify the name of the dataset
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', # modify the path to the annotation file
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset', # modify the name of the dataset
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', # modify the path to the annotation file
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset', # modify the name of the dataset
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', # modify the path to the annotation file
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline)
+)
+```
+
+## Modify Training Schedule
+
+The finetuning hyperparameters differ from the default schedule: finetuning usually requires a smaller learning rate and fewer training epochs.
+
+```python
+optimizer = dict(
+ type='Adam',
+ lr=5e-4, # reduce it
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200]) # reduce it
+total_epochs = 210 # reduce it
+```
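+
+One illustrative choice (the values below are assumptions for illustration, not values prescribed by MMPose) could be:
+
+```python
+optimizer = dict(type='Adam', lr=5e-5)  # e.g. 10x smaller than the from-scratch setting
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.001,
+    step=[17, 20])  # decay earlier for a shorter schedule
+total_epochs = 21
+```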
+
+## Use Pre-Trained Model
+
+Users can load a pre-trained model by setting the `load_from` field of the config to the model's path or link.
+The users might want to download the model weights in advance to avoid download time during training.
+
+```python
+# use the pre-trained model for the whole HRNet
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-741844ba_20200812.pth' # model path can be found in model zoo
+```
diff --git a/vendor/ViTPose/docs/en/tutorials/2_new_dataset.md b/vendor/ViTPose/docs/en/tutorials/2_new_dataset.md
new file mode 100644
index 0000000000000000000000000000000000000000..de628b49e1fed5a3f8104563013433ab34ac6f4d
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/2_new_dataset.md
@@ -0,0 +1,318 @@
+# Tutorial 2: Adding New Dataset
+
+## Customize datasets by reorganizing data to COCO format
+
+The simplest way to use the custom dataset is to convert your annotation format to COCO dataset format.
+
+The annotation json files in COCO format have the following necessary keys:
+
+```python
+'images': [
+ {
+ 'file_name': '000000001268.jpg',
+ 'height': 427,
+ 'width': 640,
+ 'id': 1268
+ },
+ ...
+],
+'annotations': [
+ {
+ 'segmentation': [[426.36,
+ ...
+ 424.34,
+ 223.3]],
+ 'keypoints': [0,0,0,
+ 0,0,0,
+ 0,0,0,
+ 427,220,2,
+ 443,222,2,
+ 414,228,2,
+ 449,232,2,
+ 408,248,1,
+ 454,261,2,
+ 0,0,0,
+ 0,0,0,
+ 411,287,2,
+ 431,287,2,
+ 0,0,0,
+ 458,265,2,
+ 0,0,0,
+ 466,300,1],
+ 'num_keypoints': 10,
+ 'area': 3894.5826,
+ 'iscrowd': 0,
+ 'image_id': 1268,
+ 'bbox': [402.34, 205.02, 65.26, 88.45],
+ 'category_id': 1,
+ 'id': 215218
+ },
+ ...
+],
+'categories': [
+ {'id': 1, 'name': 'person'},
+ ]
+```
+
+There are three necessary keys in the json file:
+
+- `images`: contains a list of images with their information like `file_name`, `height`, `width`, and `id`.
+- `annotations`: contains the list of instance annotations.
+- `categories`: contains the category name ('person') and its ID (1).
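+
+To produce such a file from your own annotations, a minimal sketch (the values below are placeholders, not real data) is:
+
+```python
+import json
+
+# Wrap custom annotations into the three required COCO-style keys.
+images = [dict(file_name='000000001268.jpg', height=427, width=640, id=1268)]
+annotations = [dict(
+    keypoints=[0, 0, 0] * 17,  # x, y, visibility triplets for 17 keypoints
+    num_keypoints=0,
+    area=65.26 * 88.45,
+    iscrowd=0,
+    image_id=1268,
+    bbox=[402.34, 205.02, 65.26, 88.45],  # x, y, width, height
+    category_id=1,
+    id=215218)]
+categories = [dict(id=1, name='person')]
+
+with open('my_custom_train.json', 'w') as f:
+    json.dump(dict(images=images, annotations=annotations, categories=categories), f)
+```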
+
+## Create a custom dataset_info config file for the dataset
+
+Add a new dataset info config file.
+
+```
+configs/_base_/datasets/custom.py
+```
+
+An example of the dataset config is as follows.
+
+`keypoint_info` contains the information about each keypoint.
+
+1. `name`: the keypoint name. The keypoint name must be unique.
+2. `id`: the keypoint id.
+3. `color`: ([B, G, R]) is used for keypoint visualization.
+4. `type`: 'upper' or 'lower', will be used in data augmentation.
+5. `swap`: indicates the 'swap pair' (also known as 'flip pair'). When applying image horizontal flip, the left part will become the right part. We need to flip the keypoints accordingly.
+
+`skeleton_info` contains the information about the keypoint connectivity, which is used for visualization.
+
+`joint_weights` assigns different loss weights to different keypoints.
+
+`sigmas` is used to calculate the OKS score. Please read [keypoints-eval](https://cocodataset.org/#keypoints-eval) to learn more about it.
+
+```python
+dataset_info = dict(
+ dataset_name='coco',
+ paper_info=dict(
+ author='Lin, Tsung-Yi and Maire, Michael and '
+ 'Belongie, Serge and Hays, James and '
+ 'Perona, Pietro and Ramanan, Deva and '
+ r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+ title='Microsoft coco: Common objects in context',
+ container='European conference on computer vision',
+ year='2014',
+ homepage='http://cocodataset.org/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
+```
+
+## Create a custom dataset class
+
+1. First create a package inside the mmpose/datasets/datasets folder.
+
+2. Create a class definition of your dataset in the package folder and register it in the registry with a name. Without a name, you will keep getting the error `KeyError: 'XXXXX is not in the dataset registry'`.
+
+ ```python
+ @DATASETS.register_module(name='MyCustomDataset')
+ class MyCustomDataset(SomeOtherBaseClassAsPerYourNeed):
+     ...
+ ```
+
+3. Make sure you have updated the `__init__.py` of your package folder.
+
+4. Make sure you have updated the `__init__.py` of the dataset package folder (`mmpose/datasets/datasets`).
+
+## Create a custom training config file
+
+Create a custom training config file as per your need and the model/architecture you want to use in the configs folder. You may modify an existing config file to use the new custom dataset.
+
+In `configs/my_custom_config.py`:
+
+```python
+...
+# dataset settings
+dataset_type = 'MyCustomDataset'
+...
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file='path/to/your/train/json',
+ img_prefix='path/to/your/train/img',
+ ...),
+ val=dict(
+ type=dataset_type,
+ ann_file='path/to/your/val/json',
+ img_prefix='path/to/your/val/img',
+ ...),
+ test=dict(
+ type=dataset_type,
+ ann_file='path/to/your/test/json',
+ img_prefix='path/to/your/test/img',
+ ...))
+...
+```
+
+Make sure you have provided all the paths correctly.
diff --git a/vendor/ViTPose/docs/en/tutorials/3_data_pipeline.md b/vendor/ViTPose/docs/en/tutorials/3_data_pipeline.md
new file mode 100644
index 0000000000000000000000000000000000000000..a637a8c113d4d9cb0285d88421311aba7c711e2d
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/3_data_pipeline.md
@@ -0,0 +1,153 @@
+# Tutorial 3: Custom Data Pipelines
+
+## Design of Data pipelines
+
+Following typical conventions, we use `Dataset` and `DataLoader` for data loading
+with multiple workers. `Dataset` returns a dict of data items corresponding to
+the arguments of the model's forward method.
+Since the data in pose estimation may not be of the same size (image size, gt bbox size, etc.),
+we introduce a new `DataContainer` type in MMCV to help collect and distribute
+data of different sizes.
+See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
+
+The data preparation pipeline and the dataset are decoupled. Usually the dataset
+defines how to process the annotations, while the data pipeline defines all the steps to prepare a data dict.
+A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform.
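+
+Conceptually, each transform is just a callable that receives, updates and returns the results dict. A toy sketch of this contract (plain Python, not actual MMPose transforms):
+
+```python
+class ToyFlip:
+    """Toy stand-in for a transform such as TopDownRandomFlip."""
+
+    def __call__(self, results):
+        results['flipped'] = True  # add/update fields of the results dict
+        return results
+
+
+class ToyCollect:
+    """Toy stand-in for Collect: keep only the requested keys."""
+
+    def __init__(self, keys):
+        self.keys = keys
+
+    def __call__(self, results):
+        return {k: results[k] for k in self.keys}
+
+
+pipeline = [ToyFlip(), ToyCollect(keys=['img', 'flipped'])]
+results = dict(img='fake_image', image_file='000000001268.jpg')
+for transform in pipeline:
+    results = transform(results)
+print(results)  # {'img': 'fake_image', 'flipped': True}
+```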
+
+The operations are categorized into data loading, pre-processing, formatting and label generation.
+
+Here is a pipeline example for Simple Baseline (ResNet50).
+
+```python
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownHalfBodyTransform', num_joints_half_body=8, prob_half_body=0.3),
+ dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+```
+
+For each operation, we list the related dict fields that are added/updated/removed.
+
+### Data loading
+
+`LoadImageFromFile`
+
+- add: img, img_file
+
+### Pre-processing
+
+`TopDownRandomFlip`
+
+- update: img, joints_3d, joints_3d_visible, center
+
+`TopDownHalfBodyTransform`
+
+- update: center, scale
+
+`TopDownGetRandomScaleRotation`
+
+- update: scale, rotation
+
+`TopDownAffine`
+
+- update: img, joints_3d, joints_3d_visible
+
+`NormalizeTensor`
+
+- update: img
+
+### Generating labels
+
+`TopDownGenerateTarget`
+
+- add: target, target_weight
+
+### Formatting
+
+`ToTensor`
+
+- update: img
+
+`Collect`
+
+- add: img_meta (the keys of img_meta are specified by `meta_keys`)
+- remove: all other keys except for those specified by `keys`
+
+## Extend and use custom pipelines
+
+1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict.
+
+ ```python
+ from mmpose.datasets import PIPELINES
+
+ @PIPELINES.register_module()
+ class MyTransform:
+
+ def __call__(self, results):
+ results['dummy'] = True
+ return results
+ ```
+
+1. Import the new class.
+
+ ```python
+ from .my_pipeline import MyTransform
+ ```
+
+1. Use it in config files.
+
+ ```python
+ train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownHalfBodyTransform', num_joints_half_body=8, prob_half_body=0.3),
+ dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='MyTransform'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+ ]
+ ```
diff --git a/vendor/ViTPose/docs/en/tutorials/4_new_modules.md b/vendor/ViTPose/docs/en/tutorials/4_new_modules.md
new file mode 100644
index 0000000000000000000000000000000000000000..e1864b21e1b93667c5c0aa6ae5c8f03dd69e94f7
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/4_new_modules.md
@@ -0,0 +1,213 @@
+# Tutorial 4: Adding New Modules
+
+## Customize optimizer
+
+A customized optimizer could be defined as follows.
+Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
+You need to first implement the new optimizer in a file, e.g., in `mmpose/core/optimizer/my_optimizer.py`:
+
+```python
+from mmcv.runner import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class MyOptimizer(Optimizer):
+
+ def __init__(self, a, b, c):
+     ...
+
+```
+
+Then add this module in `mmpose/core/optimizer/__init__.py` so that the registry will
+find the new module and add it:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+Then you can use `MyOptimizer` in `optimizer` field of config files.
+In the configs, the optimizers are defined by the field `optimizer` like the following:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+To use your own optimizer, the field can be changed to
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+We already support using all the optimizers implemented by PyTorch, and the only modification needed is to change the `optimizer` field of the config files.
+For example, if you want to use `Adam` (though the performance may drop a lot), the modification could be as follows.
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
+
+## Customize optimizer constructor
+
+Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
+The users can do this fine-grained parameter tuning by customizing the optimizer constructor.
+
+```python
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner import OPTIMIZER_BUILDERS, OPTIMIZERS
+from mmpose.utils import get_root_logger
+from .cocktail_optimizer import CocktailOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class CocktailOptimizerConstructor:
+
+ def __init__(self, optimizer_cfg, paramwise_cfg=None):
+     pass
+
+ def __call__(self, model):
+
+ return my_optimizer
+
+```
+
+## Develop new components
+
+We basically categorize model components into 3 types.
+
+- detectors: the whole pose detector model pipeline, usually contains a backbone and keypoint_head.
+- backbone: usually an FCN network to extract feature maps, e.g., ResNet, HRNet.
+- keypoint_head: the component for pose estimation task, usually contains some deconv layers.
+
+1. Create a new file `mmpose/models/backbones/my_model.py`.
+
+```python
+import torch.nn as nn
+
+from ..builder import BACKBONES
+
+@BACKBONES.register_module()
+class MyModel(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x): # should return a tuple
+ pass
+
+ def init_weights(self, pretrained=None):
+ pass
+```
+
+2. Import the module in `mmpose/models/backbones/__init__.py`.
+
+```python
+from .my_model import MyModel
+```
+
+3. Create a new file `mmpose/models/keypoint_heads/my_head.py`.
+
+You can write a new keypoint head inheriting from `nn.Module`,
+and overwrite the `init_weights(self)` and `forward(self, x)` methods.
+
+```python
+import torch.nn as nn
+
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class MyHead(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x):
+ pass
+
+ def init_weights(self):
+ pass
+```
+
+4. Import the module in `mmpose/models/keypoint_heads/__init__.py`
+
+```python
+from .my_head import MyHead
+```
+
+5. Use it in your config file.
+
+For the top-down 2D pose estimation model, we set the module type as `TopDown`.
+
+```python
+model = dict(
+ type='TopDown',
+ backbone=dict(
+ type='MyModel',
+ arg1=xxx,
+ arg2=xxx),
+ keypoint_head=dict(
+ type='MyHead',
+ arg1=xxx,
+ arg2=xxx))
+```
+
+### Add new loss
+
+Assume you want to add a new loss named `MyLoss` for keypoint estimation.
+To add a new loss function, the users need to implement it in `mmpose/models/losses/my_loss.py`.
+The decorator `weighted_loss` enables the loss to be weighted for each element.
+
+```python
+import torch
+import torch.nn as nn
+
+from mmpose.models import LOSSES
+
+def my_loss(pred, target):
+ assert pred.size() == target.size() and target.numel() > 0
+ loss = torch.abs(pred - target)
+ loss = torch.mean(loss)
+ return loss
+
+@LOSSES.register_module()
+class MyLoss(nn.Module):
+
+ def __init__(self, use_target_weight=False):
+ super(MyLoss, self).__init__()
+ self.criterion = my_loss
+ self.use_target_weight = use_target_weight
+
+ def forward(self, output, target, target_weight):
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ loss = 0.
+
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ loss += self.criterion(
+ heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx])
+ else:
+ loss += self.criterion(heatmap_pred, heatmap_gt)
+
+ return loss / num_joints
+```
+
+Then the users need to add it in the `mmpose/models/losses/__init__.py`.
+
+```python
+from .my_loss import MyLoss, my_loss
+
+```
+
+To use it, modify the `loss_keypoint` field in the model.
+
+```python
+loss_keypoint=dict(type='MyLoss', use_target_weight=False)
+```
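+
+As a quick sanity check (a sketch that assumes the `MyLoss` class above has been added and registered as described), you can build and call the loss on random heatmaps:
+
+```python
+import torch
+from mmpose.models import build_loss
+
+loss_fn = build_loss(dict(type='MyLoss', use_target_weight=False))
+output = torch.rand(2, 17, 64, 48)   # (batch, joints, heatmap_h, heatmap_w)
+target = torch.rand(2, 17, 64, 48)
+target_weight = torch.ones(2, 17, 1)
+print(loss_fn(output, target, target_weight))
+```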
diff --git a/vendor/ViTPose/docs/en/tutorials/5_export_model.md b/vendor/ViTPose/docs/en/tutorials/5_export_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..14d76100a4a3c17d9c82476c83279c2d02c958ce
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/5_export_model.md
@@ -0,0 +1,48 @@
+# Tutorial 5: Exporting a model to ONNX
+
+Open Neural Network Exchange [(ONNX)](https://onnx.ai/) is an open ecosystem that empowers AI developers to choose the right tools as their project evolves.
+
+
+
+- [Supported Models](#supported-models)
+- [Usage](#usage)
+ - [Prerequisite](#prerequisite)
+
+
+
+## Supported Models
+
+So far, our codebase supports ONNX exporting from PyTorch models trained with MMPose. The supported models include:
+
+- ResNet
+- HRNet
+- HigherHRNet
+
+## Usage
+
+For simple exporting, you can use the [script](/tools/pytorch2onnx.py) here. Note that the packages `onnx` and `onnxruntime` are required for verification after exporting.
+
+### Prerequisite
+
+First, install onnx and onnxruntime.
+
+```shell
+pip install onnx onnxruntime
+```
+
+We provide a Python script to export a PyTorch model trained with MMPose to ONNX.
+
+```shell
+python tools/deployment/pytorch2onnx.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--shape ${SHAPE}] \
+ [--verify] [--show] [--output-file ${OUTPUT_FILE}] [--opset-version ${VERSION}]
+```
+
+Optional arguments:
+
+- `--shape`: The shape of input tensor to the model. If not specified, it will be set to `1 3 256 192`.
+- `--verify`: Determines whether to verify the exported model, i.e. check that it runs and that its outputs match the PyTorch model numerically. If not specified, it will be set to `False`.
+- `--show`: Determines whether to print the architecture of the exported model. If not specified, it will be set to `False`.
+- `--output-file`: The output onnx model name. If not specified, it will be set to `tmp.onnx`.
+- `--opset-version`: Determines the operator set version of ONNX; we recommend using a higher version such as 11 for compatibility. If not specified, it will be set to `11`.
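+
+After exporting, you can sanity-check the ONNX file with `onnxruntime`; a minimal sketch (assuming the default output name `tmp.onnx` and the default input shape) is:
+
+```python
+import numpy as np
+import onnxruntime as ort
+
+sess = ort.InferenceSession('tmp.onnx')
+input_name = sess.get_inputs()[0].name
+dummy = np.random.rand(1, 3, 256, 192).astype(np.float32)
+heatmaps = sess.run(None, {input_name: dummy})[0]
+print(heatmaps.shape)
+```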
+
+Please file an issue if you discover any checkpoints that are not perfectly exported or suffer some loss in accuracy.
diff --git a/vendor/ViTPose/docs/en/tutorials/6_customize_runtime.md b/vendor/ViTPose/docs/en/tutorials/6_customize_runtime.md
new file mode 100644
index 0000000000000000000000000000000000000000..2803cd5c70577875db80fc3c91426682d2429bf0
--- /dev/null
+++ b/vendor/ViTPose/docs/en/tutorials/6_customize_runtime.md
@@ -0,0 +1,352 @@
+# Tutorial 6: Customize Runtime Settings
+
+In this tutorial, we will introduce how to customize optimization methods, training schedules, workflow and hooks when running your own settings for the project.
+
+
+
+- [Customize Optimization Methods](#customize-optimization-methods)
+ - [Customize optimizer supported by PyTorch](#customize-optimizer-supported-by-pytorch)
+ - [Customize self-implemented optimizer](#customize-self-implemented-optimizer)
+ - [1. Define a new optimizer](#1-define-a-new-optimizer)
+ - [2. Add the optimizer to registry](#2-add-the-optimizer-to-registry)
+ - [3. Specify the optimizer in the config file](#3-specify-the-optimizer-in-the-config-file)
+ - [Customize optimizer constructor](#customize-optimizer-constructor)
+ - [Additional settings](#additional-settings)
+- [Customize Training Schedules](#customize-training-schedules)
+- [Customize Workflow](#customize-workflow)
+- [Customize Hooks](#customize-hooks)
+ - [Customize self-implemented hooks](#customize-self-implemented-hooks)
+ - [1. Implement a new hook](#1-implement-a-new-hook)
+ - [2. Register the new hook](#2-register-the-new-hook)
+ - [3. Modify the config](#3-modify-the-config)
+ - [Use hooks implemented in MMCV](#use-hooks-implemented-in-mmcv)
+ - [Modify default runtime hooks](#modify-default-runtime-hooks)
+ - [Checkpoint config](#checkpoint-config)
+ - [Log config](#log-config)
+ - [Evaluation config](#evaluation-config)
+
+
+
+## Customize Optimization Methods
+
+### Customize optimizer supported by PyTorch
+
+We already support using all the optimizers implemented by PyTorch, and the only modification needed is to change the `optimizer` field of the config files.
+For example, if you want to use `Adam`, the modification could be as the following.
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+To modify the learning rate of the model, the users only need to modify the `lr` in the config of optimizer.
+The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
+
+For example, if you want to use `Adam` with the setting like `torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)` in PyTorch,
+the modification could be set as the following.
+
+```python
+optimizer = dict(type='Adam', lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
+```
+
+### Customize self-implemented optimizer
+
+#### 1. Define a new optimizer
+
+A customized optimizer could be defined as following.
+
+Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
+You need to create a new directory named `mmpose/core/optimizer`.
+And then implement the new optimizer in a file, e.g., in `mmpose/core/optimizer/my_optimizer.py`:
+
+```python
+from .builder import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class MyOptimizer(Optimizer):
+
+ def __init__(self, a, b, c):
+     ...
+
+```
+
+#### 2. Add the optimizer to registry
+
+To find the module defined above, it should be imported into the main namespace first. There are two ways to achieve this.
+
+- Modify `mmpose/core/optimizer/__init__.py` to import it.
+
+ The newly defined module should be imported in `mmpose/core/optimizer/__init__.py` so that the registry will
+ find the new module and add it:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+- Use `custom_imports` in the config to manually import it
+
+```python
+custom_imports = dict(imports=['mmpose.core.optimizer.my_optimizer'], allow_failed_imports=False)
+```
+
+The module `mmpose.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered.
+Note that only the package containing the class `MyOptimizer` should be imported. `mmpose.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly.
+
+#### 3. Specify the optimizer in the config file
+
+Then you can use `MyOptimizer` in `optimizer` field of config files.
+In the configs, the optimizers are defined by the field `optimizer` like the following:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+To use your own optimizer, the field can be changed to
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+### Customize optimizer constructor
+
+Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
+The users can do this fine-grained parameter tuning by customizing the optimizer constructor.
+
+```python
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
+from mmpose.utils import get_root_logger
+from .my_optimizer import MyOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class MyOptimizerConstructor:
+
+ def __init__(self, optimizer_cfg, paramwise_cfg=None):
+ pass
+
+ def __call__(self, model):
+
+ return my_optimizer
+```
+
+The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11),
+which could also serve as a template for new optimizer constructor.
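+
+For instance, a hand-rolled sketch of the idea mentioned above (skipping weight decay for normalization layers) could look like the following. This is plain PyTorch for illustration, not the MMCV constructor API:
+
+```python
+import torch
+import torch.nn as nn
+
+
+def build_sgd_without_norm_decay(model, lr=1e-3, weight_decay=1e-4):
+    """Put parameters of normalization layers into a no-weight-decay group."""
+    decay, no_decay = [], []
+    for module in model.modules():
+        is_norm = isinstance(module, (nn.BatchNorm2d, nn.GroupNorm, nn.LayerNorm))
+        for param in module.parameters(recurse=False):
+            (no_decay if is_norm else decay).append(param)
+    return torch.optim.SGD(
+        [dict(params=decay, weight_decay=weight_decay),
+         dict(params=no_decay, weight_decay=0.)],
+        lr=lr, momentum=0.9)
+```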
+
+### Additional settings
+
+Tricks not implemented by the optimizer should be implemented through the optimizer constructor (e.g., parameter-wise learning rates) or hooks.
+We list some common settings that could stabilize or accelerate the training. Feel free to create a PR or an issue for more settings.
+
+- __Use gradient clip to stabilize training__:
+ Some models need gradient clip to clip the gradients to stabilize the training process. An example is as below:
+
+ ```python
+ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+ ```
+
+- __Use momentum schedule to accelerate model convergence__:
+ We support momentum scheduler to modify model's momentum according to learning rate, which could make the model converge in a faster way.
+ Momentum scheduler is usually used with LR scheduler, for example, the following config is used in 3D detection to accelerate convergence.
+ For more details, please refer to the implementation of [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327)
+ and [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130).
+
+ ```python
+ lr_config = dict(
+ policy='cyclic',
+ target_ratio=(10, 1e-4),
+ cyclic_times=1,
+ step_ratio_up=0.4,
+ )
+ momentum_config = dict(
+ policy='cyclic',
+ target_ratio=(0.85 / 0.95, 1),
+ cyclic_times=1,
+ step_ratio_up=0.4,
+ )
+ ```
+
+## Customize Training Schedules
+
+We use the step learning rate schedule with default values in config files; this calls [`StepLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L153) in MMCV.
+We support many other learning rate schedules [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), such as the `CosineAnnealing` and `Poly` schedules. Here are some examples:
+
+- Poly schedule:
+
+ ```python
+ lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+ ```
+
+- CosineAnnealing schedule:
+
+ ```python
+ lr_config = dict(
+ policy='CosineAnnealing',
+ warmup='linear',
+ warmup_iters=1000,
+ warmup_ratio=1.0 / 10,
+ min_lr_ratio=1e-5)
+ ```
+
+## Customize Workflow
+
+By default, we recommend users to use `EpochEvalHook` to do evaluation after each training epoch, but they can still use the `val` workflow as an alternative.
+
+Workflow is a list of (phase, epochs) to specify the running order and epochs. By default it is set to be
+
+```python
+workflow = [('train', 1)]
+```
+
+which means running 1 epoch for training.
+Sometimes the user may want to check some metrics (e.g. loss, accuracy) of the model on the validation set.
+In such a case, we can set the workflow as
+
+```python
+[('train', 1), ('val', 1)]
+```
+
+so that 1 epoch for training and 1 epoch for validation will be run iteratively.
+
+```{note}
+1. The parameters of the model will not be updated during the val epoch.
+1. Keyword `total_epochs` in the config only controls the number of training epochs and will not affect the validation workflow.
+1. Workflows `[('train', 1), ('val', 1)]` and `[('train', 1)]` will not change the behavior of `EpochEvalHook`, because `EpochEvalHook` is called by `after_train_epoch`, and the validation workflow only affects hooks that are called through `after_val_epoch`.
+ Therefore, the only difference between `[('train', 1), ('val', 1)]` and `[('train', 1)]` is that the runner will calculate losses on the validation set after each training epoch.
+```
+
+## Customize Hooks
+
+### Customize self-implemented hooks
+
+#### 1. Implement a new hook
+
+Here we give an example of creating a new hook in MMPose and using it in training.
+
+```python
+from mmcv.runner import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class MyHook(Hook):
+
+ def __init__(self, a, b):
+ pass
+
+ def before_run(self, runner):
+ pass
+
+ def after_run(self, runner):
+ pass
+
+ def before_epoch(self, runner):
+ pass
+
+ def after_epoch(self, runner):
+ pass
+
+ def before_iter(self, runner):
+ pass
+
+ def after_iter(self, runner):
+ pass
+```
+
+Depending on the functionality of the hook, the users need to specify what the hook will do at each stage of the training in `before_run`, `after_run`, `before_epoch`, `after_epoch`, `before_iter`, and `after_iter`.
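+
+For instance, a minimal concrete hook (the name and behavior are purely illustrative, not part of MMPose) could time each training epoch:
+
+```python
+import time
+
+from mmcv.runner import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class EpochTimerHook(Hook):
+    """Illustrative hook: log how long each training epoch takes."""
+
+    def __init__(self, enable=True):
+        self.enable = enable
+        self._start = None
+
+    def before_epoch(self, runner):
+        self._start = time.time()
+
+    def after_epoch(self, runner):
+        if self.enable and self._start is not None:
+            runner.logger.info(
+                f'Epoch {runner.epoch + 1} took {time.time() - self._start:.1f}s')
+```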
+
+#### 2. Register the new hook
+
+Then we need to make sure `MyHook` is imported. Assuming the file is `mmpose/core/utils/my_hook.py`, there are two ways to do that:
+
+- Modify `mmpose/core/utils/__init__.py` to import it.
+
+ The newly defined module should be imported in `mmpose/core/utils/__init__.py` so that the registry will
+ find the new module and add it:
+
+```python
+from .my_hook import MyHook
+```
+
+- Use `custom_imports` in the config to manually import it
+
+```python
+custom_imports = dict(imports=['mmpose.core.utils.my_hook'], allow_failed_imports=False)
+```
+
+#### 3. Modify the config
+
+```python
+custom_hooks = [
+ dict(type='MyHook', a=a_value, b=b_value)
+]
+```
+
+You can also set the priority of the hook by adding the key `priority`, set to `'NORMAL'` or `'HIGHEST'`, as below
+
+```python
+custom_hooks = [
+ dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
+]
+```
+
+By default the hook's priority is set as `NORMAL` during registration.
+
+### Use hooks implemented in MMCV
+
+If the hook is already implemented in MMCV, you can directly modify the config to use the hook as below
+
+```python
+mmcv_hooks = [
+ dict(type='MMCVHook', a=a_value, b=b_value, priority='NORMAL')
+]
+```
+
+### Modify default runtime hooks
+
+There are some common hooks that are not registered through `custom_hooks` but are registered by default when importing MMCV. They are:
+
+- log_config
+- checkpoint_config
+- evaluation
+- lr_config
+- optimizer_config
+- momentum_config
+
+Among those hooks, only the logger hook has the `VERY_LOW` priority; the others' priority is `NORMAL`.
+The above-mentioned tutorials already cover how to modify `optimizer_config`, `momentum_config`, and `lr_config`.
+Here we show what we can do with `log_config`, `checkpoint_config`, and `evaluation`.
+
+#### Checkpoint config
+
+The MMCV runner will use `checkpoint_config` to initialize [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9).
+
+```python
+checkpoint_config = dict(interval=1)
+```
+
+The users could set `max_keep_ckpts` to save only a small number of checkpoints, or decide whether to store the state dict of the optimizer by `save_optimizer`.
+More details of the arguments are [here](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook).
+
+#### Log config
+
+The `log_config` wraps multiple logger hooks and enables setting intervals. Now MMCV supports `WandbLoggerHook`, `MlflowLoggerHook`, and `TensorboardLoggerHook`.
+The detail usages can be found in the [doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook).
+
+```python
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ dict(type='TensorboardLoggerHook')
+ ])
+```
+
+#### Evaluation config
+
+The config of `evaluation` will be used to initialize the [`EvalHook`](https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/evaluation/eval_hooks.py#L11).
+Except for the key `interval`, other arguments such as `metric` will be passed to `dataset.evaluate()`.
+
+```python
+evaluation = dict(interval=1, metric='mAP')
+```
diff --git a/vendor/ViTPose/docs/en/useful_tools.md b/vendor/ViTPose/docs/en/useful_tools.md
new file mode 100644
index 0000000000000000000000000000000000000000..a9d246dfdec0318f437b7faf03cf26144f22bcba
--- /dev/null
+++ b/vendor/ViTPose/docs/en/useful_tools.md
@@ -0,0 +1,232 @@
+# Useful Tools
+
+Apart from training/testing scripts, we provide lots of useful tools under the `tools/` directory.
+
+
+
+- [Log Analysis](#log-analysis)
+- [Model Complexity (experimental)](#model-complexity-experimental)
+- [Model Conversion](#model-conversion)
+ - [MMPose model to ONNX (experimental)](#mmpose-model-to-onnx-experimental)
+ - [Prepare a model for publishing](#prepare-a-model-for-publishing)
+- [Model Serving](#model-serving)
+- [Miscellaneous](#miscellaneous)
+ - [Evaluating a metric](#evaluating-a-metric)
+ - [Print the entire config](#print-the-entire-config)
+
+
+
+## Log Analysis
+
+`tools/analysis/analyze_logs.py` plots loss/pose acc curves given a training log file. Run `pip install seaborn` first to install the dependency.
+
+![acc_curve_image](imgs/acc_curve.png)
+
+```shell
+python tools/analysis/analyze_logs.py plot_curve ${JSON_LOGS} [--keys ${KEYS}] [--title ${TITLE}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}]
+```
+
+Examples:
+
+- Plot the mse loss of some run.
+
+ ```shell
+ python tools/analysis/analyze_logs.py plot_curve log.json --keys loss --legend loss
+ ```
+
+- Plot the acc of some run, and save the figure to a pdf.
+
+ ```shell
+ python tools/analysis/analyze_logs.py plot_curve log.json --keys acc_pose --out results.pdf
+ ```
+
+- Compare the acc of two runs in the same figure.
+
+ ```shell
+ python tools/analysis/analyze_logs.py plot_curve log1.json log2.json --keys acc_pose --legend run1 run2
+ ```
+
+You can also compute the average training speed.
+
+```shell
+python tools/analysis/analyze_logs.py cal_train_time ${JSON_LOGS} [--include-outliers]
+```
+
+- Compute the average training speed for a config file
+
+ ```shell
+ python tools/analysis/analyze_logs.py cal_train_time log.json
+ ```
+
+ The output is expected to be like the following.
+
+ ```text
+ -----Analyze train time of log.json-----
+ slowest epoch 114, average time is 0.9662
+ fastest epoch 16, average time is 0.7532
+ time std over epochs is 0.0426
+ average iter time: 0.8406 s/iter
+ ```
+
+## Model Complexity (Experimental)
+
+`/tools/analysis/get_flops.py` is a script adapted from [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) to compute the FLOPs and params of a given model.
+
+```shell
+python tools/analysis/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
+```
+
+We will get a result like this:
+
+```text
+
+==============================
+Input shape: (1, 3, 256, 192)
+Flops: 8.9 GMac
+Params: 28.04 M
+==============================
+```
+
+```{note}
+This tool is still experimental and we do not guarantee that the number is absolutely correct.
+```
+
+You may use the result for simple comparisons, but double check it before you adopt it in technical reports or papers.
+
+(1) FLOPs are related to the input shape while parameters are not, so make sure to use the same input shape (e.g. (1, 3, 256, 192) as in the example above) when comparing models.
+(2) Some operators are not counted into FLOPs like GN and custom operators. Refer to [`mmcv.cnn.get_model_complexity_info()`](https://github.com/open-mmlab/mmcv/blob/master/mmcv/cnn/utils/flops_counter.py) for details.
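+
+If you prefer to compute the complexity in your own script, a small sketch using the MMCV utility referenced above (with a torchvision ResNet-50 as a stand-in model) is:
+
+```python
+import torchvision
+from mmcv.cnn import get_model_complexity_info
+
+model = torchvision.models.resnet50()
+flops, params = get_model_complexity_info(
+    model, (3, 256, 192), as_strings=True, print_per_layer_stat=False)
+print(f'Flops: {flops}\nParams: {params}')
+```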
+
+## Model Conversion
+
+### MMPose model to ONNX (experimental)
+
+`/tools/deployment/pytorch2onnx.py` is a script to convert model to [ONNX](https://github.com/onnx/onnx) format.
+It also supports comparing the output results between Pytorch and ONNX model for verification.
+Run `pip install onnx onnxruntime` first to install the dependency.
+
+```shell
+python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --shape $SHAPE --verify
+```
+
+### Prepare a model for publishing
+
+`tools/publish_model.py` helps users to prepare their model for publishing.
+
+Before you upload a model to AWS, you may want to:
+
+(1) convert model weights to CPU tensors.
+(2) delete the optimizer states.
+(3) compute the hash of the checkpoint file and append the hash id to the filename.
+
+```shell
+python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME}
+```
+
+E.g.,
+
+```shell
+python tools/publish_model.py work_dirs/hrnet_w32_coco_256x192/latest.pth hrnet_w32_coco_256x192
+```
+
+The final output filename will be `hrnet_w32_coco_256x192-{hash id}_{time_stamp}.pth`.
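+
+Under the hood, these steps roughly amount to the following (a sketch for illustration, not the actual `tools/publish_model.py`):
+
+```python
+import hashlib
+
+import torch
+
+ckpt = torch.load('work_dirs/hrnet_w32_coco_256x192/latest.pth', map_location='cpu')
+ckpt.pop('optimizer', None)                      # drop optimizer states
+torch.save(ckpt, 'hrnet_w32_coco_256x192.pth')   # weights are now CPU tensors
+
+with open('hrnet_w32_coco_256x192.pth', 'rb') as f:
+    sha = hashlib.sha256(f.read()).hexdigest()[:8]
+print(f'published name: hrnet_w32_coco_256x192-{sha}.pth')
+```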
+
+## Model Serving
+
+MMPose supports model serving with [`TorchServe`](https://pytorch.org/serve/). You can serve an MMPose model via the following steps:
+
+### 1. Install TorchServe
+
+Please follow the official installation guide of TorchServe: https://github.com/pytorch/serve#install-torchserve-and-torch-model-archiver
+
+### 2. Convert model from MMPose to TorchServe
+
+```shell
+python tools/deployment/mmpose2torchserve.py \
+ ${CONFIG_FILE} ${CHECKPOINT_FILE} \
+ --output-folder ${MODEL_STORE} \
+ --model-name ${MODEL_NAME}
+```
+
+**Note**: ${MODEL_STORE} needs to be an absolute path to a folder.
+
+A model file `${MODEL_NAME}.mar` will be generated and placed in the `${MODEL_STORE}` folder.
+
+### 3. Deploy model serving
+
+We introduce the following two approaches to deploy the model serving.
+
+#### Use TorchServe API
+
+```shell
+torchserve --start \
+ --model-store ${MODEL_STORE} \
+ --models ${MODEL_PATH1} [${MODEL_NAME}=${MODEL_PATH2} ... ]
+```
+
+Example:
+
+```shell
+# serve one model
+torchserve --start --model-store /models --models hrnet=hrnet.mar
+
+# serve all models in model-store
+torchserve --start --model-store /models --models all
+```
+
+After executing the `torchserve` command above, TorchServe runs on your host, listening for inference requests. Check the [official docs](https://github.com/pytorch/serve/blob/master/docs/server.md) for more information.
+
+#### Use `mmpose-serve` docker image
+
+**Build `mmpose-serve` docker image:**
+
+```shell
+docker build -t mmpose-serve:latest docker/serve/
+```
+
+**Run `mmpose-serve`:**
+
+Check the official docs for [running TorchServe with docker](https://github.com/pytorch/serve/blob/master/docker/README.md#running-torchserve-in-a-production-docker-environment).
+
+In order to run on GPU, you need to install [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). You can omit the `--gpus` argument in order to run on CPU.
+
+Example:
+
+```shell
+docker run --rm \
+--cpus 8 \
+--gpus device=0 \
+-p8080:8080 -p8081:8081 -p8082:8082 \
+--mount type=bind,source=$MODEL_STORE,target=/home/model-server/model-store \
+mmpose-serve:latest
+```
+
+[Read the docs](https://github.com/pytorch/serve/blob/072f5d088cce9bb64b2a18af065886c9b01b317b/docs/rest_api.md/) about the Inference (8080), Management (8081) and Metrics (8082) APIs.
+
+### 4. Test deployment
+
+You can use `tools/deployment/test_torchserver.py` to test the model serving. It will compare and visualize the results of TorchServe and PyTorch.
+
+```shell
+python tools/deployment/test_torchserver.py ${IMAGE_PATH} ${CONFIG_PATH} ${CHECKPOINT_PATH} ${MODEL_NAME} --out-dir ${OUT_DIR}
+```
+
+Example:
+
+```shell
+python tools/deployment/test_torchserver.py \
+ tests/data/coco/000000000785.jpg \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ hrnet \
+ --out-dir vis_results
+```
+
+## Miscellaneous
+
+### Print the entire config
+
+`tools/analysis/print_config.py` prints the whole config verbatim, expanding all its imports.
+
+```shell
+python tools/analysis/print_config.py ${CONFIG} [-h] [--options ${OPTIONS [OPTIONS...]}]
+```
diff --git a/vendor/ViTPose/docs/zh_cn/Makefile b/vendor/ViTPose/docs/zh_cn/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/vendor/ViTPose/docs/zh_cn/_static/css/readthedocs.css b/vendor/ViTPose/docs/zh_cn/_static/css/readthedocs.css
new file mode 100644
index 0000000000000000000000000000000000000000..efc4b986a5348c645842a135883d4713986a7169
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/_static/css/readthedocs.css
@@ -0,0 +1,6 @@
+.header-logo {
+ background-image: url("../images/mmpose-logo.png");
+ background-size: 120px 50px;
+ height: 50px;
+ width: 120px;
+}
diff --git a/vendor/ViTPose/docs/zh_cn/_static/images/mmpose-logo.png b/vendor/ViTPose/docs/zh_cn/_static/images/mmpose-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..128e1714f0933d0dfe0ab82d6f8780c48e0edc21
Binary files /dev/null and b/vendor/ViTPose/docs/zh_cn/_static/images/mmpose-logo.png differ
diff --git a/vendor/ViTPose/docs/zh_cn/api.rst b/vendor/ViTPose/docs/zh_cn/api.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2856891b9f115e076e76a48c03fafe787a8f0ec4
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/api.rst
@@ -0,0 +1,109 @@
+mmpose.apis
+-------------
+.. automodule:: mmpose.apis
+ :members:
+
+
+mmpose.core
+-------------
+evaluation
+^^^^^^^^^^^
+.. automodule:: mmpose.core.evaluation
+ :members:
+
+fp16
+^^^^^^^^^^^
+.. automodule:: mmpose.core.fp16
+ :members:
+
+
+utils
+^^^^^^^^^^^
+.. automodule:: mmpose.core.utils
+ :members:
+
+
+post_processing
+^^^^^^^^^^^^^^^^
+.. automodule:: mmpose.core.post_processing
+ :members:
+
+
+mmpose.models
+---------------
+backbones
+^^^^^^^^^^^
+.. automodule:: mmpose.models.backbones
+ :members:
+
+necks
+^^^^^^^^^^^
+.. automodule:: mmpose.models.necks
+ :members:
+
+detectors
+^^^^^^^^^^^
+.. automodule:: mmpose.models.detectors
+ :members:
+
+heads
+^^^^^^^^^^^^^^^
+.. automodule:: mmpose.models.heads
+ :members:
+
+losses
+^^^^^^^^^^^
+.. automodule:: mmpose.models.losses
+ :members:
+
+misc
+^^^^^^^^^^^
+.. automodule:: mmpose.models.misc
+ :members:
+
+mmpose.datasets
+-----------------
+.. automodule:: mmpose.datasets
+ :members:
+
+datasets
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.datasets.top_down
+ :members:
+
+.. automodule:: mmpose.datasets.datasets.bottom_up
+ :members:
+
+pipelines
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.pipelines
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.loading
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.shared_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.top_down_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.bottom_up_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.mesh_transform
+ :members:
+
+.. automodule:: mmpose.datasets.pipelines.pose3d_transform
+ :members:
+
+samplers
+^^^^^^^^^^^
+.. automodule:: mmpose.datasets.samplers
+ :members:
+
+
+mmpose.utils
+---------------
+.. automodule:: mmpose.utils
+ :members:
diff --git a/vendor/ViTPose/docs/zh_cn/benchmark.md b/vendor/ViTPose/docs/zh_cn/benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..0de8844a4aab8ea06ab353c3a8e7b40a6767d840
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/benchmark.md
@@ -0,0 +1,3 @@
+# 基准测试
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/collect.py b/vendor/ViTPose/docs/zh_cn/collect.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f8aedee0616d0bcf61d325feeced3738d524218
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/collect.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import re
+from glob import glob
+
+from titlecase import titlecase
+
+os.makedirs('topics', exist_ok=True)
+os.makedirs('papers', exist_ok=True)
+
+# Step 1: get subtopics: a mix of topic and task
+minisections = [
+ x.split('/')[-2:] for x in glob('../../configs/*/*') if '_base_' not in x
+]
+alltopics = sorted(list(set(x[0] for x in minisections)))
+subtopics = []
+for t in alltopics:
+ data = [x[1].split('_') for x in minisections if x[0] == t]
+ valid_ids = []
+ for i in range(len(data[0])):
+ if len(set(x[i] for x in data)) > 1:
+ valid_ids.append(i)
+ if len(valid_ids) > 0:
+ subtopics.extend([
+ f"{titlecase(t)}({','.join([d[i].title() for i in valid_ids])})",
+ t, '_'.join(d)
+ ] for d in data)
+ else:
+ subtopics.append([titlecase(t), t, '_'.join(data[0])])
+
+contents = {}
+for subtopic, topic, task in sorted(subtopics):
+ # Step 2: get all datasets
+ datasets = sorted(
+ list(
+ set(
+ x.split('/')[-2]
+ for x in glob(f'../../configs/{topic}/{task}/*/*/'))))
+ contents[subtopic] = {d: {} for d in datasets}
+ for dataset in datasets:
+ # Step 3: get all settings: algorithm + backbone + trick
+ for file in glob(f'../../configs/{topic}/{task}/*/{dataset}/*.md'):
+ keywords = (file.split('/')[-3],
+ *file.split('/')[-1].split('_')[:-1])
+ with open(file, 'r') as f:
+ contents[subtopic][dataset][keywords] = f.read()
+
+# Step 4: write files by topic
+for subtopic, datasets in contents.items():
+ lines = [f'# {subtopic}', '']
+ for dataset, keywords in datasets.items():
+ if len(keywords) == 0:
+ continue
+ lines += [
+ ' ', ' ', '', f'## {titlecase(dataset)} Dataset', ''
+ ]
+ for keyword, info in keywords.items():
+ keyword_strs = [titlecase(x.replace('_', ' ')) for x in keyword]
+ lines += [
+ ' ', '',
+ (f'### {" + ".join(keyword_strs)}'
+ f' on {titlecase(dataset)}'), '', info, ''
+ ]
+
+ with open(f'topics/{subtopic.lower()}.md', 'w') as f:
+ f.write('\n'.join(lines))
+
+# Step 5: write files by paper
+allfiles = [x.split('/')[-2:] for x in glob('../en/papers/*/*.md')]
+sections = sorted(list(set(x[0] for x in allfiles)))
+for section in sections:
+ lines = [f'# {titlecase(section)}', '']
+ files = [f for s, f in allfiles if s == section]
+ for file in files:
+ with open(f'../en/papers/{section}/{file}', 'r') as f:
+ keyline = [
+                line for line in f.readlines() if line.startswith('<!-- [')
+            ][0]
+        papername = re.sub(r'<!--\s*\[(.*?)\]\s*-->', '', keyline).strip()
+ paperlines = []
+ for subtopic, datasets in contents.items():
+ for dataset, keywords in datasets.items():
+ keywords = {k: v for k, v in keywords.items() if keyline in v}
+ if len(keywords) == 0:
+ continue
+ for keyword, info in keywords.items():
+ keyword_strs = [
+ titlecase(x.replace('_', ' ')) for x in keyword
+ ]
+ paperlines += [
+ ' ', '',
+ (f'### {" + ".join(keyword_strs)}'
+ f' on {titlecase(dataset)}'), '', info, ''
+ ]
+ if len(paperlines) > 0:
+ lines += [' ', ' ', '', f'## {papername}', '']
+ lines += paperlines
+
+ with open(f'papers/{section}.md', 'w') as f:
+ f.write('\n'.join(lines))
diff --git a/vendor/ViTPose/docs/zh_cn/conf.py b/vendor/ViTPose/docs/zh_cn/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..991325547d5ddded70c65bca7fc00bd02ba3bcdb
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/conf.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import subprocess
+import sys
+
+import pytorch_sphinx_theme
+
+sys.path.insert(0, os.path.abspath('../..'))
+
+# -- Project information -----------------------------------------------------
+
+project = 'MMPose'
+copyright = '2020-2021, OpenMMLab'
+author = 'MMPose Authors'
+
+# The full version, including alpha/beta/rc tags
+version_file = '../../mmpose/version.py'
+
+
+def get_version():
+ with open(version_file, 'r') as f:
+ exec(compile(f.read(), version_file, 'exec'))
+ return locals()['__version__']
+
+
+release = get_version()
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode',
+ 'sphinx_markdown_tables', 'sphinx_copybutton', 'myst_parser'
+]
+
+autodoc_mock_imports = ['json_tricks', 'mmpose.version']
+
+# Ignore >>> when copying code
+copybutton_prompt_text = r'>>> |\.\.\. '
+copybutton_prompt_is_regexp = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# -- Options for HTML output -------------------------------------------------
+source_suffix = {
+ '.rst': 'restructuredtext',
+ '.md': 'markdown',
+}
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'pytorch_sphinx_theme'
+html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+html_theme_options = {
+ 'menu': [{
+ 'name':
+ '教程',
+ 'url':
+ 'https://colab.research.google.com/github/'
+ 'open-mmlab/mmpose/blob/master/demo/MMPose_Tutorial.ipynb'
+ }, {
+ 'name': 'GitHub',
+ 'url': 'https://github.com/open-mmlab/mmpose'
+ }],
+ 'menu_lang':
+ 'cn'
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+
+language = 'zh_CN'
+
+html_static_path = ['_static']
+html_css_files = ['css/readthedocs.css']
+
+# Enable ::: for myst
+myst_enable_extensions = ['colon_fence']
+
+master_doc = 'index'
+
+
+def builder_inited_handler(app):
+ subprocess.run(['./collect.py'])
+ subprocess.run(['./merge_docs.sh'])
+ subprocess.run(['./stats.py'])
+
+
+def setup(app):
+ app.connect('builder-inited', builder_inited_handler)
diff --git a/vendor/ViTPose/docs/zh_cn/data_preparation.md b/vendor/ViTPose/docs/zh_cn/data_preparation.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee91f6f1f596377c0a4fff7a00ffe3e0492c61b7
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/data_preparation.md
@@ -0,0 +1,13 @@
+# 准备数据集
+
+MMPose支持多种姿态估计任务,对应的数据集准备方法请参考下列文档。
+
+- [2D人体关键点](tasks/2d_body_keypoint.md)
+- [3D人体关键点](tasks/3d_body_keypoint.md)
+- [3D人体网格模型](tasks/3d_body_mesh.md)
+- [2D手部关键点](tasks/2d_hand_keypoint.md)
+- [3D手部关键点](tasks/3d_hand_keypoint.md)
+- [2D人脸关键点](tasks/2d_face_keypoint.md)
+- [2D全身人体关键点](tasks/2d_wholebody_keypoint.md)
+- [2D服装关键点](tasks/2d_fashion_landmark.md)
+- [2D动物关键点](tasks/2d_animal_keypoint.md)
diff --git a/vendor/ViTPose/docs/zh_cn/faq.md b/vendor/ViTPose/docs/zh_cn/faq.md
new file mode 100644
index 0000000000000000000000000000000000000000..0bb8e6cf161eed3f7e9d71cd301ee0d4a84114bc
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/faq.md
@@ -0,0 +1,3 @@
+# 常见问题
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/getting_started.md b/vendor/ViTPose/docs/zh_cn/getting_started.md
new file mode 100644
index 0000000000000000000000000000000000000000..c8b1b26050272b3faf7042dbaf2959bc09fb16e4
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/getting_started.md
@@ -0,0 +1,270 @@
+# 基础教程
+
+本文档提供 MMPose 的基础使用教程。请先参阅 [安装指南](install.md),进行 MMPose 的安装。
+
+
+
+- [准备数据集](#准备数据集)
+- [使用预训练模型进行推理](#使用预训练模型进行推理)
+ - [测试某个数据集](#测试某个数据集)
+ - [运行演示](#运行演示)
+- [如何训练模型](#如何训练模型)
+ - [使用单个 GPU 训练](#使用单个-GPU-训练)
+ - [使用 CPU 训练](#使用-CPU-训练)
+ - [使用多个 GPU 训练](#使用多个-GPU-训练)
+ - [使用多台机器训练](#使用多台机器训练)
+ - [使用单台机器启动多个任务](#使用单台机器启动多个任务)
+- [基准测试](#基准测试)
+- [进阶教程](#进阶教程)
+
+
+
+## 准备数据集
+
+MMPose 支持各种不同的任务。请根据需要,查阅对应的数据集准备教程。
+
+- [2D 人体关键点检测](/docs/zh_cn/tasks/2d_body_keypoint.md)
+- [3D 人体关键点检测](/docs/zh_cn/tasks/3d_body_keypoint.md)
+- [3D 人体形状恢复](/docs/zh_cn/tasks/3d_body_mesh.md)
+- [2D 人手关键点检测](/docs/zh_cn/tasks/2d_hand_keypoint.md)
+- [3D 人手关键点检测](/docs/zh_cn/tasks/3d_hand_keypoint.md)
+- [2D 人脸关键点检测](/docs/zh_cn/tasks/2d_face_keypoint.md)
+- [2D 全身人体关键点检测](/docs/zh_cn/tasks/2d_wholebody_keypoint.md)
+- [2D 服饰关键点检测](/docs/zh_cn/tasks/2d_fashion_landmark.md)
+- [2D 动物关键点检测](/docs/zh_cn/tasks/2d_animal_keypoint.md)
+
+## 使用预训练模型进行推理
+
+MMPose 提供了一些测试脚本用于测试数据集上的指标(如 COCO, MPII 等),
+并提供了一些高级 API,使您可以轻松使用 MMPose。
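+
+下面给出一个调用高级 API 对单张图片进行 top-down 推理的简单示意(其中的配置文件、权重链接、图片路径和人体框数值均为示例假设,具体接口请以所安装版本的 `mmpose.apis` 为准):
+
+```python
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result)
+
+# 初始化姿态估计模型(配置与权重仅为示例)
+pose_model = init_pose_model(
+    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py',
+    'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth',
+    device='cuda:0')
+
+# 人体框可以来自人工标注或目标检测器,格式为 xywh(此处数值为假设)
+person_results = [{'bbox': [50, 50, 200, 400]}]
+
+image_path = 'tests/data/coco/000000000785.jpg'  # 示例图片路径
+pose_results, _ = inference_top_down_pose_model(
+    pose_model, image_path, person_results, format='xywh')
+
+# 可视化关键点并保存结果
+vis_pose_result(pose_model, image_path, pose_results, out_file='vis_result.jpg')
+```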
+
+### 测试某个数据集
+
+- [x] 单 GPU 测试
+- [x] CPU 测试
+- [x] 单节点多 GPU 测试
+- [x] 多节点测试
+
+用户可使用以下命令测试数据集
+
+```shell
+# 单 GPU 测试
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--fuse-conv-bn] \
+ [--eval ${EVAL_METRICS}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--cfg-options ${CFG_OPTIONS}] \
+ [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
+
+# CPU 测试:禁用 GPU 并运行测试脚本
+export CUDA_VISIBLE_DEVICES=-1
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] \
+ [--eval ${EVAL_METRICS}]
+
+# 多 GPU 测试
+./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] \
+    [--gpu-collect] [--tmpdir ${TMPDIR}] [--cfg-options ${CFG_OPTIONS}] \
+ [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
+```
+
+此处的 `CHECKPOINT_FILE` 可以是本地的模型权重文件的路径,也可以是模型的下载链接。
+
+可选参数:
+
+- `RESULT_FILE`:输出结果文件名。如果没有被指定,则不会保存测试结果。
+- `--fuse-conv-bn`: 是否融合 BN 和 Conv 层。该操作会略微提升模型推理速度。
+- `EVAL_METRICS`:测试指标。其可选值与数据集相关,如 `mAP` 适用于 COCO 等数据集,`PCK`、`AUC`、`EPE` 适用于 OneHand10K 等数据集。
+- `--gpu-collect`:如果被指定,姿态估计结果将会通过 GPU 通信进行收集。否则,它将被存储到不同 GPU 上的 `TMPDIR` 文件夹中,并在 rank 0 的进程中被收集。
+- `TMPDIR`:用于存储不同进程收集的结果文件的临时文件夹。该变量仅当 `--gpu-collect` 没有被指定时有效。
+- `CFG_OPTIONS`:覆盖配置文件中的一些实验设置。比如,可以设置'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True',在线修改配置文件内容。
+- `JOB_LAUNCHER`:分布式任务初始化启动器选项。可选值有 `none`,`pytorch`,`slurm`,`mpi`。特别地,如果被设置为 `none`, 则会以非分布式模式进行测试。
+- `LOCAL_RANK`:本地 rank 的 ID。如果没有被指定,则会被设置为 0。
+
+例子:
+
+假定用户将下载的模型权重文件放置在 `checkpoints/` 目录下。
+
+1. 在 COCO 数据集下测试 ResNet50(不存储测试结果为文件),并验证 `mAP` 指标
+
+ ```shell
+ ./tools/dist_test.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ checkpoints/SOME_CHECKPOINT.pth 1 \
+ --eval mAP
+ ```
+
+1. 使用 8 块 GPU 在 COCO 数据集下测试 ResNet。在线下载模型权重,并验证 `mAP` 指标。
+
+ ```shell
+ ./tools/dist_test.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth 8 \
+ --eval mAP
+ ```
+
+1. 在 slurm 分布式环境中测试 ResNet50 在 COCO 数据集下的 `mAP` 指标
+
+ ```shell
+ ./tools/slurm_test.sh slurm_partition test_job \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py \
+ checkpoints/SOME_CHECKPOINT.pth \
+ --eval mAP
+ ```
+
+### 运行演示
+
+我们提供了丰富的脚本,方便大家快速运行演示。
+下面是多人人体姿态估计的演示示例,此处我们使用了人工标注的人体框作为输入。
+
+```shell
+python demo/top_down_img_demo.py \
+ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \
+ --img-root ${IMG_ROOT} --json-file ${JSON_FILE} \
+ --out-img-root ${OUTPUT_DIR} \
+ [--show --device ${GPU_ID}] \
+ [--kpt-thr ${KPT_SCORE_THR}]
+```
+
+例子:
+
+```shell
+python demo/top_down_img_demo.py \
+ configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+ https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+ --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json \
+ --out-img-root vis_results
+```
+
+更多实例和细节可以查看 [demo文件夹](/demo) 和 [demo文档](https://mmpose.readthedocs.io/en/latest/demo.html)。
+
+## 如何训练模型
+
+MMPose 使用 `MMDistributedDataParallel` 进行分布式训练,使用 `MMDataParallel` 进行非分布式训练。
+
+对于单机多卡与多台机器的情况,MMPose 使用分布式训练。假设服务器有 8 块 GPU,则会启动 8 个进程,并且每块 GPU 对应一个进程。
+
+每个进程拥有一个独立的模型,以及对应的数据加载器和优化器。
+模型参数同步只发生于最开始。之后,每经过一次前向与后向计算,所有 GPU 中梯度都执行一次 allreduce 操作,而后优化器将更新模型参数。
+由于梯度执行了 allreduce 操作,因此不同 GPU 中模型参数将保持一致。
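+
+为便于理解上述梯度同步过程,下面给出一个仅作演示的 PyTorch 分布式示意(假设进程组已经初始化;MMPose 实际由 `MMDistributedDataParallel` 自动完成这一步,无需手写):
+
+```python
+import torch.distributed as dist
+
+
+def average_gradients(model):
+    """对所有进程的梯度求和(allreduce)并取平均,使各 GPU 上的参数更新保持一致。"""
+    world_size = dist.get_world_size()
+    for param in model.parameters():
+        if param.grad is not None:
+            dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
+            param.grad.data /= world_size
+```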
+
+### 训练配置
+
+所有的输出(日志文件和模型权重文件)会被保存到工作目录下。工作目录通过配置文件中的参数 `work_dir` 指定。
+
+默认情况下,MMPose 在每个训练轮次(epoch)结束后会在验证集上评估模型,可以通过在训练配置中修改 `interval` 参数来更改评估间隔:
+
+```python
+evaluation = dict(interval=5) # 每 5 轮训练进行一次模型评估
+```
+
+根据 [Linear Scaling Rule](https://arxiv.org/abs/1706.02677),当 GPU 数量或每个 GPU 上的样本批大小改变时,用户可根据总批大小按比例地调整学习率,如,当 4 GPUs x 2 样本/gpu 时,lr=0.01;当 16 GPUs x 4 样本/gpu 时,lr=0.08。
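+
+按照该规则,新学习率与总批大小成正比,可用如下示意代码计算(其中的基准数值直接取自上文示例):
+
+```python
+def scale_lr(base_lr, base_total_batch, new_total_batch):
+    """Linear Scaling Rule:学习率随总批大小线性缩放。"""
+    return base_lr * new_total_batch / base_total_batch
+
+
+# 基准:4 GPUs x 2 样本/gpu,lr=0.01
+print(scale_lr(0.01, 4 * 2, 16 * 4))  # 16 GPUs x 4 样本/gpu -> 0.08
+```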
+
+### 使用单个 GPU 训练
+
+```shell
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+如果用户想在命令中指定工作目录,则需要增加参数 `--work-dir ${YOUR_WORK_DIR}`
+
+### 使用 CPU 训练
+
+使用 CPU 训练的流程和使用单 GPU 训练的流程一致,我们仅需要在训练流程开始前禁用 GPU。
+
+```shell
+export CUDA_VISIBLE_DEVICES=-1
+```
+
+之后运行单 GPU 训练脚本即可。
+
+**注意**:
+
+我们不推荐用户使用 CPU 进行训练,这太过缓慢。我们支持这个功能是为了方便用户在没有 GPU 的机器上进行调试。
+
+### 使用多个 GPU 训练
+
+```shell
+./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
+```
+
+可选参数为:
+
+- `--work-dir ${WORK_DIR}`:覆盖配置文件中指定的工作目录。
+- `--resume-from ${CHECKPOINT_FILE}`:从以前的模型权重文件恢复训练。
+- `--no-validate`: 在训练过程中,不进行验证。
+- `--gpus ${GPU_NUM}`:使用的 GPU 数量,仅适用于非分布式训练。
+- `--gpu-ids ${GPU_IDS}`:使用的 GPU ID,仅适用于非分布式训练。
+- `--seed ${SEED}`:设置 python,numpy 和 pytorch 里的种子 ID,用于生成随机数。
+- `--deterministic`:如果被指定,程序将设置 CUDNN 后端的确定化选项。
+- `--cfg-options CFG_OPTIONS`:覆盖配置文件中的一些实验设置。比如,可以设置'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True',在线修改配置文件内容。
+- `--launcher ${JOB_LAUNCHER}`:分布式任务初始化启动器选项。可选值有 `none`,`pytorch`,`slurm`,`mpi`。特别地,如果被设置为 `none`, 则会以非分布式模式进行测试。
+- `--autoscale-lr`:根据 [Linear Scaling Rule](https://arxiv.org/abs/1706.02677),当 GPU 数量或每个 GPU 上的样本批大小改变时,按总批大小比例自动调整学习率。
+- `LOCAL_RANK`:本地 rank 的 ID。如果没有被指定,则会被设置为 0。
+
+`resume-from` 和 `load-from` 的区别:
+`resume-from` 加载模型参数和优化器状态,并且保留检查点所在的训练轮数,常被用于恢复意外被中断的训练。
+`load-from` 只加载模型参数,但训练轮数从 0 开始计数,常被用于微调模型。
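+
+二者在配置文件中的写法可以示意如下(通常只设置其一,路径和链接仅为示例):
+
+```python
+# 从意外中断处恢复训练:同时恢复优化器状态和训练轮数
+resume_from = 'work_dirs/res50_coco_256x192/latest.pth'
+
+# 仅加载模型参数用于微调:训练轮数从 0 开始
+# load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth'
+```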
+
+这里提供一个使用 8 块 GPU 从已有模型权重文件恢复训练 ResNet50 的例子。
+
+```shell
+./tools/dist_train.sh configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py 8 --resume-from work_dirs/res50_coco_256x192/latest.pth
+```
+
+### 使用多台机器训练
+
+如果用户在 [slurm](https://slurm.schedmd.com/) 集群上运行 MMPose,可使用 `slurm_train.sh` 脚本。(该脚本也支持单台机器上训练)
+
+```shell
+[GPUS=${GPUS}] ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} [--work-dir ${WORK_DIR}]
+```
+
+这里给出一个在 slurm 集群上的 dev 分区使用 16 块 GPU 训练 ResNet50 的例子。
+使用 `GPUS_PER_NODE=8` 参数来指定一个有 8 块 GPU 的 slurm 集群节点,使用 `CPUS_PER_TASK=2` 来指定每个任务使用 2 块 CPU。
+
+```shell
+GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh Test res50 configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py work_dirs/res50_coco_256x192
+```
+
+用户可以查看 [slurm_train.sh](/tools/slurm_train.sh) 文件来检查完整的参数和环境变量。
+
+如果用户的多台机器通过 Ethernet 连接,则可以参考 pytorch [launch utility](https://pytorch.org/docs/stable/distributed.html#launch-utility)。如果用户没有 InfiniBand 等高速网络,训练速度将会非常慢。
+
+### 使用单台机器启动多个任务
+
+如果用户需要在单台机器上启动多个任务,如在有 8 块 GPU 的单台机器上启动 2 个需要 4 块 GPU 的训练任务,则需要为每个任务指定不同端口,以避免通信冲突。
+
+如果用户使用 `dist_train.sh` 脚本启动训练任务,则可以通过以下命令指定端口
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG_FILE} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG_FILE} 4
+```
+
+如果用户在 slurm 集群下启动多个训练任务,则需要修改配置文件(通常是配置文件的第 4 行)中的 `dist_params` 变量,以设置不同的通信端口。
+
+在 `config1.py` 中,
+
+```python
+dist_params = dict(backend='nccl', port=29500)
+```
+
+在 `config2.py` 中,
+
+```python
+dist_params = dict(backend='nccl', port=29501)
+```
+
+之后便可启动两个任务,分别对应 `config1.py` 和 `config2.py`。
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py [--work-dir ${WORK_DIR}]
+CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py [--work-dir ${WORK_DIR}]
+```
+
+## 进阶教程
+
+目前,MMPose 提供了以下更详细的教程:
+
+- [如何编写配置文件](tutorials/0_config.md)
+- [如何微调模型](tutorials/1_finetune.md)
+- [如何增加新数据集](tutorials/2_new_dataset.md)
+- [如何设计数据处理流程](tutorials/3_data_pipeline.md)
+- [如何增加新模块](tutorials/4_new_modules.md)
+- [如何导出模型为 onnx 格式](tutorials/5_export_model.md)
+- [如何自定义模型运行参数](tutorials/6_customize_runtime.md)
diff --git a/vendor/ViTPose/docs/zh_cn/index.rst b/vendor/ViTPose/docs/zh_cn/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e51f885cb7238f034c13c8da23c194e26a8a7263
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/index.rst
@@ -0,0 +1,97 @@
+欢迎来到 MMPose 中文文档!
+==================================
+
+您可以在页面左下角切换文档语言。
+
+You can change the documentation language at the lower-left corner of the page.
+
+.. toctree::
+ :maxdepth: 2
+
+ install.md
+ getting_started.md
+ demo.md
+ benchmark.md
+ inference_speed_summary.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: 数据集
+
+ datasets.md
+ tasks/2d_body_keypoint.md
+ tasks/2d_wholebody_keypoint.md
+ tasks/2d_face_keypoint.md
+ tasks/2d_hand_keypoint.md
+ tasks/2d_fashion_landmark.md
+ tasks/2d_animal_keypoint.md
+ tasks/3d_body_keypoint.md
+ tasks/3d_body_mesh.md
+ tasks/3d_hand_keypoint.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: 模型池
+
+ modelzoo.md
+ topics/animal.md
+ topics/body(2d,kpt,sview,img).md
+ topics/body(2d,kpt,sview,vid).md
+ topics/body(3d,kpt,sview,img).md
+ topics/body(3d,kpt,sview,vid).md
+ topics/body(3d,kpt,mview,img).md
+ topics/body(3d,mesh,sview,img).md
+ topics/face.md
+ topics/fashion.md
+ topics/hand(2d).md
+ topics/hand(3d).md
+ topics/wholebody.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: 模型池(按论文整理)
+
+ papers/algorithms.md
+ papers/backbones.md
+ papers/datasets.md
+ papers/techniques.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: 教程
+
+ tutorials/0_config.md
+ tutorials/1_finetune.md
+ tutorials/2_new_dataset.md
+ tutorials/3_data_pipeline.md
+ tutorials/4_new_modules.md
+ tutorials/5_export_model.md
+ tutorials/6_customize_runtime.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: 常用工具
+
+ useful_tools.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Notes
+
+ faq.md
+
+.. toctree::
+ :caption: API文档
+
+ api.rst
+
+.. toctree::
+ :caption: 语言
+
+   language.md
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/vendor/ViTPose/docs/zh_cn/inference_speed_summary.md b/vendor/ViTPose/docs/zh_cn/inference_speed_summary.md
new file mode 100644
index 0000000000000000000000000000000000000000..f5a23fc6127c18e157374337612549f23ada592c
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/inference_speed_summary.md
@@ -0,0 +1,114 @@
+# 推理速度总结
+
+这里总结了 MMPose 中主要模型的复杂度信息和推理速度,包括模型的计算复杂度、参数数量,以及以不同的批处理大小在 CPU 和 GPU 上的推理速度。还比较了不同模型在 COCO 人体关键点数据集上的全类别平均正确率,展示了模型性能和模型复杂度之间的折中。
+
+## 比较规则
+
+为了保证比较的公平性,在相同的硬件和软件环境下使用相同的数据集进行了比较实验。还列出了模型在 COCO 人体关键点数据集上的全类别平均正确率以及相应的配置文件。
+
+对于模型复杂度信息,计算具有相应输入形状的模型的浮点数运算次数和参数数量。请注意,当前某些网络层或算子还未支持,如 `DeformConv2d` ,因此您可能需要检查是否所有操作都已支持,并验证浮点数运算次数和参数数量的计算是否正确。
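+
+作为参考,浮点数运算次数和参数数量可以借助 mmcv 提供的工具估算,下面是一个简化示意(这里用 torchvision 的 ResNet-50 代替 MMPose 模型,输入尺寸也仅为示例;实际统计请使用仓库内相应的分析脚本):
+
+```python
+from mmcv.cnn import get_model_complexity_info
+from torchvision.models import resnet50
+
+model = resnet50()
+input_shape = (3, 256, 192)  # (通道, 高, 宽),数值仅为示例
+flops, params = get_model_complexity_info(
+    model, input_shape, print_per_layer_stat=False, as_strings=True)
+print(f'Flops: {flops}, Params: {params}')
+```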
+
+对于推理速度,忽略了数据预处理的时间,只测量模型前向计算和数据后处理的时间。对于每个模型设置,保持相同的数据预处理方法,以确保相同的特征输入。分别测量了在 CPU 和 GPU 设备上的推理速度。对于自上而下的热图模型,我们还测试了批处理量较大(例如 10)的情况,以测试拥挤场景下的模型性能。
+
+推理速度用每秒迭代次数 (FPS) 来衡量,即模型每秒完成的平均前向迭代次数,反映模型处理输入的速度。该数值越高,表示推理速度越快。
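+
+FPS 的测量方式可以用下面的示意代码表示(忽略数据加载与预处理,只统计前向计算;函数与参数均为演示用的假设,实际数值请以仓库内的测速脚本为准):
+
+```python
+import time
+
+import numpy as np
+import torch
+
+
+@torch.no_grad()
+def measure_fps(model, inputs, num_iters=100, warmup=10):
+    """多次前向迭代计时,返回 FPS 的均值和标准差(与批大小无关)。"""
+    for _ in range(warmup):
+        model(inputs)
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    times = []
+    for _ in range(num_iters):
+        start = time.time()
+        model(inputs)
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+        times.append(time.time() - start)
+    fps = 1.0 / np.array(times)
+    return fps.mean(), fps.std()
+```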
+
+### 硬件
+
+- GPU: GeForce GTX 1660 SUPER
+- CPU: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
+
+### 软件环境
+
+- Ubuntu 16.04
+- Python 3.8
+- PyTorch 1.10
+- CUDA 10.2
+- mmcv-full 1.3.17
+- mmpose 0.20.0
+
+## MMPose 中主要模型的复杂度信息和推理速度总结
+
+| Algorithm | Model | config | Input size | mAP | Flops (GFLOPs) | Params (M) | GPU Inference Speed (FPS)¹ | GPU Inference Speed (FPS, bs=10)² | CPU Inference Speed (FPS) | CPU Inference Speed (FPS, bs=10) |
+| :--- | :---------------: | :-----------------: |:--------------------: | :----------------------------: | :-----------------: | :---------------: |:--------------------: | :----------------------------: | :-----------------: | :-----------------: |
+| topdown_heatmap | Alexnet | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco_256x192.py) | (3, 192, 256) | 0.397 | 1.42 | 5.62 | 229.21 ± 16.91 | 33.52 ± 1.14 | 13.92 ± 0.60 | 1.38 ± 0.02 |
+| topdown_heatmap | CPM | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py) | (3, 192, 256) | 0.623 | 63.81 | 31.3 | 11.35 ± 0.22 | 3.87 ± 0.07 | 0.31 ± 0.01 | 0.03 ± 0.00 |
+| topdown_heatmap | CPM | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_384x288.py) | (3, 288, 384) | 0.65 | 143.57 | 31.3 | 7.09 ± 0.14 | 2.10 ± 0.05 | 0.14 ± 0.00 | 0.01 ± 0.00 |
+| topdown_heatmap | Hourglass-52 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_256x256.py) | (3, 256, 256) | 0.726 | 28.67 | 94.85 | 25.50 ± 1.68 | 3.99 ± 0.07 | 0.92 ± 0.03 | 0.09 ± 0.00 |
+| topdown_heatmap | Hourglass-52 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass52_coco_384x384.py) | (3, 384, 384) | 0.746 | 64.5 | 94.85 | 14.74 ± 0.8 | 1.86 ± 0.06 | 0.43 ± 0.03 | 0.04 ± 0.00 |
+| topdown_heatmap | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py) | (3, 192, 256) | 0.746 | 7.7 | 28.54 | 22.73 ± 1.12 | 6.60 ± 0.14 | 2.73 ± 0.11 | 0.32 ± 0.00 |
+| topdown_heatmap | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_384x288.py) | (3, 288, 384) | 0.76 | 17.33 | 28.54 | 22.78 ± 1.21 | 3.28 ± 0.08 | 1.35 ± 0.05 | 0.14 ± 0.00 |
+| topdown_heatmap | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py) | (3, 192, 256) | 0.756 | 15.77 | 63.6 | 22.01 ± 1.10 | 3.74 ± 0.10 | 1.46 ± 0.05 | 0.16 ± 0.00 |
+| topdown_heatmap | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_384x288.py) | (3, 288, 384) | 0.767 | 35.48 | 63.6 | 15.03 ± 1.03 | 1.80 ± 0.03 | 0.68 ± 0.02 | 0.07 ± 0.00 |
+| topdown_heatmap | LiteHRNet-30 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_256x192.py) | (3, 192, 256) | 0.675 | 0.42 | 1.76 | 11.86 ± 0.38 | 9.77 ± 0.23 | 5.84 ± 0.39 | 0.80 ± 0.00 |
+| topdown_heatmap | LiteHRNet-30 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_30_coco_384x288.py) | (3, 288, 384) | 0.7 | 0.95 | 1.76 | 11.52 ± 0.39 | 5.18 ± 0.11 | 3.45 ± 0.22 | 0.37 ± 0.00 |
+| topdown_heatmap | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_256x192.py) | (3, 192, 256) | 0.646 | 1.59 | 9.57 | 91.82 ± 10.98 | 17.85 ± 0.32 | 10.44 ± 0.80 | 1.05 ± 0.01 |
+| topdown_heatmap | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco_384x288.py) | (3, 288, 384) | 0.673 | 3.57 | 9.57 | 71.27 ± 6.82 | 8.00 ± 0.15 | 5.01 ± 0.32 | 0.46 ± 0.00 |
+| topdown_heatmap | MSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn50_coco_256x192.py) | (3, 192, 256) | 0.723 | 5.11 | 25.11 | 59.65 ± 3.74 | 9.51 ± 0.15 | 3.98 ± 0.21 | 0.43 ± 0.00 |
+| topdown_heatmap | 2xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xmspn50_coco_256x192.py) | (3, 192, 256) | 0.754 | 11.35 | 56.8 | 30.64 ± 2.61 | 4.74 ± 0.12 | 1.85 ± 0.08 | 0.20 ± 0.00 |
+| topdown_heatmap | 3xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xmspn50_coco_256x192.py) | (3, 192, 256) | 0.758 | 17.59 | 88.49 | 20.90 ± 1.82 | 3.22 ± 0.08 | 1.23 ± 0.04 | 0.13 ± 0.00 |
+| topdown_heatmap | 4xMSPN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/4xmspn50_coco_256x192.py) | (3, 192, 256) | 0.764 | 23.82 | 120.18 | 15.79 ± 1.14 | 2.45 ± 0.05 | 0.90 ± 0.03 | 0.10 ± 0.00 |
+| topdown_heatmap | ResNest-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_256x192.py) | (3, 192, 256) | 0.721 | 6.73 | 35.93 | 48.36 ± 4.12 | 7.48 ± 0.13 | 3.00 ± 0.13 | 0.33 ± 0.00 |
+| topdown_heatmap | ResNest-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest50_coco_384x288.py) | (3, 288, 384) | 0.737 | 15.14 | 35.93 | 30.30 ± 2.30 | 3.62 ± 0.09 | 1.43 ± 0.05 | 0.13 ± 0.00 |
+| topdown_heatmap | ResNest-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_256x192.py) | (3, 192, 256) | 0.725 | 10.38 | 56.61 | 29.21 ± 1.98 | 5.30 ± 0.12 | 2.01 ± 0.08 | 0.22 ± 0.00 |
+| topdown_heatmap | ResNest-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest101_coco_384x288.py) | (3, 288, 384) | 0.746 | 23.36 | 56.61 | 19.02 ± 1.40 | 2.59 ± 0.05 | 0.97 ± 0.03 | 0.09 ± 0.00 |
+| topdown_heatmap | ResNest-200 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_256x192.py) | (3, 192, 256) | 0.732 | 17.5 | 78.54 | 16.11 ± 0.71 | 3.29 ± 0.07 | 1.33 ± 0.02 | 0.14 ± 0.00 |
+| topdown_heatmap | ResNest-200 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest200_coco_384x288.py) | (3, 288, 384) | 0.754 | 39.37 | 78.54 | 11.48 ± 0.68 | 1.58 ± 0.02 | 0.63 ± 0.01 | 0.06 ± 0.00 |
+| topdown_heatmap | ResNest-269 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_256x192.py) | (3, 192, 256) | 0.738 | 22.45 | 119.27 | 12.02 ± 0.47 | 2.60 ± 0.05 | 1.03 ± 0.01 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNest-269 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest269_coco_384x288.py) | (3, 288, 384) | 0.755 | 50.5 | 119.27 | 8.82 ± 0.42 | 1.24 ± 0.02 | 0.49 ± 0.01 | 0.05 ± 0.00 |
+| topdown_heatmap | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_256x192.py) | (3, 192, 256) | 0.718 | 5.46 | 34 | 64.23 ± 6.05 | 9.33 ± 0.21 | 4.00 ± 0.10 | 0.41 ± 0.00 |
+| topdown_heatmap | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res50_coco_384x288.py) | (3, 288, 384) | 0.731 | 12.29 | 34 | 36.78 ± 3.05 | 4.48 ± 0.12 | 1.92 ± 0.04 | 0.19 ± 0.00 |
+| topdown_heatmap | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_256x192.py) | (3, 192, 256) | 0.726 | 9.11 | 52.99 | 43.35 ± 4.36 | 6.44 ± 0.14 | 2.57 ± 0.05 | 0.27 ± 0.00 |
+| topdown_heatmap | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res101_coco_384x288.py) | (3, 288, 384) | 0.748 | 20.5 | 52.99 | 23.29 ± 1.83 | 3.12 ± 0.09 | 1.23 ± 0.03 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_256x192.py) | (3, 192, 256) | 0.735 | 12.77 | 68.64 | 32.31 ± 2.84 | 4.88 ± 0.17 | 1.89 ± 0.03 | 0.20 ± 0.00 |
+| topdown_heatmap | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/res152_coco_384x288.py) | (3, 288, 384) | 0.75 | 28.73 | 68.64 | 17.32 ± 1.17 | 2.40 ± 0.04 | 0.91 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ResNetV1d-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_256x192.py) | (3, 192, 256) | 0.722 | 5.7 | 34.02 | 63.44 ± 6.09 | 9.09 ± 0.10 | 3.82 ± 0.10 | 0.39 ± 0.00 |
+| topdown_heatmap | ResNetV1d-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d50_coco_384x288.py) | (3, 288, 384) | 0.73 | 12.82 | 34.02 | 36.21 ± 3.10 | 4.30 ± 0.12 | 1.82 ± 0.04 | 0.16 ± 0.00 |
+| topdown_heatmap | ResNetV1d-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_256x192.py) | (3, 192, 256) | 0.731 | 9.35 | 53.01 | 41.48 ± 3.76 | 6.33 ± 0.15 | 2.48 ± 0.05 | 0.26 ± 0.00 |
+| topdown_heatmap | ResNetV1d-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d101_coco_384x288.py) | (3, 288, 384) | 0.748 | 21.04 | 53.01 | 23.49 ± 1.76 | 3.07 ± 0.07 | 1.19 ± 0.02 | 0.11 ± 0.00 |
+| topdown_heatmap | ResNetV1d-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_256x192.py) | (3, 192, 256) | 0.737 | 13.01 | 68.65 | 31.96 ± 2.87 | 4.69 ± 0.18 | 1.87 ± 0.02 | 0.19 ± 0.00 |
+| topdown_heatmap | ResNetV1d-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d152_coco_384x288.py) | (3, 288, 384) | 0.752 | 29.26 | 68.65 | 17.31 ± 1.13 | 2.32 ± 0.04 | 0.88 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ResNext-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_256x192.py) | (3, 192, 256) | 0.714 | 5.61 | 33.47 | 48.34 ± 3.85 | 7.66 ± 0.13 | 3.71 ± 0.10 | 0.37 ± 0.00 |
+| topdown_heatmap | ResNext-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext50_coco_384x288.py) | (3, 288, 384) | 0.724 | 12.62 | 33.47 | 30.66 ± 2.38 | 3.64 ± 0.11 | 1.73 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | ResNext-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_256x192.py) | (3, 192, 256) | 0.726 | 9.29 | 52.62 | 27.33 ± 2.35 | 5.09 ± 0.13 | 2.45 ± 0.04 | 0.25 ± 0.00 |
+| topdown_heatmap | ResNext-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext101_coco_384x288.py) | (3, 288, 384) | 0.743 | 20.91 | 52.62 | 18.19 ± 1.38 | 2.42 ± 0.04 | 1.15 ± 0.01 | 0.10 ± 0.00 |
+| topdown_heatmap | ResNext-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_256x192.py) | (3, 192, 256) | 0.73 | 12.98 | 68.39 | 19.61 ± 1.61 | 3.80 ± 0.13 | 1.83 ± 0.02 | 0.18 ± 0.00 |
+| topdown_heatmap | ResNext-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext152_coco_384x288.py) | (3, 288, 384) | 0.742 | 29.21 | 68.39 | 13.14 ± 0.75 | 1.82 ± 0.03 | 0.85 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | RSN-18 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn18_coco_256x192.py) | (3, 192, 256) | 0.704 | 2.27 | 9.14 | 47.80 ± 4.50 | 13.68 ± 0.25 | 6.70 ± 0.28 | 0.70 ± 0.00 |
+| topdown_heatmap | RSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn50_coco_256x192.py) | (3, 192, 256) | 0.723 | 4.11 | 19.33 | 27.22 ± 1.61 | 8.81 ± 0.13 | 3.98 ± 0.12 | 0.45 ± 0.00 |
+| topdown_heatmap | 2xRSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/2xrsn50_coco_256x192.py) | (3, 192, 256) | 0.745 | 8.29 | 39.26 | 13.88 ± 0.64 | 4.78 ± 0.13 | 2.02 ± 0.04 | 0.23 ± 0.00 |
+| topdown_heatmap | 3xRSN-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/3xrsn50_coco_256x192.py) | (3, 192, 256) | 0.75 | 12.47 | 59.2 | 9.40 ± 0.32 | 3.37 ± 0.09 | 1.34 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | SCNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_256x192.py) | (3, 192, 256) | 0.728 | 5.31 | 34.01 | 40.76 ± 3.08 | 8.35 ± 0.19 | 3.82 ± 0.08 | 0.40 ± 0.00 |
+| topdown_heatmap | SCNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet50_coco_384x288.py) | (3, 288, 384) | 0.751 | 11.94 | 34.01 | 32.61 ± 2.97 | 4.19 ± 0.10 | 1.85 ± 0.03 | 0.17 ± 0.00 |
+| topdown_heatmap | SCNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_256x192.py) | (3, 192, 256) | 0.733 | 8.51 | 53.01 | 24.28 ± 1.19 | 5.80 ± 0.13 | 2.49 ± 0.05 | 0.27 ± 0.00 |
+| topdown_heatmap | SCNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet101_coco_384x288.py) | (3, 288, 384) | 0.752 | 19.14 | 53.01 | 20.43 ± 1.76 | 2.91 ± 0.06 | 1.23 ± 0.02 | 0.12 ± 0.00 |
+| topdown_heatmap | SeresNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_256x192.py) | (3, 192, 256) | 0.728 | 5.47 | 36.53 | 54.83 ± 4.94 | 8.80 ± 0.12 | 3.85 ± 0.10 | 0.40 ± 0.00 |
+| topdown_heatmap | SeresNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet50_coco_384x288.py) | (3, 288, 384) | 0.748 | 12.3 | 36.53 | 33.00 ± 2.67 | 4.26 ± 0.12 | 1.86 ± 0.04 | 0.17 ± 0.00 |
+| topdown_heatmap | SeresNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_256x192.py) | (3, 192, 256) | 0.734 | 9.13 | 57.77 | 33.90 ± 2.65 | 6.01 ± 0.13 | 2.48 ± 0.05 | 0.26 ± 0.00 |
+| topdown_heatmap | SeresNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet101_coco_384x288.py) | (3, 288, 384) | 0.753 | 20.53 | 57.77 | 20.57 ± 1.57 | 2.96 ± 0.07 | 1.20 ± 0.02 | 0.11 ± 0.00 |
+| topdown_heatmap | SeresNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_256x192.py) | (3, 192, 256) | 0.73 | 12.79 | 75.26 | 24.25 ± 1.95 | 4.45 ± 0.10 | 1.82 ± 0.02 | 0.19 ± 0.00 |
+| topdown_heatmap | SeresNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet152_coco_384x288.py) | (3, 288, 384) | 0.753 | 28.76 | 75.26 | 15.11 ± 0.99 | 2.25 ± 0.04 | 0.88 ± 0.01 | 0.08 ± 0.00 |
+| topdown_heatmap | ShuffleNetV1 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_256x192.py) | (3, 192, 256) | 0.585 | 1.35 | 6.94 | 80.79 ± 8.95 | 21.91 ± 0.46 | 11.84 ± 0.59 | 1.25 ± 0.01 |
+| topdown_heatmap | ShuffleNetV1 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco_384x288.py) | (3, 288, 384) | 0.622 | 3.05 | 6.94 | 63.45 ± 5.21 | 9.84 ± 0.10 | 6.01 ± 0.31 | 0.57 ± 0.00 |
+| topdown_heatmap | ShuffleNetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_256x192.py) | (3, 192, 256) | 0.599 | 1.37 | 7.55 | 82.36 ± 7.30 | 22.68 ± 0.53 | 12.40 ± 0.66 | 1.34 ± 0.02 |
+| topdown_heatmap | ShuffleNetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco_384x288.py) | (3, 288, 384) | 0.636 | 3.08 | 7.55 | 63.63 ± 5.72 | 10.47 ± 0.16 | 6.32 ± 0.28 | 0.63 ± 0.01 |
+| topdown_heatmap | VGG16 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg16_bn_coco_256x192.py) | (3, 192, 256) | 0.698 | 16.22 | 18.92 | 51.91 ± 2.98 | 6.18 ± 0.13 | 1.64 ± 0.03 | 0.15 ± 0.00 |
+| topdown_heatmap | VIPNAS + ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py) | (3, 192, 256) | 0.711 | 1.49 | 7.29 | 34.88 ± 2.45 | 10.29 ± 0.13 | 6.51 ± 0.17 | 0.65 ± 0.00 |
+| topdown_heatmap | VIPNAS + MobileNetV3 | [config](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_mbv3_coco_256x192.py) | (3, 192, 256) | 0.7 | 0.76 | 5.9 | 53.62 ± 6.59 | 11.54 ± 0.18 | 1.26 ± 0.02 | 0.13 ± 0.00 |
+| Associative Embedding | HigherHRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py) | (3, 512, 512) | 0.677 | 46.58 | 28.65 | 7.80 ± 0.67 | / | 0.28 ± 0.02 | / |
+| Associative Embedding | HigherHRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py) | (3, 640, 640) | 0.686 | 72.77 | 28.65 | 5.30 ± 0.37 | / | 0.17 ± 0.01 | / |
+| Associative Embedding | HigherHRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py) | (3, 512, 512) | 0.686 | 96.17 | 63.83 | 4.55 ± 0.35 | / | 0.15 ± 0.01 | / |
+| Associative Embedding | Hourglass-AE | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco_512x512.py) | (3, 512, 512) | 0.613 | 221.58 | 138.86 | 3.55 ± 0.24 | / | 0.08 ± 0.00 | / |
+| Associative Embedding | HRNet-W32 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py) | (3, 512, 512) | 0.654 | 41.1 | 28.54 | 8.93 ± 0.76 | / | 0.33 ± 0.02 | / |
+| Associative Embedding | HRNet-W48 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py) | (3, 512, 512) | 0.665 | 84.12 | 63.6 | 5.27 ± 0.43 | / | 0.18 ± 0.01 | / |
+| Associative Embedding | MobilenetV2 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py) | (3, 512, 512) | 0.38 | 8.54 | 9.57 | 21.24 ± 1.34 | / | 0.81 ± 0.06 | / |
+| Associative Embedding | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py) | (3, 512, 512) | 0.466 | 29.2 | 34 | 11.71 ± 0.97 | / | 0.41 ± 0.02 | / |
+| Associative Embedding | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py) | (3, 640, 640) | 0.479 | 45.62 | 34 | 8.20 ± 0.58 | / | 0.26 ± 0.02 | / |
+| Associative Embedding | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py) | (3, 512, 512) | 0.554 | 48.67 | 53 | 8.26 ± 0.68 | / | 0.28 ± 0.02 | / |
+| Associative Embedding | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py) | (3, 512, 512) | 0.595 | 68.17 | 68.64 | 6.25 ± 0.53 | / | 0.21 ± 0.01 | / |
+| DeepPose | ResNet-50 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res50_coco_256x192.py) | (3, 192, 256) | 0.526 | 4.04 | 23.58 | 82.20 ± 7.54 | / | 5.50 ± 0.18 | / |
+| DeepPose | ResNet-101 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res101_coco_256x192.py) | (3, 192, 256) | 0.56 | 7.69 | 42.57 | 48.93 ± 4.02 | / | 3.10 ± 0.07 | / |
+| DeepPose | ResNet-152 | [config](/configs/body/2d_kpt_sview_rgb_img/deeppose/coco/res152_coco_256x192.py) | (3, 192, 256) | 0.583 | 11.34 | 58.21 | 35.06 ± 3.50 | / | 2.19 ± 0.04 | / |
+
+¹ 注意,这里运行多次迭代,并记录每次迭代的时间,同时展示了 FPS 数值的平均值和标准差。
+
+² FPS 定义为每秒的平均迭代次数,与该次迭代中的批处理大小无关。
diff --git a/vendor/ViTPose/docs/zh_cn/install.md b/vendor/ViTPose/docs/zh_cn/install.md
new file mode 100644
index 0000000000000000000000000000000000000000..c876ee5c2a043f44d7db0ed4ff75b2d75e531c9f
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/install.md
@@ -0,0 +1,202 @@
+# 安装
+
+本文档提供了安装 MMPose 的相关步骤。
+
+
+
+- [安装依赖包](#安装依赖包)
+- [准备环境](#准备环境)
+- [MMPose 的安装步骤](#MMPose-的安装步骤)
+- [CPU 环境下的安装步骤](#CPU-环境下的安装步骤)
+- [利用 Docker 镜像安装 MMPose](#利用-Docker-镜像安装-MMPose)
+- [源码安装 MMPose](#源码安装-MMPose)
+- [在多个 MMPose 版本下进行开发](#在多个-MMPose-版本下进行开发)
+
+
+
+## 安装依赖包
+
+- Linux (Windows 系统暂未有官方支持)
+- Python 3.6+
+- PyTorch 1.3+
+- CUDA 9.2+ (如果从源码编译 PyTorch,则可以兼容 CUDA 9.0 版本)
+- GCC 5+
+- [mmcv](https://github.com/open-mmlab/mmcv) 请安装最新版本的 mmcv-full
+- Numpy
+- cv2
+- json_tricks
+- [xtcocotools](https://github.com/jin-s13/xtcocoapi)
+
+可选项:
+
+- [mmdet](https://github.com/open-mmlab/mmdetection) (用于“姿态估计”)
+- [mmtrack](https://github.com/open-mmlab/mmtracking) (用于“姿态跟踪”)
+- [pyrender](https://pyrender.readthedocs.io/en/latest/install/index.html) (用于“三维人体形状恢复”)
+- [smplx](https://github.com/vchoutas/smplx) (用于“三维人体形状恢复”)
+
+## 准备环境
+
+a. 创建并激活 conda 虚拟环境,如:
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+```
+
+b. 参考 [官方文档](https://pytorch.org/) 安装 PyTorch 和 torchvision ,如:
+
+```shell
+conda install pytorch torchvision -c pytorch
+```
+
+**注**:确保 CUDA 的编译版本和 CUDA 的运行版本相匹配。
+用户可以参照 [PyTorch 官网](https://pytorch.org/) 对预编译包所支持的 CUDA 版本进行核对。
+
+`例 1`:如果用户的 `/usr/local/cuda` 文件夹下已安装 CUDA 10.2 版本,并且想要安装 PyTorch 1.8.0 版本,
+则需要安装 CUDA 10.2 下预编译的 PyTorch。
+
+```shell
+conda install pytorch==1.8.0 torchvision==0.9.0 cudatoolkit=10.2 -c pytorch
+```
+
+`例 2`:如果用户的 `/usr/local/cuda` 文件夹下已安装 CUDA 9.2 版本,并且想要安装 PyTorch 1.7.0 版本,
+则需要安装 CUDA 9.2 下预编译的 PyTorch。
+
+```shell
+conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=9.2 -c pytorch
+```
+
+如果 PyTorch 是由源码进行编译安装(而非直接下载预编译好的安装包),则可以使用更多的 CUDA 版本(如 9.0 版本)。
+
+## MMPose 的安装步骤
+
+a. 安装最新版本的 mmcv-full。MMPose 推荐用户使用如下的命令安装预编译好的 mmcv。
+
+```shell
+# pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
+# 我们可以忽略 PyTorch 的小版本号
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9/index.html
+```
+
+PyTorch 在 1.x.0 和 1.x.1 之间通常是兼容的,故 mmcv-full 只提供 1.x.0 的编译包。如果你的 PyTorch 版本是 1.x.1,你可以放心地安装在 1.x.0 版本编译的 mmcv-full。
+
+可查阅 [这里](https://github.com/open-mmlab/mmcv#installation) 以参考不同版本的 MMCV 所兼容的 PyTorch 和 CUDA 版本。
+
+另外,用户也可以通过使用以下命令从源码进行编译:
+
+```shell
+git clone https://github.com/open-mmlab/mmcv.git
+cd mmcv
+MMCV_WITH_OPS=1 pip install -e . # mmcv-full 包含一些 cuda 算子,执行该步骤会安装 mmcv-full(而非 mmcv)
+# 或者使用 pip install -e . # 这个命令安装的 mmcv 将不包含 cuda ops,通常适配 CPU(无 GPU)环境
+cd ..
+```
+
+**注意**:如果之前安装过 mmcv,那么需要先使用 `pip uninstall mmcv` 命令进行卸载。如果 mmcv 和 mmcv-full 同时被安装, 会报 `ModuleNotFoundError` 的错误。
+
+b. 克隆 MMPose 库。
+
+```shell
+git clone https://github.com/open-mmlab/mmpose.git
+cd mmpose
+```
+
+c. 安装依赖包和 MMPose。
+
+```shell
+pip install -r requirements.txt
+pip install -v -e . # or "python setup.py develop"
+```
+
+如果是在 macOS 环境安装 MMPose,则需使用如下命令:
+
+```shell
+CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' pip install -e .
+```
+
+d. 安装其他可选依赖。
+
+如果用户不需要做相关任务,这部分步骤可以选择跳过。
+
+可选项:
+
+- [mmdet](https://github.com/open-mmlab/mmdetection) (用于“姿态估计”)
+- [mmtrack](https://github.com/open-mmlab/mmtracking) (用于“姿态跟踪”)
+- [pyrender](https://pyrender.readthedocs.io/en/latest/install/index.html) (用于“三维人体形状恢复”)
+- [smplx](https://github.com/vchoutas/smplx) (用于“三维人体形状恢复”)
+
+注意:
+
+1. 在步骤 c 中,git commit 的 id 将会被写到版本号中,如 0.6.0+2e7045c。这个版本号也会被保存到训练好的模型中。
+ 这里推荐用户每次在步骤 b 中对本地代码和 github 上的源码进行同步。如果 C++/CUDA 代码被修改,就必须进行这一步骤。
+
+1. 根据上述步骤,MMPose 就会以 `dev` 模式被安装,任何本地的代码修改都会立刻生效,不需要再重新安装一遍(除非用户提交了 commits,并且想更新版本号)。
+
+1. 如果用户想使用 `opencv-python-headless` 而不是 `opencv-python`,可在安装 MMCV 前先安装 `opencv-python-headless`。
+
+1. 如果 mmcv 已经被安装,用户需要使用 `pip uninstall mmcv` 命令进行卸载。如果 mmcv 和 mmcv-full 同时被安装, 会报 `ModuleNotFoundError` 的错误。
+
+1. 一些依赖包是可选的。运行 `python setup.py develop` 将只会安装运行代码所需的最小要求依赖包。
+ 要想使用一些可选的依赖包,如 `smplx`,用户需要通过 `pip install -r requirements/optional.txt` 进行安装,
+ 或者通过调用 `pip`(如 `pip install -v -e .[optional]`,这里的 `[optional]` 可替换为 `all`,`tests`,`build` 或 `optional`) 指定安装对应的依赖包,如 `pip install -v -e .[tests,build]`。
+
+## CPU 环境下的安装步骤
+
+MMPose 可以在只有 CPU 的环境下安装(即无法使用 GPU 的环境)。
+
+在 CPU 模式下,用户可以运行 `demo/demo.py` 的代码。
+
+## 源码安装 MMPose
+
+这里提供了 conda 下安装 MMPose 并链接 COCO 数据集路径的完整脚本(假设 COCO 数据的路径在 $COCO_ROOT)。
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+
+# 安装最新的,使用默认版本的 CUDA 版本(一般为最新版本)预编译的 PyTorch 包
+conda install -c pytorch pytorch torchvision -y
+
+# 安装 mmcv-full。其中,命令里 url 的 ``{cu_version}`` 和 ``{torch_version}`` 变量需由用户进行指定。
+# 可查阅 [这里](https://github.com/open-mmlab/mmcv#installation) 以参考不同版本的 MMCV 所兼容的 PyTorch 和 CUDA 版本。
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
+
+# 安装 mmpose
+git clone git@github.com:open-mmlab/mmpose.git
+cd mmpose
+pip install -r requirements.txt
+python setup.py develop
+
+mkdir data
+ln -s $COCO_ROOT data/coco
+```
+
+## 利用 Docker 镜像安装 MMPose
+
+MMPose 提供了一个 [Dockerfile](/docker/Dockerfile) 用于创建 docker 镜像。
+
+```shell
+# 创建拥有 PyTorch 1.6.0, CUDA 10.1, CUDNN 7 配置的 docker 镜像.
+docker build -f ./docker/Dockerfile --rm -t mmpose .
+```
+
+**注意**:用户需要确保已经安装了 [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker)。
+
+运行以下命令:
+
+```shell
+docker run --gpus all\
+ --shm-size=8g \
+ -it -v {DATA_DIR}:/mmpose/data mmpose
+```
+
+## 在多个 MMPose 版本下进行开发
+
+MMPose 的训练和测试脚本已经修改了 `PYTHONPATH` 变量,以确保其能够运行当前目录下的 MMPose。
+
+如果想要运行环境下默认的 MMPose,用户需要在训练和测试脚本中去除这一行:
+
+```shell
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
+```
diff --git a/vendor/ViTPose/docs/zh_cn/language.md b/vendor/ViTPose/docs/zh_cn/language.md
new file mode 100644
index 0000000000000000000000000000000000000000..a0a6259bee27121ca837c85141ebca0307d617b4
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/language.md
@@ -0,0 +1,3 @@
+## English
+
+## 简体中文
diff --git a/vendor/ViTPose/docs/zh_cn/make.bat b/vendor/ViTPose/docs/zh_cn/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..922152e96a04a242e6fc40f124261d74890617d8
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/vendor/ViTPose/docs/zh_cn/merge_docs.sh b/vendor/ViTPose/docs/zh_cn/merge_docs.sh
new file mode 100644
index 0000000000000000000000000000000000000000..51fc8bc84f250eb1ec7fac8379c2f6b0c845bfa0
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/merge_docs.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
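+# 在每个 demo 文档末尾追加一个空行,便于下一行用 cat 拼接时段落不粘连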
+sed -i '$a\\n' ../../demo/docs/*_demo.md
+cat ../../demo/docs/*_demo.md | sed "s/#/#&/" | sed "s/md###t/html#t/g" | sed '1i\# 示例' | sed 's=](/docs/zh_cn/=](/=g' | sed 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' >demo.md
+
+# remove the /docs/zh_cn/ prefix for links used in the doc site
+sed -i 's=](/docs/zh_cn/=](=g' ./tutorials/*.md
+sed -i 's=](/docs/zh_cn/=](=g' ./tasks/*.md
+sed -i 's=](/docs/zh_cn/=](=g' ./papers/*.md
+sed -i 's=](/docs/zh_cn/=](=g' ./topics/*.md
+sed -i 's=](/docs/zh_cn/=](=g' data_preparation.md
+sed -i 's=](/docs/zh_cn/=](=g' getting_started.md
+sed -i 's=](/docs/zh_cn/=](=g' install.md
+sed -i 's=](/docs/zh_cn/=](=g' benchmark.md
+# sed -i 's=](/docs/zh_cn/=](=g' changelog.md
+sed -i 's=](/docs/zh_cn/=](=g' faq.md
+
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./tutorials/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./tasks/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./papers/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' ./topics/*.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' data_preparation.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' getting_started.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' install.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' benchmark.md
+# sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' changelog.md
+sed -i 's=](/=](https://github.com/open-mmlab/mmpose/tree/master/=g' faq.md
diff --git a/vendor/ViTPose/docs/zh_cn/stats.py b/vendor/ViTPose/docs/zh_cn/stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..d947ab10ba9beacf9da8ed208c3a1f78fa22f149
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/stats.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools as func
+import glob
+import re
+from os.path import basename, splitext
+
+import numpy as np
+import titlecase
+
+
+def anchor(name):
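+    # 将标题转换为页面锚点:非字母数字字符替换为 '-',合并连续的 '-' 并去掉首尾的 '-'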
+ return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
+ name.strip().lower())).strip('-')
+
+
+# Count algorithms
+
+files = sorted(glob.glob('topics/*.md'))
+
+stats = []
+
+for f in files:
+ with open(f, 'r') as content_file:
+ content = content_file.read()
+
+ # title
+ title = content.split('\n')[0].replace('#', '')
+
+ # count papers
+ papers = set(
+ (papertype, titlecase.titlecase(paper.lower().strip()))
+ for (papertype, paper) in re.findall(
+            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
+ content, re.DOTALL))
+ # paper links
+ revcontent = '\n'.join(list(reversed(content.splitlines())))
+ paperlinks = {}
+ for _, p in papers:
+ print(p)
+ paperlinks[p] = ', '.join(
+ ((f'[{paperlink} ⇨]'
+ f'(topics/{splitext(basename(f))[0]}.html#{anchor(paperlink)})')
+ for paperlink in re.findall(
+ rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n### (.*?)\s*[,;]?\s*\n',
+ revcontent, re.DOTALL | re.IGNORECASE)))
+ print(' ', paperlinks[p])
+ paperlist = '\n'.join(
+ sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
+ # count configs
+ configs = set(x.lower().strip()
+ for x in re.findall(r'.*configs/.*\.py', content))
+
+ # count ckpts
+ ckpts = set(x.lower().strip()
+ for x in re.findall(r'https://download.*\.pth', content)
+ if 'mmpose' in x)
+
+ statsmsg = f"""
+## [{title}]({f})
+
+* 模型权重文件数量: {len(ckpts)}
+* 配置文件数量: {len(configs)}
+* 论文数量: {len(papers)}
+{paperlist}
+
+ """
+
+ stats.append((papers, configs, ckpts, statsmsg))
+
+allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
+allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
+allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
+
+# Summarize
+
+msglist = '\n'.join(x for _, _, _, x in stats)
+papertypes, papercounts = np.unique([t for t, _ in allpapers],
+ return_counts=True)
+countstr = '\n'.join(
+ [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# 概览
+
+* 模型权重文件数量: {len(allckpts)}
+* 配置文件数量: {len(allconfigs)}
+* 论文数量: {len(allpapers)}
+{countstr}
+
+已支持的数据集详细信息请见 [数据集](datasets.md).
+
+{msglist}
+
+"""
+
+with open('modelzoo.md', 'w') as f:
+ f.write(modelzoo)
+
+# Count datasets
+
+files = sorted(glob.glob('tasks/*.md'))
+# files = sorted(glob.glob('docs/tasks/*.md'))
+
+datastats = []
+
+for f in files:
+ with open(f, 'r') as content_file:
+ content = content_file.read()
+
+ # title
+ title = content.split('\n')[0].replace('#', '')
+
+ # count papers
+ papers = set(
+ (papertype, titlecase.titlecase(paper.lower().strip()))
+ for (papertype, paper) in re.findall(
+            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
+ content, re.DOTALL))
+ # paper links
+ revcontent = '\n'.join(list(reversed(content.splitlines())))
+ paperlinks = {}
+ for _, p in papers:
+ print(p)
+ paperlinks[p] = ', '.join(
+ (f'[{p} ⇨](tasks/{splitext(basename(f))[0]}.html#{anchor(p)})'
+ for p in re.findall(
+ rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
+ revcontent, re.DOTALL | re.IGNORECASE)))
+ print(' ', paperlinks[p])
+ paperlist = '\n'.join(
+ sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
+ # count configs
+ configs = set(x.lower().strip()
+ for x in re.findall(r'https.*configs/.*\.py', content))
+
+ # count ckpts
+ ckpts = set(x.lower().strip()
+ for x in re.findall(r'https://download.*\.pth', content)
+ if 'mmpose' in x)
+
+ statsmsg = f"""
+## [{title}]({f})
+
+* 论文数量: {len(papers)}
+{paperlist}
+
+ """
+
+ datastats.append((papers, configs, ckpts, statsmsg))
+
+alldatapapers = func.reduce(lambda a, b: a.union(b),
+ [p for p, _, _, _ in datastats])
+
+# Summarize
+
+msglist = '\n'.join(x for _, _, _, x in stats)
+datamsglist = '\n'.join(x for _, _, _, x in datastats)
+papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
+ return_counts=True)
+countstr = '\n'.join(
+ [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# 概览
+
+* 论文数量: {len(alldatapapers)}
+{countstr}
+
+已支持的算法详细信息请见 [模型池](modelzoo.md).
+
+{datamsglist}
+"""
+
+with open('datasets.md', 'w') as f:
+ f.write(modelzoo)
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_animal_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_animal_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..3149533047b4457bf9b3088e14f0940db4bb743c
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_animal_keypoint.md
@@ -0,0 +1,3 @@
+# 2D动物关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_body_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_body_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..47a1c3e40a7d4f866f1f9128186d9ee2d2d75bd5
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_body_keypoint.md
@@ -0,0 +1,496 @@
+# 2D 人体关键点数据集
+
+我们建议您将数据集的根目录放置在 `$MMPOSE/data` 下。
+如果您的文件结构比较特别,您需要在配置文件中修改相应的路径。
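+
+例如,若 COCO 数据没有放在默认位置,可以在配置文件中覆盖相应路径,示意如下(字段名请以实际使用的配置文件为准,路径为示例):
+
+```python
+data_root = '/path/to/my/coco'  # 自定义的数据集根目录(示例路径)
+
+data = dict(
+    train=dict(
+        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
+        img_prefix=f'{data_root}/train2017/'),
+    val=dict(
+        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
+        img_prefix=f'{data_root}/val2017/'))
+```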
+
+MMPose 支持的数据集如下所示:
+
+- 图像
+ - [COCO](#coco) \[ [主页](http://cocodataset.org/) \]
+ - [MPII](#mpii) \[ [主页](http://human-pose.mpi-inf.mpg.de/) \]
+ - [MPII-TRB](#mpii-trb) \[ [主页](https://github.com/kennymckormick/Triplet-Representation-of-human-Body) \]
+ - [AI Challenger](#aic) \[ [主页](https://github.com/AIChallenger/AI_Challenger_2017) \]
+ - [CrowdPose](#crowdpose) \[ [主页](https://github.com/Jeff-sjtu/CrowdPose) \]
+ - [OCHuman](#ochuman) \[ [主页](https://github.com/liruilong940607/OCHumanApi) \]
+ - [MHP](#mhp) \[ [主页](https://lv-mhp.github.io/dataset) \]
+- 视频
+ - [PoseTrack18](#posetrack18) \[ [主页](https://posetrack.net/users/download.php) \]
+ - [sub-JHMDB](#sub-jhmdb-dataset) \[ [主页](http://jhmdb.is.tue.mpg.de/dataset) \]
+
+## COCO
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+请从此链接 [COCO download](http://cocodataset.org/#download) 下载数据集。请注意,2017 Train/Val 对于 COCO 关键点的训练和评估是非常必要的。
+[HRNet-Human-Pose-Estimation](https://github.com/HRNet/HRNet-Human-Pose-Estimation) 提供了 COCO val2017 的检测结果,可用于复现我们的多人姿态估计的结果。
+请从 [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) 或 [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing)下载。
+可选地, 为了在 COCO'2017 test-dev 上评估, 请下载 [image-info](https://download.openmmlab.com/mmpose/datasets/person_keypoints_test-dev-2017.json)。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── coco
+ │-- annotations
+ │ │-- person_keypoints_train2017.json
+ │ |-- person_keypoints_val2017.json
+ │ |-- person_keypoints_test-dev-2017.json
+ |-- person_detection_results
+ | |-- COCO_val2017_detections_AP_H_56_person.json
+ | |-- COCO_test-dev2017_detections_AP_H_609_person.json
+ │-- train2017
+ │ │-- 000000000009.jpg
+ │ │-- 000000000025.jpg
+ │ │-- 000000000030.jpg
+ │ │-- ...
+ `-- val2017
+ │-- 000000000139.jpg
+ │-- 000000000285.jpg
+ │-- 000000000632.jpg
+ │-- ...
+
+```
+
+## MPII
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+请从此链接 [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/) 下载数据集。
+我们已经将原来的标注文件转成了 json 格式,请从此链接 [mpii_annotations](https://download.openmmlab.com/mmpose/datasets/mpii_annotations.tar) 下载。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mpii
+ |── annotations
+ | |── mpii_gt_val.mat
+ | |── mpii_test.json
+ | |── mpii_train.json
+ | |── mpii_trainval.json
+ | `── mpii_val.json
+ `── images
+ |── 000001163.jpg
+ |── 000003072.jpg
+
+```
+
+在训练和推理过程中,预测结果将会被默认保存为 '.mat' 的格式。我们提供了一个工具将这种 '.mat' 的格式转换成更加易读的 '.json' 格式。
+
+```shell
+python tools/dataset/mat2json.py ${PRED_MAT_FILE} ${GT_JSON_FILE} ${OUTPUT_PRED_JSON_FILE}
+```
+
+比如,
+
+```shell
+python tools/dataset/mat2json.py work_dirs/res50_mpii_256x256/pred.mat data/mpii/annotations/mpii_val.json pred.json
+```
+
+## MPII-TRB
+
+
+
+
+MPII-TRB (ICCV'2019)
+
+```bibtex
+@inproceedings{duan2019trb,
+ title={TRB: A Novel Triplet Representation for Understanding 2D Human Body},
+ author={Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and Liu, Wentao and Qian, Chen and Ouyang, Wanli},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={9479--9488},
+ year={2019}
+}
+```
+
+
+
+请从此链接[MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/)下载数据集,并从此链接 [mpii_trb_annotations](https://download.openmmlab.com/mmpose/datasets/mpii_trb_annotations.tar) 下载标注文件。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mpii
+ |── annotations
+ | |── mpii_trb_train.json
+ | |── mpii_trb_val.json
+ `── images
+ |── 000001163.jpg
+ |── 000003072.jpg
+
+```
+
+## AIC
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+请从此链接 [AI Challenger 2017](https://github.com/AIChallenger/AI_Challenger_2017) 下载 AIC 数据集。请注意,2017 Train/Val 对于关键点的训练和评估是必要的。
+请从此链接 [aic_annotations](https://download.openmmlab.com/mmpose/datasets/aic_annotations.tar) 下载标注文件。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── aic
+ │-- annotations
+ │ │-- aic_train.json
+ │ |-- aic_val.json
+ │-- ai_challenger_keypoint_train_20170902
+ │ │-- keypoint_train_images_20170902
+ │ │ │-- 0000252aea98840a550dac9a78c476ecb9f47ffa.jpg
+ │ │ │-- 000050f770985ac9653198495ef9b5c82435d49c.jpg
+ │ │ │-- ...
+ `-- ai_challenger_keypoint_validation_20170911
+ │-- keypoint_validation_images_20170911
+ │-- 0002605c53fb92109a3f2de4fc3ce06425c3b61f.jpg
+ │-- 0003b55a2c991223e6d8b4b820045bd49507bf6d.jpg
+ │-- ...
+```
+
+## CrowdPose
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+请从此链接 [CrowdPose](https://github.com/Jeff-sjtu/CrowdPose) 下载数据集,并从此链接 [crowdpose_annotations](https://download.openmmlab.com/mmpose/datasets/crowdpose_annotations.tar) 下载标注文件和人体检测结果。
+对于 top-down 方法,我们仿照 [CrowdPose](https://arxiv.org/abs/1812.00324),使用 [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3)的[预训练权重](https://pjreddie.com/media/files/yolov3.weights) 来产生人体的检测框。
+对于模型训练, 我们仿照 [HigherHRNet](https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation),在 CrowdPose 训练/验证 数据集上训练模型, 并在 CrowdPose 测试集上评估模型。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── crowdpose
+ │-- annotations
+ │ │-- mmpose_crowdpose_train.json
+ │ │-- mmpose_crowdpose_val.json
+ │ │-- mmpose_crowdpose_trainval.json
+ │ │-- mmpose_crowdpose_test.json
+ │ │-- det_for_crowd_test_0.1_0.5.json
+ │-- images
+ │-- 100000.jpg
+ │-- 100001.jpg
+ │-- 100002.jpg
+ │-- ...
+```
+
+## OCHuman
+
+
+
+
+OCHuman (CVPR'2019)
+
+```bibtex
+@inproceedings{zhang2019pose2seg,
+ title={Pose2seg: Detection free human instance segmentation},
+ author={Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={889--898},
+ year={2019}
+}
+```
+
+
+
+请从此链接 [OCHuman](https://github.com/liruilong940607/OCHumanApi) 下载数据集的图像和标注文件。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── ochuman
+ │-- annotations
+ │ │-- ochuman_coco_format_val_range_0.00_1.00.json
+ │ |-- ochuman_coco_format_test_range_0.00_1.00.json
+ |-- images
+ │-- 000001.jpg
+ │-- 000002.jpg
+ │-- 000003.jpg
+ │-- ...
+
+```
+
+## MHP
+
+
+
+
+MHP (ACM MM'2018)
+
+```bibtex
+@inproceedings{zhao2018understanding,
+ title={Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
+ author={Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
+ booktitle={Proceedings of the 26th ACM international conference on Multimedia},
+ pages={792--800},
+ year={2018}
+}
+```
+
+
+
+请从此链接 [MHP](https://lv-mhp.github.io/dataset)下载数据文件,并从此链接 [mhp_annotations](https://download.openmmlab.com/mmpose/datasets/mhp_annotations.tar.gz)下载标注文件。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── mhp
+ │-- annotations
+ │ │-- mhp_train.json
+ │ │-- mhp_val.json
+ │
+ `-- train
+ │ │-- images
+ │ │ │-- 1004.jpg
+ │ │ │-- 10050.jpg
+ │ │ │-- ...
+ │
+ `-- val
+ │ │-- images
+ │ │ │-- 10059.jpg
+ │ │ │-- 10068.jpg
+ │ │ │-- ...
+ │
+ `-- test
+ │ │-- images
+ │ │ │-- 1005.jpg
+ │ │ │-- 10052.jpg
+        │   │   │-- ...
+```
+
+## PoseTrack18
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+请从此链接 [PoseTrack18](https://posetrack.net/users/download.php) 下载数据文件,并从此链接 [posetrack18_annotations](https://download.openmmlab.com/mmpose/datasets/posetrack18_annotations.tar) 下载标注文件。
+我们已将官方提供的所有单视频标注文件合并为两个 json 文件(posetrack18_train.json 和 posetrack18_val.json),并生成了 [mask files](https://download.openmmlab.com/mmpose/datasets/posetrack18_mask.tar) 来加速训练。
+对于 top-down 的方法, 我们使用 [MMDetection](https://github.com/open-mmlab/mmdetection) 的预训练 [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) 来生成人体的检测框。
+请将数据置于 $MMPOSE/data 目录下,并整理成如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── posetrack18
+ │-- annotations
+ │ │-- posetrack18_train.json
+ │ │-- posetrack18_val.json
+ │ │-- posetrack18_val_human_detections.json
+ │ │-- train
+ │ │ │-- 000001_bonn_train.json
+ │ │ │-- 000002_bonn_train.json
+ │ │ │-- ...
+ │ │-- val
+ │ │ │-- 000342_mpii_test.json
+ │ │ │-- 000522_mpii_test.json
+ │ │ │-- ...
+ │ `-- test
+ │ │-- 000001_mpiinew_test.json
+ │ │-- 000002_mpiinew_test.json
+ │ │-- ...
+ │
+ `-- images
+ │ │-- train
+ │ │ │-- 000001_bonn_train
+ │ │ │ │-- 000000.jpg
+ │ │ │ │-- 000001.jpg
+ │ │ │ │-- ...
+ │ │ │-- ...
+ │ │-- val
+ │ │ │-- 000342_mpii_test
+ │ │ │ │-- 000000.jpg
+ │ │ │ │-- 000001.jpg
+ │ │ │ │-- ...
+ │ │ │-- ...
+ │ `-- test
+ │ │-- 000001_mpiinew_test
+ │ │ │-- 000000.jpg
+ │ │ │-- 000001.jpg
+ │ │ │-- ...
+ │ │-- ...
+ `-- mask
+ │-- train
+ │ │-- 000002_bonn_train
+ │ │ │-- 000000.jpg
+ │ │ │-- 000001.jpg
+ │ │ │-- ...
+ │ │-- ...
+ `-- val
+ │-- 000522_mpii_test
+ │ │-- 000000.jpg
+ │ │-- 000001.jpg
+ │ │-- ...
+ │-- ...
+```
+
+请从 Github 上安装 PoseTrack 官方评估工具:
+
+```shell
+pip install git+https://github.com/svenkreiss/poseval.git
+```
+
+## sub-JHMDB dataset
+
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+对于 [sub-JHMDB](http://jhmdb.is.tue.mpg.de/dataset) 数据集,请从此链接 [images](http://files.is.tue.mpg.de/jhmdb/Rename_Images.tar.gz)(来自 [JHMDB](http://jhmdb.is.tue.mpg.de/dataset))下载图像,
+并从此链接 [jhmdb_annotations](https://download.openmmlab.com/mmpose/datasets/jhmdb_annotations.tar) 下载标注文件。
+将它们移至 $MMPOSE/data目录下, 使得文件呈如下的格式:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+`── data
+ │── jhmdb
+ │-- annotations
+ │ │-- Sub1_train.json
+ │ |-- Sub1_test.json
+ │ │-- Sub2_train.json
+ │ |-- Sub2_test.json
+ │ │-- Sub3_train.json
+ │ |-- Sub3_test.json
+ |-- Rename_Images
+ │-- brush_hair
+ │ │--April_09_brush_hair_u_nm_np1_ba_goo_0
+ | │ │--00001.png
+ | │ │--00002.png
+ │-- catch
+ │-- ...
+
+```
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_face_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_face_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..81655de425f2e309508a282b6fd2c56f7354c257
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_face_keypoint.md
@@ -0,0 +1,3 @@
+# 2D人脸关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_fashion_landmark.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_fashion_landmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..25b7fd7c6484d8d1f876ecd13536dcc9764c7177
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_fashion_landmark.md
@@ -0,0 +1,3 @@
+# 2D服装关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_hand_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_hand_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..61c3eb3fa4ab43b534dc75d26f128f14ced2588e
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_hand_keypoint.md
@@ -0,0 +1,3 @@
+# 2D手部关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/2d_wholebody_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/2d_wholebody_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..23495ded145034e02420e8c564b4d90b10070c7a
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/2d_wholebody_keypoint.md
@@ -0,0 +1,3 @@
+# 2D全身人体关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/3d_body_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/3d_body_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..6ed59ffec74cdeac3561f35ab2d7c9f3181010a7
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/3d_body_keypoint.md
@@ -0,0 +1,3 @@
+# 3D人体关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/3d_body_mesh.md b/vendor/ViTPose/docs/zh_cn/tasks/3d_body_mesh.md
new file mode 100644
index 0000000000000000000000000000000000000000..24d364803ef5dd08f6fad75aedf7b288ccb62080
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/3d_body_mesh.md
@@ -0,0 +1,3 @@
+# 3D人体网格模型数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tasks/3d_hand_keypoint.md b/vendor/ViTPose/docs/zh_cn/tasks/3d_hand_keypoint.md
new file mode 100644
index 0000000000000000000000000000000000000000..b0843a9f8fb6b751d6d9735a9c3d3d91951d3624
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tasks/3d_hand_keypoint.md
@@ -0,0 +1,3 @@
+# 3D手部关键点数据集
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/0_config.md b/vendor/ViTPose/docs/zh_cn/tutorials/0_config.md
new file mode 100644
index 0000000000000000000000000000000000000000..024f3c6d65ea31a57c37a0f6b3c0e17fa2625048
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/0_config.md
@@ -0,0 +1,234 @@
+# 教程 0: 模型配置文件
+
+我们使用 python 文件作为配置文件,将模块化设计和继承设计结合到配置系统中,便于进行各种实验。
+您可以在 `$MMPose/configs` 下找到所有提供的配置。如果要检查配置文件,您可以运行
+`python tools/analysis/print_config.py /PATH/TO/CONFIG` 来查看完整的配置。
+
+
+
+- [通过脚本参数修改配置](#通过脚本参数修改配置)
+- [配置文件命名约定](#配置文件命名约定)
+ - [配置系统](#配置系统)
+- [常见问题](#常见问题)
+ - [在配置中使用中间变量](#在配置中使用中间变量)
+
+
+
+## 通过脚本参数修改配置
+
+当使用 "tools/train.py" 或 "tools/test.py" 提交作业时,您可以指定 `--cfg-options` 来修改配置。
+
+- 更新配置字典链的键值。
+
+ 可以按照原始配置文件中字典的键的顺序指定配置选项。
+ 例如,`--cfg-options model.backbone.norm_eval=False` 将主干网络中的所有 BN 模块更改为 `train` 模式。
+
+- 更新配置列表内部的键值。
+
+ 一些配置字典在配置文件中会形成一个列表。例如,训练流水线 `data.train.pipeline` 通常是一个列表。
+ 例如,`[dict(type='LoadImageFromFile'), dict(type='TopDownRandomFlip', flip_prob=0.5), ...]` 。如果要将流水线中的 `'flip_prob=0.5'` 更改为 `'flip_prob=0.0'`,您可以这样指定 `--cfg-options data.train.pipeline.1.flip_prob=0.0` 。
+
+- 更新列表 / 元组的值。
+
+ 如果要更新的值是列表或元组,例如,配置文件通常设置为 `workflow=[('train', 1)]` 。
+ 如果您想更改这个键,您可以这样指定 `--cfg-options workflow="[(train,1),(val,1)]"` 。
+ 请注意,引号 \" 是必要的,以支持列表 / 元组数据类型,并且指定值的引号内 **不允许** 有空格。
+
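+`--cfg-options` 的效果等价于在 Python 中用 MMCV 的 `Config` 对配置做增量合并。下面是一个最小示意(假设已安装 mmcv,配置路径仅为示例),帮助理解这些键值是如何覆盖原配置的:
+
+```python
+from mmcv import Config
+
+cfg = Config.fromfile('configs/top_down/resnet/coco/res50_coco_256x192.py')  # 示例路径
+
+# 与命令行参数 --cfg-options model.backbone.norm_eval=False 等价
+cfg.merge_from_dict({'model.backbone.norm_eval': False})
+print(cfg.model.backbone.norm_eval)  # False
+```
+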
+## 配置文件命名约定
+
+我们按照下面的样式命名配置文件。建议贡献者也遵循同样的风格。
+
+```
+configs/{topic}/{task}/{algorithm}/{dataset}/{backbone}_[model_setting]_{dataset}_[input_size]_[technique].py
+```
+
+`{xxx}` 是必填字段,`[yyy]` 是可选字段.
+
+- `{topic}`: 主题类型,如 `body`, `face`, `hand`, `animal` 等。
+- `{task}`: 任务类型, `[2d | 3d]_[kpt | mesh]_[sview | mview]_[rgb | rgbd]_[img | vid]` 。任务类型从5个维度定义:(1)二维或三维姿态估计;(2)姿态表示形式:关键点 (kpt)、网格 (mesh) 或密集姿态 (dense); (3)单视图 (sview) 或多视图 (mview);(4)RGB 或 RGBD; 以及(5)图像 (img) 或视频 (vid)。例如, `2d_kpt_sview_rgb_img`, `3d_kpt_sview_rgb_vid`, 等等。
+- `{algorithm}`: 算法类型,例如,`associative_embedding`, `deeppose` 等。
+- `{dataset}`: 数据集名称,例如, `coco` 等。
+- `{backbone}`: 主干网络类型,例如,`res50` (ResNet-50) 等。
+- `[model setting]`: 对某些模型的特定设置。
+- `[input_size]`: 模型的输入大小。
+- `[technique]`: 一些特定的技术,包括损失函数,数据增强,训练技巧等,例如, `wingloss`, `udp`, `fp16` 等.
+
+### 配置系统
+
+- 基于热图的二维自顶向下的人体姿态估计实例
+
+ 为了帮助用户对完整的配置结构和配置系统中的模块有一个基本的了解,
+ 我们下面对配置文件 'https://github.com/open-mmlab/mmpose/tree/e1ec589884235bee875c89102170439a991f8450/configs/top_down/resnet/coco/res50_coco_256x192.py' 作简要的注释。
+ 有关每个模块中每个参数的更详细用法和替代方法,请参阅 API 文档。
+
+ ```python
+ # 运行设置
+ log_level = 'INFO' # 日志记录级别
+ load_from = None # 从给定路径加载预训练模型
+ resume_from = None # 从给定路径恢复模型权重文件,将从保存模型权重文件时的轮次开始继续训练
+ dist_params = dict(backend='nccl') # 设置分布式训练的参数,也可以设置端口
+ workflow = [('train', 1)] # 运行程序的工作流。[('train', 1)] 表示只有一个工作流,名为 'train' 的工作流执行一次
+ checkpoint_config = dict( # 设置模型权重文件钩子的配置,请参阅 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py 的实现
+ interval=10) # 保存模型权重文件的间隔
+ evaluation = dict( # 训练期间评估的配置
+ interval=10, # 执行评估的间隔
+ metric='mAP', # 采用的评价指标
+ key_indicator='AP') # 将 `AP` 设置为关键指标以保存最佳模型权重文件
+ # 优化器
+ optimizer = dict(
+ # 用于构建优化器的配置,支持 (1). PyTorch 中的所有优化器,
+ # 其参数也与 PyTorch 中的相同. (2). 自定义的优化器
+ # 它们通过 `constructor` 构建,可参阅 "tutorials/4_new_modules.md"
+ # 的实现。
+ type='Adam', # 优化器的类型, 可参阅 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 获取更多细节
+ lr=5e-4, # 学习率, 参数的详细用法见 PyTorch 文档
+ )
+ optimizer_config = dict(grad_clip=None) # 不限制梯度的范围
+ # 学习率调整策略
+ lr_config = dict( # 用于注册 LrUpdater 钩子的学习率调度器的配置
+ policy='step', # 调整策略, 还支持 CosineAnnealing, Cyclic, 等等,请参阅 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9 获取支持的 LrUpdater 细节
+ warmup='linear', # 使用的预热类型,它可以是 None (不使用预热), 'constant', 'linear' 或者 'exp'.
+ warmup_iters=500, # 预热的迭代次数或者轮数
+ warmup_ratio=0.001, # 预热开始时使用的学习率,等于预热比 (warmup_ratio) * 初始学习率
+ step=[170, 200]) # 降低学习率的步数
+ total_epochs = 210 # 训练模型的总轮数
+ log_config = dict( # 注册日志记录器钩子的配置
+ interval=50, # 打印日志的间隔
+ hooks=[
+ dict(type='TextLoggerHook'), # 用来记录训练过程的日志记录器
+ # dict(type='TensorboardLoggerHook') # 也支持 Tensorboard 日志记录器
+ ])
+
+ channel_cfg = dict(
+ num_output_channels=17, # 关键点头部的输出通道数
+ dataset_joints=17, # 数据集的关节数
+ dataset_channel=[ # 数据集支持的通道数
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[ # 输出通道数
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ # 模型设置
+ model = dict( # 模型的配置
+ type='TopDown', # 模型的类型
+ pretrained='torchvision://resnet50', # 预训练模型的 url / 网址
+ backbone=dict( # 主干网络的字典
+ type='ResNet', # 主干网络的名称
+ depth=50), # ResNet 模型的深度
+ keypoint_head=dict( # 关键点头部的字典
+ type='TopdownHeatmapSimpleHead', # 关键点头部的名称
+ in_channels=2048, # 关键点头部的输入通道数
+ out_channels=channel_cfg['num_output_channels'], # 关键点头部的输出通道数
+ loss_keypoint=dict( # 关键点损失函数的字典
+ type='JointsMSELoss', # 关键点损失函数的名称
+ use_target_weight=True)), # 在损失计算中是否考虑目标权重
+ train_cfg=dict(), # 训练超参数的配置
+ test_cfg=dict( # 测试超参数的配置
+ flip_test=True, # 推断时是否使用翻转测试
+ post_process='default', # 使用“默认” (default) 后处理方法。
+ shift_heatmap=True, # 移动并对齐翻转的热图以获得更高的性能
+ modulate_kernel=11)) # 用于调制的高斯核大小。仅用于 "post_process='unbiased'"
+
+ data_cfg = dict(
+ image_size=[192, 256], # 模型输入分辨率的大小
+ heatmap_size=[48, 64], # 输出热图的大小
+ num_output_channels=channel_cfg['num_output_channels'], # 输出通道数
+ num_joints=channel_cfg['dataset_joints'], # 关节点数量
+ dataset_channel=channel_cfg['dataset_channel'], # 数据集支持的通道数
+ inference_channel=channel_cfg['inference_channel'], # 输出通道数
+ soft_nms=False, # 推理过程中是否执行 soft_nms
+ nms_thr=1.0, # 非极大抑制阈值
+ oks_thr=0.9, # nms 期间 oks(对象关键点相似性)得分阈值
+ vis_thr=0.2, # 关键点可见性阈值
+ use_gt_bbox=False, # 测试时是否使用人工标注的边界框
+ det_bbox_thr=0.0, # 检测到的边界框分数的阈值。当 'use_gt_bbox=True' 时使用
+ bbox_file='data/coco/person_detection_results/' # 边界框检测文件的路径
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ )
+
+ train_pipeline = [
+ dict(type='LoadImageFromFile'), # 从文件加载图像
+ dict(type='TopDownRandomFlip', # 执行随机翻转增强
+ flip_prob=0.5), # 执行翻转的概率
+ dict(
+ type='TopDownHalfBodyTransform', # TopDownHalfBodyTransform 数据增强的配置
+ num_joints_half_body=8, # 执行半身变换的阈值
+ prob_half_body=0.3), # 执行翻转的概率
+ dict(
+ type='TopDownGetRandomScaleRotation', # TopDownGetRandomScaleRotation 的配置
+ rot_factor=40, # 旋转到 ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor=0.5), # 缩放到 ``[1-scale_factor, 1+scale_factor]``.
+ dict(type='TopDownAffine', # 对图像进行仿射变换形成输入
+ use_udp=False), # 不使用无偏数据处理
+ dict(type='ToTensor'), # 将其他类型转换为张量类型流水线
+ dict(
+ type='NormalizeTensor', # 标准化输入张量
+ mean=[0.485, 0.456, 0.406], # 要标准化的不同通道的平均值
+ std=[0.229, 0.224, 0.225]), # 要标准化的不同通道的标准差
+ dict(type='TopDownGenerateTarget', # 生成热图目标。支持不同的编码类型
+ sigma=2), # 热图高斯的 Sigma
+ dict(
+ type='Collect', # 收集决定数据中哪些键应该传递到检测器的流水线
+ keys=['img', 'target', 'target_weight'], # 输入键
+ meta_keys=[ # 输入的元键
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+ ]
+
+ val_pipeline = [
+ dict(type='LoadImageFromFile'), # 从文件加载图像
+ dict(type='TopDownAffine'), # 对图像进行仿射变换形成输入
+ dict(type='ToTensor'), # ToTensor 的配置
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406], # 要标准化的不同通道的平均值
+ std=[0.229, 0.224, 0.225]), # 要标准化的不同通道的标准差
+ dict(
+ type='Collect', # 收集决定数据中哪些键应该传递到检测器的流水线
+ keys=['img'], # 输入键
+ meta_keys=[ # 输入的元键
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+ ]
+
+ test_pipeline = val_pipeline
+
+ data_root = 'data/coco' # 数据集的配置
+ data = dict(
+ samples_per_gpu=64, # 训练期间每个 GPU 的 Batch size
+ workers_per_gpu=2, # 每个 GPU 预取数据的 worker 个数
+ val_dataloader=dict(samples_per_gpu=32), # 验证期间每个 GPU 的 Batch size
+ test_dataloader=dict(samples_per_gpu=32), # 测试期间每个 GPU 的 Batch size
+ train=dict( # 训练数据集的配置
+ type='TopDownCocoDataset', # 数据集的名称
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', # 标注文件的路径
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict( # 验证数据集的配置
+ type='TopDownCocoDataset', # 数据集的名称
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', # 标注文件的路径
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict( # 测试数据集的配置
+ type='TopDownCocoDataset', # 数据集的名称
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', # 标注文件的路径
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ )
+
+ ```
+
+## 常见问题
+
+### 在配置中使用中间变量
+
+配置文件中使用了一些中间变量,如 `train_pipeline`/`val_pipeline`/`test_pipeline` 等。
+
+例如,我们首先要定义 `train_pipeline`/`val_pipeline`/`test_pipeline`,然后将它们传递到 `data` 中。
+因此,`train_pipeline`/`val_pipeline`/`test_pipeline` 是中间变量。
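+
+下面是一个最小示意(省略了无关字段),说明修改中间变量后,需要重新传入 `data` 的相应字段才能生效:
+
+```python
+# 中间变量:先定义流水线,再在 data 中引用
+val_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='TopDownAffine'),
+    dict(type='ToTensor'),
+]
+
+data = dict(
+    val=dict(pipeline=val_pipeline),
+    test=dict(pipeline=val_pipeline),  # test 复用同一个中间变量
+)
+```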
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/1_finetune.md b/vendor/ViTPose/docs/zh_cn/tutorials/1_finetune.md
new file mode 100644
index 0000000000000000000000000000000000000000..55c2f55194acec73a88d3856ff40aaaec06e2b3c
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/1_finetune.md
@@ -0,0 +1,153 @@
+# 教程 1:如何微调模型
+
+在 COCO 数据集上进行预训练,然后在其他数据集(如 COCO-WholeBody 数据集)上进行微调,往往可以提升模型的效果。
+本教程介绍如何使用[模型库](https://mmpose.readthedocs.io/en/latest/modelzoo.html)中的预训练模型,并在其他数据集上进行微调。
+
+
+
+- [概要](#概要)
+- [修改 Head](#修改网络头)
+- [修改数据集](#修改数据集)
+- [修改训练策略](#修改训练策略)
+- [使用预训练模型](#使用预训练模型)
+
+
+
+## 概要
+
+对新数据集上的模型微调需要两个步骤:
+
+1. 支持新数据集。详情参见 [教程 2:如何增加新数据集](2_new_dataset.md)
+2. 修改配置文件。这部分将在本教程中做具体讨论。
+
+例如,如果想要在自定义数据集上,微调 COCO 预训练的模型,则需要修改 [配置文件](0_config.md) 中 网络头、数据集、训练策略、预训练模型四个部分。
+
+## 修改网络头
+
+如果自定义数据集的关键点个数,与 COCO 不同,则需要相应修改 `keypoint_head` 中的 `out_channels` 参数。
+网络头(head)的最后一层的预训练参数不会被载入,而其他层的参数都会被正常载入。
+例如,COCO-WholeBody 拥有 133 个关键点,因此需要把 17 (COCO 数据集的关键点数目) 改为 133。
+
+```python
+channel_cfg = dict(
+ num_output_channels=133, # 从 17 改为 133
+ dataset_joints=133, # 从 17 改为 133
+ dataset_channel=[
+ list(range(133)), # 从 17 改为 133
+ ],
+ inference_channel=list(range(133))) # 从 17 改为 133
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth',
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'], # 已对应修改
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='unbiased',
+ shift_heatmap=True,
+ modulate_kernel=17))
+```
+
+其中, `pretrained='https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w48-8ef0771d.pth'` 表示采用 ImageNet 预训练的权重,初始化主干网络(backbone)。
+不过,`pretrained` 只会初始化主干网络(backbone),而不会初始化网络头(head)。因此,我们模型微调时的预训练权重一般通过 `load_from` 指定,而不是使用 `pretrained` 指定。
+
+## 修改数据集
+
+MMPose 支持十余种不同的数据集,包括 COCO, COCO-WholeBody, MPII, MPII-TRB 等数据集。
+用户可将自定义数据集转换为已有数据集格式,并修改如下字段。
+
+```python
+data_root = 'data/coco'
+data = dict(
+ samples_per_gpu=32,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=32),
+ test_dataloader=dict(samples_per_gpu=32),
+ train=dict(
+ type='TopDownCocoWholeBodyDataset', # 对应修改数据集名称
+ ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', # 修改数据集标签路径
+ img_prefix=f'{data_root}/train2017/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline),
+ val=dict(
+ type='TopDownCocoWholeBodyDataset', # 对应修改数据集名称
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', # 修改数据集标签路径
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline),
+ test=dict(
+ type='TopDownCocoWholeBodyDataset', # 对应修改数据集名称
+ ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', # 修改数据集标签路径
+ img_prefix=f'{data_root}/val2017/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline)
+)
+```
+
+## 修改训练策略
+
+通常情况下,微调模型时设置较小的学习率和训练轮数,即可取得较好效果。
+
+```python
+# 优化器
+optimizer = dict(
+ type='Adam',
+ lr=5e-4, # 可以适当减小
+)
+optimizer_config = dict(grad_clip=None)
+# 学习策略
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[170, 200]) # 可以适当减小
+total_epochs = 210 # 可以适当减小
+```
+
+## 使用预训练模型
+
+网络设置中的 `pretrained`,仅会在主干网络模型上加载预训练参数。若要载入整个网络的预训练参数,需要通过 `load_from` 指定模型文件路径或模型链接。
+
+```python
+# 将预训练模型用于整个 HRNet 网络
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288_dark-741844ba_20200812.pth' # 模型路径可以在 model zoo 中找到
+```
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/2_new_dataset.md b/vendor/ViTPose/docs/zh_cn/tutorials/2_new_dataset.md
new file mode 100644
index 0000000000000000000000000000000000000000..53d43062d2f407fc7396bcba6821a60369df412f
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/2_new_dataset.md
@@ -0,0 +1,316 @@
+# 教程 2: 增加新的数据集
+
+## 将数据集转化为COCO格式
+
+我们首先需要将自定义数据集,转换为COCO数据集格式。
+
+COCO数据集格式的json标注文件有以下关键字:
+
+```python
+'images': [
+ {
+ 'file_name': '000000001268.jpg',
+ 'height': 427,
+ 'width': 640,
+ 'id': 1268
+ },
+ ...
+],
+'annotations': [
+ {
+ 'segmentation': [[426.36,
+ ...
+ 424.34,
+ 223.3]],
+ 'keypoints': [0,0,0,
+ 0,0,0,
+ 0,0,0,
+ 427,220,2,
+ 443,222,2,
+ 414,228,2,
+ 449,232,2,
+ 408,248,1,
+ 454,261,2,
+ 0,0,0,
+ 0,0,0,
+ 411,287,2,
+ 431,287,2,
+ 0,0,0,
+ 458,265,2,
+ 0,0,0,
+ 466,300,1],
+ 'num_keypoints': 10,
+ 'area': 3894.5826,
+ 'iscrowd': 0,
+ 'image_id': 1268,
+ 'bbox': [402.34, 205.02, 65.26, 88.45],
+ 'category_id': 1,
+ 'id': 215218
+ },
+ ...
+],
+'categories': [
+ {'id': 1, 'name': 'person'},
+ ]
+```
+
+Json文件中必须包含以下三个关键字:
+
+- `images`: 包含图片信息的列表,提供图片的 `file_name`, `height`, `width` 和 `id` 等信息。
+- `annotations`: 包含实例标注的列表。
+- `categories`: 包含类别名称 ('person') 和对应的 ID (1)。
+
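+转换完成后,可以用一个简单的脚本(仅为示意,`custom_train.json` 为假设的文件名)检查这三个关键字是否齐全:
+
+```python
+import json
+
+with open('data/custom/annotations/custom_train.json') as f:
+    ann = json.load(f)
+
+# COCO 格式要求的三个关键字
+for key in ('images', 'annotations', 'categories'):
+    assert key in ann, f'缺少关键字: {key}'
+
+print('图片数:', len(ann['images']))
+print('标注数:', len(ann['annotations']))
+print('类别:', [c['name'] for c in ann['categories']])
+```
+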
+## 为自定义数据集创建 dataset_info 数据集配置文件
+
+在如下位置,添加一个数据集配置文件。
+
+```
+configs/_base_/datasets/custom.py
+```
+
+数据集配置文件的样例如下:
+
+`keypoint_info` 包含每个关键点的信息,其中:
+
+1. `name`: 代表关键点的名称。一个数据集的每个关键点,名称必须唯一。
+2. `id`: 关键点的标识号。
+3. `color`: ([B, G, R]) 用于可视化关键点。
+4. `type`: 分为 'upper' 和 'lower' 两种,用于数据增强。
+5. `swap`: 表示与当前关键点,“镜像对称”的关键点名称。
+
+`skeleton_info` 包含关键点之间的连接关系,主要用于可视化。
+
+`joint_weights` 可以为不同的关键点设置不同的损失权重,用于训练。
+
+`sigmas` 用于计算 OKS 得分,具体内容请参考 [keypoints-eval](https://cocodataset.org/#keypoints-eval)。
+
+```
+dataset_info = dict(
+ dataset_name='coco',
+ paper_info=dict(
+ author='Lin, Tsung-Yi and Maire, Michael and '
+ 'Belongie, Serge and Hays, James and '
+ 'Perona, Pietro and Ramanan, Deva and '
+ r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+ title='Microsoft coco: Common objects in context',
+ container='European conference on computer vision',
+ year='2014',
+ homepage='http://cocodataset.org/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
+```
+
+## 创建自定义数据集类
+
+1. 首先在 mmpose/datasets/datasets 文件夹创建一个包,比如命名为 custom。
+
+2. 定义数据集类,并且注册这个类。
+
+   ```python
+   @DATASETS.register_module(name='MyCustomDataset')
+   class MyCustomDataset(SomeOtherBaseClassAsPerYourNeed):
+       ...
+   ```
+
+3. 为你的自定义类别创建 `mmpose/datasets/datasets/custom/__init__.py`
+
+4. 更新 `mmpose/datasets/__init__.py`
+
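+其中第 3、4 步的 `__init__.py` 大致如下(仅为示意,假设数据集类定义在 `my_custom_dataset.py` 中):
+
+```python
+# mmpose/datasets/datasets/custom/__init__.py
+from .my_custom_dataset import MyCustomDataset
+
+__all__ = ['MyCustomDataset']
+```
+
+同样地,在 `mmpose/datasets/__init__.py` 中补充对 `MyCustomDataset` 的导入,注册器才能在解析配置时找到它。
+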
+## 创建和修改训练配置文件
+
+创建和修改训练配置文件,来使用你的自定义数据集。
+
+在 `configs/my_custom_config.py` 中,修改如下几行。
+
+```python
+...
+# dataset settings
+dataset_type = 'MyCustomDataset'
+...
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file='path/to/your/train/json',
+ img_prefix='path/to/your/train/img',
+ ...),
+ val=dict(
+ type=dataset_type,
+ ann_file='path/to/your/val/json',
+ img_prefix='path/to/your/val/img',
+ ...),
+ test=dict(
+ type=dataset_type,
+ ann_file='path/to/your/test/json',
+ img_prefix='path/to/your/test/img',
+ ...))
+...
+```
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/3_data_pipeline.md b/vendor/ViTPose/docs/zh_cn/tutorials/3_data_pipeline.md
new file mode 100644
index 0000000000000000000000000000000000000000..d2d48662ae53e7e3a7ba60f61f612ea1e227107d
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/3_data_pipeline.md
@@ -0,0 +1,151 @@
+# 教程 3: 自定义数据前处理流水线
+
+## 设计数据前处理流水线
+
+参照惯例,MMPose 使用 `Dataset` 和 `DataLoader` 实现多进程数据加载。
+`Dataset` 返回一个字典,作为模型的输入。
+由于姿态估计任务的数据大小不一定相同(图片大小,边界框大小等),MMPose 使用 MMCV 中的 `DataContainer` 收集和分配不同大小的数据。
+详情可见[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py)。
+
+数据前处理流水线和数据集是相互独立的。
+通常,数据集定义如何处理标注文件,而数据前处理流水线将原始数据处理成网络输入。
+数据前处理流水线包含一系列操作。
+每个操作都输入一个字典(dict),新增/更新/删除相关字段,最终输出更新后的字典作为下一个操作的输入。
+
+数据前处理流水线的操作可以被分类为数据加载、预处理、格式化和生成监督等(后文将详细介绍)。
+
+这里以 Simple Baseline (ResNet50) 的数据前处理流水线为例:
+
+```python
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownHalfBodyTransform', num_joints_half_body=8, prob_half_body=0.3),
+ dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=['img'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+```
+
+下面列出每个操作新增/更新/删除的相关字典字段。
+
+### 数据加载
+
+`LoadImageFromFile`
+
+- 新增: img, img_file
+
+### 预处理
+
+`TopDownRandomFlip`
+
+- 更新: img, joints_3d, joints_3d_visible, center
+
+`TopDownHalfBodyTransform`
+
+- 更新: center, scale
+
+`TopDownGetRandomScaleRotation`
+
+- 更新: scale, rotation
+
+`TopDownAffine`
+
+- 更新: img, joints_3d, joints_3d_visible
+
+`NormalizeTensor`
+
+- 更新: img
+
+### 生成监督
+
+`TopDownGenerateTarget`
+
+- 新增: target, target_weight
+
+### 格式化
+
+`ToTensor`
+
+- 更新: 'img'
+
+`Collect`
+
+- 新增: img_meta (其包含的字段由 `meta_keys` 指定)
+- 删除: 除了 `keys` 指定以外的所有字段
+
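+作为补充,下面用一段与 MMPose 无关的极简代码(仅为示意)说明上述约定:每个操作接收一个字典,新增或更新字段后返回该字典,多个操作依次串联执行。
+
+```python
+class ToyLoad:
+    def __call__(self, results):
+        results['img'] = 'fake image data'  # 新增: img
+        return results
+
+
+class ToyFlip:
+    def __call__(self, results):
+        results['flipped'] = True  # 新增: flipped
+        return results
+
+
+pipeline = [ToyLoad(), ToyFlip()]
+results = dict(image_file='000000000139.jpg')
+for op in pipeline:
+    results = op(results)
+print(results)  # {'image_file': ..., 'img': ..., 'flipped': True}
+```
+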
+## 扩展和使用自定义流水线
+
+1. 将一个新的处理流水线操作写入任一文件中,例如 `my_pipeline.py`。它以一个字典作为输入,并返回一个更新后的字典。
+
+ ```python
+ from mmpose.datasets import PIPELINES
+
+ @PIPELINES.register_module()
+ class MyTransform:
+
+ def __call__(self, results):
+ results['dummy'] = True
+ return results
+ ```
+
+1. 导入定义好的新类。
+
+ ```python
+ from .my_pipeline import MyTransform
+ ```
+
+1. 在配置文件中使用它。
+
+ ```python
+ train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownHalfBodyTransform', num_joints_half_body=8, prob_half_body=0.3),
+ dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='MyTransform'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=2),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+ ]
+ ```
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/4_new_modules.md b/vendor/ViTPose/docs/zh_cn/tutorials/4_new_modules.md
new file mode 100644
index 0000000000000000000000000000000000000000..4a8db97c4bc2fb943240535896b15d1784bd9314
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/4_new_modules.md
@@ -0,0 +1,214 @@
+# 教程 4: 增加新的模块
+
+## 自定义优化器
+
+在本教程中,我们将介绍如何为项目定制优化器.
+假设想要添加一个名为 `MyOptimizer` 的优化器,它有 `a`,`b` 和 `c` 三个参数。
+那么首先需要在一个文件中实现该优化器,例如 `mmpose/core/optimizer/my_optimizer.py`:
+
+```python
+from mmcv.runner import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class MyOptimizer(Optimizer):
+
+    def __init__(self, a, b, c):
+        # 在此处完成优化器参数的初始化(仅为占位示例)
+        ...
+
+```
+
+然后需要将其添加到 `mmpose/core/optimizer/__init__.py` 中,从而让注册器可以找到这个新的优化器并添加它:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+之后,可以在配置文件的 `optimizer` 字段中使用 `MyOptimizer`。
+在配置中,优化器由 `optimizer` 字段所定义,如下所示:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+若要使用自己新定义的优化器,可以将字段修改为:
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+我们已经支持使用 PyTorch 实现的所有优化器,
+只需要更改配置文件的 `optimizer` 字段。
+例如:若用户想要使用 `Adam` 优化器,只需要做出如下修改(尽管这可能会造成性能下降)。
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+用户可以直接根据 [PyTorch API 文档](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim)
+对参数进行设置。
+
+## 自定义优化器构造器
+
+某些模型可能对不同层的参数有特定的优化设置,例如 BatchNorm 层的权值衰减。
+用户可以通过自定义优化器构造函数来进行这些细粒度的参数调整。
+
+```python
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner import OPTIMIZER_BUILDERS, OPTIMIZERS
+from mmpose.utils import get_root_logger
+from .cocktail_optimizer import CocktailOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class CocktailOptimizerConstructor:
+
+    def __init__(self, optimizer_cfg, paramwise_cfg=None):
+        self.optimizer_cfg = optimizer_cfg
+        self.paramwise_cfg = paramwise_cfg
+
+    def __call__(self, model):
+        # 在此处根据 paramwise_cfg 为不同参数组设置细粒度超参数,并构建优化器
+        # my_optimizer 仅为占位名称
+        ...
+        return my_optimizer
+
+```
+
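+定义好构造器后,一般可以在配置的 `optimizer` 字段中通过 `constructor` 键来指定它(以下仅为示意,具体字段以 mmcv 的 `build_optimizer` 实现为准):
+
+```python
+optimizer = dict(
+    constructor='CocktailOptimizerConstructor',  # 指定自定义的优化器构造器
+    type='CocktailOptimizer',
+    paramwise_cfg=dict())  # 细粒度的参数设置
+```
+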
+## 开发新组件
+
+MMPose 将模型组件分为 3 种基础模型:
+
+- 检测器(detector):整个检测器模型流水线,通常包含一个主干网络(backbone)和关键点头(keypoint_head)。
+- 主干网络(backbone):通常为一个用于提取特征的 FCN 网络,例如 ResNet,HRNet。
+- 关键点头(keypoint_head):用于姿势估计的组件,通常包括一系列反卷积层。
+
+1. 创建一个新文件 `mmpose/models/backbones/my_model.py`.
+
+```python
+import torch.nn as nn
+
+from ..builder import BACKBONES
+
+@BACKBONES.register_module()
+class MyModel(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x): # should return a tuple
+ pass
+
+ def init_weights(self, pretrained=None):
+ pass
+```
+
+2. 在 `mmpose/models/backbones/__init__.py` 中导入新的主干网络.
+
+```python
+from .my_model import MyModel
+```
+
+3. 创建一个新文件 `mmpose/models/keypoint_heads/my_head.py`.
+
+用户可以通过继承 `nn.Module` 编写一个新的关键点头,
+并重写 `init_weights(self)` 和 `forward(self, x)` 方法。
+
+```python
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class MyHead(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x):
+ pass
+
+ def init_weights(self):
+ pass
+```
+
+4. 在 `mmpose/models/keypoint_heads/__init__.py` 中导入新的关键点头
+
+```python
+from .my_head import MyHead
+```
+
+5. 在配置文件中使用它。
+
+对于自顶向下的 2D 姿态估计模型,我们将模型类型设置为 `TopDown`。
+
+```python
+model = dict(
+ type='TopDown',
+ backbone=dict(
+ type='MyModel',
+ arg1=xxx,
+ arg2=xxx),
+ keypoint_head=dict(
+ type='MyHead',
+ arg1=xxx,
+ arg2=xxx))
+```
+
+### 添加新的损失函数
+
+假设用户想要为关键点估计添加一个名为 `MyLoss`的新损失函数。
+为了添加一个新的损失函数,用户需要在 `mmpose/models/losses/my_loss.py` 下实现该函数。
+其中,装饰器 `weighted_loss` 使损失函数能够为每个元素加权。
+
+```python
+import torch
+import torch.nn as nn
+
+from mmpose.models import LOSSES
+
+def my_loss(pred, target):
+ assert pred.size() == target.size() and target.numel() > 0
+ loss = torch.abs(pred - target)
+ loss = torch.mean(loss)
+ return loss
+
+@LOSSES.register_module()
+class MyLoss(nn.Module):
+
+ def __init__(self, use_target_weight=False):
+ super(MyLoss, self).__init__()
+        self.criterion = my_loss  # my_loss 是上面定义的函数,这里不应加括号调用
+ self.use_target_weight = use_target_weight
+
+ def forward(self, output, target, target_weight):
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ loss = 0.
+
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ loss += self.criterion(
+ heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx])
+ else:
+ loss += self.criterion(heatmap_pred, heatmap_gt)
+
+ return loss / num_joints
+```
+
+之后,用户需要把它添加进 `mmpose/models/losses/__init__.py`。
+
+```python
+from .my_loss import MyLoss, my_loss
+
+```
+
+若要使用新的损失函数,可以修改模型中的 `loss_keypoint` 字段。
+
+```python
+loss_keypoint=dict(type='MyLoss', use_target_weight=False)
+```
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/5_export_model.md b/vendor/ViTPose/docs/zh_cn/tutorials/5_export_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..341d79acb4cb68cfc3ece54771d774c3eb7c1783
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/5_export_model.md
@@ -0,0 +1,48 @@
+# 教程 5:如何导出模型为 onnx 格式
+
+开放式神经网络交换格式(Open Neural Network Exchange,即 [ONNX](https://onnx.ai/))是各种框架共用的一种模型交换格式,AI 开发人员可以方便将模型部署到所需的框架之中。
+
+
+
+- [支持的模型](#支持的模型)
+- [如何使用](#如何使用)
+ - [准备工作](#准备工作)
+
+
+
+## 支持的模型
+
+MMPose 支持将训练好的各种 Pytorch 模型导出为 ONNX 格式。支持的模型包括但不限于:
+
+- ResNet
+- HRNet
+- HigherHRNet
+
+## 如何使用
+
+用户可以使用这里的 [脚本](/tools/deployment/pytorch2onnx.py) 来导出 ONNX 格式。
+
+### 准备工作
+
+首先,安装 onnx
+
+```shell
+pip install onnx onnxruntime
+```
+
+MMPose 提供了一个 python 脚本,将 MMPose 训练的 pytorch 模型导出到 ONNX。
+
+```shell
+python tools/deployment/pytorch2onnx.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--shape ${SHAPE}] \
+ [--verify] [--show] [--output-file ${OUTPUT_FILE}] [--is-localizer] [--opset-version ${VERSION}]
+```
+
+可选参数:
+
+- `--shape`: 模型输入张量的形状。对于 2D 关键点检测模型(如 HRNet),输入形状应当为 `$batch $channel $height $width` (例如,`1 3 256 192`);
+- `--verify`: 是否对导出模型进行验证,验证项包括是否可运行,数值是否正确等。如果没有手动指定,默认为 `False`。
+- `--show`: 是否打印导出模型的结构。如果没有手动指定,默认为 `False`。
+- `--output-file`: 导出的 onnx 模型名。如果没有手动指定,默认为 `tmp.onnx`。
+- `--opset-version`:决定 onnx 的执行版本,MMPose 推荐用户使用高版本(例如 11 版本)的 onnx 以确保稳定性。如果没有手动指定,默认为 `11`。
+
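+导出完成后,可以用 onnxruntime 做一次简单的推理冒烟测试(仅为示意,假设导出文件为 `tmp.onnx`,输入形状为 `1 3 256 192`):
+
+```python
+import numpy as np
+import onnxruntime as ort
+
+sess = ort.InferenceSession('tmp.onnx')
+input_name = sess.get_inputs()[0].name
+dummy = np.random.rand(1, 3, 256, 192).astype(np.float32)
+
+heatmaps = sess.run(None, {input_name: dummy})[0]
+print('输出热图形状:', heatmaps.shape)  # 例如 17 个关键点的 top-down 模型约为 (1, 17, 64, 48)
+```
+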
+如果发现提供的模型权重文件没有被成功导出,或者存在精度损失,可以在本 repo 下提出问题(issue)。
diff --git a/vendor/ViTPose/docs/zh_cn/tutorials/6_customize_runtime.md b/vendor/ViTPose/docs/zh_cn/tutorials/6_customize_runtime.md
new file mode 100644
index 0000000000000000000000000000000000000000..979ba8a95e975ea6362e9c7490c26e832787ebe8
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/tutorials/6_customize_runtime.md
@@ -0,0 +1,3 @@
+# 教程 6: 自定义运行时设置
+
+内容建设中……
diff --git a/vendor/ViTPose/docs/zh_cn/useful_tools.md b/vendor/ViTPose/docs/zh_cn/useful_tools.md
new file mode 100644
index 0000000000000000000000000000000000000000..a85f7a1e45571ca0d4e7cde5042b4ea93441ebf4
--- /dev/null
+++ b/vendor/ViTPose/docs/zh_cn/useful_tools.md
@@ -0,0 +1,3 @@
+# 常用工具
+
+内容建设中……
diff --git a/vendor/ViTPose/figures/Throughput.png b/vendor/ViTPose/figures/Throughput.png
new file mode 100644
index 0000000000000000000000000000000000000000..b13edca0906b38a3f16a7206db867b1b7d7591ef
Binary files /dev/null and b/vendor/ViTPose/figures/Throughput.png differ
diff --git a/vendor/ViTPose/logs/vitpose-b-simple.log.json b/vendor/ViTPose/logs/vitpose-b-simple.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..03a8b909296919cee3308be3562de0edefbeb651
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-b-simple.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+8c33819", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.06604, "heatmap_loss": 0.003, "acc_pose": 0.01043, "loss": 0.003, "grad_norm": 0.38666, "time": 0.39767}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00217, "acc_pose": 0.08058, "loss": 0.00217, "grad_norm": 0.06469, "time": 0.24281}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.0021, "acc_pose": 0.15628, "loss": 0.0021, "grad_norm": 0.08979, "time": 0.2394}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00204, "acc_pose": 0.2118, "loss": 0.00204, "grad_norm": 0.14474, "time": 0.23907}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00187, "acc_pose": 0.28084, "loss": 0.00187, "grad_norm": 0.14126, "time": 0.23857}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05567, "heatmap_loss": 0.00168, "acc_pose": 0.37635, "loss": 0.00168, "grad_norm": 0.19422, "time": 0.29901}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00158, "acc_pose": 0.42443, "loss": 0.00158, "grad_norm": 0.16822, "time": 0.24035}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00151, "acc_pose": 0.44287, "loss": 0.00151, "grad_norm": 0.138, "time": 0.30115}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00155, "acc_pose": 0.45717, "loss": 0.00155, "grad_norm": 0.21266, "time": 0.23907}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00139, "acc_pose": 0.49205, "loss": 0.00139, "grad_norm": 0.12632, "time": 0.23874}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05619, "heatmap_loss": 0.00138, "acc_pose": 0.52319, "loss": 0.00138, "grad_norm": 0.15, "time": 0.29848}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00129, "acc_pose": 0.53713, "loss": 0.00129, "grad_norm": 0.0895, "time": 0.2398}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00127, "acc_pose": 0.55022, "loss": 0.00127, "grad_norm": 0.09171, "time": 0.23905}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00126, "acc_pose": 0.55783, "loss": 0.00126, "grad_norm": 0.10844, "time": 0.23849}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00125, "acc_pose": 0.5577, "loss": 0.00125, "grad_norm": 0.12846, "time": 0.23861}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05603, "heatmap_loss": 0.0012, "acc_pose": 0.58006, "loss": 0.0012, "grad_norm": 0.10276, "time": 0.29732}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00118, "acc_pose": 0.58914, "loss": 0.00118, "grad_norm": 0.07626, "time": 0.23964}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00119, "acc_pose": 0.5981, "loss": 0.00119, "grad_norm": 0.13942, "time": 0.23998}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00049, "heatmap_loss": 0.00116, "acc_pose": 0.60457, "loss": 0.00116, "grad_norm": 0.09702, "time": 0.2392}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00115, "acc_pose": 0.60457, "loss": 0.00115, "grad_norm": 0.08902, "time": 0.23895}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05601, "heatmap_loss": 0.00114, "acc_pose": 0.61465, "loss": 0.00114, "grad_norm": 0.11699, "time": 0.29574}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00113, "acc_pose": 0.60386, "loss": 0.00113, "grad_norm": 0.0915, "time": 0.23815}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00112, "acc_pose": 0.61607, "loss": 0.00112, "grad_norm": 0.10761, "time": 0.2381}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00111, "acc_pose": 0.62341, "loss": 0.00111, "grad_norm": 0.08451, "time": 0.23763}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00109, "acc_pose": 0.62698, "loss": 0.00109, "grad_norm": 0.082, "time": 0.23786}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05525, "heatmap_loss": 0.00108, "acc_pose": 0.63422, "loss": 0.00108, "grad_norm": 0.08735, "time": 0.29806}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00109, "acc_pose": 0.62348, "loss": 0.00109, "grad_norm": 0.11136, "time": 0.23796}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00108, "acc_pose": 0.62961, "loss": 0.00108, "grad_norm": 0.09459, "time": 0.23757}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00106, "acc_pose": 0.6341, "loss": 0.00106, "grad_norm": 0.06872, "time": 0.23733}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00106, "acc_pose": 0.64455, "loss": 0.00106, "grad_norm": 0.09621, "time": 0.23768}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05533, "heatmap_loss": 0.00105, "acc_pose": 0.64043, "loss": 0.00105, "grad_norm": 0.08922, "time": 0.29892}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00103, "acc_pose": 0.64771, "loss": 0.00103, "grad_norm": 0.08456, "time": 0.23854}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00103, "acc_pose": 0.64946, "loss": 0.00103, "grad_norm": 0.07217, "time": 0.23784}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00103, "acc_pose": 0.65472, "loss": 0.00103, "grad_norm": 0.08321, "time": 0.23835}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00103, "acc_pose": 0.65549, "loss": 0.00103, "grad_norm": 0.09728, "time": 0.23781}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05517, "heatmap_loss": 0.00104, "acc_pose": 0.65961, "loss": 0.00104, "grad_norm": 0.14034, "time": 0.29906}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.001, "acc_pose": 0.66192, "loss": 0.001, "grad_norm": 0.05771, "time": 0.23837}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.001, "acc_pose": 0.65668, "loss": 0.001, "grad_norm": 0.05233, "time": 0.23852}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.001, "acc_pose": 0.66936, "loss": 0.001, "grad_norm": 0.08233, "time": 0.23854}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00099, "acc_pose": 0.66416, "loss": 0.00099, "grad_norm": 0.05946, "time": 0.23728}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05542, "heatmap_loss": 0.001, "acc_pose": 0.66572, "loss": 0.001, "grad_norm": 0.10617, "time": 0.30054}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00099, "acc_pose": 0.6701, "loss": 0.00099, "grad_norm": 0.08829, "time": 0.23842}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00098, "acc_pose": 0.67829, "loss": 0.00098, "grad_norm": 0.05567, "time": 0.23885}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00098, "acc_pose": 0.67141, "loss": 0.00098, "grad_norm": 0.0712, "time": 0.23733}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00097, "acc_pose": 0.67683, "loss": 0.00097, "grad_norm": 0.0582, "time": 0.23804}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05555, "heatmap_loss": 0.00099, "acc_pose": 0.67833, "loss": 0.00099, "grad_norm": 0.09599, "time": 0.29633}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00097, "acc_pose": 0.67067, "loss": 0.00097, "grad_norm": 0.07639, "time": 0.23825}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00095, "acc_pose": 0.68063, "loss": 0.00095, "grad_norm": 0.05137, "time": 0.23805}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00094, "acc_pose": 0.6764, "loss": 0.00094, "grad_norm": 0.03778, "time": 0.23827}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00096, "acc_pose": 0.68385, "loss": 0.00096, "grad_norm": 0.06712, "time": 0.23759}
+{"mode": "val", "epoch": 10, "iter": 407, "lr": 1e-05, "AP": 0.63837, "AP .5": 0.86642, "AP .75": 0.70795, "AP (M)": 0.5647, "AP (L)": 0.66097, "AR": 0.70179, "AR .5": 0.91105, "AR .75": 0.76826, "AR (M)": 0.65744, "AR (L)": 0.7644}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05525, "heatmap_loss": 0.00096, "acc_pose": 0.67874, "loss": 0.00096, "grad_norm": 0.07083, "time": 0.29458}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00094, "acc_pose": 0.68238, "loss": 0.00094, "grad_norm": 0.04506, "time": 0.23757}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00094, "acc_pose": 0.68553, "loss": 0.00094, "grad_norm": 0.0475, "time": 0.23761}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00096, "acc_pose": 0.68279, "loss": 0.00096, "grad_norm": 0.07904, "time": 0.23772}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00093, "acc_pose": 0.69423, "loss": 0.00093, "grad_norm": 0.04556, "time": 0.23776}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05603, "heatmap_loss": 0.00093, "acc_pose": 0.6903, "loss": 0.00093, "grad_norm": 0.04698, "time": 0.29789}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00092, "acc_pose": 0.69255, "loss": 0.00092, "grad_norm": 0.03949, "time": 0.23809}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00093, "acc_pose": 0.69281, "loss": 0.00093, "grad_norm": 0.05889, "time": 0.23721}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00094, "acc_pose": 0.68719, "loss": 0.00094, "grad_norm": 0.08132, "time": 0.23735}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00093, "acc_pose": 0.68701, "loss": 0.00093, "grad_norm": 0.05247, "time": 0.23795}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00091, "acc_pose": 0.69548, "loss": 0.00091, "grad_norm": 0.04458, "time": 0.29595}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00091, "acc_pose": 0.70027, "loss": 0.00091, "grad_norm": 0.03347, "time": 0.23765}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00092, "acc_pose": 0.69554, "loss": 0.00092, "grad_norm": 0.04732, "time": 0.23761}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00093, "acc_pose": 0.69756, "loss": 0.00093, "grad_norm": 0.06773, "time": 0.23723}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00091, "acc_pose": 0.7035, "loss": 0.00091, "grad_norm": 0.04501, "time": 0.23708}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05581, "heatmap_loss": 0.00091, "acc_pose": 0.69652, "loss": 0.00091, "grad_norm": 0.04158, "time": 0.29602}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00091, "acc_pose": 0.70099, "loss": 0.00091, "grad_norm": 0.051, "time": 0.23796}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.0009, "acc_pose": 0.70055, "loss": 0.0009, "grad_norm": 0.03919, "time": 0.23848}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.0009, "acc_pose": 0.69891, "loss": 0.0009, "grad_norm": 0.03838, "time": 0.23745}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00089, "acc_pose": 0.71053, "loss": 0.00089, "grad_norm": 0.04312, "time": 0.23757}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0546, "heatmap_loss": 0.00091, "acc_pose": 0.70013, "loss": 0.00091, "grad_norm": 0.06748, "time": 0.29568}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.0009, "acc_pose": 0.70443, "loss": 0.0009, "grad_norm": 0.04362, "time": 0.23768}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00089, "acc_pose": 0.69834, "loss": 0.00089, "grad_norm": 0.03224, "time": 0.23757}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00091, "acc_pose": 0.71183, "loss": 0.00091, "grad_norm": 0.05983, "time": 0.23695}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.0009, "acc_pose": 0.70458, "loss": 0.0009, "grad_norm": 0.05525, "time": 0.23665}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05598, "heatmap_loss": 0.00088, "acc_pose": 0.71245, "loss": 0.00088, "grad_norm": 0.03527, "time": 0.29595}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00088, "acc_pose": 0.71503, "loss": 0.00088, "grad_norm": 0.04073, "time": 0.23798}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00087, "acc_pose": 0.71386, "loss": 0.00087, "grad_norm": 0.02801, "time": 0.23762}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00088, "acc_pose": 0.71223, "loss": 0.00088, "grad_norm": 0.02729, "time": 0.23706}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00088, "acc_pose": 0.70968, "loss": 0.00088, "grad_norm": 0.03779, "time": 0.23759}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0551, "heatmap_loss": 0.00087, "acc_pose": 0.71041, "loss": 0.00087, "grad_norm": 0.03594, "time": 0.29539}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00087, "acc_pose": 0.70648, "loss": 0.00087, "grad_norm": 0.0325, "time": 0.23827}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00087, "acc_pose": 0.71506, "loss": 0.00087, "grad_norm": 0.03089, "time": 0.23735}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00087, "acc_pose": 0.71828, "loss": 0.00087, "grad_norm": 0.02832, "time": 0.23705}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00087, "acc_pose": 0.71975, "loss": 0.00087, "grad_norm": 0.0344, "time": 0.23709}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05602, "heatmap_loss": 0.00086, "acc_pose": 0.71735, "loss": 0.00086, "grad_norm": 0.03983, "time": 0.29506}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00086, "acc_pose": 0.72363, "loss": 0.00086, "grad_norm": 0.03434, "time": 0.23796}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00087, "acc_pose": 0.72045, "loss": 0.00087, "grad_norm": 0.0429, "time": 0.2375}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00049, "heatmap_loss": 0.00085, "acc_pose": 0.72336, "loss": 0.00085, "grad_norm": 0.02021, "time": 0.23736}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00085, "acc_pose": 0.7178, "loss": 0.00085, "grad_norm": 0.01832, "time": 0.23708}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05597, "heatmap_loss": 0.00086, "acc_pose": 0.71666, "loss": 0.00086, "grad_norm": 0.02529, "time": 0.29665}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00086, "acc_pose": 0.72188, "loss": 0.00086, "grad_norm": 0.0217, "time": 0.23762}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00086, "acc_pose": 0.7079, "loss": 0.00086, "grad_norm": 0.0237, "time": 0.23737}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00085, "acc_pose": 0.71605, "loss": 0.00085, "grad_norm": 0.02618, "time": 0.23787}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00085, "acc_pose": 0.72437, "loss": 0.00085, "grad_norm": 0.02708, "time": 0.23762}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05609, "heatmap_loss": 0.00084, "acc_pose": 0.73148, "loss": 0.00084, "grad_norm": 0.02049, "time": 0.2974}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00085, "acc_pose": 0.72254, "loss": 0.00085, "grad_norm": 0.02335, "time": 0.23702}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00085, "acc_pose": 0.71958, "loss": 0.00085, "grad_norm": 0.02002, "time": 0.23759}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00085, "acc_pose": 0.72853, "loss": 0.00085, "grad_norm": 0.01957, "time": 0.23723}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00085, "acc_pose": 0.72391, "loss": 0.00085, "grad_norm": 0.01602, "time": 0.23665}
+{"mode": "val", "epoch": 20, "iter": 407, "lr": 1e-05, "AP": 0.67925, "AP .5": 0.8837, "AP .75": 0.75609, "AP (M)": 0.60565, "AP (L)": 0.70507, "AR": 0.73942, "AR .5": 0.92506, "AR .75": 0.80872, "AR (M)": 0.69574, "AR (L)": 0.80223}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05577, "heatmap_loss": 0.00084, "acc_pose": 0.72554, "loss": 0.00084, "grad_norm": 0.02352, "time": 0.29537}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00083, "acc_pose": 0.7328, "loss": 0.00083, "grad_norm": 0.01845, "time": 0.2377}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00084, "acc_pose": 0.72321, "loss": 0.00084, "grad_norm": 0.01548, "time": 0.23764}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00083, "acc_pose": 0.73154, "loss": 0.00083, "grad_norm": 0.01761, "time": 0.23741}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00048, "heatmap_loss": 0.00084, "acc_pose": 0.72735, "loss": 0.00084, "grad_norm": 0.01933, "time": 0.23793}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0575, "heatmap_loss": 0.00084, "acc_pose": 0.73036, "loss": 0.00084, "grad_norm": 0.0149, "time": 0.29853}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00083, "acc_pose": 0.72176, "loss": 0.00083, "grad_norm": 0.01498, "time": 0.23759}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00083, "acc_pose": 0.72366, "loss": 0.00083, "grad_norm": 0.0133, "time": 0.23688}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00083, "acc_pose": 0.73883, "loss": 0.00083, "grad_norm": 0.01589, "time": 0.23779}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00083, "acc_pose": 0.73341, "loss": 0.00083, "grad_norm": 0.01457, "time": 0.23712}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05537, "heatmap_loss": 0.00083, "acc_pose": 0.73013, "loss": 0.00083, "grad_norm": 0.01788, "time": 0.29544}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00083, "acc_pose": 0.72666, "loss": 0.00083, "grad_norm": 0.01243, "time": 0.23884}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00082, "acc_pose": 0.72682, "loss": 0.00082, "grad_norm": 0.01182, "time": 0.23815}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00082, "acc_pose": 0.7358, "loss": 0.00082, "grad_norm": 0.01424, "time": 0.23766}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00082, "acc_pose": 0.73676, "loss": 0.00082, "grad_norm": 0.01435, "time": 0.2375}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05792, "heatmap_loss": 0.00082, "acc_pose": 0.73306, "loss": 0.00082, "grad_norm": 0.01602, "time": 0.29919}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00082, "acc_pose": 0.74126, "loss": 0.00082, "grad_norm": 0.01145, "time": 0.23793}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00082, "acc_pose": 0.72971, "loss": 0.00082, "grad_norm": 0.01037, "time": 0.23748}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00082, "acc_pose": 0.7308, "loss": 0.00082, "grad_norm": 0.00989, "time": 0.23744}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00082, "acc_pose": 0.73027, "loss": 0.00082, "grad_norm": 0.01138, "time": 0.23737}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05532, "heatmap_loss": 0.00081, "acc_pose": 0.74274, "loss": 0.00081, "grad_norm": 0.00933, "time": 0.29558}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00081, "acc_pose": 0.73547, "loss": 0.00081, "grad_norm": 0.00834, "time": 0.23721}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00081, "acc_pose": 0.73283, "loss": 0.00081, "grad_norm": 0.01052, "time": 0.23754}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00082, "acc_pose": 0.73266, "loss": 0.00082, "grad_norm": 0.00814, "time": 0.23802}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00081, "acc_pose": 0.73633, "loss": 0.00081, "grad_norm": 0.00717, "time": 0.23677}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05758, "heatmap_loss": 0.00081, "acc_pose": 0.73481, "loss": 0.00081, "grad_norm": 0.0082, "time": 0.29876}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00081, "acc_pose": 0.73453, "loss": 0.00081, "grad_norm": 0.00675, "time": 0.23799}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00081, "acc_pose": 0.73398, "loss": 0.00081, "grad_norm": 0.00678, "time": 0.23788}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00081, "acc_pose": 0.74144, "loss": 0.00081, "grad_norm": 0.00706, "time": 0.23768}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.0008, "acc_pose": 0.74359, "loss": 0.0008, "grad_norm": 0.00706, "time": 0.2375}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0553, "heatmap_loss": 0.0008, "acc_pose": 0.74146, "loss": 0.0008, "grad_norm": 0.00706, "time": 0.29532}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0008, "acc_pose": 0.74841, "loss": 0.0008, "grad_norm": 0.00706, "time": 0.23735}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0008, "acc_pose": 0.74248, "loss": 0.0008, "grad_norm": 0.00558, "time": 0.23758}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.0008, "acc_pose": 0.74657, "loss": 0.0008, "grad_norm": 0.00517, "time": 0.2368}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.0008, "acc_pose": 0.7365, "loss": 0.0008, "grad_norm": 0.00516, "time": 0.23699}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05584, "heatmap_loss": 0.0008, "acc_pose": 0.74584, "loss": 0.0008, "grad_norm": 0.00484, "time": 0.29809}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00079, "acc_pose": 0.74047, "loss": 0.00079, "grad_norm": 0.00413, "time": 0.23805}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.0008, "acc_pose": 0.74093, "loss": 0.0008, "grad_norm": 0.00468, "time": 0.23732}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0008, "acc_pose": 0.74111, "loss": 0.0008, "grad_norm": 0.00422, "time": 0.23708}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.0008, "acc_pose": 0.74444, "loss": 0.0008, "grad_norm": 0.00418, "time": 0.23777}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05527, "heatmap_loss": 0.0008, "acc_pose": 0.74997, "loss": 0.0008, "grad_norm": 0.00392, "time": 0.29555}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0008, "acc_pose": 0.74134, "loss": 0.0008, "grad_norm": 0.00357, "time": 0.23782}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00079, "acc_pose": 0.73602, "loss": 0.00079, "grad_norm": 0.00353, "time": 0.23722}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00079, "acc_pose": 0.74325, "loss": 0.00079, "grad_norm": 0.00342, "time": 0.23743}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00079, "acc_pose": 0.74558, "loss": 0.00079, "grad_norm": 0.00282, "time": 0.23729}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05833, "heatmap_loss": 0.00079, "acc_pose": 0.73937, "loss": 0.00079, "grad_norm": 0.00273, "time": 0.29884}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00079, "acc_pose": 0.74583, "loss": 0.00079, "grad_norm": 0.00268, "time": 0.2375}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00079, "acc_pose": 0.74417, "loss": 0.00079, "grad_norm": 0.00253, "time": 0.23741}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00079, "acc_pose": 0.74494, "loss": 0.00079, "grad_norm": 0.00235, "time": 0.23757}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00079, "acc_pose": 0.74755, "loss": 0.00079, "grad_norm": 0.00245, "time": 0.23718}
+{"mode": "val", "epoch": 30, "iter": 407, "lr": 1e-05, "AP": 0.69612, "AP .5": 0.88545, "AP .75": 0.77546, "AP (M)": 0.62122, "AP (L)": 0.72185, "AR": 0.75605, "AR .5": 0.929, "AR .75": 0.82604, "AR (M)": 0.71073, "AR (L)": 0.82129}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05431, "heatmap_loss": 0.00079, "acc_pose": 0.74551, "loss": 0.00079, "grad_norm": 0.00201, "time": 0.29382}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00079, "acc_pose": 0.7465, "loss": 0.00079, "grad_norm": 0.00215, "time": 0.23806}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00078, "acc_pose": 0.74335, "loss": 0.00078, "grad_norm": 0.00193, "time": 0.23755}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00078, "acc_pose": 0.7551, "loss": 0.00078, "grad_norm": 0.00199, "time": 0.23765}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00078, "acc_pose": 0.75625, "loss": 0.00078, "grad_norm": 0.00175, "time": 0.23732}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05592, "heatmap_loss": 0.00078, "acc_pose": 0.75197, "loss": 0.00078, "grad_norm": 0.00165, "time": 0.29626}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00078, "acc_pose": 0.75534, "loss": 0.00078, "grad_norm": 0.00156, "time": 0.23794}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00078, "acc_pose": 0.74844, "loss": 0.00078, "grad_norm": 0.00157, "time": 0.23804}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00078, "acc_pose": 0.75512, "loss": 0.00078, "grad_norm": 0.00146, "time": 0.23763}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00077, "acc_pose": 0.75512, "loss": 0.00077, "grad_norm": 0.00154, "time": 0.23715}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05625, "heatmap_loss": 0.00078, "acc_pose": 0.7558, "loss": 0.00078, "grad_norm": 0.00147, "time": 0.29699}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00078, "acc_pose": 0.74875, "loss": 0.00078, "grad_norm": 0.00138, "time": 0.23794}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00077, "acc_pose": 0.76127, "loss": 0.00077, "grad_norm": 0.00131, "time": 0.23772}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00079, "acc_pose": 0.74355, "loss": 0.00079, "grad_norm": 0.00145, "time": 0.23714}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00078, "acc_pose": 0.76302, "loss": 0.00078, "grad_norm": 0.00134, "time": 0.23743}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05507, "heatmap_loss": 0.00077, "acc_pose": 0.75337, "loss": 0.00077, "grad_norm": 0.00133, "time": 0.29779}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00056, "heatmap_loss": 0.00078, "acc_pose": 0.751, "loss": 0.00078, "grad_norm": 0.00128, "time": 0.23805}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00077, "acc_pose": 0.75355, "loss": 0.00077, "grad_norm": 0.00132, "time": 0.23758}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00077, "acc_pose": 0.7545, "loss": 0.00077, "grad_norm": 0.00132, "time": 0.23678}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00077, "acc_pose": 0.75408, "loss": 0.00077, "grad_norm": 0.00125, "time": 0.23786}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0558, "heatmap_loss": 0.00077, "acc_pose": 0.75187, "loss": 0.00077, "grad_norm": 0.00123, "time": 0.29754}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00077, "acc_pose": 0.75255, "loss": 0.00077, "grad_norm": 0.00129, "time": 0.23822}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00077, "acc_pose": 0.75309, "loss": 0.00077, "grad_norm": 0.00126, "time": 0.23692}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00077, "acc_pose": 0.76513, "loss": 0.00077, "grad_norm": 0.00122, "time": 0.23769}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00077, "acc_pose": 0.764, "loss": 0.00077, "grad_norm": 0.00122, "time": 0.23763}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0549, "heatmap_loss": 0.00076, "acc_pose": 0.74888, "loss": 0.00076, "grad_norm": 0.00122, "time": 0.29763}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00077, "acc_pose": 0.75556, "loss": 0.00077, "grad_norm": 0.00126, "time": 0.23741}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00076, "acc_pose": 0.75351, "loss": 0.00076, "grad_norm": 0.0012, "time": 0.23741}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00076, "acc_pose": 0.7582, "loss": 0.00076, "grad_norm": 0.00122, "time": 0.2376}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00077, "acc_pose": 0.75613, "loss": 0.00077, "grad_norm": 0.00122, "time": 0.23717}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05511, "heatmap_loss": 0.00076, "acc_pose": 0.75098, "loss": 0.00076, "grad_norm": 0.00126, "time": 0.29507}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00077, "acc_pose": 0.75532, "loss": 0.00077, "grad_norm": 0.00124, "time": 0.23781}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00076, "acc_pose": 0.75451, "loss": 0.00076, "grad_norm": 0.00119, "time": 0.23774}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00076, "acc_pose": 0.76572, "loss": 0.00076, "grad_norm": 0.00119, "time": 0.23775}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00076, "acc_pose": 0.75317, "loss": 0.00076, "grad_norm": 0.00124, "time": 0.23689}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05538, "heatmap_loss": 0.00077, "acc_pose": 0.75943, "loss": 0.00077, "grad_norm": 0.00121, "time": 0.29579}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00076, "acc_pose": 0.75805, "loss": 0.00076, "grad_norm": 0.00121, "time": 0.23787}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00076, "acc_pose": 0.75782, "loss": 0.00076, "grad_norm": 0.0012, "time": 0.23768}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00076, "acc_pose": 0.75269, "loss": 0.00076, "grad_norm": 0.00124, "time": 0.23655}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00076, "acc_pose": 0.75364, "loss": 0.00076, "grad_norm": 0.00123, "time": 0.23675}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05571, "heatmap_loss": 0.00076, "acc_pose": 0.75459, "loss": 0.00076, "grad_norm": 0.00117, "time": 0.29669}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00075, "acc_pose": 0.75936, "loss": 0.00075, "grad_norm": 0.0012, "time": 0.23801}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00075, "acc_pose": 0.75447, "loss": 0.00075, "grad_norm": 0.00123, "time": 0.23776}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00076, "acc_pose": 0.75363, "loss": 0.00076, "grad_norm": 0.00117, "time": 0.23766}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00076, "acc_pose": 0.75584, "loss": 0.00076, "grad_norm": 0.00123, "time": 0.23711}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05517, "heatmap_loss": 0.00075, "acc_pose": 0.76049, "loss": 0.00075, "grad_norm": 0.00124, "time": 0.29723}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00075, "acc_pose": 0.75196, "loss": 0.00075, "grad_norm": 0.00124, "time": 0.23827}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00049, "heatmap_loss": 0.00076, "acc_pose": 0.76155, "loss": 0.00076, "grad_norm": 0.00119, "time": 0.23778}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00075, "acc_pose": 0.75913, "loss": 0.00075, "grad_norm": 0.00119, "time": 0.23692}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00076, "acc_pose": 0.76202, "loss": 0.00076, "grad_norm": 0.00119, "time": 0.23766}
+{"mode": "val", "epoch": 40, "iter": 407, "lr": 1e-05, "AP": 0.7078, "AP .5": 0.89179, "AP .75": 0.78361, "AP (M)": 0.63486, "AP (L)": 0.73106, "AR": 0.76702, "AR .5": 0.93309, "AR .75": 0.83486, "AR (M)": 0.7236, "AR (L)": 0.82921}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05463, "heatmap_loss": 0.00075, "acc_pose": 0.76932, "loss": 0.00075, "grad_norm": 0.00116, "time": 0.29406}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00075, "acc_pose": 0.761, "loss": 0.00075, "grad_norm": 0.00124, "time": 0.23743}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00075, "acc_pose": 0.77072, "loss": 0.00075, "grad_norm": 0.00121, "time": 0.23693}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00075, "acc_pose": 0.76342, "loss": 0.00075, "grad_norm": 0.00119, "time": 0.23759}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00075, "acc_pose": 0.75184, "loss": 0.00075, "grad_norm": 0.00119, "time": 0.23744}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05555, "heatmap_loss": 0.00075, "acc_pose": 0.76218, "loss": 0.00075, "grad_norm": 0.00118, "time": 0.29607}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00075, "acc_pose": 0.75349, "loss": 0.00075, "grad_norm": 0.00116, "time": 0.23787}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00075, "acc_pose": 0.75874, "loss": 0.00075, "grad_norm": 0.00116, "time": 0.23698}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00074, "acc_pose": 0.76176, "loss": 0.00074, "grad_norm": 0.00121, "time": 0.23781}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00075, "acc_pose": 0.7568, "loss": 0.00075, "grad_norm": 0.00123, "time": 0.23763}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05499, "heatmap_loss": 0.00075, "acc_pose": 0.76789, "loss": 0.00075, "grad_norm": 0.00117, "time": 0.29545}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00075, "acc_pose": 0.76216, "loss": 0.00075, "grad_norm": 0.00123, "time": 0.23737}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00074, "acc_pose": 0.76361, "loss": 0.00074, "grad_norm": 0.00123, "time": 0.23754}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00075, "acc_pose": 0.76122, "loss": 0.00075, "grad_norm": 0.00124, "time": 0.23759}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00075, "acc_pose": 0.77237, "loss": 0.00075, "grad_norm": 0.00119, "time": 0.23726}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05545, "heatmap_loss": 0.00074, "acc_pose": 0.76251, "loss": 0.00074, "grad_norm": 0.00115, "time": 0.29803}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00074, "acc_pose": 0.76543, "loss": 0.00074, "grad_norm": 0.0012, "time": 0.23825}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00074, "acc_pose": 0.76974, "loss": 0.00074, "grad_norm": 0.00118, "time": 0.23804}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00074, "acc_pose": 0.76495, "loss": 0.00074, "grad_norm": 0.00121, "time": 0.23733}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00074, "acc_pose": 0.76377, "loss": 0.00074, "grad_norm": 0.00122, "time": 0.23729}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05605, "heatmap_loss": 0.00074, "acc_pose": 0.76621, "loss": 0.00074, "grad_norm": 0.00119, "time": 0.2961}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00074, "acc_pose": 0.76208, "loss": 0.00074, "grad_norm": 0.00117, "time": 0.23812}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00073, "acc_pose": 0.75759, "loss": 0.00073, "grad_norm": 0.00117, "time": 0.23803}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00074, "acc_pose": 0.76632, "loss": 0.00074, "grad_norm": 0.00119, "time": 0.23699}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.76257, "loss": 0.00074, "grad_norm": 0.00118, "time": 0.23791}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05616, "heatmap_loss": 0.00074, "acc_pose": 0.76408, "loss": 0.00074, "grad_norm": 0.00126, "time": 0.29515}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00074, "acc_pose": 0.7732, "loss": 0.00074, "grad_norm": 0.00122, "time": 0.23835}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00074, "acc_pose": 0.76623, "loss": 0.00074, "grad_norm": 0.00117, "time": 0.23735}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.76587, "loss": 0.00074, "grad_norm": 0.00121, "time": 0.23721}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.77218, "loss": 0.00074, "grad_norm": 0.00116, "time": 0.2371}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05584, "heatmap_loss": 0.00074, "acc_pose": 0.76193, "loss": 0.00074, "grad_norm": 0.0012, "time": 0.29633}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.7727, "loss": 0.00074, "grad_norm": 0.00118, "time": 0.23771}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00074, "acc_pose": 0.76267, "loss": 0.00074, "grad_norm": 0.00122, "time": 0.23715}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76812, "loss": 0.00073, "grad_norm": 0.00123, "time": 0.23809}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00073, "acc_pose": 0.77549, "loss": 0.00073, "grad_norm": 0.00123, "time": 0.23727}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05515, "heatmap_loss": 0.00073, "acc_pose": 0.76733, "loss": 0.00073, "grad_norm": 0.00119, "time": 0.2979}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00073, "acc_pose": 0.76378, "loss": 0.00073, "grad_norm": 0.0012, "time": 0.23745}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00073, "acc_pose": 0.77478, "loss": 0.00073, "grad_norm": 0.00117, "time": 0.2392}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00074, "acc_pose": 0.7687, "loss": 0.00074, "grad_norm": 0.00121, "time": 0.23857}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00073, "acc_pose": 0.77365, "loss": 0.00073, "grad_norm": 0.00118, "time": 0.23808}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00073, "acc_pose": 0.76562, "loss": 0.00073, "grad_norm": 0.00118, "time": 0.29854}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00074, "acc_pose": 0.76915, "loss": 0.00074, "grad_norm": 0.00118, "time": 0.23841}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00072, "acc_pose": 0.77123, "loss": 0.00072, "grad_norm": 0.0012, "time": 0.23822}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00073, "acc_pose": 0.76877, "loss": 0.00073, "grad_norm": 0.0012, "time": 0.23766}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00073, "acc_pose": 0.77245, "loss": 0.00073, "grad_norm": 0.00117, "time": 0.23789}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05526, "heatmap_loss": 0.00073, "acc_pose": 0.78114, "loss": 0.00073, "grad_norm": 0.00121, "time": 0.29767}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.77351, "loss": 0.00073, "grad_norm": 0.00115, "time": 0.23844}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00073, "acc_pose": 0.77405, "loss": 0.00073, "grad_norm": 0.00119, "time": 0.23858}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76802, "loss": 0.00073, "grad_norm": 0.00116, "time": 0.23742}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00073, "acc_pose": 0.76487, "loss": 0.00073, "grad_norm": 0.00121, "time": 0.23795}
+{"mode": "val", "epoch": 50, "iter": 407, "lr": 1e-05, "AP": 0.71611, "AP .5": 0.89335, "AP .75": 0.79372, "AP (M)": 0.64252, "AP (L)": 0.74068, "AR": 0.77377, "AR .5": 0.9353, "AR .75": 0.84288, "AR (M)": 0.73081, "AR (L)": 0.83605}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05597, "heatmap_loss": 0.00072, "acc_pose": 0.77757, "loss": 0.00072, "grad_norm": 0.00114, "time": 0.29634}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.76987, "loss": 0.00073, "grad_norm": 0.00122, "time": 0.23839}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00073, "acc_pose": 0.76946, "loss": 0.00073, "grad_norm": 0.00118, "time": 0.23824}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00072, "acc_pose": 0.77492, "loss": 0.00072, "grad_norm": 0.00123, "time": 0.23785}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00072, "acc_pose": 0.78261, "loss": 0.00072, "grad_norm": 0.00121, "time": 0.23758}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05507, "heatmap_loss": 0.00072, "acc_pose": 0.77043, "loss": 0.00072, "grad_norm": 0.0012, "time": 0.29954}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00073, "acc_pose": 0.77113, "loss": 0.00073, "grad_norm": 0.0012, "time": 0.23871}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00072, "acc_pose": 0.77536, "loss": 0.00072, "grad_norm": 0.00123, "time": 0.23797}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00071, "acc_pose": 0.77504, "loss": 0.00071, "grad_norm": 0.00113, "time": 0.23857}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00073, "acc_pose": 0.76407, "loss": 0.00073, "grad_norm": 0.00122, "time": 0.23748}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05497, "heatmap_loss": 0.00072, "acc_pose": 0.77408, "loss": 0.00072, "grad_norm": 0.0012, "time": 0.2981}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00072, "acc_pose": 0.76882, "loss": 0.00072, "grad_norm": 0.00114, "time": 0.23767}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00072, "acc_pose": 0.77249, "loss": 0.00072, "grad_norm": 0.00129, "time": 0.23782}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.77442, "loss": 0.00073, "grad_norm": 0.00122, "time": 0.23726}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00054, "heatmap_loss": 0.00072, "acc_pose": 0.77078, "loss": 0.00072, "grad_norm": 0.00113, "time": 0.23684}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05762, "heatmap_loss": 0.00072, "acc_pose": 0.77605, "loss": 0.00072, "grad_norm": 0.00117, "time": 0.29907}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00072, "acc_pose": 0.76977, "loss": 0.00072, "grad_norm": 0.00119, "time": 0.23859}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00072, "acc_pose": 0.77894, "loss": 0.00072, "grad_norm": 0.00116, "time": 0.23837}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00072, "acc_pose": 0.77221, "loss": 0.00072, "grad_norm": 0.00121, "time": 0.23811}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00072, "acc_pose": 0.77577, "loss": 0.00072, "grad_norm": 0.00122, "time": 0.2379}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05551, "heatmap_loss": 0.00072, "acc_pose": 0.77832, "loss": 0.00072, "grad_norm": 0.0012, "time": 0.29758}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00072, "acc_pose": 0.76762, "loss": 0.00072, "grad_norm": 0.00121, "time": 0.23781}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00072, "acc_pose": 0.7681, "loss": 0.00072, "grad_norm": 0.00113, "time": 0.2384}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00072, "acc_pose": 0.77465, "loss": 0.00072, "grad_norm": 0.00121, "time": 0.23806}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00072, "acc_pose": 0.77402, "loss": 0.00072, "grad_norm": 0.00126, "time": 0.23786}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05596, "heatmap_loss": 0.00073, "acc_pose": 0.77239, "loss": 0.00073, "grad_norm": 0.00115, "time": 0.29711}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00071, "acc_pose": 0.77973, "loss": 0.00071, "grad_norm": 0.00113, "time": 0.23836}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00071, "acc_pose": 0.77293, "loss": 0.00071, "grad_norm": 0.0012, "time": 0.23848}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00071, "acc_pose": 0.76923, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23817}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00071, "acc_pose": 0.77468, "loss": 0.00071, "grad_norm": 0.00118, "time": 0.23762}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05614, "heatmap_loss": 0.00072, "acc_pose": 0.78341, "loss": 0.00072, "grad_norm": 0.00122, "time": 0.29814}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.7735, "loss": 0.00071, "grad_norm": 0.00119, "time": 0.2388}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00072, "acc_pose": 0.77503, "loss": 0.00072, "grad_norm": 0.00119, "time": 0.23777}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00072, "acc_pose": 0.78395, "loss": 0.00072, "grad_norm": 0.00116, "time": 0.23723}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.77884, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23741}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05507, "heatmap_loss": 0.00071, "acc_pose": 0.77971, "loss": 0.00071, "grad_norm": 0.00119, "time": 0.29773}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00071, "acc_pose": 0.77583, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23832}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.77109, "loss": 0.00071, "grad_norm": 0.00118, "time": 0.23743}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00052, "heatmap_loss": 0.00072, "acc_pose": 0.78119, "loss": 0.00072, "grad_norm": 0.00119, "time": 0.23797}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.77584, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.237}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05556, "heatmap_loss": 0.00071, "acc_pose": 0.77753, "loss": 0.00071, "grad_norm": 0.00112, "time": 0.29693}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.7822, "loss": 0.00071, "grad_norm": 0.00117, "time": 0.23872}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.78044, "loss": 0.00071, "grad_norm": 0.00117, "time": 0.23855}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00071, "acc_pose": 0.77846, "loss": 0.00071, "grad_norm": 0.00114, "time": 0.23821}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.78134, "loss": 0.00071, "grad_norm": 0.00117, "time": 0.23795}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05559, "heatmap_loss": 0.00071, "acc_pose": 0.77917, "loss": 0.00071, "grad_norm": 0.00117, "time": 0.29746}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.77837, "loss": 0.0007, "grad_norm": 0.00114, "time": 0.2384}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00071, "acc_pose": 0.77468, "loss": 0.00071, "grad_norm": 0.00117, "time": 0.23821}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00071, "acc_pose": 0.78059, "loss": 0.00071, "grad_norm": 0.00114, "time": 0.23826}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00071, "acc_pose": 0.768, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23808}
+{"mode": "val", "epoch": 60, "iter": 407, "lr": 1e-05, "AP": 0.72138, "AP .5": 0.89564, "AP .75": 0.7972, "AP (M)": 0.64857, "AP (L)": 0.74628, "AR": 0.77805, "AR .5": 0.93671, "AR .75": 0.84509, "AR (M)": 0.7363, "AR (L)": 0.83846}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0554, "heatmap_loss": 0.0007, "acc_pose": 0.77166, "loss": 0.0007, "grad_norm": 0.00119, "time": 0.29563}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00071, "acc_pose": 0.7693, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23903}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00071, "acc_pose": 0.77334, "loss": 0.00071, "grad_norm": 0.00116, "time": 0.23866}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00071, "acc_pose": 0.77783, "loss": 0.00071, "grad_norm": 0.00112, "time": 0.23817}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00071, "acc_pose": 0.77896, "loss": 0.00071, "grad_norm": 0.00114, "time": 0.23786}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05553, "heatmap_loss": 0.0007, "acc_pose": 0.77425, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.29672}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.77808, "loss": 0.0007, "grad_norm": 0.00121, "time": 0.23911}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00071, "acc_pose": 0.77604, "loss": 0.00071, "grad_norm": 0.00115, "time": 0.23842}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.0007, "acc_pose": 0.78096, "loss": 0.0007, "grad_norm": 0.00118, "time": 0.23845}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.0007, "acc_pose": 0.78172, "loss": 0.0007, "grad_norm": 0.00114, "time": 0.23806}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05552, "heatmap_loss": 0.0007, "acc_pose": 0.78298, "loss": 0.0007, "grad_norm": 0.00117, "time": 0.29697}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.7858, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.23805}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.78429, "loss": 0.0007, "grad_norm": 0.00119, "time": 0.23766}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00071, "acc_pose": 0.78255, "loss": 0.00071, "grad_norm": 0.00116, "time": 0.23813}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.77609, "loss": 0.00071, "grad_norm": 0.00118, "time": 0.23743}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05838, "heatmap_loss": 0.0007, "acc_pose": 0.77652, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.30218}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.0007, "acc_pose": 0.77511, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.23898}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.78095, "loss": 0.00071, "grad_norm": 0.00119, "time": 0.23865}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.7802, "loss": 0.0007, "grad_norm": 0.00114, "time": 0.23772}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.0007, "acc_pose": 0.78143, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.23793}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05579, "heatmap_loss": 0.0007, "acc_pose": 0.78406, "loss": 0.0007, "grad_norm": 0.00112, "time": 0.29678}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00069, "acc_pose": 0.7834, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.2387}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.0007, "acc_pose": 0.78294, "loss": 0.0007, "grad_norm": 0.00114, "time": 0.23829}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.0007, "acc_pose": 0.78132, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.23749}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.0007, "acc_pose": 0.77918, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.23706}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05545, "heatmap_loss": 0.00069, "acc_pose": 0.78726, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.29681}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.0007, "acc_pose": 0.77942, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.2385}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.0007, "acc_pose": 0.78373, "loss": 0.0007, "grad_norm": 0.00111, "time": 0.23823}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00069, "acc_pose": 0.78505, "loss": 0.00069, "grad_norm": 0.00109, "time": 0.23796}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.0007, "acc_pose": 0.78205, "loss": 0.0007, "grad_norm": 0.00113, "time": 0.23796}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05525, "heatmap_loss": 0.0007, "acc_pose": 0.78533, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.29627}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.0007, "acc_pose": 0.77484, "loss": 0.0007, "grad_norm": 0.00111, "time": 0.23843}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.78363, "loss": 0.0007, "grad_norm": 0.0011, "time": 0.23817}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00069, "acc_pose": 0.78485, "loss": 0.00069, "grad_norm": 0.00119, "time": 0.23733}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.0007, "acc_pose": 0.78197, "loss": 0.0007, "grad_norm": 0.0011, "time": 0.23785}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05473, "heatmap_loss": 0.00069, "acc_pose": 0.77871, "loss": 0.00069, "grad_norm": 0.00118, "time": 0.29722}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00069, "acc_pose": 0.7817, "loss": 0.00069, "grad_norm": 0.00112, "time": 0.23827}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78345, "loss": 0.00069, "grad_norm": 0.00117, "time": 0.23837}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.78277, "loss": 0.0007, "grad_norm": 0.00117, "time": 0.23836}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.0007, "acc_pose": 0.77956, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.23827}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05541, "heatmap_loss": 0.00069, "acc_pose": 0.78485, "loss": 0.00069, "grad_norm": 0.00111, "time": 0.29847}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00069, "acc_pose": 0.77935, "loss": 0.00069, "grad_norm": 0.00117, "time": 0.23853}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00069, "acc_pose": 0.77785, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.23831}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00069, "acc_pose": 0.78656, "loss": 0.00069, "grad_norm": 0.00123, "time": 0.23762}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.0007, "acc_pose": 0.78485, "loss": 0.0007, "grad_norm": 0.00115, "time": 0.2377}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05501, "heatmap_loss": 0.0007, "acc_pose": 0.77889, "loss": 0.0007, "grad_norm": 0.00114, "time": 0.29669}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00069, "acc_pose": 0.78786, "loss": 0.00069, "grad_norm": 0.00112, "time": 0.23795}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.0007, "acc_pose": 0.77356, "loss": 0.0007, "grad_norm": 0.00112, "time": 0.23774}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00069, "acc_pose": 0.78678, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.23789}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00069, "acc_pose": 0.77726, "loss": 0.00069, "grad_norm": 0.00113, "time": 0.23828}
+{"mode": "val", "epoch": 70, "iter": 407, "lr": 1e-05, "AP": 0.72817, "AP .5": 0.89885, "AP .75": 0.8047, "AP (M)": 0.65621, "AP (L)": 0.75381, "AR": 0.78438, "AR .5": 0.94065, "AR .75": 0.8528, "AR (M)": 0.74288, "AR (L)": 0.84452}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05504, "heatmap_loss": 0.00069, "acc_pose": 0.7847, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.29496}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00069, "acc_pose": 0.78253, "loss": 0.00069, "grad_norm": 0.00116, "time": 0.23797}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00069, "acc_pose": 0.78197, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.23813}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00069, "acc_pose": 0.78725, "loss": 0.00069, "grad_norm": 0.00113, "time": 0.23788}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78549, "loss": 0.00069, "grad_norm": 0.00112, "time": 0.23844}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05576, "heatmap_loss": 0.00069, "acc_pose": 0.79079, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.29724}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.0007, "acc_pose": 0.78246, "loss": 0.0007, "grad_norm": 0.00116, "time": 0.23785}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00069, "acc_pose": 0.78044, "loss": 0.00069, "grad_norm": 0.0011, "time": 0.23782}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00069, "acc_pose": 0.78957, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.23782}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78198, "loss": 0.00069, "grad_norm": 0.00112, "time": 0.23742}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05532, "heatmap_loss": 0.00068, "acc_pose": 0.79024, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.29759}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00069, "acc_pose": 0.7897, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.23795}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00069, "acc_pose": 0.77524, "loss": 0.00069, "grad_norm": 0.00116, "time": 0.23808}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00069, "acc_pose": 0.78901, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.23777}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78808, "loss": 0.00069, "grad_norm": 0.00118, "time": 0.23807}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05561, "heatmap_loss": 0.00069, "acc_pose": 0.78406, "loss": 0.00069, "grad_norm": 0.0011, "time": 0.29627}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00068, "acc_pose": 0.78153, "loss": 0.00068, "grad_norm": 0.00112, "time": 0.23898}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00024, "heatmap_loss": 0.00069, "acc_pose": 0.77742, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.23772}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00024, "heatmap_loss": 0.00069, "acc_pose": 0.78799, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.2375}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00069, "acc_pose": 0.78226, "loss": 0.00069, "grad_norm": 0.00114, "time": 0.23783}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05593, "heatmap_loss": 0.00068, "acc_pose": 0.78827, "loss": 0.00068, "grad_norm": 0.00112, "time": 0.29815}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00068, "acc_pose": 0.78345, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23854}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00068, "acc_pose": 0.7859, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23764}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.79172, "loss": 0.00069, "grad_norm": 0.00121, "time": 0.23726}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00068, "acc_pose": 0.78876, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23715}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05832, "heatmap_loss": 0.00068, "acc_pose": 0.78713, "loss": 0.00068, "grad_norm": 0.00112, "time": 0.30043}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.78441, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23851}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.79425, "loss": 0.00068, "grad_norm": 0.00108, "time": 0.23819}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00068, "acc_pose": 0.79158, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23762}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00068, "acc_pose": 0.79362, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23772}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0559, "heatmap_loss": 0.00068, "acc_pose": 0.78506, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.29759}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00069, "acc_pose": 0.78254, "loss": 0.00069, "grad_norm": 0.00116, "time": 0.23758}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00068, "acc_pose": 0.78978, "loss": 0.00068, "grad_norm": 0.0011, "time": 0.23799}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.78738, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23781}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00068, "acc_pose": 0.78962, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23718}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00069, "acc_pose": 0.78945, "loss": 0.00069, "grad_norm": 0.00115, "time": 0.29783}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.79658, "loss": 0.00068, "grad_norm": 0.00117, "time": 0.23854}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00069, "acc_pose": 0.78481, "loss": 0.00069, "grad_norm": 0.00109, "time": 0.238}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78425, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23777}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00068, "acc_pose": 0.79208, "loss": 0.00068, "grad_norm": 0.00109, "time": 0.23753}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0549, "heatmap_loss": 0.00068, "acc_pose": 0.78919, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.29769}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00067, "acc_pose": 0.78084, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.23828}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.77814, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23792}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00068, "acc_pose": 0.7939, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.23778}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00068, "acc_pose": 0.7905, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.23813}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05532, "heatmap_loss": 0.00068, "acc_pose": 0.79143, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.29733}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.79438, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23847}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00068, "acc_pose": 0.77666, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.23752}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.79314, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23835}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.78742, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23728}
+{"mode": "val", "epoch": 80, "iter": 407, "lr": 1e-05, "AP": 0.73203, "AP .5": 0.90021, "AP .75": 0.80724, "AP (M)": 0.66029, "AP (L)": 0.75704, "AR": 0.78775, "AR .5": 0.94002, "AR .75": 0.85422, "AR (M)": 0.74668, "AR (L)": 0.84731}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05484, "heatmap_loss": 0.00067, "acc_pose": 0.79104, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.29466}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.7867, "loss": 0.00068, "grad_norm": 0.0011, "time": 0.23888}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.78561, "loss": 0.00068, "grad_norm": 0.00117, "time": 0.23747}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.79354, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.23773}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78947, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.23741}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05524, "heatmap_loss": 0.00068, "acc_pose": 0.79379, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.29637}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00068, "acc_pose": 0.79255, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.2381}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79487, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.23845}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00068, "acc_pose": 0.78844, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23821}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00068, "acc_pose": 0.78995, "loss": 0.00068, "grad_norm": 0.00109, "time": 0.23824}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05494, "heatmap_loss": 0.00067, "acc_pose": 0.79323, "loss": 0.00067, "grad_norm": 0.00109, "time": 0.29728}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78942, "loss": 0.00068, "grad_norm": 0.00115, "time": 0.23784}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00068, "acc_pose": 0.78551, "loss": 0.00068, "grad_norm": 0.00113, "time": 0.23912}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.78727, "loss": 0.00068, "grad_norm": 0.00119, "time": 0.2378}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.79298, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23835}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00067, "acc_pose": 0.79055, "loss": 0.00067, "grad_norm": 0.00108, "time": 0.29685}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.78485, "loss": 0.00067, "grad_norm": 0.00117, "time": 0.23853}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00068, "acc_pose": 0.78851, "loss": 0.00068, "grad_norm": 0.00111, "time": 0.23785}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00067, "acc_pose": 0.79638, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23763}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.78579, "loss": 0.00068, "grad_norm": 0.00114, "time": 0.23784}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05524, "heatmap_loss": 0.00067, "acc_pose": 0.78836, "loss": 0.00067, "grad_norm": 0.00108, "time": 0.29557}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00067, "acc_pose": 0.79896, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23807}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.78872, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23791}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79805, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.2379}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00068, "acc_pose": 0.79708, "loss": 0.00068, "grad_norm": 0.00116, "time": 0.23778}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05512, "heatmap_loss": 0.00067, "acc_pose": 0.79813, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.29762}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.78935, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23834}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00067, "acc_pose": 0.78874, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23803}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.7899, "loss": 0.00067, "grad_norm": 0.00109, "time": 0.2371}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00067, "acc_pose": 0.79064, "loss": 0.00067, "grad_norm": 0.00108, "time": 0.23755}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05535, "heatmap_loss": 0.00067, "acc_pose": 0.79587, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.29624}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.79855, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23789}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00067, "acc_pose": 0.79109, "loss": 0.00067, "grad_norm": 0.0011, "time": 0.23796}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.79806, "loss": 0.00067, "grad_norm": 0.0011, "time": 0.23734}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00067, "acc_pose": 0.79565, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.23752}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05511, "heatmap_loss": 0.00066, "acc_pose": 0.79446, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.29771}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00067, "acc_pose": 0.79298, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23818}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00067, "acc_pose": 0.79468, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23857}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00067, "acc_pose": 0.79632, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23838}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79843, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.23832}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0556, "heatmap_loss": 0.00067, "acc_pose": 0.78548, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.2981}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00067, "acc_pose": 0.79223, "loss": 0.00067, "grad_norm": 0.00115, "time": 0.23838}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00067, "acc_pose": 0.78878, "loss": 0.00067, "grad_norm": 0.00121, "time": 0.23792}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00056, "heatmap_loss": 0.00067, "acc_pose": 0.79663, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.2376}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00066, "acc_pose": 0.79829, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23762}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05498, "heatmap_loss": 0.00067, "acc_pose": 0.79504, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.30106}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79976, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23872}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.78713, "loss": 0.00067, "grad_norm": 0.00108, "time": 0.23887}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79488, "loss": 0.00067, "grad_norm": 0.00119, "time": 0.23823}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00067, "acc_pose": 0.79608, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23769}
+{"mode": "val", "epoch": 90, "iter": 407, "lr": 1e-05, "AP": 0.73256, "AP .5": 0.89784, "AP .75": 0.80885, "AP (M)": 0.66064, "AP (L)": 0.75623, "AR": 0.78794, "AR .5": 0.93939, "AR .75": 0.85674, "AR (M)": 0.74542, "AR (L)": 0.84942}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05469, "heatmap_loss": 0.00066, "acc_pose": 0.80078, "loss": 0.00066, "grad_norm": 0.00115, "time": 0.295}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00066, "acc_pose": 0.80182, "loss": 0.00066, "grad_norm": 0.00114, "time": 0.23834}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00066, "acc_pose": 0.80168, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23828}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00054, "heatmap_loss": 0.00067, "acc_pose": 0.79537, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.23833}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79351, "loss": 0.00067, "grad_norm": 0.00113, "time": 0.23792}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05496, "heatmap_loss": 0.00066, "acc_pose": 0.79906, "loss": 0.00066, "grad_norm": 0.00107, "time": 0.29734}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.7911, "loss": 0.00067, "grad_norm": 0.00114, "time": 0.23897}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00066, "acc_pose": 0.79368, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23776}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00067, "acc_pose": 0.79905, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.23808}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.79408, "loss": 0.00067, "grad_norm": 0.00111, "time": 0.23817}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05519, "heatmap_loss": 0.00066, "acc_pose": 0.7947, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.29795}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00067, "acc_pose": 0.79181, "loss": 0.00067, "grad_norm": 0.00106, "time": 0.2383}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.79936, "loss": 0.00067, "grad_norm": 0.00115, "time": 0.23796}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79944, "loss": 0.00067, "grad_norm": 0.00108, "time": 0.23834}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.80614, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.2381}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05821, "heatmap_loss": 0.00066, "acc_pose": 0.79237, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.30125}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00067, "acc_pose": 0.8013, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23806}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00066, "acc_pose": 0.79297, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23789}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.79787, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.23745}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.80462, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.23762}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05598, "heatmap_loss": 0.00066, "acc_pose": 0.79554, "loss": 0.00066, "grad_norm": 0.00108, "time": 0.29566}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79217, "loss": 0.00066, "grad_norm": 0.00114, "time": 0.23807}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00067, "acc_pose": 0.78943, "loss": 0.00067, "grad_norm": 0.00109, "time": 0.2378}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.7973, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23837}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79719, "loss": 0.00066, "grad_norm": 0.00113, "time": 0.23823}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0586, "heatmap_loss": 0.00066, "acc_pose": 0.80401, "loss": 0.00066, "grad_norm": 0.00106, "time": 0.29923}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79722, "loss": 0.00066, "grad_norm": 0.00112, "time": 0.23854}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00066, "acc_pose": 0.79589, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23805}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.79339, "loss": 0.00067, "grad_norm": 0.00116, "time": 0.23815}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00067, "acc_pose": 0.80003, "loss": 0.00067, "grad_norm": 0.00112, "time": 0.23721}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05587, "heatmap_loss": 0.00066, "acc_pose": 0.79847, "loss": 0.00066, "grad_norm": 0.00108, "time": 0.29719}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79531, "loss": 0.00066, "grad_norm": 0.00114, "time": 0.2384}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00066, "acc_pose": 0.80188, "loss": 0.00066, "grad_norm": 0.00112, "time": 0.23827}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00066, "acc_pose": 0.80344, "loss": 0.00066, "grad_norm": 0.00118, "time": 0.23803}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00066, "acc_pose": 0.80074, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23765}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0557, "heatmap_loss": 0.00066, "acc_pose": 0.79316, "loss": 0.00066, "grad_norm": 0.00107, "time": 0.29779}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00066, "acc_pose": 0.79537, "loss": 0.00066, "grad_norm": 0.00114, "time": 0.23837}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79733, "loss": 0.00065, "grad_norm": 0.00109, "time": 0.23859}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00066, "acc_pose": 0.80107, "loss": 0.00066, "grad_norm": 0.00107, "time": 0.2377}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00066, "acc_pose": 0.79779, "loss": 0.00066, "grad_norm": 0.00113, "time": 0.23863}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05553, "heatmap_loss": 0.00066, "acc_pose": 0.80225, "loss": 0.00066, "grad_norm": 0.00108, "time": 0.29697}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00066, "acc_pose": 0.7962, "loss": 0.00066, "grad_norm": 0.00108, "time": 0.238}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79832, "loss": 0.00066, "grad_norm": 0.00114, "time": 0.23849}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00066, "acc_pose": 0.79392, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.23786}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.80345, "loss": 0.00066, "grad_norm": 0.00112, "time": 0.23773}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05515, "heatmap_loss": 0.00065, "acc_pose": 0.80336, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.29805}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00066, "acc_pose": 0.79345, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23869}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00066, "acc_pose": 0.79536, "loss": 0.00066, "grad_norm": 0.00113, "time": 0.23825}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.80095, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23795}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79247, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23838}
+{"mode": "val", "epoch": 100, "iter": 407, "lr": 1e-05, "AP": 0.73578, "AP .5": 0.90219, "AP .75": 0.81051, "AP (M)": 0.66362, "AP (L)": 0.75984, "AR": 0.79126, "AR .5": 0.94112, "AR .75": 0.8569, "AR (M)": 0.74996, "AR (L)": 0.85076}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05464, "heatmap_loss": 0.00065, "acc_pose": 0.79855, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.29408}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00066, "acc_pose": 0.79295, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23875}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.80075, "loss": 0.00066, "grad_norm": 0.00113, "time": 0.23828}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.79843, "loss": 0.00066, "grad_norm": 0.00112, "time": 0.23842}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.80004, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23826}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05548, "heatmap_loss": 0.00065, "acc_pose": 0.80593, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.29694}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00065, "acc_pose": 0.79736, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.23869}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79286, "loss": 0.00066, "grad_norm": 0.00107, "time": 0.23757}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00066, "acc_pose": 0.80762, "loss": 0.00066, "grad_norm": 0.00109, "time": 0.23835}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.80393, "loss": 0.00066, "grad_norm": 0.00113, "time": 0.23718}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05525, "heatmap_loss": 0.00066, "acc_pose": 0.80247, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.2975}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.7977, "loss": 0.00066, "grad_norm": 0.00112, "time": 0.23807}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00066, "acc_pose": 0.8018, "loss": 0.00066, "grad_norm": 0.0011, "time": 0.23828}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.80236, "loss": 0.00065, "grad_norm": 0.00117, "time": 0.23824}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00066, "acc_pose": 0.79954, "loss": 0.00066, "grad_norm": 0.00111, "time": 0.23728}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00065, "acc_pose": 0.79862, "loss": 0.00065, "grad_norm": 0.00112, "time": 0.29629}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00065, "acc_pose": 0.80127, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23833}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.79927, "loss": 0.00065, "grad_norm": 0.00115, "time": 0.23817}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.80439, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.23776}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79886, "loss": 0.00066, "grad_norm": 0.00117, "time": 0.23803}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05619, "heatmap_loss": 0.00065, "acc_pose": 0.80526, "loss": 0.00065, "grad_norm": 0.00109, "time": 0.29856}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79418, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23834}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.7962, "loss": 0.00065, "grad_norm": 0.00112, "time": 0.23827}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00065, "acc_pose": 0.80807, "loss": 0.00065, "grad_norm": 0.00109, "time": 0.23806}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00066, "acc_pose": 0.79562, "loss": 0.00066, "grad_norm": 0.00115, "time": 0.23816}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05843, "heatmap_loss": 0.00065, "acc_pose": 0.79787, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.30183}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.7975, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.23822}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.79646, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.2382}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.8008, "loss": 0.00065, "grad_norm": 0.00115, "time": 0.23743}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00065, "acc_pose": 0.80065, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.23796}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05536, "heatmap_loss": 0.00065, "acc_pose": 0.80193, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.29758}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79633, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23925}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80537, "loss": 0.00065, "grad_norm": 0.00107, "time": 0.23866}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00065, "acc_pose": 0.80169, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23821}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00065, "acc_pose": 0.80186, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23794}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0556, "heatmap_loss": 0.00065, "acc_pose": 0.80115, "loss": 0.00065, "grad_norm": 0.00109, "time": 0.29932}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.7986, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.23893}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.79602, "loss": 0.00065, "grad_norm": 0.00114, "time": 0.2384}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00065, "acc_pose": 0.8012, "loss": 0.00065, "grad_norm": 0.00106, "time": 0.23849}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.80378, "loss": 0.00065, "grad_norm": 0.00114, "time": 0.23821}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05604, "heatmap_loss": 0.00065, "acc_pose": 0.80553, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.29756}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00065, "acc_pose": 0.79681, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23835}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.80413, "loss": 0.00065, "grad_norm": 0.00115, "time": 0.2385}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.80418, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.23758}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.79996, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23828}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05511, "heatmap_loss": 0.00065, "acc_pose": 0.79632, "loss": 0.00065, "grad_norm": 0.00114, "time": 0.29953}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00065, "acc_pose": 0.79784, "loss": 0.00065, "grad_norm": 0.00116, "time": 0.23828}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00046, "heatmap_loss": 0.00065, "acc_pose": 0.79869, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23829}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81142, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.2379}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00065, "acc_pose": 0.80731, "loss": 0.00065, "grad_norm": 0.00117, "time": 0.23825}
+{"mode": "val", "epoch": 110, "iter": 407, "lr": 1e-05, "AP": 0.74016, "AP .5": 0.90332, "AP .75": 0.81552, "AP (M)": 0.66892, "AP (L)": 0.76479, "AR": 0.79479, "AR .5": 0.9427, "AR .75": 0.86288, "AR (M)": 0.75373, "AR (L)": 0.85433}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05544, "heatmap_loss": 0.00064, "acc_pose": 0.80218, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.29522}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.79976, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23883}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79807, "loss": 0.00065, "grad_norm": 0.00112, "time": 0.23879}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.80151, "loss": 0.00065, "grad_norm": 0.00109, "time": 0.23842}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00065, "acc_pose": 0.79999, "loss": 0.00065, "grad_norm": 0.00108, "time": 0.23829}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05798, "heatmap_loss": 0.00065, "acc_pose": 0.80438, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.30022}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00065, "acc_pose": 0.79759, "loss": 0.00065, "grad_norm": 0.00108, "time": 0.23868}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0005, "heatmap_loss": 0.00064, "acc_pose": 0.8071, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.2376}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81306, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23811}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80903, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23828}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05516, "heatmap_loss": 0.00064, "acc_pose": 0.80779, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.29703}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00065, "acc_pose": 0.8044, "loss": 0.00065, "grad_norm": 0.00114, "time": 0.2387}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.80339, "loss": 0.00065, "grad_norm": 0.00113, "time": 0.23798}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80959, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23753}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00065, "acc_pose": 0.80069, "loss": 0.00065, "grad_norm": 0.0011, "time": 0.23827}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05599, "heatmap_loss": 0.00064, "acc_pose": 0.80144, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.29643}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00064, "acc_pose": 0.80775, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.2382}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79574, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23863}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00065, "acc_pose": 0.80961, "loss": 0.00065, "grad_norm": 0.00108, "time": 0.23758}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00064, "acc_pose": 0.8056, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.2377}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05556, "heatmap_loss": 0.00064, "acc_pose": 0.80251, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.29642}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80277, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23823}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00065, "acc_pose": 0.79817, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23798}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.8089, "loss": 0.00064, "grad_norm": 0.00108, "time": 0.23701}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.80393, "loss": 0.00065, "grad_norm": 0.00111, "time": 0.23769}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05534, "heatmap_loss": 0.00065, "acc_pose": 0.81073, "loss": 0.00065, "grad_norm": 0.00115, "time": 0.29744}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.79862, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23888}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.79645, "loss": 0.00064, "grad_norm": 0.00114, "time": 0.23811}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00064, "acc_pose": 0.80408, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23733}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00065, "acc_pose": 0.80349, "loss": 0.00065, "grad_norm": 0.00118, "time": 0.23764}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05576, "heatmap_loss": 0.00064, "acc_pose": 0.8015, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.29767}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00064, "acc_pose": 0.80871, "loss": 0.00064, "grad_norm": 0.00115, "time": 0.23832}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00064, "acc_pose": 0.80464, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23787}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00064, "acc_pose": 0.80419, "loss": 0.00064, "grad_norm": 0.00107, "time": 0.23735}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80763, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.2374}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05527, "heatmap_loss": 0.00064, "acc_pose": 0.80362, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.29782}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80188, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23849}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.8045, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23862}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80894, "loss": 0.00064, "grad_norm": 0.00107, "time": 0.23834}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80581, "loss": 0.00065, "grad_norm": 0.00118, "time": 0.2378}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05556, "heatmap_loss": 0.00064, "acc_pose": 0.81368, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.29909}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.81102, "loss": 0.00064, "grad_norm": 0.00114, "time": 0.23872}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.79795, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23833}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80441, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.23764}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00064, "acc_pose": 0.80503, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.238}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05468, "heatmap_loss": 0.00064, "acc_pose": 0.81227, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.29764}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00064, "acc_pose": 0.8068, "loss": 0.00064, "grad_norm": 0.00115, "time": 0.23849}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80043, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23821}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80377, "loss": 0.00064, "grad_norm": 0.00115, "time": 0.23827}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80777, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23741}
+{"mode": "val", "epoch": 120, "iter": 407, "lr": 1e-05, "AP": 0.74112, "AP .5": 0.90327, "AP .75": 0.817, "AP (M)": 0.66913, "AP (L)": 0.76594, "AR": 0.79578, "AR .5": 0.94128, "AR .75": 0.86288, "AR (M)": 0.75458, "AR (L)": 0.85556}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05534, "heatmap_loss": 0.00064, "acc_pose": 0.80132, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.29538}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.81209, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23882}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80901, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23841}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80753, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23864}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00064, "acc_pose": 0.80783, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23802}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05573, "heatmap_loss": 0.00064, "acc_pose": 0.80505, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.2989}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80833, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23867}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.80713, "loss": 0.00064, "grad_norm": 0.00115, "time": 0.23826}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81057, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.2381}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80737, "loss": 0.00064, "grad_norm": 0.00108, "time": 0.23828}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05538, "heatmap_loss": 0.00064, "acc_pose": 0.81391, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.29732}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80893, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23857}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.80662, "loss": 0.00064, "grad_norm": 0.00114, "time": 0.23854}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80559, "loss": 0.00064, "grad_norm": 0.00107, "time": 0.2387}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81103, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.23827}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05908, "heatmap_loss": 0.00064, "acc_pose": 0.80035, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.30058}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80458, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.23849}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00064, "acc_pose": 0.80406, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23789}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.80621, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23785}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.8068, "loss": 0.00064, "grad_norm": 0.00108, "time": 0.2374}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05599, "heatmap_loss": 0.00064, "acc_pose": 0.80562, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.2985}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81197, "loss": 0.00064, "grad_norm": 0.00108, "time": 0.2381}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80263, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.23931}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.80346, "loss": 0.00064, "grad_norm": 0.00107, "time": 0.23752}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80544, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.2376}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05572, "heatmap_loss": 0.00063, "acc_pose": 0.80916, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.29795}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00064, "acc_pose": 0.80607, "loss": 0.00064, "grad_norm": 0.00117, "time": 0.23851}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00064, "acc_pose": 0.80575, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.23813}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.81574, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23787}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.81082, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23789}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05554, "heatmap_loss": 0.00063, "acc_pose": 0.80856, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.29717}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00064, "acc_pose": 0.80298, "loss": 0.00064, "grad_norm": 0.00115, "time": 0.2391}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00063, "acc_pose": 0.80384, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.23855}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80839, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.23731}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.81338, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.23782}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05552, "heatmap_loss": 0.00064, "acc_pose": 0.80707, "loss": 0.00064, "grad_norm": 0.00114, "time": 0.29637}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00063, "acc_pose": 0.80694, "loss": 0.00063, "grad_norm": 0.00109, "time": 0.23827}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81061, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23744}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.79951, "loss": 0.00063, "grad_norm": 0.00113, "time": 0.23868}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80615, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.2379}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05623, "heatmap_loss": 0.00063, "acc_pose": 0.81046, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.2978}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00063, "acc_pose": 0.81132, "loss": 0.00063, "grad_norm": 0.00109, "time": 0.23834}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80199, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.23825}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.80709, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.23873}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00064, "acc_pose": 0.8028, "loss": 0.00064, "grad_norm": 0.00106, "time": 0.23768}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.056, "heatmap_loss": 0.00063, "acc_pose": 0.80774, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.29846}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.79642, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23818}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00064, "acc_pose": 0.80553, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23782}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00063, "acc_pose": 0.8092, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.2375}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.80885, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23767}
+{"mode": "val", "epoch": 130, "iter": 407, "lr": 1e-05, "AP": 0.74092, "AP .5": 0.90235, "AP .75": 0.81771, "AP (M)": 0.66984, "AP (L)": 0.76515, "AR": 0.79578, "AR .5": 0.94112, "AR .75": 0.86477, "AR (M)": 0.75354, "AR (L)": 0.85715}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05703, "heatmap_loss": 0.00063, "acc_pose": 0.81453, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.29668}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81044, "loss": 0.00063, "grad_norm": 0.00116, "time": 0.23853}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80661, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.2389}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00063, "acc_pose": 0.81361, "loss": 0.00063, "grad_norm": 0.00116, "time": 0.23858}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.81324, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23835}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05566, "heatmap_loss": 0.00063, "acc_pose": 0.80517, "loss": 0.00063, "grad_norm": 0.00109, "time": 0.29817}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00064, "acc_pose": 0.81132, "loss": 0.00064, "grad_norm": 0.0011, "time": 0.23987}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00063, "acc_pose": 0.81064, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.23893}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00063, "acc_pose": 0.80872, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23819}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00063, "acc_pose": 0.81296, "loss": 0.00063, "grad_norm": 0.00105, "time": 0.23869}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05599, "heatmap_loss": 0.00063, "acc_pose": 0.80187, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.29761}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80975, "loss": 0.00063, "grad_norm": 0.00113, "time": 0.23974}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.8043, "loss": 0.00064, "grad_norm": 0.00111, "time": 0.23924}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00063, "acc_pose": 0.80803, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.23823}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80206, "loss": 0.00064, "grad_norm": 0.00112, "time": 0.23853}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05548, "heatmap_loss": 0.00063, "acc_pose": 0.81253, "loss": 0.00063, "grad_norm": 0.00114, "time": 0.29777}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80554, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.23944}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80795, "loss": 0.00063, "grad_norm": 0.00114, "time": 0.23798}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81073, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23777}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80147, "loss": 0.00064, "grad_norm": 0.00109, "time": 0.23794}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05547, "heatmap_loss": 0.00063, "acc_pose": 0.81048, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.29855}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.80466, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.23833}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.80565, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.23823}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80749, "loss": 0.00063, "grad_norm": 0.00113, "time": 0.23837}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81232, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.23819}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05526, "heatmap_loss": 0.00063, "acc_pose": 0.81282, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.29838}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.81474, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23822}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.806, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23769}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00063, "acc_pose": 0.80255, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23762}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81194, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23725}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05509, "heatmap_loss": 0.00063, "acc_pose": 0.81216, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.29698}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.80442, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.23829}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80817, "loss": 0.00063, "grad_norm": 0.00109, "time": 0.23776}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00063, "acc_pose": 0.81326, "loss": 0.00063, "grad_norm": 0.00113, "time": 0.23898}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80965, "loss": 0.00063, "grad_norm": 0.00116, "time": 0.23813}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05543, "heatmap_loss": 0.00063, "acc_pose": 0.81263, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.29697}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81087, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23885}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.812, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23796}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00063, "acc_pose": 0.81065, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.23788}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00063, "acc_pose": 0.81402, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.23815}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05577, "heatmap_loss": 0.00062, "acc_pose": 0.81005, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.29741}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.80965, "loss": 0.00062, "grad_norm": 0.00107, "time": 0.23866}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81269, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.2379}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.8078, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23838}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80843, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.2382}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05542, "heatmap_loss": 0.00063, "acc_pose": 0.81596, "loss": 0.00063, "grad_norm": 0.00115, "time": 0.29803}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00063, "acc_pose": 0.80831, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.23804}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00063, "acc_pose": 0.80667, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23796}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.81104, "loss": 0.00064, "grad_norm": 0.00113, "time": 0.23835}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81019, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.23784}
+{"mode": "val", "epoch": 140, "iter": 407, "lr": 1e-05, "AP": 0.74259, "AP .5": 0.90339, "AP .75": 0.81779, "AP (M)": 0.67053, "AP (L)": 0.76695, "AR": 0.79743, "AR .5": 0.94238, "AR .75": 0.86445, "AR (M)": 0.75649, "AR (L)": 0.85723}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05518, "heatmap_loss": 0.00063, "acc_pose": 0.81029, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.2953}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.8157, "loss": 0.00062, "grad_norm": 0.00106, "time": 0.23859}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.79984, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.2379}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00063, "acc_pose": 0.81468, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23878}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00063, "acc_pose": 0.811, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23803}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05481, "heatmap_loss": 0.00062, "acc_pose": 0.81336, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.29792}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80516, "loss": 0.00063, "grad_norm": 0.00106, "time": 0.23895}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00063, "acc_pose": 0.81092, "loss": 0.00063, "grad_norm": 0.00111, "time": 0.23904}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.81346, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.23777}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81387, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.23802}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.0553, "heatmap_loss": 0.00062, "acc_pose": 0.81821, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.29676}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81058, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23863}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.81227, "loss": 0.00063, "grad_norm": 0.00107, "time": 0.23806}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81226, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23834}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00063, "acc_pose": 0.81403, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.23773}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05551, "heatmap_loss": 0.00062, "acc_pose": 0.80891, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.29798}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00062, "acc_pose": 0.80652, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23903}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.80218, "loss": 0.00062, "grad_norm": 0.00107, "time": 0.23891}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00063, "acc_pose": 0.80633, "loss": 0.00063, "grad_norm": 0.0011, "time": 0.23805}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81191, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23852}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05613, "heatmap_loss": 0.00062, "acc_pose": 0.81462, "loss": 0.00062, "grad_norm": 0.00104, "time": 0.29805}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.80996, "loss": 0.00062, "grad_norm": 0.00103, "time": 0.23841}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81233, "loss": 0.00062, "grad_norm": 0.00105, "time": 0.238}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.80791, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23841}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.82156, "loss": 0.00062, "grad_norm": 0.00107, "time": 0.23792}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05561, "heatmap_loss": 0.00062, "acc_pose": 0.80584, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.29781}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81173, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23936}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00062, "acc_pose": 0.80634, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.2392}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.81288, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23893}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00063, "acc_pose": 0.81138, "loss": 0.00063, "grad_norm": 0.00108, "time": 0.2387}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05562, "heatmap_loss": 0.00062, "acc_pose": 0.81915, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.29724}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.80995, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23883}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00063, "acc_pose": 0.80959, "loss": 0.00063, "grad_norm": 0.00109, "time": 0.23813}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.80883, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23833}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00063, "acc_pose": 0.80725, "loss": 0.00063, "grad_norm": 0.00112, "time": 0.23815}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.055, "heatmap_loss": 0.00062, "acc_pose": 0.81123, "loss": 0.00062, "grad_norm": 0.00107, "time": 0.29747}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.81035, "loss": 0.00062, "grad_norm": 0.00113, "time": 0.2386}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.81302, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23938}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81062, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23821}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00062, "acc_pose": 0.814, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23811}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05575, "heatmap_loss": 0.00062, "acc_pose": 0.80713, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.29881}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.8189, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.23866}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81984, "loss": 0.00062, "grad_norm": 0.00114, "time": 0.23863}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81108, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23856}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.8174, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.2383}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05797, "heatmap_loss": 0.00062, "acc_pose": 0.81324, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.30051}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.81773, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23906}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81129, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23802}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.82078, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.23872}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00062, "acc_pose": 0.81367, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.2383}
+{"mode": "val", "epoch": 150, "iter": 407, "lr": 1e-05, "AP": 0.744, "AP .5": 0.90357, "AP .75": 0.81931, "AP (M)": 0.67174, "AP (L)": 0.76932, "AR": 0.79814, "AR .5": 0.94191, "AR .75": 0.86555, "AR (M)": 0.75561, "AR (L)": 0.85994}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05431, "heatmap_loss": 0.00062, "acc_pose": 0.8101, "loss": 0.00062, "grad_norm": 0.00106, "time": 0.29439}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81982, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23883}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81659, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23843}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81179, "loss": 0.00062, "grad_norm": 0.00107, "time": 0.23876}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.8193, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23787}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05549, "heatmap_loss": 0.00061, "acc_pose": 0.81343, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.29823}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81664, "loss": 0.00062, "grad_norm": 0.00113, "time": 0.23825}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.81455, "loss": 0.00062, "grad_norm": 0.00106, "time": 0.23864}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81247, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.23741}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81909, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23788}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05529, "heatmap_loss": 0.00061, "acc_pose": 0.82056, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.29737}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.82322, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23796}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81923, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23817}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81602, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.2382}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00046, "heatmap_loss": 0.00061, "acc_pose": 0.81641, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23807}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05522, "heatmap_loss": 0.00062, "acc_pose": 0.81459, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.2994}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81521, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.2401}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00054, "heatmap_loss": 0.00062, "acc_pose": 0.8041, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.2382}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.81312, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.23858}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.81178, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23912}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05575, "heatmap_loss": 0.00062, "acc_pose": 0.819, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.29748}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00061, "acc_pose": 0.8142, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23845}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81415, "loss": 0.00062, "grad_norm": 0.00106, "time": 0.23866}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.81364, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.23759}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81613, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23763}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05872, "heatmap_loss": 0.00062, "acc_pose": 0.81084, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.30124}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.81239, "loss": 0.00062, "grad_norm": 0.00105, "time": 0.23895}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81087, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23911}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00062, "acc_pose": 0.81356, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23894}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81415, "loss": 0.00062, "grad_norm": 0.00105, "time": 0.23858}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05561, "heatmap_loss": 0.00061, "acc_pose": 0.82154, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.29742}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.8191, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23874}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.81272, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.23808}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.8213, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23898}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00062, "acc_pose": 0.813, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23808}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05499, "heatmap_loss": 0.00062, "acc_pose": 0.81118, "loss": 0.00062, "grad_norm": 0.00124, "time": 0.29789}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81534, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23825}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.80865, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23826}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.8184, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23839}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00054, "heatmap_loss": 0.00061, "acc_pose": 0.81523, "loss": 0.00061, "grad_norm": 0.00103, "time": 0.23777}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05505, "heatmap_loss": 0.00061, "acc_pose": 0.81211, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.29912}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00061, "acc_pose": 0.81116, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.23895}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00062, "acc_pose": 0.81083, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.23893}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00062, "acc_pose": 0.81392, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.23828}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.81234, "loss": 0.00062, "grad_norm": 0.00114, "time": 0.23868}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05636, "heatmap_loss": 0.00061, "acc_pose": 0.81739, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.29799}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.81562, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.23939}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81827, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.23861}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81369, "loss": 0.00061, "grad_norm": 0.00106, "time": 0.23841}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.81201, "loss": 0.00062, "grad_norm": 0.00113, "time": 0.23883}
+{"mode": "val", "epoch": 160, "iter": 407, "lr": 1e-05, "AP": 0.74271, "AP .5": 0.90216, "AP .75": 0.81674, "AP (M)": 0.67082, "AP (L)": 0.76703, "AR": 0.79787, "AR .5": 0.94159, "AR .75": 0.86477, "AR (M)": 0.75545, "AR (L)": 0.85953}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05574, "heatmap_loss": 0.00062, "acc_pose": 0.81888, "loss": 0.00062, "grad_norm": 0.00105, "time": 0.2959}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.82173, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23988}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81101, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.23858}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.82391, "loss": 0.00061, "grad_norm": 0.00105, "time": 0.23893}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81525, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23852}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.055, "heatmap_loss": 0.00061, "acc_pose": 0.81708, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.29881}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.81542, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.23909}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.81549, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23839}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81411, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23839}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00062, "acc_pose": 0.81604, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23815}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05462, "heatmap_loss": 0.00061, "acc_pose": 0.81834, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.29968}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00062, "acc_pose": 0.81281, "loss": 0.00062, "grad_norm": 0.00106, "time": 0.23847}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00062, "acc_pose": 0.80909, "loss": 0.00062, "grad_norm": 0.00109, "time": 0.23818}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81193, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.23807}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81536, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23766}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05533, "heatmap_loss": 0.00061, "acc_pose": 0.81475, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.29788}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81399, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23913}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81158, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.23883}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00062, "acc_pose": 0.81409, "loss": 0.00062, "grad_norm": 0.00108, "time": 0.2382}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.82045, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.23836}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05528, "heatmap_loss": 0.00061, "acc_pose": 0.81783, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.29837}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81787, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23798}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.82014, "loss": 0.00061, "grad_norm": 0.00105, "time": 0.23761}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81638, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.23748}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.82416, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23703}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05594, "heatmap_loss": 0.00061, "acc_pose": 0.81982, "loss": 0.00061, "grad_norm": 0.00105, "time": 0.29848}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81326, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23877}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81214, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.23858}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81553, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23835}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.81817, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.23802}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05516, "heatmap_loss": 0.00062, "acc_pose": 0.82087, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.29952}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.81603, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.23833}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00061, "acc_pose": 0.8122, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.23821}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.82861, "loss": 0.00061, "grad_norm": 0.00106, "time": 0.23845}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00061, "acc_pose": 0.81272, "loss": 0.00061, "grad_norm": 0.00107, "time": 0.23796}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05866, "heatmap_loss": 0.00061, "acc_pose": 0.82463, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.30052}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.81608, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.23862}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.81389, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.23803}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00061, "acc_pose": 0.81657, "loss": 0.00061, "grad_norm": 0.00106, "time": 0.23843}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.82185, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.23801}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05524, "heatmap_loss": 0.00061, "acc_pose": 0.82481, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.29809}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00061, "acc_pose": 0.81147, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23833}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00061, "acc_pose": 0.81886, "loss": 0.00061, "grad_norm": 0.00115, "time": 0.23827}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81814, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.23739}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.82153, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.23778}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 1e-05, "memory": 14884, "data_time": 0.05571, "heatmap_loss": 0.00061, "acc_pose": 0.82154, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.29784}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 1e-05, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.0006, "acc_pose": 0.81127, "loss": 0.0006, "grad_norm": 0.00109, "time": 0.23936}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 1e-05, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.80985, "loss": 0.00061, "grad_norm": 0.00108, "time": 0.23851}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 1e-05, "memory": 14884, "data_time": 0.00056, "heatmap_loss": 0.00061, "acc_pose": 0.81652, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.23832}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 1e-05, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81963, "loss": 0.00061, "grad_norm": 0.00106, "time": 0.23824}
+{"mode": "val", "epoch": 170, "iter": 407, "lr": 1e-05, "AP": 0.74564, "AP .5": 0.90364, "AP .75": 0.81755, "AP (M)": 0.6741, "AP (L)": 0.77142, "AR": 0.79937, "AR .5": 0.94222, "AR .75": 0.86398, "AR (M)": 0.75788, "AR (L)": 0.86009}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05495, "heatmap_loss": 0.0006, "acc_pose": 0.82275, "loss": 0.0006, "grad_norm": 0.00104, "time": 0.29465}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.82081, "loss": 0.0006, "grad_norm": 0.00108, "time": 0.23929}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.82772, "loss": 0.00059, "grad_norm": 0.00099, "time": 0.23895}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.82539, "loss": 0.00059, "grad_norm": 0.00103, "time": 0.23831}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.82168, "loss": 0.00059, "grad_norm": 0.00103, "time": 0.23779}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05612, "heatmap_loss": 0.00059, "acc_pose": 0.8228, "loss": 0.00059, "grad_norm": 0.00101, "time": 0.29719}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00059, "acc_pose": 0.81677, "loss": 0.00059, "grad_norm": 0.00102, "time": 0.23828}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82368, "loss": 0.00059, "grad_norm": 0.00102, "time": 0.23808}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82832, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.23833}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.83, "loss": 0.00058, "grad_norm": 0.00103, "time": 0.23762}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05606, "heatmap_loss": 0.00059, "acc_pose": 0.82147, "loss": 0.00059, "grad_norm": 0.00103, "time": 0.29798}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00058, "acc_pose": 0.83014, "loss": 0.00058, "grad_norm": 0.00099, "time": 0.23782}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00058, "acc_pose": 0.82854, "loss": 0.00058, "grad_norm": 0.00106, "time": 0.23779}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00059, "acc_pose": 0.83193, "loss": 0.00059, "grad_norm": 0.00104, "time": 0.23822}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00059, "acc_pose": 0.83026, "loss": 0.00059, "grad_norm": 0.00103, "time": 0.23782}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05533, "heatmap_loss": 0.00058, "acc_pose": 0.82887, "loss": 0.00058, "grad_norm": 0.00102, "time": 0.29848}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00059, "acc_pose": 0.82432, "loss": 0.00059, "grad_norm": 0.00104, "time": 0.23931}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00058, "acc_pose": 0.83, "loss": 0.00058, "grad_norm": 0.00096, "time": 0.23891}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00058, "acc_pose": 0.8326, "loss": 0.00058, "grad_norm": 0.001, "time": 0.23858}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82293, "loss": 0.00058, "grad_norm": 0.00101, "time": 0.23862}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05543, "heatmap_loss": 0.00058, "acc_pose": 0.83053, "loss": 0.00058, "grad_norm": 0.00098, "time": 0.29947}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82883, "loss": 0.00058, "grad_norm": 0.00104, "time": 0.2384}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82995, "loss": 0.00058, "grad_norm": 0.00098, "time": 0.23831}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83118, "loss": 0.00058, "grad_norm": 0.00099, "time": 0.23774}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00058, "acc_pose": 0.83721, "loss": 0.00058, "grad_norm": 0.00098, "time": 0.23774}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05886, "heatmap_loss": 0.00058, "acc_pose": 0.83028, "loss": 0.00058, "grad_norm": 0.00101, "time": 0.30043}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82865, "loss": 0.00058, "grad_norm": 0.00105, "time": 0.23882}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00058, "acc_pose": 0.82407, "loss": 0.00058, "grad_norm": 0.00103, "time": 0.23866}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.82529, "loss": 0.00058, "grad_norm": 0.00103, "time": 0.23777}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00058, "acc_pose": 0.82751, "loss": 0.00058, "grad_norm": 0.00101, "time": 0.23796}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05581, "heatmap_loss": 0.00057, "acc_pose": 0.831, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.29822}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82582, "loss": 0.00058, "grad_norm": 0.00097, "time": 0.23904}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82504, "loss": 0.00058, "grad_norm": 0.001, "time": 0.23851}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82893, "loss": 0.00058, "grad_norm": 0.00101, "time": 0.23813}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83074, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23816}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05503, "heatmap_loss": 0.00058, "acc_pose": 0.83135, "loss": 0.00058, "grad_norm": 0.00103, "time": 0.29858}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82851, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23851}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83298, "loss": 0.00057, "grad_norm": 0.00097, "time": 0.23807}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00057, "acc_pose": 0.82771, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.2385}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83845, "loss": 0.00057, "grad_norm": 0.00096, "time": 0.23817}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05503, "heatmap_loss": 0.00057, "acc_pose": 0.82537, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.29775}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82413, "loss": 0.00058, "grad_norm": 0.00106, "time": 0.23786}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.82898, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.23837}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00057, "acc_pose": 0.83582, "loss": 0.00057, "grad_norm": 0.00097, "time": 0.2373}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.83443, "loss": 0.00058, "grad_norm": 0.001, "time": 0.23822}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0549, "heatmap_loss": 0.00057, "acc_pose": 0.83147, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.29853}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.8373, "loss": 0.00058, "grad_norm": 0.001, "time": 0.23883}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.8273, "loss": 0.00058, "grad_norm": 0.00106, "time": 0.23869}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83483, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23851}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.83026, "loss": 0.00058, "grad_norm": 0.001, "time": 0.2385}
+{"mode": "val", "epoch": 180, "iter": 407, "lr": 0.0, "AP": 0.75254, "AP .5": 0.90662, "AP .75": 0.82583, "AP (M)": 0.68184, "AP (L)": 0.77785, "AR": 0.80674, "AR .5": 0.94506, "AR .75": 0.87217, "AR (M)": 0.76572, "AR (L)": 0.86641}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05542, "heatmap_loss": 0.00058, "acc_pose": 0.83045, "loss": 0.00058, "grad_norm": 0.00103, "time": 0.29514}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.8327, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23942}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82877, "loss": 0.00058, "grad_norm": 0.00099, "time": 0.23873}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00057, "acc_pose": 0.832, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.23859}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.83114, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.2387}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0561, "heatmap_loss": 0.00057, "acc_pose": 0.83273, "loss": 0.00057, "grad_norm": 0.00105, "time": 0.29583}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83574, "loss": 0.00057, "grad_norm": 0.00094, "time": 0.23872}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00058, "acc_pose": 0.82801, "loss": 0.00058, "grad_norm": 0.00102, "time": 0.23745}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83514, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23765}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00058, "acc_pose": 0.83457, "loss": 0.00058, "grad_norm": 0.00104, "time": 0.23785}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0558, "heatmap_loss": 0.00057, "acc_pose": 0.82862, "loss": 0.00057, "grad_norm": 0.001, "time": 0.29887}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00045, "heatmap_loss": 0.00057, "acc_pose": 0.83056, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23806}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00057, "acc_pose": 0.83264, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23803}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.83251, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.23831}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83265, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23766}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05556, "heatmap_loss": 0.00058, "acc_pose": 0.82919, "loss": 0.00058, "grad_norm": 0.00107, "time": 0.29731}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83391, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.23878}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00057, "acc_pose": 0.83034, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23713}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.82259, "loss": 0.00057, "grad_norm": 0.00097, "time": 0.23859}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00057, "acc_pose": 0.83286, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23808}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05591, "heatmap_loss": 0.00057, "acc_pose": 0.83669, "loss": 0.00057, "grad_norm": 0.001, "time": 0.29755}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.8316, "loss": 0.00057, "grad_norm": 0.00106, "time": 0.23844}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.83626, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.238}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83474, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23783}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00057, "acc_pose": 0.83135, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.23748}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05845, "heatmap_loss": 0.00057, "acc_pose": 0.82943, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.30075}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00057, "acc_pose": 0.83015, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23814}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83279, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23798}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.83422, "loss": 0.00057, "grad_norm": 0.00105, "time": 0.23787}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83223, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23715}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05487, "heatmap_loss": 0.00057, "acc_pose": 0.83564, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.29725}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00056, "acc_pose": 0.83331, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23854}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82855, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.23784}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82935, "loss": 0.00057, "grad_norm": 0.001, "time": 0.2377}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83291, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.23778}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05778, "heatmap_loss": 0.00057, "acc_pose": 0.83541, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.29781}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83522, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23886}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.83393, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.23809}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83419, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23823}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83996, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23724}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05594, "heatmap_loss": 0.00057, "acc_pose": 0.83497, "loss": 0.00057, "grad_norm": 0.00105, "time": 0.29797}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00056, "acc_pose": 0.83561, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23806}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.82915, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.23772}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.82807, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.23809}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00056, "acc_pose": 0.84192, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23803}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05621, "heatmap_loss": 0.00057, "acc_pose": 0.83684, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.29771}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83388, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.23862}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00056, "acc_pose": 0.83168, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.2386}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.83736, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23805}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00057, "acc_pose": 0.83352, "loss": 0.00057, "grad_norm": 0.00098, "time": 0.23765}
+{"mode": "val", "epoch": 190, "iter": 407, "lr": 0.0, "AP": 0.75367, "AP .5": 0.9056, "AP .75": 0.82722, "AP (M)": 0.6829, "AP (L)": 0.77894, "AR": 0.80729, "AR .5": 0.94427, "AR .75": 0.87295, "AR (M)": 0.76602, "AR (L)": 0.86734}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05447, "heatmap_loss": 0.00057, "acc_pose": 0.83621, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.29555}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.83353, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23848}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.8351, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23832}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83372, "loss": 0.00057, "grad_norm": 0.00099, "time": 0.23844}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83136, "loss": 0.00057, "grad_norm": 0.00097, "time": 0.23819}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.056, "heatmap_loss": 0.00057, "acc_pose": 0.83077, "loss": 0.00057, "grad_norm": 0.00096, "time": 0.29858}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00056, "acc_pose": 0.83017, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23917}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.83205, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.2383}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.8345, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.23777}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00057, "acc_pose": 0.84308, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23852}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0559, "heatmap_loss": 0.00057, "acc_pose": 0.8325, "loss": 0.00057, "grad_norm": 0.001, "time": 0.29809}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00043, "heatmap_loss": 0.00056, "acc_pose": 0.83507, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23923}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83941, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23804}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83103, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23827}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00052, "heatmap_loss": 0.00057, "acc_pose": 0.83044, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.23862}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05568, "heatmap_loss": 0.00056, "acc_pose": 0.84037, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.29786}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.82956, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23902}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.83223, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.23803}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.83441, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23826}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00056, "acc_pose": 0.83896, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.23765}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05538, "heatmap_loss": 0.00057, "acc_pose": 0.83412, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.29745}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.82983, "loss": 0.00057, "grad_norm": 0.00105, "time": 0.23864}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83915, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23811}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83525, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.23791}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00056, "acc_pose": 0.83659, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23735}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05834, "heatmap_loss": 0.00056, "acc_pose": 0.83396, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.30077}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.84077, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23818}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00057, "acc_pose": 0.82759, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.23763}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.83884, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.23749}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.83024, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23792}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05564, "heatmap_loss": 0.00057, "acc_pose": 0.83837, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.29751}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.84034, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23823}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.8262, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.23897}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83061, "loss": 0.00056, "grad_norm": 0.00109, "time": 0.23806}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.8313, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23761}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05588, "heatmap_loss": 0.00056, "acc_pose": 0.83994, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.29957}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.83524, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23886}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83337, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.2379}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83189, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23816}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00044, "heatmap_loss": 0.00056, "acc_pose": 0.83697, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.23855}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05551, "heatmap_loss": 0.00056, "acc_pose": 0.8352, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.29993}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83685, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23862}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.83016, "loss": 0.00057, "grad_norm": 0.001, "time": 0.2388}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00056, "acc_pose": 0.83094, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23918}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.83679, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23822}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05517, "heatmap_loss": 0.00056, "acc_pose": 0.83714, "loss": 0.00056, "grad_norm": 0.001, "time": 0.29909}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00056, "acc_pose": 0.83052, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.23846}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.82843, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23816}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.8384, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.2384}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83475, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.2376}
+{"mode": "val", "epoch": 200, "iter": 407, "lr": 0.0, "AP": 0.75469, "AP .5": 0.90635, "AP .75": 0.82768, "AP (M)": 0.68371, "AP (L)": 0.77982, "AR": 0.80889, "AR .5": 0.94521, "AR .75": 0.875, "AR (M)": 0.76793, "AR (L)": 0.86845}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0555, "heatmap_loss": 0.00056, "acc_pose": 0.83538, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.29443}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83026, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23902}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83277, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23876}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.84081, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23885}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83308, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.23832}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05603, "heatmap_loss": 0.00056, "acc_pose": 0.83389, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.29675}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00057, "acc_pose": 0.83877, "loss": 0.00057, "grad_norm": 0.001, "time": 0.23844}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83628, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.23773}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83441, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.23718}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.8441, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.2374}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05555, "heatmap_loss": 0.00056, "acc_pose": 0.83264, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.2971}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.83781, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23913}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00056, "acc_pose": 0.83967, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23813}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00041, "heatmap_loss": 0.00056, "acc_pose": 0.83274, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23805}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00056, "acc_pose": 0.83737, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.23871}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05594, "heatmap_loss": 0.00056, "acc_pose": 0.83507, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.30297}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00057, "acc_pose": 0.83703, "loss": 0.00057, "grad_norm": 0.00101, "time": 0.23894}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00026, "heatmap_loss": 0.00055, "acc_pose": 0.8351, "loss": 0.00055, "grad_norm": 0.00099, "time": 0.2387}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83489, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23772}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83477, "loss": 0.00056, "grad_norm": 0.00096, "time": 0.23852}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05608, "heatmap_loss": 0.00056, "acc_pose": 0.83977, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.29874}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00056, "acc_pose": 0.83482, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23819}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83623, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23787}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00036, "heatmap_loss": 0.00056, "acc_pose": 0.84361, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23816}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83904, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.2374}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05546, "heatmap_loss": 0.00056, "acc_pose": 0.8377, "loss": 0.00056, "grad_norm": 0.001, "time": 0.29717}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.8317, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23868}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83342, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23807}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00056, "acc_pose": 0.83985, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.23844}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.84122, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.2372}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05555, "heatmap_loss": 0.00056, "acc_pose": 0.83732, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.29724}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.82959, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.2386}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8368, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23828}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83661, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.23839}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.84037, "loss": 0.00056, "grad_norm": 0.00096, "time": 0.23825}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.0562, "heatmap_loss": 0.00056, "acc_pose": 0.83729, "loss": 0.00056, "grad_norm": 0.00095, "time": 0.29709}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00056, "acc_pose": 0.83843, "loss": 0.00056, "grad_norm": 0.001, "time": 0.23967}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.00025, "heatmap_loss": 0.00056, "acc_pose": 0.83388, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.23839}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00056, "acc_pose": 0.83803, "loss": 0.00056, "grad_norm": 0.00099, "time": 0.23805}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.83912, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.23764}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05597, "heatmap_loss": 0.00056, "acc_pose": 0.83608, "loss": 0.00056, "grad_norm": 0.001, "time": 0.29847}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00056, "acc_pose": 0.8376, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.23899}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0004, "heatmap_loss": 0.00056, "acc_pose": 0.83455, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23823}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00042, "heatmap_loss": 0.00055, "acc_pose": 0.83978, "loss": 0.00055, "grad_norm": 0.001, "time": 0.23793}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.84095, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.23811}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 14884, "data_time": 0.05633, "heatmap_loss": 0.00056, "acc_pose": 0.83497, "loss": 0.00056, "grad_norm": 0.00101, "time": 0.29762}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83635, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.23953}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83046, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.2382}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 14884, "data_time": 0.00027, "heatmap_loss": 0.00055, "acc_pose": 0.84158, "loss": 0.00055, "grad_norm": 0.00099, "time": 0.23867}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 14884, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83738, "loss": 0.00056, "grad_norm": 0.00097, "time": 0.23836}
+{"mode": "val", "epoch": 210, "iter": 407, "lr": 0.0, "AP": 0.75526, "AP .5": 0.90632, "AP .75": 0.82864, "AP (M)": 0.68432, "AP (L)": 0.78026, "AR": 0.80943, "AR .5": 0.94553, "AR .75": 0.87563, "AR (M)": 0.76826, "AR (L)": 0.86934}
diff --git a/vendor/ViTPose/logs/vitpose-b.log.json b/vendor/ViTPose/logs/vitpose-b.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ef64c5af50d45df32675b26c6fc8de5ae87cb73
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-b.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+5905982", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.06586, "heatmap_loss": 0.00214, "acc_pose": 0.06155, "loss": 0.00214, "time": 0.3532}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00186, "acc_pose": 0.25887, "loss": 0.00186, "time": 0.19385}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00159, "acc_pose": 0.39629, "loss": 0.00159, "time": 0.19363}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00145, "acc_pose": 0.46183, "loss": 0.00145, "time": 0.19371}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00135, "acc_pose": 0.50374, "loss": 0.00135, "time": 0.19344}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05537, "heatmap_loss": 0.00125, "acc_pose": 0.55242, "loss": 0.00125, "time": 0.25581}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00121, "acc_pose": 0.57423, "loss": 0.00121, "time": 0.19761}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00118, "acc_pose": 0.57644, "loss": 0.00118, "time": 0.19678}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00116, "acc_pose": 0.5839, "loss": 0.00116, "time": 0.19499}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00113, "acc_pose": 0.60309, "loss": 0.00113, "time": 0.19558}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05544, "heatmap_loss": 0.00109, "acc_pose": 0.62308, "loss": 0.00109, "time": 0.2537}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00107, "acc_pose": 0.62769, "loss": 0.00107, "time": 0.19374}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00106, "acc_pose": 0.62886, "loss": 0.00106, "time": 0.193}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00105, "acc_pose": 0.63644, "loss": 0.00105, "time": 0.19356}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00104, "acc_pose": 0.63998, "loss": 0.00104, "time": 0.19713}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05513, "heatmap_loss": 0.00102, "acc_pose": 0.65483, "loss": 0.00102, "time": 0.25179}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.001, "acc_pose": 0.6593, "loss": 0.001, "time": 0.19305}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00099, "acc_pose": 0.66863, "loss": 0.00099, "time": 0.1936}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00099, "acc_pose": 0.66848, "loss": 0.00099, "time": 0.19387}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00098, "acc_pose": 0.66785, "loss": 0.00098, "time": 0.19556}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05496, "heatmap_loss": 0.00097, "acc_pose": 0.6745, "loss": 0.00097, "time": 0.24984}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00097, "acc_pose": 0.66429, "loss": 0.00097, "time": 0.1925}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00095, "acc_pose": 0.67168, "loss": 0.00095, "time": 0.19244}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00095, "acc_pose": 0.67883, "loss": 0.00095, "time": 0.19243}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00094, "acc_pose": 0.67974, "loss": 0.00094, "time": 0.19273}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05462, "heatmap_loss": 0.00093, "acc_pose": 0.68822, "loss": 0.00093, "time": 0.25051}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00093, "acc_pose": 0.67627, "loss": 0.00093, "time": 0.19225}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00093, "acc_pose": 0.68534, "loss": 0.00093, "time": 0.19211}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00092, "acc_pose": 0.68586, "loss": 0.00092, "time": 0.193}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00093, "acc_pose": 0.69321, "loss": 0.00093, "time": 0.19284}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0553, "heatmap_loss": 0.00091, "acc_pose": 0.68836, "loss": 0.00091, "time": 0.25195}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.0009, "acc_pose": 0.69273, "loss": 0.0009, "time": 0.19219}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0009, "acc_pose": 0.69318, "loss": 0.0009, "time": 0.19249}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.0009, "acc_pose": 0.69727, "loss": 0.0009, "time": 0.19333}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.0009, "acc_pose": 0.70019, "loss": 0.0009, "time": 0.19235}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05449, "heatmap_loss": 0.00089, "acc_pose": 0.69968, "loss": 0.00089, "time": 0.25249}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00089, "acc_pose": 0.70358, "loss": 0.00089, "time": 0.1928}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00089, "acc_pose": 0.6976, "loss": 0.00089, "time": 0.19226}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00088, "acc_pose": 0.71078, "loss": 0.00088, "time": 0.19251}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00088, "acc_pose": 0.70111, "loss": 0.00088, "time": 0.19246}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05512, "heatmap_loss": 0.00087, "acc_pose": 0.70716, "loss": 0.00087, "time": 0.25195}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00087, "acc_pose": 0.71206, "loss": 0.00087, "time": 0.19312}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00087, "acc_pose": 0.71496, "loss": 0.00087, "time": 0.19314}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00087, "acc_pose": 0.71324, "loss": 0.00087, "time": 0.1924}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00087, "acc_pose": 0.71267, "loss": 0.00087, "time": 0.19217}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05661, "heatmap_loss": 0.00087, "acc_pose": 0.71521, "loss": 0.00087, "time": 0.25368}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00086, "acc_pose": 0.70887, "loss": 0.00086, "time": 0.19235}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00085, "acc_pose": 0.71852, "loss": 0.00085, "time": 0.1927}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00085, "acc_pose": 0.71485, "loss": 0.00085, "time": 0.19248}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00086, "acc_pose": 0.71869, "loss": 0.00086, "time": 0.19279}
+{"mode": "val", "epoch": 10, "iter": 407, "lr": 1e-05, "AP": 0.67647, "AP .5": 0.87915, "AP .75": 0.75323, "AP (M)": 0.60167, "AP (L)": 0.70376, "AR": 0.73509, "AR .5": 0.92034, "AR .75": 0.80368, "AR (M)": 0.69085, "AR (L)": 0.79814}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0548, "heatmap_loss": 0.00085, "acc_pose": 0.71744, "loss": 0.00085, "time": 0.24752}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00084, "acc_pose": 0.71827, "loss": 0.00084, "time": 0.19291}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00085, "acc_pose": 0.71782, "loss": 0.00085, "time": 0.19216}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00085, "acc_pose": 0.71726, "loss": 0.00085, "time": 0.19313}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00084, "acc_pose": 0.72753, "loss": 0.00084, "time": 0.19265}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05561, "heatmap_loss": 0.00084, "acc_pose": 0.72334, "loss": 0.00084, "time": 0.2506}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00083, "acc_pose": 0.7266, "loss": 0.00083, "time": 0.19257}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00084, "acc_pose": 0.72603, "loss": 0.00084, "time": 0.19243}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00083, "acc_pose": 0.71767, "loss": 0.00083, "time": 0.19218}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00083, "acc_pose": 0.72106, "loss": 0.00083, "time": 0.19258}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05507, "heatmap_loss": 0.00082, "acc_pose": 0.72823, "loss": 0.00082, "time": 0.25113}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00083, "acc_pose": 0.73015, "loss": 0.00083, "time": 0.19235}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00083, "acc_pose": 0.7245, "loss": 0.00083, "time": 0.19247}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00083, "acc_pose": 0.72801, "loss": 0.00083, "time": 0.19246}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00082, "acc_pose": 0.73468, "loss": 0.00082, "time": 0.19239}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05507, "heatmap_loss": 0.00082, "acc_pose": 0.72322, "loss": 0.00082, "time": 0.25141}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00082, "acc_pose": 0.7327, "loss": 0.00082, "time": 0.1921}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00082, "acc_pose": 0.7308, "loss": 0.00082, "time": 0.19211}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00081, "acc_pose": 0.73022, "loss": 0.00081, "time": 0.19237}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00081, "acc_pose": 0.73573, "loss": 0.00081, "time": 0.19242}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05411, "heatmap_loss": 0.00082, "acc_pose": 0.72517, "loss": 0.00082, "time": 0.25139}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00081, "acc_pose": 0.73122, "loss": 0.00081, "time": 0.19184}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00081, "acc_pose": 0.72444, "loss": 0.00081, "time": 0.19197}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00081, "acc_pose": 0.73929, "loss": 0.00081, "time": 0.19324}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00081, "acc_pose": 0.73636, "loss": 0.00081, "time": 0.19254}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05647, "heatmap_loss": 0.0008, "acc_pose": 0.73765, "loss": 0.0008, "time": 0.25156}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0008, "acc_pose": 0.73862, "loss": 0.0008, "time": 0.19205}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0008, "acc_pose": 0.73999, "loss": 0.0008, "time": 0.19259}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0008, "acc_pose": 0.73841, "loss": 0.0008, "time": 0.19216}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0008, "acc_pose": 0.74079, "loss": 0.0008, "time": 0.19225}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0545, "heatmap_loss": 0.0008, "acc_pose": 0.73401, "loss": 0.0008, "time": 0.25128}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0008, "acc_pose": 0.7345, "loss": 0.0008, "time": 0.19245}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00079, "acc_pose": 0.74211, "loss": 0.00079, "time": 0.19239}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0008, "acc_pose": 0.7427, "loss": 0.0008, "time": 0.19418}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0008, "acc_pose": 0.74482, "loss": 0.0008, "time": 0.19239}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05531, "heatmap_loss": 0.00079, "acc_pose": 0.739, "loss": 0.00079, "time": 0.25164}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00079, "acc_pose": 0.74628, "loss": 0.00079, "time": 0.19267}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00079, "acc_pose": 0.74222, "loss": 0.00079, "time": 0.19263}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00078, "acc_pose": 0.74936, "loss": 0.00078, "time": 0.19284}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00078, "acc_pose": 0.742, "loss": 0.00078, "time": 0.19224}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05511, "heatmap_loss": 0.00079, "acc_pose": 0.73899, "loss": 0.00079, "time": 0.25119}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00079, "acc_pose": 0.7438, "loss": 0.00079, "time": 0.19244}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00079, "acc_pose": 0.73262, "loss": 0.00079, "time": 0.19265}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00078, "acc_pose": 0.73536, "loss": 0.00078, "time": 0.19456}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00078, "acc_pose": 0.7495, "loss": 0.00078, "time": 0.19263}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05549, "heatmap_loss": 0.00077, "acc_pose": 0.75089, "loss": 0.00077, "time": 0.25155}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00078, "acc_pose": 0.75081, "loss": 0.00078, "time": 0.1922}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00078, "acc_pose": 0.74162, "loss": 0.00078, "time": 0.19299}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00078, "acc_pose": 0.74799, "loss": 0.00078, "time": 0.19342}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00078, "acc_pose": 0.74702, "loss": 0.00078, "time": 0.19327}
+{"mode": "val", "epoch": 20, "iter": 407, "lr": 1e-05, "AP": 0.70603, "AP .5": 0.89118, "AP .75": 0.78565, "AP (M)": 0.63397, "AP (L)": 0.73039, "AR": 0.76371, "AR .5": 0.93325, "AR .75": 0.83312, "AR (M)": 0.72131, "AR (L)": 0.82479}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05536, "heatmap_loss": 0.00078, "acc_pose": 0.7482, "loss": 0.00078, "time": 0.24936}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00077, "acc_pose": 0.75601, "loss": 0.00077, "time": 0.19306}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00077, "acc_pose": 0.74649, "loss": 0.00077, "time": 0.19248}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00077, "acc_pose": 0.75351, "loss": 0.00077, "time": 0.19257}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00078, "acc_pose": 0.74984, "loss": 0.00078, "time": 0.19268}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05546, "heatmap_loss": 0.00077, "acc_pose": 0.75352, "loss": 0.00077, "time": 0.25087}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00077, "acc_pose": 0.74849, "loss": 0.00077, "time": 0.19292}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00077, "acc_pose": 0.74769, "loss": 0.00077, "time": 0.19222}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00076, "acc_pose": 0.75639, "loss": 0.00076, "time": 0.19229}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0003, "heatmap_loss": 0.00077, "acc_pose": 0.75617, "loss": 0.00077, "time": 0.19243}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05509, "heatmap_loss": 0.00077, "acc_pose": 0.75097, "loss": 0.00077, "time": 0.25071}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00076, "acc_pose": 0.75068, "loss": 0.00076, "time": 0.19198}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00076, "acc_pose": 0.74992, "loss": 0.00076, "time": 0.19201}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00076, "acc_pose": 0.75933, "loss": 0.00076, "time": 0.19252}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00076, "acc_pose": 0.75636, "loss": 0.00076, "time": 0.19189}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05477, "heatmap_loss": 0.00076, "acc_pose": 0.75415, "loss": 0.00076, "time": 0.25066}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00056, "heatmap_loss": 0.00076, "acc_pose": 0.76588, "loss": 0.00076, "time": 0.19326}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00076, "acc_pose": 0.74952, "loss": 0.00076, "time": 0.19251}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00076, "acc_pose": 0.7519, "loss": 0.00076, "time": 0.19234}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00076, "acc_pose": 0.75092, "loss": 0.00076, "time": 0.19244}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05522, "heatmap_loss": 0.00075, "acc_pose": 0.76583, "loss": 0.00075, "time": 0.25092}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00075, "acc_pose": 0.75606, "loss": 0.00075, "time": 0.19216}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00075, "acc_pose": 0.7521, "loss": 0.00075, "time": 0.1922}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00076, "acc_pose": 0.75, "loss": 0.00076, "time": 0.19272}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00075, "acc_pose": 0.75397, "loss": 0.00075, "time": 0.19235}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05448, "heatmap_loss": 0.00075, "acc_pose": 0.75324, "loss": 0.00075, "time": 0.25065}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00075, "acc_pose": 0.75511, "loss": 0.00075, "time": 0.19286}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00075, "acc_pose": 0.75243, "loss": 0.00075, "time": 0.19261}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00075, "acc_pose": 0.7576, "loss": 0.00075, "time": 0.19276}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00075, "acc_pose": 0.76155, "loss": 0.00075, "time": 0.19273}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05497, "heatmap_loss": 0.00074, "acc_pose": 0.75893, "loss": 0.00074, "time": 0.25033}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00075, "acc_pose": 0.76467, "loss": 0.00075, "time": 0.19223}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00075, "acc_pose": 0.76127, "loss": 0.00075, "time": 0.19376}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00074, "acc_pose": 0.76668, "loss": 0.00074, "time": 0.19208}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00074, "acc_pose": 0.75405, "loss": 0.00074, "time": 0.19197}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05464, "heatmap_loss": 0.00074, "acc_pose": 0.76587, "loss": 0.00074, "time": 0.25071}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00074, "acc_pose": 0.76107, "loss": 0.00074, "time": 0.19189}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00074, "acc_pose": 0.75845, "loss": 0.00074, "time": 0.19204}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00075, "acc_pose": 0.76082, "loss": 0.00075, "time": 0.19233}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00045, "heatmap_loss": 0.00074, "acc_pose": 0.76397, "loss": 0.00074, "time": 0.19254}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05535, "heatmap_loss": 0.00074, "acc_pose": 0.76763, "loss": 0.00074, "time": 0.25172}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00074, "acc_pose": 0.75546, "loss": 0.00074, "time": 0.1924}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00074, "acc_pose": 0.75609, "loss": 0.00074, "time": 0.19218}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00074, "acc_pose": 0.76331, "loss": 0.00074, "time": 0.19242}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00073, "acc_pose": 0.76193, "loss": 0.00073, "time": 0.19227}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05665, "heatmap_loss": 0.00074, "acc_pose": 0.75762, "loss": 0.00074, "time": 0.25177}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00074, "acc_pose": 0.76239, "loss": 0.00074, "time": 0.19266}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00074, "acc_pose": 0.76048, "loss": 0.00074, "time": 0.19228}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00073, "acc_pose": 0.76277, "loss": 0.00073, "time": 0.19233}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00073, "acc_pose": 0.76636, "loss": 0.00073, "time": 0.1922}
+{"mode": "val", "epoch": 30, "iter": 407, "lr": 1e-05, "AP": 0.71786, "AP .5": 0.89526, "AP .75": 0.79338, "AP (M)": 0.64611, "AP (L)": 0.74376, "AR": 0.77382, "AR .5": 0.93671, "AR .75": 0.84068, "AR (M)": 0.73111, "AR (L)": 0.83545}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0541, "heatmap_loss": 0.00074, "acc_pose": 0.76361, "loss": 0.00074, "time": 0.2458}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00073, "acc_pose": 0.76492, "loss": 0.00073, "time": 0.19173}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.76639, "loss": 0.00073, "time": 0.1918}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00073, "acc_pose": 0.77203, "loss": 0.00073, "time": 0.19183}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00073, "acc_pose": 0.77363, "loss": 0.00073, "time": 0.19296}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05523, "heatmap_loss": 0.00073, "acc_pose": 0.76809, "loss": 0.00073, "time": 0.25169}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.77178, "loss": 0.00073, "time": 0.19298}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76417, "loss": 0.00073, "time": 0.19208}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00072, "acc_pose": 0.77183, "loss": 0.00072, "time": 0.19251}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00072, "acc_pose": 0.77596, "loss": 0.00072, "time": 0.19283}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0552, "heatmap_loss": 0.00072, "acc_pose": 0.77209, "loss": 0.00072, "time": 0.24978}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76675, "loss": 0.00073, "time": 0.19234}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00072, "acc_pose": 0.77506, "loss": 0.00072, "time": 0.19247}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76651, "loss": 0.00073, "time": 0.19207}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00073, "acc_pose": 0.77826, "loss": 0.00073, "time": 0.19214}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05447, "heatmap_loss": 0.00072, "acc_pose": 0.76849, "loss": 0.00072, "time": 0.24981}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00073, "acc_pose": 0.76703, "loss": 0.00073, "time": 0.19198}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00072, "acc_pose": 0.76975, "loss": 0.00072, "time": 0.19295}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00072, "acc_pose": 0.77253, "loss": 0.00072, "time": 0.19283}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00072, "acc_pose": 0.77078, "loss": 0.00072, "time": 0.19223}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05555, "heatmap_loss": 0.00072, "acc_pose": 0.77102, "loss": 0.00072, "time": 0.25291}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00072, "acc_pose": 0.77044, "loss": 0.00072, "time": 0.1934}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00071, "acc_pose": 0.76908, "loss": 0.00071, "time": 0.19243}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00072, "acc_pose": 0.77703, "loss": 0.00072, "time": 0.19245}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00072, "acc_pose": 0.77626, "loss": 0.00072, "time": 0.1921}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05479, "heatmap_loss": 0.00071, "acc_pose": 0.76774, "loss": 0.00071, "time": 0.25197}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00072, "acc_pose": 0.77463, "loss": 0.00072, "time": 0.19206}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00072, "acc_pose": 0.77184, "loss": 0.00072, "time": 0.1921}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00071, "acc_pose": 0.77387, "loss": 0.00071, "time": 0.19195}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00072, "acc_pose": 0.77081, "loss": 0.00072, "time": 0.19231}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0543, "heatmap_loss": 0.00071, "acc_pose": 0.77087, "loss": 0.00071, "time": 0.25135}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00072, "acc_pose": 0.77126, "loss": 0.00072, "time": 0.19271}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00071, "acc_pose": 0.77131, "loss": 0.00071, "time": 0.19198}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00071, "acc_pose": 0.78238, "loss": 0.00071, "time": 0.19235}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00071, "acc_pose": 0.76765, "loss": 0.00071, "time": 0.19294}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05483, "heatmap_loss": 0.00072, "acc_pose": 0.77779, "loss": 0.00072, "time": 0.25101}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00071, "acc_pose": 0.77601, "loss": 0.00071, "time": 0.19197}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00071, "acc_pose": 0.77424, "loss": 0.00071, "time": 0.19173}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00071, "acc_pose": 0.76905, "loss": 0.00071, "time": 0.19224}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00071, "acc_pose": 0.77167, "loss": 0.00071, "time": 0.19231}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05488, "heatmap_loss": 0.00071, "acc_pose": 0.77223, "loss": 0.00071, "time": 0.25027}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.0007, "acc_pose": 0.77558, "loss": 0.0007, "time": 0.19226}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00071, "acc_pose": 0.77093, "loss": 0.00071, "time": 0.19207}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00071, "acc_pose": 0.76871, "loss": 0.00071, "time": 0.19212}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00071, "acc_pose": 0.77157, "loss": 0.00071, "time": 0.19243}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0557, "heatmap_loss": 0.0007, "acc_pose": 0.77453, "loss": 0.0007, "time": 0.25277}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00071, "acc_pose": 0.76985, "loss": 0.00071, "time": 0.19215}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00071, "acc_pose": 0.77671, "loss": 0.00071, "time": 0.19232}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00071, "acc_pose": 0.77389, "loss": 0.00071, "time": 0.19209}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00071, "acc_pose": 0.77777, "loss": 0.00071, "time": 0.19181}
+{"mode": "val", "epoch": 40, "iter": 407, "lr": 1e-05, "AP": 0.72453, "AP .5": 0.89532, "AP .75": 0.79838, "AP (M)": 0.65275, "AP (L)": 0.74908, "AR": 0.78164, "AR .5": 0.93734, "AR .75": 0.84603, "AR (M)": 0.73983, "AR (L)": 0.84188}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05394, "heatmap_loss": 0.0007, "acc_pose": 0.78183, "loss": 0.0007, "time": 0.24558}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.77461, "loss": 0.0007, "time": 0.19148}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0007, "acc_pose": 0.78614, "loss": 0.0007, "time": 0.19225}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00071, "acc_pose": 0.77922, "loss": 0.00071, "time": 0.19207}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0007, "acc_pose": 0.77005, "loss": 0.0007, "time": 0.19186}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05448, "heatmap_loss": 0.0007, "acc_pose": 0.77665, "loss": 0.0007, "time": 0.24936}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0007, "acc_pose": 0.77607, "loss": 0.0007, "time": 0.19197}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.77124, "loss": 0.0007, "time": 0.19287}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.78102, "loss": 0.0007, "time": 0.19295}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.77112, "loss": 0.0007, "time": 0.19253}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05526, "heatmap_loss": 0.0007, "acc_pose": 0.78248, "loss": 0.0007, "time": 0.25132}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0007, "acc_pose": 0.77696, "loss": 0.0007, "time": 0.19219}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.77936, "loss": 0.0007, "time": 0.19242}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0007, "acc_pose": 0.77851, "loss": 0.0007, "time": 0.19315}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0007, "acc_pose": 0.78764, "loss": 0.0007, "time": 0.19199}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05495, "heatmap_loss": 0.0007, "acc_pose": 0.77721, "loss": 0.0007, "time": 0.25107}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0007, "acc_pose": 0.77977, "loss": 0.0007, "time": 0.19226}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00045, "heatmap_loss": 0.0007, "acc_pose": 0.78267, "loss": 0.0007, "time": 0.19199}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0005, "heatmap_loss": 0.00069, "acc_pose": 0.78177, "loss": 0.00069, "time": 0.19237}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00069, "acc_pose": 0.78088, "loss": 0.00069, "time": 0.19254}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05639, "heatmap_loss": 0.00069, "acc_pose": 0.78068, "loss": 0.00069, "time": 0.25131}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.7786, "loss": 0.00069, "time": 0.19231}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00069, "acc_pose": 0.77443, "loss": 0.00069, "time": 0.19236}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0007, "acc_pose": 0.78213, "loss": 0.0007, "time": 0.19192}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0007, "acc_pose": 0.78022, "loss": 0.0007, "time": 0.19207}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05545, "heatmap_loss": 0.00069, "acc_pose": 0.78274, "loss": 0.00069, "time": 0.25093}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00069, "acc_pose": 0.78374, "loss": 0.00069, "time": 0.19171}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00069, "acc_pose": 0.78126, "loss": 0.00069, "time": 0.19226}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00069, "acc_pose": 0.77904, "loss": 0.00069, "time": 0.19237}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00069, "acc_pose": 0.78787, "loss": 0.00069, "time": 0.19186}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05504, "heatmap_loss": 0.00069, "acc_pose": 0.7782, "loss": 0.00069, "time": 0.25105}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00069, "acc_pose": 0.78573, "loss": 0.00069, "time": 0.19187}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00069, "acc_pose": 0.77774, "loss": 0.00069, "time": 0.19222}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00069, "acc_pose": 0.78207, "loss": 0.00069, "time": 0.19199}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00069, "acc_pose": 0.78483, "loss": 0.00069, "time": 0.19205}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05514, "heatmap_loss": 0.00069, "acc_pose": 0.78327, "loss": 0.00069, "time": 0.25016}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00069, "acc_pose": 0.77421, "loss": 0.00069, "time": 0.1921}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00069, "acc_pose": 0.78616, "loss": 0.00069, "time": 0.19209}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00069, "acc_pose": 0.78081, "loss": 0.00069, "time": 0.19242}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00069, "acc_pose": 0.7889, "loss": 0.00069, "time": 0.19229}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05447, "heatmap_loss": 0.00069, "acc_pose": 0.78302, "loss": 0.00069, "time": 0.25072}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00055, "heatmap_loss": 0.00069, "acc_pose": 0.78117, "loss": 0.00069, "time": 0.19206}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00068, "acc_pose": 0.78283, "loss": 0.00068, "time": 0.19178}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00069, "acc_pose": 0.77924, "loss": 0.00069, "time": 0.19271}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00069, "acc_pose": 0.78598, "loss": 0.00069, "time": 0.19216}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0544, "heatmap_loss": 0.00068, "acc_pose": 0.79214, "loss": 0.00068, "time": 0.25106}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00068, "acc_pose": 0.78263, "loss": 0.00068, "time": 0.19243}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00068, "acc_pose": 0.78799, "loss": 0.00068, "time": 0.19176}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00068, "acc_pose": 0.78587, "loss": 0.00068, "time": 0.1919}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00068, "acc_pose": 0.78, "loss": 0.00068, "time": 0.19174}
+{"mode": "val", "epoch": 50, "iter": 407, "lr": 1e-05, "AP": 0.72904, "AP .5": 0.89718, "AP .75": 0.80435, "AP (M)": 0.65612, "AP (L)": 0.75492, "AR": 0.7858, "AR .5": 0.93986, "AR .75": 0.85123, "AR (M)": 0.74321, "AR (L)": 0.84738}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05473, "heatmap_loss": 0.00068, "acc_pose": 0.79149, "loss": 0.00068, "time": 0.24671}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00068, "acc_pose": 0.78256, "loss": 0.00068, "time": 0.19203}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78395, "loss": 0.00068, "time": 0.1916}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00068, "acc_pose": 0.78794, "loss": 0.00068, "time": 0.19184}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00068, "acc_pose": 0.79344, "loss": 0.00068, "time": 0.19193}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05475, "heatmap_loss": 0.00068, "acc_pose": 0.78481, "loss": 0.00068, "time": 0.25102}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78579, "loss": 0.00068, "time": 0.19164}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00068, "acc_pose": 0.79184, "loss": 0.00068, "time": 0.19205}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00067, "acc_pose": 0.78659, "loss": 0.00067, "time": 0.19191}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00069, "acc_pose": 0.77863, "loss": 0.00069, "time": 0.19222}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05435, "heatmap_loss": 0.00068, "acc_pose": 0.79202, "loss": 0.00068, "time": 0.25056}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00068, "acc_pose": 0.78314, "loss": 0.00068, "time": 0.19229}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00068, "acc_pose": 0.78853, "loss": 0.00068, "time": 0.19171}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00068, "acc_pose": 0.78848, "loss": 0.00068, "time": 0.19217}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78373, "loss": 0.00068, "time": 0.19183}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05434, "heatmap_loss": 0.00068, "acc_pose": 0.79, "loss": 0.00068, "time": 0.25168}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78208, "loss": 0.00068, "time": 0.1923}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00067, "acc_pose": 0.7913, "loss": 0.00067, "time": 0.19245}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78945, "loss": 0.00068, "time": 0.19189}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00068, "acc_pose": 0.78786, "loss": 0.00068, "time": 0.19339}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05538, "heatmap_loss": 0.00068, "acc_pose": 0.79309, "loss": 0.00068, "time": 0.25186}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00067, "acc_pose": 0.78217, "loss": 0.00067, "time": 0.1919}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00068, "acc_pose": 0.78471, "loss": 0.00068, "time": 0.19294}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00067, "acc_pose": 0.78794, "loss": 0.00067, "time": 0.19258}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00068, "acc_pose": 0.78849, "loss": 0.00068, "time": 0.19291}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05496, "heatmap_loss": 0.00068, "acc_pose": 0.7894, "loss": 0.00068, "time": 0.25314}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00067, "acc_pose": 0.78835, "loss": 0.00067, "time": 0.19329}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00067, "acc_pose": 0.78439, "loss": 0.00067, "time": 0.19274}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00067, "acc_pose": 0.78437, "loss": 0.00067, "time": 0.19252}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.78696, "loss": 0.00067, "time": 0.19317}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05546, "heatmap_loss": 0.00068, "acc_pose": 0.79492, "loss": 0.00068, "time": 0.25137}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00067, "acc_pose": 0.78607, "loss": 0.00067, "time": 0.19259}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.78883, "loss": 0.00067, "time": 0.19413}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00067, "acc_pose": 0.79624, "loss": 0.00067, "time": 0.19218}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.79261, "loss": 0.00067, "time": 0.19216}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05489, "heatmap_loss": 0.00067, "acc_pose": 0.79323, "loss": 0.00067, "time": 0.2526}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.78817, "loss": 0.00067, "time": 0.19202}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00067, "acc_pose": 0.78635, "loss": 0.00067, "time": 0.19224}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.79153, "loss": 0.00067, "time": 0.19215}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00067, "acc_pose": 0.7901, "loss": 0.00067, "time": 0.19294}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0557, "heatmap_loss": 0.00067, "acc_pose": 0.79163, "loss": 0.00067, "time": 0.25072}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.79631, "loss": 0.00067, "time": 0.19219}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00067, "acc_pose": 0.79325, "loss": 0.00067, "time": 0.1922}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.79214, "loss": 0.00067, "time": 0.19225}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00067, "acc_pose": 0.79132, "loss": 0.00067, "time": 0.19245}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05641, "heatmap_loss": 0.00067, "acc_pose": 0.79097, "loss": 0.00067, "time": 0.25159}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00051, "heatmap_loss": 0.00066, "acc_pose": 0.7917, "loss": 0.00066, "time": 0.19219}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00067, "acc_pose": 0.78661, "loss": 0.00067, "time": 0.19198}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.79153, "loss": 0.00067, "time": 0.19254}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00067, "acc_pose": 0.78208, "loss": 0.00067, "time": 0.19215}
+{"mode": "val", "epoch": 60, "iter": 407, "lr": 1e-05, "AP": 0.73611, "AP .5": 0.90005, "AP .75": 0.80881, "AP (M)": 0.66331, "AP (L)": 0.76088, "AR": 0.79137, "AR .5": 0.94002, "AR .75": 0.85784, "AR (M)": 0.7499, "AR (L)": 0.85154}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05447, "heatmap_loss": 0.00066, "acc_pose": 0.78464, "loss": 0.00066, "time": 0.24656}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00067, "acc_pose": 0.78649, "loss": 0.00067, "time": 0.19152}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00067, "acc_pose": 0.78689, "loss": 0.00067, "time": 0.19177}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79098, "loss": 0.00066, "time": 0.19191}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00066, "acc_pose": 0.79249, "loss": 0.00066, "time": 0.19209}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05525, "heatmap_loss": 0.00066, "acc_pose": 0.78429, "loss": 0.00066, "time": 0.25025}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.78807, "loss": 0.00066, "time": 0.19291}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00067, "acc_pose": 0.78893, "loss": 0.00067, "time": 0.19233}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.7921, "loss": 0.00066, "time": 0.19209}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00066, "acc_pose": 0.79448, "loss": 0.00066, "time": 0.19272}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05464, "heatmap_loss": 0.00066, "acc_pose": 0.79685, "loss": 0.00066, "time": 0.25186}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.7976, "loss": 0.00066, "time": 0.19232}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79743, "loss": 0.00066, "time": 0.19191}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00067, "acc_pose": 0.79508, "loss": 0.00067, "time": 0.19311}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00067, "acc_pose": 0.78809, "loss": 0.00067, "time": 0.19192}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05538, "heatmap_loss": 0.00066, "acc_pose": 0.7893, "loss": 0.00066, "time": 0.25187}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.78967, "loss": 0.00066, "time": 0.19232}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.79594, "loss": 0.00067, "time": 0.19242}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.79249, "loss": 0.00066, "time": 0.19204}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00066, "acc_pose": 0.79078, "loss": 0.00066, "time": 0.19187}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0565, "heatmap_loss": 0.00066, "acc_pose": 0.79394, "loss": 0.00066, "time": 0.25041}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00065, "acc_pose": 0.79851, "loss": 0.00065, "time": 0.19284}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00067, "acc_pose": 0.7966, "loss": 0.00067, "time": 0.19206}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.7913, "loss": 0.00066, "time": 0.19196}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79319, "loss": 0.00066, "time": 0.19176}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05571, "heatmap_loss": 0.00065, "acc_pose": 0.79889, "loss": 0.00065, "time": 0.25135}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.79215, "loss": 0.00066, "time": 0.19187}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79463, "loss": 0.00066, "time": 0.19229}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.79849, "loss": 0.00065, "time": 0.19217}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00066, "acc_pose": 0.79629, "loss": 0.00066, "time": 0.19225}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05533, "heatmap_loss": 0.00066, "acc_pose": 0.79648, "loss": 0.00066, "time": 0.25055}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.79117, "loss": 0.00066, "time": 0.19218}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.797, "loss": 0.00066, "time": 0.19224}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.7975, "loss": 0.00065, "time": 0.19195}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79609, "loss": 0.00066, "time": 0.19285}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05442, "heatmap_loss": 0.00066, "acc_pose": 0.78921, "loss": 0.00066, "time": 0.25145}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79322, "loss": 0.00065, "time": 0.19221}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.79577, "loss": 0.00066, "time": 0.19326}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.79547, "loss": 0.00066, "time": 0.192}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00066, "acc_pose": 0.79396, "loss": 0.00066, "time": 0.19198}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05496, "heatmap_loss": 0.00065, "acc_pose": 0.7979, "loss": 0.00065, "time": 0.25244}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00049, "heatmap_loss": 0.00065, "acc_pose": 0.7948, "loss": 0.00065, "time": 0.1933}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79228, "loss": 0.00065, "time": 0.19254}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79953, "loss": 0.00066, "time": 0.19247}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.79688, "loss": 0.00066, "time": 0.19245}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05464, "heatmap_loss": 0.00066, "acc_pose": 0.79411, "loss": 0.00066, "time": 0.25091}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79673, "loss": 0.00065, "time": 0.19201}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00066, "acc_pose": 0.78696, "loss": 0.00066, "time": 0.19249}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.79823, "loss": 0.00065, "time": 0.19201}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.78989, "loss": 0.00065, "time": 0.19135}
+{"mode": "val", "epoch": 70, "iter": 407, "lr": 1e-05, "AP": 0.73885, "AP .5": 0.89861, "AP .75": 0.81704, "AP (M)": 0.66621, "AP (L)": 0.76477, "AR": 0.7938, "AR .5": 0.9397, "AR .75": 0.86351, "AR (M)": 0.75264, "AR (L)": 0.8534}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05608, "heatmap_loss": 0.00065, "acc_pose": 0.79897, "loss": 0.00065, "time": 0.24852}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79596, "loss": 0.00065, "time": 0.19232}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79493, "loss": 0.00065, "time": 0.19259}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79969, "loss": 0.00065, "time": 0.19238}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.80418, "loss": 0.00065, "time": 0.19327}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05553, "heatmap_loss": 0.00065, "acc_pose": 0.80141, "loss": 0.00065, "time": 0.25123}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.79461, "loss": 0.00066, "time": 0.19232}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00065, "acc_pose": 0.79228, "loss": 0.00065, "time": 0.19204}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.80172, "loss": 0.00065, "time": 0.19286}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79591, "loss": 0.00065, "time": 0.19365}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05505, "heatmap_loss": 0.00064, "acc_pose": 0.80155, "loss": 0.00064, "time": 0.25003}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.8018, "loss": 0.00065, "time": 0.19197}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.78611, "loss": 0.00065, "time": 0.19189}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79986, "loss": 0.00065, "time": 0.19215}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00065, "acc_pose": 0.79967, "loss": 0.00065, "time": 0.19227}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05519, "heatmap_loss": 0.00065, "acc_pose": 0.79702, "loss": 0.00065, "time": 0.25121}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79423, "loss": 0.00065, "time": 0.19208}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.78876, "loss": 0.00065, "time": 0.19238}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00065, "acc_pose": 0.80159, "loss": 0.00065, "time": 0.19206}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00065, "acc_pose": 0.79544, "loss": 0.00065, "time": 0.19198}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05561, "heatmap_loss": 0.00064, "acc_pose": 0.80118, "loss": 0.00064, "time": 0.25214}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00064, "acc_pose": 0.79685, "loss": 0.00064, "time": 0.19219}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00064, "acc_pose": 0.79865, "loss": 0.00064, "time": 0.19247}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.80118, "loss": 0.00065, "time": 0.19231}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00065, "acc_pose": 0.79748, "loss": 0.00065, "time": 0.19219}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05466, "heatmap_loss": 0.00065, "acc_pose": 0.80297, "loss": 0.00065, "time": 0.25148}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.79616, "loss": 0.00064, "time": 0.19236}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80858, "loss": 0.00064, "time": 0.19186}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.80259, "loss": 0.00065, "time": 0.19215}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.80446, "loss": 0.00064, "time": 0.19207}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05544, "heatmap_loss": 0.00064, "acc_pose": 0.79593, "loss": 0.00064, "time": 0.25126}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.79526, "loss": 0.00065, "time": 0.19174}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.80325, "loss": 0.00064, "time": 0.19239}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00064, "acc_pose": 0.79805, "loss": 0.00064, "time": 0.19235}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.8052, "loss": 0.00064, "time": 0.19221}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05637, "heatmap_loss": 0.00065, "acc_pose": 0.80693, "loss": 0.00065, "time": 0.252}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00064, "acc_pose": 0.80859, "loss": 0.00064, "time": 0.19281}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00065, "acc_pose": 0.79971, "loss": 0.00065, "time": 0.19356}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.79723, "loss": 0.00064, "time": 0.19182}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80167, "loss": 0.00064, "time": 0.19192}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05448, "heatmap_loss": 0.00064, "acc_pose": 0.79936, "loss": 0.00064, "time": 0.24966}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00064, "acc_pose": 0.79382, "loss": 0.00064, "time": 0.19209}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.79093, "loss": 0.00064, "time": 0.19219}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80743, "loss": 0.00064, "time": 0.19223}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80366, "loss": 0.00064, "time": 0.19195}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05484, "heatmap_loss": 0.00064, "acc_pose": 0.80389, "loss": 0.00064, "time": 0.25018}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00064, "acc_pose": 0.80386, "loss": 0.00064, "time": 0.1919}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00064, "acc_pose": 0.79093, "loss": 0.00064, "time": 0.19272}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00064, "acc_pose": 0.80277, "loss": 0.00064, "time": 0.19254}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00064, "acc_pose": 0.80215, "loss": 0.00064, "time": 0.19251}
+{"mode": "val", "epoch": 80, "iter": 407, "lr": 1e-05, "AP": 0.74116, "AP .5": 0.90293, "AP .75": 0.81726, "AP (M)": 0.66874, "AP (L)": 0.76787, "AR": 0.79606, "AR .5": 0.9427, "AR .75": 0.86398, "AR (M)": 0.75422, "AR (L)": 0.85678}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05453, "heatmap_loss": 0.00063, "acc_pose": 0.80361, "loss": 0.00063, "time": 0.24645}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.79771, "loss": 0.00064, "time": 0.19202}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00065, "acc_pose": 0.7966, "loss": 0.00065, "time": 0.19168}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00064, "acc_pose": 0.80434, "loss": 0.00064, "time": 0.19199}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.80209, "loss": 0.00064, "time": 0.19159}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05478, "heatmap_loss": 0.00064, "acc_pose": 0.80272, "loss": 0.00064, "time": 0.25017}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.80388, "loss": 0.00064, "time": 0.19206}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00064, "acc_pose": 0.80918, "loss": 0.00064, "time": 0.19207}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.80201, "loss": 0.00064, "time": 0.19209}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.80165, "loss": 0.00064, "time": 0.19214}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05483, "heatmap_loss": 0.00064, "acc_pose": 0.80791, "loss": 0.00064, "time": 0.25026}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00064, "acc_pose": 0.80166, "loss": 0.00064, "time": 0.19168}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.80145, "loss": 0.00064, "time": 0.19237}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.79642, "loss": 0.00064, "time": 0.1923}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80142, "loss": 0.00064, "time": 0.19182}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0548, "heatmap_loss": 0.00063, "acc_pose": 0.80614, "loss": 0.00063, "time": 0.24973}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.79525, "loss": 0.00063, "time": 0.19159}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00064, "acc_pose": 0.8006, "loss": 0.00064, "time": 0.1935}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.80601, "loss": 0.00064, "time": 0.19176}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00064, "acc_pose": 0.79315, "loss": 0.00064, "time": 0.19226}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05508, "heatmap_loss": 0.00063, "acc_pose": 0.80192, "loss": 0.00063, "time": 0.25037}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.81023, "loss": 0.00063, "time": 0.19217}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80009, "loss": 0.00064, "time": 0.19203}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.80771, "loss": 0.00064, "time": 0.19161}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.80864, "loss": 0.00064, "time": 0.19204}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05475, "heatmap_loss": 0.00064, "acc_pose": 0.80951, "loss": 0.00064, "time": 0.25199}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.80006, "loss": 0.00064, "time": 0.19216}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00064, "acc_pose": 0.79967, "loss": 0.00064, "time": 0.19189}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00063, "acc_pose": 0.80419, "loss": 0.00063, "time": 0.19219}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80485, "loss": 0.00063, "time": 0.19289}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05525, "heatmap_loss": 0.00063, "acc_pose": 0.80828, "loss": 0.00063, "time": 0.25215}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00063, "acc_pose": 0.80988, "loss": 0.00063, "time": 0.19236}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.80108, "loss": 0.00064, "time": 0.19262}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.81004, "loss": 0.00063, "time": 0.19231}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00063, "acc_pose": 0.80742, "loss": 0.00063, "time": 0.19241}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05482, "heatmap_loss": 0.00063, "acc_pose": 0.80659, "loss": 0.00063, "time": 0.24915}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.8053, "loss": 0.00063, "time": 0.19199}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00063, "acc_pose": 0.80692, "loss": 0.00063, "time": 0.19236}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80479, "loss": 0.00063, "time": 0.19246}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00063, "acc_pose": 0.81144, "loss": 0.00063, "time": 0.19294}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05516, "heatmap_loss": 0.00063, "acc_pose": 0.79486, "loss": 0.00063, "time": 0.2506}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.80384, "loss": 0.00063, "time": 0.19254}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.79926, "loss": 0.00064, "time": 0.1916}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.80609, "loss": 0.00063, "time": 0.19262}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.81015, "loss": 0.00063, "time": 0.19206}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05522, "heatmap_loss": 0.00063, "acc_pose": 0.80685, "loss": 0.00063, "time": 0.25041}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.80911, "loss": 0.00063, "time": 0.19226}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.799, "loss": 0.00063, "time": 0.19252}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00064, "acc_pose": 0.80475, "loss": 0.00064, "time": 0.19307}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80843, "loss": 0.00063, "time": 0.19217}
+{"mode": "val", "epoch": 90, "iter": 407, "lr": 1e-05, "AP": 0.74286, "AP .5": 0.9013, "AP .75": 0.81831, "AP (M)": 0.67096, "AP (L)": 0.76935, "AR": 0.79646, "AR .5": 0.94049, "AR .75": 0.86429, "AR (M)": 0.75359, "AR (L)": 0.85819}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05503, "heatmap_loss": 0.00063, "acc_pose": 0.81116, "loss": 0.00063, "time": 0.24753}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.8159, "loss": 0.00062, "time": 0.19145}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81161, "loss": 0.00062, "time": 0.19164}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.80523, "loss": 0.00063, "time": 0.19241}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.80871, "loss": 0.00063, "time": 0.19176}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05447, "heatmap_loss": 0.00063, "acc_pose": 0.81047, "loss": 0.00063, "time": 0.24996}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.80356, "loss": 0.00063, "time": 0.19211}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80396, "loss": 0.00062, "time": 0.19197}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.80766, "loss": 0.00063, "time": 0.19189}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00064, "acc_pose": 0.8077, "loss": 0.00064, "time": 0.19237}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05454, "heatmap_loss": 0.00062, "acc_pose": 0.80546, "loss": 0.00062, "time": 0.252}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.80326, "loss": 0.00063, "time": 0.19201}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.81127, "loss": 0.00063, "time": 0.19257}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.8088, "loss": 0.00063, "time": 0.1926}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81743, "loss": 0.00062, "time": 0.19256}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05461, "heatmap_loss": 0.00063, "acc_pose": 0.80452, "loss": 0.00063, "time": 0.2511}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00063, "acc_pose": 0.81313, "loss": 0.00063, "time": 0.192}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80387, "loss": 0.00063, "time": 0.19229}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.81286, "loss": 0.00063, "time": 0.19245}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.81364, "loss": 0.00063, "time": 0.19195}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05542, "heatmap_loss": 0.00063, "acc_pose": 0.80728, "loss": 0.00063, "time": 0.25103}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80268, "loss": 0.00062, "time": 0.19213}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00063, "acc_pose": 0.80206, "loss": 0.00063, "time": 0.19202}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80861, "loss": 0.00062, "time": 0.19239}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80633, "loss": 0.00063, "time": 0.19198}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05713, "heatmap_loss": 0.00062, "acc_pose": 0.81358, "loss": 0.00062, "time": 0.25202}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.81246, "loss": 0.00063, "time": 0.19227}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80792, "loss": 0.00062, "time": 0.19154}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.80521, "loss": 0.00063, "time": 0.19348}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00063, "acc_pose": 0.81268, "loss": 0.00063, "time": 0.19221}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05512, "heatmap_loss": 0.00062, "acc_pose": 0.81092, "loss": 0.00062, "time": 0.24973}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00063, "acc_pose": 0.80637, "loss": 0.00063, "time": 0.19213}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.81044, "loss": 0.00062, "time": 0.19168}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.81114, "loss": 0.00062, "time": 0.19412}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80879, "loss": 0.00062, "time": 0.19252}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05495, "heatmap_loss": 0.00062, "acc_pose": 0.80414, "loss": 0.00062, "time": 0.25022}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.80786, "loss": 0.00062, "time": 0.19309}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.80986, "loss": 0.00062, "time": 0.19188}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.81335, "loss": 0.00063, "time": 0.19309}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81094, "loss": 0.00062, "time": 0.19377}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05504, "heatmap_loss": 0.00062, "acc_pose": 0.81417, "loss": 0.00062, "time": 0.25037}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.80955, "loss": 0.00062, "time": 0.19293}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00062, "acc_pose": 0.81027, "loss": 0.00062, "time": 0.19271}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.80568, "loss": 0.00062, "time": 0.1921}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81279, "loss": 0.00062, "time": 0.19152}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05586, "heatmap_loss": 0.00062, "acc_pose": 0.81099, "loss": 0.00062, "time": 0.25154}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.80873, "loss": 0.00062, "time": 0.19281}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00063, "acc_pose": 0.80616, "loss": 0.00063, "time": 0.19206}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81351, "loss": 0.00062, "time": 0.19191}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00063, "acc_pose": 0.80599, "loss": 0.00063, "time": 0.19156}
+{"mode": "val", "epoch": 100, "iter": 407, "lr": 1e-05, "AP": 0.7467, "AP .5": 0.90302, "AP .75": 0.82033, "AP (M)": 0.67517, "AP (L)": 0.77231, "AR": 0.80083, "AR .5": 0.94159, "AR .75": 0.86713, "AR (M)": 0.75952, "AR (L)": 0.86094}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05444, "heatmap_loss": 0.00062, "acc_pose": 0.80548, "loss": 0.00062, "time": 0.24717}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00062, "acc_pose": 0.80486, "loss": 0.00062, "time": 0.19299}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.8118, "loss": 0.00062, "time": 0.19211}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81092, "loss": 0.00062, "time": 0.1919}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81148, "loss": 0.00062, "time": 0.19188}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05579, "heatmap_loss": 0.00061, "acc_pose": 0.81649, "loss": 0.00061, "time": 0.24961}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.80959, "loss": 0.00062, "time": 0.19136}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00062, "acc_pose": 0.80327, "loss": 0.00062, "time": 0.19238}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00062, "acc_pose": 0.81567, "loss": 0.00062, "time": 0.19295}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00062, "acc_pose": 0.81356, "loss": 0.00062, "time": 0.19226}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05453, "heatmap_loss": 0.00062, "acc_pose": 0.81428, "loss": 0.00062, "time": 0.24998}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81174, "loss": 0.00062, "time": 0.19193}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81353, "loss": 0.00062, "time": 0.19219}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81057, "loss": 0.00061, "time": 0.19246}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81327, "loss": 0.00062, "time": 0.19187}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05473, "heatmap_loss": 0.00062, "acc_pose": 0.80931, "loss": 0.00062, "time": 0.25061}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.80997, "loss": 0.00061, "time": 0.19187}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.81138, "loss": 0.00062, "time": 0.19173}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81623, "loss": 0.00062, "time": 0.19232}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81245, "loss": 0.00062, "time": 0.19191}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05492, "heatmap_loss": 0.00062, "acc_pose": 0.81685, "loss": 0.00062, "time": 0.2503}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00062, "acc_pose": 0.80835, "loss": 0.00062, "time": 0.19204}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.80737, "loss": 0.00062, "time": 0.19147}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81772, "loss": 0.00061, "time": 0.19153}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81007, "loss": 0.00062, "time": 0.19228}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05502, "heatmap_loss": 0.00061, "acc_pose": 0.81111, "loss": 0.00061, "time": 0.25026}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81189, "loss": 0.00062, "time": 0.19182}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.80567, "loss": 0.00061, "time": 0.19201}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81659, "loss": 0.00061, "time": 0.19206}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00045, "heatmap_loss": 0.00062, "acc_pose": 0.81344, "loss": 0.00062, "time": 0.192}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05508, "heatmap_loss": 0.00061, "acc_pose": 0.81381, "loss": 0.00061, "time": 0.2501}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.80849, "loss": 0.00061, "time": 0.19163}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00062, "acc_pose": 0.81678, "loss": 0.00062, "time": 0.19169}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.81517, "loss": 0.00062, "time": 0.19517}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81179, "loss": 0.00061, "time": 0.19203}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05618, "heatmap_loss": 0.00061, "acc_pose": 0.815, "loss": 0.00061, "time": 0.25031}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00061, "acc_pose": 0.80967, "loss": 0.00061, "time": 0.19267}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00062, "acc_pose": 0.80782, "loss": 0.00062, "time": 0.19188}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81156, "loss": 0.00061, "time": 0.19225}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.8156, "loss": 0.00061, "time": 0.19235}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05567, "heatmap_loss": 0.00061, "acc_pose": 0.81956, "loss": 0.00061, "time": 0.24989}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.81434, "loss": 0.00062, "time": 0.19257}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00062, "acc_pose": 0.81774, "loss": 0.00062, "time": 0.19178}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81452, "loss": 0.00061, "time": 0.19187}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81425, "loss": 0.00061, "time": 0.19269}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05467, "heatmap_loss": 0.00061, "acc_pose": 0.81022, "loss": 0.00061, "time": 0.25053}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81054, "loss": 0.00061, "time": 0.19269}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00062, "acc_pose": 0.80916, "loss": 0.00062, "time": 0.19261}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.82241, "loss": 0.00061, "time": 0.19204}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.81913, "loss": 0.00061, "time": 0.19199}
+{"mode": "val", "epoch": 110, "iter": 407, "lr": 1e-05, "AP": 0.74983, "AP .5": 0.90617, "AP .75": 0.82317, "AP (M)": 0.6781, "AP (L)": 0.77724, "AR": 0.8022, "AR .5": 0.94443, "AR .75": 0.8676, "AR (M)": 0.76113, "AR (L)": 0.86217}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05537, "heatmap_loss": 0.00061, "acc_pose": 0.81179, "loss": 0.00061, "time": 0.24751}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81543, "loss": 0.00061, "time": 0.19101}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00061, "acc_pose": 0.8079, "loss": 0.00061, "time": 0.19101}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81178, "loss": 0.00061, "time": 0.19306}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00053, "heatmap_loss": 0.00061, "acc_pose": 0.81227, "loss": 0.00061, "time": 0.19239}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05449, "heatmap_loss": 0.00061, "acc_pose": 0.81468, "loss": 0.00061, "time": 0.25152}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81002, "loss": 0.00061, "time": 0.19205}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.81938, "loss": 0.0006, "time": 0.19206}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00061, "acc_pose": 0.82172, "loss": 0.00061, "time": 0.19226}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00061, "acc_pose": 0.81673, "loss": 0.00061, "time": 0.19187}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05558, "heatmap_loss": 0.00061, "acc_pose": 0.82035, "loss": 0.00061, "time": 0.25116}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81598, "loss": 0.00061, "time": 0.19203}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81924, "loss": 0.00061, "time": 0.19147}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.82077, "loss": 0.0006, "time": 0.19247}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81027, "loss": 0.00061, "time": 0.19164}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05508, "heatmap_loss": 0.00061, "acc_pose": 0.81236, "loss": 0.00061, "time": 0.25}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00061, "acc_pose": 0.81778, "loss": 0.00061, "time": 0.19147}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00061, "acc_pose": 0.80969, "loss": 0.00061, "time": 0.19371}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.82006, "loss": 0.00061, "time": 0.1921}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81557, "loss": 0.00061, "time": 0.19229}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05501, "heatmap_loss": 0.00061, "acc_pose": 0.81472, "loss": 0.00061, "time": 0.24977}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.81574, "loss": 0.0006, "time": 0.19268}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.81092, "loss": 0.00061, "time": 0.19161}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.8189, "loss": 0.0006, "time": 0.19227}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81757, "loss": 0.00061, "time": 0.19198}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05523, "heatmap_loss": 0.00061, "acc_pose": 0.82357, "loss": 0.00061, "time": 0.2508}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.8122, "loss": 0.0006, "time": 0.19209}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00061, "acc_pose": 0.8094, "loss": 0.00061, "time": 0.19248}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81742, "loss": 0.00061, "time": 0.19204}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81641, "loss": 0.00061, "time": 0.19223}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05525, "heatmap_loss": 0.00061, "acc_pose": 0.80963, "loss": 0.00061, "time": 0.25207}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81903, "loss": 0.00061, "time": 0.19274}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81947, "loss": 0.00061, "time": 0.19175}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.8145, "loss": 0.00061, "time": 0.19219}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.82083, "loss": 0.00061, "time": 0.19235}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05452, "heatmap_loss": 0.00061, "acc_pose": 0.81477, "loss": 0.00061, "time": 0.25185}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.0006, "acc_pose": 0.81269, "loss": 0.0006, "time": 0.19326}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00061, "acc_pose": 0.81516, "loss": 0.00061, "time": 0.19201}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00061, "acc_pose": 0.81906, "loss": 0.00061, "time": 0.19181}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00061, "acc_pose": 0.81847, "loss": 0.00061, "time": 0.19225}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05551, "heatmap_loss": 0.0006, "acc_pose": 0.82498, "loss": 0.0006, "time": 0.25093}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.82093, "loss": 0.0006, "time": 0.19242}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.81044, "loss": 0.00061, "time": 0.19204}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00055, "heatmap_loss": 0.00061, "acc_pose": 0.81617, "loss": 0.00061, "time": 0.19207}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81784, "loss": 0.0006, "time": 0.19219}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05443, "heatmap_loss": 0.0006, "acc_pose": 0.82352, "loss": 0.0006, "time": 0.25154}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00057, "heatmap_loss": 0.00061, "acc_pose": 0.8182, "loss": 0.00061, "time": 0.19192}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.0006, "acc_pose": 0.81343, "loss": 0.0006, "time": 0.19103}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00061, "acc_pose": 0.81423, "loss": 0.00061, "time": 0.19133}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82044, "loss": 0.0006, "time": 0.19181}
+{"mode": "val", "epoch": 120, "iter": 407, "lr": 1e-05, "AP": 0.7498, "AP .5": 0.90547, "AP .75": 0.82554, "AP (M)": 0.67855, "AP (L)": 0.77477, "AR": 0.80337, "AR .5": 0.94458, "AR .75": 0.87075, "AR (M)": 0.7631, "AR (L)": 0.86221}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05492, "heatmap_loss": 0.0006, "acc_pose": 0.81164, "loss": 0.0006, "time": 0.24632}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82427, "loss": 0.0006, "time": 0.19166}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00061, "acc_pose": 0.82063, "loss": 0.00061, "time": 0.19171}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.81664, "loss": 0.0006, "time": 0.19181}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.82047, "loss": 0.0006, "time": 0.19218}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05619, "heatmap_loss": 0.0006, "acc_pose": 0.81626, "loss": 0.0006, "time": 0.25035}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.81835, "loss": 0.0006, "time": 0.19156}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00061, "acc_pose": 0.82021, "loss": 0.00061, "time": 0.19176}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.0006, "acc_pose": 0.82212, "loss": 0.0006, "time": 0.19211}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00061, "acc_pose": 0.81754, "loss": 0.00061, "time": 0.19151}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05495, "heatmap_loss": 0.0006, "acc_pose": 0.82216, "loss": 0.0006, "time": 0.252}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82143, "loss": 0.0006, "time": 0.19391}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.82203, "loss": 0.0006, "time": 0.19214}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00061, "acc_pose": 0.81943, "loss": 0.00061, "time": 0.19217}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.82449, "loss": 0.0006, "time": 0.19199}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05588, "heatmap_loss": 0.0006, "acc_pose": 0.81102, "loss": 0.0006, "time": 0.25036}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.0006, "acc_pose": 0.81587, "loss": 0.0006, "time": 0.19155}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81816, "loss": 0.0006, "time": 0.19179}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.81831, "loss": 0.0006, "time": 0.19254}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.82083, "loss": 0.0006, "time": 0.19263}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05528, "heatmap_loss": 0.0006, "acc_pose": 0.81642, "loss": 0.0006, "time": 0.25005}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82335, "loss": 0.0006, "time": 0.19169}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.81395, "loss": 0.0006, "time": 0.1921}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.8148, "loss": 0.0006, "time": 0.19177}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81743, "loss": 0.0006, "time": 0.19241}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0554, "heatmap_loss": 0.0006, "acc_pose": 0.82094, "loss": 0.0006, "time": 0.25023}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.81538, "loss": 0.0006, "time": 0.19225}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.82077, "loss": 0.0006, "time": 0.19268}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.82534, "loss": 0.0006, "time": 0.19253}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82456, "loss": 0.0006, "time": 0.19184}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05487, "heatmap_loss": 0.0006, "acc_pose": 0.82089, "loss": 0.0006, "time": 0.25021}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.81809, "loss": 0.0006, "time": 0.1916}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.81697, "loss": 0.00059, "time": 0.19177}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.82091, "loss": 0.0006, "time": 0.19274}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.82661, "loss": 0.0006, "time": 0.19184}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05634, "heatmap_loss": 0.0006, "acc_pose": 0.82038, "loss": 0.0006, "time": 0.25037}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.81777, "loss": 0.0006, "time": 0.19205}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.82411, "loss": 0.0006, "time": 0.1922}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.0006, "acc_pose": 0.81152, "loss": 0.0006, "time": 0.19187}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.81675, "loss": 0.0006, "time": 0.19195}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05541, "heatmap_loss": 0.00059, "acc_pose": 0.8219, "loss": 0.00059, "time": 0.24903}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82339, "loss": 0.00059, "time": 0.19213}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81459, "loss": 0.0006, "time": 0.19217}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.8209, "loss": 0.00059, "time": 0.19194}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.81602, "loss": 0.0006, "time": 0.19188}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05521, "heatmap_loss": 0.0006, "acc_pose": 0.8188, "loss": 0.0006, "time": 0.25095}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.80589, "loss": 0.0006, "time": 0.19196}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.0006, "acc_pose": 0.818, "loss": 0.0006, "time": 0.19146}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.82281, "loss": 0.0006, "time": 0.19221}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.8201, "loss": 0.00059, "time": 0.19161}
+{"mode": "val", "epoch": 130, "iter": 407, "lr": 1e-05, "AP": 0.74963, "AP .5": 0.90487, "AP .75": 0.82631, "AP (M)": 0.67842, "AP (L)": 0.77457, "AR": 0.80252, "AR .5": 0.94254, "AR .75": 0.87012, "AR (M)": 0.76184, "AR (L)": 0.8618}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05452, "heatmap_loss": 0.0006, "acc_pose": 0.82752, "loss": 0.0006, "time": 0.24714}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.0006, "acc_pose": 0.82051, "loss": 0.0006, "time": 0.19152}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81651, "loss": 0.0006, "time": 0.19136}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.82415, "loss": 0.00059, "time": 0.19165}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.82201, "loss": 0.0006, "time": 0.19189}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05525, "heatmap_loss": 0.00059, "acc_pose": 0.81879, "loss": 0.00059, "time": 0.25077}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.0006, "acc_pose": 0.82195, "loss": 0.0006, "time": 0.19215}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82106, "loss": 0.00059, "time": 0.19208}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82094, "loss": 0.00059, "time": 0.19212}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00055, "heatmap_loss": 0.0006, "acc_pose": 0.82537, "loss": 0.0006, "time": 0.19199}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05531, "heatmap_loss": 0.00059, "acc_pose": 0.81543, "loss": 0.00059, "time": 0.25017}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82329, "loss": 0.00059, "time": 0.19385}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.81867, "loss": 0.0006, "time": 0.19184}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82061, "loss": 0.00059, "time": 0.19275}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.81532, "loss": 0.0006, "time": 0.19232}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0564, "heatmap_loss": 0.00059, "acc_pose": 0.82163, "loss": 0.00059, "time": 0.25219}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00059, "acc_pose": 0.82044, "loss": 0.00059, "time": 0.1918}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.82087, "loss": 0.00059, "time": 0.19212}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82624, "loss": 0.00059, "time": 0.19218}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81329, "loss": 0.0006, "time": 0.19392}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05517, "heatmap_loss": 0.00059, "acc_pose": 0.82422, "loss": 0.00059, "time": 0.25088}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.81927, "loss": 0.00059, "time": 0.19259}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.81458, "loss": 0.0006, "time": 0.1914}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.0006, "acc_pose": 0.82048, "loss": 0.0006, "time": 0.19178}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82168, "loss": 0.00059, "time": 0.19157}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05462, "heatmap_loss": 0.0006, "acc_pose": 0.82117, "loss": 0.0006, "time": 0.25159}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82721, "loss": 0.00059, "time": 0.19242}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.81946, "loss": 0.00059, "time": 0.19176}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.8167, "loss": 0.00059, "time": 0.19167}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00045, "heatmap_loss": 0.00059, "acc_pose": 0.82652, "loss": 0.00059, "time": 0.19203}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05454, "heatmap_loss": 0.00059, "acc_pose": 0.82215, "loss": 0.00059, "time": 0.25023}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00059, "acc_pose": 0.81446, "loss": 0.00059, "time": 0.19344}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.81921, "loss": 0.00059, "time": 0.19353}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82406, "loss": 0.00059, "time": 0.19354}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82183, "loss": 0.00059, "time": 0.19217}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05472, "heatmap_loss": 0.00059, "acc_pose": 0.82144, "loss": 0.00059, "time": 0.2507}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82454, "loss": 0.00058, "time": 0.19149}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82263, "loss": 0.00059, "time": 0.19222}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82331, "loss": 0.00059, "time": 0.1915}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.8272, "loss": 0.00059, "time": 0.19181}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05536, "heatmap_loss": 0.00059, "acc_pose": 0.82365, "loss": 0.00059, "time": 0.25096}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82295, "loss": 0.00059, "time": 0.19119}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82319, "loss": 0.00059, "time": 0.19313}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.81869, "loss": 0.00059, "time": 0.19293}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.82135, "loss": 0.00059, "time": 0.19521}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05505, "heatmap_loss": 0.00059, "acc_pose": 0.82891, "loss": 0.00059, "time": 0.25031}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.82179, "loss": 0.00059, "time": 0.1922}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.81739, "loss": 0.00059, "time": 0.19234}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.82255, "loss": 0.0006, "time": 0.19258}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82323, "loss": 0.00059, "time": 0.19395}
+{"mode": "val", "epoch": 140, "iter": 407, "lr": 1e-05, "AP": 0.75261, "AP .5": 0.90621, "AP .75": 0.82417, "AP (M)": 0.68224, "AP (L)": 0.77726, "AR": 0.80601, "AR .5": 0.9449, "AR .75": 0.8698, "AR (M)": 0.76618, "AR (L)": 0.86451}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05549, "heatmap_loss": 0.00059, "acc_pose": 0.82214, "loss": 0.00059, "time": 0.24762}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.82711, "loss": 0.00058, "time": 0.19284}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.81413, "loss": 0.00059, "time": 0.19171}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82665, "loss": 0.00059, "time": 0.19169}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82028, "loss": 0.00059, "time": 0.19201}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0547, "heatmap_loss": 0.00058, "acc_pose": 0.82611, "loss": 0.00058, "time": 0.24992}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00059, "acc_pose": 0.81453, "loss": 0.00059, "time": 0.19264}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00059, "acc_pose": 0.82472, "loss": 0.00059, "time": 0.19199}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82185, "loss": 0.00059, "time": 0.19244}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82628, "loss": 0.00058, "time": 0.19159}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05484, "heatmap_loss": 0.00058, "acc_pose": 0.82915, "loss": 0.00058, "time": 0.24992}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.82372, "loss": 0.00059, "time": 0.19154}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82387, "loss": 0.00059, "time": 0.1932}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82538, "loss": 0.00059, "time": 0.19379}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00059, "acc_pose": 0.82375, "loss": 0.00059, "time": 0.19272}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05493, "heatmap_loss": 0.00058, "acc_pose": 0.82198, "loss": 0.00058, "time": 0.24966}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.81837, "loss": 0.00059, "time": 0.19184}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.81692, "loss": 0.00059, "time": 0.19135}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.81968, "loss": 0.00059, "time": 0.19208}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82419, "loss": 0.00058, "time": 0.19163}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05516, "heatmap_loss": 0.00059, "acc_pose": 0.82504, "loss": 0.00059, "time": 0.25048}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.8225, "loss": 0.00058, "time": 0.19288}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82251, "loss": 0.00059, "time": 0.19267}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82134, "loss": 0.00059, "time": 0.19238}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.831, "loss": 0.00059, "time": 0.19193}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05544, "heatmap_loss": 0.00058, "acc_pose": 0.82208, "loss": 0.00058, "time": 0.2534}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.82279, "loss": 0.00058, "time": 0.19204}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00059, "acc_pose": 0.81746, "loss": 0.00059, "time": 0.19245}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00059, "acc_pose": 0.82373, "loss": 0.00059, "time": 0.19291}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82308, "loss": 0.00059, "time": 0.19255}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05503, "heatmap_loss": 0.00058, "acc_pose": 0.82892, "loss": 0.00058, "time": 0.25217}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00058, "acc_pose": 0.82275, "loss": 0.00058, "time": 0.19268}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00059, "acc_pose": 0.82321, "loss": 0.00059, "time": 0.19182}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00059, "acc_pose": 0.82125, "loss": 0.00059, "time": 0.19208}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.81857, "loss": 0.00059, "time": 0.19179}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05525, "heatmap_loss": 0.00058, "acc_pose": 0.82104, "loss": 0.00058, "time": 0.25548}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.82385, "loss": 0.00059, "time": 0.19277}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82472, "loss": 0.00058, "time": 0.19215}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82551, "loss": 0.00058, "time": 0.19214}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00059, "acc_pose": 0.82771, "loss": 0.00059, "time": 0.19173}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.0548, "heatmap_loss": 0.00058, "acc_pose": 0.81944, "loss": 0.00058, "time": 0.25034}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.83109, "loss": 0.00058, "time": 0.19219}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.83027, "loss": 0.00058, "time": 0.1925}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82611, "loss": 0.00058, "time": 0.1925}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82879, "loss": 0.00058, "time": 0.19264}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05499, "heatmap_loss": 0.00058, "acc_pose": 0.8275, "loss": 0.00058, "time": 0.25061}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82666, "loss": 0.00058, "time": 0.19225}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00059, "acc_pose": 0.82702, "loss": 0.00059, "time": 0.19196}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.83042, "loss": 0.00058, "time": 0.19169}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82674, "loss": 0.00058, "time": 0.19322}
+{"mode": "val", "epoch": 150, "iter": 407, "lr": 1e-05, "AP": 0.75281, "AP .5": 0.90617, "AP .75": 0.82731, "AP (M)": 0.68037, "AP (L)": 0.77994, "AR": 0.8056, "AR .5": 0.94569, "AR .75": 0.87217, "AR (M)": 0.76394, "AR (L)": 0.86589}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05398, "heatmap_loss": 0.00058, "acc_pose": 0.82387, "loss": 0.00058, "time": 0.24667}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.83128, "loss": 0.00058, "time": 0.19228}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.82494, "loss": 0.00058, "time": 0.19149}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00027, "heatmap_loss": 0.00059, "acc_pose": 0.82431, "loss": 0.00059, "time": 0.19243}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.83391, "loss": 0.00058, "time": 0.19286}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05552, "heatmap_loss": 0.00058, "acc_pose": 0.82518, "loss": 0.00058, "time": 0.24988}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82858, "loss": 0.00058, "time": 0.19138}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82808, "loss": 0.00058, "time": 0.19139}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82286, "loss": 0.00058, "time": 0.19151}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.83341, "loss": 0.00058, "time": 0.19154}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05518, "heatmap_loss": 0.00058, "acc_pose": 0.83146, "loss": 0.00058, "time": 0.25071}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.83412, "loss": 0.00058, "time": 0.19291}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82905, "loss": 0.00058, "time": 0.19336}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.82564, "loss": 0.00058, "time": 0.19272}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00058, "acc_pose": 0.82935, "loss": 0.00058, "time": 0.19227}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05527, "heatmap_loss": 0.00058, "acc_pose": 0.8263, "loss": 0.00058, "time": 0.25089}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82657, "loss": 0.00058, "time": 0.19177}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.81855, "loss": 0.00058, "time": 0.1917}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82344, "loss": 0.00058, "time": 0.19289}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82451, "loss": 0.00058, "time": 0.19189}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05507, "heatmap_loss": 0.00058, "acc_pose": 0.82992, "loss": 0.00058, "time": 0.25205}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82861, "loss": 0.00058, "time": 0.19198}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.83292, "loss": 0.00058, "time": 0.19268}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00058, "acc_pose": 0.8262, "loss": 0.00058, "time": 0.19216}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82733, "loss": 0.00058, "time": 0.19219}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05531, "heatmap_loss": 0.00058, "acc_pose": 0.82069, "loss": 0.00058, "time": 0.2516}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82398, "loss": 0.00058, "time": 0.19197}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.81936, "loss": 0.00058, "time": 0.19169}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82542, "loss": 0.00058, "time": 0.19264}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82546, "loss": 0.00058, "time": 0.19197}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05563, "heatmap_loss": 0.00058, "acc_pose": 0.83582, "loss": 0.00058, "time": 0.2505}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.83219, "loss": 0.00058, "time": 0.19197}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82657, "loss": 0.00058, "time": 0.19174}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.83408, "loss": 0.00058, "time": 0.19202}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82338, "loss": 0.00058, "time": 0.19263}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05457, "heatmap_loss": 0.00058, "acc_pose": 0.82197, "loss": 0.00058, "time": 0.25106}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82714, "loss": 0.00058, "time": 0.1922}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82272, "loss": 0.00058, "time": 0.19129}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.83326, "loss": 0.00058, "time": 0.19197}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82628, "loss": 0.00058, "time": 0.1917}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05479, "heatmap_loss": 0.00057, "acc_pose": 0.8263, "loss": 0.00057, "time": 0.25114}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.82326, "loss": 0.00058, "time": 0.19183}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.82222, "loss": 0.00058, "time": 0.19317}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82751, "loss": 0.00058, "time": 0.19249}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82732, "loss": 0.00058, "time": 0.19206}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05548, "heatmap_loss": 0.00058, "acc_pose": 0.82821, "loss": 0.00058, "time": 0.25355}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82501, "loss": 0.00058, "time": 0.19187}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.82814, "loss": 0.00058, "time": 0.19187}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.82565, "loss": 0.00057, "time": 0.19203}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82387, "loss": 0.00058, "time": 0.19166}
+{"mode": "val", "epoch": 160, "iter": 407, "lr": 1e-05, "AP": 0.75134, "AP .5": 0.90501, "AP .75": 0.82264, "AP (M)": 0.67996, "AP (L)": 0.77845, "AR": 0.80449, "AR .5": 0.9438, "AR .75": 0.86792, "AR (M)": 0.7634, "AR (L)": 0.86455}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05488, "heatmap_loss": 0.00058, "acc_pose": 0.83186, "loss": 0.00058, "time": 0.24761}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.83383, "loss": 0.00058, "time": 0.19295}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00058, "acc_pose": 0.82452, "loss": 0.00058, "time": 0.19179}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.83441, "loss": 0.00057, "time": 0.19258}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.82951, "loss": 0.00057, "time": 0.19259}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05476, "heatmap_loss": 0.00057, "acc_pose": 0.83003, "loss": 0.00057, "time": 0.25083}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.82521, "loss": 0.00057, "time": 0.19206}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83102, "loss": 0.00057, "time": 0.19244}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.82731, "loss": 0.00057, "time": 0.19203}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82932, "loss": 0.00058, "time": 0.1916}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05594, "heatmap_loss": 0.00057, "acc_pose": 0.82989, "loss": 0.00057, "time": 0.25139}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82605, "loss": 0.00058, "time": 0.1918}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82663, "loss": 0.00058, "time": 0.19248}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82289, "loss": 0.00058, "time": 0.19361}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.82748, "loss": 0.00058, "time": 0.19221}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05504, "heatmap_loss": 0.00057, "acc_pose": 0.82917, "loss": 0.00057, "time": 0.25205}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.8242, "loss": 0.00058, "time": 0.19225}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00058, "acc_pose": 0.82332, "loss": 0.00058, "time": 0.1916}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82754, "loss": 0.00058, "time": 0.19178}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.83486, "loss": 0.00058, "time": 0.19193}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05501, "heatmap_loss": 0.00057, "acc_pose": 0.8288, "loss": 0.00057, "time": 0.25135}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.82906, "loss": 0.00057, "time": 0.19297}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.83139, "loss": 0.00057, "time": 0.19272}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.82862, "loss": 0.00057, "time": 0.19214}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.83616, "loss": 0.00057, "time": 0.1921}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05536, "heatmap_loss": 0.00058, "acc_pose": 0.82948, "loss": 0.00058, "time": 0.25108}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.82617, "loss": 0.00057, "time": 0.19216}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00057, "acc_pose": 0.82926, "loss": 0.00057, "time": 0.19289}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00057, "acc_pose": 0.82339, "loss": 0.00057, "time": 0.19259}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.8337, "loss": 0.00057, "time": 0.19221}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05503, "heatmap_loss": 0.00057, "acc_pose": 0.83072, "loss": 0.00057, "time": 0.25253}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.8275, "loss": 0.00057, "time": 0.19177}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00057, "acc_pose": 0.82792, "loss": 0.00057, "time": 0.1923}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00053, "heatmap_loss": 0.00057, "acc_pose": 0.83795, "loss": 0.00057, "time": 0.19257}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.82431, "loss": 0.00057, "time": 0.19176}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05552, "heatmap_loss": 0.00057, "acc_pose": 0.83464, "loss": 0.00057, "time": 0.25137}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.82931, "loss": 0.00057, "time": 0.19185}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00058, "acc_pose": 0.82353, "loss": 0.00058, "time": 0.19236}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.8298, "loss": 0.00057, "time": 0.19264}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.8333, "loss": 0.00057, "time": 0.1938}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05453, "heatmap_loss": 0.00057, "acc_pose": 0.83666, "loss": 0.00057, "time": 0.25086}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00057, "acc_pose": 0.8236, "loss": 0.00057, "time": 0.19201}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.83262, "loss": 0.00057, "time": 0.19211}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00057, "acc_pose": 0.82807, "loss": 0.00057, "time": 0.19165}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00057, "acc_pose": 0.8341, "loss": 0.00057, "time": 0.19189}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 1e-05, "memory": 14090, "data_time": 0.05571, "heatmap_loss": 0.00057, "acc_pose": 0.83024, "loss": 0.00057, "time": 0.25108}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.82416, "loss": 0.00057, "time": 0.19164}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 1e-05, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.82121, "loss": 0.00057, "time": 0.19149}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 1e-05, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00057, "acc_pose": 0.82803, "loss": 0.00057, "time": 0.19174}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 1e-05, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.83169, "loss": 0.00057, "time": 0.19257}
+{"mode": "val", "epoch": 170, "iter": 407, "lr": 1e-05, "AP": 0.7537, "AP .5": 0.90603, "AP .75": 0.83007, "AP (M)": 0.68317, "AP (L)": 0.78024, "AR": 0.80575, "AR .5": 0.94254, "AR .75": 0.87358, "AR (M)": 0.76452, "AR (L)": 0.86641}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05487, "heatmap_loss": 0.00056, "acc_pose": 0.83586, "loss": 0.00056, "time": 0.24861}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00056, "acc_pose": 0.8335, "loss": 0.00056, "time": 0.1925}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00056, "acc_pose": 0.83515, "loss": 0.00056, "time": 0.1926}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00056, "acc_pose": 0.83831, "loss": 0.00056, "time": 0.19239}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00056, "acc_pose": 0.83195, "loss": 0.00056, "time": 0.19269}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0573, "heatmap_loss": 0.00056, "acc_pose": 0.83641, "loss": 0.00056, "time": 0.25094}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00056, "acc_pose": 0.82807, "loss": 0.00056, "time": 0.193}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00056, "acc_pose": 0.83186, "loss": 0.00056, "time": 0.19177}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.83726, "loss": 0.00055, "time": 0.19226}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00056, "acc_pose": 0.83664, "loss": 0.00056, "time": 0.19215}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05555, "heatmap_loss": 0.00056, "acc_pose": 0.83187, "loss": 0.00056, "time": 0.24988}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83752, "loss": 0.00055, "time": 0.19189}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83323, "loss": 0.00055, "time": 0.19257}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00056, "acc_pose": 0.83975, "loss": 0.00056, "time": 0.19207}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00056, "acc_pose": 0.83619, "loss": 0.00056, "time": 0.19221}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05498, "heatmap_loss": 0.00056, "acc_pose": 0.83626, "loss": 0.00056, "time": 0.25149}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00056, "acc_pose": 0.83378, "loss": 0.00056, "time": 0.19227}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83807, "loss": 0.00055, "time": 0.19233}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84035, "loss": 0.00055, "time": 0.19199}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00055, "acc_pose": 0.82983, "loss": 0.00055, "time": 0.19239}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05496, "heatmap_loss": 0.00055, "acc_pose": 0.83778, "loss": 0.00055, "time": 0.25102}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83572, "loss": 0.00055, "time": 0.19156}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00056, "acc_pose": 0.83668, "loss": 0.00056, "time": 0.1926}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83989, "loss": 0.00055, "time": 0.19205}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84315, "loss": 0.00055, "time": 0.19183}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05547, "heatmap_loss": 0.00055, "acc_pose": 0.8349, "loss": 0.00055, "time": 0.25204}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83544, "loss": 0.00055, "time": 0.19244}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00056, "acc_pose": 0.83172, "loss": 0.00056, "time": 0.1926}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83102, "loss": 0.00055, "time": 0.19291}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83483, "loss": 0.00055, "time": 0.19186}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05533, "heatmap_loss": 0.00055, "acc_pose": 0.83944, "loss": 0.00055, "time": 0.25214}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83133, "loss": 0.00055, "time": 0.19252}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00056, "acc_pose": 0.83362, "loss": 0.00056, "time": 0.19231}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83972, "loss": 0.00055, "time": 0.19191}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00055, "acc_pose": 0.83487, "loss": 0.00055, "time": 0.19314}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05495, "heatmap_loss": 0.00055, "acc_pose": 0.84149, "loss": 0.00055, "time": 0.25159}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83725, "loss": 0.00055, "time": 0.19263}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.84074, "loss": 0.00055, "time": 0.19187}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00042, "heatmap_loss": 0.00055, "acc_pose": 0.83352, "loss": 0.00055, "time": 0.19193}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84432, "loss": 0.00055, "time": 0.19196}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05538, "heatmap_loss": 0.00055, "acc_pose": 0.83752, "loss": 0.00055, "time": 0.25231}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83335, "loss": 0.00056, "time": 0.1913}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83708, "loss": 0.00055, "time": 0.19205}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84076, "loss": 0.00055, "time": 0.1926}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.8436, "loss": 0.00055, "time": 0.19284}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05535, "heatmap_loss": 0.00055, "acc_pose": 0.84098, "loss": 0.00055, "time": 0.2521}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.84376, "loss": 0.00055, "time": 0.19207}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83685, "loss": 0.00055, "time": 0.19195}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84226, "loss": 0.00055, "time": 0.19252}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83624, "loss": 0.00055, "time": 0.19212}
+{"mode": "val", "epoch": 180, "iter": 407, "lr": 0.0, "AP": 0.75758, "AP .5": 0.90726, "AP .75": 0.83111, "AP (M)": 0.68695, "AP (L)": 0.78294, "AR": 0.81066, "AR .5": 0.94553, "AR .75": 0.87579, "AR (M)": 0.76965, "AR (L)": 0.87064}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05493, "heatmap_loss": 0.00055, "acc_pose": 0.83885, "loss": 0.00055, "time": 0.24778}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83729, "loss": 0.00055, "time": 0.19264}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83611, "loss": 0.00055, "time": 0.19216}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83747, "loss": 0.00055, "time": 0.19216}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83801, "loss": 0.00055, "time": 0.19206}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05554, "heatmap_loss": 0.00055, "acc_pose": 0.8414, "loss": 0.00055, "time": 0.25046}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83979, "loss": 0.00055, "time": 0.19208}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83572, "loss": 0.00055, "time": 0.19325}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.84498, "loss": 0.00055, "time": 0.19228}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.8414, "loss": 0.00055, "time": 0.19235}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05472, "heatmap_loss": 0.00055, "acc_pose": 0.83652, "loss": 0.00055, "time": 0.25158}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.83593, "loss": 0.00055, "time": 0.19173}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.83516, "loss": 0.00055, "time": 0.19241}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.8413, "loss": 0.00055, "time": 0.193}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.84172, "loss": 0.00055, "time": 0.19261}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05505, "heatmap_loss": 0.00055, "acc_pose": 0.83701, "loss": 0.00055, "time": 0.25112}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84295, "loss": 0.00054, "time": 0.19252}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83995, "loss": 0.00054, "time": 0.19189}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83263, "loss": 0.00055, "time": 0.19337}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84147, "loss": 0.00055, "time": 0.19272}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05681, "heatmap_loss": 0.00054, "acc_pose": 0.84268, "loss": 0.00054, "time": 0.2535}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.8387, "loss": 0.00055, "time": 0.19158}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.84268, "loss": 0.00055, "time": 0.19202}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.84002, "loss": 0.00055, "time": 0.19314}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83816, "loss": 0.00055, "time": 0.19262}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05478, "heatmap_loss": 0.00055, "acc_pose": 0.83746, "loss": 0.00055, "time": 0.25255}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83506, "loss": 0.00055, "time": 0.19225}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.83865, "loss": 0.00055, "time": 0.19235}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83989, "loss": 0.00055, "time": 0.19164}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84141, "loss": 0.00054, "time": 0.19264}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05401, "heatmap_loss": 0.00055, "acc_pose": 0.84519, "loss": 0.00055, "time": 0.25138}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84002, "loss": 0.00054, "time": 0.19334}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.8368, "loss": 0.00055, "time": 0.19262}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83619, "loss": 0.00055, "time": 0.19211}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84277, "loss": 0.00055, "time": 0.19247}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0546, "heatmap_loss": 0.00054, "acc_pose": 0.84184, "loss": 0.00054, "time": 0.25051}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84401, "loss": 0.00054, "time": 0.19243}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.84032, "loss": 0.00055, "time": 0.19284}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83874, "loss": 0.00055, "time": 0.19487}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84552, "loss": 0.00054, "time": 0.19293}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05533, "heatmap_loss": 0.00055, "acc_pose": 0.84403, "loss": 0.00055, "time": 0.25171}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84044, "loss": 0.00054, "time": 0.19225}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.84078, "loss": 0.00055, "time": 0.19167}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83546, "loss": 0.00055, "time": 0.19196}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84716, "loss": 0.00054, "time": 0.1933}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05514, "heatmap_loss": 0.00055, "acc_pose": 0.84264, "loss": 0.00055, "time": 0.25016}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.84177, "loss": 0.00055, "time": 0.1921}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84124, "loss": 0.00054, "time": 0.19178}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84547, "loss": 0.00054, "time": 0.1927}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.84236, "loss": 0.00055, "time": 0.19218}
+{"mode": "val", "epoch": 190, "iter": 407, "lr": 0.0, "AP": 0.75787, "AP .5": 0.90716, "AP .75": 0.83162, "AP (M)": 0.68739, "AP (L)": 0.78369, "AR": 0.81, "AR .5": 0.94584, "AR .75": 0.87547, "AR (M)": 0.769, "AR (L)": 0.86968}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05522, "heatmap_loss": 0.00054, "acc_pose": 0.8422, "loss": 0.00054, "time": 0.2481}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.84094, "loss": 0.00055, "time": 0.19158}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84186, "loss": 0.00054, "time": 0.19232}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84091, "loss": 0.00054, "time": 0.19136}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.83864, "loss": 0.00054, "time": 0.19157}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05529, "heatmap_loss": 0.00055, "acc_pose": 0.83773, "loss": 0.00055, "time": 0.25}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.835, "loss": 0.00054, "time": 0.19243}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84042, "loss": 0.00054, "time": 0.19309}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84013, "loss": 0.00054, "time": 0.19314}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00054, "acc_pose": 0.84605, "loss": 0.00054, "time": 0.19255}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05498, "heatmap_loss": 0.00055, "acc_pose": 0.84174, "loss": 0.00055, "time": 0.24998}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84059, "loss": 0.00054, "time": 0.19211}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84309, "loss": 0.00054, "time": 0.19311}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.83831, "loss": 0.00054, "time": 0.19315}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00053, "heatmap_loss": 0.00054, "acc_pose": 0.83608, "loss": 0.00054, "time": 0.19314}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05523, "heatmap_loss": 0.00054, "acc_pose": 0.8458, "loss": 0.00054, "time": 0.25094}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00055, "acc_pose": 0.83849, "loss": 0.00055, "time": 0.19248}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84044, "loss": 0.00054, "time": 0.19252}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.83974, "loss": 0.00054, "time": 0.19176}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84441, "loss": 0.00054, "time": 0.19311}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0553, "heatmap_loss": 0.00054, "acc_pose": 0.84124, "loss": 0.00054, "time": 0.25226}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.83477, "loss": 0.00054, "time": 0.19208}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84552, "loss": 0.00054, "time": 0.19282}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.8402, "loss": 0.00054, "time": 0.19235}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84204, "loss": 0.00054, "time": 0.19195}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05563, "heatmap_loss": 0.00054, "acc_pose": 0.84103, "loss": 0.00054, "time": 0.25336}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84822, "loss": 0.00054, "time": 0.19234}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83332, "loss": 0.00055, "time": 0.19262}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84668, "loss": 0.00054, "time": 0.19419}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00054, "acc_pose": 0.8371, "loss": 0.00054, "time": 0.19221}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05533, "heatmap_loss": 0.00054, "acc_pose": 0.8437, "loss": 0.00054, "time": 0.24996}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84633, "loss": 0.00054, "time": 0.19228}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.82946, "loss": 0.00054, "time": 0.19146}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83893, "loss": 0.00054, "time": 0.19192}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00052, "heatmap_loss": 0.00054, "acc_pose": 0.84026, "loss": 0.00054, "time": 0.1921}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05539, "heatmap_loss": 0.00054, "acc_pose": 0.8439, "loss": 0.00054, "time": 0.25053}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00055, "acc_pose": 0.84102, "loss": 0.00055, "time": 0.19229}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00054, "heatmap_loss": 0.00054, "acc_pose": 0.84033, "loss": 0.00054, "time": 0.19146}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.83822, "loss": 0.00054, "time": 0.192}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00054, "acc_pose": 0.84623, "loss": 0.00054, "time": 0.19201}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05527, "heatmap_loss": 0.00054, "acc_pose": 0.84344, "loss": 0.00054, "time": 0.25363}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84344, "loss": 0.00054, "time": 0.19245}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83843, "loss": 0.00055, "time": 0.19248}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.83878, "loss": 0.00054, "time": 0.19266}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.84363, "loss": 0.00055, "time": 0.19198}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05486, "heatmap_loss": 0.00054, "acc_pose": 0.84431, "loss": 0.00054, "time": 0.25024}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.83807, "loss": 0.00054, "time": 0.19269}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.83299, "loss": 0.00054, "time": 0.19169}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84279, "loss": 0.00054, "time": 0.19193}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00054, "acc_pose": 0.83979, "loss": 0.00054, "time": 0.19181}
+{"mode": "val", "epoch": 200, "iter": 407, "lr": 0.0, "AP": 0.75755, "AP .5": 0.90679, "AP .75": 0.83125, "AP (M)": 0.6862, "AP (L)": 0.78449, "AR": 0.80956, "AR .5": 0.9449, "AR .75": 0.87531, "AR (M)": 0.76848, "AR (L)": 0.8699}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0546, "heatmap_loss": 0.00054, "acc_pose": 0.83958, "loss": 0.00054, "time": 0.2469}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.8377, "loss": 0.00054, "time": 0.19233}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.8388, "loss": 0.00054, "time": 0.19249}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84619, "loss": 0.00054, "time": 0.19361}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.83992, "loss": 0.00054, "time": 0.19324}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05546, "heatmap_loss": 0.00054, "acc_pose": 0.84013, "loss": 0.00054, "time": 0.25025}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00055, "acc_pose": 0.84214, "loss": 0.00055, "time": 0.19265}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84105, "loss": 0.00054, "time": 0.19196}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.0004, "heatmap_loss": 0.00054, "acc_pose": 0.84163, "loss": 0.00054, "time": 0.19168}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84948, "loss": 0.00054, "time": 0.19196}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0547, "heatmap_loss": 0.00054, "acc_pose": 0.83758, "loss": 0.00054, "time": 0.24917}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84567, "loss": 0.00054, "time": 0.192}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84453, "loss": 0.00054, "time": 0.19204}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84029, "loss": 0.00054, "time": 0.19235}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84212, "loss": 0.00054, "time": 0.1921}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05564, "heatmap_loss": 0.00054, "acc_pose": 0.84082, "loss": 0.00054, "time": 0.25087}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.83945, "loss": 0.00054, "time": 0.19206}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84128, "loss": 0.00054, "time": 0.19238}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84176, "loss": 0.00054, "time": 0.19203}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.8399, "loss": 0.00054, "time": 0.19338}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05522, "heatmap_loss": 0.00054, "acc_pose": 0.84643, "loss": 0.00054, "time": 0.25125}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.83879, "loss": 0.00054, "time": 0.19201}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84232, "loss": 0.00054, "time": 0.19184}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84765, "loss": 0.00054, "time": 0.19168}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84443, "loss": 0.00054, "time": 0.19224}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05471, "heatmap_loss": 0.00054, "acc_pose": 0.84274, "loss": 0.00054, "time": 0.25093}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.83922, "loss": 0.00054, "time": 0.19233}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.83649, "loss": 0.00054, "time": 0.19178}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00039, "heatmap_loss": 0.00054, "acc_pose": 0.84631, "loss": 0.00054, "time": 0.19277}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84688, "loss": 0.00054, "time": 0.19176}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05474, "heatmap_loss": 0.00054, "acc_pose": 0.84194, "loss": 0.00054, "time": 0.24985}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.83528, "loss": 0.00054, "time": 0.19236}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84224, "loss": 0.00054, "time": 0.19215}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84453, "loss": 0.00054, "time": 0.19173}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00044, "heatmap_loss": 0.00054, "acc_pose": 0.84403, "loss": 0.00054, "time": 0.19193}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05579, "heatmap_loss": 0.00054, "acc_pose": 0.8411, "loss": 0.00054, "time": 0.25186}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00041, "heatmap_loss": 0.00054, "acc_pose": 0.84353, "loss": 0.00054, "time": 0.19204}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84097, "loss": 0.00054, "time": 0.19191}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00054, "acc_pose": 0.8434, "loss": 0.00054, "time": 0.19198}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00043, "heatmap_loss": 0.00054, "acc_pose": 0.84669, "loss": 0.00054, "time": 0.19203}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.0567, "heatmap_loss": 0.00054, "acc_pose": 0.84273, "loss": 0.00054, "time": 0.25323}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84593, "loss": 0.00054, "time": 0.19234}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.83997, "loss": 0.00054, "time": 0.19163}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00053, "acc_pose": 0.84397, "loss": 0.00053, "time": 0.19249}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00038, "heatmap_loss": 0.00054, "acc_pose": 0.84646, "loss": 0.00054, "time": 0.19249}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 14090, "data_time": 0.05512, "heatmap_loss": 0.00054, "acc_pose": 0.84194, "loss": 0.00054, "time": 0.25008}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 14090, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84276, "loss": 0.00054, "time": 0.1923}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.83771, "loss": 0.00054, "time": 0.19263}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 14090, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.84572, "loss": 0.00054, "time": 0.194}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 14090, "data_time": 0.00037, "heatmap_loss": 0.00054, "acc_pose": 0.84364, "loss": 0.00054, "time": 0.1921}
+{"mode": "val", "epoch": 210, "iter": 407, "lr": 0.0, "AP": 0.75797, "AP .5": 0.90664, "AP .75": 0.83177, "AP (M)": 0.68687, "AP (L)": 0.78389, "AR": 0.81072, "AR .5": 0.94553, "AR .75": 0.87657, "AR (M)": 0.77012, "AR (L)": 0.86983}
diff --git a/vendor/ViTPose/logs/vitpose-h-simple.log.json b/vendor/ViTPose/logs/vitpose-h-simple.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..57a0502e96297e623bcba386b4687bb285cb3c0b
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-h-simple.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+cb93b25", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.05293, "heatmap_loss": 0.00297, "acc_pose": 0.01131, "loss": 0.00297, "grad_norm": 0.19058, "time": 0.52151}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0026, "acc_pose": 0.02932, "loss": 0.0026, "grad_norm": 0.18838, "time": 0.34167}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00264, "acc_pose": 0.06887, "loss": 0.00264, "grad_norm": 0.19104, "time": 0.34008}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00276, "acc_pose": 0.08797, "loss": 0.00276, "grad_norm": 0.19039, "time": 0.34065}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00283, "acc_pose": 0.11359, "loss": 0.00283, "grad_norm": 0.16872, "time": 0.33942}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04677, "heatmap_loss": 0.00232, "acc_pose": 0.13551, "loss": 0.00232, "grad_norm": 0.06389, "time": 0.39018}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00214, "acc_pose": 0.17894, "loss": 0.00214, "grad_norm": 0.02635, "time": 0.34228}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00212, "acc_pose": 0.18381, "loss": 0.00212, "grad_norm": 0.02546, "time": 0.33966}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00212, "acc_pose": 0.18125, "loss": 0.00212, "grad_norm": 0.02535, "time": 0.34033}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00209, "acc_pose": 0.18781, "loss": 0.00209, "grad_norm": 0.01764, "time": 0.34053}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04671, "heatmap_loss": 0.00215, "acc_pose": 0.19025, "loss": 0.00215, "grad_norm": 0.02762, "time": 0.38975}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00206, "acc_pose": 0.20656, "loss": 0.00206, "grad_norm": 0.01532, "time": 0.34}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00203, "acc_pose": 0.19292, "loss": 0.00203, "grad_norm": 0.01494, "time": 0.33927}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00201, "acc_pose": 0.21062, "loss": 0.00201, "grad_norm": 0.01142, "time": 0.33941}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00198, "acc_pose": 0.21841, "loss": 0.00198, "grad_norm": 0.0108, "time": 0.34021}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04666, "heatmap_loss": 0.00195, "acc_pose": 0.22025, "loss": 0.00195, "grad_norm": 0.01554, "time": 0.38756}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00206, "acc_pose": 0.2361, "loss": 0.00206, "grad_norm": 0.03529, "time": 0.33773}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00191, "acc_pose": 0.25507, "loss": 0.00191, "grad_norm": 0.02021, "time": 0.33854}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00188, "acc_pose": 0.27192, "loss": 0.00188, "grad_norm": 0.01774, "time": 0.3392}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00178, "acc_pose": 0.30238, "loss": 0.00178, "grad_norm": 0.0109, "time": 0.33865}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04674, "heatmap_loss": 0.00165, "acc_pose": 0.37009, "loss": 0.00165, "grad_norm": 0.00831, "time": 0.38613}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00161, "acc_pose": 0.38802, "loss": 0.00161, "grad_norm": 0.01211, "time": 0.3344}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00154, "acc_pose": 0.43671, "loss": 0.00154, "grad_norm": 0.0106, "time": 0.33466}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00152, "acc_pose": 0.431, "loss": 0.00152, "grad_norm": 0.0123, "time": 0.33503}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00149, "acc_pose": 0.44643, "loss": 0.00149, "grad_norm": 0.01491, "time": 0.3339}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04669, "heatmap_loss": 0.00137, "acc_pose": 0.49262, "loss": 0.00137, "grad_norm": 0.01059, "time": 0.38432}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00134, "acc_pose": 0.50653, "loss": 0.00134, "grad_norm": 0.00969, "time": 0.33462}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00131, "acc_pose": 0.54633, "loss": 0.00131, "grad_norm": 0.01392, "time": 0.33733}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00139, "acc_pose": 0.52807, "loss": 0.00139, "grad_norm": 0.02206, "time": 0.33525}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00126, "acc_pose": 0.54308, "loss": 0.00126, "grad_norm": 0.00803, "time": 0.33717}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04684, "heatmap_loss": 0.00122, "acc_pose": 0.5614, "loss": 0.00122, "grad_norm": 0.00908, "time": 0.38928}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.0012, "acc_pose": 0.5816, "loss": 0.0012, "grad_norm": 0.00893, "time": 0.33679}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00126, "acc_pose": 0.57847, "loss": 0.00126, "grad_norm": 0.01545, "time": 0.33833}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00126, "acc_pose": 0.56575, "loss": 0.00126, "grad_norm": 0.01603, "time": 0.33832}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00115, "acc_pose": 0.60182, "loss": 0.00115, "grad_norm": 0.00492, "time": 0.33886}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00111, "acc_pose": 0.60023, "loss": 0.00111, "grad_norm": 0.00606, "time": 0.38646}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00111, "acc_pose": 0.60057, "loss": 0.00111, "grad_norm": 0.00712, "time": 0.3384}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0011, "acc_pose": 0.62444, "loss": 0.0011, "grad_norm": 0.00483, "time": 0.33978}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00108, "acc_pose": 0.61419, "loss": 0.00108, "grad_norm": 0.00424, "time": 0.33606}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00105, "acc_pose": 0.6378, "loss": 0.00105, "grad_norm": 0.00407, "time": 0.33708}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04672, "heatmap_loss": 0.00104, "acc_pose": 0.63702, "loss": 0.00104, "grad_norm": 0.00566, "time": 0.38555}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00104, "acc_pose": 0.6478, "loss": 0.00104, "grad_norm": 0.00667, "time": 0.3336}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00103, "acc_pose": 0.65266, "loss": 0.00103, "grad_norm": 0.00636, "time": 0.33507}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00102, "acc_pose": 0.63901, "loss": 0.00102, "grad_norm": 0.00689, "time": 0.33305}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00102, "acc_pose": 0.66026, "loss": 0.00102, "grad_norm": 0.00886, "time": 0.33449}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04667, "heatmap_loss": 0.00099, "acc_pose": 0.66112, "loss": 0.00099, "grad_norm": 0.00509, "time": 0.38456}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00098, "acc_pose": 0.6508, "loss": 0.00098, "grad_norm": 0.0045, "time": 0.33599}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00061, "heatmap_loss": 0.00099, "acc_pose": 0.65702, "loss": 0.00099, "grad_norm": 0.00503, "time": 0.33831}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00097, "acc_pose": 0.65133, "loss": 0.00097, "grad_norm": 0.00682, "time": 0.33685}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00097, "acc_pose": 0.64197, "loss": 0.00097, "grad_norm": 0.00532, "time": 0.33706}
+{"mode": "val", "epoch": 10, "iter": 204, "lr": 0.0, "AP": 0.67965, "AP .5": 0.88156, "AP .75": 0.75853, "AP (M)": 0.60239, "AP (L)": 0.70688, "AR": 0.74126, "AR .5": 0.92569, "AR .75": 0.81266, "AR (M)": 0.69325, "AR (L)": 0.80944}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04728, "heatmap_loss": 0.00097, "acc_pose": 0.67524, "loss": 0.00097, "grad_norm": 0.00782, "time": 0.38409}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00094, "acc_pose": 0.67478, "loss": 0.00094, "grad_norm": 0.00477, "time": 0.33797}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00094, "acc_pose": 0.66583, "loss": 0.00094, "grad_norm": 0.00515, "time": 0.33778}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00095, "acc_pose": 0.68735, "loss": 0.00095, "grad_norm": 0.00615, "time": 0.34056}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00093, "acc_pose": 0.68291, "loss": 0.00093, "grad_norm": 0.00471, "time": 0.34056}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00093, "acc_pose": 0.67752, "loss": 0.00093, "grad_norm": 0.00733, "time": 0.38939}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00096, "acc_pose": 0.69269, "loss": 0.00096, "grad_norm": 0.00988, "time": 0.33734}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00095, "acc_pose": 0.68473, "loss": 0.00095, "grad_norm": 0.0072, "time": 0.33743}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00095, "acc_pose": 0.68487, "loss": 0.00095, "grad_norm": 0.00645, "time": 0.33615}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00091, "acc_pose": 0.68931, "loss": 0.00091, "grad_norm": 0.00318, "time": 0.33625}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04719, "heatmap_loss": 0.00092, "acc_pose": 0.68644, "loss": 0.00092, "grad_norm": 0.0051, "time": 0.38387}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0009, "acc_pose": 0.70693, "loss": 0.0009, "grad_norm": 0.00332, "time": 0.33411}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0009, "acc_pose": 0.6977, "loss": 0.0009, "grad_norm": 0.00403, "time": 0.33418}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00089, "acc_pose": 0.71661, "loss": 0.00089, "grad_norm": 0.00341, "time": 0.33429}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00089, "acc_pose": 0.69298, "loss": 0.00089, "grad_norm": 0.00415, "time": 0.33525}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04713, "heatmap_loss": 0.00089, "acc_pose": 0.69286, "loss": 0.00089, "grad_norm": 0.00417, "time": 0.38837}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00088, "acc_pose": 0.72214, "loss": 0.00088, "grad_norm": 0.00353, "time": 0.33828}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00088, "acc_pose": 0.71615, "loss": 0.00088, "grad_norm": 0.00383, "time": 0.33746}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00088, "acc_pose": 0.70948, "loss": 0.00088, "grad_norm": 0.00385, "time": 0.33603}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00086, "acc_pose": 0.72187, "loss": 0.00086, "grad_norm": 0.0028, "time": 0.33876}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04682, "heatmap_loss": 0.00086, "acc_pose": 0.69541, "loss": 0.00086, "grad_norm": 0.003, "time": 0.38876}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00086, "acc_pose": 0.72578, "loss": 0.00086, "grad_norm": 0.00363, "time": 0.33775}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00086, "acc_pose": 0.69954, "loss": 0.00086, "grad_norm": 0.00268, "time": 0.3365}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00085, "acc_pose": 0.71021, "loss": 0.00085, "grad_norm": 0.00291, "time": 0.33587}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00085, "acc_pose": 0.69157, "loss": 0.00085, "grad_norm": 0.00304, "time": 0.33752}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04706, "heatmap_loss": 0.00084, "acc_pose": 0.73389, "loss": 0.00084, "grad_norm": 0.00281, "time": 0.38461}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00086, "acc_pose": 0.73023, "loss": 0.00086, "grad_norm": 0.00496, "time": 0.33403}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00086, "acc_pose": 0.71539, "loss": 0.00086, "grad_norm": 0.00453, "time": 0.33432}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00087, "acc_pose": 0.72183, "loss": 0.00087, "grad_norm": 0.00568, "time": 0.33329}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00084, "acc_pose": 0.72053, "loss": 0.00084, "grad_norm": 0.00318, "time": 0.33426}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04694, "heatmap_loss": 0.00084, "acc_pose": 0.73349, "loss": 0.00084, "grad_norm": 0.00263, "time": 0.38594}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00083, "acc_pose": 0.72952, "loss": 0.00083, "grad_norm": 0.00307, "time": 0.33608}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00083, "acc_pose": 0.7442, "loss": 0.00083, "grad_norm": 0.00347, "time": 0.33469}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00082, "acc_pose": 0.72588, "loss": 0.00082, "grad_norm": 0.00206, "time": 0.33419}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00081, "acc_pose": 0.7362, "loss": 0.00081, "grad_norm": 0.00203, "time": 0.33487}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0471, "heatmap_loss": 0.00082, "acc_pose": 0.72712, "loss": 0.00082, "grad_norm": 0.00278, "time": 0.38685}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00081, "acc_pose": 0.72257, "loss": 0.00081, "grad_norm": 0.00226, "time": 0.33612}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0008, "acc_pose": 0.74582, "loss": 0.0008, "grad_norm": 0.00215, "time": 0.33885}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00081, "acc_pose": 0.72482, "loss": 0.00081, "grad_norm": 0.00261, "time": 0.337}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00081, "acc_pose": 0.7439, "loss": 0.00081, "grad_norm": 0.0027, "time": 0.33827}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04698, "heatmap_loss": 0.0008, "acc_pose": 0.73639, "loss": 0.0008, "grad_norm": 0.00199, "time": 0.38582}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.0008, "acc_pose": 0.75469, "loss": 0.0008, "grad_norm": 0.00232, "time": 0.33585}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00083, "acc_pose": 0.73045, "loss": 0.00083, "grad_norm": 0.00441, "time": 0.33611}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00088, "acc_pose": 0.73042, "loss": 0.00088, "grad_norm": 0.0042, "time": 0.33609}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00082, "acc_pose": 0.72058, "loss": 0.00082, "grad_norm": 0.00257, "time": 0.34133}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04692, "heatmap_loss": 0.00079, "acc_pose": 0.73707, "loss": 0.00079, "grad_norm": 0.00204, "time": 0.38993}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00079, "acc_pose": 0.75538, "loss": 0.00079, "grad_norm": 0.00211, "time": 0.33839}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00079, "acc_pose": 0.74883, "loss": 0.00079, "grad_norm": 0.00188, "time": 0.3369}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00079, "acc_pose": 0.73459, "loss": 0.00079, "grad_norm": 0.00189, "time": 0.33622}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00079, "acc_pose": 0.73594, "loss": 0.00079, "grad_norm": 0.00184, "time": 0.33761}
+{"mode": "val", "epoch": 20, "iter": 204, "lr": 0.0, "AP": 0.73444, "AP .5": 0.89993, "AP .75": 0.81329, "AP (M)": 0.65545, "AP (L)": 0.76303, "AR": 0.79321, "AR .5": 0.94096, "AR .75": 0.86272, "AR (M)": 0.74728, "AR (L)": 0.85946}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04758, "heatmap_loss": 0.00078, "acc_pose": 0.73618, "loss": 0.00078, "grad_norm": 0.00192, "time": 0.38342}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00078, "acc_pose": 0.74914, "loss": 0.00078, "grad_norm": 0.00194, "time": 0.3369}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00078, "acc_pose": 0.75432, "loss": 0.00078, "grad_norm": 0.0019, "time": 0.33504}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00077, "acc_pose": 0.73291, "loss": 0.00077, "grad_norm": 0.00185, "time": 0.33576}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00078, "acc_pose": 0.74413, "loss": 0.00078, "grad_norm": 0.00258, "time": 0.33376}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04699, "heatmap_loss": 0.00078, "acc_pose": 0.74918, "loss": 0.00078, "grad_norm": 0.00243, "time": 0.38402}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00078, "acc_pose": 0.74641, "loss": 0.00078, "grad_norm": 0.0025, "time": 0.33569}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00077, "acc_pose": 0.7503, "loss": 0.00077, "grad_norm": 0.00241, "time": 0.33426}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00055, "heatmap_loss": 0.00078, "acc_pose": 0.75505, "loss": 0.00078, "grad_norm": 0.00265, "time": 0.33663}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00062, "heatmap_loss": 0.00076, "acc_pose": 0.76472, "loss": 0.00076, "grad_norm": 0.00183, "time": 0.336}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04689, "heatmap_loss": 0.00076, "acc_pose": 0.76097, "loss": 0.00076, "grad_norm": 0.00177, "time": 0.38724}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00077, "acc_pose": 0.75629, "loss": 0.00077, "grad_norm": 0.0024, "time": 0.3366}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00076, "acc_pose": 0.75633, "loss": 0.00076, "grad_norm": 0.00211, "time": 0.33368}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00076, "acc_pose": 0.7474, "loss": 0.00076, "grad_norm": 0.00189, "time": 0.33534}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00077, "acc_pose": 0.7524, "loss": 0.00077, "grad_norm": 0.00207, "time": 0.33498}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04687, "heatmap_loss": 0.00075, "acc_pose": 0.74043, "loss": 0.00075, "grad_norm": 0.0017, "time": 0.38812}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00075, "acc_pose": 0.76057, "loss": 0.00075, "grad_norm": 0.00179, "time": 0.33768}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00076, "acc_pose": 0.75992, "loss": 0.00076, "grad_norm": 0.00222, "time": 0.33684}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.0008, "acc_pose": 0.74762, "loss": 0.0008, "grad_norm": 0.0029, "time": 0.33481}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00076, "acc_pose": 0.75743, "loss": 0.00076, "grad_norm": 0.00187, "time": 0.33529}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04686, "heatmap_loss": 0.0008, "acc_pose": 0.74839, "loss": 0.0008, "grad_norm": 0.00255, "time": 0.38854}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00078, "acc_pose": 0.75396, "loss": 0.00078, "grad_norm": 0.00217, "time": 0.33724}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00076, "acc_pose": 0.76647, "loss": 0.00076, "grad_norm": 0.00187, "time": 0.33646}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00075, "acc_pose": 0.75792, "loss": 0.00075, "grad_norm": 0.00166, "time": 0.33582}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00074, "acc_pose": 0.75221, "loss": 0.00074, "grad_norm": 0.00179, "time": 0.33566}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04689, "heatmap_loss": 0.00074, "acc_pose": 0.76395, "loss": 0.00074, "grad_norm": 0.00159, "time": 0.38695}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00074, "acc_pose": 0.77739, "loss": 0.00074, "grad_norm": 0.00162, "time": 0.33516}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00073, "acc_pose": 0.75464, "loss": 0.00073, "grad_norm": 0.00169, "time": 0.33582}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00074, "acc_pose": 0.77193, "loss": 0.00074, "grad_norm": 0.00162, "time": 0.33465}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00073, "acc_pose": 0.75601, "loss": 0.00073, "grad_norm": 0.0017, "time": 0.33453}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04731, "heatmap_loss": 0.00074, "acc_pose": 0.7591, "loss": 0.00074, "grad_norm": 0.00166, "time": 0.38461}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00073, "acc_pose": 0.76521, "loss": 0.00073, "grad_norm": 0.00164, "time": 0.3341}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00073, "acc_pose": 0.77981, "loss": 0.00073, "grad_norm": 0.00182, "time": 0.33343}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00073, "acc_pose": 0.75147, "loss": 0.00073, "grad_norm": 0.00165, "time": 0.33274}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00072, "acc_pose": 0.75207, "loss": 0.00072, "grad_norm": 0.00168, "time": 0.33398}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04692, "heatmap_loss": 0.00072, "acc_pose": 0.7818, "loss": 0.00072, "grad_norm": 0.00172, "time": 0.38863}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00072, "acc_pose": 0.76466, "loss": 0.00072, "grad_norm": 0.00168, "time": 0.33695}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00073, "acc_pose": 0.76279, "loss": 0.00073, "grad_norm": 0.00175, "time": 0.33734}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00072, "acc_pose": 0.76678, "loss": 0.00072, "grad_norm": 0.00157, "time": 0.33717}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00072, "acc_pose": 0.75865, "loss": 0.00072, "grad_norm": 0.00182, "time": 0.33692}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04714, "heatmap_loss": 0.00072, "acc_pose": 0.75806, "loss": 0.00072, "grad_norm": 0.00186, "time": 0.38766}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00073, "acc_pose": 0.77253, "loss": 0.00073, "grad_norm": 0.00162, "time": 0.3343}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00072, "acc_pose": 0.77557, "loss": 0.00072, "grad_norm": 0.00167, "time": 0.33651}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00072, "acc_pose": 0.76979, "loss": 0.00072, "grad_norm": 0.00188, "time": 0.33741}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00072, "acc_pose": 0.76709, "loss": 0.00072, "grad_norm": 0.00167, "time": 0.33639}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00072, "acc_pose": 0.76195, "loss": 0.00072, "grad_norm": 0.0017, "time": 0.38597}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00073, "acc_pose": 0.7692, "loss": 0.00073, "grad_norm": 0.00169, "time": 0.33692}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00072, "acc_pose": 0.77634, "loss": 0.00072, "grad_norm": 0.00171, "time": 0.33541}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00071, "acc_pose": 0.77697, "loss": 0.00071, "grad_norm": 0.00158, "time": 0.33744}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00072, "acc_pose": 0.75441, "loss": 0.00072, "grad_norm": 0.00175, "time": 0.33718}
+{"mode": "val", "epoch": 30, "iter": 204, "lr": 0.0, "AP": 0.75264, "AP .5": 0.90546, "AP .75": 0.82769, "AP (M)": 0.67772, "AP (L)": 0.77964, "AR": 0.80932, "AR .5": 0.94742, "AR .75": 0.87547, "AR (M)": 0.76599, "AR (L)": 0.87161}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04772, "heatmap_loss": 0.00072, "acc_pose": 0.78441, "loss": 0.00072, "grad_norm": 0.0016, "time": 0.38064}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00071, "acc_pose": 0.78017, "loss": 0.00071, "grad_norm": 0.00158, "time": 0.33713}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00071, "acc_pose": 0.7625, "loss": 0.00071, "grad_norm": 0.00165, "time": 0.33729}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00071, "acc_pose": 0.77824, "loss": 0.00071, "grad_norm": 0.00168, "time": 0.34009}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.0007, "acc_pose": 0.77605, "loss": 0.0007, "grad_norm": 0.00159, "time": 0.33592}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04954, "heatmap_loss": 0.0007, "acc_pose": 0.77422, "loss": 0.0007, "grad_norm": 0.00153, "time": 0.38911}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00071, "acc_pose": 0.79526, "loss": 0.00071, "grad_norm": 0.00158, "time": 0.34117}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0007, "acc_pose": 0.77912, "loss": 0.0007, "grad_norm": 0.00161, "time": 0.33969}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0007, "acc_pose": 0.78037, "loss": 0.0007, "grad_norm": 0.00159, "time": 0.33931}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.0007, "acc_pose": 0.7759, "loss": 0.0007, "grad_norm": 0.00164, "time": 0.33966}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.0007, "acc_pose": 0.78046, "loss": 0.0007, "grad_norm": 0.00157, "time": 0.3874}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00062, "heatmap_loss": 0.00071, "acc_pose": 0.78073, "loss": 0.00071, "grad_norm": 0.00163, "time": 0.33505}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0007, "acc_pose": 0.78105, "loss": 0.0007, "grad_norm": 0.00175, "time": 0.33515}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.0007, "acc_pose": 0.78112, "loss": 0.0007, "grad_norm": 0.0017, "time": 0.33414}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0007, "acc_pose": 0.78013, "loss": 0.0007, "grad_norm": 0.00158, "time": 0.33492}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.0007, "acc_pose": 0.77626, "loss": 0.0007, "grad_norm": 0.00152, "time": 0.38795}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00074, "acc_pose": 0.75652, "loss": 0.00074, "grad_norm": 0.00198, "time": 0.33678}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00072, "acc_pose": 0.78386, "loss": 0.00072, "grad_norm": 0.00177, "time": 0.33887}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0007, "acc_pose": 0.7807, "loss": 0.0007, "grad_norm": 0.00154, "time": 0.33665}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0007, "acc_pose": 0.77928, "loss": 0.0007, "grad_norm": 0.00155, "time": 0.33513}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04699, "heatmap_loss": 0.0007, "acc_pose": 0.77964, "loss": 0.0007, "grad_norm": 0.00164, "time": 0.38551}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00069, "acc_pose": 0.78187, "loss": 0.00069, "grad_norm": 0.00161, "time": 0.33603}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.77757, "loss": 0.00069, "grad_norm": 0.00153, "time": 0.33614}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00069, "acc_pose": 0.77432, "loss": 0.00069, "grad_norm": 0.00166, "time": 0.33445}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00055, "heatmap_loss": 0.00069, "acc_pose": 0.77995, "loss": 0.00069, "grad_norm": 0.0016, "time": 0.33804}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04718, "heatmap_loss": 0.00069, "acc_pose": 0.76456, "loss": 0.00069, "grad_norm": 0.00155, "time": 0.3866}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.7746, "loss": 0.00069, "grad_norm": 0.00153, "time": 0.33398}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00069, "acc_pose": 0.76628, "loss": 0.00069, "grad_norm": 0.00155, "time": 0.33506}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00068, "acc_pose": 0.77429, "loss": 0.00068, "grad_norm": 0.00154, "time": 0.33673}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.78511, "loss": 0.00069, "grad_norm": 0.0015, "time": 0.33873}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00068, "acc_pose": 0.77047, "loss": 0.00068, "grad_norm": 0.00145, "time": 0.38832}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00069, "acc_pose": 0.77379, "loss": 0.00069, "grad_norm": 0.00155, "time": 0.33481}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00068, "acc_pose": 0.7754, "loss": 0.00068, "grad_norm": 0.00146, "time": 0.33508}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00068, "acc_pose": 0.79712, "loss": 0.00068, "grad_norm": 0.00154, "time": 0.33642}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00068, "acc_pose": 0.76381, "loss": 0.00068, "grad_norm": 0.00152, "time": 0.33721}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0472, "heatmap_loss": 0.00068, "acc_pose": 0.78035, "loss": 0.00068, "grad_norm": 0.00156, "time": 0.38817}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00068, "acc_pose": 0.78901, "loss": 0.00068, "grad_norm": 0.00149, "time": 0.33891}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.80312, "loss": 0.00069, "grad_norm": 0.00153, "time": 0.33922}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00068, "acc_pose": 0.77435, "loss": 0.00068, "grad_norm": 0.00143, "time": 0.33857}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00069, "acc_pose": 0.79214, "loss": 0.00069, "grad_norm": 0.00161, "time": 0.33915}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04687, "heatmap_loss": 0.00068, "acc_pose": 0.79059, "loss": 0.00068, "grad_norm": 0.00155, "time": 0.38425}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00067, "acc_pose": 0.77419, "loss": 0.00067, "grad_norm": 0.00159, "time": 0.33603}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00072, "acc_pose": 0.79285, "loss": 0.00072, "grad_norm": 0.002, "time": 0.33776}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00072, "acc_pose": 0.76485, "loss": 0.00072, "grad_norm": 0.00154, "time": 0.33485}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.7805, "loss": 0.00069, "grad_norm": 0.00164, "time": 0.33488}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04698, "heatmap_loss": 0.00068, "acc_pose": 0.77962, "loss": 0.00068, "grad_norm": 0.00153, "time": 0.38415}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00068, "acc_pose": 0.78728, "loss": 0.00068, "grad_norm": 0.00157, "time": 0.3349}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00046, "heatmap_loss": 0.00068, "acc_pose": 0.78463, "loss": 0.00068, "grad_norm": 0.0015, "time": 0.33432}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.7979, "loss": 0.00067, "grad_norm": 0.00147, "time": 0.33532}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.77893, "loss": 0.00067, "grad_norm": 0.00145, "time": 0.33485}
+{"mode": "val", "epoch": 40, "iter": 204, "lr": 0.0, "AP": 0.76184, "AP .5": 0.90694, "AP .75": 0.83713, "AP (M)": 0.68769, "AP (L)": 0.7905, "AR": 0.81568, "AR .5": 0.94726, "AR .75": 0.88145, "AR (M)": 0.77225, "AR (L)": 0.87867}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04649, "heatmap_loss": 0.00067, "acc_pose": 0.78731, "loss": 0.00067, "grad_norm": 0.00146, "time": 0.38286}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00067, "acc_pose": 0.78718, "loss": 0.00067, "grad_norm": 0.00159, "time": 0.33636}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00067, "acc_pose": 0.79362, "loss": 0.00067, "grad_norm": 0.00148, "time": 0.33439}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00067, "acc_pose": 0.77885, "loss": 0.00067, "grad_norm": 0.00149, "time": 0.33472}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00067, "acc_pose": 0.78137, "loss": 0.00067, "grad_norm": 0.0015, "time": 0.33538}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04667, "heatmap_loss": 0.00067, "acc_pose": 0.79283, "loss": 0.00067, "grad_norm": 0.00146, "time": 0.38602}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00067, "acc_pose": 0.78821, "loss": 0.00067, "grad_norm": 0.00149, "time": 0.33729}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.7928, "loss": 0.00067, "grad_norm": 0.0015, "time": 0.33597}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.80383, "loss": 0.00066, "grad_norm": 0.00153, "time": 0.33516}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00067, "acc_pose": 0.78674, "loss": 0.00067, "grad_norm": 0.00148, "time": 0.33392}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04715, "heatmap_loss": 0.00067, "acc_pose": 0.78703, "loss": 0.00067, "grad_norm": 0.00159, "time": 0.38466}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.79378, "loss": 0.00066, "grad_norm": 0.0016, "time": 0.33314}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00066, "acc_pose": 0.78707, "loss": 0.00066, "grad_norm": 0.00147, "time": 0.3343}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.79178, "loss": 0.00066, "grad_norm": 0.00141, "time": 0.3342}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00067, "acc_pose": 0.77378, "loss": 0.00067, "grad_norm": 0.00159, "time": 0.33665}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0489, "heatmap_loss": 0.00066, "acc_pose": 0.78374, "loss": 0.00066, "grad_norm": 0.0015, "time": 0.38516}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00066, "acc_pose": 0.79839, "loss": 0.00066, "grad_norm": 0.00149, "time": 0.33426}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.79202, "loss": 0.00066, "grad_norm": 0.00143, "time": 0.33796}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.79628, "loss": 0.00066, "grad_norm": 0.0015, "time": 0.33842}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.78514, "loss": 0.00066, "grad_norm": 0.00148, "time": 0.33743}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00067, "acc_pose": 0.79576, "loss": 0.00067, "grad_norm": 0.00149, "time": 0.38999}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.7971, "loss": 0.00066, "grad_norm": 0.00153, "time": 0.33598}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.78032, "loss": 0.00066, "grad_norm": 0.0015, "time": 0.33619}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00066, "acc_pose": 0.78611, "loss": 0.00066, "grad_norm": 0.00155, "time": 0.33448}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.80095, "loss": 0.00066, "grad_norm": 0.00144, "time": 0.33611}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04704, "heatmap_loss": 0.00065, "acc_pose": 0.78367, "loss": 0.00065, "grad_norm": 0.00149, "time": 0.3882}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00065, "acc_pose": 0.80015, "loss": 0.00065, "grad_norm": 0.00147, "time": 0.33629}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.78171, "loss": 0.00066, "grad_norm": 0.00155, "time": 0.33773}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00065, "acc_pose": 0.79977, "loss": 0.00065, "grad_norm": 0.00149, "time": 0.33637}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.81316, "loss": 0.00065, "grad_norm": 0.00152, "time": 0.33668}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04714, "heatmap_loss": 0.00065, "acc_pose": 0.79847, "loss": 0.00065, "grad_norm": 0.00148, "time": 0.38769}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.80256, "loss": 0.00065, "grad_norm": 0.00141, "time": 0.33302}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00064, "heatmap_loss": 0.00067, "acc_pose": 0.79847, "loss": 0.00067, "grad_norm": 0.00151, "time": 0.33536}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.79333, "loss": 0.00066, "grad_norm": 0.00157, "time": 0.3347}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.79586, "loss": 0.00066, "grad_norm": 0.00143, "time": 0.33575}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00066, "acc_pose": 0.79189, "loss": 0.00066, "grad_norm": 0.00151, "time": 0.38552}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.81404, "loss": 0.00064, "grad_norm": 0.00145, "time": 0.33538}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.78923, "loss": 0.00065, "grad_norm": 0.00153, "time": 0.33581}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.79664, "loss": 0.00066, "grad_norm": 0.00149, "time": 0.336}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.79736, "loss": 0.00066, "grad_norm": 0.00152, "time": 0.33694}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04819, "heatmap_loss": 0.00066, "acc_pose": 0.79115, "loss": 0.00066, "grad_norm": 0.00146, "time": 0.38769}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00068, "acc_pose": 0.78194, "loss": 0.00068, "grad_norm": 0.00163, "time": 0.33733}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00065, "acc_pose": 0.79345, "loss": 0.00065, "grad_norm": 0.00144, "time": 0.33648}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00065, "acc_pose": 0.78361, "loss": 0.00065, "grad_norm": 0.00146, "time": 0.3368}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.80556, "loss": 0.00065, "grad_norm": 0.00148, "time": 0.3367}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04709, "heatmap_loss": 0.00065, "acc_pose": 0.81693, "loss": 0.00065, "grad_norm": 0.00144, "time": 0.38499}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.80127, "loss": 0.00066, "grad_norm": 0.00142, "time": 0.33434}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00065, "acc_pose": 0.79834, "loss": 0.00065, "grad_norm": 0.00144, "time": 0.33446}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.78893, "loss": 0.00065, "grad_norm": 0.0015, "time": 0.33385}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00064, "acc_pose": 0.77737, "loss": 0.00064, "grad_norm": 0.00146, "time": 0.3366}
+{"mode": "val", "epoch": 50, "iter": 204, "lr": 0.0, "AP": 0.76895, "AP .5": 0.90937, "AP .75": 0.84362, "AP (M)": 0.6952, "AP (L)": 0.7984, "AR": 0.82272, "AR .5": 0.94994, "AR .75": 0.88822, "AR (M)": 0.78082, "AR (L)": 0.8841}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04689, "heatmap_loss": 0.00065, "acc_pose": 0.79649, "loss": 0.00065, "grad_norm": 0.00146, "time": 0.38164}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.80442, "loss": 0.00065, "grad_norm": 0.00147, "time": 0.33401}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.79142, "loss": 0.00065, "grad_norm": 0.00147, "time": 0.33582}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00065, "acc_pose": 0.79836, "loss": 0.00065, "grad_norm": 0.0015, "time": 0.33546}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00065, "acc_pose": 0.78636, "loss": 0.00065, "grad_norm": 0.00156, "time": 0.33405}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04715, "heatmap_loss": 0.00065, "acc_pose": 0.79131, "loss": 0.00065, "grad_norm": 0.00141, "time": 0.38871}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00064, "acc_pose": 0.80077, "loss": 0.00064, "grad_norm": 0.00142, "time": 0.33519}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.81402, "loss": 0.00065, "grad_norm": 0.0015, "time": 0.33852}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00064, "acc_pose": 0.80053, "loss": 0.00064, "grad_norm": 0.00148, "time": 0.33692}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00059, "heatmap_loss": 0.00065, "acc_pose": 0.79431, "loss": 0.00065, "grad_norm": 0.00139, "time": 0.33806}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04724, "heatmap_loss": 0.00064, "acc_pose": 0.79813, "loss": 0.00064, "grad_norm": 0.00141, "time": 0.386}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.80101, "loss": 0.00064, "grad_norm": 0.00141, "time": 0.33393}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.80871, "loss": 0.00064, "grad_norm": 0.00146, "time": 0.33802}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.79996, "loss": 0.00064, "grad_norm": 0.00139, "time": 0.33697}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.80484, "loss": 0.00064, "grad_norm": 0.00143, "time": 0.33994}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04729, "heatmap_loss": 0.00064, "acc_pose": 0.78863, "loss": 0.00064, "grad_norm": 0.00134, "time": 0.3876}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.81466, "loss": 0.00064, "grad_norm": 0.0014, "time": 0.33362}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00063, "acc_pose": 0.80664, "loss": 0.00063, "grad_norm": 0.00134, "time": 0.33501}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.79997, "loss": 0.00064, "grad_norm": 0.0015, "time": 0.33449}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00065, "acc_pose": 0.81176, "loss": 0.00065, "grad_norm": 0.00143, "time": 0.33443}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00064, "acc_pose": 0.80555, "loss": 0.00064, "grad_norm": 0.00148, "time": 0.38644}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00064, "acc_pose": 0.8002, "loss": 0.00064, "grad_norm": 0.00133, "time": 0.33329}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00064, "acc_pose": 0.79641, "loss": 0.00064, "grad_norm": 0.00138, "time": 0.33374}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00063, "acc_pose": 0.80807, "loss": 0.00063, "grad_norm": 0.00141, "time": 0.33583}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00063, "acc_pose": 0.80559, "loss": 0.00063, "grad_norm": 0.00146, "time": 0.33545}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00064, "acc_pose": 0.79183, "loss": 0.00064, "grad_norm": 0.00137, "time": 0.38496}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.82086, "loss": 0.00063, "grad_norm": 0.00143, "time": 0.33477}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00063, "acc_pose": 0.79241, "loss": 0.00063, "grad_norm": 0.0015, "time": 0.33751}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.79301, "loss": 0.00063, "grad_norm": 0.00138, "time": 0.33604}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.79112, "loss": 0.00063, "grad_norm": 0.00142, "time": 0.33577}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04708, "heatmap_loss": 0.00064, "acc_pose": 0.80065, "loss": 0.00064, "grad_norm": 0.00144, "time": 0.38866}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00063, "acc_pose": 0.80415, "loss": 0.00063, "grad_norm": 0.0014, "time": 0.33644}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.79827, "loss": 0.00063, "grad_norm": 0.00136, "time": 0.33766}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.81207, "loss": 0.00063, "grad_norm": 0.00155, "time": 0.33501}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.78988, "loss": 0.00064, "grad_norm": 0.00138, "time": 0.33608}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04705, "heatmap_loss": 0.00064, "acc_pose": 0.80396, "loss": 0.00064, "grad_norm": 0.00142, "time": 0.3884}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.80656, "loss": 0.00063, "grad_norm": 0.00138, "time": 0.33808}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.80739, "loss": 0.00063, "grad_norm": 0.00143, "time": 0.33418}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80733, "loss": 0.00063, "grad_norm": 0.00138, "time": 0.33588}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.79086, "loss": 0.00063, "grad_norm": 0.00135, "time": 0.33673}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00063, "acc_pose": 0.80894, "loss": 0.00063, "grad_norm": 0.00144, "time": 0.38852}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80312, "loss": 0.00063, "grad_norm": 0.00141, "time": 0.33613}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.81124, "loss": 0.00063, "grad_norm": 0.00133, "time": 0.33633}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.79697, "loss": 0.00062, "grad_norm": 0.0014, "time": 0.34053}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00063, "acc_pose": 0.82115, "loss": 0.00063, "grad_norm": 0.00142, "time": 0.33886}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04733, "heatmap_loss": 0.00063, "acc_pose": 0.81256, "loss": 0.00063, "grad_norm": 0.0014, "time": 0.38515}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.81092, "loss": 0.00062, "grad_norm": 0.00149, "time": 0.33402}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.80677, "loss": 0.00063, "grad_norm": 0.0014, "time": 0.33406}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.81114, "loss": 0.00063, "grad_norm": 0.00143, "time": 0.33394}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80344, "loss": 0.00063, "grad_norm": 0.00147, "time": 0.33611}
+{"mode": "val", "epoch": 60, "iter": 204, "lr": 0.0, "AP": 0.77286, "AP .5": 0.91075, "AP .75": 0.84527, "AP (M)": 0.69948, "AP (L)": 0.80437, "AR": 0.82467, "AR .5": 0.94946, "AR .75": 0.88791, "AR (M)": 0.78233, "AR (L)": 0.88655}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04702, "heatmap_loss": 0.00062, "acc_pose": 0.79793, "loss": 0.00062, "grad_norm": 0.00134, "time": 0.38755}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00065, "acc_pose": 0.79435, "loss": 0.00065, "grad_norm": 0.00166, "time": 0.33883}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81395, "loss": 0.00062, "grad_norm": 0.00136, "time": 0.33817}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.79548, "loss": 0.00063, "grad_norm": 0.00139, "time": 0.33729}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00063, "acc_pose": 0.81552, "loss": 0.00063, "grad_norm": 0.00144, "time": 0.33622}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04732, "heatmap_loss": 0.00063, "acc_pose": 0.80037, "loss": 0.00063, "grad_norm": 0.00142, "time": 0.38477}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.80578, "loss": 0.00063, "grad_norm": 0.00143, "time": 0.33664}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81735, "loss": 0.00062, "grad_norm": 0.00137, "time": 0.33691}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80593, "loss": 0.00063, "grad_norm": 0.00136, "time": 0.33577}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.80774, "loss": 0.00062, "grad_norm": 0.00144, "time": 0.33526}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04712, "heatmap_loss": 0.00062, "acc_pose": 0.80081, "loss": 0.00062, "grad_norm": 0.00138, "time": 0.38884}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00062, "acc_pose": 0.80198, "loss": 0.00062, "grad_norm": 0.00141, "time": 0.33491}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80659, "loss": 0.00063, "grad_norm": 0.00138, "time": 0.33619}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.80235, "loss": 0.00062, "grad_norm": 0.0014, "time": 0.33637}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.81408, "loss": 0.00062, "grad_norm": 0.00135, "time": 0.33807}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00061, "acc_pose": 0.80458, "loss": 0.00061, "grad_norm": 0.00134, "time": 0.39249}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81635, "loss": 0.00061, "grad_norm": 0.0014, "time": 0.33897}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.80646, "loss": 0.00062, "grad_norm": 0.00141, "time": 0.3393}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.78707, "loss": 0.00062, "grad_norm": 0.0014, "time": 0.33594}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.80933, "loss": 0.00062, "grad_norm": 0.00138, "time": 0.33669}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04731, "heatmap_loss": 0.00062, "acc_pose": 0.80898, "loss": 0.00062, "grad_norm": 0.00136, "time": 0.38711}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00062, "acc_pose": 0.81412, "loss": 0.00062, "grad_norm": 0.00139, "time": 0.33441}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00046, "heatmap_loss": 0.00062, "acc_pose": 0.80944, "loss": 0.00062, "grad_norm": 0.00139, "time": 0.33541}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80488, "loss": 0.00061, "grad_norm": 0.00143, "time": 0.33522}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81408, "loss": 0.00062, "grad_norm": 0.00134, "time": 0.33542}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04707, "heatmap_loss": 0.00062, "acc_pose": 0.81592, "loss": 0.00062, "grad_norm": 0.00199, "time": 0.38826}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81624, "loss": 0.00062, "grad_norm": 0.00144, "time": 0.33453}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.79986, "loss": 0.00062, "grad_norm": 0.00144, "time": 0.33508}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.79971, "loss": 0.00062, "grad_norm": 0.00133, "time": 0.33616}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.79098, "loss": 0.00061, "grad_norm": 0.00148, "time": 0.33405}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04726, "heatmap_loss": 0.00062, "acc_pose": 0.80922, "loss": 0.00062, "grad_norm": 0.00138, "time": 0.38644}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.82826, "loss": 0.00061, "grad_norm": 0.0013, "time": 0.33393}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.80693, "loss": 0.00062, "grad_norm": 0.00141, "time": 0.33829}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80079, "loss": 0.00061, "grad_norm": 0.00136, "time": 0.33729}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00062, "acc_pose": 0.81203, "loss": 0.00062, "grad_norm": 0.00142, "time": 0.3359}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04743, "heatmap_loss": 0.00061, "acc_pose": 0.80518, "loss": 0.00061, "grad_norm": 0.0013, "time": 0.38514}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.79657, "loss": 0.00061, "grad_norm": 0.00133, "time": 0.3346}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.81837, "loss": 0.00062, "grad_norm": 0.00133, "time": 0.33431}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00062, "acc_pose": 0.80202, "loss": 0.00062, "grad_norm": 0.00142, "time": 0.33493}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81926, "loss": 0.00062, "grad_norm": 0.00137, "time": 0.3348}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04757, "heatmap_loss": 0.00062, "acc_pose": 0.80276, "loss": 0.00062, "grad_norm": 0.00138, "time": 0.38823}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.82005, "loss": 0.00061, "grad_norm": 0.00143, "time": 0.3384}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00056, "heatmap_loss": 0.00061, "acc_pose": 0.81598, "loss": 0.00061, "grad_norm": 0.00136, "time": 0.3377}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.80567, "loss": 0.00062, "grad_norm": 0.00134, "time": 0.33439}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00063, "heatmap_loss": 0.00062, "acc_pose": 0.8048, "loss": 0.00062, "grad_norm": 0.00133, "time": 0.33668}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04709, "heatmap_loss": 0.00061, "acc_pose": 0.81476, "loss": 0.00061, "grad_norm": 0.00134, "time": 0.38708}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81769, "loss": 0.00061, "grad_norm": 0.00146, "time": 0.33913}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00061, "acc_pose": 0.80406, "loss": 0.00061, "grad_norm": 0.00136, "time": 0.33749}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.80287, "loss": 0.00062, "grad_norm": 0.00137, "time": 0.33794}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81436, "loss": 0.00061, "grad_norm": 0.00147, "time": 0.33693}
+{"mode": "val", "epoch": 70, "iter": 204, "lr": 0.0, "AP": 0.77446, "AP .5": 0.91254, "AP .75": 0.84454, "AP (M)": 0.69984, "AP (L)": 0.80548, "AR": 0.82679, "AR .5": 0.95025, "AR .75": 0.8887, "AR (M)": 0.78476, "AR (L)": 0.88811}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04689, "heatmap_loss": 0.00061, "acc_pose": 0.81639, "loss": 0.00061, "grad_norm": 0.00131, "time": 0.38536}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80946, "loss": 0.00061, "grad_norm": 0.00135, "time": 0.33741}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00062, "acc_pose": 0.80596, "loss": 0.00062, "grad_norm": 0.00136, "time": 0.33569}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80743, "loss": 0.00061, "grad_norm": 0.00135, "time": 0.33458}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00061, "acc_pose": 0.82504, "loss": 0.00061, "grad_norm": 0.00139, "time": 0.33762}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04729, "heatmap_loss": 0.00061, "acc_pose": 0.81344, "loss": 0.00061, "grad_norm": 0.00134, "time": 0.3895}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.819, "loss": 0.00061, "grad_norm": 0.0014, "time": 0.33906}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80561, "loss": 0.00061, "grad_norm": 0.00143, "time": 0.33979}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.81303, "loss": 0.00061, "grad_norm": 0.00135, "time": 0.33962}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80633, "loss": 0.00061, "grad_norm": 0.00131, "time": 0.33956}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0485, "heatmap_loss": 0.00061, "acc_pose": 0.80483, "loss": 0.00061, "grad_norm": 0.0014, "time": 0.38615}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00061, "acc_pose": 0.81726, "loss": 0.00061, "grad_norm": 0.00135, "time": 0.33474}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80545, "loss": 0.00061, "grad_norm": 0.00133, "time": 0.33459}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.81753, "loss": 0.0006, "grad_norm": 0.00131, "time": 0.33332}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.81838, "loss": 0.00061, "grad_norm": 0.00131, "time": 0.33443}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04716, "heatmap_loss": 0.00061, "acc_pose": 0.81594, "loss": 0.00061, "grad_norm": 0.00138, "time": 0.38818}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.8002, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33867}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.79808, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.33684}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.80556, "loss": 0.0006, "grad_norm": 0.00136, "time": 0.33431}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.80017, "loss": 0.00061, "grad_norm": 0.00132, "time": 0.33402}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04711, "heatmap_loss": 0.00061, "acc_pose": 0.82033, "loss": 0.00061, "grad_norm": 0.00128, "time": 0.39197}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.0006, "acc_pose": 0.80535, "loss": 0.0006, "grad_norm": 0.0014, "time": 0.33547}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.81483, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33581}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80653, "loss": 0.00061, "grad_norm": 0.00136, "time": 0.33392}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.81621, "loss": 0.00061, "grad_norm": 0.00145, "time": 0.33411}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04711, "heatmap_loss": 0.0006, "acc_pose": 0.80615, "loss": 0.0006, "grad_norm": 0.00136, "time": 0.38675}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.81975, "loss": 0.00061, "grad_norm": 0.00131, "time": 0.33434}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.82868, "loss": 0.0006, "grad_norm": 0.00134, "time": 0.33512}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.0006, "acc_pose": 0.81412, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33369}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.8132, "loss": 0.00061, "grad_norm": 0.00136, "time": 0.33498}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04737, "heatmap_loss": 0.0006, "acc_pose": 0.80821, "loss": 0.0006, "grad_norm": 0.00139, "time": 0.38677}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81447, "loss": 0.00061, "grad_norm": 0.00143, "time": 0.33441}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00061, "heatmap_loss": 0.0006, "acc_pose": 0.81714, "loss": 0.0006, "grad_norm": 0.00138, "time": 0.33534}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.81137, "loss": 0.0006, "grad_norm": 0.00128, "time": 0.33447}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.81757, "loss": 0.0006, "grad_norm": 0.0014, "time": 0.33552}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04718, "heatmap_loss": 0.0006, "acc_pose": 0.82578, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.38414}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.83002, "loss": 0.00059, "grad_norm": 0.0014, "time": 0.33491}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.82652, "loss": 0.0006, "grad_norm": 0.00138, "time": 0.3349}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.80484, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.3355}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.81862, "loss": 0.0006, "grad_norm": 0.00136, "time": 0.33473}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04704, "heatmap_loss": 0.0006, "acc_pose": 0.81849, "loss": 0.0006, "grad_norm": 0.00137, "time": 0.38663}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.0006, "acc_pose": 0.82441, "loss": 0.0006, "grad_norm": 0.00145, "time": 0.33355}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.0006, "acc_pose": 0.80771, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33266}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.0006, "acc_pose": 0.82817, "loss": 0.0006, "grad_norm": 0.00126, "time": 0.33317}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.81566, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33306}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04887, "heatmap_loss": 0.00061, "acc_pose": 0.82191, "loss": 0.00061, "grad_norm": 0.00134, "time": 0.38593}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.81764, "loss": 0.0006, "grad_norm": 0.00139, "time": 0.33404}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81815, "loss": 0.00059, "grad_norm": 0.00129, "time": 0.33295}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.81302, "loss": 0.0006, "grad_norm": 0.00133, "time": 0.33466}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.80725, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33335}
+{"mode": "val", "epoch": 80, "iter": 204, "lr": 0.0, "AP": 0.77814, "AP .5": 0.91362, "AP .75": 0.85079, "AP (M)": 0.70428, "AP (L)": 0.8087, "AR": 0.83057, "AR .5": 0.9512, "AR .75": 0.89389, "AR (M)": 0.78883, "AR (L)": 0.89223}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04796, "heatmap_loss": 0.0006, "acc_pose": 0.81763, "loss": 0.0006, "grad_norm": 0.00128, "time": 0.38293}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.82531, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33788}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00055, "heatmap_loss": 0.0006, "acc_pose": 0.81697, "loss": 0.0006, "grad_norm": 0.00138, "time": 0.33664}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.81899, "loss": 0.0006, "grad_norm": 0.0014, "time": 0.33462}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.81305, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33412}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04804, "heatmap_loss": 0.0006, "acc_pose": 0.81968, "loss": 0.0006, "grad_norm": 0.00137, "time": 0.38696}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.84184, "loss": 0.0006, "grad_norm": 0.00137, "time": 0.33525}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.8042, "loss": 0.0006, "grad_norm": 0.00125, "time": 0.33532}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.82822, "loss": 0.0006, "grad_norm": 0.00141, "time": 0.33364}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.80407, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.33407}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04807, "heatmap_loss": 0.00059, "acc_pose": 0.80674, "loss": 0.00059, "grad_norm": 0.00136, "time": 0.38548}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.8133, "loss": 0.0006, "grad_norm": 0.00129, "time": 0.33357}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.81889, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33331}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.81569, "loss": 0.0006, "grad_norm": 0.00137, "time": 0.33298}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.82754, "loss": 0.0006, "grad_norm": 0.00132, "time": 0.33382}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0498, "heatmap_loss": 0.0006, "acc_pose": 0.80601, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.38718}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.81891, "loss": 0.00059, "grad_norm": 0.00137, "time": 0.33402}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81421, "loss": 0.00059, "grad_norm": 0.00124, "time": 0.33436}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.82815, "loss": 0.0006, "grad_norm": 0.00135, "time": 0.33377}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.80829, "loss": 0.0006, "grad_norm": 0.00132, "time": 0.33459}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0469, "heatmap_loss": 0.0006, "acc_pose": 0.81747, "loss": 0.0006, "grad_norm": 0.00129, "time": 0.38801}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.0006, "acc_pose": 0.82376, "loss": 0.0006, "grad_norm": 0.00128, "time": 0.33411}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.81768, "loss": 0.0006, "grad_norm": 0.0013, "time": 0.33409}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82466, "loss": 0.00059, "grad_norm": 0.00132, "time": 0.3368}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00059, "heatmap_loss": 0.00059, "acc_pose": 0.83143, "loss": 0.00059, "grad_norm": 0.00129, "time": 0.33583}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04682, "heatmap_loss": 0.0006, "acc_pose": 0.81594, "loss": 0.0006, "grad_norm": 0.00128, "time": 0.38827}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00059, "acc_pose": 0.81652, "loss": 0.00059, "grad_norm": 0.00135, "time": 0.33489}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.0006, "acc_pose": 0.82475, "loss": 0.0006, "grad_norm": 0.00132, "time": 0.33526}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81145, "loss": 0.00059, "grad_norm": 0.00128, "time": 0.33347}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81998, "loss": 0.00059, "grad_norm": 0.00136, "time": 0.33379}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0467, "heatmap_loss": 0.00059, "acc_pose": 0.83136, "loss": 0.00059, "grad_norm": 0.00136, "time": 0.38516}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00059, "acc_pose": 0.81457, "loss": 0.00059, "grad_norm": 0.0013, "time": 0.33354}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.82536, "loss": 0.00059, "grad_norm": 0.00124, "time": 0.33509}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00056, "heatmap_loss": 0.00059, "acc_pose": 0.82359, "loss": 0.00059, "grad_norm": 0.00127, "time": 0.33574}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.82564, "loss": 0.00059, "grad_norm": 0.00135, "time": 0.33386}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04675, "heatmap_loss": 0.00058, "acc_pose": 0.81904, "loss": 0.00058, "grad_norm": 0.00133, "time": 0.38495}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.81497, "loss": 0.00059, "grad_norm": 0.0013, "time": 0.3333}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.83335, "loss": 0.00059, "grad_norm": 0.00138, "time": 0.33417}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.82408, "loss": 0.00059, "grad_norm": 0.00124, "time": 0.33227}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.82758, "loss": 0.00059, "grad_norm": 0.00134, "time": 0.33337}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04697, "heatmap_loss": 0.00059, "acc_pose": 0.81913, "loss": 0.00059, "grad_norm": 0.00127, "time": 0.39144}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.82876, "loss": 0.00059, "grad_norm": 0.00133, "time": 0.33972}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.82925, "loss": 0.00059, "grad_norm": 0.0014, "time": 0.33868}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82585, "loss": 0.00059, "grad_norm": 0.00123, "time": 0.33683}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.82709, "loss": 0.00058, "grad_norm": 0.00133, "time": 0.33362}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04698, "heatmap_loss": 0.00059, "acc_pose": 0.81464, "loss": 0.00059, "grad_norm": 0.00134, "time": 0.38502}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.82448, "loss": 0.00059, "grad_norm": 0.00128, "time": 0.33468}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.83001, "loss": 0.00059, "grad_norm": 0.00134, "time": 0.33369}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00059, "acc_pose": 0.83098, "loss": 0.00059, "grad_norm": 0.00135, "time": 0.33418}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.81628, "loss": 0.00058, "grad_norm": 0.00127, "time": 0.33424}
+{"mode": "val", "epoch": 90, "iter": 204, "lr": 0.0, "AP": 0.77926, "AP .5": 0.91429, "AP .75": 0.84969, "AP (M)": 0.70574, "AP (L)": 0.8087, "AR": 0.83049, "AR .5": 0.95183, "AR .75": 0.89106, "AR (M)": 0.78896, "AR (L)": 0.89175}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04773, "heatmap_loss": 0.00059, "acc_pose": 0.82535, "loss": 0.00059, "grad_norm": 0.0013, "time": 0.38219}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.81581, "loss": 0.00059, "grad_norm": 0.00124, "time": 0.33631}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.81531, "loss": 0.00059, "grad_norm": 0.00125, "time": 0.33766}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82174, "loss": 0.00059, "grad_norm": 0.00125, "time": 0.33797}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82242, "loss": 0.00059, "grad_norm": 0.00144, "time": 0.33914}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04692, "heatmap_loss": 0.00059, "acc_pose": 0.83771, "loss": 0.00059, "grad_norm": 0.00125, "time": 0.38324}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82298, "loss": 0.00058, "grad_norm": 0.00139, "time": 0.3337}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.81551, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33684}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.81646, "loss": 0.00059, "grad_norm": 0.00132, "time": 0.33584}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.82121, "loss": 0.00058, "grad_norm": 0.00127, "time": 0.33794}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04691, "heatmap_loss": 0.00058, "acc_pose": 0.82188, "loss": 0.00058, "grad_norm": 0.00135, "time": 0.38581}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00059, "acc_pose": 0.81714, "loss": 0.00059, "grad_norm": 0.00127, "time": 0.33623}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82847, "loss": 0.00059, "grad_norm": 0.0013, "time": 0.33495}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00059, "acc_pose": 0.82818, "loss": 0.00059, "grad_norm": 0.00134, "time": 0.33418}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83929, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33298}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04683, "heatmap_loss": 0.00058, "acc_pose": 0.82838, "loss": 0.00058, "grad_norm": 0.00119, "time": 0.38533}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.8333, "loss": 0.00059, "grad_norm": 0.00125, "time": 0.3368}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.81467, "loss": 0.00059, "grad_norm": 0.00127, "time": 0.33492}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.82683, "loss": 0.00059, "grad_norm": 0.00128, "time": 0.33581}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82862, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33466}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00058, "acc_pose": 0.81387, "loss": 0.00058, "grad_norm": 0.00128, "time": 0.38533}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00058, "acc_pose": 0.83, "loss": 0.00058, "grad_norm": 0.00132, "time": 0.33491}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.8214, "loss": 0.00059, "grad_norm": 0.00149, "time": 0.33424}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00058, "acc_pose": 0.82336, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33303}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.82441, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33387}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00058, "acc_pose": 0.83248, "loss": 0.00058, "grad_norm": 0.00124, "time": 0.38572}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.83893, "loss": 0.00058, "grad_norm": 0.00137, "time": 0.33342}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.8263, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33417}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.83154, "loss": 0.00058, "grad_norm": 0.00126, "time": 0.33271}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81896, "loss": 0.00059, "grad_norm": 0.00125, "time": 0.33464}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04787, "heatmap_loss": 0.00058, "acc_pose": 0.82622, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.38496}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.81974, "loss": 0.00058, "grad_norm": 0.00124, "time": 0.33421}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.83179, "loss": 0.00058, "grad_norm": 0.00128, "time": 0.33444}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.81775, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33317}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.8162, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33458}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04686, "heatmap_loss": 0.00058, "acc_pose": 0.82324, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.38837}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82292, "loss": 0.00058, "grad_norm": 0.00142, "time": 0.33314}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82726, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33378}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.8103, "loss": 0.00058, "grad_norm": 0.00124, "time": 0.33377}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.83039, "loss": 0.00059, "grad_norm": 0.00134, "time": 0.3349}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04662, "heatmap_loss": 0.00058, "acc_pose": 0.83325, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.3897}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83892, "loss": 0.00058, "grad_norm": 0.00137, "time": 0.33469}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00059, "acc_pose": 0.81197, "loss": 0.00059, "grad_norm": 0.00131, "time": 0.33361}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.83128, "loss": 0.00058, "grad_norm": 0.00126, "time": 0.33512}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00058, "acc_pose": 0.82106, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33434}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00058, "acc_pose": 0.837, "loss": 0.00058, "grad_norm": 0.00123, "time": 0.38541}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.82253, "loss": 0.00058, "grad_norm": 0.00124, "time": 0.33407}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.81616, "loss": 0.00058, "grad_norm": 0.00131, "time": 0.3341}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.8375, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33326}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.81211, "loss": 0.00058, "grad_norm": 0.00134, "time": 0.33275}
+{"mode": "val", "epoch": 100, "iter": 204, "lr": 0.0, "AP": 0.78106, "AP .5": 0.91408, "AP .75": 0.85203, "AP (M)": 0.70683, "AP (L)": 0.81149, "AR": 0.83334, "AR .5": 0.95246, "AR .75": 0.89436, "AR (M)": 0.79156, "AR (L)": 0.89506}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04681, "heatmap_loss": 0.00058, "acc_pose": 0.82892, "loss": 0.00058, "grad_norm": 0.0012, "time": 0.38246}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83022, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33381}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00058, "acc_pose": 0.8207, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33293}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.82914, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33382}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00057, "acc_pose": 0.81526, "loss": 0.00057, "grad_norm": 0.00135, "time": 0.33422}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04701, "heatmap_loss": 0.00058, "acc_pose": 0.82215, "loss": 0.00058, "grad_norm": 0.00127, "time": 0.38464}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.82931, "loss": 0.00058, "grad_norm": 0.00132, "time": 0.33398}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.81644, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33341}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.8199, "loss": 0.00058, "grad_norm": 0.00128, "time": 0.33448}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83807, "loss": 0.00058, "grad_norm": 0.00135, "time": 0.33394}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04685, "heatmap_loss": 0.00058, "acc_pose": 0.8207, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.38645}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82246, "loss": 0.00057, "grad_norm": 0.0012, "time": 0.33442}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.84264, "loss": 0.00058, "grad_norm": 0.00135, "time": 0.33455}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.8126, "loss": 0.00058, "grad_norm": 0.00128, "time": 0.3348}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.82855, "loss": 0.00058, "grad_norm": 0.00126, "time": 0.33427}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04704, "heatmap_loss": 0.00058, "acc_pose": 0.83283, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.38566}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82197, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.3368}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00058, "acc_pose": 0.82291, "loss": 0.00058, "grad_norm": 0.00128, "time": 0.3384}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00058, "acc_pose": 0.82631, "loss": 0.00058, "grad_norm": 0.0012, "time": 0.33529}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.81903, "loss": 0.00058, "grad_norm": 0.00122, "time": 0.33339}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00058, "acc_pose": 0.82661, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.38537}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.8274, "loss": 0.00058, "grad_norm": 0.00132, "time": 0.33409}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.8197, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.33377}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82808, "loss": 0.00058, "grad_norm": 0.00125, "time": 0.33349}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00058, "acc_pose": 0.81814, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.33367}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04684, "heatmap_loss": 0.00058, "acc_pose": 0.82173, "loss": 0.00058, "grad_norm": 0.00122, "time": 0.38419}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.82742, "loss": 0.00057, "grad_norm": 0.00129, "time": 0.33285}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00057, "acc_pose": 0.83016, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.33372}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.833, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33352}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00057, "acc_pose": 0.82114, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33575}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04709, "heatmap_loss": 0.00057, "acc_pose": 0.82985, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.38464}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.81406, "loss": 0.00057, "grad_norm": 0.00137, "time": 0.33317}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.8261, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.33685}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.82782, "loss": 0.00057, "grad_norm": 0.00117, "time": 0.33412}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82632, "loss": 0.00057, "grad_norm": 0.00126, "time": 0.33496}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04714, "heatmap_loss": 0.00058, "acc_pose": 0.82615, "loss": 0.00058, "grad_norm": 0.00126, "time": 0.38442}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.8241, "loss": 0.00058, "grad_norm": 0.00127, "time": 0.33382}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.81112, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.33326}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83953, "loss": 0.00057, "grad_norm": 0.0013, "time": 0.33252}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.8302, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.3338}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04715, "heatmap_loss": 0.00057, "acc_pose": 0.82612, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.38567}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.82458, "loss": 0.00058, "grad_norm": 0.00129, "time": 0.33474}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83208, "loss": 0.00058, "grad_norm": 0.00135, "time": 0.3353}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.82812, "loss": 0.00057, "grad_norm": 0.00126, "time": 0.33377}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.81978, "loss": 0.00057, "grad_norm": 0.0012, "time": 0.33497}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04764, "heatmap_loss": 0.00057, "acc_pose": 0.82044, "loss": 0.00057, "grad_norm": 0.00144, "time": 0.3858}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00058, "acc_pose": 0.82116, "loss": 0.00058, "grad_norm": 0.0013, "time": 0.33366}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.8284, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.33463}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83666, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33339}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.82435, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33483}
+{"mode": "val", "epoch": 110, "iter": 204, "lr": 0.0, "AP": 0.78115, "AP .5": 0.9141, "AP .75": 0.85008, "AP (M)": 0.70736, "AP (L)": 0.81217, "AR": 0.83292, "AR .5": 0.95183, "AR .75": 0.89232, "AR (M)": 0.7912, "AR (L)": 0.89443}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04691, "heatmap_loss": 0.00057, "acc_pose": 0.81984, "loss": 0.00057, "grad_norm": 0.00121, "time": 0.38461}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.83205, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33613}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83536, "loss": 0.00057, "grad_norm": 0.00137, "time": 0.33431}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82137, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33536}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.81306, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33451}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04708, "heatmap_loss": 0.00057, "acc_pose": 0.82765, "loss": 0.00057, "grad_norm": 0.00134, "time": 0.38569}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82606, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.33381}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.84154, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.33401}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.82767, "loss": 0.00057, "grad_norm": 0.00126, "time": 0.33545}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.83768, "loss": 0.00057, "grad_norm": 0.00126, "time": 0.33313}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04725, "heatmap_loss": 0.00057, "acc_pose": 0.83544, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.39139}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.84289, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33603}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82602, "loss": 0.00057, "grad_norm": 0.00122, "time": 0.33609}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83652, "loss": 0.00057, "grad_norm": 0.00122, "time": 0.33548}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83055, "loss": 0.00057, "grad_norm": 0.00138, "time": 0.33495}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04748, "heatmap_loss": 0.00057, "acc_pose": 0.8328, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.38918}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82983, "loss": 0.00057, "grad_norm": 0.00122, "time": 0.33594}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82906, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33625}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.83637, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.33771}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83331, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33546}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04701, "heatmap_loss": 0.00057, "acc_pose": 0.82781, "loss": 0.00057, "grad_norm": 0.00158, "time": 0.38457}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83112, "loss": 0.00057, "grad_norm": 0.00124, "time": 0.33468}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.8311, "loss": 0.00057, "grad_norm": 0.00129, "time": 0.33388}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.83177, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33326}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.8284, "loss": 0.00057, "grad_norm": 0.00122, "time": 0.33423}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04743, "heatmap_loss": 0.00057, "acc_pose": 0.84011, "loss": 0.00057, "grad_norm": 0.0013, "time": 0.38713}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.82021, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33416}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.82908, "loss": 0.00056, "grad_norm": 0.00115, "time": 0.33477}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00056, "acc_pose": 0.84915, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33436}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.84059, "loss": 0.00057, "grad_norm": 0.0012, "time": 0.33325}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04727, "heatmap_loss": 0.00057, "acc_pose": 0.82188, "loss": 0.00057, "grad_norm": 0.00121, "time": 0.38504}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00056, "heatmap_loss": 0.00057, "acc_pose": 0.83248, "loss": 0.00057, "grad_norm": 0.00132, "time": 0.33382}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83616, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.33364}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.8422, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33309}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82583, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33415}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04733, "heatmap_loss": 0.00057, "acc_pose": 0.8267, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.38525}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.83275, "loss": 0.00057, "grad_norm": 0.00126, "time": 0.33401}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83488, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33523}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.8256, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33424}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83701, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33631}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04722, "heatmap_loss": 0.00056, "acc_pose": 0.83969, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.38847}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.82823, "loss": 0.00056, "grad_norm": 0.00119, "time": 0.33697}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00056, "acc_pose": 0.82657, "loss": 0.00056, "grad_norm": 0.00133, "time": 0.33765}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.83752, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33523}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.8361, "loss": 0.00057, "grad_norm": 0.00132, "time": 0.33529}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04698, "heatmap_loss": 0.00056, "acc_pose": 0.8382, "loss": 0.00056, "grad_norm": 0.00121, "time": 0.38492}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84134, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33397}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83699, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33441}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.82447, "loss": 0.00057, "grad_norm": 0.00125, "time": 0.33284}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83909, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33379}
+{"mode": "val", "epoch": 120, "iter": 204, "lr": 0.0, "AP": 0.7829, "AP .5": 0.91432, "AP .75": 0.85217, "AP (M)": 0.70862, "AP (L)": 0.81443, "AR": 0.83408, "AR .5": 0.95183, "AR .75": 0.89499, "AR (M)": 0.79257, "AR (L)": 0.89506}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04708, "heatmap_loss": 0.00056, "acc_pose": 0.82548, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.38043}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00056, "acc_pose": 0.85259, "loss": 0.00056, "grad_norm": 0.00121, "time": 0.33292}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.83967, "loss": 0.00057, "grad_norm": 0.00127, "time": 0.33421}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.82963, "loss": 0.00056, "grad_norm": 0.00116, "time": 0.33446}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.83942, "loss": 0.00056, "grad_norm": 0.00119, "time": 0.33408}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04731, "heatmap_loss": 0.00056, "acc_pose": 0.82357, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.38532}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83218, "loss": 0.00057, "grad_norm": 0.00123, "time": 0.33382}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83367, "loss": 0.00057, "grad_norm": 0.00121, "time": 0.33291}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.82639, "loss": 0.00056, "grad_norm": 0.00128, "time": 0.33351}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.81703, "loss": 0.00057, "grad_norm": 0.00128, "time": 0.33201}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04732, "heatmap_loss": 0.00056, "acc_pose": 0.82297, "loss": 0.00056, "grad_norm": 0.00128, "time": 0.38678}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83027, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33421}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.8372, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.33295}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83362, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.33361}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.84014, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.33316}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04733, "heatmap_loss": 0.00056, "acc_pose": 0.83965, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.38499}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.84021, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33395}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.82455, "loss": 0.00056, "grad_norm": 0.00131, "time": 0.33246}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83804, "loss": 0.00056, "grad_norm": 0.00129, "time": 0.33382}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.84304, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.33243}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04707, "heatmap_loss": 0.00056, "acc_pose": 0.83188, "loss": 0.00056, "grad_norm": 0.00126, "time": 0.38467}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83053, "loss": 0.00056, "grad_norm": 0.00135, "time": 0.33415}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.82929, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.33247}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83729, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33418}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.82959, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33239}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04732, "heatmap_loss": 0.00056, "acc_pose": 0.82674, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.38436}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83548, "loss": 0.00056, "grad_norm": 0.00128, "time": 0.33474}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84025, "loss": 0.00056, "grad_norm": 0.00116, "time": 0.33341}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.83755, "loss": 0.00056, "grad_norm": 0.0013, "time": 0.33475}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83344, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.33425}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04716, "heatmap_loss": 0.00056, "acc_pose": 0.82754, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.38577}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.8266, "loss": 0.00057, "grad_norm": 0.00119, "time": 0.33494}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83047, "loss": 0.00055, "grad_norm": 0.00129, "time": 0.33423}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.84254, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.3349}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.82548, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.33261}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00056, "acc_pose": 0.83473, "loss": 0.00056, "grad_norm": 0.0012, "time": 0.38485}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83281, "loss": 0.00056, "grad_norm": 0.00121, "time": 0.33305}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83017, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33368}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.8385, "loss": 0.00056, "grad_norm": 0.00118, "time": 0.3332}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83904, "loss": 0.00056, "grad_norm": 0.00126, "time": 0.33326}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00056, "acc_pose": 0.82912, "loss": 0.00056, "grad_norm": 0.00131, "time": 0.38652}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83403, "loss": 0.00056, "grad_norm": 0.00115, "time": 0.33351}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.82143, "loss": 0.00056, "grad_norm": 0.00129, "time": 0.33353}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.82898, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33442}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.82493, "loss": 0.00057, "grad_norm": 0.00129, "time": 0.33351}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04735, "heatmap_loss": 0.00056, "acc_pose": 0.84199, "loss": 0.00056, "grad_norm": 0.00118, "time": 0.38633}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.82493, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33405}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83151, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33345}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8327, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.33381}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.82437, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.33395}
+{"mode": "val", "epoch": 130, "iter": 204, "lr": 0.0, "AP": 0.78452, "AP .5": 0.91531, "AP .75": 0.85197, "AP (M)": 0.70994, "AP (L)": 0.81721, "AR": 0.83517, "AR .5": 0.95246, "AR .75": 0.89436, "AR (M)": 0.79273, "AR (L)": 0.89762}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04711, "heatmap_loss": 0.00055, "acc_pose": 0.84708, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.38209}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.84145, "loss": 0.00056, "grad_norm": 0.00119, "time": 0.33199}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.83458, "loss": 0.00056, "grad_norm": 0.00128, "time": 0.33469}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.82075, "loss": 0.00056, "grad_norm": 0.00126, "time": 0.33315}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.84044, "loss": 0.00056, "grad_norm": 0.0013, "time": 0.33454}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04723, "heatmap_loss": 0.00056, "acc_pose": 0.84326, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.38508}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83106, "loss": 0.00056, "grad_norm": 0.00126, "time": 0.33331}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83615, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.3339}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83328, "loss": 0.00056, "grad_norm": 0.00122, "time": 0.33423}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.84002, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.33471}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04762, "heatmap_loss": 0.00055, "acc_pose": 0.82447, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.38623}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.85243, "loss": 0.00055, "grad_norm": 0.00119, "time": 0.33377}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83646, "loss": 0.00056, "grad_norm": 0.00114, "time": 0.33328}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.82862, "loss": 0.00056, "grad_norm": 0.00129, "time": 0.33271}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.82968, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33301}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04728, "heatmap_loss": 0.00055, "acc_pose": 0.82953, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.38841}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84207, "loss": 0.00056, "grad_norm": 0.00125, "time": 0.33425}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83657, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.33397}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83898, "loss": 0.00055, "grad_norm": 0.00133, "time": 0.33377}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.83189, "loss": 0.00056, "grad_norm": 0.00117, "time": 0.33324}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04728, "heatmap_loss": 0.00055, "acc_pose": 0.84608, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.38571}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.82863, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33455}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.82701, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33362}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.8366, "loss": 0.00056, "grad_norm": 0.00126, "time": 0.33403}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84008, "loss": 0.00055, "grad_norm": 0.00119, "time": 0.33536}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04706, "heatmap_loss": 0.00056, "acc_pose": 0.83576, "loss": 0.00056, "grad_norm": 0.00114, "time": 0.38665}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83096, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.33465}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83347, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.33446}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83676, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.33317}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83334, "loss": 0.00055, "grad_norm": 0.00117, "time": 0.33346}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04703, "heatmap_loss": 0.00056, "acc_pose": 0.81974, "loss": 0.00056, "grad_norm": 0.00121, "time": 0.39088}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.84337, "loss": 0.00056, "grad_norm": 0.00124, "time": 0.33689}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84055, "loss": 0.00056, "grad_norm": 0.00123, "time": 0.33629}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.84702, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33649}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.83059, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33466}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00056, "acc_pose": 0.83983, "loss": 0.00056, "grad_norm": 0.00129, "time": 0.3841}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83783, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.33472}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.8339, "loss": 0.00056, "grad_norm": 0.00117, "time": 0.33243}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84021, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.33424}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84466, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.33348}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04711, "heatmap_loss": 0.00055, "acc_pose": 0.82912, "loss": 0.00055, "grad_norm": 0.00115, "time": 0.38502}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.8352, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33696}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.8422, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.33553}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8362, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33632}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.82986, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.33445}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00055, "acc_pose": 0.85153, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.38425}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8381, "loss": 0.00055, "grad_norm": 0.00129, "time": 0.33526}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83913, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.3359}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00056, "acc_pose": 0.84042, "loss": 0.00056, "grad_norm": 0.00133, "time": 0.33705}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8377, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.33628}
+{"mode": "val", "epoch": 140, "iter": 204, "lr": 0.0, "AP": 0.78596, "AP .5": 0.91584, "AP .75": 0.85355, "AP (M)": 0.71168, "AP (L)": 0.81774, "AR": 0.83624, "AR .5": 0.9534, "AR .75": 0.89562, "AR (M)": 0.79484, "AR (L)": 0.89706}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04687, "heatmap_loss": 0.00055, "acc_pose": 0.82666, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.38025}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83215, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.33232}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82867, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.33472}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83996, "loss": 0.00056, "grad_norm": 0.00121, "time": 0.33491}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83651, "loss": 0.00055, "grad_norm": 0.00119, "time": 0.33476}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04723, "heatmap_loss": 0.00055, "acc_pose": 0.84234, "loss": 0.00055, "grad_norm": 0.00113, "time": 0.38567}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83114, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.33367}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83571, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.33465}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83048, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33247}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83661, "loss": 0.00054, "grad_norm": 0.00111, "time": 0.33369}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0473, "heatmap_loss": 0.00055, "acc_pose": 0.84672, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.38539}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.8203, "loss": 0.00056, "grad_norm": 0.00127, "time": 0.333}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83816, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.33336}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84442, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.33552}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83886, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33401}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04706, "heatmap_loss": 0.00055, "acc_pose": 0.83119, "loss": 0.00055, "grad_norm": 0.00116, "time": 0.38992}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84159, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.33413}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84513, "loss": 0.00055, "grad_norm": 0.00118, "time": 0.33632}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00055, "acc_pose": 0.82844, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.33684}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.84543, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.33849}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04736, "heatmap_loss": 0.00055, "acc_pose": 0.84915, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.38599}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.85658, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.33404}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83314, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33334}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84068, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.33487}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.83912, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.33548}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04705, "heatmap_loss": 0.00055, "acc_pose": 0.84561, "loss": 0.00055, "grad_norm": 0.00119, "time": 0.3856}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.8452, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.33503}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83454, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.33701}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84439, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.3353}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82957, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33495}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0471, "heatmap_loss": 0.00055, "acc_pose": 0.83401, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.38758}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84279, "loss": 0.00054, "grad_norm": 0.00114, "time": 0.33544}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83991, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33613}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00055, "acc_pose": 0.83648, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.3344}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84656, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.33413}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04702, "heatmap_loss": 0.00055, "acc_pose": 0.84423, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.38481}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83744, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.33455}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83293, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.33436}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00055, "heatmap_loss": 0.00055, "acc_pose": 0.82103, "loss": 0.00055, "grad_norm": 0.00132, "time": 0.33379}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.82906, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.33435}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00055, "acc_pose": 0.84014, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.3854}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83132, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.33499}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84485, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.33526}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83095, "loss": 0.00055, "grad_norm": 0.00117, "time": 0.33542}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.8552, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.33474}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04843, "heatmap_loss": 0.00055, "acc_pose": 0.84033, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.3843}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.8443, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33383}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.84412, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33731}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00055, "acc_pose": 0.82873, "loss": 0.00055, "grad_norm": 0.00116, "time": 0.33668}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00055, "acc_pose": 0.84383, "loss": 0.00055, "grad_norm": 0.00115, "time": 0.33358}
+{"mode": "val", "epoch": 150, "iter": 204, "lr": 0.0, "AP": 0.78647, "AP .5": 0.91615, "AP .75": 0.85368, "AP (M)": 0.71303, "AP (L)": 0.81741, "AR": 0.83709, "AR .5": 0.95309, "AR .75": 0.89531, "AR (M)": 0.79547, "AR (L)": 0.89818}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04679, "heatmap_loss": 0.00055, "acc_pose": 0.82483, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.3839}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84024, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33513}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84228, "loss": 0.00055, "grad_norm": 0.00112, "time": 0.33468}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.82807, "loss": 0.00055, "grad_norm": 0.00123, "time": 0.33642}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.84449, "loss": 0.00055, "grad_norm": 0.00121, "time": 0.33637}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04707, "heatmap_loss": 0.00054, "acc_pose": 0.83767, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.39142}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.83931, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.33941}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84038, "loss": 0.00055, "grad_norm": 0.00116, "time": 0.33728}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84415, "loss": 0.00055, "grad_norm": 0.00124, "time": 0.33851}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00054, "acc_pose": 0.84059, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33875}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04709, "heatmap_loss": 0.00055, "acc_pose": 0.85124, "loss": 0.00055, "grad_norm": 0.0012, "time": 0.3901}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84132, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33813}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84515, "loss": 0.00054, "grad_norm": 0.00111, "time": 0.33751}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8321, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.3363}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.85333, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.33859}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04709, "heatmap_loss": 0.00055, "acc_pose": 0.83877, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.38876}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84354, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.33574}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.84444, "loss": 0.00055, "grad_norm": 0.00122, "time": 0.33575}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83451, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33478}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.84114, "loss": 0.00054, "grad_norm": 0.00114, "time": 0.33542}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04713, "heatmap_loss": 0.00054, "acc_pose": 0.8354, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.38769}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84145, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33386}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.85241, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.3359}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83489, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.33545}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.837, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.33638}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04727, "heatmap_loss": 0.00054, "acc_pose": 0.8327, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.3893}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84802, "loss": 0.00055, "grad_norm": 0.00111, "time": 0.33837}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.8347, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.33764}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84737, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.33561}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83736, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33402}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04885, "heatmap_loss": 0.00054, "acc_pose": 0.85141, "loss": 0.00054, "grad_norm": 0.00134, "time": 0.38539}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.85276, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33295}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83787, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33737}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83928, "loss": 0.00054, "grad_norm": 0.00117, "time": 0.33461}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.81991, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.33306}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04729, "heatmap_loss": 0.00054, "acc_pose": 0.83197, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.38638}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83984, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.33496}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.83205, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.33445}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84282, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.33814}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84055, "loss": 0.00054, "grad_norm": 0.00111, "time": 0.33805}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00054, "acc_pose": 0.84408, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.38918}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83447, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33703}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.85241, "loss": 0.00054, "grad_norm": 0.00108, "time": 0.33549}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83622, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.33569}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83795, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.3379}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0474, "heatmap_loss": 0.00055, "acc_pose": 0.83642, "loss": 0.00055, "grad_norm": 0.00116, "time": 0.38673}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84596, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.33518}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84351, "loss": 0.00054, "grad_norm": 0.00127, "time": 0.33486}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.83283, "loss": 0.00054, "grad_norm": 0.00113, "time": 0.33586}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.82653, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33337}
+{"mode": "val", "epoch": 160, "iter": 204, "lr": 0.0, "AP": 0.78504, "AP .5": 0.91558, "AP .75": 0.85247, "AP (M)": 0.71099, "AP (L)": 0.81552, "AR": 0.83632, "AR .5": 0.95309, "AR .75": 0.89547, "AR (M)": 0.79418, "AR (L)": 0.89822}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04678, "heatmap_loss": 0.00055, "acc_pose": 0.85463, "loss": 0.00055, "grad_norm": 0.00127, "time": 0.38128}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84497, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.33396}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84603, "loss": 0.00054, "grad_norm": 0.00113, "time": 0.33273}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84408, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33457}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.82908, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.33811}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04703, "heatmap_loss": 0.00054, "acc_pose": 0.84941, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.38819}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.84324, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33777}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84754, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.33667}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.85476, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33584}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83779, "loss": 0.00054, "grad_norm": 0.00121, "time": 0.33402}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04706, "heatmap_loss": 0.00054, "acc_pose": 0.82911, "loss": 0.00054, "grad_norm": 0.00117, "time": 0.38764}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00054, "acc_pose": 0.84099, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33644}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83229, "loss": 0.00054, "grad_norm": 0.00114, "time": 0.33619}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83967, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33741}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84235, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33705}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00054, "acc_pose": 0.85386, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.38576}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84079, "loss": 0.00054, "grad_norm": 0.00117, "time": 0.33346}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84368, "loss": 0.00054, "grad_norm": 0.00113, "time": 0.3343}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.85393, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33606}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84531, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33641}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04698, "heatmap_loss": 0.00054, "acc_pose": 0.86035, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.38759}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84377, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.33605}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84781, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.33505}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00053, "acc_pose": 0.84475, "loss": 0.00053, "grad_norm": 0.00113, "time": 0.33524}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84254, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.33901}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04743, "heatmap_loss": 0.00054, "acc_pose": 0.83741, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.3856}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84352, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.33429}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83752, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.33401}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84252, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.33269}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.83519, "loss": 0.00054, "grad_norm": 0.00114, "time": 0.33425}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04714, "heatmap_loss": 0.00054, "acc_pose": 0.8487, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.38991}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.85046, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.33632}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.83349, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33727}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84835, "loss": 0.00054, "grad_norm": 0.00111, "time": 0.33649}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00054, "acc_pose": 0.83618, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.33567}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04697, "heatmap_loss": 0.00054, "acc_pose": 0.83013, "loss": 0.00054, "grad_norm": 0.00115, "time": 0.3859}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.84263, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.33554}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00056, "heatmap_loss": 0.00054, "acc_pose": 0.83271, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.33896}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00054, "acc_pose": 0.84049, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.33969}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83544, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.34015}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04711, "heatmap_loss": 0.00053, "acc_pose": 0.85305, "loss": 0.00053, "grad_norm": 0.00116, "time": 0.39044}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.83405, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.34074}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84843, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33788}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84221, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.33639}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00054, "acc_pose": 0.852, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.33468}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04729, "heatmap_loss": 0.00054, "acc_pose": 0.82139, "loss": 0.00054, "grad_norm": 0.00119, "time": 0.38759}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84453, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.33473}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00054, "acc_pose": 0.84593, "loss": 0.00054, "grad_norm": 0.00116, "time": 0.3364}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.84218, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.33589}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84024, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.33342}
+{"mode": "val", "epoch": 170, "iter": 204, "lr": 0.0, "AP": 0.78793, "AP .5": 0.91601, "AP .75": 0.85584, "AP (M)": 0.71474, "AP (L)": 0.81939, "AR": 0.83794, "AR .5": 0.95293, "AR .75": 0.8983, "AR (M)": 0.7971, "AR (L)": 0.89773}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04696, "heatmap_loss": 0.00054, "acc_pose": 0.83822, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.3809}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.85071, "loss": 0.00053, "grad_norm": 0.00112, "time": 0.33255}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84039, "loss": 0.00053, "grad_norm": 0.00121, "time": 0.3333}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84298, "loss": 0.00053, "grad_norm": 0.00119, "time": 0.33329}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.83611, "loss": 0.00053, "grad_norm": 0.00109, "time": 0.33403}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04722, "heatmap_loss": 0.00053, "acc_pose": 0.83313, "loss": 0.00053, "grad_norm": 0.0011, "time": 0.38858}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00053, "acc_pose": 0.84514, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.33687}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84871, "loss": 0.00053, "grad_norm": 0.00113, "time": 0.34089}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84127, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.33688}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.8417, "loss": 0.00053, "grad_norm": 0.00116, "time": 0.33763}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04716, "heatmap_loss": 0.00052, "acc_pose": 0.83501, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.39003}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.84846, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.33601}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.8501, "loss": 0.00053, "grad_norm": 0.00112, "time": 0.33609}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.85204, "loss": 0.00053, "grad_norm": 0.00113, "time": 0.33477}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84293, "loss": 0.00053, "grad_norm": 0.00112, "time": 0.33855}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04721, "heatmap_loss": 0.00053, "acc_pose": 0.84677, "loss": 0.00053, "grad_norm": 0.00109, "time": 0.3873}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84812, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33524}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83964, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.33626}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.85197, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.33562}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84037, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.33515}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04704, "heatmap_loss": 0.00052, "acc_pose": 0.85242, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.38691}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.86029, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.33512}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85167, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.3341}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.8528, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33361}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85135, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.33481}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04739, "heatmap_loss": 0.00053, "acc_pose": 0.83338, "loss": 0.00053, "grad_norm": 0.00121, "time": 0.38598}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00052, "acc_pose": 0.85662, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33472}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8526, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33694}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.85029, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.33416}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84545, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.33663}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04724, "heatmap_loss": 0.00053, "acc_pose": 0.84586, "loss": 0.00053, "grad_norm": 0.00116, "time": 0.39103}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85533, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33987}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00054, "heatmap_loss": 0.00053, "acc_pose": 0.83958, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.33745}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84476, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33683}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.86309, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33838}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04734, "heatmap_loss": 0.00053, "acc_pose": 0.85222, "loss": 0.00053, "grad_norm": 0.00107, "time": 0.38917}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85422, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33543}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85153, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.33611}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84984, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33434}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84525, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.33512}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04707, "heatmap_loss": 0.00052, "acc_pose": 0.84311, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.38739}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85044, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33423}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84195, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.33577}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.86852, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33528}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84884, "loss": 0.00052, "grad_norm": 0.00123, "time": 0.33731}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04768, "heatmap_loss": 0.00052, "acc_pose": 0.85438, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.38964}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84898, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.33871}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.85412, "loss": 0.00052, "grad_norm": 0.00102, "time": 0.33805}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84426, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33858}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.86151, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33848}
+{"mode": "val", "epoch": 180, "iter": 204, "lr": 0.0, "AP": 0.78831, "AP .5": 0.91564, "AP .75": 0.85561, "AP (M)": 0.71489, "AP (L)": 0.81969, "AR": 0.83876, "AR .5": 0.9534, "AR .75": 0.89798, "AR (M)": 0.7973, "AR (L)": 0.89929}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.047, "heatmap_loss": 0.00052, "acc_pose": 0.85817, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.38178}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84656, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33467}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84194, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33299}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00057, "heatmap_loss": 0.00053, "acc_pose": 0.83865, "loss": 0.00053, "grad_norm": 0.00108, "time": 0.33771}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85102, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33702}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04695, "heatmap_loss": 0.00052, "acc_pose": 0.84026, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.38488}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8596, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.33683}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84181, "loss": 0.00053, "grad_norm": 0.0011, "time": 0.33502}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84507, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33502}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.8334, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.33542}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00052, "acc_pose": 0.8551, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.38915}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84938, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33458}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85829, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.3355}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84993, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.33461}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84569, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33467}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0472, "heatmap_loss": 0.00053, "acc_pose": 0.86026, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.38593}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85698, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33704}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83392, "loss": 0.00053, "grad_norm": 0.0011, "time": 0.33418}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85112, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33445}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85379, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33389}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04738, "heatmap_loss": 0.00052, "acc_pose": 0.84404, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.38608}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.8528, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.33252}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.86194, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33385}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.86198, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33298}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.83467, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33507}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04714, "heatmap_loss": 0.00052, "acc_pose": 0.83819, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.38607}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84876, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33404}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85774, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33535}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85001, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33237}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.84151, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.33455}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0471, "heatmap_loss": 0.00052, "acc_pose": 0.85252, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.3863}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84405, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33383}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.84188, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33343}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84876, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33355}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84604, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33437}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0472, "heatmap_loss": 0.00052, "acc_pose": 0.85534, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.38702}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84282, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33474}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85502, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33856}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85569, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.3388}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85678, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33895}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04715, "heatmap_loss": 0.00052, "acc_pose": 0.84338, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.38684}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85521, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.33355}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84315, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.3355}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84805, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33483}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.85516, "loss": 0.00051, "grad_norm": 0.00107, "time": 0.33516}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04716, "heatmap_loss": 0.00052, "acc_pose": 0.85228, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.38489}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84166, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33339}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.8582, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33517}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85125, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.33313}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85391, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.3345}
+{"mode": "val", "epoch": 190, "iter": 204, "lr": 0.0, "AP": 0.78872, "AP .5": 0.91592, "AP .75": 0.85591, "AP (M)": 0.71607, "AP (L)": 0.81952, "AR": 0.83918, "AR .5": 0.95419, "AR .75": 0.89767, "AR (M)": 0.79806, "AR (L)": 0.89922}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04695, "heatmap_loss": 0.00052, "acc_pose": 0.85043, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.38471}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85698, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33662}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00052, "acc_pose": 0.85357, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33352}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00052, "acc_pose": 0.85175, "loss": 0.00052, "grad_norm": 0.00107, "time": 0.33449}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00052, "acc_pose": 0.84448, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.33292}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04728, "heatmap_loss": 0.00052, "acc_pose": 0.85138, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.38794}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84401, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33778}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85355, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33658}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84898, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33605}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85042, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33608}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04734, "heatmap_loss": 0.00052, "acc_pose": 0.84098, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.3892}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.83501, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33589}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84836, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33601}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.83398, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33561}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.8577, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33513}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04703, "heatmap_loss": 0.00052, "acc_pose": 0.85209, "loss": 0.00052, "grad_norm": 0.00103, "time": 0.38676}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84618, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33407}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84611, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33327}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85806, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33493}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.8556, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33295}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04719, "heatmap_loss": 0.00052, "acc_pose": 0.8456, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.3854}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.83727, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33489}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85682, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33422}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00051, "acc_pose": 0.84861, "loss": 0.00051, "grad_norm": 0.00119, "time": 0.33276}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84995, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33355}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04726, "heatmap_loss": 0.00052, "acc_pose": 0.84843, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.38606}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84923, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.336}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84706, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.33546}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84978, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33611}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84537, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.33472}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04731, "heatmap_loss": 0.00052, "acc_pose": 0.84694, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.38762}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85111, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.3333}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.85177, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33321}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84406, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.33243}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.83895, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.33532}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04718, "heatmap_loss": 0.00052, "acc_pose": 0.83653, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.38739}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.8386, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.33334}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84356, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33358}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85152, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33359}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.85819, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33441}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.0473, "heatmap_loss": 0.00052, "acc_pose": 0.8416, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.38699}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00051, "acc_pose": 0.85801, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.33314}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84693, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.33415}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84928, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33349}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.83571, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33384}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04718, "heatmap_loss": 0.00052, "acc_pose": 0.84902, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.38727}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84852, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33332}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85003, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.33426}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85713, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33301}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85013, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33457}
+{"mode": "val", "epoch": 200, "iter": 204, "lr": 0.0, "AP": 0.78889, "AP .5": 0.916, "AP .75": 0.85571, "AP (M)": 0.71595, "AP (L)": 0.8197, "AR": 0.83934, "AR .5": 0.95356, "AR .75": 0.89814, "AR (M)": 0.79831, "AR (L)": 0.89952}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04696, "heatmap_loss": 0.00052, "acc_pose": 0.83664, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.38552}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84926, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33818}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84952, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33634}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.85904, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.33695}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.84526, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.33497}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04728, "heatmap_loss": 0.00052, "acc_pose": 0.85755, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.38818}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85324, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33583}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.849, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33725}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.83817, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33817}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00053, "heatmap_loss": 0.00052, "acc_pose": 0.84794, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33606}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04715, "heatmap_loss": 0.00053, "acc_pose": 0.84629, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.3895}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00051, "acc_pose": 0.85102, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.33858}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.85173, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.33496}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.85939, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33482}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84414, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33504}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04735, "heatmap_loss": 0.00052, "acc_pose": 0.84862, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.38838}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84099, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.33718}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85072, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.33824}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.83625, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33841}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.85497, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.33668}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04719, "heatmap_loss": 0.00052, "acc_pose": 0.84991, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.38757}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.83909, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.3355}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85577, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.33532}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85189, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.33439}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85356, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33622}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04735, "heatmap_loss": 0.00052, "acc_pose": 0.85097, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.38857}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85485, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.33817}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.85313, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.33595}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85893, "loss": 0.00052, "grad_norm": 0.00107, "time": 0.33715}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85403, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.3382}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04722, "heatmap_loss": 0.00052, "acc_pose": 0.854, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.39022}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00052, "heatmap_loss": 0.00052, "acc_pose": 0.85532, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.33389}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85341, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.33523}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85475, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.33316}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00064, "heatmap_loss": 0.00052, "acc_pose": 0.84303, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33397}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04717, "heatmap_loss": 0.00052, "acc_pose": 0.84961, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.3891}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00051, "acc_pose": 0.86339, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.3361}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85601, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.33557}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.83866, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.33538}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.8509, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.33382}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04707, "heatmap_loss": 0.00051, "acc_pose": 0.85747, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.38559}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85995, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.33181}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.8621, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.33323}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84733, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.33279}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.00051, "heatmap_loss": 0.00051, "acc_pose": 0.86004, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.33477}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 24830, "data_time": 0.04726, "heatmap_loss": 0.00052, "acc_pose": 0.86195, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.38732}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00051, "acc_pose": 0.85057, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.33679}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 24830, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84483, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.33405}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 24830, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84714, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.33343}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 24830, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.84807, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.33371}
+{"mode": "val", "epoch": 210, "iter": 204, "lr": 0.0, "AP": 0.78904, "AP .5": 0.91575, "AP .75": 0.85585, "AP (M)": 0.71575, "AP (L)": 0.81985, "AR": 0.83953, "AR .5": 0.95356, "AR .75": 0.8983, "AR (M)": 0.79792, "AR (L)": 0.89996}
diff --git a/vendor/ViTPose/logs/vitpose-h.log.json b/vendor/ViTPose/logs/vitpose-h.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..d552903f596d246e16af425c2900ba883b402f2a
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-h.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+1041e5c", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.05567, "heatmap_loss": 0.00215, "acc_pose": 0.04953, "loss": 0.00215, "grad_norm": 0.00637, "time": 0.51652}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00205, "acc_pose": 0.16094, "loss": 0.00205, "grad_norm": 0.0038, "time": 0.32774}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00202, "acc_pose": 0.20773, "loss": 0.00202, "grad_norm": 0.00515, "time": 0.32836}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00197, "acc_pose": 0.21174, "loss": 0.00197, "grad_norm": 0.00459, "time": 0.32729}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00192, "acc_pose": 0.25739, "loss": 0.00192, "grad_norm": 0.00462, "time": 0.32776}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04664, "heatmap_loss": 0.0018, "acc_pose": 0.27058, "loss": 0.0018, "grad_norm": 0.00605, "time": 0.37812}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00166, "acc_pose": 0.38175, "loss": 0.00166, "grad_norm": 0.00501, "time": 0.32823}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00149, "acc_pose": 0.44795, "loss": 0.00149, "grad_norm": 0.0052, "time": 0.32869}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00136, "acc_pose": 0.50362, "loss": 0.00136, "grad_norm": 0.00529, "time": 0.32729}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00128, "acc_pose": 0.53678, "loss": 0.00128, "grad_norm": 0.00533, "time": 0.32817}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04839, "heatmap_loss": 0.00116, "acc_pose": 0.55837, "loss": 0.00116, "grad_norm": 0.00525, "time": 0.37551}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00112, "acc_pose": 0.61204, "loss": 0.00112, "grad_norm": 0.00425, "time": 0.32576}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00107, "acc_pose": 0.62502, "loss": 0.00107, "grad_norm": 0.00422, "time": 0.32452}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00105, "acc_pose": 0.62267, "loss": 0.00105, "grad_norm": 0.00382, "time": 0.32268}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00104, "acc_pose": 0.65149, "loss": 0.00104, "grad_norm": 0.00415, "time": 0.32428}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04662, "heatmap_loss": 0.001, "acc_pose": 0.66093, "loss": 0.001, "grad_norm": 0.00492, "time": 0.3765}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00098, "acc_pose": 0.65781, "loss": 0.00098, "grad_norm": 0.00407, "time": 0.32553}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00095, "acc_pose": 0.68332, "loss": 0.00095, "grad_norm": 0.00374, "time": 0.32545}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00095, "acc_pose": 0.6549, "loss": 0.00095, "grad_norm": 0.00411, "time": 0.32473}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00093, "acc_pose": 0.69335, "loss": 0.00093, "grad_norm": 0.00375, "time": 0.32527}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04661, "heatmap_loss": 0.00092, "acc_pose": 0.68647, "loss": 0.00092, "grad_norm": 0.00393, "time": 0.37438}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00091, "acc_pose": 0.69951, "loss": 0.00091, "grad_norm": 0.0036, "time": 0.32636}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00089, "acc_pose": 0.71795, "loss": 0.00089, "grad_norm": 0.00383, "time": 0.32515}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00091, "acc_pose": 0.703, "loss": 0.00091, "grad_norm": 0.00384, "time": 0.32477}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00089, "acc_pose": 0.70371, "loss": 0.00089, "grad_norm": 0.00385, "time": 0.32549}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04647, "heatmap_loss": 0.00087, "acc_pose": 0.69495, "loss": 0.00087, "grad_norm": 0.00343, "time": 0.37482}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00087, "acc_pose": 0.69909, "loss": 0.00087, "grad_norm": 0.00402, "time": 0.32374}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00086, "acc_pose": 0.72449, "loss": 0.00086, "grad_norm": 0.00367, "time": 0.32429}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00085, "acc_pose": 0.70728, "loss": 0.00085, "grad_norm": 0.00395, "time": 0.32813}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00085, "acc_pose": 0.71269, "loss": 0.00085, "grad_norm": 0.00346, "time": 0.32571}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04693, "heatmap_loss": 0.00084, "acc_pose": 0.71703, "loss": 0.00084, "grad_norm": 0.00351, "time": 0.37268}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00083, "acc_pose": 0.73078, "loss": 0.00083, "grad_norm": 0.00341, "time": 0.32417}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00082, "acc_pose": 0.71971, "loss": 0.00082, "grad_norm": 0.00367, "time": 0.32406}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00083, "acc_pose": 0.70956, "loss": 0.00083, "grad_norm": 0.00354, "time": 0.3234}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00082, "acc_pose": 0.73589, "loss": 0.00082, "grad_norm": 0.00309, "time": 0.3237}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04671, "heatmap_loss": 0.00081, "acc_pose": 0.72192, "loss": 0.00081, "grad_norm": 0.00384, "time": 0.37257}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00081, "acc_pose": 0.72415, "loss": 0.00081, "grad_norm": 0.00343, "time": 0.32438}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00082, "acc_pose": 0.73011, "loss": 0.00082, "grad_norm": 0.00349, "time": 0.32396}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00081, "acc_pose": 0.72514, "loss": 0.00081, "grad_norm": 0.0034, "time": 0.32601}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00079, "acc_pose": 0.74923, "loss": 0.00079, "grad_norm": 0.00313, "time": 0.32248}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04663, "heatmap_loss": 0.00079, "acc_pose": 0.73709, "loss": 0.00079, "grad_norm": 0.00327, "time": 0.3722}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00079, "acc_pose": 0.73952, "loss": 0.00079, "grad_norm": 0.00341, "time": 0.3255}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00079, "acc_pose": 0.75505, "loss": 0.00079, "grad_norm": 0.00366, "time": 0.32313}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00057, "heatmap_loss": 0.00078, "acc_pose": 0.73787, "loss": 0.00078, "grad_norm": 0.00329, "time": 0.32486}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00078, "acc_pose": 0.74848, "loss": 0.00078, "grad_norm": 0.00347, "time": 0.32403}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04647, "heatmap_loss": 0.00077, "acc_pose": 0.74953, "loss": 0.00077, "grad_norm": 0.00349, "time": 0.37392}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00077, "acc_pose": 0.7363, "loss": 0.00077, "grad_norm": 0.00329, "time": 0.32384}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00078, "acc_pose": 0.74774, "loss": 0.00078, "grad_norm": 0.00316, "time": 0.32244}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00077, "acc_pose": 0.74192, "loss": 0.00077, "grad_norm": 0.00331, "time": 0.32408}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00077, "acc_pose": 0.73766, "loss": 0.00077, "grad_norm": 0.00302, "time": 0.32297}
+{"mode": "val", "epoch": 10, "iter": 204, "lr": 0.0, "AP": 0.74147, "AP .5": 0.90127, "AP .75": 0.81772, "AP (M)": 0.66661, "AP (L)": 0.76941, "AR": 0.79775, "AR .5": 0.94112, "AR .75": 0.86508, "AR (M)": 0.75356, "AR (L)": 0.86191}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04723, "heatmap_loss": 0.00077, "acc_pose": 0.7569, "loss": 0.00077, "grad_norm": 0.00411, "time": 0.37294}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00075, "acc_pose": 0.75355, "loss": 0.00075, "grad_norm": 0.00387, "time": 0.3239}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00076, "acc_pose": 0.74638, "loss": 0.00076, "grad_norm": 0.0034, "time": 0.32541}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00076, "acc_pose": 0.76275, "loss": 0.00076, "grad_norm": 0.00315, "time": 0.32635}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00075, "acc_pose": 0.75953, "loss": 0.00075, "grad_norm": 0.00306, "time": 0.32487}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04791, "heatmap_loss": 0.00076, "acc_pose": 0.74813, "loss": 0.00076, "grad_norm": 0.00329, "time": 0.375}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00075, "acc_pose": 0.76311, "loss": 0.00075, "grad_norm": 0.00288, "time": 0.32557}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00075, "acc_pose": 0.75648, "loss": 0.00075, "grad_norm": 0.00326, "time": 0.32452}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00075, "acc_pose": 0.75389, "loss": 0.00075, "grad_norm": 0.00347, "time": 0.32649}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00074, "acc_pose": 0.76147, "loss": 0.00074, "grad_norm": 0.00306, "time": 0.32264}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04682, "heatmap_loss": 0.00075, "acc_pose": 0.74689, "loss": 0.00075, "grad_norm": 0.00341, "time": 0.37283}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00075, "acc_pose": 0.77314, "loss": 0.00075, "grad_norm": 0.00326, "time": 0.32424}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00074, "acc_pose": 0.76252, "loss": 0.00074, "grad_norm": 0.00367, "time": 0.32401}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00074, "acc_pose": 0.77588, "loss": 0.00074, "grad_norm": 0.00316, "time": 0.3243}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00074, "acc_pose": 0.75686, "loss": 0.00074, "grad_norm": 0.00301, "time": 0.32372}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04668, "heatmap_loss": 0.00073, "acc_pose": 0.75356, "loss": 0.00073, "grad_norm": 0.00316, "time": 0.37254}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00073, "acc_pose": 0.78212, "loss": 0.00073, "grad_norm": 0.00328, "time": 0.32467}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00073, "acc_pose": 0.76846, "loss": 0.00073, "grad_norm": 0.00317, "time": 0.32352}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00073, "acc_pose": 0.76538, "loss": 0.00073, "grad_norm": 0.00345, "time": 0.32407}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00072, "acc_pose": 0.77255, "loss": 0.00072, "grad_norm": 0.00311, "time": 0.32324}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00072, "acc_pose": 0.75011, "loss": 0.00072, "grad_norm": 0.003, "time": 0.37476}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00072, "acc_pose": 0.77722, "loss": 0.00072, "grad_norm": 0.00345, "time": 0.32702}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00073, "acc_pose": 0.75457, "loss": 0.00073, "grad_norm": 0.00282, "time": 0.32461}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00072, "acc_pose": 0.75458, "loss": 0.00072, "grad_norm": 0.00313, "time": 0.32447}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00072, "acc_pose": 0.74511, "loss": 0.00072, "grad_norm": 0.00285, "time": 0.32312}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04693, "heatmap_loss": 0.00072, "acc_pose": 0.77851, "loss": 0.00072, "grad_norm": 0.00297, "time": 0.37769}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00071, "acc_pose": 0.77297, "loss": 0.00071, "grad_norm": 0.00305, "time": 0.32719}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00071, "acc_pose": 0.76157, "loss": 0.00071, "grad_norm": 0.00312, "time": 0.32386}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00072, "acc_pose": 0.77714, "loss": 0.00072, "grad_norm": 0.00327, "time": 0.3259}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00071, "acc_pose": 0.77534, "loss": 0.00071, "grad_norm": 0.00327, "time": 0.32303}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04655, "heatmap_loss": 0.00071, "acc_pose": 0.77685, "loss": 0.00071, "grad_norm": 0.00298, "time": 0.37197}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.0007, "acc_pose": 0.77639, "loss": 0.0007, "grad_norm": 0.00269, "time": 0.32322}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.0007, "acc_pose": 0.7884, "loss": 0.0007, "grad_norm": 0.00276, "time": 0.32356}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00071, "acc_pose": 0.76703, "loss": 0.00071, "grad_norm": 0.0027, "time": 0.32467}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0007, "acc_pose": 0.77956, "loss": 0.0007, "grad_norm": 0.0029, "time": 0.32498}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.0007, "acc_pose": 0.77039, "loss": 0.0007, "grad_norm": 0.00279, "time": 0.37451}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0007, "acc_pose": 0.76741, "loss": 0.0007, "grad_norm": 0.00302, "time": 0.32604}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0007, "acc_pose": 0.78325, "loss": 0.0007, "grad_norm": 0.00297, "time": 0.32585}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0007, "acc_pose": 0.77032, "loss": 0.0007, "grad_norm": 0.00285, "time": 0.32445}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.0007, "acc_pose": 0.78363, "loss": 0.0007, "grad_norm": 0.0028, "time": 0.32458}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04668, "heatmap_loss": 0.0007, "acc_pose": 0.77448, "loss": 0.0007, "grad_norm": 0.00279, "time": 0.37387}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0007, "acc_pose": 0.79098, "loss": 0.0007, "grad_norm": 0.0032, "time": 0.32374}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00069, "acc_pose": 0.77327, "loss": 0.00069, "grad_norm": 0.00324, "time": 0.32371}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.77982, "loss": 0.00069, "grad_norm": 0.00288, "time": 0.32536}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.76326, "loss": 0.00069, "grad_norm": 0.00261, "time": 0.32777}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04674, "heatmap_loss": 0.00069, "acc_pose": 0.77948, "loss": 0.00069, "grad_norm": 0.00283, "time": 0.37649}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00069, "acc_pose": 0.79765, "loss": 0.00069, "grad_norm": 0.00265, "time": 0.32734}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.78873, "loss": 0.00069, "grad_norm": 0.00308, "time": 0.32836}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.76957, "loss": 0.00069, "grad_norm": 0.00317, "time": 0.32703}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00069, "acc_pose": 0.78062, "loss": 0.00069, "grad_norm": 0.00283, "time": 0.32454}
+{"mode": "val", "epoch": 20, "iter": 204, "lr": 0.0, "AP": 0.76181, "AP .5": 0.90579, "AP .75": 0.8328, "AP (M)": 0.68739, "AP (L)": 0.79194, "AR": 0.81544, "AR .5": 0.94584, "AR .75": 0.87815, "AR (M)": 0.77307, "AR (L)": 0.87748}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04729, "heatmap_loss": 0.00068, "acc_pose": 0.77734, "loss": 0.00068, "grad_norm": 0.00273, "time": 0.3686}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00068, "acc_pose": 0.78499, "loss": 0.00068, "grad_norm": 0.00298, "time": 0.3222}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00069, "acc_pose": 0.78703, "loss": 0.00069, "grad_norm": 0.00269, "time": 0.32471}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00068, "acc_pose": 0.774, "loss": 0.00068, "grad_norm": 0.00275, "time": 0.32458}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00069, "acc_pose": 0.78752, "loss": 0.00069, "grad_norm": 0.00301, "time": 0.32456}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04821, "heatmap_loss": 0.00068, "acc_pose": 0.7961, "loss": 0.00068, "grad_norm": 0.00291, "time": 0.37742}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00068, "acc_pose": 0.78779, "loss": 0.00068, "grad_norm": 0.00312, "time": 0.32715}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00067, "acc_pose": 0.79456, "loss": 0.00067, "grad_norm": 0.00248, "time": 0.32443}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00068, "acc_pose": 0.7921, "loss": 0.00068, "grad_norm": 0.0024, "time": 0.32632}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00067, "acc_pose": 0.79573, "loss": 0.00067, "grad_norm": 0.00269, "time": 0.32581}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00067, "acc_pose": 0.79427, "loss": 0.00067, "grad_norm": 0.00284, "time": 0.37374}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00067, "acc_pose": 0.79648, "loss": 0.00067, "grad_norm": 0.0031, "time": 0.32376}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00067, "acc_pose": 0.78753, "loss": 0.00067, "grad_norm": 0.00267, "time": 0.32437}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.77981, "loss": 0.00067, "grad_norm": 0.00265, "time": 0.32518}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00068, "acc_pose": 0.78753, "loss": 0.00068, "grad_norm": 0.00309, "time": 0.32531}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04662, "heatmap_loss": 0.00067, "acc_pose": 0.77909, "loss": 0.00067, "grad_norm": 0.00247, "time": 0.37338}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00067, "acc_pose": 0.79459, "loss": 0.00067, "grad_norm": 0.0025, "time": 0.32574}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00067, "acc_pose": 0.78856, "loss": 0.00067, "grad_norm": 0.00244, "time": 0.32599}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00067, "acc_pose": 0.77849, "loss": 0.00067, "grad_norm": 0.00257, "time": 0.32663}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.7888, "loss": 0.00067, "grad_norm": 0.00318, "time": 0.32298}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04694, "heatmap_loss": 0.00066, "acc_pose": 0.78427, "loss": 0.00066, "grad_norm": 0.00262, "time": 0.37552}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.7923, "loss": 0.00066, "grad_norm": 0.00251, "time": 0.32359}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00067, "acc_pose": 0.80074, "loss": 0.00067, "grad_norm": 0.0028, "time": 0.32286}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.7923, "loss": 0.00067, "grad_norm": 0.00294, "time": 0.32265}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.78118, "loss": 0.00066, "grad_norm": 0.0027, "time": 0.32251}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0465, "heatmap_loss": 0.00066, "acc_pose": 0.79343, "loss": 0.00066, "grad_norm": 0.00275, "time": 0.37459}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00066, "acc_pose": 0.81051, "loss": 0.00066, "grad_norm": 0.00248, "time": 0.32458}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.78514, "loss": 0.00066, "grad_norm": 0.00286, "time": 0.32319}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.79984, "loss": 0.00066, "grad_norm": 0.00271, "time": 0.32487}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.79718, "loss": 0.00066, "grad_norm": 0.00262, "time": 0.32554}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.00066, "acc_pose": 0.78902, "loss": 0.00066, "grad_norm": 0.00253, "time": 0.37498}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00066, "acc_pose": 0.78745, "loss": 0.00066, "grad_norm": 0.00278, "time": 0.32276}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00066, "acc_pose": 0.80874, "loss": 0.00066, "grad_norm": 0.00268, "time": 0.32333}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.77729, "loss": 0.00066, "grad_norm": 0.00255, "time": 0.3233}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00065, "acc_pose": 0.78313, "loss": 0.00065, "grad_norm": 0.00244, "time": 0.32467}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04662, "heatmap_loss": 0.00065, "acc_pose": 0.80567, "loss": 0.00065, "grad_norm": 0.00229, "time": 0.37404}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00065, "acc_pose": 0.79293, "loss": 0.00065, "grad_norm": 0.0026, "time": 0.32376}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00066, "acc_pose": 0.78567, "loss": 0.00066, "grad_norm": 0.00253, "time": 0.32464}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00065, "acc_pose": 0.79375, "loss": 0.00065, "grad_norm": 0.0026, "time": 0.32409}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00065, "acc_pose": 0.78837, "loss": 0.00065, "grad_norm": 0.00269, "time": 0.3228}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04697, "heatmap_loss": 0.00065, "acc_pose": 0.78903, "loss": 0.00065, "grad_norm": 0.00271, "time": 0.37452}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00066, "acc_pose": 0.79228, "loss": 0.00066, "grad_norm": 0.00269, "time": 0.32377}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00066, "acc_pose": 0.80083, "loss": 0.00066, "grad_norm": 0.00251, "time": 0.32388}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00064, "acc_pose": 0.79593, "loss": 0.00064, "grad_norm": 0.00287, "time": 0.32343}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00065, "acc_pose": 0.79332, "loss": 0.00065, "grad_norm": 0.00254, "time": 0.32499}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00065, "acc_pose": 0.78827, "loss": 0.00065, "grad_norm": 0.00246, "time": 0.37406}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00066, "acc_pose": 0.80152, "loss": 0.00066, "grad_norm": 0.00296, "time": 0.32405}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00065, "acc_pose": 0.79767, "loss": 0.00065, "grad_norm": 0.00269, "time": 0.32473}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00065, "acc_pose": 0.80328, "loss": 0.00065, "grad_norm": 0.00248, "time": 0.32272}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00065, "acc_pose": 0.78245, "loss": 0.00065, "grad_norm": 0.00268, "time": 0.32496}
+{"mode": "val", "epoch": 30, "iter": 204, "lr": 0.0, "AP": 0.76987, "AP .5": 0.90803, "AP .75": 0.84154, "AP (M)": 0.69658, "AP (L)": 0.79928, "AR": 0.82303, "AR .5": 0.94868, "AR .75": 0.88665, "AR (M)": 0.78009, "AR (L)": 0.8861}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04735, "heatmap_loss": 0.00065, "acc_pose": 0.80687, "loss": 0.00065, "grad_norm": 0.00282, "time": 0.3717}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00064, "acc_pose": 0.80407, "loss": 0.00064, "grad_norm": 0.0023, "time": 0.32339}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.79027, "loss": 0.00064, "grad_norm": 0.00225, "time": 0.32718}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00065, "acc_pose": 0.80901, "loss": 0.00065, "grad_norm": 0.00246, "time": 0.32403}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.80149, "loss": 0.00064, "grad_norm": 0.00246, "time": 0.32728}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04663, "heatmap_loss": 0.00064, "acc_pose": 0.80231, "loss": 0.00064, "grad_norm": 0.00233, "time": 0.37194}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00065, "acc_pose": 0.81558, "loss": 0.00065, "grad_norm": 0.0024, "time": 0.32341}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00064, "acc_pose": 0.80226, "loss": 0.00064, "grad_norm": 0.00237, "time": 0.32303}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.80177, "loss": 0.00064, "grad_norm": 0.00244, "time": 0.32479}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.79815, "loss": 0.00064, "grad_norm": 0.00244, "time": 0.32605}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00064, "acc_pose": 0.80332, "loss": 0.00064, "grad_norm": 0.00254, "time": 0.37399}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.80367, "loss": 0.00064, "grad_norm": 0.0024, "time": 0.32551}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00063, "acc_pose": 0.80105, "loss": 0.00063, "grad_norm": 0.00228, "time": 0.32477}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.80586, "loss": 0.00064, "grad_norm": 0.00242, "time": 0.32522}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.80469, "loss": 0.00064, "grad_norm": 0.00244, "time": 0.32487}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04674, "heatmap_loss": 0.00064, "acc_pose": 0.79638, "loss": 0.00064, "grad_norm": 0.00249, "time": 0.37359}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.79067, "loss": 0.00064, "grad_norm": 0.00238, "time": 0.32286}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.80854, "loss": 0.00063, "grad_norm": 0.00239, "time": 0.32345}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.80345, "loss": 0.00064, "grad_norm": 0.00263, "time": 0.32227}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00064, "acc_pose": 0.80514, "loss": 0.00064, "grad_norm": 0.00228, "time": 0.32335}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04668, "heatmap_loss": 0.00063, "acc_pose": 0.80378, "loss": 0.00063, "grad_norm": 0.00226, "time": 0.37463}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.80854, "loss": 0.00063, "grad_norm": 0.00234, "time": 0.32296}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.80346, "loss": 0.00063, "grad_norm": 0.00223, "time": 0.32441}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00064, "acc_pose": 0.79795, "loss": 0.00064, "grad_norm": 0.00262, "time": 0.32383}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80019, "loss": 0.00063, "grad_norm": 0.00266, "time": 0.32397}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04672, "heatmap_loss": 0.00063, "acc_pose": 0.79098, "loss": 0.00063, "grad_norm": 0.00234, "time": 0.37553}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.80415, "loss": 0.00063, "grad_norm": 0.00239, "time": 0.32501}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.79769, "loss": 0.00063, "grad_norm": 0.00268, "time": 0.32599}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00063, "acc_pose": 0.79619, "loss": 0.00063, "grad_norm": 0.0025, "time": 0.32387}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.80829, "loss": 0.00063, "grad_norm": 0.00232, "time": 0.32519}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00063, "acc_pose": 0.79413, "loss": 0.00063, "grad_norm": 0.00248, "time": 0.37406}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00063, "acc_pose": 0.79704, "loss": 0.00063, "grad_norm": 0.0023, "time": 0.32319}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.80057, "loss": 0.00063, "grad_norm": 0.00226, "time": 0.32433}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.81598, "loss": 0.00063, "grad_norm": 0.00233, "time": 0.32215}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.78691, "loss": 0.00063, "grad_norm": 0.00241, "time": 0.32327}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04681, "heatmap_loss": 0.00063, "acc_pose": 0.79838, "loss": 0.00063, "grad_norm": 0.00231, "time": 0.37246}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.8094, "loss": 0.00063, "grad_norm": 0.00246, "time": 0.32231}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00063, "acc_pose": 0.82011, "loss": 0.00063, "grad_norm": 0.00251, "time": 0.32397}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.8025, "loss": 0.00062, "grad_norm": 0.00198, "time": 0.3224}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00063, "acc_pose": 0.81782, "loss": 0.00063, "grad_norm": 0.00226, "time": 0.32359}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04681, "heatmap_loss": 0.00063, "acc_pose": 0.81769, "loss": 0.00063, "grad_norm": 0.00246, "time": 0.37451}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.79848, "loss": 0.00062, "grad_norm": 0.00228, "time": 0.32261}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.81221, "loss": 0.00062, "grad_norm": 0.00269, "time": 0.3251}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.7865, "loss": 0.00063, "grad_norm": 0.00213, "time": 0.32487}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00063, "acc_pose": 0.80351, "loss": 0.00063, "grad_norm": 0.00223, "time": 0.3245}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04662, "heatmap_loss": 0.00062, "acc_pose": 0.80077, "loss": 0.00062, "grad_norm": 0.00247, "time": 0.37405}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.80651, "loss": 0.00063, "grad_norm": 0.00246, "time": 0.32519}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00063, "acc_pose": 0.80995, "loss": 0.00063, "grad_norm": 0.00258, "time": 0.32624}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.81279, "loss": 0.00062, "grad_norm": 0.0023, "time": 0.32556}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.80071, "loss": 0.00062, "grad_norm": 0.00212, "time": 0.32476}
+{"mode": "val", "epoch": 40, "iter": 204, "lr": 0.0, "AP": 0.77527, "AP .5": 0.90982, "AP .75": 0.84488, "AP (M)": 0.7031, "AP (L)": 0.80372, "AR": 0.82724, "AR .5": 0.94962, "AR .75": 0.88854, "AR (M)": 0.78522, "AR (L)": 0.88844}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04791, "heatmap_loss": 0.00062, "acc_pose": 0.80798, "loss": 0.00062, "grad_norm": 0.00245, "time": 0.37099}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.80335, "loss": 0.00062, "grad_norm": 0.00248, "time": 0.32094}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00062, "acc_pose": 0.81895, "loss": 0.00062, "grad_norm": 0.00207, "time": 0.3256}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00062, "acc_pose": 0.80211, "loss": 0.00062, "grad_norm": 0.00236, "time": 0.32565}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00062, "acc_pose": 0.80961, "loss": 0.00062, "grad_norm": 0.00251, "time": 0.32428}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04633, "heatmap_loss": 0.00062, "acc_pose": 0.81503, "loss": 0.00062, "grad_norm": 0.00216, "time": 0.37276}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.80271, "loss": 0.00062, "grad_norm": 0.0021, "time": 0.32589}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00062, "acc_pose": 0.81108, "loss": 0.00062, "grad_norm": 0.0022, "time": 0.32594}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00061, "acc_pose": 0.81603, "loss": 0.00061, "grad_norm": 0.00224, "time": 0.32518}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00062, "acc_pose": 0.80624, "loss": 0.00062, "grad_norm": 0.00241, "time": 0.32429}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04684, "heatmap_loss": 0.00062, "acc_pose": 0.8041, "loss": 0.00062, "grad_norm": 0.00229, "time": 0.37462}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00061, "acc_pose": 0.81752, "loss": 0.00061, "grad_norm": 0.0022, "time": 0.32539}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.81031, "loss": 0.00061, "grad_norm": 0.00213, "time": 0.32659}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81225, "loss": 0.00062, "grad_norm": 0.00226, "time": 0.32587}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.79246, "loss": 0.00062, "grad_norm": 0.00211, "time": 0.32419}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04685, "heatmap_loss": 0.00061, "acc_pose": 0.80308, "loss": 0.00061, "grad_norm": 0.002, "time": 0.37716}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81306, "loss": 0.00062, "grad_norm": 0.00221, "time": 0.32558}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80618, "loss": 0.00061, "grad_norm": 0.002, "time": 0.3244}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.81786, "loss": 0.00061, "grad_norm": 0.0022, "time": 0.32464}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.80442, "loss": 0.00062, "grad_norm": 0.00221, "time": 0.32578}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04661, "heatmap_loss": 0.00062, "acc_pose": 0.81906, "loss": 0.00062, "grad_norm": 0.00208, "time": 0.37692}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.80903, "loss": 0.00061, "grad_norm": 0.00232, "time": 0.3259}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.79061, "loss": 0.00061, "grad_norm": 0.00202, "time": 0.32505}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80422, "loss": 0.00061, "grad_norm": 0.00246, "time": 0.32514}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00061, "acc_pose": 0.81755, "loss": 0.00061, "grad_norm": 0.00244, "time": 0.32439}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04681, "heatmap_loss": 0.00061, "acc_pose": 0.80317, "loss": 0.00061, "grad_norm": 0.0021, "time": 0.37394}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.82051, "loss": 0.00061, "grad_norm": 0.00219, "time": 0.32411}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00062, "acc_pose": 0.79971, "loss": 0.00062, "grad_norm": 0.00223, "time": 0.32342}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81712, "loss": 0.00061, "grad_norm": 0.00225, "time": 0.32393}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00061, "acc_pose": 0.82368, "loss": 0.00061, "grad_norm": 0.00241, "time": 0.32388}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00061, "acc_pose": 0.81654, "loss": 0.00061, "grad_norm": 0.00208, "time": 0.37596}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00061, "acc_pose": 0.82008, "loss": 0.00061, "grad_norm": 0.00219, "time": 0.32299}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00062, "acc_pose": 0.81185, "loss": 0.00062, "grad_norm": 0.00222, "time": 0.3227}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.8129, "loss": 0.00061, "grad_norm": 0.00221, "time": 0.32316}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.815, "loss": 0.00061, "grad_norm": 0.00214, "time": 0.32539}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00061, "acc_pose": 0.81022, "loss": 0.00061, "grad_norm": 0.00239, "time": 0.37541}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.83373, "loss": 0.0006, "grad_norm": 0.00212, "time": 0.32371}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80039, "loss": 0.00061, "grad_norm": 0.00212, "time": 0.32411}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.81076, "loss": 0.00061, "grad_norm": 0.00212, "time": 0.32457}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.8164, "loss": 0.00061, "grad_norm": 0.002, "time": 0.32445}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00061, "acc_pose": 0.81441, "loss": 0.00061, "grad_norm": 0.0021, "time": 0.37358}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.8015, "loss": 0.00061, "grad_norm": 0.00212, "time": 0.32577}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.80528, "loss": 0.0006, "grad_norm": 0.00213, "time": 0.32353}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.80637, "loss": 0.00061, "grad_norm": 0.00209, "time": 0.32247}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00061, "acc_pose": 0.82295, "loss": 0.00061, "grad_norm": 0.00211, "time": 0.32411}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04653, "heatmap_loss": 0.0006, "acc_pose": 0.83505, "loss": 0.0006, "grad_norm": 0.00213, "time": 0.37412}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00061, "acc_pose": 0.82471, "loss": 0.00061, "grad_norm": 0.00198, "time": 0.32448}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.81746, "loss": 0.0006, "grad_norm": 0.00212, "time": 0.32352}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.81096, "loss": 0.0006, "grad_norm": 0.00216, "time": 0.32281}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.79586, "loss": 0.0006, "grad_norm": 0.00216, "time": 0.32329}
+{"mode": "val", "epoch": 50, "iter": 204, "lr": 0.0, "AP": 0.77826, "AP .5": 0.91146, "AP .75": 0.84951, "AP (M)": 0.70476, "AP (L)": 0.80784, "AR": 0.8304, "AR .5": 0.95025, "AR .75": 0.8931, "AR (M)": 0.78913, "AR (L)": 0.89082}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04772, "heatmap_loss": 0.0006, "acc_pose": 0.81534, "loss": 0.0006, "grad_norm": 0.00215, "time": 0.37225}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0006, "acc_pose": 0.81989, "loss": 0.0006, "grad_norm": 0.00226, "time": 0.32598}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.81183, "loss": 0.0006, "grad_norm": 0.00212, "time": 0.32561}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80716, "loss": 0.00061, "grad_norm": 0.0021, "time": 0.32582}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.80474, "loss": 0.0006, "grad_norm": 0.00206, "time": 0.3254}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0465, "heatmap_loss": 0.0006, "acc_pose": 0.81029, "loss": 0.0006, "grad_norm": 0.00202, "time": 0.37238}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.81564, "loss": 0.0006, "grad_norm": 0.00236, "time": 0.32469}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.8246, "loss": 0.00061, "grad_norm": 0.00212, "time": 0.32499}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.82232, "loss": 0.0006, "grad_norm": 0.00215, "time": 0.32407}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80406, "loss": 0.00061, "grad_norm": 0.00223, "time": 0.32436}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04648, "heatmap_loss": 0.0006, "acc_pose": 0.81122, "loss": 0.0006, "grad_norm": 0.00208, "time": 0.3744}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.81502, "loss": 0.0006, "grad_norm": 0.0021, "time": 0.32329}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.82556, "loss": 0.0006, "grad_norm": 0.00248, "time": 0.32325}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.81675, "loss": 0.0006, "grad_norm": 0.0021, "time": 0.32332}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.82025, "loss": 0.0006, "grad_norm": 0.00216, "time": 0.3242}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04679, "heatmap_loss": 0.0006, "acc_pose": 0.80938, "loss": 0.0006, "grad_norm": 0.00189, "time": 0.37577}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.82723, "loss": 0.0006, "grad_norm": 0.00186, "time": 0.32359}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.82362, "loss": 0.00059, "grad_norm": 0.00207, "time": 0.32349}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.81575, "loss": 0.0006, "grad_norm": 0.00206, "time": 0.32285}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00061, "acc_pose": 0.82504, "loss": 0.00061, "grad_norm": 0.00205, "time": 0.32354}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04692, "heatmap_loss": 0.0006, "acc_pose": 0.82531, "loss": 0.0006, "grad_norm": 0.00222, "time": 0.37591}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.0006, "acc_pose": 0.82411, "loss": 0.0006, "grad_norm": 0.00192, "time": 0.32524}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0006, "acc_pose": 0.80836, "loss": 0.0006, "grad_norm": 0.00229, "time": 0.32515}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82613, "loss": 0.00059, "grad_norm": 0.00194, "time": 0.3268}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.81638, "loss": 0.00059, "grad_norm": 0.00205, "time": 0.32444}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0466, "heatmap_loss": 0.0006, "acc_pose": 0.80914, "loss": 0.0006, "grad_norm": 0.00202, "time": 0.37639}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0006, "acc_pose": 0.83261, "loss": 0.0006, "grad_norm": 0.00194, "time": 0.32552}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.81487, "loss": 0.00059, "grad_norm": 0.00204, "time": 0.32555}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.81048, "loss": 0.0006, "grad_norm": 0.00197, "time": 0.32534}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.81336, "loss": 0.0006, "grad_norm": 0.00196, "time": 0.32759}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.0006, "acc_pose": 0.81712, "loss": 0.0006, "grad_norm": 0.00203, "time": 0.37999}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.81657, "loss": 0.00059, "grad_norm": 0.00213, "time": 0.32524}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.8166, "loss": 0.00059, "grad_norm": 0.00198, "time": 0.32649}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.82524, "loss": 0.00059, "grad_norm": 0.0022, "time": 0.32619}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0006, "acc_pose": 0.80569, "loss": 0.0006, "grad_norm": 0.00196, "time": 0.32309}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04711, "heatmap_loss": 0.0006, "acc_pose": 0.81482, "loss": 0.0006, "grad_norm": 0.00227, "time": 0.37658}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00056, "heatmap_loss": 0.00059, "acc_pose": 0.81926, "loss": 0.00059, "grad_norm": 0.00233, "time": 0.32597}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00059, "acc_pose": 0.82175, "loss": 0.00059, "grad_norm": 0.00202, "time": 0.32441}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.81385, "loss": 0.00059, "grad_norm": 0.00209, "time": 0.32461}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00059, "acc_pose": 0.80919, "loss": 0.00059, "grad_norm": 0.00203, "time": 0.32585}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04734, "heatmap_loss": 0.00059, "acc_pose": 0.82157, "loss": 0.00059, "grad_norm": 0.00188, "time": 0.37461}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00059, "acc_pose": 0.82724, "loss": 0.00059, "grad_norm": 0.00198, "time": 0.32555}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00059, "acc_pose": 0.82494, "loss": 0.00059, "grad_norm": 0.00186, "time": 0.32434}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00058, "acc_pose": 0.81015, "loss": 0.00058, "grad_norm": 0.00191, "time": 0.32457}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00059, "acc_pose": 0.83173, "loss": 0.00059, "grad_norm": 0.00199, "time": 0.3248}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04732, "heatmap_loss": 0.00059, "acc_pose": 0.83082, "loss": 0.00059, "grad_norm": 0.00192, "time": 0.38675}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82079, "loss": 0.00059, "grad_norm": 0.00193, "time": 0.32474}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.82183, "loss": 0.00059, "grad_norm": 0.00203, "time": 0.32465}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.82888, "loss": 0.00059, "grad_norm": 0.002, "time": 0.32343}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81649, "loss": 0.00059, "grad_norm": 0.00198, "time": 0.32449}
+{"mode": "val", "epoch": 60, "iter": 204, "lr": 0.0, "AP": 0.7812, "AP .5": 0.91322, "AP .75": 0.85134, "AP (M)": 0.70776, "AP (L)": 0.81289, "AR": 0.83201, "AR .5": 0.95057, "AR .75": 0.89263, "AR (M)": 0.78995, "AR (L)": 0.89365}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04806, "heatmap_loss": 0.00059, "acc_pose": 0.81255, "loss": 0.00059, "grad_norm": 0.00192, "time": 0.37146}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00059, "acc_pose": 0.81422, "loss": 0.00059, "grad_norm": 0.00211, "time": 0.32402}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82988, "loss": 0.00058, "grad_norm": 0.00205, "time": 0.32494}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.81741, "loss": 0.00059, "grad_norm": 0.0022, "time": 0.32623}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00059, "acc_pose": 0.8299, "loss": 0.00059, "grad_norm": 0.00192, "time": 0.32394}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04738, "heatmap_loss": 0.00059, "acc_pose": 0.81353, "loss": 0.00059, "grad_norm": 0.00193, "time": 0.37389}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00059, "acc_pose": 0.82365, "loss": 0.00059, "grad_norm": 0.00201, "time": 0.32446}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00059, "acc_pose": 0.82347, "loss": 0.00059, "grad_norm": 0.00189, "time": 0.32509}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00059, "acc_pose": 0.82416, "loss": 0.00059, "grad_norm": 0.00204, "time": 0.32465}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00056, "heatmap_loss": 0.00059, "acc_pose": 0.81794, "loss": 0.00059, "grad_norm": 0.00175, "time": 0.32445}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04702, "heatmap_loss": 0.00058, "acc_pose": 0.81888, "loss": 0.00058, "grad_norm": 0.00205, "time": 0.37401}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00059, "acc_pose": 0.81626, "loss": 0.00059, "grad_norm": 0.00185, "time": 0.324}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00059, "acc_pose": 0.81957, "loss": 0.00059, "grad_norm": 0.00193, "time": 0.32397}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.81482, "loss": 0.00058, "grad_norm": 0.00179, "time": 0.32295}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.82809, "loss": 0.00059, "grad_norm": 0.00181, "time": 0.32526}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04692, "heatmap_loss": 0.00058, "acc_pose": 0.81634, "loss": 0.00058, "grad_norm": 0.00194, "time": 0.37399}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82989, "loss": 0.00058, "grad_norm": 0.00181, "time": 0.32216}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.81992, "loss": 0.00059, "grad_norm": 0.002, "time": 0.32305}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.80223, "loss": 0.00059, "grad_norm": 0.00202, "time": 0.32214}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.81832, "loss": 0.00058, "grad_norm": 0.00196, "time": 0.32335}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00058, "acc_pose": 0.83473, "loss": 0.00058, "grad_norm": 0.00172, "time": 0.37457}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00059, "acc_pose": 0.82924, "loss": 0.00059, "grad_norm": 0.002, "time": 0.32386}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00059, "acc_pose": 0.82494, "loss": 0.00059, "grad_norm": 0.00191, "time": 0.32293}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.8223, "loss": 0.00058, "grad_norm": 0.00189, "time": 0.32311}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82878, "loss": 0.00058, "grad_norm": 0.00195, "time": 0.32305}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04667, "heatmap_loss": 0.00058, "acc_pose": 0.82713, "loss": 0.00058, "grad_norm": 0.00226, "time": 0.37311}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00058, "acc_pose": 0.82899, "loss": 0.00058, "grad_norm": 0.00207, "time": 0.32468}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00058, "acc_pose": 0.80596, "loss": 0.00058, "grad_norm": 0.002, "time": 0.32382}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00059, "acc_pose": 0.8162, "loss": 0.00059, "grad_norm": 0.00185, "time": 0.3227}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.80683, "loss": 0.00058, "grad_norm": 0.0021, "time": 0.32448}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04725, "heatmap_loss": 0.00059, "acc_pose": 0.82287, "loss": 0.00059, "grad_norm": 0.00217, "time": 0.3741}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00059, "heatmap_loss": 0.00058, "acc_pose": 0.84392, "loss": 0.00058, "grad_norm": 0.00171, "time": 0.32303}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00058, "acc_pose": 0.82026, "loss": 0.00058, "grad_norm": 0.00187, "time": 0.32374}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.81405, "loss": 0.00058, "grad_norm": 0.00187, "time": 0.32247}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00059, "acc_pose": 0.82171, "loss": 0.00059, "grad_norm": 0.00192, "time": 0.32269}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04703, "heatmap_loss": 0.00058, "acc_pose": 0.81843, "loss": 0.00058, "grad_norm": 0.00176, "time": 0.3749}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0006, "heatmap_loss": 0.00058, "acc_pose": 0.81513, "loss": 0.00058, "grad_norm": 0.00173, "time": 0.32419}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00058, "acc_pose": 0.83275, "loss": 0.00058, "grad_norm": 0.00172, "time": 0.32335}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.81304, "loss": 0.00058, "grad_norm": 0.00182, "time": 0.32388}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.83381, "loss": 0.00058, "grad_norm": 0.00192, "time": 0.32232}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04731, "heatmap_loss": 0.00058, "acc_pose": 0.82444, "loss": 0.00058, "grad_norm": 0.00202, "time": 0.37272}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.83099, "loss": 0.00058, "grad_norm": 0.00188, "time": 0.32437}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.8264, "loss": 0.00057, "grad_norm": 0.00187, "time": 0.32445}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82165, "loss": 0.00058, "grad_norm": 0.00183, "time": 0.32371}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.81817, "loss": 0.00058, "grad_norm": 0.00181, "time": 0.32354}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04691, "heatmap_loss": 0.00058, "acc_pose": 0.82072, "loss": 0.00058, "grad_norm": 0.00184, "time": 0.37456}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.82685, "loss": 0.00058, "grad_norm": 0.00191, "time": 0.32248}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00057, "heatmap_loss": 0.00058, "acc_pose": 0.81899, "loss": 0.00058, "grad_norm": 0.00176, "time": 0.32271}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.81657, "loss": 0.00058, "grad_norm": 0.00184, "time": 0.32282}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82357, "loss": 0.00058, "grad_norm": 0.00199, "time": 0.32328}
+{"mode": "val", "epoch": 70, "iter": 204, "lr": 0.0, "AP": 0.78122, "AP .5": 0.91408, "AP .75": 0.85029, "AP (M)": 0.7083, "AP (L)": 0.81029, "AR": 0.83309, "AR .5": 0.95088, "AR .75": 0.89247, "AR (M)": 0.79175, "AR (L)": 0.89365}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04785, "heatmap_loss": 0.00058, "acc_pose": 0.82612, "loss": 0.00058, "grad_norm": 0.00176, "time": 0.37086}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00058, "acc_pose": 0.82591, "loss": 0.00058, "grad_norm": 0.00192, "time": 0.32135}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00058, "acc_pose": 0.81656, "loss": 0.00058, "grad_norm": 0.00191, "time": 0.32212}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.81699, "loss": 0.00058, "grad_norm": 0.002, "time": 0.32615}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.83871, "loss": 0.00058, "grad_norm": 0.00183, "time": 0.32577}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00058, "acc_pose": 0.83006, "loss": 0.00058, "grad_norm": 0.00195, "time": 0.37481}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.83039, "loss": 0.00058, "grad_norm": 0.00191, "time": 0.32499}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82023, "loss": 0.00058, "grad_norm": 0.00202, "time": 0.32552}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.83315, "loss": 0.00058, "grad_norm": 0.00195, "time": 0.32443}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.81654, "loss": 0.00058, "grad_norm": 0.00176, "time": 0.32468}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04676, "heatmap_loss": 0.00058, "acc_pose": 0.81892, "loss": 0.00058, "grad_norm": 0.00195, "time": 0.37514}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00058, "acc_pose": 0.83026, "loss": 0.00058, "grad_norm": 0.00193, "time": 0.3222}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00058, "acc_pose": 0.82083, "loss": 0.00058, "grad_norm": 0.00186, "time": 0.32231}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.8325, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.32216}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.82851, "loss": 0.00057, "grad_norm": 0.00161, "time": 0.32237}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04684, "heatmap_loss": 0.00057, "acc_pose": 0.82623, "loss": 0.00057, "grad_norm": 0.00187, "time": 0.37475}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.81404, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.32289}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00057, "acc_pose": 0.80968, "loss": 0.00057, "grad_norm": 0.00183, "time": 0.32378}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.81875, "loss": 0.00057, "grad_norm": 0.00188, "time": 0.32325}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.8195, "loss": 0.00058, "grad_norm": 0.00187, "time": 0.32299}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04692, "heatmap_loss": 0.00058, "acc_pose": 0.82799, "loss": 0.00058, "grad_norm": 0.00168, "time": 0.37366}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00057, "acc_pose": 0.82129, "loss": 0.00057, "grad_norm": 0.00168, "time": 0.32404}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82157, "loss": 0.00057, "grad_norm": 0.00184, "time": 0.32246}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.81599, "loss": 0.00058, "grad_norm": 0.00184, "time": 0.32304}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82517, "loss": 0.00057, "grad_norm": 0.00191, "time": 0.32373}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00057, "acc_pose": 0.8244, "loss": 0.00057, "grad_norm": 0.00181, "time": 0.37642}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00057, "acc_pose": 0.8268, "loss": 0.00057, "grad_norm": 0.00167, "time": 0.32435}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.83775, "loss": 0.00057, "grad_norm": 0.00185, "time": 0.32268}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82536, "loss": 0.00057, "grad_norm": 0.00198, "time": 0.3231}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00058, "acc_pose": 0.82471, "loss": 0.00058, "grad_norm": 0.00186, "time": 0.32249}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04677, "heatmap_loss": 0.00057, "acc_pose": 0.81674, "loss": 0.00057, "grad_norm": 0.00181, "time": 0.37376}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00058, "acc_pose": 0.82685, "loss": 0.00058, "grad_norm": 0.00186, "time": 0.32347}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82432, "loss": 0.00057, "grad_norm": 0.00157, "time": 0.32212}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82485, "loss": 0.00057, "grad_norm": 0.00174, "time": 0.32259}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82895, "loss": 0.00057, "grad_norm": 0.00189, "time": 0.32295}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04682, "heatmap_loss": 0.00057, "acc_pose": 0.83481, "loss": 0.00057, "grad_norm": 0.0018, "time": 0.37486}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.84069, "loss": 0.00056, "grad_norm": 0.00181, "time": 0.32349}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.84064, "loss": 0.00057, "grad_norm": 0.00192, "time": 0.32375}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.81104, "loss": 0.00057, "grad_norm": 0.00184, "time": 0.32362}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83269, "loss": 0.00057, "grad_norm": 0.0017, "time": 0.32246}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00057, "acc_pose": 0.83341, "loss": 0.00057, "grad_norm": 0.00189, "time": 0.37313}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00057, "acc_pose": 0.83499, "loss": 0.00057, "grad_norm": 0.002, "time": 0.32287}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00057, "acc_pose": 0.82071, "loss": 0.00057, "grad_norm": 0.00174, "time": 0.32277}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.84178, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.32267}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82363, "loss": 0.00057, "grad_norm": 0.00173, "time": 0.32322}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04689, "heatmap_loss": 0.00057, "acc_pose": 0.83408, "loss": 0.00057, "grad_norm": 0.00168, "time": 0.3747}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82497, "loss": 0.00057, "grad_norm": 0.00183, "time": 0.32281}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82721, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.325}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82681, "loss": 0.00057, "grad_norm": 0.00173, "time": 0.32579}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.81293, "loss": 0.00057, "grad_norm": 0.0017, "time": 0.32525}
+{"mode": "val", "epoch": 80, "iter": 204, "lr": 0.0, "AP": 0.78264, "AP .5": 0.91413, "AP .75": 0.85018, "AP (M)": 0.70972, "AP (L)": 0.81137, "AR": 0.83426, "AR .5": 0.95198, "AR .75": 0.89247, "AR (M)": 0.79323, "AR (L)": 0.89498}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0479, "heatmap_loss": 0.00057, "acc_pose": 0.82726, "loss": 0.00057, "grad_norm": 0.00171, "time": 0.37028}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.83478, "loss": 0.00057, "grad_norm": 0.00188, "time": 0.32128}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82884, "loss": 0.00057, "grad_norm": 0.00192, "time": 0.32139}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82936, "loss": 0.00057, "grad_norm": 0.00177, "time": 0.32253}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82465, "loss": 0.00057, "grad_norm": 0.00181, "time": 0.32386}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04659, "heatmap_loss": 0.00057, "acc_pose": 0.82879, "loss": 0.00057, "grad_norm": 0.00182, "time": 0.37249}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.8477, "loss": 0.00057, "grad_norm": 0.0018, "time": 0.32238}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.82578, "loss": 0.00057, "grad_norm": 0.00171, "time": 0.32358}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83724, "loss": 0.00057, "grad_norm": 0.00187, "time": 0.3224}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00057, "acc_pose": 0.81853, "loss": 0.00057, "grad_norm": 0.00176, "time": 0.3237}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04676, "heatmap_loss": 0.00057, "acc_pose": 0.81293, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.37295}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82644, "loss": 0.00057, "grad_norm": 0.00171, "time": 0.32344}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.82546, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.32218}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00057, "acc_pose": 0.82836, "loss": 0.00057, "grad_norm": 0.0016, "time": 0.32259}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00057, "acc_pose": 0.8386, "loss": 0.00057, "grad_norm": 0.0017, "time": 0.32218}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04689, "heatmap_loss": 0.00057, "acc_pose": 0.82124, "loss": 0.00057, "grad_norm": 0.00164, "time": 0.37477}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83104, "loss": 0.00056, "grad_norm": 0.00167, "time": 0.32263}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.82701, "loss": 0.00056, "grad_norm": 0.00165, "time": 0.32307}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.83927, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.32301}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00057, "acc_pose": 0.82391, "loss": 0.00057, "grad_norm": 0.00181, "time": 0.32375}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00057, "acc_pose": 0.8238, "loss": 0.00057, "grad_norm": 0.00168, "time": 0.3741}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.8354, "loss": 0.00057, "grad_norm": 0.00154, "time": 0.32326}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.83149, "loss": 0.00057, "grad_norm": 0.00183, "time": 0.32388}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00057, "acc_pose": 0.83354, "loss": 0.00057, "grad_norm": 0.00175, "time": 0.32291}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00056, "acc_pose": 0.84317, "loss": 0.00056, "grad_norm": 0.0018, "time": 0.32255}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04958, "heatmap_loss": 0.00057, "acc_pose": 0.82247, "loss": 0.00057, "grad_norm": 0.00173, "time": 0.37489}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.82678, "loss": 0.00056, "grad_norm": 0.00174, "time": 0.32343}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.83642, "loss": 0.00057, "grad_norm": 0.00174, "time": 0.3247}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00057, "acc_pose": 0.82733, "loss": 0.00057, "grad_norm": 0.00174, "time": 0.32363}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.82931, "loss": 0.00056, "grad_norm": 0.00169, "time": 0.3238}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0466, "heatmap_loss": 0.00056, "acc_pose": 0.84047, "loss": 0.00056, "grad_norm": 0.00168, "time": 0.37501}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00056, "acc_pose": 0.82529, "loss": 0.00056, "grad_norm": 0.00168, "time": 0.32525}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00056, "acc_pose": 0.83666, "loss": 0.00056, "grad_norm": 0.00171, "time": 0.32614}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83482, "loss": 0.00056, "grad_norm": 0.00168, "time": 0.3257}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.83973, "loss": 0.00056, "grad_norm": 0.00162, "time": 0.32486}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04661, "heatmap_loss": 0.00056, "acc_pose": 0.82566, "loss": 0.00056, "grad_norm": 0.00175, "time": 0.37489}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00056, "acc_pose": 0.82147, "loss": 0.00056, "grad_norm": 0.00164, "time": 0.32552}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.84105, "loss": 0.00056, "grad_norm": 0.00155, "time": 0.32588}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83633, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.32463}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.8416, "loss": 0.00056, "grad_norm": 0.00181, "time": 0.3244}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00056, "acc_pose": 0.83969, "loss": 0.00056, "grad_norm": 0.00167, "time": 0.37469}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.84024, "loss": 0.00056, "grad_norm": 0.00163, "time": 0.32414}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00057, "acc_pose": 0.83663, "loss": 0.00057, "grad_norm": 0.00178, "time": 0.32295}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83602, "loss": 0.00056, "grad_norm": 0.00158, "time": 0.32184}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83117, "loss": 0.00056, "grad_norm": 0.00159, "time": 0.32251}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00056, "acc_pose": 0.82081, "loss": 0.00056, "grad_norm": 0.00177, "time": 0.37256}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83686, "loss": 0.00056, "grad_norm": 0.00166, "time": 0.32191}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83808, "loss": 0.00056, "grad_norm": 0.00156, "time": 0.3227}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84136, "loss": 0.00056, "grad_norm": 0.00172, "time": 0.32301}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.82473, "loss": 0.00056, "grad_norm": 0.00159, "time": 0.32292}
+{"mode": "val", "epoch": 90, "iter": 204, "lr": 0.0, "AP": 0.7844, "AP .5": 0.91422, "AP .75": 0.85271, "AP (M)": 0.71217, "AP (L)": 0.81393, "AR": 0.83534, "AR .5": 0.95151, "AR .75": 0.89484, "AR (M)": 0.79405, "AR (L)": 0.89654}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04762, "heatmap_loss": 0.00056, "acc_pose": 0.83593, "loss": 0.00056, "grad_norm": 0.00178, "time": 0.36886}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.82744, "loss": 0.00056, "grad_norm": 0.00161, "time": 0.32141}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00056, "acc_pose": 0.82708, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.32312}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83185, "loss": 0.00056, "grad_norm": 0.00167, "time": 0.32204}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83184, "loss": 0.00056, "grad_norm": 0.00169, "time": 0.32417}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00056, "acc_pose": 0.85065, "loss": 0.00056, "grad_norm": 0.00164, "time": 0.37356}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00056, "acc_pose": 0.83257, "loss": 0.00056, "grad_norm": 0.00176, "time": 0.32513}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00056, "acc_pose": 0.81859, "loss": 0.00056, "grad_norm": 0.00169, "time": 0.32552}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.82274, "loss": 0.00056, "grad_norm": 0.00176, "time": 0.32501}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.82962, "loss": 0.00056, "grad_norm": 0.00156, "time": 0.32532}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04677, "heatmap_loss": 0.00056, "acc_pose": 0.82971, "loss": 0.00056, "grad_norm": 0.00166, "time": 0.3754}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.83076, "loss": 0.00056, "grad_norm": 0.00163, "time": 0.32471}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.84144, "loss": 0.00056, "grad_norm": 0.00157, "time": 0.32569}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83866, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.32308}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.85047, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.32385}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00055, "acc_pose": 0.83607, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.37658}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.84064, "loss": 0.00056, "grad_norm": 0.00169, "time": 0.32368}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.8235, "loss": 0.00056, "grad_norm": 0.00157, "time": 0.32566}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.83957, "loss": 0.00056, "grad_norm": 0.00158, "time": 0.32377}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83991, "loss": 0.00055, "grad_norm": 0.00182, "time": 0.3251}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04701, "heatmap_loss": 0.00056, "acc_pose": 0.82319, "loss": 0.00056, "grad_norm": 0.0017, "time": 0.3738}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83785, "loss": 0.00055, "grad_norm": 0.00167, "time": 0.32317}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.82905, "loss": 0.00056, "grad_norm": 0.00179, "time": 0.32278}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83589, "loss": 0.00055, "grad_norm": 0.00161, "time": 0.32307}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84266, "loss": 0.00056, "grad_norm": 0.00184, "time": 0.32518}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04659, "heatmap_loss": 0.00055, "acc_pose": 0.84008, "loss": 0.00055, "grad_norm": 0.00176, "time": 0.37333}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84531, "loss": 0.00055, "grad_norm": 0.00166, "time": 0.32317}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83799, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.3224}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84133, "loss": 0.00056, "grad_norm": 0.00162, "time": 0.32297}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00056, "acc_pose": 0.83151, "loss": 0.00056, "grad_norm": 0.00161, "time": 0.32359}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04699, "heatmap_loss": 0.00056, "acc_pose": 0.83488, "loss": 0.00056, "grad_norm": 0.00175, "time": 0.37326}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.82973, "loss": 0.00055, "grad_norm": 0.00172, "time": 0.32274}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84194, "loss": 0.00055, "grad_norm": 0.00159, "time": 0.32246}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.82935, "loss": 0.00056, "grad_norm": 0.00149, "time": 0.32407}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.8308, "loss": 0.00056, "grad_norm": 0.00157, "time": 0.32269}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04676, "heatmap_loss": 0.00055, "acc_pose": 0.83556, "loss": 0.00055, "grad_norm": 0.00159, "time": 0.37354}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.8335, "loss": 0.00056, "grad_norm": 0.00165, "time": 0.32389}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83886, "loss": 0.00055, "grad_norm": 0.00157, "time": 0.32353}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8244, "loss": 0.00055, "grad_norm": 0.00158, "time": 0.32398}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84274, "loss": 0.00056, "grad_norm": 0.00167, "time": 0.32236}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04654, "heatmap_loss": 0.00055, "acc_pose": 0.8453, "loss": 0.00055, "grad_norm": 0.00152, "time": 0.37594}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.84524, "loss": 0.00055, "grad_norm": 0.00167, "time": 0.3263}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00056, "acc_pose": 0.82581, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.32426}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00056, "acc_pose": 0.84046, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.32439}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83108, "loss": 0.00055, "grad_norm": 0.00173, "time": 0.32358}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04728, "heatmap_loss": 0.00055, "acc_pose": 0.8458, "loss": 0.00055, "grad_norm": 0.0016, "time": 0.37394}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83713, "loss": 0.00055, "grad_norm": 0.00148, "time": 0.32401}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.83033, "loss": 0.00056, "grad_norm": 0.00156, "time": 0.32336}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84711, "loss": 0.00056, "grad_norm": 0.00159, "time": 0.32336}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00065, "heatmap_loss": 0.00056, "acc_pose": 0.82294, "loss": 0.00056, "grad_norm": 0.00158, "time": 0.32299}
+{"mode": "val", "epoch": 100, "iter": 204, "lr": 0.0, "AP": 0.78551, "AP .5": 0.91569, "AP .75": 0.85367, "AP (M)": 0.71227, "AP (L)": 0.81505, "AR": 0.83679, "AR .5": 0.95309, "AR .75": 0.89657, "AR (M)": 0.79577, "AR (L)": 0.8974}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04792, "heatmap_loss": 0.00055, "acc_pose": 0.83899, "loss": 0.00055, "grad_norm": 0.00147, "time": 0.37107}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.8393, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.32244}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83298, "loss": 0.00056, "grad_norm": 0.00158, "time": 0.32177}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00056, "acc_pose": 0.83714, "loss": 0.00056, "grad_norm": 0.00148, "time": 0.3237}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82673, "loss": 0.00055, "grad_norm": 0.00165, "time": 0.32159}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04671, "heatmap_loss": 0.00055, "acc_pose": 0.83198, "loss": 0.00055, "grad_norm": 0.00156, "time": 0.3709}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83533, "loss": 0.00055, "grad_norm": 0.00157, "time": 0.32212}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00055, "acc_pose": 0.83127, "loss": 0.00055, "grad_norm": 0.00159, "time": 0.32171}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83327, "loss": 0.00055, "grad_norm": 0.00161, "time": 0.32389}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84696, "loss": 0.00055, "grad_norm": 0.00152, "time": 0.32163}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04671, "heatmap_loss": 0.00055, "acc_pose": 0.83395, "loss": 0.00055, "grad_norm": 0.00167, "time": 0.3742}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00055, "acc_pose": 0.83654, "loss": 0.00055, "grad_norm": 0.00161, "time": 0.32353}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84397, "loss": 0.00055, "grad_norm": 0.00168, "time": 0.32231}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.82611, "loss": 0.00055, "grad_norm": 0.00148, "time": 0.32384}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.84366, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.32263}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.00055, "acc_pose": 0.8418, "loss": 0.00055, "grad_norm": 0.00162, "time": 0.37398}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83313, "loss": 0.00055, "grad_norm": 0.00141, "time": 0.32322}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82789, "loss": 0.00055, "grad_norm": 0.00163, "time": 0.32318}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83747, "loss": 0.00055, "grad_norm": 0.00175, "time": 0.32463}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.82998, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.32483}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04684, "heatmap_loss": 0.00055, "acc_pose": 0.8364, "loss": 0.00055, "grad_norm": 0.00168, "time": 0.37736}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83504, "loss": 0.00055, "grad_norm": 0.00167, "time": 0.32586}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.82722, "loss": 0.00055, "grad_norm": 0.00156, "time": 0.32452}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83675, "loss": 0.00055, "grad_norm": 0.00161, "time": 0.32424}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.82881, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.32419}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04655, "heatmap_loss": 0.00055, "acc_pose": 0.82801, "loss": 0.00055, "grad_norm": 0.00159, "time": 0.3729}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83482, "loss": 0.00055, "grad_norm": 0.00148, "time": 0.32245}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.84111, "loss": 0.00055, "grad_norm": 0.00152, "time": 0.323}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84235, "loss": 0.00055, "grad_norm": 0.00155, "time": 0.32376}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83476, "loss": 0.00055, "grad_norm": 0.00146, "time": 0.32343}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04692, "heatmap_loss": 0.00055, "acc_pose": 0.8354, "loss": 0.00055, "grad_norm": 0.00164, "time": 0.37461}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00055, "acc_pose": 0.834, "loss": 0.00055, "grad_norm": 0.00154, "time": 0.32222}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00044, "heatmap_loss": 0.00055, "acc_pose": 0.84247, "loss": 0.00055, "grad_norm": 0.00156, "time": 0.32232}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83295, "loss": 0.00055, "grad_norm": 0.0015, "time": 0.32308}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.83696, "loss": 0.00055, "grad_norm": 0.00165, "time": 0.32297}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04686, "heatmap_loss": 0.00055, "acc_pose": 0.83707, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.37517}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83565, "loss": 0.00055, "grad_norm": 0.00154, "time": 0.32325}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82455, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.32333}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84403, "loss": 0.00055, "grad_norm": 0.00157, "time": 0.32281}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00055, "acc_pose": 0.84203, "loss": 0.00055, "grad_norm": 0.00153, "time": 0.3227}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04687, "heatmap_loss": 0.00054, "acc_pose": 0.83285, "loss": 0.00054, "grad_norm": 0.00155, "time": 0.37332}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.8352, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.32251}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00056, "acc_pose": 0.84333, "loss": 0.00056, "grad_norm": 0.0015, "time": 0.32218}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83545, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.32183}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83334, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.32216}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04699, "heatmap_loss": 0.00055, "acc_pose": 0.83143, "loss": 0.00055, "grad_norm": 0.00151, "time": 0.37369}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.82743, "loss": 0.00055, "grad_norm": 0.00155, "time": 0.32273}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00055, "acc_pose": 0.83242, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.32285}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83825, "loss": 0.00054, "grad_norm": 0.00156, "time": 0.3224}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0004, "heatmap_loss": 0.00055, "acc_pose": 0.83209, "loss": 0.00055, "grad_norm": 0.00155, "time": 0.32354}
+{"mode": "val", "epoch": 110, "iter": 204, "lr": 0.0, "AP": 0.78462, "AP .5": 0.91392, "AP .75": 0.85323, "AP (M)": 0.71225, "AP (L)": 0.81267, "AR": 0.83627, "AR .5": 0.95167, "AR .75": 0.89625, "AR (M)": 0.7953, "AR (L)": 0.89647}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04644, "heatmap_loss": 0.00054, "acc_pose": 0.82911, "loss": 0.00054, "grad_norm": 0.00145, "time": 0.36888}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.84072, "loss": 0.00054, "grad_norm": 0.00146, "time": 0.32182}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.8407, "loss": 0.00055, "grad_norm": 0.00167, "time": 0.32188}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83206, "loss": 0.00055, "grad_norm": 0.00158, "time": 0.32166}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82827, "loss": 0.00055, "grad_norm": 0.00155, "time": 0.32261}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00054, "acc_pose": 0.84251, "loss": 0.00054, "grad_norm": 0.00162, "time": 0.374}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.83644, "loss": 0.00055, "grad_norm": 0.00154, "time": 0.32268}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.85086, "loss": 0.00054, "grad_norm": 0.00143, "time": 0.32322}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84343, "loss": 0.00055, "grad_norm": 0.00163, "time": 0.32294}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.84368, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.3241}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04682, "heatmap_loss": 0.00054, "acc_pose": 0.84573, "loss": 0.00054, "grad_norm": 0.00145, "time": 0.37316}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.84943, "loss": 0.00055, "grad_norm": 0.0015, "time": 0.32319}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.83476, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.32209}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00056, "heatmap_loss": 0.00054, "acc_pose": 0.84924, "loss": 0.00054, "grad_norm": 0.00151, "time": 0.32277}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84131, "loss": 0.00054, "grad_norm": 0.00153, "time": 0.32338}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04953, "heatmap_loss": 0.00055, "acc_pose": 0.83987, "loss": 0.00055, "grad_norm": 0.00155, "time": 0.37338}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.84475, "loss": 0.00054, "grad_norm": 0.00136, "time": 0.32312}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83363, "loss": 0.00054, "grad_norm": 0.00141, "time": 0.32353}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84499, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.32325}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83971, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32303}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00055, "acc_pose": 0.83767, "loss": 0.00055, "grad_norm": 0.00196, "time": 0.37367}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84253, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.32327}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83707, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.32372}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84355, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.3225}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00055, "acc_pose": 0.82501, "loss": 0.00055, "grad_norm": 0.00142, "time": 0.32182}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04698, "heatmap_loss": 0.00055, "acc_pose": 0.84829, "loss": 0.00055, "grad_norm": 0.00164, "time": 0.37295}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.82977, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32459}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83735, "loss": 0.00054, "grad_norm": 0.00134, "time": 0.32498}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.85713, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.32484}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84761, "loss": 0.00054, "grad_norm": 0.00136, "time": 0.32527}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04693, "heatmap_loss": 0.00054, "acc_pose": 0.82432, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.37388}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83854, "loss": 0.00055, "grad_norm": 0.00147, "time": 0.32426}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84439, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.3232}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.84993, "loss": 0.00054, "grad_norm": 0.0015, "time": 0.32248}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83644, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.32225}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04697, "heatmap_loss": 0.00054, "acc_pose": 0.84206, "loss": 0.00054, "grad_norm": 0.00156, "time": 0.37273}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.84147, "loss": 0.00054, "grad_norm": 0.00146, "time": 0.32342}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00054, "acc_pose": 0.84558, "loss": 0.00054, "grad_norm": 0.00138, "time": 0.32496}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.82912, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.32337}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00055, "acc_pose": 0.84626, "loss": 0.00055, "grad_norm": 0.00142, "time": 0.32292}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00054, "acc_pose": 0.84743, "loss": 0.00054, "grad_norm": 0.00143, "time": 0.37416}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83944, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.32276}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84383, "loss": 0.00054, "grad_norm": 0.00159, "time": 0.32298}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84903, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.32209}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84025, "loss": 0.00054, "grad_norm": 0.0016, "time": 0.32471}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04668, "heatmap_loss": 0.00054, "acc_pose": 0.84816, "loss": 0.00054, "grad_norm": 0.00138, "time": 0.37537}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84777, "loss": 0.00053, "grad_norm": 0.00145, "time": 0.3228}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.8423, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.32323}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00055, "acc_pose": 0.83179, "loss": 0.00055, "grad_norm": 0.00142, "time": 0.32332}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84624, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.32333}
+{"mode": "val", "epoch": 120, "iter": 204, "lr": 0.0, "AP": 0.78706, "AP .5": 0.91593, "AP .75": 0.85463, "AP (M)": 0.71543, "AP (L)": 0.81557, "AR": 0.83764, "AR .5": 0.95403, "AR .75": 0.89657, "AR (M)": 0.79798, "AR (L)": 0.8968}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04817, "heatmap_loss": 0.00054, "acc_pose": 0.83374, "loss": 0.00054, "grad_norm": 0.00149, "time": 0.37107}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.85857, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.3242}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84313, "loss": 0.00054, "grad_norm": 0.00152, "time": 0.32244}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83581, "loss": 0.00054, "grad_norm": 0.00153, "time": 0.32481}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84806, "loss": 0.00054, "grad_norm": 0.0015, "time": 0.32497}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04997, "heatmap_loss": 0.00054, "acc_pose": 0.83863, "loss": 0.00054, "grad_norm": 0.00153, "time": 0.37481}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84265, "loss": 0.00054, "grad_norm": 0.00136, "time": 0.32485}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84507, "loss": 0.00054, "grad_norm": 0.00156, "time": 0.32394}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83637, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.32511}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.82739, "loss": 0.00054, "grad_norm": 0.00151, "time": 0.32509}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00054, "acc_pose": 0.83431, "loss": 0.00054, "grad_norm": 0.00151, "time": 0.37499}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83991, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32408}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.84679, "loss": 0.00054, "grad_norm": 0.00139, "time": 0.32283}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00054, "acc_pose": 0.84322, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.3243}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84929, "loss": 0.00053, "grad_norm": 0.00151, "time": 0.32328}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04691, "heatmap_loss": 0.00054, "acc_pose": 0.8455, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.37194}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.85055, "loss": 0.00054, "grad_norm": 0.00155, "time": 0.32324}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83033, "loss": 0.00054, "grad_norm": 0.00165, "time": 0.32368}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00059, "heatmap_loss": 0.00054, "acc_pose": 0.84022, "loss": 0.00054, "grad_norm": 0.00148, "time": 0.32213}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.85678, "loss": 0.00054, "grad_norm": 0.00145, "time": 0.32283}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00054, "acc_pose": 0.8416, "loss": 0.00054, "grad_norm": 0.00147, "time": 0.37652}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83447, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32439}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.847, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32455}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.84383, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32404}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00054, "acc_pose": 0.83526, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.32371}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04689, "heatmap_loss": 0.00054, "acc_pose": 0.83938, "loss": 0.00054, "grad_norm": 0.00151, "time": 0.37445}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84382, "loss": 0.00054, "grad_norm": 0.00152, "time": 0.32262}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.8484, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32259}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84793, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.32263}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84912, "loss": 0.00054, "grad_norm": 0.00135, "time": 0.32244}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04667, "heatmap_loss": 0.00053, "acc_pose": 0.84154, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.37556}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83471, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.32383}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84061, "loss": 0.00053, "grad_norm": 0.00137, "time": 0.32467}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84952, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.32376}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.83731, "loss": 0.00054, "grad_norm": 0.00162, "time": 0.32447}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04707, "heatmap_loss": 0.00053, "acc_pose": 0.84829, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.37635}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84218, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.32391}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.8433, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.32507}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.84648, "loss": 0.00054, "grad_norm": 0.00143, "time": 0.32347}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.8401, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.32287}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04698, "heatmap_loss": 0.00054, "acc_pose": 0.83464, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.37396}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00053, "acc_pose": 0.84045, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.32259}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.8342, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.324}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83954, "loss": 0.00053, "grad_norm": 0.00135, "time": 0.32246}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.83573, "loss": 0.00054, "grad_norm": 0.00148, "time": 0.32293}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04697, "heatmap_loss": 0.00053, "acc_pose": 0.8509, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.37622}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00054, "acc_pose": 0.83549, "loss": 0.00054, "grad_norm": 0.00142, "time": 0.3243}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.83871, "loss": 0.00053, "grad_norm": 0.00143, "time": 0.32478}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.8426, "loss": 0.00053, "grad_norm": 0.00157, "time": 0.32464}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.83628, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32392}
+{"mode": "val", "epoch": 130, "iter": 204, "lr": 0.0, "AP": 0.78806, "AP .5": 0.91484, "AP .75": 0.85457, "AP (M)": 0.71582, "AP (L)": 0.81705, "AR": 0.83838, "AR .5": 0.95198, "AR .75": 0.89798, "AR (M)": 0.79743, "AR (L)": 0.89881}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04818, "heatmap_loss": 0.00053, "acc_pose": 0.85731, "loss": 0.00053, "grad_norm": 0.0016, "time": 0.37229}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00054, "acc_pose": 0.85021, "loss": 0.00054, "grad_norm": 0.00144, "time": 0.32173}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84052, "loss": 0.00054, "grad_norm": 0.00141, "time": 0.32349}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.83505, "loss": 0.00053, "grad_norm": 0.0014, "time": 0.32492}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84913, "loss": 0.00054, "grad_norm": 0.00137, "time": 0.32288}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04672, "heatmap_loss": 0.00053, "acc_pose": 0.852, "loss": 0.00053, "grad_norm": 0.00146, "time": 0.37209}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00054, "acc_pose": 0.83966, "loss": 0.00054, "grad_norm": 0.00138, "time": 0.32345}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.84438, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.32295}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84021, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.32297}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85464, "loss": 0.00053, "grad_norm": 0.00154, "time": 0.32336}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04675, "heatmap_loss": 0.00053, "acc_pose": 0.83308, "loss": 0.00053, "grad_norm": 0.00127, "time": 0.3738}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.85895, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32255}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84569, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32348}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84147, "loss": 0.00053, "grad_norm": 0.00151, "time": 0.32368}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84349, "loss": 0.00054, "grad_norm": 0.00146, "time": 0.324}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.047, "heatmap_loss": 0.00053, "acc_pose": 0.83679, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.37462}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.84828, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32209}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84391, "loss": 0.00053, "grad_norm": 0.00146, "time": 0.32377}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.84398, "loss": 0.00053, "grad_norm": 0.00155, "time": 0.32317}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00054, "acc_pose": 0.84059, "loss": 0.00054, "grad_norm": 0.00132, "time": 0.32347}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04696, "heatmap_loss": 0.00053, "acc_pose": 0.85619, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.37517}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83608, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32318}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83932, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32299}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84019, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32215}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.85492, "loss": 0.00053, "grad_norm": 0.00135, "time": 0.32419}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00054, "acc_pose": 0.84504, "loss": 0.00054, "grad_norm": 0.00148, "time": 0.37747}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00053, "acc_pose": 0.84123, "loss": 0.00053, "grad_norm": 0.00148, "time": 0.3262}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84325, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32576}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85067, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.32426}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83843, "loss": 0.00053, "grad_norm": 0.0014, "time": 0.32448}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04748, "heatmap_loss": 0.00053, "acc_pose": 0.82879, "loss": 0.00053, "grad_norm": 0.00144, "time": 0.37676}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0006, "heatmap_loss": 0.00053, "acc_pose": 0.85159, "loss": 0.00053, "grad_norm": 0.00135, "time": 0.32547}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.85017, "loss": 0.00053, "grad_norm": 0.00141, "time": 0.32564}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00053, "acc_pose": 0.8524, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.3252}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.84219, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32605}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04889, "heatmap_loss": 0.00053, "acc_pose": 0.84944, "loss": 0.00053, "grad_norm": 0.00147, "time": 0.37555}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00053, "acc_pose": 0.84248, "loss": 0.00053, "grad_norm": 0.00131, "time": 0.32481}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.8435, "loss": 0.00053, "grad_norm": 0.00137, "time": 0.32336}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.8487, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.32433}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.85314, "loss": 0.00053, "grad_norm": 0.00145, "time": 0.32556}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04677, "heatmap_loss": 0.00053, "acc_pose": 0.83572, "loss": 0.00053, "grad_norm": 0.00146, "time": 0.37606}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84608, "loss": 0.00053, "grad_norm": 0.00137, "time": 0.32358}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00053, "acc_pose": 0.85297, "loss": 0.00053, "grad_norm": 0.00145, "time": 0.32417}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83869, "loss": 0.00053, "grad_norm": 0.00149, "time": 0.32322}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83774, "loss": 0.00053, "grad_norm": 0.0013, "time": 0.32534}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04686, "heatmap_loss": 0.00053, "acc_pose": 0.85942, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.37683}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84563, "loss": 0.00053, "grad_norm": 0.00147, "time": 0.32467}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85034, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32392}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84773, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32374}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85061, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.32383}
+{"mode": "val", "epoch": 140, "iter": 204, "lr": 0.0, "AP": 0.7877, "AP .5": 0.91544, "AP .75": 0.85469, "AP (M)": 0.71485, "AP (L)": 0.81659, "AR": 0.8384, "AR .5": 0.95324, "AR .75": 0.89814, "AR (M)": 0.79784, "AR (L)": 0.89788}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04666, "heatmap_loss": 0.00053, "acc_pose": 0.8377, "loss": 0.00053, "grad_norm": 0.00144, "time": 0.37098}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84193, "loss": 0.00053, "grad_norm": 0.0013, "time": 0.32421}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.83676, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32537}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.85091, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.32505}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00053, "acc_pose": 0.84813, "loss": 0.00053, "grad_norm": 0.00142, "time": 0.32522}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04691, "heatmap_loss": 0.00053, "acc_pose": 0.84799, "loss": 0.00053, "grad_norm": 0.00139, "time": 0.37564}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00053, "acc_pose": 0.84164, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32414}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.84599, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.32534}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83866, "loss": 0.00053, "grad_norm": 0.00144, "time": 0.32417}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84532, "loss": 0.00052, "grad_norm": 0.00125, "time": 0.32186}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04712, "heatmap_loss": 0.00052, "acc_pose": 0.85715, "loss": 0.00052, "grad_norm": 0.00134, "time": 0.37421}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83543, "loss": 0.00053, "grad_norm": 0.00135, "time": 0.32325}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.85129, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.32331}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.85214, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.32373}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85005, "loss": 0.00052, "grad_norm": 0.00139, "time": 0.3224}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00053, "acc_pose": 0.84036, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.37458}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00055, "heatmap_loss": 0.00053, "acc_pose": 0.8517, "loss": 0.00053, "grad_norm": 0.00131, "time": 0.32376}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84748, "loss": 0.00053, "grad_norm": 0.0013, "time": 0.32304}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.83601, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.32382}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.8511, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.3231}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04696, "heatmap_loss": 0.00053, "acc_pose": 0.8585, "loss": 0.00053, "grad_norm": 0.00143, "time": 0.37606}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.86452, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32404}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83956, "loss": 0.00053, "grad_norm": 0.00141, "time": 0.32222}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.84948, "loss": 0.00053, "grad_norm": 0.00126, "time": 0.32289}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.84417, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.32272}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00052, "acc_pose": 0.85805, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.37511}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8488, "loss": 0.00052, "grad_norm": 0.00138, "time": 0.32419}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83771, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32347}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85286, "loss": 0.00052, "grad_norm": 0.00133, "time": 0.32284}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00053, "acc_pose": 0.84219, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32474}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04676, "heatmap_loss": 0.00053, "acc_pose": 0.84297, "loss": 0.00053, "grad_norm": 0.00143, "time": 0.37374}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8551, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.32321}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.8489, "loss": 0.00053, "grad_norm": 0.00148, "time": 0.32506}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.84306, "loss": 0.00053, "grad_norm": 0.00138, "time": 0.32433}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8586, "loss": 0.00052, "grad_norm": 0.00138, "time": 0.32332}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0472, "heatmap_loss": 0.00052, "acc_pose": 0.8469, "loss": 0.00052, "grad_norm": 0.00141, "time": 0.37663}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.84872, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.32357}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84538, "loss": 0.00052, "grad_norm": 0.00125, "time": 0.32257}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00053, "acc_pose": 0.82596, "loss": 0.00053, "grad_norm": 0.0014, "time": 0.32258}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84147, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32402}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04677, "heatmap_loss": 0.00053, "acc_pose": 0.84923, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.37322}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84248, "loss": 0.00052, "grad_norm": 0.00141, "time": 0.3228}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.85336, "loss": 0.00052, "grad_norm": 0.00139, "time": 0.32383}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00053, "acc_pose": 0.83902, "loss": 0.00053, "grad_norm": 0.00144, "time": 0.32488}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.85668, "loss": 0.00052, "grad_norm": 0.0014, "time": 0.32432}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00052, "acc_pose": 0.85003, "loss": 0.00052, "grad_norm": 0.00143, "time": 0.37445}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00052, "acc_pose": 0.85067, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.32363}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.85412, "loss": 0.00052, "grad_norm": 0.00135, "time": 0.32303}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.83574, "loss": 0.00052, "grad_norm": 0.00139, "time": 0.32235}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.85379, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.32244}
+{"mode": "val", "epoch": 150, "iter": 204, "lr": 0.0, "AP": 0.78925, "AP .5": 0.91713, "AP .75": 0.85579, "AP (M)": 0.71799, "AP (L)": 0.81716, "AR": 0.84005, "AR .5": 0.95497, "AR .75": 0.89893, "AR (M)": 0.79959, "AR (L)": 0.89955}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04631, "heatmap_loss": 0.00052, "acc_pose": 0.83867, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.36983}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85009, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32287}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84811, "loss": 0.00052, "grad_norm": 0.00143, "time": 0.32362}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00053, "acc_pose": 0.82644, "loss": 0.00053, "grad_norm": 0.00136, "time": 0.32436}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00053, "acc_pose": 0.85167, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32395}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04661, "heatmap_loss": 0.00052, "acc_pose": 0.84907, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.377}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00053, "acc_pose": 0.8465, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.32343}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00052, "acc_pose": 0.84971, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32379}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00053, "acc_pose": 0.85493, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.32236}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84986, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32266}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00052, "acc_pose": 0.85877, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.3751}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85377, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.32333}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85281, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32404}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00053, "acc_pose": 0.83992, "loss": 0.00053, "grad_norm": 0.00131, "time": 0.32382}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.86184, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32413}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00053, "acc_pose": 0.84864, "loss": 0.00053, "grad_norm": 0.00143, "time": 0.37633}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.85049, "loss": 0.00052, "grad_norm": 0.00144, "time": 0.32615}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.85065, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32745}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84132, "loss": 0.00052, "grad_norm": 0.00133, "time": 0.32536}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85079, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32496}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04671, "heatmap_loss": 0.00052, "acc_pose": 0.8402, "loss": 0.00052, "grad_norm": 0.00129, "time": 0.37488}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85192, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.32236}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85848, "loss": 0.00052, "grad_norm": 0.00139, "time": 0.32211}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84776, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32271}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84585, "loss": 0.00052, "grad_norm": 0.00129, "time": 0.32169}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.00052, "acc_pose": 0.8407, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.37327}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0006, "heatmap_loss": 0.00052, "acc_pose": 0.85095, "loss": 0.00052, "grad_norm": 0.00123, "time": 0.32542}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.84246, "loss": 0.00052, "grad_norm": 0.00137, "time": 0.3242}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85356, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.3255}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84789, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32456}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04674, "heatmap_loss": 0.00052, "acc_pose": 0.85741, "loss": 0.00052, "grad_norm": 0.00146, "time": 0.37483}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8602, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32423}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85002, "loss": 0.00052, "grad_norm": 0.00129, "time": 0.32361}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84893, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.32325}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.83053, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32525}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04682, "heatmap_loss": 0.00052, "acc_pose": 0.83324, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.37495}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00052, "acc_pose": 0.84695, "loss": 0.00052, "grad_norm": 0.00136, "time": 0.32349}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.83715, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32241}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.84974, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.32144}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84928, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32288}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04689, "heatmap_loss": 0.00052, "acc_pose": 0.85114, "loss": 0.00052, "grad_norm": 0.00134, "time": 0.37471}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84625, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32554}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00052, "acc_pose": 0.85489, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32537}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.84569, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.32466}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8434, "loss": 0.00052, "grad_norm": 0.00129, "time": 0.32377}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04692, "heatmap_loss": 0.00053, "acc_pose": 0.84779, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.37273}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85846, "loss": 0.00052, "grad_norm": 0.00128, "time": 0.32355}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85471, "loss": 0.00052, "grad_norm": 0.00139, "time": 0.32483}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84363, "loss": 0.00051, "grad_norm": 0.00121, "time": 0.32458}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.83996, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32469}
+{"mode": "val", "epoch": 160, "iter": 204, "lr": 0.0, "AP": 0.78764, "AP .5": 0.91492, "AP .75": 0.85561, "AP (M)": 0.71576, "AP (L)": 0.81589, "AR": 0.83866, "AR .5": 0.95293, "AR .75": 0.89861, "AR (M)": 0.7985, "AR (L)": 0.89807}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04661, "heatmap_loss": 0.00052, "acc_pose": 0.85927, "loss": 0.00052, "grad_norm": 0.00137, "time": 0.36831}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85145, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32129}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85768, "loss": 0.00052, "grad_norm": 0.00125, "time": 0.32301}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85227, "loss": 0.00052, "grad_norm": 0.00133, "time": 0.32297}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00052, "acc_pose": 0.8391, "loss": 0.00052, "grad_norm": 0.00134, "time": 0.32197}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00052, "acc_pose": 0.85649, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.37515}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.85031, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32459}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85081, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32229}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85991, "loss": 0.00052, "grad_norm": 0.00125, "time": 0.32213}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84697, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.32243}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04667, "heatmap_loss": 0.00052, "acc_pose": 0.84037, "loss": 0.00052, "grad_norm": 0.00134, "time": 0.37305}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00062, "heatmap_loss": 0.00052, "acc_pose": 0.84844, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.3224}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84125, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.32218}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85077, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32406}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8458, "loss": 0.00052, "grad_norm": 0.00134, "time": 0.32203}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04691, "heatmap_loss": 0.00052, "acc_pose": 0.86019, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.37486}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.848, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.3231}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00062, "heatmap_loss": 0.00052, "acc_pose": 0.85179, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32424}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.86123, "loss": 0.00052, "grad_norm": 0.00129, "time": 0.32373}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85284, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32529}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04667, "heatmap_loss": 0.00052, "acc_pose": 0.86908, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.37401}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.8493, "loss": 0.00052, "grad_norm": 0.00127, "time": 0.32301}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.85276, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.32341}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00051, "acc_pose": 0.85359, "loss": 0.00051, "grad_norm": 0.00122, "time": 0.32313}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84933, "loss": 0.00051, "grad_norm": 0.00136, "time": 0.32504}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04679, "heatmap_loss": 0.00052, "acc_pose": 0.84394, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.37622}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00051, "acc_pose": 0.85081, "loss": 0.00051, "grad_norm": 0.00135, "time": 0.32463}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84814, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32373}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.85569, "loss": 0.00051, "grad_norm": 0.00129, "time": 0.32373}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00052, "acc_pose": 0.84315, "loss": 0.00052, "grad_norm": 0.00132, "time": 0.32527}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04665, "heatmap_loss": 0.00051, "acc_pose": 0.85611, "loss": 0.00051, "grad_norm": 0.00127, "time": 0.37592}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00052, "acc_pose": 0.86024, "loss": 0.00052, "grad_norm": 0.0013, "time": 0.32389}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.83754, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.32481}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00052, "acc_pose": 0.8629, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.32287}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84715, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.32211}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00052, "acc_pose": 0.83933, "loss": 0.00052, "grad_norm": 0.00131, "time": 0.37609}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00051, "acc_pose": 0.85393, "loss": 0.00051, "grad_norm": 0.00126, "time": 0.32369}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00052, "acc_pose": 0.84599, "loss": 0.00052, "grad_norm": 0.00126, "time": 0.32167}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85264, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.32206}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8441, "loss": 0.00052, "grad_norm": 0.00125, "time": 0.32307}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04672, "heatmap_loss": 0.00051, "acc_pose": 0.86042, "loss": 0.00051, "grad_norm": 0.00126, "time": 0.37432}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00051, "acc_pose": 0.84259, "loss": 0.00051, "grad_norm": 0.00127, "time": 0.32237}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.85274, "loss": 0.00051, "grad_norm": 0.0015, "time": 0.32297}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.85022, "loss": 0.00052, "grad_norm": 0.00136, "time": 0.3223}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00051, "acc_pose": 0.85731, "loss": 0.00051, "grad_norm": 0.0013, "time": 0.32228}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04694, "heatmap_loss": 0.00051, "acc_pose": 0.83989, "loss": 0.00051, "grad_norm": 0.00131, "time": 0.374}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00051, "acc_pose": 0.85015, "loss": 0.00051, "grad_norm": 0.00128, "time": 0.32297}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.8488, "loss": 0.00052, "grad_norm": 0.00123, "time": 0.32193}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00045, "heatmap_loss": 0.00051, "acc_pose": 0.85079, "loss": 0.00051, "grad_norm": 0.00126, "time": 0.32316}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84861, "loss": 0.00051, "grad_norm": 0.00126, "time": 0.32329}
+{"mode": "val", "epoch": 170, "iter": 204, "lr": 0.0, "AP": 0.78986, "AP .5": 0.91652, "AP .75": 0.85505, "AP (M)": 0.71856, "AP (L)": 0.81948, "AR": 0.83933, "AR .5": 0.95293, "AR .75": 0.89688, "AR (M)": 0.79885, "AR (L)": 0.89885}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04803, "heatmap_loss": 0.00052, "acc_pose": 0.84725, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.37107}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.85656, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.32286}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00051, "acc_pose": 0.8481, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.32432}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84873, "loss": 0.00051, "grad_norm": 0.00107, "time": 0.32425}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.84721, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.32364}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04669, "heatmap_loss": 0.00051, "acc_pose": 0.84247, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.37092}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.85699, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.32526}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.85591, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.32412}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85781, "loss": 0.0005, "grad_norm": 0.00115, "time": 0.32457}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84559, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.32548}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04682, "heatmap_loss": 0.0005, "acc_pose": 0.83973, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.3729}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85455, "loss": 0.0005, "grad_norm": 0.00114, "time": 0.32251}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.85967, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32263}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85858, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.32452}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.84533, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32255}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00051, "acc_pose": 0.85468, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.37481}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85693, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.3237}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00051, "acc_pose": 0.84826, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.32364}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.0005, "acc_pose": 0.86155, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.32476}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.84747, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.32349}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04702, "heatmap_loss": 0.0005, "acc_pose": 0.86316, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.3745}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86745, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32273}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.0005, "acc_pose": 0.86057, "loss": 0.0005, "grad_norm": 0.00112, "time": 0.32201}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0005, "acc_pose": 0.85869, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32346}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0005, "acc_pose": 0.85628, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32293}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04684, "heatmap_loss": 0.00051, "acc_pose": 0.83981, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.37362}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.86347, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.32648}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86156, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32357}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.86033, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.3227}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.85551, "loss": 0.00049, "grad_norm": 0.00109, "time": 0.32387}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04672, "heatmap_loss": 0.0005, "acc_pose": 0.85526, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.37576}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86632, "loss": 0.0005, "grad_norm": 0.00113, "time": 0.32467}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.84786, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.32414}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85411, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.3232}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.0005, "acc_pose": 0.87172, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.32286}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.0005, "acc_pose": 0.86226, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.37361}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.85991, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.32268}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85982, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32455}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86132, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.323}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.0005, "acc_pose": 0.85304, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32302}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04678, "heatmap_loss": 0.0005, "acc_pose": 0.8523, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.37645}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85938, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.32406}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.0005, "acc_pose": 0.85456, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32461}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.87401, "loss": 0.0005, "grad_norm": 0.00117, "time": 0.32453}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.0005, "acc_pose": 0.86148, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32559}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04678, "heatmap_loss": 0.0005, "acc_pose": 0.86298, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.37385}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.8563, "loss": 0.0005, "grad_norm": 0.0012, "time": 0.32251}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86703, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.32397}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85615, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.32606}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.86934, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.32338}
+{"mode": "val", "epoch": 180, "iter": 204, "lr": 0.0, "AP": 0.79054, "AP .5": 0.9165, "AP .75": 0.85682, "AP (M)": 0.71893, "AP (L)": 0.81926, "AR": 0.84063, "AR .5": 0.95387, "AR .75": 0.89956, "AR (M)": 0.79984, "AR (L)": 0.90026}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04787, "heatmap_loss": 0.0005, "acc_pose": 0.86933, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.37109}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.8506, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32193}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.85227, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.3249}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.84878, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32654}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.85482, "loss": 0.0005, "grad_norm": 0.00113, "time": 0.32432}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04651, "heatmap_loss": 0.0005, "acc_pose": 0.84654, "loss": 0.0005, "grad_norm": 0.00112, "time": 0.37079}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.87001, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.3231}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0005, "acc_pose": 0.85067, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32343}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85372, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32223}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.84675, "loss": 0.0005, "grad_norm": 0.00113, "time": 0.32281}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04681, "heatmap_loss": 0.0005, "acc_pose": 0.86125, "loss": 0.0005, "grad_norm": 0.001, "time": 0.37324}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00049, "acc_pose": 0.86066, "loss": 0.00049, "grad_norm": 0.00108, "time": 0.32157}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.87019, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.322}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85801, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.32314}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.8515, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32236}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.0005, "acc_pose": 0.86858, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.37279}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.86261, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32284}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0005, "acc_pose": 0.84357, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32193}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.85889, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.32435}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86366, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.3232}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00049, "acc_pose": 0.85775, "loss": 0.00049, "grad_norm": 0.0011, "time": 0.37489}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86119, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.32409}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.0005, "acc_pose": 0.87505, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.32343}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86851, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.32244}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.84717, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32327}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.0005, "acc_pose": 0.84567, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.37307}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.85563, "loss": 0.00049, "grad_norm": 0.00098, "time": 0.32161}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.86497, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.32306}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.86035, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32273}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85215, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32286}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04802, "heatmap_loss": 0.00049, "acc_pose": 0.86525, "loss": 0.00049, "grad_norm": 0.00115, "time": 0.3731}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00049, "acc_pose": 0.85384, "loss": 0.00049, "grad_norm": 0.00101, "time": 0.32346}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85261, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.32466}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.86147, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32333}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85646, "loss": 0.0005, "grad_norm": 0.00114, "time": 0.32392}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04675, "heatmap_loss": 0.00049, "acc_pose": 0.86708, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.37421}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.85401, "loss": 0.00049, "grad_norm": 0.0011, "time": 0.3256}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00108, "heatmap_loss": 0.0005, "acc_pose": 0.86692, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.32693}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86209, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32586}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86565, "loss": 0.0005, "grad_norm": 0.00101, "time": 0.32591}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0468, "heatmap_loss": 0.0005, "acc_pose": 0.85598, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.37417}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86088, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32353}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85178, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32371}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86095, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.3232}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86351, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32266}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.0005, "acc_pose": 0.86477, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.37471}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.84958, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.32227}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.87141, "loss": 0.00049, "grad_norm": 0.00117, "time": 0.32304}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85828, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32294}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86192, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32263}
+{"mode": "val", "epoch": 190, "iter": 204, "lr": 0.0, "AP": 0.78958, "AP .5": 0.91593, "AP .75": 0.85621, "AP (M)": 0.7175, "AP (L)": 0.81813, "AR": 0.84022, "AR .5": 0.9534, "AR .75": 0.89924, "AR (M)": 0.79891, "AR (L)": 0.90063}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04687, "heatmap_loss": 0.0005, "acc_pose": 0.86127, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.36929}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86395, "loss": 0.00049, "grad_norm": 0.00109, "time": 0.3212}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86421, "loss": 0.00049, "grad_norm": 0.00111, "time": 0.32297}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.86283, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.32245}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85537, "loss": 0.00049, "grad_norm": 0.00114, "time": 0.32506}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00049, "acc_pose": 0.857, "loss": 0.00049, "grad_norm": 0.00108, "time": 0.37272}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85581, "loss": 0.00049, "grad_norm": 0.001, "time": 0.32327}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.0005, "acc_pose": 0.86274, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.32254}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85256, "loss": 0.00049, "grad_norm": 0.0011, "time": 0.32265}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.8594, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.32242}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04729, "heatmap_loss": 0.00049, "acc_pose": 0.85098, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.37391}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85483, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32281}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.0005, "acc_pose": 0.85323, "loss": 0.0005, "grad_norm": 0.00122, "time": 0.32195}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.84336, "loss": 0.0005, "grad_norm": 0.00105, "time": 0.32238}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86848, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.32423}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04673, "heatmap_loss": 0.00049, "acc_pose": 0.86147, "loss": 0.00049, "grad_norm": 0.001, "time": 0.37283}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85407, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32454}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85441, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.32221}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.86135, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.323}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00049, "acc_pose": 0.86483, "loss": 0.00049, "grad_norm": 0.0011, "time": 0.32248}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04678, "heatmap_loss": 0.00049, "acc_pose": 0.85724, "loss": 0.00049, "grad_norm": 0.00112, "time": 0.37386}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00049, "acc_pose": 0.84723, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32241}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.86895, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32266}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.85632, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.32363}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86181, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32308}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04688, "heatmap_loss": 0.00049, "acc_pose": 0.85642, "loss": 0.00049, "grad_norm": 0.00111, "time": 0.37348}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.85915, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.32298}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85752, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32261}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85418, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32311}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85527, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.32262}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00049, "acc_pose": 0.85781, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.37565}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.864, "loss": 0.0005, "grad_norm": 0.00112, "time": 0.32381}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.8594, "loss": 0.00049, "grad_norm": 0.00111, "time": 0.32493}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.84926, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32581}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.0005, "acc_pose": 0.85096, "loss": 0.0005, "grad_norm": 0.00112, "time": 0.32359}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04687, "heatmap_loss": 0.00049, "acc_pose": 0.84343, "loss": 0.00049, "grad_norm": 0.0011, "time": 0.37428}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.0005, "acc_pose": 0.85499, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.325}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85271, "loss": 0.00049, "grad_norm": 0.00111, "time": 0.32446}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.8585, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32342}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86432, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32318}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04689, "heatmap_loss": 0.00049, "acc_pose": 0.85437, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.3745}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86869, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32543}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00054, "heatmap_loss": 0.00049, "acc_pose": 0.85417, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.32574}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86205, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32395}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.84767, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.32286}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04674, "heatmap_loss": 0.00049, "acc_pose": 0.85541, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.37715}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85468, "loss": 0.00049, "grad_norm": 0.00108, "time": 0.32583}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86156, "loss": 0.00049, "grad_norm": 0.00109, "time": 0.3233}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86614, "loss": 0.00049, "grad_norm": 0.00109, "time": 0.32448}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86275, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32348}
+{"mode": "val", "epoch": 200, "iter": 204, "lr": 0.0, "AP": 0.78987, "AP .5": 0.91568, "AP .75": 0.85612, "AP (M)": 0.71717, "AP (L)": 0.81886, "AR": 0.84048, "AR .5": 0.95277, "AR .75": 0.89924, "AR (M)": 0.79896, "AR (L)": 0.90104}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04675, "heatmap_loss": 0.00049, "acc_pose": 0.84667, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.36962}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85353, "loss": 0.00049, "grad_norm": 0.00108, "time": 0.32224}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.86284, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32332}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86738, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.32324}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.85159, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.3243}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04703, "heatmap_loss": 0.00049, "acc_pose": 0.86622, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.37556}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86302, "loss": 0.00049, "grad_norm": 0.001, "time": 0.32581}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86288, "loss": 0.00049, "grad_norm": 0.001, "time": 0.32337}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85241, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32251}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.85676, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.3225}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04678, "heatmap_loss": 0.0005, "acc_pose": 0.85659, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.37434}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86464, "loss": 0.00049, "grad_norm": 0.00101, "time": 0.32314}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85911, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.32281}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00049, "acc_pose": 0.87028, "loss": 0.00049, "grad_norm": 0.00098, "time": 0.32288}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85486, "loss": 0.00049, "grad_norm": 0.00101, "time": 0.32388}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04706, "heatmap_loss": 0.00049, "acc_pose": 0.85848, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.3765}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.85455, "loss": 0.00049, "grad_norm": 0.00098, "time": 0.32454}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86096, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32486}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.84853, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32485}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86495, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.32557}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04694, "heatmap_loss": 0.00049, "acc_pose": 0.85921, "loss": 0.00049, "grad_norm": 0.00105, "time": 0.37403}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.85244, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32432}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.86384, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32239}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00049, "acc_pose": 0.85922, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.32317}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.86467, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32417}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04707, "heatmap_loss": 0.00049, "acc_pose": 0.86402, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.37678}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.85906, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32445}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.86334, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.32692}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.86804, "loss": 0.00049, "grad_norm": 0.001, "time": 0.32586}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00053, "heatmap_loss": 0.00049, "acc_pose": 0.86223, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.32536}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04704, "heatmap_loss": 0.00049, "acc_pose": 0.86083, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.3783}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.86502, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32547}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85839, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32422}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.86901, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32372}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.0005, "acc_pose": 0.84884, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.32386}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04683, "heatmap_loss": 0.00049, "acc_pose": 0.86017, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.37522}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.86999, "loss": 0.00049, "grad_norm": 0.001, "time": 0.32276}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.86133, "loss": 0.00049, "grad_norm": 0.00108, "time": 0.3227}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85387, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32296}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00052, "heatmap_loss": 0.00049, "acc_pose": 0.85796, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32256}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.0467, "heatmap_loss": 0.00049, "acc_pose": 0.86387, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.37469}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.86792, "loss": 0.00049, "grad_norm": 0.00104, "time": 0.32384}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.0005, "heatmap_loss": 0.00049, "acc_pose": 0.8725, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.32397}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00049, "acc_pose": 0.85694, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.32445}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00049, "heatmap_loss": 0.00049, "acc_pose": 0.87064, "loss": 0.00049, "grad_norm": 0.00107, "time": 0.32289}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 24462, "data_time": 0.04691, "heatmap_loss": 0.00049, "acc_pose": 0.86493, "loss": 0.00049, "grad_norm": 0.001, "time": 0.37391}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 24462, "data_time": 0.00048, "heatmap_loss": 0.00048, "acc_pose": 0.85559, "loss": 0.00048, "grad_norm": 0.00103, "time": 0.32314}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 24462, "data_time": 0.00047, "heatmap_loss": 0.00049, "acc_pose": 0.85734, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.3226}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 24462, "data_time": 0.00046, "heatmap_loss": 0.00049, "acc_pose": 0.86224, "loss": 0.00049, "grad_norm": 0.00106, "time": 0.32269}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 24462, "data_time": 0.00051, "heatmap_loss": 0.00049, "acc_pose": 0.86098, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.32308}
+{"mode": "val", "epoch": 210, "iter": 204, "lr": 0.0, "AP": 0.78994, "AP .5": 0.91587, "AP .75": 0.85625, "AP (M)": 0.71787, "AP (L)": 0.81901, "AR": 0.84055, "AR .5": 0.95356, "AR .75": 0.89861, "AR (M)": 0.79986, "AR (L)": 0.90052}
diff --git a/vendor/ViTPose/logs/vitpose-l-simple.log.json b/vendor/ViTPose/logs/vitpose-l-simple.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..93037b0c7cb2eda160ce25b5a970937a30d8dbc4
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-l-simple.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+94ca136", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.06538, "heatmap_loss": 0.00291, "acc_pose": 0.00785, "loss": 0.00291, "grad_norm": 0.25708, "time": 0.84648}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0004, "heatmap_loss": 0.00226, "acc_pose": 0.03368, "loss": 0.00226, "grad_norm": 0.09541, "time": 0.69663}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00224, "acc_pose": 0.07765, "loss": 0.00224, "grad_norm": 0.17212, "time": 0.69597}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00223, "acc_pose": 0.1182, "loss": 0.00223, "grad_norm": 0.21995, "time": 0.69584}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00213, "acc_pose": 0.16296, "loss": 0.00213, "grad_norm": 0.14615, "time": 0.69562}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05601, "heatmap_loss": 0.00205, "acc_pose": 0.21299, "loss": 0.00205, "grad_norm": 0.16281, "time": 0.75353}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00198, "acc_pose": 0.25391, "loss": 0.00198, "grad_norm": 0.21748, "time": 0.69704}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00183, "acc_pose": 0.31657, "loss": 0.00183, "grad_norm": 0.24283, "time": 0.69783}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00164, "acc_pose": 0.37912, "loss": 0.00164, "grad_norm": 0.17538, "time": 0.69786}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00151, "acc_pose": 0.43824, "loss": 0.00151, "grad_norm": 0.16043, "time": 0.69738}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05524, "heatmap_loss": 0.00137, "acc_pose": 0.51359, "loss": 0.00137, "grad_norm": 0.14373, "time": 0.75447}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00128, "acc_pose": 0.54235, "loss": 0.00128, "grad_norm": 0.12366, "time": 0.69807}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00125, "acc_pose": 0.5573, "loss": 0.00125, "grad_norm": 0.12072, "time": 0.69635}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00122, "acc_pose": 0.57748, "loss": 0.00122, "grad_norm": 0.14141, "time": 0.69692}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0012, "acc_pose": 0.58849, "loss": 0.0012, "grad_norm": 0.14754, "time": 0.69556}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05528, "heatmap_loss": 0.00114, "acc_pose": 0.6095, "loss": 0.00114, "grad_norm": 0.12635, "time": 0.7535}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00113, "acc_pose": 0.61405, "loss": 0.00113, "grad_norm": 0.14789, "time": 0.69587}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00108, "acc_pose": 0.63081, "loss": 0.00108, "grad_norm": 0.08684, "time": 0.69598}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00037, "heatmap_loss": 0.00108, "acc_pose": 0.6349, "loss": 0.00108, "grad_norm": 0.11365, "time": 0.69639}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00106, "acc_pose": 0.64164, "loss": 0.00106, "grad_norm": 0.10001, "time": 0.69615}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.055, "heatmap_loss": 0.00103, "acc_pose": 0.65137, "loss": 0.00103, "grad_norm": 0.09576, "time": 0.75338}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00103, "acc_pose": 0.64747, "loss": 0.00103, "grad_norm": 0.11236, "time": 0.69706}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00101, "acc_pose": 0.65191, "loss": 0.00101, "grad_norm": 0.08795, "time": 0.6965}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.001, "acc_pose": 0.665, "loss": 0.001, "grad_norm": 0.0911, "time": 0.69663}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00099, "acc_pose": 0.67084, "loss": 0.00099, "grad_norm": 0.09352, "time": 0.69671}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0544, "heatmap_loss": 0.00097, "acc_pose": 0.67444, "loss": 0.00097, "grad_norm": 0.09012, "time": 0.75498}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00097, "acc_pose": 0.67268, "loss": 0.00097, "grad_norm": 0.09774, "time": 0.69616}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00098, "acc_pose": 0.67355, "loss": 0.00098, "grad_norm": 0.12428, "time": 0.69681}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00095, "acc_pose": 0.67892, "loss": 0.00095, "grad_norm": 0.08697, "time": 0.69609}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00094, "acc_pose": 0.68988, "loss": 0.00094, "grad_norm": 0.04047, "time": 0.69643}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05581, "heatmap_loss": 0.00093, "acc_pose": 0.68849, "loss": 0.00093, "grad_norm": 0.07157, "time": 0.75393}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00092, "acc_pose": 0.6881, "loss": 0.00092, "grad_norm": 0.06267, "time": 0.69656}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00091, "acc_pose": 0.6877, "loss": 0.00091, "grad_norm": 0.06026, "time": 0.69655}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00026, "heatmap_loss": 0.00091, "acc_pose": 0.69898, "loss": 0.00091, "grad_norm": 0.0694, "time": 0.6961}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0009, "acc_pose": 0.70037, "loss": 0.0009, "grad_norm": 0.05356, "time": 0.69638}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05451, "heatmap_loss": 0.0009, "acc_pose": 0.70245, "loss": 0.0009, "grad_norm": 0.07093, "time": 0.75451}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00089, "acc_pose": 0.70572, "loss": 0.00089, "grad_norm": 0.07298, "time": 0.69624}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00088, "acc_pose": 0.70575, "loss": 0.00088, "grad_norm": 0.03863, "time": 0.69637}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00088, "acc_pose": 0.71028, "loss": 0.00088, "grad_norm": 0.05938, "time": 0.69636}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00087, "acc_pose": 0.70469, "loss": 0.00087, "grad_norm": 0.0476, "time": 0.69654}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05835, "heatmap_loss": 0.00086, "acc_pose": 0.71661, "loss": 0.00086, "grad_norm": 0.05424, "time": 0.75687}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00087, "acc_pose": 0.7197, "loss": 0.00087, "grad_norm": 0.07312, "time": 0.69576}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00086, "acc_pose": 0.72063, "loss": 0.00086, "grad_norm": 0.04031, "time": 0.69645}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00086, "acc_pose": 0.71881, "loss": 0.00086, "grad_norm": 0.03684, "time": 0.69628}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00085, "acc_pose": 0.72263, "loss": 0.00085, "grad_norm": 0.04102, "time": 0.69541}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05499, "heatmap_loss": 0.00085, "acc_pose": 0.72914, "loss": 0.00085, "grad_norm": 0.04856, "time": 0.75393}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00085, "acc_pose": 0.71337, "loss": 0.00085, "grad_norm": 0.05241, "time": 0.69609}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00084, "acc_pose": 0.72701, "loss": 0.00084, "grad_norm": 0.03831, "time": 0.69696}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00083, "acc_pose": 0.72863, "loss": 0.00083, "grad_norm": 0.04784, "time": 0.69664}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00084, "acc_pose": 0.73316, "loss": 0.00084, "grad_norm": 0.05382, "time": 0.69643}
+{"mode": "val", "epoch": 10, "iter": 407, "lr": 0.0, "AP": 0.70828, "AP .5": 0.89337, "AP .75": 0.78857, "AP (M)": 0.63201, "AP (L)": 0.73693, "AR": 0.76637, "AR .5": 0.93372, "AR .75": 0.838, "AR (M)": 0.72267, "AR (L)": 0.82936}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05503, "heatmap_loss": 0.00083, "acc_pose": 0.728, "loss": 0.00083, "grad_norm": 0.0389, "time": 0.74856}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00083, "acc_pose": 0.7275, "loss": 0.00083, "grad_norm": 0.04898, "time": 0.69513}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00083, "acc_pose": 0.73019, "loss": 0.00083, "grad_norm": 0.03439, "time": 0.69578}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00082, "acc_pose": 0.7264, "loss": 0.00082, "grad_norm": 0.03532, "time": 0.69585}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00082, "acc_pose": 0.73625, "loss": 0.00082, "grad_norm": 0.05058, "time": 0.69563}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05623, "heatmap_loss": 0.00081, "acc_pose": 0.74029, "loss": 0.00081, "grad_norm": 0.02524, "time": 0.75372}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0008, "acc_pose": 0.73636, "loss": 0.0008, "grad_norm": 0.02423, "time": 0.69556}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00082, "acc_pose": 0.73611, "loss": 0.00082, "grad_norm": 0.041, "time": 0.69578}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0008, "acc_pose": 0.7301, "loss": 0.0008, "grad_norm": 0.0322, "time": 0.69583}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00081, "acc_pose": 0.73332, "loss": 0.00081, "grad_norm": 0.02841, "time": 0.69584}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05503, "heatmap_loss": 0.0008, "acc_pose": 0.73849, "loss": 0.0008, "grad_norm": 0.03616, "time": 0.75385}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00079, "acc_pose": 0.74161, "loss": 0.00079, "grad_norm": 0.01908, "time": 0.69629}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00079, "acc_pose": 0.74179, "loss": 0.00079, "grad_norm": 0.02257, "time": 0.69606}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00081, "acc_pose": 0.742, "loss": 0.00081, "grad_norm": 0.0412, "time": 0.69601}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00079, "acc_pose": 0.74822, "loss": 0.00079, "grad_norm": 0.02212, "time": 0.69649}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05544, "heatmap_loss": 0.00079, "acc_pose": 0.74519, "loss": 0.00079, "grad_norm": 0.02162, "time": 0.7532}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00079, "acc_pose": 0.75213, "loss": 0.00079, "grad_norm": 0.02677, "time": 0.69651}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00078, "acc_pose": 0.74337, "loss": 0.00078, "grad_norm": 0.01776, "time": 0.69604}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00078, "acc_pose": 0.75158, "loss": 0.00078, "grad_norm": 0.0255, "time": 0.6959}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00078, "acc_pose": 0.75101, "loss": 0.00078, "grad_norm": 0.01759, "time": 0.69577}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05446, "heatmap_loss": 0.00078, "acc_pose": 0.74275, "loss": 0.00078, "grad_norm": 0.01758, "time": 0.75393}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00078, "acc_pose": 0.74625, "loss": 0.00078, "grad_norm": 0.01843, "time": 0.69559}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00081, "acc_pose": 0.73808, "loss": 0.00081, "grad_norm": 0.05078, "time": 0.69623}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00078, "acc_pose": 0.75355, "loss": 0.00078, "grad_norm": 0.01383, "time": 0.69757}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00077, "acc_pose": 0.75089, "loss": 0.00077, "grad_norm": 0.0101, "time": 0.69659}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05638, "heatmap_loss": 0.00077, "acc_pose": 0.75619, "loss": 0.00077, "grad_norm": 0.00984, "time": 0.75447}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00076, "acc_pose": 0.75551, "loss": 0.00076, "grad_norm": 0.00916, "time": 0.69566}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00076, "acc_pose": 0.74815, "loss": 0.00076, "grad_norm": 0.0089, "time": 0.69635}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00076, "acc_pose": 0.75545, "loss": 0.00076, "grad_norm": 0.00951, "time": 0.69588}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00076, "acc_pose": 0.75955, "loss": 0.00076, "grad_norm": 0.0114, "time": 0.6956}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05582, "heatmap_loss": 0.00076, "acc_pose": 0.75128, "loss": 0.00076, "grad_norm": 0.01854, "time": 0.75305}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00075, "acc_pose": 0.75477, "loss": 0.00075, "grad_norm": 0.00872, "time": 0.69558}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00075, "acc_pose": 0.76379, "loss": 0.00075, "grad_norm": 0.00724, "time": 0.69556}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00075, "acc_pose": 0.76293, "loss": 0.00075, "grad_norm": 0.00833, "time": 0.69586}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00075, "acc_pose": 0.76286, "loss": 0.00075, "grad_norm": 0.00799, "time": 0.69576}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05658, "heatmap_loss": 0.00075, "acc_pose": 0.75753, "loss": 0.00075, "grad_norm": 0.00795, "time": 0.75423}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00049, "heatmap_loss": 0.00075, "acc_pose": 0.76101, "loss": 0.00075, "grad_norm": 0.00973, "time": 0.69582}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00075, "acc_pose": 0.76092, "loss": 0.00075, "grad_norm": 0.01187, "time": 0.69622}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00074, "acc_pose": 0.76321, "loss": 0.00074, "grad_norm": 0.00721, "time": 0.69551}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00074, "acc_pose": 0.76138, "loss": 0.00074, "grad_norm": 0.01141, "time": 0.69554}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05614, "heatmap_loss": 0.00074, "acc_pose": 0.75963, "loss": 0.00074, "grad_norm": 0.00706, "time": 0.75256}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00074, "acc_pose": 0.75992, "loss": 0.00074, "grad_norm": 0.00695, "time": 0.69615}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.74971, "loss": 0.00074, "grad_norm": 0.00932, "time": 0.6961}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00041, "heatmap_loss": 0.00075, "acc_pose": 0.75948, "loss": 0.00075, "grad_norm": 0.01314, "time": 0.69617}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00073, "acc_pose": 0.77009, "loss": 0.00073, "grad_norm": 0.00619, "time": 0.69573}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05706, "heatmap_loss": 0.00073, "acc_pose": 0.76716, "loss": 0.00073, "grad_norm": 0.0055, "time": 0.75365}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00073, "acc_pose": 0.77197, "loss": 0.00073, "grad_norm": 0.00362, "time": 0.69555}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00073, "acc_pose": 0.76223, "loss": 0.00073, "grad_norm": 0.00499, "time": 0.69552}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00074, "acc_pose": 0.76941, "loss": 0.00074, "grad_norm": 0.00545, "time": 0.69573}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00073, "acc_pose": 0.76357, "loss": 0.00073, "grad_norm": 0.00496, "time": 0.69584}
+{"mode": "val", "epoch": 20, "iter": 407, "lr": 0.0, "AP": 0.73847, "AP .5": 0.90222, "AP .75": 0.81752, "AP (M)": 0.66397, "AP (L)": 0.76326, "AR": 0.79444, "AR .5": 0.94159, "AR .75": 0.86477, "AR (M)": 0.75198, "AR (L)": 0.85563}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05688, "heatmap_loss": 0.00073, "acc_pose": 0.77141, "loss": 0.00073, "grad_norm": 0.00477, "time": 0.75252}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00073, "acc_pose": 0.77438, "loss": 0.00073, "grad_norm": 0.00378, "time": 0.69598}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.76814, "loss": 0.00073, "grad_norm": 0.00325, "time": 0.69589}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00073, "acc_pose": 0.76952, "loss": 0.00073, "grad_norm": 0.00336, "time": 0.69543}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00073, "acc_pose": 0.77607, "loss": 0.00073, "grad_norm": 0.00387, "time": 0.69595}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05665, "heatmap_loss": 0.00073, "acc_pose": 0.77325, "loss": 0.00073, "grad_norm": 0.00528, "time": 0.75276}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00072, "acc_pose": 0.77093, "loss": 0.00072, "grad_norm": 0.00412, "time": 0.6955}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00072, "acc_pose": 0.76606, "loss": 0.00072, "grad_norm": 0.00334, "time": 0.69533}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00072, "acc_pose": 0.78132, "loss": 0.00072, "grad_norm": 0.00272, "time": 0.69581}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00072, "acc_pose": 0.77508, "loss": 0.00072, "grad_norm": 0.00249, "time": 0.6952}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05586, "heatmap_loss": 0.00072, "acc_pose": 0.77034, "loss": 0.00072, "grad_norm": 0.00413, "time": 0.75358}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00072, "acc_pose": 0.76627, "loss": 0.00072, "grad_norm": 0.00213, "time": 0.6956}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.77458, "loss": 0.00071, "grad_norm": 0.0025, "time": 0.69597}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.7784, "loss": 0.00071, "grad_norm": 0.00205, "time": 0.69571}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.77409, "loss": 0.00071, "grad_norm": 0.00193, "time": 0.69616}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05606, "heatmap_loss": 0.00071, "acc_pose": 0.77467, "loss": 0.00071, "grad_norm": 0.00204, "time": 0.75288}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.78462, "loss": 0.00071, "grad_norm": 0.00205, "time": 0.69509}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.77292, "loss": 0.00071, "grad_norm": 0.00185, "time": 0.69528}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00071, "acc_pose": 0.76935, "loss": 0.00071, "grad_norm": 0.00177, "time": 0.69535}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00071, "acc_pose": 0.77851, "loss": 0.00071, "grad_norm": 0.002, "time": 0.69535}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05588, "heatmap_loss": 0.0007, "acc_pose": 0.78087, "loss": 0.0007, "grad_norm": 0.00188, "time": 0.75351}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0007, "acc_pose": 0.77671, "loss": 0.0007, "grad_norm": 0.00169, "time": 0.69555}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77389, "loss": 0.0007, "grad_norm": 0.00166, "time": 0.69565}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00071, "acc_pose": 0.77568, "loss": 0.00071, "grad_norm": 0.00171, "time": 0.69577}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0007, "acc_pose": 0.77717, "loss": 0.0007, "grad_norm": 0.00162, "time": 0.69543}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05625, "heatmap_loss": 0.0007, "acc_pose": 0.77567, "loss": 0.0007, "grad_norm": 0.00164, "time": 0.75267}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77489, "loss": 0.0007, "grad_norm": 0.00149, "time": 0.69565}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77794, "loss": 0.0007, "grad_norm": 0.00141, "time": 0.69618}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77684, "loss": 0.0007, "grad_norm": 0.00151, "time": 0.69577}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78441, "loss": 0.00069, "grad_norm": 0.00151, "time": 0.69577}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0558, "heatmap_loss": 0.0007, "acc_pose": 0.78548, "loss": 0.0007, "grad_norm": 0.00156, "time": 0.75392}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0007, "acc_pose": 0.78606, "loss": 0.0007, "grad_norm": 0.00152, "time": 0.69585}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0007, "acc_pose": 0.78338, "loss": 0.0007, "grad_norm": 0.00141, "time": 0.69585}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.78702, "loss": 0.0007, "grad_norm": 0.0014, "time": 0.69551}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.77891, "loss": 0.00069, "grad_norm": 0.00141, "time": 0.69551}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05625, "heatmap_loss": 0.00069, "acc_pose": 0.78498, "loss": 0.00069, "grad_norm": 0.0014, "time": 0.75421}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.77939, "loss": 0.00069, "grad_norm": 0.0013, "time": 0.69555}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00069, "acc_pose": 0.7789, "loss": 0.00069, "grad_norm": 0.00135, "time": 0.69601}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78564, "loss": 0.00069, "grad_norm": 0.00136, "time": 0.69531}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.78303, "loss": 0.00069, "grad_norm": 0.00132, "time": 0.69523}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05669, "heatmap_loss": 0.00069, "acc_pose": 0.7868, "loss": 0.00069, "grad_norm": 0.00138, "time": 0.75283}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78239, "loss": 0.00069, "grad_norm": 0.00132, "time": 0.69546}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00069, "acc_pose": 0.78125, "loss": 0.00069, "grad_norm": 0.00127, "time": 0.69588}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.78721, "loss": 0.00069, "grad_norm": 0.00128, "time": 0.69577}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78377, "loss": 0.00068, "grad_norm": 0.00128, "time": 0.69511}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05613, "heatmap_loss": 0.00068, "acc_pose": 0.77982, "loss": 0.00068, "grad_norm": 0.00127, "time": 0.75285}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78863, "loss": 0.00069, "grad_norm": 0.00134, "time": 0.69521}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.7862, "loss": 0.00068, "grad_norm": 0.00128, "time": 0.69517}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.78756, "loss": 0.00068, "grad_norm": 0.00126, "time": 0.69538}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.78774, "loss": 0.00068, "grad_norm": 0.00129, "time": 0.69569}
+{"mode": "val", "epoch": 30, "iter": 407, "lr": 0.0, "AP": 0.75186, "AP .5": 0.90538, "AP .75": 0.82958, "AP (M)": 0.67827, "AP (L)": 0.77732, "AR": 0.80705, "AR .5": 0.94616, "AR .75": 0.87531, "AR (M)": 0.76575, "AR (L)": 0.86696}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05478, "heatmap_loss": 0.00068, "acc_pose": 0.78942, "loss": 0.00068, "grad_norm": 0.00126, "time": 0.74822}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.78511, "loss": 0.00068, "grad_norm": 0.00128, "time": 0.69449}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.78658, "loss": 0.00068, "grad_norm": 0.00131, "time": 0.69496}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.7889, "loss": 0.00068, "grad_norm": 0.00127, "time": 0.69466}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.79464, "loss": 0.00068, "grad_norm": 0.00125, "time": 0.69499}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0565, "heatmap_loss": 0.00068, "acc_pose": 0.79264, "loss": 0.00068, "grad_norm": 0.00124, "time": 0.75355}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00068, "acc_pose": 0.79763, "loss": 0.00068, "grad_norm": 0.00122, "time": 0.69604}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.78514, "loss": 0.00067, "grad_norm": 0.00124, "time": 0.69561}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79703, "loss": 0.00067, "grad_norm": 0.00128, "time": 0.69527}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00067, "acc_pose": 0.79587, "loss": 0.00067, "grad_norm": 0.0013, "time": 0.69536}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05627, "heatmap_loss": 0.00067, "acc_pose": 0.7946, "loss": 0.00067, "grad_norm": 0.00128, "time": 0.75283}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78967, "loss": 0.00068, "grad_norm": 0.00126, "time": 0.69522}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79698, "loss": 0.00067, "grad_norm": 0.00124, "time": 0.69536}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.79289, "loss": 0.00068, "grad_norm": 0.00131, "time": 0.69505}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79768, "loss": 0.00067, "grad_norm": 0.00123, "time": 0.6952}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05585, "heatmap_loss": 0.00067, "acc_pose": 0.7883, "loss": 0.00067, "grad_norm": 0.00126, "time": 0.7524}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00067, "acc_pose": 0.79129, "loss": 0.00067, "grad_norm": 0.00133, "time": 0.69481}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.7955, "loss": 0.00067, "grad_norm": 0.00127, "time": 0.69519}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00048, "heatmap_loss": 0.00067, "acc_pose": 0.79428, "loss": 0.00067, "grad_norm": 0.0013, "time": 0.69498}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79421, "loss": 0.00067, "grad_norm": 0.00131, "time": 0.69574}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0596, "heatmap_loss": 0.00066, "acc_pose": 0.79367, "loss": 0.00066, "grad_norm": 0.00126, "time": 0.75606}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00067, "acc_pose": 0.79199, "loss": 0.00067, "grad_norm": 0.00123, "time": 0.69544}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.78903, "loss": 0.00066, "grad_norm": 0.00124, "time": 0.69548}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79751, "loss": 0.00067, "grad_norm": 0.00126, "time": 0.69575}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.80051, "loss": 0.00066, "grad_norm": 0.00133, "time": 0.69487}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05567, "heatmap_loss": 0.00066, "acc_pose": 0.79199, "loss": 0.00066, "grad_norm": 0.0012, "time": 0.75207}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00067, "acc_pose": 0.79775, "loss": 0.00067, "grad_norm": 0.0013, "time": 0.69516}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79125, "loss": 0.00066, "grad_norm": 0.00119, "time": 0.6953}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.79808, "loss": 0.00066, "grad_norm": 0.00134, "time": 0.6955}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79217, "loss": 0.00066, "grad_norm": 0.00121, "time": 0.69509}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05562, "heatmap_loss": 0.00066, "acc_pose": 0.79065, "loss": 0.00066, "grad_norm": 0.00126, "time": 0.7542}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00026, "heatmap_loss": 0.00067, "acc_pose": 0.79073, "loss": 0.00067, "grad_norm": 0.00124, "time": 0.69587}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79235, "loss": 0.00066, "grad_norm": 0.0012, "time": 0.69514}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.80341, "loss": 0.00066, "grad_norm": 0.00123, "time": 0.69534}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79145, "loss": 0.00066, "grad_norm": 0.00117, "time": 0.6949}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05623, "heatmap_loss": 0.00066, "acc_pose": 0.79894, "loss": 0.00066, "grad_norm": 0.0012, "time": 0.7547}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79898, "loss": 0.00066, "grad_norm": 0.00125, "time": 0.69532}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.79647, "loss": 0.00066, "grad_norm": 0.00125, "time": 0.6955}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.79656, "loss": 0.00066, "grad_norm": 0.0012, "time": 0.6949}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79245, "loss": 0.00066, "grad_norm": 0.0012, "time": 0.69484}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05602, "heatmap_loss": 0.00066, "acc_pose": 0.79636, "loss": 0.00066, "grad_norm": 0.00124, "time": 0.75158}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79728, "loss": 0.00065, "grad_norm": 0.00134, "time": 0.69523}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.79335, "loss": 0.00065, "grad_norm": 0.00128, "time": 0.69534}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.79153, "loss": 0.00066, "grad_norm": 0.00119, "time": 0.69505}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79425, "loss": 0.00066, "grad_norm": 0.00128, "time": 0.69472}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05592, "heatmap_loss": 0.00065, "acc_pose": 0.8023, "loss": 0.00065, "grad_norm": 0.00118, "time": 0.75257}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79401, "loss": 0.00065, "grad_norm": 0.00122, "time": 0.69539}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00066, "acc_pose": 0.80023, "loss": 0.00066, "grad_norm": 0.00121, "time": 0.6952}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79573, "loss": 0.00066, "grad_norm": 0.00119, "time": 0.69521}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.80177, "loss": 0.00065, "grad_norm": 0.00122, "time": 0.69537}
+{"mode": "val", "epoch": 40, "iter": 407, "lr": 0.0, "AP": 0.75691, "AP .5": 0.90678, "AP .75": 0.83128, "AP (M)": 0.68208, "AP (L)": 0.78327, "AR": 0.81272, "AR .5": 0.94789, "AR .75": 0.87736, "AR (M)": 0.77088, "AR (L)": 0.87347}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05499, "heatmap_loss": 0.00065, "acc_pose": 0.80236, "loss": 0.00065, "grad_norm": 0.00119, "time": 0.74832}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.79906, "loss": 0.00065, "grad_norm": 0.00126, "time": 0.69511}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.80588, "loss": 0.00065, "grad_norm": 0.0012, "time": 0.69574}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80054, "loss": 0.00065, "grad_norm": 0.00117, "time": 0.69512}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79445, "loss": 0.00065, "grad_norm": 0.00123, "time": 0.69537}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05526, "heatmap_loss": 0.00065, "acc_pose": 0.79951, "loss": 0.00065, "grad_norm": 0.00117, "time": 0.75122}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.79579, "loss": 0.00065, "grad_norm": 0.00121, "time": 0.69532}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.79467, "loss": 0.00065, "grad_norm": 0.0012, "time": 0.69531}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80078, "loss": 0.00065, "grad_norm": 0.00119, "time": 0.69528}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.79756, "loss": 0.00065, "grad_norm": 0.00127, "time": 0.69499}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05633, "heatmap_loss": 0.00065, "acc_pose": 0.80595, "loss": 0.00065, "grad_norm": 0.00118, "time": 0.75211}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80364, "loss": 0.00065, "grad_norm": 0.00121, "time": 0.69534}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80246, "loss": 0.00065, "grad_norm": 0.00125, "time": 0.69532}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80166, "loss": 0.00065, "grad_norm": 0.00121, "time": 0.69563}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.81199, "loss": 0.00064, "grad_norm": 0.00118, "time": 0.69509}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0557, "heatmap_loss": 0.00064, "acc_pose": 0.80713, "loss": 0.00064, "grad_norm": 0.0012, "time": 0.75205}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80058, "loss": 0.00064, "grad_norm": 0.00119, "time": 0.69514}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80109, "loss": 0.00065, "grad_norm": 0.00119, "time": 0.69507}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80349, "loss": 0.00064, "grad_norm": 0.00118, "time": 0.69531}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.8025, "loss": 0.00064, "grad_norm": 0.00116, "time": 0.69527}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05665, "heatmap_loss": 0.00064, "acc_pose": 0.80466, "loss": 0.00064, "grad_norm": 0.00121, "time": 0.75215}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.8008, "loss": 0.00064, "grad_norm": 0.00123, "time": 0.69552}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.79911, "loss": 0.00064, "grad_norm": 0.00118, "time": 0.69516}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00041, "heatmap_loss": 0.00064, "acc_pose": 0.79881, "loss": 0.00064, "grad_norm": 0.0012, "time": 0.69511}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80615, "loss": 0.00064, "grad_norm": 0.00118, "time": 0.69479}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05651, "heatmap_loss": 0.00064, "acc_pose": 0.8037, "loss": 0.00064, "grad_norm": 0.00122, "time": 0.75194}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80651, "loss": 0.00064, "grad_norm": 0.00119, "time": 0.69526}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.79929, "loss": 0.00064, "grad_norm": 0.00122, "time": 0.69502}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80247, "loss": 0.00064, "grad_norm": 0.00123, "time": 0.6954}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80379, "loss": 0.00064, "grad_norm": 0.00126, "time": 0.69502}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05638, "heatmap_loss": 0.00064, "acc_pose": 0.80186, "loss": 0.00064, "grad_norm": 0.00119, "time": 0.75305}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.8113, "loss": 0.00064, "grad_norm": 0.00123, "time": 0.69552}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00064, "acc_pose": 0.79661, "loss": 0.00064, "grad_norm": 0.00116, "time": 0.69491}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80651, "loss": 0.00064, "grad_norm": 0.00126, "time": 0.69508}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.81079, "loss": 0.00064, "grad_norm": 0.00114, "time": 0.69425}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05617, "heatmap_loss": 0.00063, "acc_pose": 0.80848, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.75455}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80096, "loss": 0.00063, "grad_norm": 0.00121, "time": 0.6954}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80798, "loss": 0.00063, "grad_norm": 0.00122, "time": 0.69587}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80545, "loss": 0.00064, "grad_norm": 0.00118, "time": 0.69584}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00064, "acc_pose": 0.80954, "loss": 0.00064, "grad_norm": 0.00116, "time": 0.69527}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05579, "heatmap_loss": 0.00064, "acc_pose": 0.80389, "loss": 0.00064, "grad_norm": 0.00119, "time": 0.75266}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80133, "loss": 0.00064, "grad_norm": 0.00125, "time": 0.69473}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00041, "heatmap_loss": 0.00063, "acc_pose": 0.80561, "loss": 0.00063, "grad_norm": 0.00115, "time": 0.69569}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.8016, "loss": 0.00063, "grad_norm": 0.00119, "time": 0.69541}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.8109, "loss": 0.00064, "grad_norm": 0.00119, "time": 0.69534}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05531, "heatmap_loss": 0.00063, "acc_pose": 0.81558, "loss": 0.00063, "grad_norm": 0.00119, "time": 0.7517}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.80449, "loss": 0.00063, "grad_norm": 0.00119, "time": 0.69551}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00063, "acc_pose": 0.80852, "loss": 0.00063, "grad_norm": 0.00119, "time": 0.69565}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80748, "loss": 0.00063, "grad_norm": 0.0012, "time": 0.69537}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00063, "acc_pose": 0.80117, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.69476}
+{"mode": "val", "epoch": 50, "iter": 407, "lr": 0.0, "AP": 0.76138, "AP .5": 0.90762, "AP .75": 0.83584, "AP (M)": 0.68764, "AP (L)": 0.78745, "AR": 0.81633, "AR .5": 0.94915, "AR .75": 0.88098, "AR (M)": 0.77506, "AR (L)": 0.8764}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05631, "heatmap_loss": 0.00062, "acc_pose": 0.81414, "loss": 0.00062, "grad_norm": 0.00121, "time": 0.75093}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80669, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.69523}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.80804, "loss": 0.00063, "grad_norm": 0.00123, "time": 0.69488}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81175, "loss": 0.00063, "grad_norm": 0.00119, "time": 0.69524}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.8133, "loss": 0.00063, "grad_norm": 0.00114, "time": 0.69539}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05542, "heatmap_loss": 0.00063, "acc_pose": 0.81117, "loss": 0.00063, "grad_norm": 0.0012, "time": 0.75106}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81516, "loss": 0.00063, "grad_norm": 0.00115, "time": 0.6959}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.811, "loss": 0.00063, "grad_norm": 0.00122, "time": 0.69578}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80993, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.69555}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80685, "loss": 0.00063, "grad_norm": 0.00118, "time": 0.69518}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05555, "heatmap_loss": 0.00063, "acc_pose": 0.81119, "loss": 0.00063, "grad_norm": 0.00118, "time": 0.75264}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00063, "acc_pose": 0.80723, "loss": 0.00063, "grad_norm": 0.00114, "time": 0.6954}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00049, "heatmap_loss": 0.00062, "acc_pose": 0.81115, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.69584}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00039, "heatmap_loss": 0.00063, "acc_pose": 0.81204, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.69562}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80801, "loss": 0.00063, "grad_norm": 0.00121, "time": 0.69576}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05649, "heatmap_loss": 0.00062, "acc_pose": 0.8128, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.75318}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80764, "loss": 0.00063, "grad_norm": 0.00116, "time": 0.69499}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.81188, "loss": 0.00062, "grad_norm": 0.00114, "time": 0.69576}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81275, "loss": 0.00063, "grad_norm": 0.00113, "time": 0.69496}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.81171, "loss": 0.00063, "grad_norm": 0.00115, "time": 0.69471}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05622, "heatmap_loss": 0.00062, "acc_pose": 0.81553, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.75293}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80452, "loss": 0.00062, "grad_norm": 0.00114, "time": 0.69566}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00063, "acc_pose": 0.8055, "loss": 0.00063, "grad_norm": 0.00117, "time": 0.69504}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.8147, "loss": 0.00062, "grad_norm": 0.00111, "time": 0.69498}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81093, "loss": 0.00062, "grad_norm": 0.00117, "time": 0.69484}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05885, "heatmap_loss": 0.00063, "acc_pose": 0.81078, "loss": 0.00063, "grad_norm": 0.00118, "time": 0.75666}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81164, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.69502}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80789, "loss": 0.00062, "grad_norm": 0.00124, "time": 0.69524}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80512, "loss": 0.00062, "grad_norm": 0.00118, "time": 0.69569}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.80859, "loss": 0.00062, "grad_norm": 0.00121, "time": 0.69533}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05641, "heatmap_loss": 0.00062, "acc_pose": 0.81332, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.75287}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.80848, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.69623}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81301, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.69515}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81936, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.69557}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81109, "loss": 0.00062, "grad_norm": 0.00112, "time": 0.69499}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05697, "heatmap_loss": 0.00062, "acc_pose": 0.81522, "loss": 0.00062, "grad_norm": 0.00119, "time": 0.75304}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.8105, "loss": 0.00062, "grad_norm": 0.00119, "time": 0.69518}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81129, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.69533}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.8194, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.69551}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81521, "loss": 0.00062, "grad_norm": 0.00119, "time": 0.69544}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05695, "heatmap_loss": 0.00061, "acc_pose": 0.81845, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.75261}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.815, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.69566}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81527, "loss": 0.00062, "grad_norm": 0.00118, "time": 0.69559}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81372, "loss": 0.00061, "grad_norm": 0.0012, "time": 0.69485}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81618, "loss": 0.00062, "grad_norm": 0.00115, "time": 0.69541}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05572, "heatmap_loss": 0.00061, "acc_pose": 0.81602, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.75183}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.80952, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.69514}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80912, "loss": 0.00062, "grad_norm": 0.0011, "time": 0.6951}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.81187, "loss": 0.00062, "grad_norm": 0.00113, "time": 0.69503}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.8055, "loss": 0.00061, "grad_norm": 0.00115, "time": 0.69457}
+{"mode": "val", "epoch": 60, "iter": 407, "lr": 0.0, "AP": 0.76504, "AP .5": 0.90827, "AP .75": 0.83948, "AP (M)": 0.69105, "AP (L)": 0.79177, "AR": 0.81955, "AR .5": 0.94884, "AR .75": 0.88445, "AR (M)": 0.77823, "AR (L)": 0.87986}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05635, "heatmap_loss": 0.00061, "acc_pose": 0.80604, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.74937}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00048, "heatmap_loss": 0.00061, "acc_pose": 0.80755, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.69518}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00061, "acc_pose": 0.81048, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.69526}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81522, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.69513}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81409, "loss": 0.00062, "grad_norm": 0.00113, "time": 0.69528}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05559, "heatmap_loss": 0.00061, "acc_pose": 0.81028, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.75207}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81159, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.69539}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.81012, "loss": 0.00062, "grad_norm": 0.00116, "time": 0.69571}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00061, "acc_pose": 0.81963, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.69566}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00061, "acc_pose": 0.81821, "loss": 0.00061, "grad_norm": 0.00113, "time": 0.69576}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05595, "heatmap_loss": 0.00061, "acc_pose": 0.814, "loss": 0.00061, "grad_norm": 0.0012, "time": 0.75363}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81585, "loss": 0.00061, "grad_norm": 0.00124, "time": 0.69553}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81838, "loss": 0.00061, "grad_norm": 0.00115, "time": 0.69624}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81914, "loss": 0.00061, "grad_norm": 0.00111, "time": 0.69588}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81817, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.6953}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05784, "heatmap_loss": 0.00061, "acc_pose": 0.81454, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.75324}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81538, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.69566}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00061, "acc_pose": 0.81937, "loss": 0.00061, "grad_norm": 0.00115, "time": 0.69519}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81444, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.69504}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81472, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.695}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05716, "heatmap_loss": 0.00061, "acc_pose": 0.81647, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.75321}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81645, "loss": 0.00061, "grad_norm": 0.00112, "time": 0.69517}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81739, "loss": 0.00061, "grad_norm": 0.00115, "time": 0.69513}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81346, "loss": 0.00061, "grad_norm": 0.00118, "time": 0.69572}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81467, "loss": 0.00061, "grad_norm": 0.00119, "time": 0.6951}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05605, "heatmap_loss": 0.0006, "acc_pose": 0.82169, "loss": 0.0006, "grad_norm": 0.00109, "time": 0.75307}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00061, "acc_pose": 0.81172, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.69554}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81819, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.6956}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81894, "loss": 0.0006, "grad_norm": 0.0011, "time": 0.69469}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81551, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.69454}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05544, "heatmap_loss": 0.00061, "acc_pose": 0.81986, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.75299}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81193, "loss": 0.00061, "grad_norm": 0.0011, "time": 0.69466}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81924, "loss": 0.00061, "grad_norm": 0.00117, "time": 0.69497}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.8205, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.69519}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81886, "loss": 0.00061, "grad_norm": 0.00109, "time": 0.69581}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05609, "heatmap_loss": 0.0006, "acc_pose": 0.80955, "loss": 0.0006, "grad_norm": 0.00118, "time": 0.75312}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.0006, "acc_pose": 0.81646, "loss": 0.0006, "grad_norm": 0.00111, "time": 0.69587}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.0006, "acc_pose": 0.819, "loss": 0.0006, "grad_norm": 0.00119, "time": 0.69532}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0006, "acc_pose": 0.81624, "loss": 0.0006, "grad_norm": 0.00117, "time": 0.69501}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81778, "loss": 0.00061, "grad_norm": 0.00116, "time": 0.69506}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05599, "heatmap_loss": 0.0006, "acc_pose": 0.81688, "loss": 0.0006, "grad_norm": 0.00109, "time": 0.75437}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81513, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.69531}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.81312, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.69532}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.82249, "loss": 0.0006, "grad_norm": 0.00111, "time": 0.69486}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81901, "loss": 0.00061, "grad_norm": 0.00114, "time": 0.69448}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05618, "heatmap_loss": 0.0006, "acc_pose": 0.81631, "loss": 0.0006, "grad_norm": 0.00115, "time": 0.75329}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81876, "loss": 0.0006, "grad_norm": 0.00105, "time": 0.69566}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81163, "loss": 0.0006, "grad_norm": 0.00108, "time": 0.69606}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.8223, "loss": 0.0006, "grad_norm": 0.00111, "time": 0.69579}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81437, "loss": 0.0006, "grad_norm": 0.00112, "time": 0.69576}
+{"mode": "val", "epoch": 70, "iter": 407, "lr": 0.0, "AP": 0.76747, "AP .5": 0.91137, "AP .75": 0.84067, "AP (M)": 0.69403, "AP (L)": 0.79471, "AR": 0.82143, "AR .5": 0.95057, "AR .75": 0.88508, "AR (M)": 0.78082, "AR (L)": 0.88101}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05583, "heatmap_loss": 0.0006, "acc_pose": 0.82052, "loss": 0.0006, "grad_norm": 0.0011, "time": 0.74912}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.0006, "acc_pose": 0.81508, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.6947}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.0006, "acc_pose": 0.82013, "loss": 0.0006, "grad_norm": 0.00109, "time": 0.69461}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.0006, "acc_pose": 0.82301, "loss": 0.0006, "grad_norm": 0.00106, "time": 0.69488}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.0006, "acc_pose": 0.82477, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.69514}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0569, "heatmap_loss": 0.0006, "acc_pose": 0.82152, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.75336}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0006, "acc_pose": 0.81743, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.69535}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81248, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.69555}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0006, "acc_pose": 0.82302, "loss": 0.0006, "grad_norm": 0.00115, "time": 0.69499}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81692, "loss": 0.0006, "grad_norm": 0.00111, "time": 0.69558}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05625, "heatmap_loss": 0.00059, "acc_pose": 0.82287, "loss": 0.00059, "grad_norm": 0.00124, "time": 0.75181}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.8226, "loss": 0.0006, "grad_norm": 0.00118, "time": 0.69522}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81172, "loss": 0.0006, "grad_norm": 0.00111, "time": 0.69519}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.82349, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.69492}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0006, "acc_pose": 0.82207, "loss": 0.0006, "grad_norm": 0.00118, "time": 0.69542}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05643, "heatmap_loss": 0.0006, "acc_pose": 0.81865, "loss": 0.0006, "grad_norm": 0.00109, "time": 0.75324}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.0006, "acc_pose": 0.81702, "loss": 0.0006, "grad_norm": 0.0011, "time": 0.69508}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00042, "heatmap_loss": 0.0006, "acc_pose": 0.81331, "loss": 0.0006, "grad_norm": 0.00104, "time": 0.69551}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.82315, "loss": 0.0006, "grad_norm": 0.00115, "time": 0.69501}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.0006, "acc_pose": 0.81714, "loss": 0.0006, "grad_norm": 0.0011, "time": 0.69504}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05659, "heatmap_loss": 0.0006, "acc_pose": 0.82422, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.75326}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82231, "loss": 0.00059, "grad_norm": 0.00115, "time": 0.69496}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.81814, "loss": 0.00059, "grad_norm": 0.00116, "time": 0.69556}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.82412, "loss": 0.0006, "grad_norm": 0.00118, "time": 0.69484}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00059, "acc_pose": 0.81916, "loss": 0.00059, "grad_norm": 0.00109, "time": 0.69525}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05704, "heatmap_loss": 0.0006, "acc_pose": 0.81864, "loss": 0.0006, "grad_norm": 0.00116, "time": 0.75426}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0004, "heatmap_loss": 0.0006, "acc_pose": 0.82061, "loss": 0.0006, "grad_norm": 0.00116, "time": 0.69489}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00059, "acc_pose": 0.826, "loss": 0.00059, "grad_norm": 0.00107, "time": 0.69514}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82402, "loss": 0.00059, "grad_norm": 0.00113, "time": 0.69525}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.82206, "loss": 0.0006, "grad_norm": 0.00117, "time": 0.69508}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0565, "heatmap_loss": 0.00059, "acc_pose": 0.8199, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.75396}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81595, "loss": 0.0006, "grad_norm": 0.00114, "time": 0.69497}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82232, "loss": 0.00059, "grad_norm": 0.0011, "time": 0.69563}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81917, "loss": 0.00059, "grad_norm": 0.00116, "time": 0.6951}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82565, "loss": 0.00059, "grad_norm": 0.00109, "time": 0.69502}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05812, "heatmap_loss": 0.00059, "acc_pose": 0.82859, "loss": 0.00059, "grad_norm": 0.00117, "time": 0.75391}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.83061, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69503}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.82154, "loss": 0.0006, "grad_norm": 0.00113, "time": 0.69522}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.81727, "loss": 0.00059, "grad_norm": 0.00122, "time": 0.69535}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82317, "loss": 0.00059, "grad_norm": 0.00115, "time": 0.69567}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05863, "heatmap_loss": 0.00059, "acc_pose": 0.82554, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.75691}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00039, "heatmap_loss": 0.00059, "acc_pose": 0.81472, "loss": 0.00059, "grad_norm": 0.00116, "time": 0.69514}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.8165, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.69522}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82533, "loss": 0.00059, "grad_norm": 0.00104, "time": 0.69536}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00059, "acc_pose": 0.82552, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69551}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05622, "heatmap_loss": 0.00059, "acc_pose": 0.82789, "loss": 0.00059, "grad_norm": 0.00109, "time": 0.75329}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82823, "loss": 0.00059, "grad_norm": 0.00107, "time": 0.69517}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81794, "loss": 0.00059, "grad_norm": 0.00106, "time": 0.69513}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82521, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69498}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82079, "loss": 0.00059, "grad_norm": 0.00115, "time": 0.69516}
+{"mode": "val", "epoch": 80, "iter": 407, "lr": 0.0, "AP": 0.76984, "AP .5": 0.91211, "AP .75": 0.8411, "AP (M)": 0.69614, "AP (L)": 0.79653, "AR": 0.82365, "AR .5": 0.95057, "AR .75": 0.88618, "AR (M)": 0.78317, "AR (L)": 0.88276}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05487, "heatmap_loss": 0.00059, "acc_pose": 0.82501, "loss": 0.00059, "grad_norm": 0.00104, "time": 0.74719}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81928, "loss": 0.00059, "grad_norm": 0.00116, "time": 0.69432}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81822, "loss": 0.00059, "grad_norm": 0.00108, "time": 0.69481}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82629, "loss": 0.00059, "grad_norm": 0.00108, "time": 0.69474}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00059, "acc_pose": 0.82429, "loss": 0.00059, "grad_norm": 0.0011, "time": 0.69484}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05587, "heatmap_loss": 0.00059, "acc_pose": 0.82811, "loss": 0.00059, "grad_norm": 0.00108, "time": 0.75121}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00059, "acc_pose": 0.82706, "loss": 0.00059, "grad_norm": 0.00108, "time": 0.69462}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82845, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69502}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82593, "loss": 0.00059, "grad_norm": 0.00106, "time": 0.69458}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82492, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69508}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05512, "heatmap_loss": 0.00059, "acc_pose": 0.82533, "loss": 0.00059, "grad_norm": 0.00116, "time": 0.75365}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82026, "loss": 0.00059, "grad_norm": 0.0011, "time": 0.69534}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00059, "acc_pose": 0.82045, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69505}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00059, "acc_pose": 0.82101, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.69513}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00059, "acc_pose": 0.82314, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.6953}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05593, "heatmap_loss": 0.00058, "acc_pose": 0.82891, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.75308}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82238, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.69529}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.82204, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69518}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.8288, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.69478}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81996, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.69498}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0565, "heatmap_loss": 0.00059, "acc_pose": 0.82341, "loss": 0.00059, "grad_norm": 0.00111, "time": 0.75196}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.83231, "loss": 0.00059, "grad_norm": 0.00109, "time": 0.69521}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82428, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69467}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.8285, "loss": 0.00058, "grad_norm": 0.00118, "time": 0.69511}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.83052, "loss": 0.00059, "grad_norm": 0.0011, "time": 0.69553}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0561, "heatmap_loss": 0.00058, "acc_pose": 0.82959, "loss": 0.00058, "grad_norm": 0.00114, "time": 0.75282}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82372, "loss": 0.00059, "grad_norm": 0.00107, "time": 0.69515}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00042, "heatmap_loss": 0.00059, "acc_pose": 0.82058, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69498}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.82496, "loss": 0.00058, "grad_norm": 0.00114, "time": 0.69458}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82715, "loss": 0.00058, "grad_norm": 0.00113, "time": 0.6942}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05806, "heatmap_loss": 0.00058, "acc_pose": 0.82948, "loss": 0.00058, "grad_norm": 0.00113, "time": 0.75499}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83271, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69561}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81985, "loss": 0.00059, "grad_norm": 0.00105, "time": 0.69553}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82861, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.69576}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82787, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69528}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0557, "heatmap_loss": 0.00058, "acc_pose": 0.82915, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.75538}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82629, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69522}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82565, "loss": 0.00058, "grad_norm": 0.00112, "time": 0.69568}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82801, "loss": 0.00058, "grad_norm": 0.00107, "time": 0.69591}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83369, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69526}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0565, "heatmap_loss": 0.00058, "acc_pose": 0.81622, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.75529}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82569, "loss": 0.00058, "grad_norm": 0.00114, "time": 0.69567}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82154, "loss": 0.00059, "grad_norm": 0.00112, "time": 0.69554}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82535, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.69542}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83295, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.6958}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05641, "heatmap_loss": 0.00058, "acc_pose": 0.8281, "loss": 0.00058, "grad_norm": 0.00114, "time": 0.75442}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00047, "heatmap_loss": 0.00058, "acc_pose": 0.82692, "loss": 0.00058, "grad_norm": 0.00107, "time": 0.69632}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.81635, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.69585}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82739, "loss": 0.00059, "grad_norm": 0.00108, "time": 0.69601}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82972, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69576}
+{"mode": "val", "epoch": 90, "iter": 407, "lr": 0.0, "AP": 0.77309, "AP .5": 0.91237, "AP .75": 0.84841, "AP (M)": 0.70115, "AP (L)": 0.79969, "AR": 0.82547, "AR .5": 0.95041, "AR .75": 0.89074, "AR (M)": 0.78541, "AR (L)": 0.88417}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05569, "heatmap_loss": 0.00058, "acc_pose": 0.83298, "loss": 0.00058, "grad_norm": 0.00116, "time": 0.75061}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82929, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.6949}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83041, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69537}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.82427, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69528}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82944, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.69547}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05542, "heatmap_loss": 0.00058, "acc_pose": 0.82948, "loss": 0.00058, "grad_norm": 0.00112, "time": 0.75321}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82789, "loss": 0.00058, "grad_norm": 0.00105, "time": 0.69623}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82615, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.69554}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.83071, "loss": 0.00058, "grad_norm": 0.00116, "time": 0.69585}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.83118, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69558}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05681, "heatmap_loss": 0.00057, "acc_pose": 0.82929, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.75443}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82289, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69557}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83108, "loss": 0.00058, "grad_norm": 0.00117, "time": 0.69584}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83133, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.69522}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83573, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69569}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05834, "heatmap_loss": 0.00057, "acc_pose": 0.82674, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.75467}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00058, "acc_pose": 0.8346, "loss": 0.00058, "grad_norm": 0.00105, "time": 0.69626}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.82927, "loss": 0.00057, "grad_norm": 0.0011, "time": 0.69541}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00058, "acc_pose": 0.83256, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.69561}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83434, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.69558}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05752, "heatmap_loss": 0.00058, "acc_pose": 0.82944, "loss": 0.00058, "grad_norm": 0.00117, "time": 0.75624}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82633, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.69625}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82389, "loss": 0.00058, "grad_norm": 0.00108, "time": 0.69613}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83085, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.69559}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82999, "loss": 0.00057, "grad_norm": 0.00106, "time": 0.69603}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0578, "heatmap_loss": 0.00057, "acc_pose": 0.83148, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.75479}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83595, "loss": 0.00057, "grad_norm": 0.00114, "time": 0.69595}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83185, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.6956}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82817, "loss": 0.00058, "grad_norm": 0.00113, "time": 0.69562}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83462, "loss": 0.00058, "grad_norm": 0.00114, "time": 0.6965}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.06016, "heatmap_loss": 0.00057, "acc_pose": 0.83122, "loss": 0.00057, "grad_norm": 0.00105, "time": 0.75667}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.8329, "loss": 0.00058, "grad_norm": 0.00109, "time": 0.69538}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83203, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69568}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82951, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.69537}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83128, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69504}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05701, "heatmap_loss": 0.00057, "acc_pose": 0.82594, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.75482}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.83112, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.6959}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83181, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69555}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83415, "loss": 0.00058, "grad_norm": 0.00106, "time": 0.6955}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82934, "loss": 0.00058, "grad_norm": 0.00112, "time": 0.69525}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05632, "heatmap_loss": 0.00057, "acc_pose": 0.83629, "loss": 0.00057, "grad_norm": 0.00106, "time": 0.75378}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82914, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69581}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83368, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69554}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82654, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69528}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83351, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.69557}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05628, "heatmap_loss": 0.00056, "acc_pose": 0.83458, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.75787}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.82982, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.69485}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.8321, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69524}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83695, "loss": 0.00057, "grad_norm": 0.00113, "time": 0.69489}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.8287, "loss": 0.00058, "grad_norm": 0.0011, "time": 0.69471}
+{"mode": "val", "epoch": 100, "iter": 407, "lr": 0.0, "AP": 0.77593, "AP .5": 0.91417, "AP .75": 0.84768, "AP (M)": 0.70196, "AP (L)": 0.80539, "AR": 0.82771, "AR .5": 0.95135, "AR .75": 0.89027, "AR (M)": 0.78637, "AR (L)": 0.88789}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05542, "heatmap_loss": 0.00057, "acc_pose": 0.82542, "loss": 0.00057, "grad_norm": 0.00117, "time": 0.74883}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82386, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69523}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83518, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69557}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83335, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.69548}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83293, "loss": 0.00057, "grad_norm": 0.0011, "time": 0.69539}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05617, "heatmap_loss": 0.00056, "acc_pose": 0.83922, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.75304}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.82856, "loss": 0.00057, "grad_norm": 0.00106, "time": 0.69548}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82713, "loss": 0.00057, "grad_norm": 0.00113, "time": 0.69608}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.83579, "loss": 0.00058, "grad_norm": 0.00111, "time": 0.69622}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83647, "loss": 0.00057, "grad_norm": 0.00113, "time": 0.69595}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05782, "heatmap_loss": 0.00057, "acc_pose": 0.83566, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.75442}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.8292, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69543}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.83108, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69521}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8319, "loss": 0.00056, "grad_norm": 0.0011, "time": 0.69518}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83317, "loss": 0.00057, "grad_norm": 0.00114, "time": 0.69519}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05651, "heatmap_loss": 0.00057, "acc_pose": 0.82987, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.75397}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8349, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.69527}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83236, "loss": 0.00057, "grad_norm": 0.00103, "time": 0.69561}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.8365, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.69532}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83351, "loss": 0.00057, "grad_norm": 0.0011, "time": 0.69537}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05603, "heatmap_loss": 0.00057, "acc_pose": 0.83631, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.75329}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83687, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.69535}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83357, "loss": 0.00057, "grad_norm": 0.00107, "time": 0.69515}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.84087, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.6955}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.82999, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69607}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05965, "heatmap_loss": 0.00056, "acc_pose": 0.83234, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.75705}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00057, "acc_pose": 0.83123, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69552}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82566, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69537}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00057, "acc_pose": 0.83544, "loss": 0.00057, "grad_norm": 0.0011, "time": 0.69571}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83563, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.6955}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05687, "heatmap_loss": 0.00056, "acc_pose": 0.83607, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.75462}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83127, "loss": 0.00057, "grad_norm": 0.00112, "time": 0.69623}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83706, "loss": 0.00057, "grad_norm": 0.00102, "time": 0.69574}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83149, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.69587}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83123, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69553}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05684, "heatmap_loss": 0.00056, "acc_pose": 0.83748, "loss": 0.00056, "grad_norm": 0.00112, "time": 0.75411}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83274, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69552}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83274, "loss": 0.00057, "grad_norm": 0.00116, "time": 0.69616}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83546, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69514}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83331, "loss": 0.00056, "grad_norm": 0.00109, "time": 0.69497}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05664, "heatmap_loss": 0.00056, "acc_pose": 0.83873, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.75339}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83222, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69564}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83869, "loss": 0.00057, "grad_norm": 0.00104, "time": 0.69573}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83733, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69562}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83388, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.6955}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05911, "heatmap_loss": 0.00057, "acc_pose": 0.82931, "loss": 0.00057, "grad_norm": 0.00111, "time": 0.75681}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.83354, "loss": 0.00056, "grad_norm": 0.00115, "time": 0.69595}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00057, "acc_pose": 0.83345, "loss": 0.00057, "grad_norm": 0.00109, "time": 0.69541}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.84429, "loss": 0.00056, "grad_norm": 0.00112, "time": 0.69531}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83877, "loss": 0.00056, "grad_norm": 0.0011, "time": 0.69548}
+{"mode": "val", "epoch": 110, "iter": 407, "lr": 0.0, "AP": 0.77469, "AP .5": 0.91314, "AP .75": 0.84845, "AP (M)": 0.70217, "AP (L)": 0.80104, "AR": 0.82739, "AR .5": 0.95041, "AR .75": 0.89169, "AR (M)": 0.78806, "AR (L)": 0.8851}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05587, "heatmap_loss": 0.00056, "acc_pose": 0.83448, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.7488}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83324, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69543}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83176, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.69552}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83781, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.69538}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83089, "loss": 0.00056, "grad_norm": 0.00109, "time": 0.69565}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05523, "heatmap_loss": 0.00056, "acc_pose": 0.83813, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.7573}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83034, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69503}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83757, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69605}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.84285, "loss": 0.00056, "grad_norm": 0.00113, "time": 0.69549}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83599, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69525}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05518, "heatmap_loss": 0.00056, "acc_pose": 0.83693, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.75311}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83579, "loss": 0.00057, "grad_norm": 0.00108, "time": 0.69545}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83445, "loss": 0.00057, "grad_norm": 0.00106, "time": 0.69575}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83735, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.69537}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83106, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.6956}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05674, "heatmap_loss": 0.00056, "acc_pose": 0.83178, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.75341}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83772, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.69509}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83084, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.69533}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83939, "loss": 0.00056, "grad_norm": 0.001, "time": 0.69501}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83941, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69527}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05623, "heatmap_loss": 0.00056, "acc_pose": 0.83412, "loss": 0.00056, "grad_norm": 0.00111, "time": 0.75383}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00045, "heatmap_loss": 0.00055, "acc_pose": 0.8366, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.6955}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83339, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.69552}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.8363, "loss": 0.00056, "grad_norm": 0.00102, "time": 0.69521}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00056, "acc_pose": 0.8367, "loss": 0.00056, "grad_norm": 0.0011, "time": 0.69481}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0566, "heatmap_loss": 0.00056, "acc_pose": 0.84223, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.75404}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.82977, "loss": 0.00056, "grad_norm": 0.00112, "time": 0.69539}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00042, "heatmap_loss": 0.00056, "acc_pose": 0.83209, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69578}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00056, "acc_pose": 0.83922, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.69541}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83781, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.69531}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05643, "heatmap_loss": 0.00056, "acc_pose": 0.83376, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.75342}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83888, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69531}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83936, "loss": 0.00056, "grad_norm": 0.00098, "time": 0.69492}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83149, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69485}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.84118, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.6947}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0558, "heatmap_loss": 0.00056, "acc_pose": 0.83893, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.75348}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00056, "acc_pose": 0.83234, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69581}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00056, "acc_pose": 0.83662, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.6955}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00056, "acc_pose": 0.83833, "loss": 0.00056, "grad_norm": 0.001, "time": 0.69538}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00056, "acc_pose": 0.83843, "loss": 0.00056, "grad_norm": 0.00109, "time": 0.69498}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05709, "heatmap_loss": 0.00055, "acc_pose": 0.84482, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.75256}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.841, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.69569}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83112, "loss": 0.00056, "grad_norm": 0.00112, "time": 0.69521}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83614, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69565}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83427, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.69549}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05583, "heatmap_loss": 0.00056, "acc_pose": 0.8424, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.75627}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83964, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.69545}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83556, "loss": 0.00055, "grad_norm": 0.00101, "time": 0.69562}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.8364, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69527}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84282, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69509}
+{"mode": "val", "epoch": 120, "iter": 407, "lr": 0.0, "AP": 0.77656, "AP .5": 0.91384, "AP .75": 0.85017, "AP (M)": 0.70304, "AP (L)": 0.80356, "AR": 0.82917, "AR .5": 0.95167, "AR .75": 0.8931, "AR (M)": 0.78916, "AR (L)": 0.88785}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05622, "heatmap_loss": 0.00056, "acc_pose": 0.83682, "loss": 0.00056, "grad_norm": 0.00104, "time": 0.75079}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.841, "loss": 0.00055, "grad_norm": 0.00109, "time": 0.695}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.83884, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69488}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.8388, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.6947}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83907, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69536}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05651, "heatmap_loss": 0.00055, "acc_pose": 0.83479, "loss": 0.00055, "grad_norm": 0.00112, "time": 0.75218}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.84188, "loss": 0.00056, "grad_norm": 0.00103, "time": 0.69549}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83966, "loss": 0.00055, "grad_norm": 0.001, "time": 0.69538}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83993, "loss": 0.00056, "grad_norm": 0.0011, "time": 0.69539}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83576, "loss": 0.00056, "grad_norm": 0.00107, "time": 0.69486}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05737, "heatmap_loss": 0.00055, "acc_pose": 0.84265, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.75289}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84256, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.69526}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84307, "loss": 0.00055, "grad_norm": 0.00106, "time": 0.69518}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84008, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.6954}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.84115, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69533}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05656, "heatmap_loss": 0.00055, "acc_pose": 0.83227, "loss": 0.00055, "grad_norm": 0.001, "time": 0.75651}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83619, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.6958}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.83871, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.69575}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00044, "heatmap_loss": 0.00056, "acc_pose": 0.83732, "loss": 0.00056, "grad_norm": 0.00109, "time": 0.69533}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83844, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.69458}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05708, "heatmap_loss": 0.00055, "acc_pose": 0.83829, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.75496}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83766, "loss": 0.00055, "grad_norm": 0.001, "time": 0.69575}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83641, "loss": 0.00055, "grad_norm": 0.00099, "time": 0.69553}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83146, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.69471}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84098, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69485}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05601, "heatmap_loss": 0.00055, "acc_pose": 0.83725, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.75344}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.8387, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.69496}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83917, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.69529}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84468, "loss": 0.00055, "grad_norm": 0.00112, "time": 0.6954}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84261, "loss": 0.00055, "grad_norm": 0.00106, "time": 0.6951}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05662, "heatmap_loss": 0.00055, "acc_pose": 0.84359, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.75305}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83524, "loss": 0.00056, "grad_norm": 0.00105, "time": 0.69538}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83941, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69568}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84158, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.69569}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84173, "loss": 0.00055, "grad_norm": 0.00113, "time": 0.69566}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05587, "heatmap_loss": 0.00055, "acc_pose": 0.84009, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.75325}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.83958, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.69458}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84433, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.69485}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00039, "heatmap_loss": 0.00055, "acc_pose": 0.83185, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.69465}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84169, "loss": 0.00055, "grad_norm": 0.001, "time": 0.69475}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05581, "heatmap_loss": 0.00055, "acc_pose": 0.84144, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.75412}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84217, "loss": 0.00055, "grad_norm": 0.00099, "time": 0.69554}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83753, "loss": 0.00055, "grad_norm": 0.0011, "time": 0.69497}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84145, "loss": 0.00055, "grad_norm": 0.00111, "time": 0.69546}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83594, "loss": 0.00055, "grad_norm": 0.0011, "time": 0.6955}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05693, "heatmap_loss": 0.00055, "acc_pose": 0.84311, "loss": 0.00055, "grad_norm": 0.00106, "time": 0.75357}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83146, "loss": 0.00056, "grad_norm": 0.00108, "time": 0.69498}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83605, "loss": 0.00055, "grad_norm": 0.00101, "time": 0.69503}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84034, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.69475}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84298, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.69508}
+{"mode": "val", "epoch": 130, "iter": 407, "lr": 0.0, "AP": 0.77538, "AP .5": 0.91309, "AP .75": 0.84818, "AP (M)": 0.70127, "AP (L)": 0.80338, "AR": 0.82813, "AR .5": 0.95135, "AR .75": 0.89122, "AR (M)": 0.78727, "AR (L)": 0.88803}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05511, "heatmap_loss": 0.00055, "acc_pose": 0.84618, "loss": 0.00055, "grad_norm": 0.00098, "time": 0.74839}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.84069, "loss": 0.00055, "grad_norm": 0.00108, "time": 0.69474}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.83695, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.69535}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84609, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69528}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84108, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.6953}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05718, "heatmap_loss": 0.00054, "acc_pose": 0.84062, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.75307}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.84358, "loss": 0.00056, "grad_norm": 0.00113, "time": 0.69521}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83816, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.69532}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84091, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.69585}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84824, "loss": 0.00055, "grad_norm": 0.0011, "time": 0.69565}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05694, "heatmap_loss": 0.00054, "acc_pose": 0.83653, "loss": 0.00054, "grad_norm": 0.00098, "time": 0.75308}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.83807, "loss": 0.00055, "grad_norm": 0.00102, "time": 0.6954}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83988, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69604}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83784, "loss": 0.00055, "grad_norm": 0.00107, "time": 0.69528}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00055, "acc_pose": 0.83536, "loss": 0.00055, "grad_norm": 0.00108, "time": 0.69457}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05636, "heatmap_loss": 0.00054, "acc_pose": 0.83996, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.75388}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83903, "loss": 0.00055, "grad_norm": 0.00106, "time": 0.69544}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83732, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69503}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84938, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.69587}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83481, "loss": 0.00056, "grad_norm": 0.00106, "time": 0.69535}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05686, "heatmap_loss": 0.00054, "acc_pose": 0.84195, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.75261}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83855, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.69514}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83887, "loss": 0.00055, "grad_norm": 0.00111, "time": 0.69541}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84167, "loss": 0.00055, "grad_norm": 0.00109, "time": 0.6957}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84354, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.6957}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05603, "heatmap_loss": 0.00055, "acc_pose": 0.84085, "loss": 0.00055, "grad_norm": 0.00101, "time": 0.75447}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.84701, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.69584}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83795, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.69499}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83869, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69487}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84529, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.69516}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05573, "heatmap_loss": 0.00055, "acc_pose": 0.84151, "loss": 0.00055, "grad_norm": 0.00105, "time": 0.75369}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.8383, "loss": 0.00055, "grad_norm": 0.00108, "time": 0.6959}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.83859, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.69528}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84322, "loss": 0.00055, "grad_norm": 0.00114, "time": 0.69507}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84287, "loss": 0.00054, "grad_norm": 0.001, "time": 0.69509}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05659, "heatmap_loss": 0.00055, "acc_pose": 0.84101, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.75393}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.8441, "loss": 0.00054, "grad_norm": 0.00114, "time": 0.69495}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84073, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69537}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84445, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69553}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84726, "loss": 0.00054, "grad_norm": 0.00099, "time": 0.69536}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05716, "heatmap_loss": 0.00054, "acc_pose": 0.83731, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.7542}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84424, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69534}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84318, "loss": 0.00055, "grad_norm": 0.00103, "time": 0.69591}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84337, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69535}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00054, "acc_pose": 0.8401, "loss": 0.00054, "grad_norm": 0.001, "time": 0.695}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05628, "heatmap_loss": 0.00054, "acc_pose": 0.84353, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.75313}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84044, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69542}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84041, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69514}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.8411, "loss": 0.00055, "grad_norm": 0.00108, "time": 0.69544}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84269, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69529}
+{"mode": "val", "epoch": 140, "iter": 407, "lr": 0.0, "AP": 0.77693, "AP .5": 0.91334, "AP .75": 0.84867, "AP (M)": 0.70369, "AP (L)": 0.80391, "AR": 0.82966, "AR .5": 0.95214, "AR .75": 0.89137, "AR (M)": 0.78929, "AR (L)": 0.88874}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05657, "heatmap_loss": 0.00055, "acc_pose": 0.84298, "loss": 0.00055, "grad_norm": 0.001, "time": 0.75014}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84914, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.69449}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83568, "loss": 0.00054, "grad_norm": 0.00109, "time": 0.69505}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.8446, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.6951}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.8443, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69571}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0587, "heatmap_loss": 0.00054, "acc_pose": 0.84699, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.7567}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.83559, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.69505}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.8419, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69555}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84458, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69534}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84248, "loss": 0.00054, "grad_norm": 0.00099, "time": 0.69557}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05612, "heatmap_loss": 0.00054, "acc_pose": 0.85031, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.75369}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84192, "loss": 0.00055, "grad_norm": 0.00109, "time": 0.69571}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84238, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69569}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.8429, "loss": 0.00054, "grad_norm": 0.00098, "time": 0.69558}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84491, "loss": 0.00054, "grad_norm": 0.00109, "time": 0.69579}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05618, "heatmap_loss": 0.00054, "acc_pose": 0.83934, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.75341}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.83989, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69557}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83828, "loss": 0.00054, "grad_norm": 0.0011, "time": 0.69562}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83876, "loss": 0.00055, "grad_norm": 0.00104, "time": 0.69505}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84324, "loss": 0.00054, "grad_norm": 0.001, "time": 0.69538}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.06011, "heatmap_loss": 0.00054, "acc_pose": 0.84465, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.75692}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84467, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69551}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84161, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.69546}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.83903, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.6963}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84999, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69514}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05586, "heatmap_loss": 0.00054, "acc_pose": 0.83882, "loss": 0.00054, "grad_norm": 0.00108, "time": 0.75466}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84193, "loss": 0.00054, "grad_norm": 0.00107, "time": 0.69582}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83415, "loss": 0.00054, "grad_norm": 0.00097, "time": 0.69579}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84322, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.6958}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84566, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69595}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0556, "heatmap_loss": 0.00054, "acc_pose": 0.85007, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.75373}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84072, "loss": 0.00054, "grad_norm": 0.001, "time": 0.69532}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84305, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.6954}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84009, "loss": 0.00054, "grad_norm": 0.0011, "time": 0.69589}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.83907, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69549}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05597, "heatmap_loss": 0.00054, "acc_pose": 0.84233, "loss": 0.00054, "grad_norm": 0.00107, "time": 0.75318}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84475, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.69554}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84382, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.6954}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84704, "loss": 0.00054, "grad_norm": 0.00107, "time": 0.69534}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84723, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.69544}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05551, "heatmap_loss": 0.00053, "acc_pose": 0.84117, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.75361}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.85128, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69528}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.85039, "loss": 0.00054, "grad_norm": 0.00096, "time": 0.69578}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84278, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69581}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.85287, "loss": 0.00054, "grad_norm": 0.00107, "time": 0.69577}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05693, "heatmap_loss": 0.00054, "acc_pose": 0.84796, "loss": 0.00054, "grad_norm": 0.00109, "time": 0.75383}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85291, "loss": 0.00053, "grad_norm": 0.001, "time": 0.69487}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84636, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69553}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84972, "loss": 0.00053, "grad_norm": 0.00113, "time": 0.69512}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84542, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69539}
+{"mode": "val", "epoch": 150, "iter": 407, "lr": 0.0, "AP": 0.77833, "AP .5": 0.91398, "AP .75": 0.84911, "AP (M)": 0.70482, "AP (L)": 0.80475, "AR": 0.83114, "AR .5": 0.95309, "AR .75": 0.89185, "AR (M)": 0.79131, "AR (L)": 0.88945}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05784, "heatmap_loss": 0.00054, "acc_pose": 0.84057, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.75142}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84718, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69492}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84586, "loss": 0.00054, "grad_norm": 0.00101, "time": 0.69506}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84522, "loss": 0.00054, "grad_norm": 0.00108, "time": 0.69503}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84812, "loss": 0.00054, "grad_norm": 0.0011, "time": 0.69537}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05639, "heatmap_loss": 0.00053, "acc_pose": 0.84548, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.75245}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84867, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69541}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84896, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.69519}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84611, "loss": 0.00054, "grad_norm": 0.00099, "time": 0.69541}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.85242, "loss": 0.00054, "grad_norm": 0.00111, "time": 0.69519}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05587, "heatmap_loss": 0.00053, "acc_pose": 0.85125, "loss": 0.00053, "grad_norm": 0.00105, "time": 0.75504}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85215, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69539}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00053, "acc_pose": 0.84631, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.6956}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84571, "loss": 0.00054, "grad_norm": 0.00105, "time": 0.69545}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84365, "loss": 0.00054, "grad_norm": 0.001, "time": 0.69548}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0558, "heatmap_loss": 0.00053, "acc_pose": 0.84466, "loss": 0.00053, "grad_norm": 0.001, "time": 0.75421}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84982, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69579}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.83772, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69526}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84302, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69506}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84831, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69544}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0561, "heatmap_loss": 0.00054, "acc_pose": 0.84602, "loss": 0.00054, "grad_norm": 0.00095, "time": 0.75339}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84937, "loss": 0.00053, "grad_norm": 0.001, "time": 0.69569}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.85239, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69495}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84639, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69586}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00053, "acc_pose": 0.84598, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.69522}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05615, "heatmap_loss": 0.00053, "acc_pose": 0.84551, "loss": 0.00053, "grad_norm": 0.00098, "time": 0.75405}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84412, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.69493}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84306, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69525}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84651, "loss": 0.00053, "grad_norm": 0.00105, "time": 0.69517}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84568, "loss": 0.00053, "grad_norm": 0.001, "time": 0.69534}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05638, "heatmap_loss": 0.00053, "acc_pose": 0.85289, "loss": 0.00053, "grad_norm": 0.001, "time": 0.75252}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.85332, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69493}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84787, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69516}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.85357, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69535}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84509, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69505}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05908, "heatmap_loss": 0.00054, "acc_pose": 0.84379, "loss": 0.00054, "grad_norm": 0.00106, "time": 0.75611}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00053, "acc_pose": 0.84641, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69523}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00053, "acc_pose": 0.84279, "loss": 0.00053, "grad_norm": 0.00094, "time": 0.69501}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.85146, "loss": 0.00054, "grad_norm": 0.00102, "time": 0.69559}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.8484, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.69533}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05597, "heatmap_loss": 0.00053, "acc_pose": 0.84242, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.75367}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.8399, "loss": 0.00053, "grad_norm": 0.00098, "time": 0.6954}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84751, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69565}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84713, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69535}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84482, "loss": 0.00053, "grad_norm": 0.00109, "time": 0.69542}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05601, "heatmap_loss": 0.00054, "acc_pose": 0.85055, "loss": 0.00054, "grad_norm": 0.00104, "time": 0.75399}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84841, "loss": 0.00054, "grad_norm": 0.00103, "time": 0.69512}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84769, "loss": 0.00053, "grad_norm": 0.00098, "time": 0.69554}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84411, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69543}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84519, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69567}
+{"mode": "val", "epoch": 160, "iter": 407, "lr": 0.0, "AP": 0.77764, "AP .5": 0.91324, "AP .75": 0.85034, "AP (M)": 0.70411, "AP (L)": 0.80411, "AR": 0.83064, "AR .5": 0.95214, "AR .75": 0.89373, "AR (M)": 0.78981, "AR (L)": 0.88997}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05547, "heatmap_loss": 0.00053, "acc_pose": 0.85137, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.7486}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85481, "loss": 0.00053, "grad_norm": 0.00095, "time": 0.69488}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84522, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.6945}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00053, "acc_pose": 0.85163, "loss": 0.00053, "grad_norm": 0.001, "time": 0.69481}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84339, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69522}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05509, "heatmap_loss": 0.00053, "acc_pose": 0.85147, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.75382}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84482, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69503}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85166, "loss": 0.00053, "grad_norm": 0.00107, "time": 0.69557}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84557, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69487}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84765, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.69558}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05567, "heatmap_loss": 0.00053, "acc_pose": 0.85141, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.75301}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84664, "loss": 0.00053, "grad_norm": 0.00095, "time": 0.69525}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84346, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69559}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84345, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69614}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84655, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69533}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05993, "heatmap_loss": 0.00053, "acc_pose": 0.84749, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.75698}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00053, "acc_pose": 0.84674, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.69561}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00053, "acc_pose": 0.84488, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.6955}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84826, "loss": 0.00053, "grad_norm": 0.00106, "time": 0.69546}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.8534, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69518}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05632, "heatmap_loss": 0.00053, "acc_pose": 0.84978, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.75458}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84881, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69507}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85397, "loss": 0.00053, "grad_norm": 0.00106, "time": 0.69548}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84909, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69529}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00053, "acc_pose": 0.85702, "loss": 0.00053, "grad_norm": 0.00097, "time": 0.69502}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05706, "heatmap_loss": 0.00053, "acc_pose": 0.85043, "loss": 0.00053, "grad_norm": 0.00106, "time": 0.75387}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84804, "loss": 0.00053, "grad_norm": 0.00111, "time": 0.69583}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.8501, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69552}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85016, "loss": 0.00052, "grad_norm": 0.00104, "time": 0.69527}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85066, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69522}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05544, "heatmap_loss": 0.00053, "acc_pose": 0.85454, "loss": 0.00053, "grad_norm": 0.00106, "time": 0.75421}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.85042, "loss": 0.00053, "grad_norm": 0.00108, "time": 0.69524}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84468, "loss": 0.00053, "grad_norm": 0.001, "time": 0.69528}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85816, "loss": 0.00052, "grad_norm": 0.00101, "time": 0.69496}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84438, "loss": 0.00053, "grad_norm": 0.00101, "time": 0.69484}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05681, "heatmap_loss": 0.00053, "acc_pose": 0.85729, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.75387}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85427, "loss": 0.00052, "grad_norm": 0.00098, "time": 0.6954}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84701, "loss": 0.00053, "grad_norm": 0.00099, "time": 0.69495}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84841, "loss": 0.00053, "grad_norm": 0.00104, "time": 0.6954}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00039, "heatmap_loss": 0.00053, "acc_pose": 0.85386, "loss": 0.00053, "grad_norm": 0.00116, "time": 0.69526}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05543, "heatmap_loss": 0.00053, "acc_pose": 0.85378, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.75409}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00052, "acc_pose": 0.84656, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.69538}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.00053, "acc_pose": 0.85528, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69529}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.8506, "loss": 0.00053, "grad_norm": 0.00103, "time": 0.69511}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85611, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69548}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05536, "heatmap_loss": 0.00053, "acc_pose": 0.85035, "loss": 0.00053, "grad_norm": 0.00095, "time": 0.75391}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84327, "loss": 0.00052, "grad_norm": 0.00101, "time": 0.69499}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84512, "loss": 0.00053, "grad_norm": 0.00102, "time": 0.69496}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84987, "loss": 0.00052, "grad_norm": 0.00104, "time": 0.69583}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85043, "loss": 0.00052, "grad_norm": 0.00106, "time": 0.69529}
+{"mode": "val", "epoch": 170, "iter": 407, "lr": 0.0, "AP": 0.77895, "AP .5": 0.91391, "AP .75": 0.85122, "AP (M)": 0.70606, "AP (L)": 0.80577, "AR": 0.83141, "AR .5": 0.95246, "AR .75": 0.89405, "AR (M)": 0.79137, "AR (L)": 0.89}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05643, "heatmap_loss": 0.00052, "acc_pose": 0.85244, "loss": 0.00052, "grad_norm": 0.00103, "time": 0.74984}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85471, "loss": 0.00052, "grad_norm": 0.00098, "time": 0.69468}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85711, "loss": 0.00052, "grad_norm": 0.00102, "time": 0.69478}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00052, "acc_pose": 0.85901, "loss": 0.00052, "grad_norm": 0.00102, "time": 0.69491}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85206, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.69478}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05667, "heatmap_loss": 0.00051, "acc_pose": 0.85311, "loss": 0.00051, "grad_norm": 0.00093, "time": 0.75191}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84762, "loss": 0.00052, "grad_norm": 0.00099, "time": 0.69521}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85267, "loss": 0.00052, "grad_norm": 0.00096, "time": 0.69508}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.858, "loss": 0.00051, "grad_norm": 0.00101, "time": 0.69501}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85898, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69537}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05669, "heatmap_loss": 0.00051, "acc_pose": 0.8513, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.75378}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85369, "loss": 0.00051, "grad_norm": 0.00098, "time": 0.69536}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00051, "acc_pose": 0.85318, "loss": 0.00051, "grad_norm": 0.00099, "time": 0.69533}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.86007, "loss": 0.00051, "grad_norm": 0.00099, "time": 0.69531}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85503, "loss": 0.00051, "grad_norm": 0.00101, "time": 0.69578}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05721, "heatmap_loss": 0.00052, "acc_pose": 0.85516, "loss": 0.00052, "grad_norm": 0.00102, "time": 0.7548}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.00051, "acc_pose": 0.85018, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69466}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85647, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.69535}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85664, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69521}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.852, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69512}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05616, "heatmap_loss": 0.0005, "acc_pose": 0.85791, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.75407}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85735, "loss": 0.00051, "grad_norm": 0.00093, "time": 0.69528}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85407, "loss": 0.00051, "grad_norm": 0.00103, "time": 0.69515}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85776, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.69534}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85933, "loss": 0.00051, "grad_norm": 0.00091, "time": 0.69508}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05644, "heatmap_loss": 0.00051, "acc_pose": 0.85528, "loss": 0.00051, "grad_norm": 0.00092, "time": 0.75402}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85417, "loss": 0.00051, "grad_norm": 0.00097, "time": 0.69546}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.84979, "loss": 0.00051, "grad_norm": 0.001, "time": 0.69517}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85035, "loss": 0.00051, "grad_norm": 0.00101, "time": 0.69533}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85415, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69546}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.06039, "heatmap_loss": 0.0005, "acc_pose": 0.85835, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.7571}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85425, "loss": 0.00051, "grad_norm": 0.00097, "time": 0.69576}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85262, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69491}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85688, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.69497}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85301, "loss": 0.00051, "grad_norm": 0.001, "time": 0.69567}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05577, "heatmap_loss": 0.00051, "acc_pose": 0.85724, "loss": 0.00051, "grad_norm": 0.00099, "time": 0.75476}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85669, "loss": 0.00051, "grad_norm": 0.00096, "time": 0.69555}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.8602, "loss": 0.00051, "grad_norm": 0.00099, "time": 0.6958}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85271, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69524}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00035, "heatmap_loss": 0.00051, "acc_pose": 0.86198, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.69532}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05595, "heatmap_loss": 0.0005, "acc_pose": 0.85457, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.7524}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85508, "loss": 0.00051, "grad_norm": 0.00097, "time": 0.69526}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85269, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69544}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86078, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69568}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85984, "loss": 0.00051, "grad_norm": 0.00093, "time": 0.69557}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05887, "heatmap_loss": 0.00051, "acc_pose": 0.85966, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.75691}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.00051, "acc_pose": 0.86141, "loss": 0.00051, "grad_norm": 0.001, "time": 0.6961}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.00051, "acc_pose": 0.8567, "loss": 0.00051, "grad_norm": 0.00103, "time": 0.69524}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85853, "loss": 0.00051, "grad_norm": 0.00101, "time": 0.69513}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85607, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69565}
+{"mode": "val", "epoch": 180, "iter": 407, "lr": 0.0, "AP": 0.78132, "AP .5": 0.91426, "AP .75": 0.85289, "AP (M)": 0.7082, "AP (L)": 0.80753, "AR": 0.83383, "AR .5": 0.95403, "AR .75": 0.89578, "AR (M)": 0.79314, "AR (L)": 0.89309}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05569, "heatmap_loss": 0.00051, "acc_pose": 0.85729, "loss": 0.00051, "grad_norm": 0.00096, "time": 0.74839}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86094, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69496}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.857, "loss": 0.00051, "grad_norm": 0.00097, "time": 0.69504}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85756, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69494}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85642, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69517}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05613, "heatmap_loss": 0.0005, "acc_pose": 0.8591, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75366}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85682, "loss": 0.00051, "grad_norm": 0.001, "time": 0.69546}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85569, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.6949}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85746, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69526}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85658, "loss": 0.00051, "grad_norm": 0.00101, "time": 0.69507}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05663, "heatmap_loss": 0.0005, "acc_pose": 0.85679, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75204}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85301, "loss": 0.00051, "grad_norm": 0.00098, "time": 0.69493}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85494, "loss": 0.0005, "grad_norm": 0.0009, "time": 0.69485}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86237, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69477}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.8592, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69479}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05554, "heatmap_loss": 0.00051, "acc_pose": 0.85533, "loss": 0.00051, "grad_norm": 0.00093, "time": 0.75444}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85943, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69491}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85412, "loss": 0.00051, "grad_norm": 0.00094, "time": 0.69515}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85284, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.6952}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.0005, "acc_pose": 0.85933, "loss": 0.0005, "grad_norm": 0.001, "time": 0.69516}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05617, "heatmap_loss": 0.0005, "acc_pose": 0.86186, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.75221}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85714, "loss": 0.00051, "grad_norm": 0.00096, "time": 0.6948}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86224, "loss": 0.0005, "grad_norm": 0.00087, "time": 0.69517}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85868, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.69498}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85917, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69553}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05796, "heatmap_loss": 0.0005, "acc_pose": 0.85101, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.75576}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.0005, "acc_pose": 0.85396, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69529}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85616, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69534}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85855, "loss": 0.00051, "grad_norm": 0.00095, "time": 0.69527}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.0005, "acc_pose": 0.85996, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69506}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05495, "heatmap_loss": 0.0005, "acc_pose": 0.86065, "loss": 0.0005, "grad_norm": 0.001, "time": 0.75518}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85926, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69491}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85561, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.69519}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85572, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69524}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85897, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69483}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05667, "heatmap_loss": 0.0005, "acc_pose": 0.85713, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75205}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86301, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69535}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00036, "heatmap_loss": 0.00051, "acc_pose": 0.85923, "loss": 0.00051, "grad_norm": 0.00097, "time": 0.69526}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.8589, "loss": 0.0005, "grad_norm": 0.00091, "time": 0.69558}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86169, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69561}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05676, "heatmap_loss": 0.0005, "acc_pose": 0.8601, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.75238}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85815, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69466}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85583, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69562}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85683, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69501}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86572, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.6948}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05635, "heatmap_loss": 0.00051, "acc_pose": 0.85608, "loss": 0.00051, "grad_norm": 0.00098, "time": 0.75364}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85841, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69505}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86091, "loss": 0.0005, "grad_norm": 0.00089, "time": 0.6948}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86077, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69481}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86024, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69481}
+{"mode": "val", "epoch": 190, "iter": 407, "lr": 0.0, "AP": 0.78167, "AP .5": 0.91369, "AP .75": 0.85324, "AP (M)": 0.70808, "AP (L)": 0.80864, "AR": 0.83408, "AR .5": 0.9534, "AR .75": 0.89578, "AR (M)": 0.7929, "AR (L)": 0.89424}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05794, "heatmap_loss": 0.0005, "acc_pose": 0.86008, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.7516}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86128, "loss": 0.0005, "grad_norm": 0.00101, "time": 0.69457}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85908, "loss": 0.0005, "grad_norm": 0.00091, "time": 0.69446}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86084, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.6946}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85624, "loss": 0.0005, "grad_norm": 0.00091, "time": 0.69481}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0557, "heatmap_loss": 0.0005, "acc_pose": 0.85738, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.75123}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85743, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69498}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85701, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69497}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85873, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69504}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86521, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69495}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05647, "heatmap_loss": 0.0005, "acc_pose": 0.85765, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.75295}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86157, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69507}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85949, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.6948}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85739, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69485}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85567, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69529}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0562, "heatmap_loss": 0.0005, "acc_pose": 0.85981, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.75217}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.858, "loss": 0.0005, "grad_norm": 0.0009, "time": 0.69496}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85641, "loss": 0.0005, "grad_norm": 0.00091, "time": 0.69513}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86409, "loss": 0.0005, "grad_norm": 0.00088, "time": 0.69509}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.86149, "loss": 0.00049, "grad_norm": 0.00102, "time": 0.69487}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05851, "heatmap_loss": 0.0005, "acc_pose": 0.8566, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.75561}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85363, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69467}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86281, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.69518}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.85706, "loss": 0.00049, "grad_norm": 0.00093, "time": 0.69495}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86146, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.69484}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05664, "heatmap_loss": 0.0005, "acc_pose": 0.8589, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.75273}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.0005, "acc_pose": 0.86809, "loss": 0.0005, "grad_norm": 0.00089, "time": 0.69504}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85332, "loss": 0.0005, "grad_norm": 0.00089, "time": 0.69485}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86372, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69503}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85762, "loss": 0.0005, "grad_norm": 0.00091, "time": 0.69471}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05621, "heatmap_loss": 0.0005, "acc_pose": 0.86058, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.75392}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86226, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.6942}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85104, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69455}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00042, "heatmap_loss": 0.0005, "acc_pose": 0.8562, "loss": 0.0005, "grad_norm": 0.00089, "time": 0.69477}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85832, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69475}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05675, "heatmap_loss": 0.0005, "acc_pose": 0.86594, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75342}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00029, "heatmap_loss": 0.0005, "acc_pose": 0.85845, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69492}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.0005, "acc_pose": 0.85904, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69495}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85645, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.69488}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.8635, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69522}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05539, "heatmap_loss": 0.0005, "acc_pose": 0.8588, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75308}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.86261, "loss": 0.00049, "grad_norm": 0.00095, "time": 0.69585}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85812, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.69519}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85742, "loss": 0.0005, "grad_norm": 0.00097, "time": 0.69499}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00052, "heatmap_loss": 0.0005, "acc_pose": 0.86443, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.69501}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.0555, "heatmap_loss": 0.0005, "acc_pose": 0.86041, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.75401}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86071, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69466}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85184, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69463}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.86382, "loss": 0.00049, "grad_norm": 0.00093, "time": 0.69593}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86135, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69549}
+{"mode": "val", "epoch": 200, "iter": 407, "lr": 0.0, "AP": 0.78169, "AP .5": 0.9144, "AP .75": 0.85288, "AP (M)": 0.70858, "AP (L)": 0.80822, "AR": 0.83391, "AR .5": 0.9534, "AR .75": 0.89531, "AR (M)": 0.7938, "AR (L)": 0.89253}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05477, "heatmap_loss": 0.0005, "acc_pose": 0.85915, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.74857}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85649, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.6947}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85957, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.6946}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86209, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.6948}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85767, "loss": 0.0005, "grad_norm": 0.00098, "time": 0.6946}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05654, "heatmap_loss": 0.0005, "acc_pose": 0.85442, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.75243}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00027, "heatmap_loss": 0.0005, "acc_pose": 0.861, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69485}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0004, "heatmap_loss": 0.00049, "acc_pose": 0.86006, "loss": 0.00049, "grad_norm": 0.00095, "time": 0.69512}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86045, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.69542}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86602, "loss": 0.0005, "grad_norm": 0.00088, "time": 0.69485}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05626, "heatmap_loss": 0.0005, "acc_pose": 0.85855, "loss": 0.0005, "grad_norm": 0.00099, "time": 0.75156}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00033, "heatmap_loss": 0.0005, "acc_pose": 0.86466, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69465}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.86163, "loss": 0.00049, "grad_norm": 0.00095, "time": 0.69445}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85818, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.69455}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86038, "loss": 0.00049, "grad_norm": 0.0009, "time": 0.69477}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05632, "heatmap_loss": 0.00049, "acc_pose": 0.85628, "loss": 0.00049, "grad_norm": 0.00096, "time": 0.75326}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.8614, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69463}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.86057, "loss": 0.00049, "grad_norm": 0.00093, "time": 0.69468}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85863, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69474}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85883, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69436}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05653, "heatmap_loss": 0.00049, "acc_pose": 0.86453, "loss": 0.00049, "grad_norm": 0.00095, "time": 0.75417}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86031, "loss": 0.0005, "grad_norm": 0.00087, "time": 0.69521}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.86178, "loss": 0.0005, "grad_norm": 0.0009, "time": 0.69502}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86704, "loss": 0.00049, "grad_norm": 0.00103, "time": 0.69519}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86177, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.69542}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05597, "heatmap_loss": 0.0005, "acc_pose": 0.86078, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.75467}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85392, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69506}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.85725, "loss": 0.00049, "grad_norm": 0.00091, "time": 0.69556}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.0005, "acc_pose": 0.86507, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.69496}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00042, "heatmap_loss": 0.0005, "acc_pose": 0.86552, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.6949}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05628, "heatmap_loss": 0.00049, "acc_pose": 0.85832, "loss": 0.00049, "grad_norm": 0.00098, "time": 0.75324}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.0005, "acc_pose": 0.85485, "loss": 0.0005, "grad_norm": 0.0009, "time": 0.69514}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86354, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69555}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.8606, "loss": 0.00049, "grad_norm": 0.00089, "time": 0.69481}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.8655, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69535}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05578, "heatmap_loss": 0.00049, "acc_pose": 0.86224, "loss": 0.00049, "grad_norm": 0.00091, "time": 0.7526}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86093, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.69479}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85722, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69511}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00034, "heatmap_loss": 0.0005, "acc_pose": 0.86088, "loss": 0.0005, "grad_norm": 0.00089, "time": 0.69499}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86282, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69501}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05911, "heatmap_loss": 0.00049, "acc_pose": 0.85923, "loss": 0.00049, "grad_norm": 0.001, "time": 0.75628}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00028, "heatmap_loss": 0.0005, "acc_pose": 0.86426, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.69487}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.85536, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.69501}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86371, "loss": 0.00049, "grad_norm": 0.00089, "time": 0.69506}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86392, "loss": 0.0005, "grad_norm": 0.00092, "time": 0.69552}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 9430, "data_time": 0.05676, "heatmap_loss": 0.0005, "acc_pose": 0.85934, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.75548}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86196, "loss": 0.0005, "grad_norm": 0.00093, "time": 0.69532}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85591, "loss": 0.0005, "grad_norm": 0.00096, "time": 0.69534}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 9430, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86334, "loss": 0.00049, "grad_norm": 0.00094, "time": 0.69505}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 9430, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.85899, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.6952}
+{"mode": "val", "epoch": 210, "iter": 407, "lr": 0.0, "AP": 0.78183, "AP .5": 0.91391, "AP .75": 0.85347, "AP (M)": 0.70853, "AP (L)": 0.80858, "AR": 0.83408, "AR .5": 0.9534, "AR .75": 0.89578, "AR (M)": 0.79391, "AR (L)": 0.89279}
diff --git a/vendor/ViTPose/logs/vitpose-l.log.json b/vendor/ViTPose/logs/vitpose-l.log.json
new file mode 100644
index 0000000000000000000000000000000000000000..bbe7fea82542c54f33c8318a8ad5d17fe64d7eeb
--- /dev/null
+++ b/vendor/ViTPose/logs/vitpose-l.log.json
@@ -0,0 +1,1072 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) [GCC 9.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: A100-SXM4-40GB\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.3.r11.3/compiler.29920130_0\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.9.0a0+c3d40fd\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash N/A)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86\n - CuDNN 8.2.1\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.1, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.10.0a0\nOpenCV: 4.5.5\nMMCV: 1.3.9\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.3\nMMPose: 0.24.0+71c8bf8", "seed": 0, "hook_msgs": {}}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.06845, "heatmap_loss": 0.00215, "acc_pose": 0.04833, "loss": 0.00215, "grad_norm": 0.00662, "time": 0.80814}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00202, "acc_pose": 0.17241, "loss": 0.00202, "grad_norm": 0.00301, "time": 0.65605}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0019, "acc_pose": 0.25311, "loss": 0.0019, "grad_norm": 0.00397, "time": 0.65634}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0017, "acc_pose": 0.35247, "loss": 0.0017, "grad_norm": 0.00441, "time": 0.65623}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00147, "acc_pose": 0.45338, "loss": 0.00147, "grad_norm": 0.00428, "time": 0.65609}
+{"mode": "train", "epoch": 2, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0561, "heatmap_loss": 0.00124, "acc_pose": 0.55135, "loss": 0.00124, "grad_norm": 0.00409, "time": 0.71186}
+{"mode": "train", "epoch": 2, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00117, "acc_pose": 0.5917, "loss": 0.00117, "grad_norm": 0.00369, "time": 0.65627}
+{"mode": "train", "epoch": 2, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00112, "acc_pose": 0.60123, "loss": 0.00112, "grad_norm": 0.00398, "time": 0.65624}
+{"mode": "train", "epoch": 2, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00108, "acc_pose": 0.61332, "loss": 0.00108, "grad_norm": 0.00387, "time": 0.65709}
+{"mode": "train", "epoch": 2, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00105, "acc_pose": 0.63425, "loss": 0.00105, "grad_norm": 0.00365, "time": 0.65695}
+{"mode": "train", "epoch": 3, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05631, "heatmap_loss": 0.001, "acc_pose": 0.65555, "loss": 0.001, "grad_norm": 0.00352, "time": 0.71304}
+{"mode": "train", "epoch": 3, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00097, "acc_pose": 0.66405, "loss": 0.00097, "grad_norm": 0.00368, "time": 0.65644}
+{"mode": "train", "epoch": 3, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00097, "acc_pose": 0.66894, "loss": 0.00097, "grad_norm": 0.00311, "time": 0.65626}
+{"mode": "train", "epoch": 3, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00094, "acc_pose": 0.68423, "loss": 0.00094, "grad_norm": 0.00349, "time": 0.65671}
+{"mode": "train", "epoch": 3, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00093, "acc_pose": 0.68407, "loss": 0.00093, "grad_norm": 0.00323, "time": 0.65689}
+{"mode": "train", "epoch": 4, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05676, "heatmap_loss": 0.00091, "acc_pose": 0.69757, "loss": 0.00091, "grad_norm": 0.00321, "time": 0.71421}
+{"mode": "train", "epoch": 4, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.0009, "acc_pose": 0.69656, "loss": 0.0009, "grad_norm": 0.00362, "time": 0.65668}
+{"mode": "train", "epoch": 4, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00089, "acc_pose": 0.70636, "loss": 0.00089, "grad_norm": 0.00313, "time": 0.65721}
+{"mode": "train", "epoch": 4, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00088, "acc_pose": 0.71063, "loss": 0.00088, "grad_norm": 0.00294, "time": 0.65713}
+{"mode": "train", "epoch": 4, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00087, "acc_pose": 0.71051, "loss": 0.00087, "grad_norm": 0.00328, "time": 0.65686}
+{"mode": "train", "epoch": 5, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05683, "heatmap_loss": 0.00086, "acc_pose": 0.7147, "loss": 0.00086, "grad_norm": 0.00325, "time": 0.71401}
+{"mode": "train", "epoch": 5, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00086, "acc_pose": 0.71026, "loss": 0.00086, "grad_norm": 0.00316, "time": 0.65613}
+{"mode": "train", "epoch": 5, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00045, "heatmap_loss": 0.00085, "acc_pose": 0.71429, "loss": 0.00085, "grad_norm": 0.00324, "time": 0.65694}
+{"mode": "train", "epoch": 5, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00085, "acc_pose": 0.72067, "loss": 0.00085, "grad_norm": 0.00327, "time": 0.65665}
+{"mode": "train", "epoch": 5, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00083, "acc_pose": 0.72617, "loss": 0.00083, "grad_norm": 0.00371, "time": 0.65642}
+{"mode": "train", "epoch": 6, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05546, "heatmap_loss": 0.00083, "acc_pose": 0.72704, "loss": 0.00083, "grad_norm": 0.00315, "time": 0.71233}
+{"mode": "train", "epoch": 6, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00082, "acc_pose": 0.72234, "loss": 0.00082, "grad_norm": 0.00284, "time": 0.65668}
+{"mode": "train", "epoch": 6, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00082, "acc_pose": 0.72467, "loss": 0.00082, "grad_norm": 0.00332, "time": 0.65648}
+{"mode": "train", "epoch": 6, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00081, "acc_pose": 0.72821, "loss": 0.00081, "grad_norm": 0.00341, "time": 0.65652}
+{"mode": "train", "epoch": 6, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00082, "acc_pose": 0.73683, "loss": 0.00082, "grad_norm": 0.00299, "time": 0.65639}
+{"mode": "train", "epoch": 7, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0558, "heatmap_loss": 0.0008, "acc_pose": 0.73265, "loss": 0.0008, "grad_norm": 0.00278, "time": 0.71353}
+{"mode": "train", "epoch": 7, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0008, "acc_pose": 0.73282, "loss": 0.0008, "grad_norm": 0.00284, "time": 0.65649}
+{"mode": "train", "epoch": 7, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00079, "acc_pose": 0.73592, "loss": 0.00079, "grad_norm": 0.00303, "time": 0.65669}
+{"mode": "train", "epoch": 7, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00079, "acc_pose": 0.73794, "loss": 0.00079, "grad_norm": 0.00297, "time": 0.65615}
+{"mode": "train", "epoch": 7, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00079, "acc_pose": 0.74212, "loss": 0.00079, "grad_norm": 0.00345, "time": 0.6571}
+{"mode": "train", "epoch": 8, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05573, "heatmap_loss": 0.00078, "acc_pose": 0.74219, "loss": 0.00078, "grad_norm": 0.00357, "time": 0.71237}
+{"mode": "train", "epoch": 8, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00078, "acc_pose": 0.7449, "loss": 0.00078, "grad_norm": 0.00296, "time": 0.65738}
+{"mode": "train", "epoch": 8, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00041, "heatmap_loss": 0.00078, "acc_pose": 0.74136, "loss": 0.00078, "grad_norm": 0.00276, "time": 0.65769}
+{"mode": "train", "epoch": 8, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00078, "acc_pose": 0.74911, "loss": 0.00078, "grad_norm": 0.00293, "time": 0.65678}
+{"mode": "train", "epoch": 8, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00077, "acc_pose": 0.75009, "loss": 0.00077, "grad_norm": 0.00339, "time": 0.65675}
+{"mode": "train", "epoch": 9, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0565, "heatmap_loss": 0.00076, "acc_pose": 0.75381, "loss": 0.00076, "grad_norm": 0.00287, "time": 0.71405}
+{"mode": "train", "epoch": 9, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00076, "acc_pose": 0.75417, "loss": 0.00076, "grad_norm": 0.00278, "time": 0.6574}
+{"mode": "train", "epoch": 9, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00077, "acc_pose": 0.75353, "loss": 0.00077, "grad_norm": 0.00299, "time": 0.6565}
+{"mode": "train", "epoch": 9, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00076, "acc_pose": 0.75399, "loss": 0.00076, "grad_norm": 0.00335, "time": 0.65658}
+{"mode": "train", "epoch": 9, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00076, "acc_pose": 0.75727, "loss": 0.00076, "grad_norm": 0.00308, "time": 0.65668}
+{"mode": "train", "epoch": 10, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05596, "heatmap_loss": 0.00076, "acc_pose": 0.76032, "loss": 0.00076, "grad_norm": 0.0031, "time": 0.71269}
+{"mode": "train", "epoch": 10, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00076, "acc_pose": 0.75087, "loss": 0.00076, "grad_norm": 0.00275, "time": 0.65637}
+{"mode": "train", "epoch": 10, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00075, "acc_pose": 0.75933, "loss": 0.00075, "grad_norm": 0.00322, "time": 0.65722}
+{"mode": "train", "epoch": 10, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00075, "acc_pose": 0.75828, "loss": 0.00075, "grad_norm": 0.00317, "time": 0.65661}
+{"mode": "train", "epoch": 10, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00075, "acc_pose": 0.76572, "loss": 0.00075, "grad_norm": 0.00281, "time": 0.65638}
+{"mode": "val", "epoch": 10, "iter": 407, "lr": 0.0, "AP": 0.73634, "AP .5": 0.90274, "AP .75": 0.81459, "AP (M)": 0.6623, "AP (L)": 0.76279, "AR": 0.79213, "AR .5": 0.94238, "AR .75": 0.86004, "AR (M)": 0.75026, "AR (L)": 0.85295}
+{"mode": "train", "epoch": 11, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05645, "heatmap_loss": 0.00074, "acc_pose": 0.76148, "loss": 0.00074, "grad_norm": 0.0027, "time": 0.71099}
+{"mode": "train", "epoch": 11, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00074, "acc_pose": 0.75961, "loss": 0.00074, "grad_norm": 0.00305, "time": 0.65704}
+{"mode": "train", "epoch": 11, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.76053, "loss": 0.00074, "grad_norm": 0.00281, "time": 0.65666}
+{"mode": "train", "epoch": 11, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00074, "acc_pose": 0.75765, "loss": 0.00074, "grad_norm": 0.00268, "time": 0.65682}
+{"mode": "train", "epoch": 11, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00074, "acc_pose": 0.76331, "loss": 0.00074, "grad_norm": 0.00305, "time": 0.65683}
+{"mode": "train", "epoch": 12, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05767, "heatmap_loss": 0.00073, "acc_pose": 0.76707, "loss": 0.00073, "grad_norm": 0.00278, "time": 0.71396}
+{"mode": "train", "epoch": 12, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00073, "acc_pose": 0.76483, "loss": 0.00073, "grad_norm": 0.00268, "time": 0.6568}
+{"mode": "train", "epoch": 12, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00074, "acc_pose": 0.76696, "loss": 0.00074, "grad_norm": 0.00287, "time": 0.65677}
+{"mode": "train", "epoch": 12, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00073, "acc_pose": 0.75617, "loss": 0.00073, "grad_norm": 0.00373, "time": 0.65685}
+{"mode": "train", "epoch": 12, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00073, "acc_pose": 0.76287, "loss": 0.00073, "grad_norm": 0.0025, "time": 0.65653}
+{"mode": "train", "epoch": 13, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05686, "heatmap_loss": 0.00072, "acc_pose": 0.7653, "loss": 0.00072, "grad_norm": 0.0024, "time": 0.71344}
+{"mode": "train", "epoch": 13, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00072, "acc_pose": 0.77061, "loss": 0.00072, "grad_norm": 0.0029, "time": 0.65681}
+{"mode": "train", "epoch": 13, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00072, "acc_pose": 0.7689, "loss": 0.00072, "grad_norm": 0.00257, "time": 0.65688}
+{"mode": "train", "epoch": 13, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00072, "acc_pose": 0.7716, "loss": 0.00072, "grad_norm": 0.00276, "time": 0.65697}
+{"mode": "train", "epoch": 13, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00072, "acc_pose": 0.77556, "loss": 0.00072, "grad_norm": 0.00267, "time": 0.65706}
+{"mode": "train", "epoch": 14, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05703, "heatmap_loss": 0.00072, "acc_pose": 0.77378, "loss": 0.00072, "grad_norm": 0.00279, "time": 0.71362}
+{"mode": "train", "epoch": 14, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00072, "acc_pose": 0.77664, "loss": 0.00072, "grad_norm": 0.00277, "time": 0.65683}
+{"mode": "train", "epoch": 14, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00072, "acc_pose": 0.77005, "loss": 0.00072, "grad_norm": 0.00306, "time": 0.65695}
+{"mode": "train", "epoch": 14, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00071, "acc_pose": 0.77354, "loss": 0.00071, "grad_norm": 0.00292, "time": 0.6572}
+{"mode": "train", "epoch": 14, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00071, "acc_pose": 0.77582, "loss": 0.00071, "grad_norm": 0.00293, "time": 0.65693}
+{"mode": "train", "epoch": 15, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05657, "heatmap_loss": 0.00071, "acc_pose": 0.76827, "loss": 0.00071, "grad_norm": 0.00297, "time": 0.71309}
+{"mode": "train", "epoch": 15, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00027, "heatmap_loss": 0.00071, "acc_pose": 0.77158, "loss": 0.00071, "grad_norm": 0.0027, "time": 0.65664}
+{"mode": "train", "epoch": 15, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00071, "acc_pose": 0.7631, "loss": 0.00071, "grad_norm": 0.00271, "time": 0.65702}
+{"mode": "train", "epoch": 15, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00027, "heatmap_loss": 0.00071, "acc_pose": 0.77626, "loss": 0.00071, "grad_norm": 0.00257, "time": 0.65689}
+{"mode": "train", "epoch": 15, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00071, "acc_pose": 0.77703, "loss": 0.00071, "grad_norm": 0.0025, "time": 0.65729}
+{"mode": "train", "epoch": 16, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05767, "heatmap_loss": 0.0007, "acc_pose": 0.77805, "loss": 0.0007, "grad_norm": 0.00292, "time": 0.71429}
+{"mode": "train", "epoch": 16, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.7804, "loss": 0.0007, "grad_norm": 0.00266, "time": 0.65664}
+{"mode": "train", "epoch": 16, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0007, "acc_pose": 0.77341, "loss": 0.0007, "grad_norm": 0.00251, "time": 0.65675}
+{"mode": "train", "epoch": 16, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77406, "loss": 0.0007, "grad_norm": 0.00244, "time": 0.65659}
+{"mode": "train", "epoch": 16, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.77973, "loss": 0.0007, "grad_norm": 0.00278, "time": 0.6568}
+{"mode": "train", "epoch": 17, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05657, "heatmap_loss": 0.0007, "acc_pose": 0.77578, "loss": 0.0007, "grad_norm": 0.00252, "time": 0.71304}
+{"mode": "train", "epoch": 17, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.77618, "loss": 0.00069, "grad_norm": 0.00258, "time": 0.65659}
+{"mode": "train", "epoch": 17, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78352, "loss": 0.00069, "grad_norm": 0.00287, "time": 0.65694}
+{"mode": "train", "epoch": 17, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0007, "acc_pose": 0.78323, "loss": 0.0007, "grad_norm": 0.00288, "time": 0.65677}
+{"mode": "train", "epoch": 17, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.78667, "loss": 0.00069, "grad_norm": 0.00247, "time": 0.65689}
+{"mode": "train", "epoch": 18, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05919, "heatmap_loss": 0.00069, "acc_pose": 0.78029, "loss": 0.00069, "grad_norm": 0.00281, "time": 0.71619}
+{"mode": "train", "epoch": 18, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78093, "loss": 0.00069, "grad_norm": 0.00262, "time": 0.6566}
+{"mode": "train", "epoch": 18, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.7824, "loss": 0.00069, "grad_norm": 0.00272, "time": 0.6568}
+{"mode": "train", "epoch": 18, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.78795, "loss": 0.00069, "grad_norm": 0.00243, "time": 0.65679}
+{"mode": "train", "epoch": 18, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00069, "acc_pose": 0.78204, "loss": 0.00069, "grad_norm": 0.00237, "time": 0.65655}
+{"mode": "train", "epoch": 19, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05722, "heatmap_loss": 0.00069, "acc_pose": 0.78159, "loss": 0.00069, "grad_norm": 0.00233, "time": 0.71384}
+{"mode": "train", "epoch": 19, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00069, "acc_pose": 0.78203, "loss": 0.00069, "grad_norm": 0.00281, "time": 0.65693}
+{"mode": "train", "epoch": 19, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00069, "acc_pose": 0.76887, "loss": 0.00069, "grad_norm": 0.0026, "time": 0.65673}
+{"mode": "train", "epoch": 19, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00068, "acc_pose": 0.78149, "loss": 0.00068, "grad_norm": 0.0024, "time": 0.65665}
+{"mode": "train", "epoch": 19, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.79064, "loss": 0.00068, "grad_norm": 0.00261, "time": 0.65678}
+{"mode": "train", "epoch": 20, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05914, "heatmap_loss": 0.00068, "acc_pose": 0.78764, "loss": 0.00068, "grad_norm": 0.00248, "time": 0.71588}
+{"mode": "train", "epoch": 20, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.79079, "loss": 0.00068, "grad_norm": 0.00231, "time": 0.65671}
+{"mode": "train", "epoch": 20, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00068, "acc_pose": 0.78168, "loss": 0.00068, "grad_norm": 0.00254, "time": 0.65712}
+{"mode": "train", "epoch": 20, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00068, "acc_pose": 0.78905, "loss": 0.00068, "grad_norm": 0.00277, "time": 0.65708}
+{"mode": "train", "epoch": 20, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78481, "loss": 0.00068, "grad_norm": 0.00257, "time": 0.65717}
+{"mode": "val", "epoch": 20, "iter": 407, "lr": 0.0, "AP": 0.75447, "AP .5": 0.90483, "AP .75": 0.83072, "AP (M)": 0.68124, "AP (L)": 0.78224, "AR": 0.8088, "AR .5": 0.94443, "AR .75": 0.8772, "AR (M)": 0.76741, "AR (L)": 0.86942}
+{"mode": "train", "epoch": 21, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05636, "heatmap_loss": 0.00068, "acc_pose": 0.78704, "loss": 0.00068, "grad_norm": 0.00253, "time": 0.70903}
+{"mode": "train", "epoch": 21, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.78842, "loss": 0.00067, "grad_norm": 0.00239, "time": 0.65572}
+{"mode": "train", "epoch": 21, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.78461, "loss": 0.00068, "grad_norm": 0.00234, "time": 0.65658}
+{"mode": "train", "epoch": 21, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.7906, "loss": 0.00068, "grad_norm": 0.00227, "time": 0.6565}
+{"mode": "train", "epoch": 21, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00068, "acc_pose": 0.7923, "loss": 0.00068, "grad_norm": 0.00247, "time": 0.65654}
+{"mode": "train", "epoch": 22, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05757, "heatmap_loss": 0.00067, "acc_pose": 0.7911, "loss": 0.00067, "grad_norm": 0.00245, "time": 0.71461}
+{"mode": "train", "epoch": 22, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00067, "acc_pose": 0.78852, "loss": 0.00067, "grad_norm": 0.00235, "time": 0.65702}
+{"mode": "train", "epoch": 22, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.78798, "loss": 0.00067, "grad_norm": 0.00248, "time": 0.657}
+{"mode": "train", "epoch": 22, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.79555, "loss": 0.00067, "grad_norm": 0.00228, "time": 0.65754}
+{"mode": "train", "epoch": 22, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00067, "acc_pose": 0.79277, "loss": 0.00067, "grad_norm": 0.00243, "time": 0.65689}
+{"mode": "train", "epoch": 23, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0568, "heatmap_loss": 0.00067, "acc_pose": 0.78734, "loss": 0.00067, "grad_norm": 0.00247, "time": 0.71378}
+{"mode": "train", "epoch": 23, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00067, "acc_pose": 0.78432, "loss": 0.00067, "grad_norm": 0.00233, "time": 0.6565}
+{"mode": "train", "epoch": 23, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.79388, "loss": 0.00066, "grad_norm": 0.00288, "time": 0.65699}
+{"mode": "train", "epoch": 23, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79624, "loss": 0.00066, "grad_norm": 0.00263, "time": 0.65732}
+{"mode": "train", "epoch": 23, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00067, "acc_pose": 0.7932, "loss": 0.00067, "grad_norm": 0.00246, "time": 0.65695}
+{"mode": "train", "epoch": 24, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05651, "heatmap_loss": 0.00066, "acc_pose": 0.79224, "loss": 0.00066, "grad_norm": 0.00243, "time": 0.71345}
+{"mode": "train", "epoch": 24, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00066, "acc_pose": 0.80346, "loss": 0.00066, "grad_norm": 0.00242, "time": 0.65711}
+{"mode": "train", "epoch": 24, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.78652, "loss": 0.00066, "grad_norm": 0.00239, "time": 0.65696}
+{"mode": "train", "epoch": 24, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00066, "acc_pose": 0.78495, "loss": 0.00066, "grad_norm": 0.00242, "time": 0.65686}
+{"mode": "train", "epoch": 24, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79419, "loss": 0.00066, "grad_norm": 0.00265, "time": 0.65697}
+{"mode": "train", "epoch": 25, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05675, "heatmap_loss": 0.00066, "acc_pose": 0.79865, "loss": 0.00066, "grad_norm": 0.00238, "time": 0.7138}
+{"mode": "train", "epoch": 25, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.79146, "loss": 0.00066, "grad_norm": 0.00228, "time": 0.65673}
+{"mode": "train", "epoch": 25, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.7884, "loss": 0.00066, "grad_norm": 0.00246, "time": 0.65713}
+{"mode": "train", "epoch": 25, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.79075, "loss": 0.00066, "grad_norm": 0.00281, "time": 0.657}
+{"mode": "train", "epoch": 25, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00066, "acc_pose": 0.79164, "loss": 0.00066, "grad_norm": 0.00225, "time": 0.65685}
+{"mode": "train", "epoch": 26, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05648, "heatmap_loss": 0.00066, "acc_pose": 0.79291, "loss": 0.00066, "grad_norm": 0.00232, "time": 0.7138}
+{"mode": "train", "epoch": 26, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79163, "loss": 0.00066, "grad_norm": 0.00246, "time": 0.65683}
+{"mode": "train", "epoch": 26, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79363, "loss": 0.00065, "grad_norm": 0.00236, "time": 0.65707}
+{"mode": "train", "epoch": 26, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00066, "acc_pose": 0.79388, "loss": 0.00066, "grad_norm": 0.00224, "time": 0.657}
+{"mode": "train", "epoch": 26, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80029, "loss": 0.00065, "grad_norm": 0.00238, "time": 0.6573}
+{"mode": "train", "epoch": 27, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05678, "heatmap_loss": 0.00065, "acc_pose": 0.80022, "loss": 0.00065, "grad_norm": 0.00228, "time": 0.71494}
+{"mode": "train", "epoch": 27, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00065, "acc_pose": 0.79938, "loss": 0.00065, "grad_norm": 0.00236, "time": 0.65729}
+{"mode": "train", "epoch": 27, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00066, "acc_pose": 0.7979, "loss": 0.00066, "grad_norm": 0.00244, "time": 0.65686}
+{"mode": "train", "epoch": 27, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.80485, "loss": 0.00065, "grad_norm": 0.00239, "time": 0.65675}
+{"mode": "train", "epoch": 27, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.79318, "loss": 0.00065, "grad_norm": 0.00242, "time": 0.65687}
+{"mode": "train", "epoch": 28, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05666, "heatmap_loss": 0.00065, "acc_pose": 0.80145, "loss": 0.00065, "grad_norm": 0.00214, "time": 0.71301}
+{"mode": "train", "epoch": 28, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79412, "loss": 0.00065, "grad_norm": 0.00236, "time": 0.65659}
+{"mode": "train", "epoch": 28, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.79594, "loss": 0.00065, "grad_norm": 0.00235, "time": 0.65713}
+{"mode": "train", "epoch": 28, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.80472, "loss": 0.00065, "grad_norm": 0.00258, "time": 0.65712}
+{"mode": "train", "epoch": 28, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.79781, "loss": 0.00065, "grad_norm": 0.00226, "time": 0.65698}
+{"mode": "train", "epoch": 29, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05731, "heatmap_loss": 0.00065, "acc_pose": 0.80365, "loss": 0.00065, "grad_norm": 0.00225, "time": 0.71396}
+{"mode": "train", "epoch": 29, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79638, "loss": 0.00065, "grad_norm": 0.00224, "time": 0.65703}
+{"mode": "train", "epoch": 29, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00065, "acc_pose": 0.79311, "loss": 0.00065, "grad_norm": 0.00243, "time": 0.65693}
+{"mode": "train", "epoch": 29, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00065, "acc_pose": 0.80223, "loss": 0.00065, "grad_norm": 0.00237, "time": 0.65687}
+{"mode": "train", "epoch": 29, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80321, "loss": 0.00064, "grad_norm": 0.00227, "time": 0.65697}
+{"mode": "train", "epoch": 30, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05733, "heatmap_loss": 0.00064, "acc_pose": 0.79573, "loss": 0.00064, "grad_norm": 0.00208, "time": 0.71382}
+{"mode": "train", "epoch": 30, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00065, "acc_pose": 0.80052, "loss": 0.00065, "grad_norm": 0.00226, "time": 0.65685}
+{"mode": "train", "epoch": 30, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.79942, "loss": 0.00064, "grad_norm": 0.00235, "time": 0.65738}
+{"mode": "train", "epoch": 30, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80231, "loss": 0.00064, "grad_norm": 0.00235, "time": 0.6572}
+{"mode": "train", "epoch": 30, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80187, "loss": 0.00064, "grad_norm": 0.00206, "time": 0.65701}
+{"mode": "val", "epoch": 30, "iter": 407, "lr": 0.0, "AP": 0.7629, "AP .5": 0.90698, "AP .75": 0.8369, "AP (M)": 0.68864, "AP (L)": 0.78944, "AR": 0.81725, "AR .5": 0.94773, "AR .75": 0.8824, "AR (M)": 0.77596, "AR (L)": 0.87726}
+{"mode": "train", "epoch": 31, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05601, "heatmap_loss": 0.00064, "acc_pose": 0.80354, "loss": 0.00064, "grad_norm": 0.00247, "time": 0.70822}
+{"mode": "train", "epoch": 31, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.80007, "loss": 0.00064, "grad_norm": 0.00228, "time": 0.65626}
+{"mode": "train", "epoch": 31, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.8007, "loss": 0.00064, "grad_norm": 0.00212, "time": 0.65666}
+{"mode": "train", "epoch": 31, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00064, "acc_pose": 0.80342, "loss": 0.00064, "grad_norm": 0.00236, "time": 0.65671}
+{"mode": "train", "epoch": 31, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80814, "loss": 0.00064, "grad_norm": 0.00212, "time": 0.65668}
+{"mode": "train", "epoch": 32, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05673, "heatmap_loss": 0.00064, "acc_pose": 0.80551, "loss": 0.00064, "grad_norm": 0.00223, "time": 0.7132}
+{"mode": "train", "epoch": 32, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.81187, "loss": 0.00064, "grad_norm": 0.0021, "time": 0.65662}
+{"mode": "train", "epoch": 32, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80193, "loss": 0.00064, "grad_norm": 0.00223, "time": 0.65656}
+{"mode": "train", "epoch": 32, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00064, "acc_pose": 0.80625, "loss": 0.00064, "grad_norm": 0.00219, "time": 0.65656}
+{"mode": "train", "epoch": 32, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.81177, "loss": 0.00064, "grad_norm": 0.00218, "time": 0.65691}
+{"mode": "train", "epoch": 33, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05709, "heatmap_loss": 0.00063, "acc_pose": 0.8111, "loss": 0.00063, "grad_norm": 0.00209, "time": 0.71417}
+{"mode": "train", "epoch": 33, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80279, "loss": 0.00064, "grad_norm": 0.00208, "time": 0.65693}
+{"mode": "train", "epoch": 33, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80847, "loss": 0.00063, "grad_norm": 0.0024, "time": 0.65722}
+{"mode": "train", "epoch": 33, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00064, "acc_pose": 0.80424, "loss": 0.00064, "grad_norm": 0.00242, "time": 0.65747}
+{"mode": "train", "epoch": 33, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00064, "acc_pose": 0.81241, "loss": 0.00064, "grad_norm": 0.00213, "time": 0.65754}
+{"mode": "train", "epoch": 34, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05658, "heatmap_loss": 0.00063, "acc_pose": 0.80267, "loss": 0.00063, "grad_norm": 0.00223, "time": 0.7144}
+{"mode": "train", "epoch": 34, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80298, "loss": 0.00063, "grad_norm": 0.00218, "time": 0.65704}
+{"mode": "train", "epoch": 34, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80791, "loss": 0.00063, "grad_norm": 0.00196, "time": 0.65662}
+{"mode": "train", "epoch": 34, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80471, "loss": 0.00063, "grad_norm": 0.00194, "time": 0.65676}
+{"mode": "train", "epoch": 34, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80862, "loss": 0.00063, "grad_norm": 0.00203, "time": 0.65672}
+{"mode": "train", "epoch": 35, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05735, "heatmap_loss": 0.00062, "acc_pose": 0.80915, "loss": 0.00062, "grad_norm": 0.00201, "time": 0.71445}
+{"mode": "train", "epoch": 35, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80721, "loss": 0.00063, "grad_norm": 0.00229, "time": 0.65716}
+{"mode": "train", "epoch": 35, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80209, "loss": 0.00063, "grad_norm": 0.00213, "time": 0.65709}
+{"mode": "train", "epoch": 35, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00063, "acc_pose": 0.80965, "loss": 0.00063, "grad_norm": 0.00199, "time": 0.65707}
+{"mode": "train", "epoch": 35, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.81439, "loss": 0.00063, "grad_norm": 0.00209, "time": 0.65748}
+{"mode": "train", "epoch": 36, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05685, "heatmap_loss": 0.00062, "acc_pose": 0.80001, "loss": 0.00062, "grad_norm": 0.00212, "time": 0.71369}
+{"mode": "train", "epoch": 36, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81158, "loss": 0.00063, "grad_norm": 0.00213, "time": 0.65685}
+{"mode": "train", "epoch": 36, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80463, "loss": 0.00063, "grad_norm": 0.00199, "time": 0.65727}
+{"mode": "train", "epoch": 36, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.81261, "loss": 0.00063, "grad_norm": 0.00238, "time": 0.65702}
+{"mode": "train", "epoch": 36, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.807, "loss": 0.00063, "grad_norm": 0.0021, "time": 0.65756}
+{"mode": "train", "epoch": 37, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05616, "heatmap_loss": 0.00062, "acc_pose": 0.80485, "loss": 0.00062, "grad_norm": 0.00207, "time": 0.71275}
+{"mode": "train", "epoch": 37, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80202, "loss": 0.00063, "grad_norm": 0.00221, "time": 0.65701}
+{"mode": "train", "epoch": 37, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80508, "loss": 0.00062, "grad_norm": 0.00217, "time": 0.65683}
+{"mode": "train", "epoch": 37, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.8172, "loss": 0.00062, "grad_norm": 0.0021, "time": 0.65735}
+{"mode": "train", "epoch": 37, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00063, "acc_pose": 0.80337, "loss": 0.00063, "grad_norm": 0.00239, "time": 0.65719}
+{"mode": "train", "epoch": 38, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05708, "heatmap_loss": 0.00062, "acc_pose": 0.81093, "loss": 0.00062, "grad_norm": 0.00189, "time": 0.71382}
+{"mode": "train", "epoch": 38, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81207, "loss": 0.00062, "grad_norm": 0.00218, "time": 0.65653}
+{"mode": "train", "epoch": 38, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80568, "loss": 0.00063, "grad_norm": 0.00222, "time": 0.65702}
+{"mode": "train", "epoch": 38, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80719, "loss": 0.00062, "grad_norm": 0.00201, "time": 0.65683}
+{"mode": "train", "epoch": 38, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.80346, "loss": 0.00062, "grad_norm": 0.00202, "time": 0.65681}
+{"mode": "train", "epoch": 39, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05773, "heatmap_loss": 0.00062, "acc_pose": 0.80809, "loss": 0.00062, "grad_norm": 0.00196, "time": 0.71442}
+{"mode": "train", "epoch": 39, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80958, "loss": 0.00062, "grad_norm": 0.00209, "time": 0.65664}
+{"mode": "train", "epoch": 39, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80484, "loss": 0.00062, "grad_norm": 0.00221, "time": 0.65657}
+{"mode": "train", "epoch": 39, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80376, "loss": 0.00062, "grad_norm": 0.00191, "time": 0.65658}
+{"mode": "train", "epoch": 39, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00063, "acc_pose": 0.80851, "loss": 0.00063, "grad_norm": 0.00193, "time": 0.65665}
+{"mode": "train", "epoch": 40, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05697, "heatmap_loss": 0.00061, "acc_pose": 0.81252, "loss": 0.00061, "grad_norm": 0.00197, "time": 0.71327}
+{"mode": "train", "epoch": 40, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80708, "loss": 0.00062, "grad_norm": 0.00192, "time": 0.6566}
+{"mode": "train", "epoch": 40, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.81383, "loss": 0.00062, "grad_norm": 0.00225, "time": 0.65661}
+{"mode": "train", "epoch": 40, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.80746, "loss": 0.00062, "grad_norm": 0.0021, "time": 0.65657}
+{"mode": "train", "epoch": 40, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.8129, "loss": 0.00062, "grad_norm": 0.002, "time": 0.65666}
+{"mode": "val", "epoch": 40, "iter": 407, "lr": 0.0, "AP": 0.76736, "AP .5": 0.91117, "AP .75": 0.84085, "AP (M)": 0.69408, "AP (L)": 0.79345, "AR": 0.82218, "AR .5": 0.95057, "AR .75": 0.88744, "AR (M)": 0.7826, "AR (L)": 0.88023}
+{"mode": "train", "epoch": 41, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05551, "heatmap_loss": 0.00061, "acc_pose": 0.81245, "loss": 0.00061, "grad_norm": 0.00209, "time": 0.70817}
+{"mode": "train", "epoch": 41, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00062, "acc_pose": 0.81238, "loss": 0.00062, "grad_norm": 0.00219, "time": 0.65606}
+{"mode": "train", "epoch": 41, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.8174, "loss": 0.00062, "grad_norm": 0.002, "time": 0.65666}
+{"mode": "train", "epoch": 41, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81264, "loss": 0.00062, "grad_norm": 0.00192, "time": 0.65627}
+{"mode": "train", "epoch": 41, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00062, "acc_pose": 0.80506, "loss": 0.00062, "grad_norm": 0.00199, "time": 0.65645}
+{"mode": "train", "epoch": 42, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05694, "heatmap_loss": 0.00061, "acc_pose": 0.81002, "loss": 0.00061, "grad_norm": 0.00186, "time": 0.71393}
+{"mode": "train", "epoch": 42, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.80699, "loss": 0.00062, "grad_norm": 0.00199, "time": 0.65706}
+{"mode": "train", "epoch": 42, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00062, "acc_pose": 0.80476, "loss": 0.00062, "grad_norm": 0.00198, "time": 0.65707}
+{"mode": "train", "epoch": 42, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81603, "loss": 0.00061, "grad_norm": 0.00214, "time": 0.65692}
+{"mode": "train", "epoch": 42, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00062, "acc_pose": 0.81092, "loss": 0.00062, "grad_norm": 0.00183, "time": 0.6569}
+{"mode": "train", "epoch": 43, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05743, "heatmap_loss": 0.00061, "acc_pose": 0.81699, "loss": 0.00061, "grad_norm": 0.00196, "time": 0.71498}
+{"mode": "train", "epoch": 43, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.8144, "loss": 0.00062, "grad_norm": 0.002, "time": 0.6571}
+{"mode": "train", "epoch": 43, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81477, "loss": 0.00061, "grad_norm": 0.0021, "time": 0.6572}
+{"mode": "train", "epoch": 43, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00062, "acc_pose": 0.81315, "loss": 0.00062, "grad_norm": 0.00215, "time": 0.65708}
+{"mode": "train", "epoch": 43, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.82042, "loss": 0.00061, "grad_norm": 0.00175, "time": 0.65718}
+{"mode": "train", "epoch": 44, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05689, "heatmap_loss": 0.00061, "acc_pose": 0.8148, "loss": 0.00061, "grad_norm": 0.00189, "time": 0.71416}
+{"mode": "train", "epoch": 44, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81225, "loss": 0.00061, "grad_norm": 0.00189, "time": 0.65707}
+{"mode": "train", "epoch": 44, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81535, "loss": 0.00061, "grad_norm": 0.00191, "time": 0.65667}
+{"mode": "train", "epoch": 44, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81262, "loss": 0.00061, "grad_norm": 0.00202, "time": 0.65691}
+{"mode": "train", "epoch": 44, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.81162, "loss": 0.00061, "grad_norm": 0.00207, "time": 0.65685}
+{"mode": "train", "epoch": 45, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05714, "heatmap_loss": 0.00061, "acc_pose": 0.81749, "loss": 0.00061, "grad_norm": 0.0023, "time": 0.71418}
+{"mode": "train", "epoch": 45, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81231, "loss": 0.00061, "grad_norm": 0.00205, "time": 0.65681}
+{"mode": "train", "epoch": 45, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00061, "acc_pose": 0.80782, "loss": 0.00061, "grad_norm": 0.00211, "time": 0.65668}
+{"mode": "train", "epoch": 45, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81186, "loss": 0.00061, "grad_norm": 0.00192, "time": 0.6564}
+{"mode": "train", "epoch": 45, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81904, "loss": 0.00061, "grad_norm": 0.00176, "time": 0.65684}
+{"mode": "train", "epoch": 46, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05756, "heatmap_loss": 0.00061, "acc_pose": 0.81589, "loss": 0.00061, "grad_norm": 0.00198, "time": 0.71438}
+{"mode": "train", "epoch": 46, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81722, "loss": 0.00061, "grad_norm": 0.00196, "time": 0.65715}
+{"mode": "train", "epoch": 46, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.81133, "loss": 0.00061, "grad_norm": 0.00204, "time": 0.65692}
+{"mode": "train", "epoch": 46, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.81781, "loss": 0.00061, "grad_norm": 0.00184, "time": 0.65711}
+{"mode": "train", "epoch": 46, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81913, "loss": 0.00061, "grad_norm": 0.00176, "time": 0.65706}
+{"mode": "train", "epoch": 47, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05738, "heatmap_loss": 0.00061, "acc_pose": 0.81312, "loss": 0.00061, "grad_norm": 0.00186, "time": 0.71485}
+{"mode": "train", "epoch": 47, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00061, "acc_pose": 0.82257, "loss": 0.00061, "grad_norm": 0.00206, "time": 0.65705}
+{"mode": "train", "epoch": 47, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.80699, "loss": 0.00061, "grad_norm": 0.00185, "time": 0.65675}
+{"mode": "train", "epoch": 47, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81639, "loss": 0.00061, "grad_norm": 0.00186, "time": 0.65665}
+{"mode": "train", "epoch": 47, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.82266, "loss": 0.00061, "grad_norm": 0.00192, "time": 0.65689}
+{"mode": "train", "epoch": 48, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05725, "heatmap_loss": 0.0006, "acc_pose": 0.81726, "loss": 0.0006, "grad_norm": 0.00199, "time": 0.71377}
+{"mode": "train", "epoch": 48, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81297, "loss": 0.0006, "grad_norm": 0.00177, "time": 0.65701}
+{"mode": "train", "epoch": 48, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81833, "loss": 0.0006, "grad_norm": 0.00189, "time": 0.65673}
+{"mode": "train", "epoch": 48, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00061, "acc_pose": 0.816, "loss": 0.00061, "grad_norm": 0.00182, "time": 0.65683}
+{"mode": "train", "epoch": 48, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00061, "acc_pose": 0.82057, "loss": 0.00061, "grad_norm": 0.00182, "time": 0.65672}
+{"mode": "train", "epoch": 49, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05622, "heatmap_loss": 0.0006, "acc_pose": 0.81674, "loss": 0.0006, "grad_norm": 0.00179, "time": 0.71502}
+{"mode": "train", "epoch": 49, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.81351, "loss": 0.00061, "grad_norm": 0.00197, "time": 0.65644}
+{"mode": "train", "epoch": 49, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81712, "loss": 0.0006, "grad_norm": 0.00192, "time": 0.6573}
+{"mode": "train", "epoch": 49, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.0006, "acc_pose": 0.81285, "loss": 0.0006, "grad_norm": 0.00192, "time": 0.65737}
+{"mode": "train", "epoch": 49, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00061, "acc_pose": 0.82138, "loss": 0.00061, "grad_norm": 0.00189, "time": 0.65679}
+{"mode": "train", "epoch": 50, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05602, "heatmap_loss": 0.0006, "acc_pose": 0.82519, "loss": 0.0006, "grad_norm": 0.00206, "time": 0.71324}
+{"mode": "train", "epoch": 50, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81546, "loss": 0.0006, "grad_norm": 0.00195, "time": 0.65656}
+{"mode": "train", "epoch": 50, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81672, "loss": 0.0006, "grad_norm": 0.00201, "time": 0.65666}
+{"mode": "train", "epoch": 50, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81819, "loss": 0.0006, "grad_norm": 0.00182, "time": 0.65668}
+{"mode": "train", "epoch": 50, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81157, "loss": 0.0006, "grad_norm": 0.00171, "time": 0.65672}
+{"mode": "val", "epoch": 50, "iter": 407, "lr": 0.0, "AP": 0.7695, "AP .5": 0.90845, "AP .75": 0.8426, "AP (M)": 0.69611, "AP (L)": 0.79689, "AR": 0.82338, "AR .5": 0.94962, "AR .75": 0.88948, "AR (M)": 0.78249, "AR (L)": 0.88305}
+{"mode": "train", "epoch": 51, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0575, "heatmap_loss": 0.0006, "acc_pose": 0.82292, "loss": 0.0006, "grad_norm": 0.00171, "time": 0.71111}
+{"mode": "train", "epoch": 51, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.81437, "loss": 0.0006, "grad_norm": 0.00178, "time": 0.65664}
+{"mode": "train", "epoch": 51, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.0006, "acc_pose": 0.81762, "loss": 0.0006, "grad_norm": 0.00196, "time": 0.65677}
+{"mode": "train", "epoch": 51, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.82175, "loss": 0.0006, "grad_norm": 0.00172, "time": 0.65676}
+{"mode": "train", "epoch": 51, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.0006, "acc_pose": 0.82446, "loss": 0.0006, "grad_norm": 0.00172, "time": 0.65641}
+{"mode": "train", "epoch": 52, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05672, "heatmap_loss": 0.0006, "acc_pose": 0.81974, "loss": 0.0006, "grad_norm": 0.00175, "time": 0.71319}
+{"mode": "train", "epoch": 52, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.82396, "loss": 0.0006, "grad_norm": 0.00175, "time": 0.65688}
+{"mode": "train", "epoch": 52, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.82181, "loss": 0.0006, "grad_norm": 0.00177, "time": 0.6566}
+{"mode": "train", "epoch": 52, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.8208, "loss": 0.00059, "grad_norm": 0.00165, "time": 0.65677}
+{"mode": "train", "epoch": 52, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00039, "heatmap_loss": 0.0006, "acc_pose": 0.81904, "loss": 0.0006, "grad_norm": 0.00171, "time": 0.6569}
+{"mode": "train", "epoch": 53, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05652, "heatmap_loss": 0.0006, "acc_pose": 0.82065, "loss": 0.0006, "grad_norm": 0.00175, "time": 0.71288}
+{"mode": "train", "epoch": 53, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81997, "loss": 0.0006, "grad_norm": 0.00208, "time": 0.65697}
+{"mode": "train", "epoch": 53, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0006, "acc_pose": 0.81721, "loss": 0.0006, "grad_norm": 0.00211, "time": 0.65695}
+{"mode": "train", "epoch": 53, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81958, "loss": 0.0006, "grad_norm": 0.00172, "time": 0.65705}
+{"mode": "train", "epoch": 53, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.0006, "acc_pose": 0.81644, "loss": 0.0006, "grad_norm": 0.00176, "time": 0.65698}
+{"mode": "train", "epoch": 54, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05678, "heatmap_loss": 0.00059, "acc_pose": 0.82299, "loss": 0.00059, "grad_norm": 0.00197, "time": 0.71395}
+{"mode": "train", "epoch": 54, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0006, "acc_pose": 0.81762, "loss": 0.0006, "grad_norm": 0.00185, "time": 0.65703}
+{"mode": "train", "epoch": 54, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81921, "loss": 0.00059, "grad_norm": 0.00191, "time": 0.65715}
+{"mode": "train", "epoch": 54, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.8209, "loss": 0.0006, "grad_norm": 0.00177, "time": 0.65693}
+{"mode": "train", "epoch": 54, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.82029, "loss": 0.0006, "grad_norm": 0.00187, "time": 0.65696}
+{"mode": "train", "epoch": 55, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05693, "heatmap_loss": 0.00059, "acc_pose": 0.82807, "loss": 0.00059, "grad_norm": 0.0019, "time": 0.71359}
+{"mode": "train", "epoch": 55, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.81812, "loss": 0.00059, "grad_norm": 0.00169, "time": 0.65691}
+{"mode": "train", "epoch": 55, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0006, "acc_pose": 0.81426, "loss": 0.0006, "grad_norm": 0.0017, "time": 0.65708}
+{"mode": "train", "epoch": 55, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82184, "loss": 0.00059, "grad_norm": 0.00177, "time": 0.65705}
+{"mode": "train", "epoch": 55, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.81941, "loss": 0.00059, "grad_norm": 0.00179, "time": 0.65738}
+{"mode": "train", "epoch": 56, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05687, "heatmap_loss": 0.0006, "acc_pose": 0.82346, "loss": 0.0006, "grad_norm": 0.00182, "time": 0.71316}
+{"mode": "train", "epoch": 56, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82, "loss": 0.00059, "grad_norm": 0.00163, "time": 0.65698}
+{"mode": "train", "epoch": 56, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00059, "acc_pose": 0.81471, "loss": 0.00059, "grad_norm": 0.00168, "time": 0.65705}
+{"mode": "train", "epoch": 56, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81356, "loss": 0.00059, "grad_norm": 0.0018, "time": 0.65683}
+{"mode": "train", "epoch": 56, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82136, "loss": 0.00059, "grad_norm": 0.00193, "time": 0.65699}
+{"mode": "train", "epoch": 57, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05828, "heatmap_loss": 0.0006, "acc_pose": 0.82503, "loss": 0.0006, "grad_norm": 0.00175, "time": 0.71609}
+{"mode": "train", "epoch": 57, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.81867, "loss": 0.00059, "grad_norm": 0.00173, "time": 0.65682}
+{"mode": "train", "epoch": 57, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82435, "loss": 0.00059, "grad_norm": 0.00184, "time": 0.65675}
+{"mode": "train", "epoch": 57, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82809, "loss": 0.00059, "grad_norm": 0.00185, "time": 0.65704}
+{"mode": "train", "epoch": 57, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82382, "loss": 0.00059, "grad_norm": 0.00151, "time": 0.65713}
+{"mode": "train", "epoch": 58, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05677, "heatmap_loss": 0.00059, "acc_pose": 0.82895, "loss": 0.00059, "grad_norm": 0.00178, "time": 0.71376}
+{"mode": "train", "epoch": 58, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82192, "loss": 0.00059, "grad_norm": 0.00178, "time": 0.65675}
+{"mode": "train", "epoch": 58, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82081, "loss": 0.00059, "grad_norm": 0.00183, "time": 0.65696}
+{"mode": "train", "epoch": 58, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82535, "loss": 0.00059, "grad_norm": 0.00185, "time": 0.65704}
+{"mode": "train", "epoch": 58, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00059, "acc_pose": 0.82581, "loss": 0.00059, "grad_norm": 0.00174, "time": 0.65672}
+{"mode": "train", "epoch": 59, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0569, "heatmap_loss": 0.00059, "acc_pose": 0.82467, "loss": 0.00059, "grad_norm": 0.00177, "time": 0.71399}
+{"mode": "train", "epoch": 59, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82657, "loss": 0.00059, "grad_norm": 0.00169, "time": 0.65696}
+{"mode": "train", "epoch": 59, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82659, "loss": 0.00059, "grad_norm": 0.00159, "time": 0.657}
+{"mode": "train", "epoch": 59, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82456, "loss": 0.00059, "grad_norm": 0.00173, "time": 0.65688}
+{"mode": "train", "epoch": 59, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82844, "loss": 0.00059, "grad_norm": 0.0018, "time": 0.6567}
+{"mode": "train", "epoch": 60, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05726, "heatmap_loss": 0.00059, "acc_pose": 0.82544, "loss": 0.00059, "grad_norm": 0.00185, "time": 0.71382}
+{"mode": "train", "epoch": 60, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82381, "loss": 0.00058, "grad_norm": 0.00178, "time": 0.65696}
+{"mode": "train", "epoch": 60, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81943, "loss": 0.00059, "grad_norm": 0.00158, "time": 0.65683}
+{"mode": "train", "epoch": 60, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82322, "loss": 0.00059, "grad_norm": 0.0018, "time": 0.65674}
+{"mode": "train", "epoch": 60, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81461, "loss": 0.00059, "grad_norm": 0.0016, "time": 0.6567}
+{"mode": "val", "epoch": 60, "iter": 407, "lr": 0.0, "AP": 0.7715, "AP .5": 0.90828, "AP .75": 0.84254, "AP (M)": 0.69783, "AP (L)": 0.7985, "AR": 0.82549, "AR .5": 0.94978, "AR .75": 0.88933, "AR (M)": 0.78465, "AR (L)": 0.88543}
+{"mode": "train", "epoch": 61, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05577, "heatmap_loss": 0.00058, "acc_pose": 0.8157, "loss": 0.00058, "grad_norm": 0.00169, "time": 0.70941}
+{"mode": "train", "epoch": 61, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.81693, "loss": 0.00059, "grad_norm": 0.00164, "time": 0.65674}
+{"mode": "train", "epoch": 61, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.82073, "loss": 0.00058, "grad_norm": 0.00168, "time": 0.65646}
+{"mode": "train", "epoch": 61, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82426, "loss": 0.00059, "grad_norm": 0.00172, "time": 0.65661}
+{"mode": "train", "epoch": 61, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.8228, "loss": 0.00059, "grad_norm": 0.00158, "time": 0.65678}
+{"mode": "train", "epoch": 62, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05698, "heatmap_loss": 0.00059, "acc_pose": 0.82012, "loss": 0.00059, "grad_norm": 0.00175, "time": 0.7142}
+{"mode": "train", "epoch": 62, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00059, "acc_pose": 0.82056, "loss": 0.00059, "grad_norm": 0.00167, "time": 0.65676}
+{"mode": "train", "epoch": 62, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.8217, "loss": 0.00059, "grad_norm": 0.00166, "time": 0.65708}
+{"mode": "train", "epoch": 62, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82686, "loss": 0.00058, "grad_norm": 0.00171, "time": 0.65712}
+{"mode": "train", "epoch": 62, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82564, "loss": 0.00058, "grad_norm": 0.00166, "time": 0.65662}
+{"mode": "train", "epoch": 63, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05673, "heatmap_loss": 0.00058, "acc_pose": 0.82614, "loss": 0.00058, "grad_norm": 0.00167, "time": 0.71485}
+{"mode": "train", "epoch": 63, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82129, "loss": 0.00058, "grad_norm": 0.00172, "time": 0.65638}
+{"mode": "train", "epoch": 63, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82737, "loss": 0.00058, "grad_norm": 0.0016, "time": 0.65687}
+{"mode": "train", "epoch": 63, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82741, "loss": 0.00058, "grad_norm": 0.00161, "time": 0.6567}
+{"mode": "train", "epoch": 63, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82388, "loss": 0.00059, "grad_norm": 0.00154, "time": 0.6567}
+{"mode": "train", "epoch": 64, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05703, "heatmap_loss": 0.00058, "acc_pose": 0.82538, "loss": 0.00058, "grad_norm": 0.00159, "time": 0.7138}
+{"mode": "train", "epoch": 64, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00037, "heatmap_loss": 0.00058, "acc_pose": 0.82421, "loss": 0.00058, "grad_norm": 0.00175, "time": 0.65719}
+{"mode": "train", "epoch": 64, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82758, "loss": 0.00058, "grad_norm": 0.00168, "time": 0.65683}
+{"mode": "train", "epoch": 64, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82337, "loss": 0.00058, "grad_norm": 0.0017, "time": 0.65666}
+{"mode": "train", "epoch": 64, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82223, "loss": 0.00058, "grad_norm": 0.00169, "time": 0.65676}
+{"mode": "train", "epoch": 65, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05733, "heatmap_loss": 0.00058, "acc_pose": 0.82486, "loss": 0.00058, "grad_norm": 0.00173, "time": 0.71437}
+{"mode": "train", "epoch": 65, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00039, "heatmap_loss": 0.00058, "acc_pose": 0.82677, "loss": 0.00058, "grad_norm": 0.00167, "time": 0.65722}
+{"mode": "train", "epoch": 65, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00059, "acc_pose": 0.82432, "loss": 0.00059, "grad_norm": 0.00171, "time": 0.65669}
+{"mode": "train", "epoch": 65, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82353, "loss": 0.00058, "grad_norm": 0.0016, "time": 0.65698}
+{"mode": "train", "epoch": 65, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82503, "loss": 0.00058, "grad_norm": 0.00167, "time": 0.65697}
+{"mode": "train", "epoch": 66, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05689, "heatmap_loss": 0.00058, "acc_pose": 0.82984, "loss": 0.00058, "grad_norm": 0.00161, "time": 0.71375}
+{"mode": "train", "epoch": 66, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82217, "loss": 0.00058, "grad_norm": 0.00147, "time": 0.65714}
+{"mode": "train", "epoch": 66, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82611, "loss": 0.00058, "grad_norm": 0.00152, "time": 0.6568}
+{"mode": "train", "epoch": 66, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82553, "loss": 0.00058, "grad_norm": 0.0016, "time": 0.65693}
+{"mode": "train", "epoch": 66, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.8253, "loss": 0.00058, "grad_norm": 0.00163, "time": 0.65702}
+{"mode": "train", "epoch": 67, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05675, "heatmap_loss": 0.00058, "acc_pose": 0.83023, "loss": 0.00058, "grad_norm": 0.0018, "time": 0.71412}
+{"mode": "train", "epoch": 67, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82033, "loss": 0.00058, "grad_norm": 0.00166, "time": 0.65699}
+{"mode": "train", "epoch": 67, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82873, "loss": 0.00058, "grad_norm": 0.00166, "time": 0.65743}
+{"mode": "train", "epoch": 67, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.83106, "loss": 0.00058, "grad_norm": 0.00149, "time": 0.6574}
+{"mode": "train", "epoch": 67, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00058, "acc_pose": 0.82847, "loss": 0.00058, "grad_norm": 0.00186, "time": 0.65727}
+{"mode": "train", "epoch": 68, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05657, "heatmap_loss": 0.00058, "acc_pose": 0.8212, "loss": 0.00058, "grad_norm": 0.00157, "time": 0.71291}
+{"mode": "train", "epoch": 68, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82535, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.65684}
+{"mode": "train", "epoch": 68, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83185, "loss": 0.00058, "grad_norm": 0.00156, "time": 0.65717}
+{"mode": "train", "epoch": 68, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82545, "loss": 0.00058, "grad_norm": 0.00156, "time": 0.6569}
+{"mode": "train", "epoch": 68, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82609, "loss": 0.00058, "grad_norm": 0.00166, "time": 0.65705}
+{"mode": "train", "epoch": 69, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05671, "heatmap_loss": 0.00058, "acc_pose": 0.82484, "loss": 0.00058, "grad_norm": 0.00178, "time": 0.71452}
+{"mode": "train", "epoch": 69, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82546, "loss": 0.00058, "grad_norm": 0.00158, "time": 0.65688}
+{"mode": "train", "epoch": 69, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82441, "loss": 0.00057, "grad_norm": 0.00146, "time": 0.65706}
+{"mode": "train", "epoch": 69, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83666, "loss": 0.00057, "grad_norm": 0.00148, "time": 0.65728}
+{"mode": "train", "epoch": 69, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82822, "loss": 0.00058, "grad_norm": 0.00153, "time": 0.6572}
+{"mode": "train", "epoch": 70, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05632, "heatmap_loss": 0.00058, "acc_pose": 0.82515, "loss": 0.00058, "grad_norm": 0.00169, "time": 0.71348}
+{"mode": "train", "epoch": 70, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82766, "loss": 0.00058, "grad_norm": 0.0016, "time": 0.65706}
+{"mode": "train", "epoch": 70, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.81953, "loss": 0.00058, "grad_norm": 0.0017, "time": 0.65678}
+{"mode": "train", "epoch": 70, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82841, "loss": 0.00057, "grad_norm": 0.00181, "time": 0.65709}
+{"mode": "train", "epoch": 70, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.82441, "loss": 0.00057, "grad_norm": 0.00159, "time": 0.65674}
+{"mode": "val", "epoch": 70, "iter": 407, "lr": 0.0, "AP": 0.77484, "AP .5": 0.91393, "AP .75": 0.84832, "AP (M)": 0.70247, "AP (L)": 0.80235, "AR": 0.82645, "AR .5": 0.95135, "AR .75": 0.89106, "AR (M)": 0.78724, "AR (L)": 0.88424}
+{"mode": "train", "epoch": 71, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0558, "heatmap_loss": 0.00057, "acc_pose": 0.83013, "loss": 0.00057, "grad_norm": 0.00158, "time": 0.70853}
+{"mode": "train", "epoch": 71, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00058, "acc_pose": 0.82467, "loss": 0.00058, "grad_norm": 0.00158, "time": 0.65615}
+{"mode": "train", "epoch": 71, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00058, "acc_pose": 0.82643, "loss": 0.00058, "grad_norm": 0.00155, "time": 0.65678}
+{"mode": "train", "epoch": 71, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83304, "loss": 0.00057, "grad_norm": 0.00163, "time": 0.6566}
+{"mode": "train", "epoch": 71, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83419, "loss": 0.00058, "grad_norm": 0.00162, "time": 0.65663}
+{"mode": "train", "epoch": 72, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05737, "heatmap_loss": 0.00057, "acc_pose": 0.83018, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.71422}
+{"mode": "train", "epoch": 72, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82693, "loss": 0.00058, "grad_norm": 0.00175, "time": 0.65712}
+{"mode": "train", "epoch": 72, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.82107, "loss": 0.00058, "grad_norm": 0.00147, "time": 0.65713}
+{"mode": "train", "epoch": 72, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83376, "loss": 0.00057, "grad_norm": 0.00151, "time": 0.65703}
+{"mode": "train", "epoch": 72, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00058, "acc_pose": 0.82327, "loss": 0.00058, "grad_norm": 0.0016, "time": 0.65668}
+{"mode": "train", "epoch": 73, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05674, "heatmap_loss": 0.00057, "acc_pose": 0.83319, "loss": 0.00057, "grad_norm": 0.00179, "time": 0.71364}
+{"mode": "train", "epoch": 73, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83417, "loss": 0.00057, "grad_norm": 0.0016, "time": 0.65723}
+{"mode": "train", "epoch": 73, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.82045, "loss": 0.00057, "grad_norm": 0.00157, "time": 0.65696}
+{"mode": "train", "epoch": 73, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0004, "heatmap_loss": 0.00057, "acc_pose": 0.82964, "loss": 0.00057, "grad_norm": 0.0015, "time": 0.65707}
+{"mode": "train", "epoch": 73, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00058, "acc_pose": 0.83056, "loss": 0.00058, "grad_norm": 0.00149, "time": 0.65706}
+{"mode": "train", "epoch": 74, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05711, "heatmap_loss": 0.00057, "acc_pose": 0.82944, "loss": 0.00057, "grad_norm": 0.00157, "time": 0.71438}
+{"mode": "train", "epoch": 74, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.82638, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.65682}
+{"mode": "train", "epoch": 74, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.81976, "loss": 0.00057, "grad_norm": 0.00141, "time": 0.65735}
+{"mode": "train", "epoch": 74, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00041, "heatmap_loss": 0.00057, "acc_pose": 0.83306, "loss": 0.00057, "grad_norm": 0.00151, "time": 0.65709}
+{"mode": "train", "epoch": 74, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82448, "loss": 0.00057, "grad_norm": 0.00146, "time": 0.65717}
+{"mode": "train", "epoch": 75, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05736, "heatmap_loss": 0.00057, "acc_pose": 0.83018, "loss": 0.00057, "grad_norm": 0.00156, "time": 0.71519}
+{"mode": "train", "epoch": 75, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.83104, "loss": 0.00057, "grad_norm": 0.00161, "time": 0.65695}
+{"mode": "train", "epoch": 75, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.82661, "loss": 0.00057, "grad_norm": 0.00143, "time": 0.65728}
+{"mode": "train", "epoch": 75, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00057, "acc_pose": 0.83354, "loss": 0.00057, "grad_norm": 0.00163, "time": 0.65732}
+{"mode": "train", "epoch": 75, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.82859, "loss": 0.00057, "grad_norm": 0.00144, "time": 0.65701}
+{"mode": "train", "epoch": 76, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05659, "heatmap_loss": 0.00057, "acc_pose": 0.83072, "loss": 0.00057, "grad_norm": 0.00145, "time": 0.71463}
+{"mode": "train", "epoch": 76, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00042, "heatmap_loss": 0.00057, "acc_pose": 0.82958, "loss": 0.00057, "grad_norm": 0.00144, "time": 0.65736}
+{"mode": "train", "epoch": 76, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00057, "acc_pose": 0.8354, "loss": 0.00057, "grad_norm": 0.00148, "time": 0.65681}
+{"mode": "train", "epoch": 76, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83055, "loss": 0.00057, "grad_norm": 0.00145, "time": 0.65688}
+{"mode": "train", "epoch": 76, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83113, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.65729}
+{"mode": "train", "epoch": 77, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05678, "heatmap_loss": 0.00057, "acc_pose": 0.82797, "loss": 0.00057, "grad_norm": 0.00157, "time": 0.71348}
+{"mode": "train", "epoch": 77, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82584, "loss": 0.00057, "grad_norm": 0.00174, "time": 0.65716}
+{"mode": "train", "epoch": 77, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83372, "loss": 0.00056, "grad_norm": 0.00149, "time": 0.65687}
+{"mode": "train", "epoch": 77, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00057, "acc_pose": 0.82734, "loss": 0.00057, "grad_norm": 0.00162, "time": 0.65682}
+{"mode": "train", "epoch": 77, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83362, "loss": 0.00057, "grad_norm": 0.00156, "time": 0.65688}
+{"mode": "train", "epoch": 78, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05715, "heatmap_loss": 0.00057, "acc_pose": 0.83866, "loss": 0.00057, "grad_norm": 0.00153, "time": 0.71466}
+{"mode": "train", "epoch": 78, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83775, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.65721}
+{"mode": "train", "epoch": 78, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82986, "loss": 0.00057, "grad_norm": 0.00149, "time": 0.65759}
+{"mode": "train", "epoch": 78, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.82398, "loss": 0.00057, "grad_norm": 0.00151, "time": 0.65726}
+{"mode": "train", "epoch": 78, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83179, "loss": 0.00057, "grad_norm": 0.0015, "time": 0.6574}
+{"mode": "train", "epoch": 79, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05617, "heatmap_loss": 0.00057, "acc_pose": 0.83269, "loss": 0.00057, "grad_norm": 0.0015, "time": 0.71348}
+{"mode": "train", "epoch": 79, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.82072, "loss": 0.00056, "grad_norm": 0.0015, "time": 0.65749}
+{"mode": "train", "epoch": 79, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.82593, "loss": 0.00056, "grad_norm": 0.00144, "time": 0.65682}
+{"mode": "train", "epoch": 79, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00057, "acc_pose": 0.83449, "loss": 0.00057, "grad_norm": 0.00152, "time": 0.65695}
+{"mode": "train", "epoch": 79, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83447, "loss": 0.00057, "grad_norm": 0.00148, "time": 0.65704}
+{"mode": "train", "epoch": 80, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05661, "heatmap_loss": 0.00057, "acc_pose": 0.83598, "loss": 0.00057, "grad_norm": 0.00154, "time": 0.71436}
+{"mode": "train", "epoch": 80, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83556, "loss": 0.00056, "grad_norm": 0.0016, "time": 0.65746}
+{"mode": "train", "epoch": 80, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.82544, "loss": 0.00056, "grad_norm": 0.00142, "time": 0.6573}
+{"mode": "train", "epoch": 80, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.8326, "loss": 0.00056, "grad_norm": 0.0014, "time": 0.65725}
+{"mode": "train", "epoch": 80, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00057, "acc_pose": 0.82995, "loss": 0.00057, "grad_norm": 0.00152, "time": 0.65745}
+{"mode": "val", "epoch": 80, "iter": 407, "lr": 0.0, "AP": 0.77541, "AP .5": 0.9128, "AP .75": 0.84733, "AP (M)": 0.70222, "AP (L)": 0.80208, "AR": 0.82856, "AR .5": 0.95135, "AR .75": 0.89185, "AR (M)": 0.78831, "AR (L)": 0.88737}
+{"mode": "train", "epoch": 81, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05556, "heatmap_loss": 0.00056, "acc_pose": 0.83358, "loss": 0.00056, "grad_norm": 0.00151, "time": 0.7082}
+{"mode": "train", "epoch": 81, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.82997, "loss": 0.00056, "grad_norm": 0.00172, "time": 0.65616}
+{"mode": "train", "epoch": 81, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00039, "heatmap_loss": 0.00057, "acc_pose": 0.82811, "loss": 0.00057, "grad_norm": 0.0016, "time": 0.65665}
+{"mode": "train", "epoch": 81, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00057, "acc_pose": 0.83257, "loss": 0.00057, "grad_norm": 0.0015, "time": 0.65663}
+{"mode": "train", "epoch": 81, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8344, "loss": 0.00056, "grad_norm": 0.00148, "time": 0.65657}
+{"mode": "train", "epoch": 82, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05603, "heatmap_loss": 0.00056, "acc_pose": 0.83675, "loss": 0.00056, "grad_norm": 0.00144, "time": 0.71245}
+{"mode": "train", "epoch": 82, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83571, "loss": 0.00056, "grad_norm": 0.00138, "time": 0.65694}
+{"mode": "train", "epoch": 82, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00026, "heatmap_loss": 0.00056, "acc_pose": 0.83438, "loss": 0.00056, "grad_norm": 0.00143, "time": 0.65706}
+{"mode": "train", "epoch": 82, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00056, "acc_pose": 0.83221, "loss": 0.00056, "grad_norm": 0.00155, "time": 0.65709}
+{"mode": "train", "epoch": 82, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83136, "loss": 0.00056, "grad_norm": 0.00148, "time": 0.65711}
+{"mode": "train", "epoch": 83, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05657, "heatmap_loss": 0.00056, "acc_pose": 0.83412, "loss": 0.00056, "grad_norm": 0.00149, "time": 0.71352}
+{"mode": "train", "epoch": 83, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00057, "acc_pose": 0.83382, "loss": 0.00057, "grad_norm": 0.00143, "time": 0.65687}
+{"mode": "train", "epoch": 83, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00041, "heatmap_loss": 0.00057, "acc_pose": 0.83112, "loss": 0.00057, "grad_norm": 0.00148, "time": 0.65717}
+{"mode": "train", "epoch": 83, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83051, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.65704}
+{"mode": "train", "epoch": 83, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83451, "loss": 0.00056, "grad_norm": 0.00147, "time": 0.65684}
+{"mode": "train", "epoch": 84, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05866, "heatmap_loss": 0.00056, "acc_pose": 0.83733, "loss": 0.00056, "grad_norm": 0.00139, "time": 0.7154}
+{"mode": "train", "epoch": 84, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.82854, "loss": 0.00056, "grad_norm": 0.00145, "time": 0.65701}
+{"mode": "train", "epoch": 84, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83025, "loss": 0.00056, "grad_norm": 0.00136, "time": 0.65685}
+{"mode": "train", "epoch": 84, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83769, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.65742}
+{"mode": "train", "epoch": 84, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.82786, "loss": 0.00056, "grad_norm": 0.00142, "time": 0.65741}
+{"mode": "train", "epoch": 85, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05833, "heatmap_loss": 0.00056, "acc_pose": 0.83268, "loss": 0.00056, "grad_norm": 0.00152, "time": 0.715}
+{"mode": "train", "epoch": 85, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83988, "loss": 0.00056, "grad_norm": 0.00152, "time": 0.65685}
+{"mode": "train", "epoch": 85, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83257, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.65706}
+{"mode": "train", "epoch": 85, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83623, "loss": 0.00056, "grad_norm": 0.00147, "time": 0.6571}
+{"mode": "train", "epoch": 85, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83882, "loss": 0.00056, "grad_norm": 0.00152, "time": 0.65727}
+{"mode": "train", "epoch": 86, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05634, "heatmap_loss": 0.00056, "acc_pose": 0.8374, "loss": 0.00056, "grad_norm": 0.00155, "time": 0.71347}
+{"mode": "train", "epoch": 86, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83034, "loss": 0.00056, "grad_norm": 0.00144, "time": 0.65737}
+{"mode": "train", "epoch": 86, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00056, "acc_pose": 0.83195, "loss": 0.00056, "grad_norm": 0.00142, "time": 0.65746}
+{"mode": "train", "epoch": 86, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83396, "loss": 0.00056, "grad_norm": 0.00134, "time": 0.6572}
+{"mode": "train", "epoch": 86, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8358, "loss": 0.00056, "grad_norm": 0.00143, "time": 0.65736}
+{"mode": "train", "epoch": 87, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05638, "heatmap_loss": 0.00056, "acc_pose": 0.83896, "loss": 0.00056, "grad_norm": 0.00147, "time": 0.71333}
+{"mode": "train", "epoch": 87, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.84188, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.65707}
+{"mode": "train", "epoch": 87, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.82784, "loss": 0.00056, "grad_norm": 0.00134, "time": 0.65671}
+{"mode": "train", "epoch": 87, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.8396, "loss": 0.00056, "grad_norm": 0.00138, "time": 0.65669}
+{"mode": "train", "epoch": 87, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83626, "loss": 0.00056, "grad_norm": 0.00135, "time": 0.65708}
+{"mode": "train", "epoch": 88, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05628, "heatmap_loss": 0.00056, "acc_pose": 0.83514, "loss": 0.00056, "grad_norm": 0.00139, "time": 0.71336}
+{"mode": "train", "epoch": 88, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83413, "loss": 0.00056, "grad_norm": 0.00141, "time": 0.65722}
+{"mode": "train", "epoch": 88, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83355, "loss": 0.00056, "grad_norm": 0.00141, "time": 0.65699}
+{"mode": "train", "epoch": 88, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.83653, "loss": 0.00056, "grad_norm": 0.00139, "time": 0.65744}
+{"mode": "train", "epoch": 88, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.84423, "loss": 0.00056, "grad_norm": 0.00145, "time": 0.65725}
+{"mode": "train", "epoch": 89, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05656, "heatmap_loss": 0.00056, "acc_pose": 0.82526, "loss": 0.00056, "grad_norm": 0.00139, "time": 0.71371}
+{"mode": "train", "epoch": 89, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83401, "loss": 0.00056, "grad_norm": 0.00142, "time": 0.65733}
+{"mode": "train", "epoch": 89, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83129, "loss": 0.00056, "grad_norm": 0.00154, "time": 0.657}
+{"mode": "train", "epoch": 89, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00043, "heatmap_loss": 0.00056, "acc_pose": 0.83589, "loss": 0.00056, "grad_norm": 0.00154, "time": 0.65702}
+{"mode": "train", "epoch": 89, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00055, "acc_pose": 0.83994, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.65707}
+{"mode": "train", "epoch": 90, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05727, "heatmap_loss": 0.00056, "acc_pose": 0.83491, "loss": 0.00056, "grad_norm": 0.00141, "time": 0.7142}
+{"mode": "train", "epoch": 90, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83712, "loss": 0.00055, "grad_norm": 0.00134, "time": 0.65668}
+{"mode": "train", "epoch": 90, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.82813, "loss": 0.00056, "grad_norm": 0.00143, "time": 0.65687}
+{"mode": "train", "epoch": 90, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83801, "loss": 0.00056, "grad_norm": 0.00146, "time": 0.6571}
+{"mode": "train", "epoch": 90, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83847, "loss": 0.00055, "grad_norm": 0.00134, "time": 0.65672}
+{"mode": "val", "epoch": 90, "iter": 407, "lr": 0.0, "AP": 0.77853, "AP .5": 0.91432, "AP .75": 0.85026, "AP (M)": 0.70669, "AP (L)": 0.80589, "AR": 0.8298, "AR .5": 0.95198, "AR .75": 0.892, "AR (M)": 0.78984, "AR (L)": 0.88844}
+{"mode": "train", "epoch": 91, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05629, "heatmap_loss": 0.00055, "acc_pose": 0.84163, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.7097}
+{"mode": "train", "epoch": 91, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83793, "loss": 0.00055, "grad_norm": 0.00141, "time": 0.65611}
+{"mode": "train", "epoch": 91, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.84206, "loss": 0.00056, "grad_norm": 0.00151, "time": 0.65683}
+{"mode": "train", "epoch": 91, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83179, "loss": 0.00056, "grad_norm": 0.0014, "time": 0.65641}
+{"mode": "train", "epoch": 91, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83985, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.65686}
+{"mode": "train", "epoch": 92, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05658, "heatmap_loss": 0.00056, "acc_pose": 0.83673, "loss": 0.00056, "grad_norm": 0.00135, "time": 0.71307}
+{"mode": "train", "epoch": 92, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.8361, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65689}
+{"mode": "train", "epoch": 92, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83213, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.65681}
+{"mode": "train", "epoch": 92, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00056, "acc_pose": 0.83469, "loss": 0.00056, "grad_norm": 0.00156, "time": 0.65711}
+{"mode": "train", "epoch": 92, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83977, "loss": 0.00056, "grad_norm": 0.00143, "time": 0.65704}
+{"mode": "train", "epoch": 93, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0563, "heatmap_loss": 0.00055, "acc_pose": 0.83472, "loss": 0.00055, "grad_norm": 0.00164, "time": 0.71465}
+{"mode": "train", "epoch": 93, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83178, "loss": 0.00056, "grad_norm": 0.00138, "time": 0.65711}
+{"mode": "train", "epoch": 93, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84042, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.65746}
+{"mode": "train", "epoch": 93, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00056, "acc_pose": 0.83957, "loss": 0.00056, "grad_norm": 0.00135, "time": 0.65708}
+{"mode": "train", "epoch": 93, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84322, "loss": 0.00055, "grad_norm": 0.00133, "time": 0.65734}
+{"mode": "train", "epoch": 94, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05685, "heatmap_loss": 0.00055, "acc_pose": 0.83533, "loss": 0.00055, "grad_norm": 0.00138, "time": 0.71346}
+{"mode": "train", "epoch": 94, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84367, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65706}
+{"mode": "train", "epoch": 94, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83308, "loss": 0.00055, "grad_norm": 0.00145, "time": 0.65707}
+{"mode": "train", "epoch": 94, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84063, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65702}
+{"mode": "train", "epoch": 94, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84384, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65699}
+{"mode": "train", "epoch": 95, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05684, "heatmap_loss": 0.00055, "acc_pose": 0.83765, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.71404}
+{"mode": "train", "epoch": 95, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83556, "loss": 0.00055, "grad_norm": 0.00162, "time": 0.6572}
+{"mode": "train", "epoch": 95, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.82883, "loss": 0.00055, "grad_norm": 0.00142, "time": 0.65725}
+{"mode": "train", "epoch": 95, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83672, "loss": 0.00055, "grad_norm": 0.00149, "time": 0.65709}
+{"mode": "train", "epoch": 95, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83771, "loss": 0.00055, "grad_norm": 0.0014, "time": 0.65694}
+{"mode": "train", "epoch": 96, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05721, "heatmap_loss": 0.00055, "acc_pose": 0.84125, "loss": 0.00055, "grad_norm": 0.00132, "time": 0.71457}
+{"mode": "train", "epoch": 96, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84113, "loss": 0.00055, "grad_norm": 0.00143, "time": 0.65754}
+{"mode": "train", "epoch": 96, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83835, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.65773}
+{"mode": "train", "epoch": 96, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83685, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.65787}
+{"mode": "train", "epoch": 96, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00056, "acc_pose": 0.83913, "loss": 0.00056, "grad_norm": 0.00147, "time": 0.65736}
+{"mode": "train", "epoch": 97, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05691, "heatmap_loss": 0.00055, "acc_pose": 0.84006, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.71376}
+{"mode": "train", "epoch": 97, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.84337, "loss": 0.00055, "grad_norm": 0.00137, "time": 0.65768}
+{"mode": "train", "epoch": 97, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00043, "heatmap_loss": 0.00055, "acc_pose": 0.8387, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65709}
+{"mode": "train", "epoch": 97, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83729, "loss": 0.00055, "grad_norm": 0.00153, "time": 0.65726}
+{"mode": "train", "epoch": 97, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83963, "loss": 0.00055, "grad_norm": 0.00138, "time": 0.65715}
+{"mode": "train", "epoch": 98, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05676, "heatmap_loss": 0.00055, "acc_pose": 0.8338, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.71284}
+{"mode": "train", "epoch": 98, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83853, "loss": 0.00055, "grad_norm": 0.00141, "time": 0.65696}
+{"mode": "train", "epoch": 98, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0004, "heatmap_loss": 0.00055, "acc_pose": 0.83717, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.65684}
+{"mode": "train", "epoch": 98, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84119, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65688}
+{"mode": "train", "epoch": 98, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00056, "acc_pose": 0.83817, "loss": 0.00056, "grad_norm": 0.00133, "time": 0.65695}
+{"mode": "train", "epoch": 99, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05831, "heatmap_loss": 0.00055, "acc_pose": 0.84306, "loss": 0.00055, "grad_norm": 0.00128, "time": 0.71504}
+{"mode": "train", "epoch": 99, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83934, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.65651}
+{"mode": "train", "epoch": 99, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84081, "loss": 0.00055, "grad_norm": 0.00137, "time": 0.65707}
+{"mode": "train", "epoch": 99, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83275, "loss": 0.00055, "grad_norm": 0.00138, "time": 0.65726}
+{"mode": "train", "epoch": 99, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84036, "loss": 0.00055, "grad_norm": 0.00148, "time": 0.65707}
+{"mode": "train", "epoch": 100, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05796, "heatmap_loss": 0.00054, "acc_pose": 0.83852, "loss": 0.00054, "grad_norm": 0.00136, "time": 0.71517}
+{"mode": "train", "epoch": 100, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83882, "loss": 0.00055, "grad_norm": 0.00133, "time": 0.65711}
+{"mode": "train", "epoch": 100, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.83768, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65697}
+{"mode": "train", "epoch": 100, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00055, "acc_pose": 0.84679, "loss": 0.00055, "grad_norm": 0.00144, "time": 0.65692}
+{"mode": "train", "epoch": 100, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00055, "acc_pose": 0.83539, "loss": 0.00055, "grad_norm": 0.00141, "time": 0.65681}
+{"mode": "val", "epoch": 100, "iter": 407, "lr": 0.0, "AP": 0.77876, "AP .5": 0.91447, "AP .75": 0.84989, "AP (M)": 0.70522, "AP (L)": 0.80754, "AR": 0.83079, "AR .5": 0.95246, "AR .75": 0.89373, "AR (M)": 0.79006, "AR (L)": 0.89082}
+{"mode": "train", "epoch": 101, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05541, "heatmap_loss": 0.00055, "acc_pose": 0.83334, "loss": 0.00055, "grad_norm": 0.00137, "time": 0.70814}
+{"mode": "train", "epoch": 101, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83331, "loss": 0.00055, "grad_norm": 0.00133, "time": 0.656}
+{"mode": "train", "epoch": 101, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.84162, "loss": 0.00055, "grad_norm": 0.00134, "time": 0.65669}
+{"mode": "train", "epoch": 101, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.8401, "loss": 0.00055, "grad_norm": 0.00126, "time": 0.65646}
+{"mode": "train", "epoch": 101, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.83998, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.6563}
+{"mode": "train", "epoch": 102, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05705, "heatmap_loss": 0.00054, "acc_pose": 0.84441, "loss": 0.00054, "grad_norm": 0.00134, "time": 0.71373}
+{"mode": "train", "epoch": 102, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00054, "acc_pose": 0.83761, "loss": 0.00054, "grad_norm": 0.00137, "time": 0.65698}
+{"mode": "train", "epoch": 102, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00055, "acc_pose": 0.83366, "loss": 0.00055, "grad_norm": 0.00125, "time": 0.65692}
+{"mode": "train", "epoch": 102, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00042, "heatmap_loss": 0.00055, "acc_pose": 0.84533, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.65687}
+{"mode": "train", "epoch": 102, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00055, "acc_pose": 0.84376, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.65723}
+{"mode": "train", "epoch": 103, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05651, "heatmap_loss": 0.00055, "acc_pose": 0.8442, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.71465}
+{"mode": "train", "epoch": 103, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00043, "heatmap_loss": 0.00055, "acc_pose": 0.83679, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.65713}
+{"mode": "train", "epoch": 103, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83936, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.65687}
+{"mode": "train", "epoch": 103, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84205, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.65712}
+{"mode": "train", "epoch": 103, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84296, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.6566}
+{"mode": "train", "epoch": 104, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05648, "heatmap_loss": 0.00055, "acc_pose": 0.83513, "loss": 0.00055, "grad_norm": 0.00137, "time": 0.71444}
+{"mode": "train", "epoch": 104, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00041, "heatmap_loss": 0.00054, "acc_pose": 0.84415, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.657}
+{"mode": "train", "epoch": 104, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.842, "loss": 0.00055, "grad_norm": 0.0013, "time": 0.65683}
+{"mode": "train", "epoch": 104, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.84579, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.65694}
+{"mode": "train", "epoch": 104, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.841, "loss": 0.00055, "grad_norm": 0.00136, "time": 0.65711}
+{"mode": "train", "epoch": 105, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05687, "heatmap_loss": 0.00055, "acc_pose": 0.84483, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.71404}
+{"mode": "train", "epoch": 105, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84139, "loss": 0.00054, "grad_norm": 0.00121, "time": 0.65685}
+{"mode": "train", "epoch": 105, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84053, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.6569}
+{"mode": "train", "epoch": 105, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84766, "loss": 0.00054, "grad_norm": 0.00131, "time": 0.65696}
+{"mode": "train", "epoch": 105, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00055, "acc_pose": 0.83673, "loss": 0.00055, "grad_norm": 0.00139, "time": 0.6572}
+{"mode": "train", "epoch": 106, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05663, "heatmap_loss": 0.00054, "acc_pose": 0.84065, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.71382}
+{"mode": "train", "epoch": 106, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84077, "loss": 0.00054, "grad_norm": 0.00133, "time": 0.6573}
+{"mode": "train", "epoch": 106, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.83362, "loss": 0.00054, "grad_norm": 0.00128, "time": 0.65728}
+{"mode": "train", "epoch": 106, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84614, "loss": 0.00054, "grad_norm": 0.00132, "time": 0.65711}
+{"mode": "train", "epoch": 106, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.8407, "loss": 0.00054, "grad_norm": 0.00135, "time": 0.65734}
+{"mode": "train", "epoch": 107, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05779, "heatmap_loss": 0.00054, "acc_pose": 0.8427, "loss": 0.00054, "grad_norm": 0.00131, "time": 0.71435}
+{"mode": "train", "epoch": 107, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.83935, "loss": 0.00055, "grad_norm": 0.00134, "time": 0.65704}
+{"mode": "train", "epoch": 107, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84513, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65714}
+{"mode": "train", "epoch": 107, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00055, "acc_pose": 0.84292, "loss": 0.00055, "grad_norm": 0.00131, "time": 0.65672}
+{"mode": "train", "epoch": 107, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00054, "acc_pose": 0.84003, "loss": 0.00054, "grad_norm": 0.00139, "time": 0.65744}
+{"mode": "train", "epoch": 108, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05731, "heatmap_loss": 0.00054, "acc_pose": 0.84476, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.71413}
+{"mode": "train", "epoch": 108, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.84255, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.65659}
+{"mode": "train", "epoch": 108, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00054, "acc_pose": 0.84191, "loss": 0.00054, "grad_norm": 0.00137, "time": 0.65675}
+{"mode": "train", "epoch": 108, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.84296, "loss": 0.00054, "grad_norm": 0.00128, "time": 0.657}
+{"mode": "train", "epoch": 108, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84243, "loss": 0.00054, "grad_norm": 0.00127, "time": 0.65697}
+{"mode": "train", "epoch": 109, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05741, "heatmap_loss": 0.00054, "acc_pose": 0.84587, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.71426}
+{"mode": "train", "epoch": 109, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00055, "acc_pose": 0.84048, "loss": 0.00055, "grad_norm": 0.00135, "time": 0.65659}
+{"mode": "train", "epoch": 109, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84597, "loss": 0.00054, "grad_norm": 0.00129, "time": 0.65709}
+{"mode": "train", "epoch": 109, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84404, "loss": 0.00054, "grad_norm": 0.00135, "time": 0.65725}
+{"mode": "train", "epoch": 109, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00027, "heatmap_loss": 0.00054, "acc_pose": 0.84457, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65668}
+{"mode": "train", "epoch": 110, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05659, "heatmap_loss": 0.00054, "acc_pose": 0.83876, "loss": 0.00054, "grad_norm": 0.00135, "time": 0.71366}
+{"mode": "train", "epoch": 110, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84223, "loss": 0.00054, "grad_norm": 0.00146, "time": 0.6567}
+{"mode": "train", "epoch": 110, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84047, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65677}
+{"mode": "train", "epoch": 110, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.85076, "loss": 0.00054, "grad_norm": 0.00131, "time": 0.657}
+{"mode": "train", "epoch": 110, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84737, "loss": 0.00054, "grad_norm": 0.00128, "time": 0.65728}
+{"mode": "val", "epoch": 110, "iter": 407, "lr": 0.0, "AP": 0.77874, "AP .5": 0.91472, "AP .75": 0.85021, "AP (M)": 0.70734, "AP (L)": 0.80531, "AR": 0.83032, "AR .5": 0.95246, "AR .75": 0.89373, "AR (M)": 0.79039, "AR (L)": 0.88863}
+{"mode": "train", "epoch": 111, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05759, "heatmap_loss": 0.00054, "acc_pose": 0.84228, "loss": 0.00054, "grad_norm": 0.00128, "time": 0.71015}
+{"mode": "train", "epoch": 111, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.83947, "loss": 0.00054, "grad_norm": 0.0014, "time": 0.65602}
+{"mode": "train", "epoch": 111, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83783, "loss": 0.00054, "grad_norm": 0.00146, "time": 0.65669}
+{"mode": "train", "epoch": 111, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84481, "loss": 0.00054, "grad_norm": 0.00139, "time": 0.65658}
+{"mode": "train", "epoch": 111, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84181, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65656}
+{"mode": "train", "epoch": 112, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05794, "heatmap_loss": 0.00054, "acc_pose": 0.84649, "loss": 0.00054, "grad_norm": 0.00127, "time": 0.71447}
+{"mode": "train", "epoch": 112, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83705, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.65693}
+{"mode": "train", "epoch": 112, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84702, "loss": 0.00054, "grad_norm": 0.00132, "time": 0.65692}
+{"mode": "train", "epoch": 112, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.85025, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.65669}
+{"mode": "train", "epoch": 112, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84401, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.65712}
+{"mode": "train", "epoch": 113, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05669, "heatmap_loss": 0.00054, "acc_pose": 0.84632, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.71341}
+{"mode": "train", "epoch": 113, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84328, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.65705}
+{"mode": "train", "epoch": 113, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84354, "loss": 0.00054, "grad_norm": 0.00123, "time": 0.6579}
+{"mode": "train", "epoch": 113, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00054, "acc_pose": 0.84807, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.65742}
+{"mode": "train", "epoch": 113, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.83915, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65744}
+{"mode": "train", "epoch": 114, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05639, "heatmap_loss": 0.00054, "acc_pose": 0.83833, "loss": 0.00054, "grad_norm": 0.00129, "time": 0.71333}
+{"mode": "train", "epoch": 114, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84189, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65674}
+{"mode": "train", "epoch": 114, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.83938, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65671}
+{"mode": "train", "epoch": 114, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84824, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.65683}
+{"mode": "train", "epoch": 114, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84732, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.65717}
+{"mode": "train", "epoch": 115, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05674, "heatmap_loss": 0.00054, "acc_pose": 0.84381, "loss": 0.00054, "grad_norm": 0.00117, "time": 0.71398}
+{"mode": "train", "epoch": 115, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84608, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.6573}
+{"mode": "train", "epoch": 115, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.83948, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.65703}
+{"mode": "train", "epoch": 115, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.846, "loss": 0.00053, "grad_norm": 0.00128, "time": 0.65722}
+{"mode": "train", "epoch": 115, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84605, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.65707}
+{"mode": "train", "epoch": 116, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05649, "heatmap_loss": 0.00054, "acc_pose": 0.84872, "loss": 0.00054, "grad_norm": 0.00128, "time": 0.71352}
+{"mode": "train", "epoch": 116, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.83899, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.65722}
+{"mode": "train", "epoch": 116, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84177, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65731}
+{"mode": "train", "epoch": 116, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.84491, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65727}
+{"mode": "train", "epoch": 116, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84412, "loss": 0.00054, "grad_norm": 0.0013, "time": 0.65721}
+{"mode": "train", "epoch": 117, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05678, "heatmap_loss": 0.00054, "acc_pose": 0.84178, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.71343}
+{"mode": "train", "epoch": 117, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84487, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.65752}
+{"mode": "train", "epoch": 117, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84635, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.6575}
+{"mode": "train", "epoch": 117, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84112, "loss": 0.00054, "grad_norm": 0.00129, "time": 0.65724}
+{"mode": "train", "epoch": 117, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84887, "loss": 0.00054, "grad_norm": 0.00124, "time": 0.657}
+{"mode": "train", "epoch": 118, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05637, "heatmap_loss": 0.00054, "acc_pose": 0.84469, "loss": 0.00054, "grad_norm": 0.00145, "time": 0.7132}
+{"mode": "train", "epoch": 118, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84073, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65747}
+{"mode": "train", "epoch": 118, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84637, "loss": 0.00054, "grad_norm": 0.00118, "time": 0.65735}
+{"mode": "train", "epoch": 118, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84605, "loss": 0.00053, "grad_norm": 0.00132, "time": 0.65751}
+{"mode": "train", "epoch": 118, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84699, "loss": 0.00054, "grad_norm": 0.00129, "time": 0.65756}
+{"mode": "train", "epoch": 119, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05647, "heatmap_loss": 0.00053, "acc_pose": 0.85215, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.71454}
+{"mode": "train", "epoch": 119, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00044, "heatmap_loss": 0.00053, "acc_pose": 0.85015, "loss": 0.00053, "grad_norm": 0.00111, "time": 0.65756}
+{"mode": "train", "epoch": 119, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.83735, "loss": 0.00054, "grad_norm": 0.00122, "time": 0.65733}
+{"mode": "train", "epoch": 119, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.84803, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65754}
+{"mode": "train", "epoch": 119, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84083, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65735}
+{"mode": "train", "epoch": 120, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05733, "heatmap_loss": 0.00053, "acc_pose": 0.8509, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.71403}
+{"mode": "train", "epoch": 120, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84607, "loss": 0.00053, "grad_norm": 0.00128, "time": 0.6571}
+{"mode": "train", "epoch": 120, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00053, "acc_pose": 0.8458, "loss": 0.00053, "grad_norm": 0.00121, "time": 0.65768}
+{"mode": "train", "epoch": 120, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84454, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65755}
+{"mode": "train", "epoch": 120, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84816, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.65738}
+{"mode": "val", "epoch": 120, "iter": 407, "lr": 0.0, "AP": 0.77938, "AP .5": 0.91283, "AP .75": 0.85084, "AP (M)": 0.70665, "AP (L)": 0.80764, "AR": 0.83111, "AR .5": 0.95088, "AR .75": 0.89358, "AR (M)": 0.79096, "AR (L)": 0.89045}
+{"mode": "train", "epoch": 121, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05619, "heatmap_loss": 0.00053, "acc_pose": 0.84398, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.71076}
+{"mode": "train", "epoch": 121, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.851, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.65721}
+{"mode": "train", "epoch": 121, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00054, "acc_pose": 0.84794, "loss": 0.00054, "grad_norm": 0.00125, "time": 0.65743}
+{"mode": "train", "epoch": 121, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84888, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.65702}
+{"mode": "train", "epoch": 121, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84608, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.65743}
+{"mode": "train", "epoch": 122, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05804, "heatmap_loss": 0.00053, "acc_pose": 0.84551, "loss": 0.00053, "grad_norm": 0.00128, "time": 0.71487}
+{"mode": "train", "epoch": 122, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00054, "acc_pose": 0.8491, "loss": 0.00054, "grad_norm": 0.0012, "time": 0.65647}
+{"mode": "train", "epoch": 122, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00053, "acc_pose": 0.84665, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.65684}
+{"mode": "train", "epoch": 122, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84878, "loss": 0.00053, "grad_norm": 0.00112, "time": 0.65674}
+{"mode": "train", "epoch": 122, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00054, "acc_pose": 0.84239, "loss": 0.00054, "grad_norm": 0.00131, "time": 0.65704}
+{"mode": "train", "epoch": 123, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05666, "heatmap_loss": 0.00053, "acc_pose": 0.85217, "loss": 0.00053, "grad_norm": 0.00126, "time": 0.71302}
+{"mode": "train", "epoch": 123, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.8504, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.65668}
+{"mode": "train", "epoch": 123, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84989, "loss": 0.00053, "grad_norm": 0.00133, "time": 0.65699}
+{"mode": "train", "epoch": 123, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84575, "loss": 0.00053, "grad_norm": 0.00126, "time": 0.65678}
+{"mode": "train", "epoch": 123, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84772, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.65704}
+{"mode": "train", "epoch": 124, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05729, "heatmap_loss": 0.00053, "acc_pose": 0.84173, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.71422}
+{"mode": "train", "epoch": 124, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84397, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.6567}
+{"mode": "train", "epoch": 124, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84799, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.65716}
+{"mode": "train", "epoch": 124, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00054, "acc_pose": 0.84447, "loss": 0.00054, "grad_norm": 0.00126, "time": 0.65688}
+{"mode": "train", "epoch": 124, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00042, "heatmap_loss": 0.00053, "acc_pose": 0.84556, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.65719}
+{"mode": "train", "epoch": 125, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05682, "heatmap_loss": 0.00053, "acc_pose": 0.84609, "loss": 0.00053, "grad_norm": 0.00128, "time": 0.71327}
+{"mode": "train", "epoch": 125, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84684, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65645}
+{"mode": "train", "epoch": 125, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84376, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.6566}
+{"mode": "train", "epoch": 125, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.8407, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.65661}
+{"mode": "train", "epoch": 125, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00053, "acc_pose": 0.84701, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.65673}
+{"mode": "train", "epoch": 126, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05699, "heatmap_loss": 0.00053, "acc_pose": 0.84801, "loss": 0.00053, "grad_norm": 0.00124, "time": 0.71349}
+{"mode": "train", "epoch": 126, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84636, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.65708}
+{"mode": "train", "epoch": 126, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00037, "heatmap_loss": 0.00053, "acc_pose": 0.84539, "loss": 0.00053, "grad_norm": 0.0013, "time": 0.65689}
+{"mode": "train", "epoch": 126, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85116, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.65711}
+{"mode": "train", "epoch": 126, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84817, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.65687}
+{"mode": "train", "epoch": 127, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05678, "heatmap_loss": 0.00053, "acc_pose": 0.85082, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.71338}
+{"mode": "train", "epoch": 127, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00054, "acc_pose": 0.84249, "loss": 0.00054, "grad_norm": 0.00112, "time": 0.65721}
+{"mode": "train", "epoch": 127, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84715, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.65712}
+{"mode": "train", "epoch": 127, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84955, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.65719}
+{"mode": "train", "epoch": 127, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85065, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65699}
+{"mode": "train", "epoch": 128, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0567, "heatmap_loss": 0.00053, "acc_pose": 0.8483, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.7137}
+{"mode": "train", "epoch": 128, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84864, "loss": 0.00053, "grad_norm": 0.00121, "time": 0.65713}
+{"mode": "train", "epoch": 128, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.85122, "loss": 0.00053, "grad_norm": 0.00119, "time": 0.6574}
+{"mode": "train", "epoch": 128, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.8363, "loss": 0.00053, "grad_norm": 0.00134, "time": 0.65711}
+{"mode": "train", "epoch": 128, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84676, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.65708}
+{"mode": "train", "epoch": 129, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05888, "heatmap_loss": 0.00053, "acc_pose": 0.85016, "loss": 0.00053, "grad_norm": 0.00131, "time": 0.71521}
+{"mode": "train", "epoch": 129, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84811, "loss": 0.00053, "grad_norm": 0.00127, "time": 0.65692}
+{"mode": "train", "epoch": 129, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84545, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.65683}
+{"mode": "train", "epoch": 129, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85016, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.65717}
+{"mode": "train", "epoch": 129, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84181, "loss": 0.00053, "grad_norm": 0.00116, "time": 0.65676}
+{"mode": "train", "epoch": 130, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05732, "heatmap_loss": 0.00053, "acc_pose": 0.84994, "loss": 0.00053, "grad_norm": 0.00119, "time": 0.71449}
+{"mode": "train", "epoch": 130, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00053, "acc_pose": 0.83903, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.65714}
+{"mode": "train", "epoch": 130, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84364, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.65722}
+{"mode": "train", "epoch": 130, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84641, "loss": 0.00053, "grad_norm": 0.00125, "time": 0.65726}
+{"mode": "train", "epoch": 130, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.85063, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65723}
+{"mode": "val", "epoch": 130, "iter": 407, "lr": 0.0, "AP": 0.77908, "AP .5": 0.91447, "AP .75": 0.85039, "AP (M)": 0.7058, "AP (L)": 0.80735, "AR": 0.83105, "AR .5": 0.9534, "AR .75": 0.89295, "AR (M)": 0.79014, "AR (L)": 0.89108}
+{"mode": "train", "epoch": 131, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05603, "heatmap_loss": 0.00053, "acc_pose": 0.85422, "loss": 0.00053, "grad_norm": 0.00119, "time": 0.70867}
+{"mode": "train", "epoch": 131, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84952, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65598}
+{"mode": "train", "epoch": 131, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.84699, "loss": 0.00053, "grad_norm": 0.00128, "time": 0.6568}
+{"mode": "train", "epoch": 131, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85026, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.65674}
+{"mode": "train", "epoch": 131, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00053, "acc_pose": 0.85034, "loss": 0.00053, "grad_norm": 0.00123, "time": 0.65671}
+{"mode": "train", "epoch": 132, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.057, "heatmap_loss": 0.00052, "acc_pose": 0.84568, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.71468}
+{"mode": "train", "epoch": 132, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00053, "acc_pose": 0.84953, "loss": 0.00053, "grad_norm": 0.00111, "time": 0.65724}
+{"mode": "train", "epoch": 132, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84687, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.65703}
+{"mode": "train", "epoch": 132, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84784, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.65674}
+{"mode": "train", "epoch": 132, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85471, "loss": 0.00053, "grad_norm": 0.00122, "time": 0.65683}
+{"mode": "train", "epoch": 133, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05711, "heatmap_loss": 0.00052, "acc_pose": 0.84423, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.71506}
+{"mode": "train", "epoch": 133, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.8486, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.6566}
+{"mode": "train", "epoch": 133, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84618, "loss": 0.00053, "grad_norm": 0.0012, "time": 0.65708}
+{"mode": "train", "epoch": 133, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84365, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.65717}
+{"mode": "train", "epoch": 133, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.84428, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.65727}
+{"mode": "train", "epoch": 134, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05676, "heatmap_loss": 0.00052, "acc_pose": 0.85121, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.71353}
+{"mode": "train", "epoch": 134, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84783, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.6566}
+{"mode": "train", "epoch": 134, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84734, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65696}
+{"mode": "train", "epoch": 134, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.85444, "loss": 0.00052, "grad_norm": 0.00123, "time": 0.65694}
+{"mode": "train", "epoch": 134, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.84108, "loss": 0.00053, "grad_norm": 0.00115, "time": 0.65683}
+{"mode": "train", "epoch": 135, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05689, "heatmap_loss": 0.00052, "acc_pose": 0.85049, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.71385}
+{"mode": "train", "epoch": 135, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.84649, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.65686}
+{"mode": "train", "epoch": 135, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84787, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.657}
+{"mode": "train", "epoch": 135, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00053, "acc_pose": 0.85078, "loss": 0.00053, "grad_norm": 0.00129, "time": 0.65661}
+{"mode": "train", "epoch": 135, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00053, "acc_pose": 0.85285, "loss": 0.00053, "grad_norm": 0.00118, "time": 0.65725}
+{"mode": "train", "epoch": 136, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05711, "heatmap_loss": 0.00053, "acc_pose": 0.84831, "loss": 0.00053, "grad_norm": 0.00113, "time": 0.71548}
+{"mode": "train", "epoch": 136, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00053, "acc_pose": 0.85523, "loss": 0.00053, "grad_norm": 0.00121, "time": 0.65779}
+{"mode": "train", "epoch": 136, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84833, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.65732}
+{"mode": "train", "epoch": 136, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84791, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.65741}
+{"mode": "train", "epoch": 136, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85182, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65698}
+{"mode": "train", "epoch": 137, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05639, "heatmap_loss": 0.00052, "acc_pose": 0.84786, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.71416}
+{"mode": "train", "epoch": 137, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00053, "acc_pose": 0.84618, "loss": 0.00053, "grad_norm": 0.00117, "time": 0.65723}
+{"mode": "train", "epoch": 137, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84786, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.65756}
+{"mode": "train", "epoch": 137, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85243, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.6574}
+{"mode": "train", "epoch": 137, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85031, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.65747}
+{"mode": "train", "epoch": 138, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05694, "heatmap_loss": 0.00052, "acc_pose": 0.84605, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.71453}
+{"mode": "train", "epoch": 138, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85464, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65752}
+{"mode": "train", "epoch": 138, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85005, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.65704}
+{"mode": "train", "epoch": 138, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84815, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.65739}
+{"mode": "train", "epoch": 138, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85362, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65709}
+{"mode": "train", "epoch": 139, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05632, "heatmap_loss": 0.00052, "acc_pose": 0.84716, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.71346}
+{"mode": "train", "epoch": 139, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84746, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65696}
+{"mode": "train", "epoch": 139, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00048, "heatmap_loss": 0.00052, "acc_pose": 0.84988, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.65739}
+{"mode": "train", "epoch": 139, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84964, "loss": 0.00052, "grad_norm": 0.00123, "time": 0.65728}
+{"mode": "train", "epoch": 139, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.849, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.65751}
+{"mode": "train", "epoch": 140, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05726, "heatmap_loss": 0.00052, "acc_pose": 0.85255, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.71404}
+{"mode": "train", "epoch": 140, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.84767, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65706}
+{"mode": "train", "epoch": 140, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00052, "acc_pose": 0.85084, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65763}
+{"mode": "train", "epoch": 140, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00027, "heatmap_loss": 0.00053, "acc_pose": 0.85029, "loss": 0.00053, "grad_norm": 0.00114, "time": 0.65724}
+{"mode": "train", "epoch": 140, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84992, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65753}
+{"mode": "val", "epoch": 140, "iter": 407, "lr": 0.0, "AP": 0.78109, "AP .5": 0.91405, "AP .75": 0.85084, "AP (M)": 0.70818, "AP (L)": 0.80947, "AR": 0.8327, "AR .5": 0.95277, "AR .75": 0.89373, "AR (M)": 0.79246, "AR (L)": 0.89194}
+{"mode": "train", "epoch": 141, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05856, "heatmap_loss": 0.00052, "acc_pose": 0.84929, "loss": 0.00052, "grad_norm": 0.00122, "time": 0.71139}
+{"mode": "train", "epoch": 141, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85332, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.65637}
+{"mode": "train", "epoch": 141, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84396, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65672}
+{"mode": "train", "epoch": 141, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.8524, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65696}
+{"mode": "train", "epoch": 141, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84799, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65681}
+{"mode": "train", "epoch": 142, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05875, "heatmap_loss": 0.00052, "acc_pose": 0.85404, "loss": 0.00052, "grad_norm": 0.0012, "time": 0.71591}
+{"mode": "train", "epoch": 142, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84473, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.65757}
+{"mode": "train", "epoch": 142, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85301, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65716}
+{"mode": "train", "epoch": 142, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84956, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65693}
+{"mode": "train", "epoch": 142, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.85032, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65679}
+{"mode": "train", "epoch": 143, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05687, "heatmap_loss": 0.00052, "acc_pose": 0.8587, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.71376}
+{"mode": "train", "epoch": 143, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85236, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.65614}
+{"mode": "train", "epoch": 143, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85418, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65642}
+{"mode": "train", "epoch": 143, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.84939, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65689}
+{"mode": "train", "epoch": 143, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.84998, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.657}
+{"mode": "train", "epoch": 144, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05727, "heatmap_loss": 0.00052, "acc_pose": 0.84693, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.71444}
+{"mode": "train", "epoch": 144, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.84856, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.65712}
+{"mode": "train", "epoch": 144, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.8478, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.65686}
+{"mode": "train", "epoch": 144, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84685, "loss": 0.00052, "grad_norm": 0.00118, "time": 0.65702}
+{"mode": "train", "epoch": 144, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85095, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65701}
+{"mode": "train", "epoch": 145, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05712, "heatmap_loss": 0.00052, "acc_pose": 0.85204, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.71472}
+{"mode": "train", "epoch": 145, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85361, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.65737}
+{"mode": "train", "epoch": 145, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84824, "loss": 0.00052, "grad_norm": 0.00124, "time": 0.65684}
+{"mode": "train", "epoch": 145, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84676, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65713}
+{"mode": "train", "epoch": 145, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.85812, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65693}
+{"mode": "train", "epoch": 146, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05762, "heatmap_loss": 0.00052, "acc_pose": 0.84946, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.71431}
+{"mode": "train", "epoch": 146, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.84975, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.65665}
+{"mode": "train", "epoch": 146, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.84659, "loss": 0.00052, "grad_norm": 0.00105, "time": 0.6567}
+{"mode": "train", "epoch": 146, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00037, "heatmap_loss": 0.00051, "acc_pose": 0.85168, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65682}
+{"mode": "train", "epoch": 146, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85453, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65708}
+{"mode": "train", "epoch": 147, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05734, "heatmap_loss": 0.00051, "acc_pose": 0.85656, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.71427}
+{"mode": "train", "epoch": 147, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85206, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65674}
+{"mode": "train", "epoch": 147, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85233, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65712}
+{"mode": "train", "epoch": 147, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.84621, "loss": 0.00052, "grad_norm": 0.00117, "time": 0.65664}
+{"mode": "train", "epoch": 147, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00052, "acc_pose": 0.84798, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.65709}
+{"mode": "train", "epoch": 148, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05604, "heatmap_loss": 0.00052, "acc_pose": 0.8513, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.71365}
+{"mode": "train", "epoch": 148, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85127, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65685}
+{"mode": "train", "epoch": 148, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85418, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.6569}
+{"mode": "train", "epoch": 148, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85583, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65751}
+{"mode": "train", "epoch": 148, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85662, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65702}
+{"mode": "train", "epoch": 149, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05679, "heatmap_loss": 0.00051, "acc_pose": 0.84781, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.71351}
+{"mode": "train", "epoch": 149, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.86024, "loss": 0.00052, "grad_norm": 0.00108, "time": 0.65727}
+{"mode": "train", "epoch": 149, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85976, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65741}
+{"mode": "train", "epoch": 149, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.84839, "loss": 0.00052, "grad_norm": 0.00119, "time": 0.65731}
+{"mode": "train", "epoch": 149, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85611, "loss": 0.00052, "grad_norm": 0.00115, "time": 0.65733}
+{"mode": "train", "epoch": 150, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05725, "heatmap_loss": 0.00051, "acc_pose": 0.8541, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.71509}
+{"mode": "train", "epoch": 150, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00047, "heatmap_loss": 0.00051, "acc_pose": 0.85749, "loss": 0.00051, "grad_norm": 0.00119, "time": 0.65716}
+{"mode": "train", "epoch": 150, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85398, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.65666}
+{"mode": "train", "epoch": 150, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85894, "loss": 0.00051, "grad_norm": 0.00117, "time": 0.65689}
+{"mode": "train", "epoch": 150, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.85217, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65669}
+{"mode": "val", "epoch": 150, "iter": 407, "lr": 0.0, "AP": 0.77962, "AP .5": 0.91525, "AP .75": 0.84975, "AP (M)": 0.70633, "AP (L)": 0.80897, "AR": 0.83145, "AR .5": 0.95293, "AR .75": 0.89342, "AR (M)": 0.78995, "AR (L)": 0.89238}
+{"mode": "train", "epoch": 151, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05581, "heatmap_loss": 0.00051, "acc_pose": 0.8527, "loss": 0.00051, "grad_norm": 0.00117, "time": 0.70883}
+{"mode": "train", "epoch": 151, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85459, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65641}
+{"mode": "train", "epoch": 151, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85422, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65688}
+{"mode": "train", "epoch": 151, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85115, "loss": 0.00052, "grad_norm": 0.00116, "time": 0.65638}
+{"mode": "train", "epoch": 151, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.8583, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.65654}
+{"mode": "train", "epoch": 152, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05705, "heatmap_loss": 0.00051, "acc_pose": 0.85502, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.71334}
+{"mode": "train", "epoch": 152, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00052, "acc_pose": 0.8564, "loss": 0.00052, "grad_norm": 0.0011, "time": 0.65647}
+{"mode": "train", "epoch": 152, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85617, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.65683}
+{"mode": "train", "epoch": 152, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.8516, "loss": 0.00052, "grad_norm": 0.00114, "time": 0.65692}
+{"mode": "train", "epoch": 152, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00052, "acc_pose": 0.85976, "loss": 0.00052, "grad_norm": 0.00111, "time": 0.65679}
+{"mode": "train", "epoch": 153, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0566, "heatmap_loss": 0.00051, "acc_pose": 0.85681, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.71384}
+{"mode": "train", "epoch": 153, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86106, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.65695}
+{"mode": "train", "epoch": 153, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85669, "loss": 0.00051, "grad_norm": 0.00107, "time": 0.65724}
+{"mode": "train", "epoch": 153, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85156, "loss": 0.00052, "grad_norm": 0.00109, "time": 0.65683}
+{"mode": "train", "epoch": 153, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00051, "acc_pose": 0.8537, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.6573}
+{"mode": "train", "epoch": 154, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05675, "heatmap_loss": 0.00051, "acc_pose": 0.85228, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.7132}
+{"mode": "train", "epoch": 154, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0005, "heatmap_loss": 0.00051, "acc_pose": 0.85556, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65731}
+{"mode": "train", "epoch": 154, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00052, "acc_pose": 0.847, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65681}
+{"mode": "train", "epoch": 154, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85169, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65664}
+{"mode": "train", "epoch": 154, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00051, "acc_pose": 0.85216, "loss": 0.00051, "grad_norm": 0.00104, "time": 0.6568}
+{"mode": "train", "epoch": 155, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05697, "heatmap_loss": 0.00051, "acc_pose": 0.85386, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.71366}
+{"mode": "train", "epoch": 155, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85138, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65708}
+{"mode": "train", "epoch": 155, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85623, "loss": 0.00051, "grad_norm": 0.00117, "time": 0.65689}
+{"mode": "train", "epoch": 155, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85458, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.65701}
+{"mode": "train", "epoch": 155, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85395, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.65664}
+{"mode": "train", "epoch": 156, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05735, "heatmap_loss": 0.00051, "acc_pose": 0.85045, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.71435}
+{"mode": "train", "epoch": 156, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.8529, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65735}
+{"mode": "train", "epoch": 156, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00052, "acc_pose": 0.85239, "loss": 0.00052, "grad_norm": 0.00113, "time": 0.65714}
+{"mode": "train", "epoch": 156, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85453, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.657}
+{"mode": "train", "epoch": 156, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85226, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65728}
+{"mode": "train", "epoch": 157, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05923, "heatmap_loss": 0.00051, "acc_pose": 0.86102, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.71594}
+{"mode": "train", "epoch": 157, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86106, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.6571}
+{"mode": "train", "epoch": 157, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85417, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65669}
+{"mode": "train", "epoch": 157, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86083, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65705}
+{"mode": "train", "epoch": 157, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00051, "acc_pose": 0.85465, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65667}
+{"mode": "train", "epoch": 158, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05626, "heatmap_loss": 0.00052, "acc_pose": 0.85075, "loss": 0.00052, "grad_norm": 0.00121, "time": 0.71427}
+{"mode": "train", "epoch": 158, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85457, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65665}
+{"mode": "train", "epoch": 158, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85012, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.65695}
+{"mode": "train", "epoch": 158, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85645, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65688}
+{"mode": "train", "epoch": 158, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00051, "acc_pose": 0.85554, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.65708}
+{"mode": "train", "epoch": 159, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05645, "heatmap_loss": 0.00051, "acc_pose": 0.85191, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.71356}
+{"mode": "train", "epoch": 159, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.84796, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.65667}
+{"mode": "train", "epoch": 159, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85715, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65701}
+{"mode": "train", "epoch": 159, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85444, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.65704}
+{"mode": "train", "epoch": 159, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.8519, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65716}
+{"mode": "train", "epoch": 160, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05713, "heatmap_loss": 0.00052, "acc_pose": 0.85729, "loss": 0.00052, "grad_norm": 0.00112, "time": 0.71378}
+{"mode": "train", "epoch": 160, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.85758, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65677}
+{"mode": "train", "epoch": 160, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85362, "loss": 0.00051, "grad_norm": 0.00104, "time": 0.65678}
+{"mode": "train", "epoch": 160, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85279, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65708}
+{"mode": "train", "epoch": 160, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85205, "loss": 0.00051, "grad_norm": 0.00117, "time": 0.6574}
+{"mode": "val", "epoch": 160, "iter": 407, "lr": 0.0, "AP": 0.78006, "AP .5": 0.91366, "AP .75": 0.85084, "AP (M)": 0.70701, "AP (L)": 0.80802, "AR": 0.83259, "AR .5": 0.95324, "AR .75": 0.89468, "AR (M)": 0.79145, "AR (L)": 0.89294}
+{"mode": "train", "epoch": 161, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05641, "heatmap_loss": 0.00051, "acc_pose": 0.85867, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.70935}
+{"mode": "train", "epoch": 161, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.8618, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65629}
+{"mode": "train", "epoch": 161, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85212, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65658}
+{"mode": "train", "epoch": 161, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85741, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65636}
+{"mode": "train", "epoch": 161, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00051, "acc_pose": 0.85065, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.65655}
+{"mode": "train", "epoch": 162, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05615, "heatmap_loss": 0.0005, "acc_pose": 0.85865, "loss": 0.0005, "grad_norm": 0.00104, "time": 0.71418}
+{"mode": "train", "epoch": 162, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85413, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.65678}
+{"mode": "train", "epoch": 162, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85805, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65685}
+{"mode": "train", "epoch": 162, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85584, "loss": 0.00051, "grad_norm": 0.00104, "time": 0.65685}
+{"mode": "train", "epoch": 162, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.8553, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.65683}
+{"mode": "train", "epoch": 163, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05594, "heatmap_loss": 0.0005, "acc_pose": 0.85999, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.71418}
+{"mode": "train", "epoch": 163, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.85198, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.65654}
+{"mode": "train", "epoch": 163, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85115, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65695}
+{"mode": "train", "epoch": 163, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85101, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.6567}
+{"mode": "train", "epoch": 163, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85243, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65691}
+{"mode": "train", "epoch": 164, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05712, "heatmap_loss": 0.00051, "acc_pose": 0.85402, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.71412}
+{"mode": "train", "epoch": 164, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85376, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65694}
+{"mode": "train", "epoch": 164, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85442, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.65701}
+{"mode": "train", "epoch": 164, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85498, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.65699}
+{"mode": "train", "epoch": 164, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86116, "loss": 0.00051, "grad_norm": 0.00105, "time": 0.65702}
+{"mode": "train", "epoch": 165, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05654, "heatmap_loss": 0.00051, "acc_pose": 0.85641, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.71412}
+{"mode": "train", "epoch": 165, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85707, "loss": 0.00051, "grad_norm": 0.00109, "time": 0.65668}
+{"mode": "train", "epoch": 165, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.86117, "loss": 0.00051, "grad_norm": 0.00113, "time": 0.65679}
+{"mode": "train", "epoch": 165, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.8561, "loss": 0.0005, "grad_norm": 0.00103, "time": 0.65691}
+{"mode": "train", "epoch": 165, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86619, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.65688}
+{"mode": "train", "epoch": 166, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05717, "heatmap_loss": 0.00051, "acc_pose": 0.85879, "loss": 0.00051, "grad_norm": 0.00115, "time": 0.71451}
+{"mode": "train", "epoch": 166, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85532, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.6566}
+{"mode": "train", "epoch": 166, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85744, "loss": 0.0005, "grad_norm": 0.00114, "time": 0.65665}
+{"mode": "train", "epoch": 166, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85521, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.65702}
+{"mode": "train", "epoch": 166, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.85835, "loss": 0.00051, "grad_norm": 0.00116, "time": 0.65668}
+{"mode": "train", "epoch": 167, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05639, "heatmap_loss": 0.00051, "acc_pose": 0.85976, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.71281}
+{"mode": "train", "epoch": 167, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00051, "acc_pose": 0.85688, "loss": 0.00051, "grad_norm": 0.00112, "time": 0.65674}
+{"mode": "train", "epoch": 167, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00051, "acc_pose": 0.8546, "loss": 0.00051, "grad_norm": 0.00102, "time": 0.65742}
+{"mode": "train", "epoch": 167, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.86503, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.65691}
+{"mode": "train", "epoch": 167, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.8534, "loss": 0.00051, "grad_norm": 0.00106, "time": 0.65712}
+{"mode": "train", "epoch": 168, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05706, "heatmap_loss": 0.00051, "acc_pose": 0.86455, "loss": 0.00051, "grad_norm": 0.00114, "time": 0.71354}
+{"mode": "train", "epoch": 168, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85966, "loss": 0.0005, "grad_norm": 0.00108, "time": 0.65635}
+{"mode": "train", "epoch": 168, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00051, "acc_pose": 0.8537, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.65671}
+{"mode": "train", "epoch": 168, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.85605, "loss": 0.00051, "grad_norm": 0.0011, "time": 0.65709}
+{"mode": "train", "epoch": 168, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00051, "acc_pose": 0.86197, "loss": 0.00051, "grad_norm": 0.00108, "time": 0.65689}
+{"mode": "train", "epoch": 169, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0563, "heatmap_loss": 0.0005, "acc_pose": 0.86231, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.7125}
+{"mode": "train", "epoch": 169, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.0005, "acc_pose": 0.84999, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.65638}
+{"mode": "train", "epoch": 169, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86195, "loss": 0.00051, "grad_norm": 0.00107, "time": 0.65633}
+{"mode": "train", "epoch": 169, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.0005, "acc_pose": 0.85814, "loss": 0.0005, "grad_norm": 0.00107, "time": 0.6567}
+{"mode": "train", "epoch": 169, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00051, "acc_pose": 0.86322, "loss": 0.00051, "grad_norm": 0.00102, "time": 0.65671}
+{"mode": "train", "epoch": 170, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05726, "heatmap_loss": 0.00051, "acc_pose": 0.85875, "loss": 0.00051, "grad_norm": 0.00111, "time": 0.71374}
+{"mode": "train", "epoch": 170, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.85467, "loss": 0.0005, "grad_norm": 0.00111, "time": 0.65748}
+{"mode": "train", "epoch": 170, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00043, "heatmap_loss": 0.0005, "acc_pose": 0.8534, "loss": 0.0005, "grad_norm": 0.00106, "time": 0.65759}
+{"mode": "train", "epoch": 170, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00026, "heatmap_loss": 0.0005, "acc_pose": 0.8583, "loss": 0.0005, "grad_norm": 0.0011, "time": 0.6572}
+{"mode": "train", "epoch": 170, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.0005, "acc_pose": 0.85659, "loss": 0.0005, "grad_norm": 0.00109, "time": 0.65715}
+{"mode": "val", "epoch": 170, "iter": 407, "lr": 0.0, "AP": 0.77922, "AP .5": 0.9135, "AP .75": 0.84918, "AP (M)": 0.70737, "AP (L)": 0.80591, "AR": 0.83147, "AR .5": 0.95277, "AR .75": 0.89279, "AR (M)": 0.79131, "AR (L)": 0.89101}
+{"mode": "train", "epoch": 171, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05626, "heatmap_loss": 0.0005, "acc_pose": 0.85899, "loss": 0.0005, "grad_norm": 0.00102, "time": 0.70928}
+{"mode": "train", "epoch": 171, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86102, "loss": 0.00049, "grad_norm": 0.00099, "time": 0.65604}
+{"mode": "train", "epoch": 171, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.0005, "acc_pose": 0.86637, "loss": 0.0005, "grad_norm": 0.00095, "time": 0.6564}
+{"mode": "train", "epoch": 171, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86336, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.65656}
+{"mode": "train", "epoch": 171, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.85939, "loss": 0.00049, "grad_norm": 0.00094, "time": 0.65668}
+{"mode": "train", "epoch": 172, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05711, "heatmap_loss": 0.00049, "acc_pose": 0.86151, "loss": 0.00049, "grad_norm": 0.00094, "time": 0.71424}
+{"mode": "train", "epoch": 172, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.85441, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.65683}
+{"mode": "train", "epoch": 172, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.0005, "acc_pose": 0.86065, "loss": 0.0005, "grad_norm": 0.00094, "time": 0.65662}
+{"mode": "train", "epoch": 172, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00049, "acc_pose": 0.86565, "loss": 0.00049, "grad_norm": 0.001, "time": 0.65678}
+{"mode": "train", "epoch": 172, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86478, "loss": 0.00049, "grad_norm": 0.00093, "time": 0.65654}
+{"mode": "train", "epoch": 173, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05738, "heatmap_loss": 0.00049, "acc_pose": 0.86233, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.71383}
+{"mode": "train", "epoch": 173, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.8643, "loss": 0.00049, "grad_norm": 0.00094, "time": 0.65711}
+{"mode": "train", "epoch": 173, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86127, "loss": 0.00049, "grad_norm": 0.00091, "time": 0.65697}
+{"mode": "train", "epoch": 173, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00049, "acc_pose": 0.86706, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.65696}
+{"mode": "train", "epoch": 173, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00049, "acc_pose": 0.86308, "loss": 0.00049, "grad_norm": 0.00101, "time": 0.6572}
+{"mode": "train", "epoch": 174, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05649, "heatmap_loss": 0.00049, "acc_pose": 0.86405, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.71284}
+{"mode": "train", "epoch": 174, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.85893, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.65671}
+{"mode": "train", "epoch": 174, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00049, "acc_pose": 0.86518, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.65701}
+{"mode": "train", "epoch": 174, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00051, "heatmap_loss": 0.00048, "acc_pose": 0.86721, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65687}
+{"mode": "train", "epoch": 174, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.85961, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65682}
+{"mode": "train", "epoch": 175, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05635, "heatmap_loss": 0.00048, "acc_pose": 0.86773, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.71329}
+{"mode": "train", "epoch": 175, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86362, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.657}
+{"mode": "train", "epoch": 175, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.8661, "loss": 0.00049, "grad_norm": 0.00096, "time": 0.65697}
+{"mode": "train", "epoch": 175, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86775, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65688}
+{"mode": "train", "epoch": 175, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86936, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65702}
+{"mode": "train", "epoch": 176, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05734, "heatmap_loss": 0.00049, "acc_pose": 0.86182, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.71409}
+{"mode": "train", "epoch": 176, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00048, "acc_pose": 0.86429, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.65699}
+{"mode": "train", "epoch": 176, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00049, "acc_pose": 0.857, "loss": 0.00049, "grad_norm": 0.00094, "time": 0.65712}
+{"mode": "train", "epoch": 176, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.85722, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.6572}
+{"mode": "train", "epoch": 176, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86566, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65715}
+{"mode": "train", "epoch": 177, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05706, "heatmap_loss": 0.00048, "acc_pose": 0.86746, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.71446}
+{"mode": "train", "epoch": 177, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00049, "acc_pose": 0.86263, "loss": 0.00049, "grad_norm": 0.00092, "time": 0.65704}
+{"mode": "train", "epoch": 177, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00049, "acc_pose": 0.86036, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.65695}
+{"mode": "train", "epoch": 177, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86487, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65687}
+{"mode": "train", "epoch": 177, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86228, "loss": 0.00048, "grad_norm": 0.00097, "time": 0.65709}
+{"mode": "train", "epoch": 178, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05618, "heatmap_loss": 0.00048, "acc_pose": 0.867, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.71339}
+{"mode": "train", "epoch": 178, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86565, "loss": 0.00048, "grad_norm": 0.00098, "time": 0.65646}
+{"mode": "train", "epoch": 178, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00048, "acc_pose": 0.86769, "loss": 0.00048, "grad_norm": 0.00099, "time": 0.65675}
+{"mode": "train", "epoch": 178, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.8629, "loss": 0.00048, "grad_norm": 0.00089, "time": 0.65683}
+{"mode": "train", "epoch": 178, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86777, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.65678}
+{"mode": "train", "epoch": 179, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05762, "heatmap_loss": 0.00048, "acc_pose": 0.86307, "loss": 0.00048, "grad_norm": 0.00099, "time": 0.71486}
+{"mode": "train", "epoch": 179, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86203, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.65664}
+{"mode": "train", "epoch": 179, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86441, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65698}
+{"mode": "train", "epoch": 179, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86904, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65693}
+{"mode": "train", "epoch": 179, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86992, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65723}
+{"mode": "train", "epoch": 180, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05692, "heatmap_loss": 0.00048, "acc_pose": 0.86621, "loss": 0.00048, "grad_norm": 0.00088, "time": 0.71343}
+{"mode": "train", "epoch": 180, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86856, "loss": 0.00048, "grad_norm": 0.00098, "time": 0.65676}
+{"mode": "train", "epoch": 180, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00048, "acc_pose": 0.86146, "loss": 0.00048, "grad_norm": 0.00102, "time": 0.65666}
+{"mode": "train", "epoch": 180, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86684, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.65664}
+{"mode": "train", "epoch": 180, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86713, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65693}
+{"mode": "val", "epoch": 180, "iter": 407, "lr": 0.0, "AP": 0.78293, "AP .5": 0.91406, "AP .75": 0.85147, "AP (M)": 0.70998, "AP (L)": 0.81009, "AR": 0.83462, "AR .5": 0.95356, "AR .75": 0.89452, "AR (M)": 0.79394, "AR (L)": 0.8945}
+{"mode": "train", "epoch": 181, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05604, "heatmap_loss": 0.00048, "acc_pose": 0.86597, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.70879}
+{"mode": "train", "epoch": 181, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86711, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.65614}
+{"mode": "train", "epoch": 181, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00048, "acc_pose": 0.8635, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.6568}
+{"mode": "train", "epoch": 181, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00048, "acc_pose": 0.86352, "loss": 0.00048, "grad_norm": 0.00097, "time": 0.65682}
+{"mode": "train", "epoch": 181, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86593, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65664}
+{"mode": "train", "epoch": 182, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05724, "heatmap_loss": 0.00048, "acc_pose": 0.86557, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.71434}
+{"mode": "train", "epoch": 182, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86851, "loss": 0.00048, "grad_norm": 0.00088, "time": 0.65681}
+{"mode": "train", "epoch": 182, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00025, "heatmap_loss": 0.00048, "acc_pose": 0.86275, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65695}
+{"mode": "train", "epoch": 182, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86809, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65718}
+{"mode": "train", "epoch": 182, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.8652, "loss": 0.00048, "grad_norm": 0.00098, "time": 0.65708}
+{"mode": "train", "epoch": 183, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05663, "heatmap_loss": 0.00048, "acc_pose": 0.86418, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.71417}
+{"mode": "train", "epoch": 183, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86417, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65714}
+{"mode": "train", "epoch": 183, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86381, "loss": 0.00048, "grad_norm": 0.00088, "time": 0.6569}
+{"mode": "train", "epoch": 183, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86722, "loss": 0.00048, "grad_norm": 0.001, "time": 0.65729}
+{"mode": "train", "epoch": 183, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86688, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.65704}
+{"mode": "train", "epoch": 184, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05733, "heatmap_loss": 0.00049, "acc_pose": 0.86064, "loss": 0.00049, "grad_norm": 0.00097, "time": 0.71401}
+{"mode": "train", "epoch": 184, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86872, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65712}
+{"mode": "train", "epoch": 184, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00043, "heatmap_loss": 0.00048, "acc_pose": 0.86355, "loss": 0.00048, "grad_norm": 0.00089, "time": 0.65676}
+{"mode": "train", "epoch": 184, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86084, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65694}
+{"mode": "train", "epoch": 184, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00048, "acc_pose": 0.8685, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.65665}
+{"mode": "train", "epoch": 185, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05934, "heatmap_loss": 0.00048, "acc_pose": 0.87037, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.71632}
+{"mode": "train", "epoch": 185, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86775, "loss": 0.00048, "grad_norm": 0.00097, "time": 0.65698}
+{"mode": "train", "epoch": 185, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86937, "loss": 0.00048, "grad_norm": 0.00085, "time": 0.65717}
+{"mode": "train", "epoch": 185, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86908, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.65702}
+{"mode": "train", "epoch": 185, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86604, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65704}
+{"mode": "train", "epoch": 186, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05863, "heatmap_loss": 0.00048, "acc_pose": 0.86317, "loss": 0.00048, "grad_norm": 0.00089, "time": 0.71512}
+{"mode": "train", "epoch": 186, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00048, "acc_pose": 0.86595, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65726}
+{"mode": "train", "epoch": 186, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86666, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65689}
+{"mode": "train", "epoch": 186, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86551, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.65666}
+{"mode": "train", "epoch": 186, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86902, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65702}
+{"mode": "train", "epoch": 187, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05567, "heatmap_loss": 0.00048, "acc_pose": 0.87353, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.71262}
+{"mode": "train", "epoch": 187, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86678, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.6568}
+{"mode": "train", "epoch": 187, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.8663, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65727}
+{"mode": "train", "epoch": 187, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00048, "acc_pose": 0.86625, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65664}
+{"mode": "train", "epoch": 187, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86853, "loss": 0.00048, "grad_norm": 0.00098, "time": 0.65681}
+{"mode": "train", "epoch": 188, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05633, "heatmap_loss": 0.00047, "acc_pose": 0.8657, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.71328}
+{"mode": "train", "epoch": 188, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.87236, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65674}
+{"mode": "train", "epoch": 188, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86851, "loss": 0.00048, "grad_norm": 0.00099, "time": 0.65651}
+{"mode": "train", "epoch": 188, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00048, "acc_pose": 0.86704, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65692}
+{"mode": "train", "epoch": 188, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.87131, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.65688}
+{"mode": "train", "epoch": 189, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05741, "heatmap_loss": 0.00048, "acc_pose": 0.87024, "loss": 0.00048, "grad_norm": 0.00087, "time": 0.7138}
+{"mode": "train", "epoch": 189, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86724, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65687}
+{"mode": "train", "epoch": 189, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86375, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65701}
+{"mode": "train", "epoch": 189, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86574, "loss": 0.00048, "grad_norm": 0.00089, "time": 0.65724}
+{"mode": "train", "epoch": 189, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.87527, "loss": 0.00047, "grad_norm": 0.00083, "time": 0.65705}
+{"mode": "train", "epoch": 190, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05735, "heatmap_loss": 0.00048, "acc_pose": 0.86331, "loss": 0.00048, "grad_norm": 0.00096, "time": 0.71426}
+{"mode": "train", "epoch": 190, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.87089, "loss": 0.00048, "grad_norm": 0.00091, "time": 0.6567}
+{"mode": "train", "epoch": 190, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86881, "loss": 0.00048, "grad_norm": 0.00089, "time": 0.6566}
+{"mode": "train", "epoch": 190, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87141, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65702}
+{"mode": "train", "epoch": 190, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.87044, "loss": 0.00048, "grad_norm": 0.00088, "time": 0.65673}
+{"mode": "val", "epoch": 190, "iter": 407, "lr": 0.0, "AP": 0.78185, "AP .5": 0.91344, "AP .75": 0.84973, "AP (M)": 0.70883, "AP (L)": 0.80934, "AR": 0.83336, "AR .5": 0.95246, "AR .75": 0.89185, "AR (M)": 0.79219, "AR (L)": 0.89368}
+{"mode": "train", "epoch": 191, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05561, "heatmap_loss": 0.00048, "acc_pose": 0.87075, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.70966}
+{"mode": "train", "epoch": 191, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00048, "acc_pose": 0.86917, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.65675}
+{"mode": "train", "epoch": 191, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86928, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65702}
+{"mode": "train", "epoch": 191, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86987, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.6569}
+{"mode": "train", "epoch": 191, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00047, "acc_pose": 0.86658, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65667}
+{"mode": "train", "epoch": 192, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05717, "heatmap_loss": 0.00048, "acc_pose": 0.8661, "loss": 0.00048, "grad_norm": 0.00098, "time": 0.71503}
+{"mode": "train", "epoch": 192, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00047, "acc_pose": 0.8691, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.6567}
+{"mode": "train", "epoch": 192, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86644, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.65688}
+{"mode": "train", "epoch": 192, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86721, "loss": 0.00048, "grad_norm": 0.00094, "time": 0.65673}
+{"mode": "train", "epoch": 192, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.87417, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65683}
+{"mode": "train", "epoch": 193, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05686, "heatmap_loss": 0.00048, "acc_pose": 0.86665, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.7135}
+{"mode": "train", "epoch": 193, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.87074, "loss": 0.00047, "grad_norm": 0.00086, "time": 0.65703}
+{"mode": "train", "epoch": 193, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00048, "acc_pose": 0.86945, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65705}
+{"mode": "train", "epoch": 193, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.8672, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65663}
+{"mode": "train", "epoch": 193, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.868, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65701}
+{"mode": "train", "epoch": 194, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05701, "heatmap_loss": 0.00048, "acc_pose": 0.87018, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.71435}
+{"mode": "train", "epoch": 194, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.8689, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65704}
+{"mode": "train", "epoch": 194, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.86628, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65696}
+{"mode": "train", "epoch": 194, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87296, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.65699}
+{"mode": "train", "epoch": 194, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00047, "acc_pose": 0.87231, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.65731}
+{"mode": "train", "epoch": 195, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05666, "heatmap_loss": 0.00047, "acc_pose": 0.86747, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.71438}
+{"mode": "train", "epoch": 195, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86382, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65702}
+{"mode": "train", "epoch": 195, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86944, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65691}
+{"mode": "train", "epoch": 195, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00047, "acc_pose": 0.86731, "loss": 0.00047, "grad_norm": 0.00097, "time": 0.65708}
+{"mode": "train", "epoch": 195, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87118, "loss": 0.00047, "grad_norm": 0.00099, "time": 0.65662}
+{"mode": "train", "epoch": 196, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05715, "heatmap_loss": 0.00047, "acc_pose": 0.86723, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.71372}
+{"mode": "train", "epoch": 196, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.87617, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65621}
+{"mode": "train", "epoch": 196, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.86245, "loss": 0.00048, "grad_norm": 0.00093, "time": 0.65699}
+{"mode": "train", "epoch": 196, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87498, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65708}
+{"mode": "train", "epoch": 196, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00047, "acc_pose": 0.86658, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65694}
+{"mode": "train", "epoch": 197, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05728, "heatmap_loss": 0.00047, "acc_pose": 0.87066, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.71417}
+{"mode": "train", "epoch": 197, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87252, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.65699}
+{"mode": "train", "epoch": 197, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86194, "loss": 0.00047, "grad_norm": 0.00097, "time": 0.657}
+{"mode": "train", "epoch": 197, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.86723, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.65684}
+{"mode": "train", "epoch": 197, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.87027, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65681}
+{"mode": "train", "epoch": 198, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05727, "heatmap_loss": 0.00047, "acc_pose": 0.87597, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.71412}
+{"mode": "train", "epoch": 198, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00048, "acc_pose": 0.86943, "loss": 0.00048, "grad_norm": 0.00092, "time": 0.65694}
+{"mode": "train", "epoch": 198, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00039, "heatmap_loss": 0.00047, "acc_pose": 0.86821, "loss": 0.00047, "grad_norm": 0.00096, "time": 0.65695}
+{"mode": "train", "epoch": 198, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.8656, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65702}
+{"mode": "train", "epoch": 198, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00047, "acc_pose": 0.87097, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65753}
+{"mode": "train", "epoch": 199, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05705, "heatmap_loss": 0.00047, "acc_pose": 0.87127, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.71421}
+{"mode": "train", "epoch": 199, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00047, "acc_pose": 0.87154, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65717}
+{"mode": "train", "epoch": 199, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00028, "heatmap_loss": 0.00048, "acc_pose": 0.86719, "loss": 0.00048, "grad_norm": 0.00095, "time": 0.65721}
+{"mode": "train", "epoch": 199, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86873, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65707}
+{"mode": "train", "epoch": 199, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87264, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65706}
+{"mode": "train", "epoch": 200, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0592, "heatmap_loss": 0.00047, "acc_pose": 0.86813, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.71561}
+{"mode": "train", "epoch": 200, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86916, "loss": 0.00047, "grad_norm": 0.00096, "time": 0.65676}
+{"mode": "train", "epoch": 200, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00047, "acc_pose": 0.86388, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65706}
+{"mode": "train", "epoch": 200, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00039, "heatmap_loss": 0.00047, "acc_pose": 0.87428, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65703}
+{"mode": "train", "epoch": 200, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86857, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.65746}
+{"mode": "val", "epoch": 200, "iter": 407, "lr": 0.0, "AP": 0.78268, "AP .5": 0.91405, "AP .75": 0.8517, "AP (M)": 0.70939, "AP (L)": 0.81027, "AR": 0.83459, "AR .5": 0.9534, "AR .75": 0.89421, "AR (M)": 0.79334, "AR (L)": 0.89498}
+{"mode": "train", "epoch": 201, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05586, "heatmap_loss": 0.00047, "acc_pose": 0.8669, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.7095}
+{"mode": "train", "epoch": 201, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00029, "heatmap_loss": 0.00047, "acc_pose": 0.86595, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65619}
+{"mode": "train", "epoch": 201, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86781, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65682}
+{"mode": "train", "epoch": 201, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87448, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.65671}
+{"mode": "train", "epoch": 201, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00047, "acc_pose": 0.86799, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65678}
+{"mode": "train", "epoch": 202, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05686, "heatmap_loss": 0.00047, "acc_pose": 0.86668, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.71356}
+{"mode": "train", "epoch": 202, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00048, "acc_pose": 0.87259, "loss": 0.00048, "grad_norm": 0.0009, "time": 0.65658}
+{"mode": "train", "epoch": 202, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86752, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65678}
+{"mode": "train", "epoch": 202, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86947, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65676}
+{"mode": "train", "epoch": 202, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87506, "loss": 0.00047, "grad_norm": 0.00086, "time": 0.65645}
+{"mode": "train", "epoch": 203, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.0562, "heatmap_loss": 0.00047, "acc_pose": 0.86458, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.71249}
+{"mode": "train", "epoch": 203, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.87133, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.65669}
+{"mode": "train", "epoch": 203, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87278, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65708}
+{"mode": "train", "epoch": 203, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00047, "acc_pose": 0.86824, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65682}
+{"mode": "train", "epoch": 203, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87015, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.6576}
+{"mode": "train", "epoch": 204, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05734, "heatmap_loss": 0.00047, "acc_pose": 0.86884, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.71482}
+{"mode": "train", "epoch": 204, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87174, "loss": 0.00047, "grad_norm": 0.00086, "time": 0.65717}
+{"mode": "train", "epoch": 204, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87004, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65708}
+{"mode": "train", "epoch": 204, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00036, "heatmap_loss": 0.00047, "acc_pose": 0.86835, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.65708}
+{"mode": "train", "epoch": 204, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86763, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.65735}
+{"mode": "train", "epoch": 205, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05738, "heatmap_loss": 0.00047, "acc_pose": 0.87289, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.71413}
+{"mode": "train", "epoch": 205, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86988, "loss": 0.00047, "grad_norm": 0.00085, "time": 0.65691}
+{"mode": "train", "epoch": 205, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.87018, "loss": 0.00047, "grad_norm": 0.00093, "time": 0.65714}
+{"mode": "train", "epoch": 205, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87679, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65687}
+{"mode": "train", "epoch": 205, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87297, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.65685}
+{"mode": "train", "epoch": 206, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05756, "heatmap_loss": 0.00047, "acc_pose": 0.87096, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.71383}
+{"mode": "train", "epoch": 206, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00044, "heatmap_loss": 0.00047, "acc_pose": 0.86364, "loss": 0.00047, "grad_norm": 0.00095, "time": 0.65681}
+{"mode": "train", "epoch": 206, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00035, "heatmap_loss": 0.00047, "acc_pose": 0.86934, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65669}
+{"mode": "train", "epoch": 206, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00047, "acc_pose": 0.87196, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65704}
+{"mode": "train", "epoch": 206, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.87375, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65715}
+{"mode": "train", "epoch": 207, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05672, "heatmap_loss": 0.00047, "acc_pose": 0.86894, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.71386}
+{"mode": "train", "epoch": 207, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86576, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65703}
+{"mode": "train", "epoch": 207, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.86995, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65698}
+{"mode": "train", "epoch": 207, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86979, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.65662}
+{"mode": "train", "epoch": 207, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.8701, "loss": 0.00047, "grad_norm": 0.00094, "time": 0.65677}
+{"mode": "train", "epoch": 208, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05685, "heatmap_loss": 0.00047, "acc_pose": 0.87002, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.71378}
+{"mode": "train", "epoch": 208, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86923, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.657}
+{"mode": "train", "epoch": 208, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86864, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65702}
+{"mode": "train", "epoch": 208, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87145, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.6567}
+{"mode": "train", "epoch": 208, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.87036, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.65686}
+{"mode": "train", "epoch": 209, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05764, "heatmap_loss": 0.00047, "acc_pose": 0.87082, "loss": 0.00047, "grad_norm": 0.00087, "time": 0.71463}
+{"mode": "train", "epoch": 209, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.87421, "loss": 0.00047, "grad_norm": 0.00092, "time": 0.65657}
+{"mode": "train", "epoch": 209, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.86906, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65694}
+{"mode": "train", "epoch": 209, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.0003, "heatmap_loss": 0.00047, "acc_pose": 0.87368, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65677}
+{"mode": "train", "epoch": 209, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00031, "heatmap_loss": 0.00047, "acc_pose": 0.87518, "loss": 0.00047, "grad_norm": 0.0009, "time": 0.65698}
+{"mode": "train", "epoch": 210, "iter": 50, "lr": 0.0, "memory": 8639, "data_time": 0.05699, "heatmap_loss": 0.00047, "acc_pose": 0.86707, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.71379}
+{"mode": "train", "epoch": 210, "iter": 100, "lr": 0.0, "memory": 8639, "data_time": 0.00034, "heatmap_loss": 0.00047, "acc_pose": 0.87464, "loss": 0.00047, "grad_norm": 0.00089, "time": 0.65728}
+{"mode": "train", "epoch": 210, "iter": 150, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.86555, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65717}
+{"mode": "train", "epoch": 210, "iter": 200, "lr": 0.0, "memory": 8639, "data_time": 0.00032, "heatmap_loss": 0.00047, "acc_pose": 0.87329, "loss": 0.00047, "grad_norm": 0.00088, "time": 0.65737}
+{"mode": "train", "epoch": 210, "iter": 250, "lr": 0.0, "memory": 8639, "data_time": 0.00033, "heatmap_loss": 0.00047, "acc_pose": 0.8687, "loss": 0.00047, "grad_norm": 0.00091, "time": 0.65728}
+{"mode": "val", "epoch": 210, "iter": 407, "lr": 0.0, "AP": 0.78321, "AP .5": 0.91374, "AP .75": 0.85213, "AP (M)": 0.7103, "AP (L)": 0.81079, "AR": 0.83487, "AR .5": 0.95293, "AR .75": 0.89452, "AR (M)": 0.79342, "AR (L)": 0.89554}
diff --git a/vendor/ViTPose/mmcv_custom/__init__.py b/vendor/ViTPose/mmcv_custom/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..23cb66e9336d6e87483eba5313976c3aa2de5e61
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+from .checkpoint import load_checkpoint
+from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
+from .apex_runner.optimizer import DistOptimizerHook_custom
+
+__all__ = ['load_checkpoint', 'LayerDecayOptimizerConstructor', 'DistOptimizerHook_custom']
diff --git a/vendor/ViTPose/mmcv_custom/apex_runner/__init__.py b/vendor/ViTPose/mmcv_custom/apex_runner/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b90d2cbaa978c67c83ce3a8393d172d5714e210
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/apex_runner/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+from .checkpoint import save_checkpoint
+from .apex_iter_based_runner import IterBasedRunnerAmp
+
+
+__all__ = [
+ 'save_checkpoint', 'IterBasedRunnerAmp',
+]
diff --git a/vendor/ViTPose/mmcv_custom/apex_runner/apex_iter_based_runner.py b/vendor/ViTPose/mmcv_custom/apex_runner/apex_iter_based_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..571733b091574607ba1ba39648da6a051a769d34
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/apex_runner/apex_iter_based_runner.py
@@ -0,0 +1,103 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import os.path as osp
+import platform
+import shutil
+
+import torch
+from torch.optim import Optimizer
+
+import mmcv
+from mmcv.runner import RUNNERS, IterBasedRunner
+from .checkpoint import save_checkpoint
+
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+@RUNNERS.register_module()
+class IterBasedRunnerAmp(IterBasedRunner):
+ """Iteration-based Runner with AMP support.
+
+ This runner trains models iteration by iteration.
+ """
+
+ def save_checkpoint(self,
+ out_dir,
+ filename_tmpl='iter_{}.pth',
+ meta=None,
+ save_optimizer=True,
+ create_symlink=False):
+ """Save checkpoint to file.
+
+ Args:
+ out_dir (str): Directory to save checkpoint files.
+ filename_tmpl (str, optional): Checkpoint file template.
+ Defaults to 'iter_{}.pth'.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ Defaults to None.
+ save_optimizer (bool, optional): Whether to save the optimizer
+ state. Defaults to True.
+ create_symlink (bool, optional): Whether to create a symlink to
+ the latest checkpoint file. Defaults to False.
+ """
+ if meta is None:
+ meta = dict(iter=self.iter + 1, epoch=self.epoch + 1)
+ elif isinstance(meta, dict):
+ meta.update(iter=self.iter + 1, epoch=self.epoch + 1)
+ else:
+ raise TypeError(
+ f'meta should be a dict or None, but got {type(meta)}')
+ if self.meta is not None:
+ meta.update(self.meta)
+
+ filename = filename_tmpl.format(self.iter + 1)
+ filepath = osp.join(out_dir, filename)
+ optimizer = self.optimizer if save_optimizer else None
+ save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
+ # in some environments, `os.symlink` is not supported, you may need to
+ # set `create_symlink` to False
+ # if create_symlink:
+ # dst_file = osp.join(out_dir, 'latest.pth')
+ # if platform.system() != 'Windows':
+ # mmcv.symlink(filename, dst_file)
+ # else:
+ # shutil.copy(filepath, dst_file)
+
+ def resume(self,
+ checkpoint,
+ resume_optimizer=True,
+ map_location='default'):
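+ # 'default' maps the checkpoint onto the current CUDA device when one is available; otherwise the runner's default map_location is used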
+ if map_location == 'default':
+ if torch.cuda.is_available():
+ device_id = torch.cuda.current_device()
+ checkpoint = self.load_checkpoint(
+ checkpoint,
+ map_location=lambda storage, loc: storage.cuda(device_id))
+ else:
+ checkpoint = self.load_checkpoint(checkpoint)
+ else:
+ checkpoint = self.load_checkpoint(
+ checkpoint, map_location=map_location)
+
+ self._epoch = checkpoint['meta']['epoch']
+ self._iter = checkpoint['meta']['iter']
+ self._inner_iter = checkpoint['meta']['iter']
+ if 'optimizer' in checkpoint and resume_optimizer:
+ if isinstance(self.optimizer, Optimizer):
+ self.optimizer.load_state_dict(checkpoint['optimizer'])
+ elif isinstance(self.optimizer, dict):
+ for k in self.optimizer.keys():
+ self.optimizer[k].load_state_dict(
+ checkpoint['optimizer'][k])
+ else:
+ raise TypeError(
+ 'Optimizer should be dict or torch.optim.Optimizer '
+ f'but got {type(self.optimizer)}')
+
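+ # restore the apex AMP (loss scaling) state that save_checkpoint stores under the 'amp' key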
+ if 'amp' in checkpoint:
+ apex.amp.load_state_dict(checkpoint['amp'])
+ self.logger.info('load amp state dict')
+
+ self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}')
diff --git a/vendor/ViTPose/mmcv_custom/apex_runner/checkpoint.py b/vendor/ViTPose/mmcv_custom/apex_runner/checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..b04167e0fc5f16bc33e793830ebb9c4ef15ef1ed
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/apex_runner/checkpoint.py
@@ -0,0 +1,85 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import os.path as osp
+import time
+from tempfile import TemporaryDirectory
+
+import torch
+from torch.optim import Optimizer
+
+import mmcv
+from mmcv.parallel import is_module_wrapper
+from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
+
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+def save_checkpoint(model, filename, optimizer=None, meta=None):
+ """Save checkpoint to file.
+
+ The checkpoint will have 4 fields: ``meta``, ``state_dict``,
+ ``optimizer`` and ``amp``. By default ``meta`` will contain the
+ mmcv version and save time.
+
+ Args:
+ model (Module): Module whose params are to be saved.
+ filename (str): Checkpoint filename.
+ optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ """
+ if meta is None:
+ meta = {}
+ elif not isinstance(meta, dict):
+ raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
+ meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
+
+ if is_module_wrapper(model):
+ model = model.module
+
+ if hasattr(model, 'CLASSES') and model.CLASSES is not None:
+ # save class name to the meta
+ meta.update(CLASSES=model.CLASSES)
+
+ checkpoint = {
+ 'meta': meta,
+ 'state_dict': weights_to_cpu(get_state_dict(model))
+ }
+ # save optimizer state dict in the checkpoint
+ if isinstance(optimizer, Optimizer):
+ checkpoint['optimizer'] = optimizer.state_dict()
+ elif isinstance(optimizer, dict):
+ checkpoint['optimizer'] = {}
+ for name, optim in optimizer.items():
+ checkpoint['optimizer'][name] = optim.state_dict()
+
+ # save amp state dict in the checkpoint
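+ # (this assumes apex imported successfully; the import guard above only prints a warning)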
+ checkpoint['amp'] = apex.amp.state_dict()
+
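+ # filenames with a 'pavi://' prefix are uploaded to the PAVI model cloud; anything else is written to disk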
+ if filename.startswith('pavi://'):
+ try:
+ from pavi import modelcloud
+ from pavi.exception import NodeNotFoundError
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ model_path = filename[7:]
+ root = modelcloud.Folder()
+ model_dir, model_name = osp.split(model_path)
+ try:
+ model = modelcloud.get(model_dir)
+ except NodeNotFoundError:
+ model = root.create_training_model(model_dir)
+ with TemporaryDirectory() as tmp_dir:
+ checkpoint_file = osp.join(tmp_dir, model_name)
+ with open(checkpoint_file, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
+ model.create_file(checkpoint_file, name=model_name)
+ else:
+ mmcv.mkdir_or_exist(osp.dirname(filename))
+ # immediately flush buffer
+ with open(filename, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
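+
+# Usage sketch (illustrative): saving a checkpoint with this helper. Note that
+# it unconditionally stores `apex.amp.state_dict()`, so apex must be installed
+# and `apex.amp.initialize` must have been called beforehand. Paths and meta
+# values are placeholders.
+#
+#   save_checkpoint(model, 'work_dirs/epoch_1.pth', optimizer=optimizer,
+#                   meta=dict(epoch=1, iter=1000))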
diff --git a/vendor/ViTPose/mmcv_custom/apex_runner/optimizer.py b/vendor/ViTPose/mmcv_custom/apex_runner/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbc42989b569e63bbf008bbbd2700fe217399e9f
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/apex_runner/optimizer.py
@@ -0,0 +1,33 @@
+from mmcv.runner import OptimizerHook, HOOKS
+try:
+ import apex
+except ImportError:
+ print('apex is not installed')
+
+
+@HOOKS.register_module()
+class DistOptimizerHook_custom(OptimizerHook):
+ """Optimizer hook for distributed training."""
+
+    def __init__(self,
+                 update_interval=1,
+                 grad_clip=None,
+                 coalesce=True,
+                 bucket_size_mb=-1,
+                 use_fp16=False):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+ self.update_interval = update_interval
+ self.use_fp16 = use_fp16
+
+ def before_run(self, runner):
+ runner.optimizer.zero_grad()
+
+ def after_train_iter(self, runner):
+ runner.outputs['loss'] /= self.update_interval
+ if self.use_fp16:
+ with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss:
+ scaled_loss.backward()
+ else:
+ runner.outputs['loss'].backward()
+ if self.every_n_iters(runner, self.update_interval):
+ if self.grad_clip is not None:
+ self.clip_grads(runner.model.parameters())
+ runner.optimizer.step()
+ runner.optimizer.zero_grad()
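+
+# Configuration sketch (illustrative): with the hook registered above, an
+# mmcv-style config can enable gradient accumulation and apex fp16 roughly as
+# follows (values are placeholders).
+#
+#   optimizer_config = dict(
+#       type='DistOptimizerHook_custom',
+#       update_interval=2,                # accumulate grads over 2 iterations
+#       grad_clip=dict(max_norm=1.0),
+#       use_fp16=True)                    # requires apex.amp initialisation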
diff --git a/vendor/ViTPose/mmcv_custom/checkpoint.py b/vendor/ViTPose/mmcv_custom/checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..52c9bac8a5eb89a4009e837ea338cd271e0a5bc7
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/checkpoint.py
@@ -0,0 +1,552 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import io
+import os
+import os.path as osp
+import pkgutil
+import time
+import warnings
+from collections import OrderedDict
+from importlib import import_module
+from tempfile import TemporaryDirectory
+
+import torch
+import torchvision
+from torch.optim import Optimizer
+from torch.utils import model_zoo
+from torch.nn import functional as F
+
+import mmcv
+from mmcv.fileio import FileClient
+from mmcv.fileio import load as load_file
+from mmcv.parallel import is_module_wrapper
+from mmcv.utils import mkdir_or_exist
+from mmcv.runner import get_dist_info
+
+from scipy import interpolate
+import numpy as np
+import math
+import re
+import copy
+
+ENV_MMCV_HOME = 'MMCV_HOME'
+ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
+DEFAULT_CACHE_DIR = '~/.cache'
+
+
+def _get_mmcv_home():
+ mmcv_home = os.path.expanduser(
+ os.getenv(
+ ENV_MMCV_HOME,
+ os.path.join(
+ os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')))
+
+ mkdir_or_exist(mmcv_home)
+ return mmcv_home
+
+
+def load_state_dict(module, state_dict, strict=False, logger=None):
+ """Load state_dict to a module.
+
+ This method is modified from :meth:`torch.nn.Module.load_state_dict`.
+ Default value for ``strict`` is set to ``False`` and the message for
+ param mismatch will be shown even if strict is False.
+
+ Args:
+ module (Module): Module that receives the state_dict.
+ state_dict (OrderedDict): Weights.
+ strict (bool): whether to strictly enforce that the keys
+ in :attr:`state_dict` match the keys returned by this module's
+ :meth:`~torch.nn.Module.state_dict` function. Default: ``False``.
+ logger (:obj:`logging.Logger`, optional): Logger to log the error
+ message. If not specified, print function will be used.
+ """
+ unexpected_keys = []
+ all_missing_keys = []
+ err_msg = []
+
+ metadata = getattr(state_dict, '_metadata', None)
+ state_dict = state_dict.copy()
+ if metadata is not None:
+ state_dict._metadata = metadata
+
+ # use _load_from_state_dict to enable checkpoint version control
+ def load(module, prefix=''):
+ # recursively check parallel module in case that the model has a
+ # complicated structure, e.g., nn.Module(nn.Module(DDP))
+ if is_module_wrapper(module):
+ module = module.module
+ local_metadata = {} if metadata is None else metadata.get(
+ prefix[:-1], {})
+ module._load_from_state_dict(state_dict, prefix, local_metadata, True,
+ all_missing_keys, unexpected_keys,
+ err_msg)
+ for name, child in module._modules.items():
+ if child is not None:
+ load(child, prefix + name + '.')
+
+ load(module)
+ load = None # break load->load reference cycle
+
+ # ignore "num_batches_tracked" of BN layers
+ missing_keys = [
+ key for key in all_missing_keys if 'num_batches_tracked' not in key
+ ]
+
+ if unexpected_keys:
+ err_msg.append('unexpected key in source '
+ f'state_dict: {", ".join(unexpected_keys)}\n')
+ if missing_keys:
+ err_msg.append(
+ f'missing keys in source state_dict: {", ".join(missing_keys)}\n')
+
+ rank, _ = get_dist_info()
+ if len(err_msg) > 0 and rank == 0:
+ err_msg.insert(
+ 0, 'The model and loaded state dict do not match exactly\n')
+ err_msg = '\n'.join(err_msg)
+ if strict:
+ raise RuntimeError(err_msg)
+ elif logger is not None:
+ logger.warning(err_msg)
+ else:
+ print(err_msg)
+
+
+def load_url_dist(url, model_dir=None, map_location="cpu"):
+    """In a distributed setting, this function downloads the checkpoint only
+    on local rank 0."""
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ if rank == 0:
+ checkpoint = model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ checkpoint = model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)
+ return checkpoint
+
+
+def load_pavimodel_dist(model_path, map_location=None):
+    """In a distributed setting, this function downloads the checkpoint only
+    on local rank 0."""
+ try:
+ from pavi import modelcloud
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ if rank == 0:
+ model = modelcloud.get(model_path)
+ with TemporaryDirectory() as tmp_dir:
+ downloaded_file = osp.join(tmp_dir, model.name)
+ model.download(downloaded_file)
+ checkpoint = torch.load(downloaded_file, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ model = modelcloud.get(model_path)
+ with TemporaryDirectory() as tmp_dir:
+ downloaded_file = osp.join(tmp_dir, model.name)
+ model.download(downloaded_file)
+ checkpoint = torch.load(
+ downloaded_file, map_location=map_location)
+ return checkpoint
+
+
+def load_fileclient_dist(filename, backend, map_location):
+    """In a distributed setting, this function downloads the checkpoint only
+    on local rank 0."""
+ rank, world_size = get_dist_info()
+ rank = int(os.environ.get('LOCAL_RANK', rank))
+ allowed_backends = ['ceph']
+ if backend not in allowed_backends:
+ raise ValueError(f'Load from Backend {backend} is not supported.')
+ if rank == 0:
+ fileclient = FileClient(backend=backend)
+ buffer = io.BytesIO(fileclient.get(filename))
+ checkpoint = torch.load(buffer, map_location=map_location)
+ if world_size > 1:
+ torch.distributed.barrier()
+ if rank > 0:
+ fileclient = FileClient(backend=backend)
+ buffer = io.BytesIO(fileclient.get(filename))
+ checkpoint = torch.load(buffer, map_location=map_location)
+ return checkpoint
+
+
+def get_torchvision_models():
+ model_urls = dict()
+ for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__):
+ if ispkg:
+ continue
+ _zoo = import_module(f'torchvision.models.{name}')
+ if hasattr(_zoo, 'model_urls'):
+ _urls = getattr(_zoo, 'model_urls')
+ model_urls.update(_urls)
+ return model_urls
+
+
+def get_external_models():
+ mmcv_home = _get_mmcv_home()
+ default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')
+ default_urls = load_file(default_json_path)
+ assert isinstance(default_urls, dict)
+ external_json_path = osp.join(mmcv_home, 'open_mmlab.json')
+ if osp.exists(external_json_path):
+ external_urls = load_file(external_json_path)
+ assert isinstance(external_urls, dict)
+ default_urls.update(external_urls)
+
+ return default_urls
+
+
+def get_mmcls_models():
+ mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json')
+ mmcls_urls = load_file(mmcls_json_path)
+
+ return mmcls_urls
+
+
+def get_deprecated_model_names():
+ deprecate_json_path = osp.join(mmcv.__path__[0],
+ 'model_zoo/deprecated.json')
+ deprecate_urls = load_file(deprecate_json_path)
+ assert isinstance(deprecate_urls, dict)
+
+ return deprecate_urls
+
+
+def _process_mmcls_checkpoint(checkpoint):
+ state_dict = checkpoint['state_dict']
+ new_state_dict = OrderedDict()
+ for k, v in state_dict.items():
+ if k.startswith('backbone.'):
+ new_state_dict[k[9:]] = v
+ new_checkpoint = dict(state_dict=new_state_dict)
+
+ return new_checkpoint
+
+
+def _load_checkpoint(filename, map_location=None):
+ """Load checkpoint from somewhere (modelzoo, file, url).
+
+ Args:
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
+ details.
+ map_location (str | None): Same as :func:`torch.load`. Default: None.
+
+ Returns:
+ dict | OrderedDict: The loaded checkpoint. It can be either an
+ OrderedDict storing model weights or a dict containing other
+ information, which depends on the checkpoint.
+ """
+ if filename.startswith('modelzoo://'):
+ warnings.warn('The URL scheme of "modelzoo://" is deprecated, please '
+ 'use "torchvision://" instead')
+ model_urls = get_torchvision_models()
+ model_name = filename[11:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ elif filename.startswith('torchvision://'):
+ model_urls = get_torchvision_models()
+ model_name = filename[14:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ elif filename.startswith('open-mmlab://'):
+ model_urls = get_external_models()
+ model_name = filename[13:]
+ deprecated_urls = get_deprecated_model_names()
+ if model_name in deprecated_urls:
+ warnings.warn(f'open-mmlab://{model_name} is deprecated in favor '
+ f'of open-mmlab://{deprecated_urls[model_name]}')
+ model_name = deprecated_urls[model_name]
+ model_url = model_urls[model_name]
+ # check if is url
+ if model_url.startswith(('http://', 'https://')):
+ checkpoint = load_url_dist(model_url)
+ else:
+ filename = osp.join(_get_mmcv_home(), model_url)
+ if not osp.isfile(filename):
+ raise IOError(f'{filename} is not a checkpoint file')
+ checkpoint = torch.load(filename, map_location=map_location)
+ elif filename.startswith('mmcls://'):
+ model_urls = get_mmcls_models()
+ model_name = filename[8:]
+ checkpoint = load_url_dist(model_urls[model_name])
+ checkpoint = _process_mmcls_checkpoint(checkpoint)
+ elif filename.startswith(('http://', 'https://')):
+ checkpoint = load_url_dist(filename)
+ elif filename.startswith('pavi://'):
+ model_path = filename[7:]
+ checkpoint = load_pavimodel_dist(model_path, map_location=map_location)
+ elif filename.startswith('s3://'):
+ checkpoint = load_fileclient_dist(
+ filename, backend='ceph', map_location=map_location)
+ else:
+ if not osp.isfile(filename):
+ raise IOError(f'{filename} is not a checkpoint file')
+ checkpoint = torch.load(filename, map_location=map_location)
+ return checkpoint
+
+
+def cosine_scheduler(base_value, final_value, epochs, niter_per_ep, warmup_epochs=0,
+ start_warmup_value=0, warmup_steps=-1):
+ warmup_schedule = np.array([])
+ warmup_iters = warmup_epochs * niter_per_ep
+ if warmup_steps > 0:
+ warmup_iters = warmup_steps
+ print("Set warmup steps = %d" % warmup_iters)
+ if warmup_epochs > 0:
+ warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)
+
+ iters = np.arange(epochs * niter_per_ep - warmup_iters)
+ schedule = np.array(
+ [final_value + 0.5 * (base_value - final_value) * (1 + math.cos(math.pi * i / (len(iters)))) for i in iters])
+
+ schedule = np.concatenate((warmup_schedule, schedule))
+
+ assert len(schedule) == epochs * niter_per_ep
+ return schedule
+
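+# Illustrative use of `cosine_scheduler` (values are made up): a 10-epoch
+# cosine decay with a 1-epoch linear warmup at 100 iterations per epoch.
+#
+#   lrs = cosine_scheduler(base_value=1e-3, final_value=1e-5, epochs=10,
+#                          niter_per_ep=100, warmup_epochs=1)
+#   assert len(lrs) == 10 * 100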
+
+def load_checkpoint(model,
+ filename,
+ map_location='cpu',
+ strict=False,
+ logger=None,
+ patch_padding='pad',
+ part_features=None
+ ):
+ """Load checkpoint from a file or URI.
+
+ Args:
+ model (Module): Module to load checkpoint.
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
+ details.
+ map_location (str): Same as :func:`torch.load`.
+        strict (bool): Whether to strictly enforce that the keys in the
+            checkpoint match the keys of the model. Default: False.
+        logger (:mod:`logging.Logger` or None): The logger for error messages.
+        patch_padding (str): 'pad', 'bilinear' or 'bicubic'; how to resize the
+            patch embedding weights when their size differs from the model's,
+            e.g. from 14x14 to 16x16.
+
+ Returns:
+ dict or OrderedDict: The loaded checkpoint.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict = checkpoint['state_dict']
+ elif 'model' in checkpoint:
+ state_dict = checkpoint['model']
+ elif 'module' in checkpoint:
+ state_dict = checkpoint['module']
+ else:
+ state_dict = checkpoint
+ # strip prefix of state_dict
+ if list(state_dict.keys())[0].startswith('module.'):
+ state_dict = {k[7:]: v for k, v in state_dict.items()}
+
+ # for MoBY, load model of online branch
+ if sorted(list(state_dict.keys()))[0].startswith('encoder'):
+ state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')}
+
+ rank, _ = get_dist_info()
+
+ if 'patch_embed.proj.weight' in state_dict:
+ proj_weight = state_dict['patch_embed.proj.weight']
+ orig_size = proj_weight.shape[2:]
+ current_size = model.patch_embed.proj.weight.shape[2:]
+ padding_size = current_size[0] - orig_size[0]
+ padding_l = padding_size // 2
+ padding_r = padding_size - padding_l
+ if orig_size != current_size:
+ if 'pad' in patch_padding:
+ proj_weight = torch.nn.functional.pad(proj_weight, (padding_l, padding_r, padding_l, padding_r))
+ elif 'bilinear' in patch_padding:
+ proj_weight = torch.nn.functional.interpolate(proj_weight, size=current_size, mode='bilinear', align_corners=False)
+ elif 'bicubic' in patch_padding:
+ proj_weight = torch.nn.functional.interpolate(proj_weight, size=current_size, mode='bicubic', align_corners=False)
+ state_dict['patch_embed.proj.weight'] = proj_weight
+
+ if 'pos_embed' in state_dict:
+ pos_embed_checkpoint = state_dict['pos_embed']
+ embedding_size = pos_embed_checkpoint.shape[-1]
+ H, W = model.patch_embed.patch_shape
+ num_patches = model.patch_embed.num_patches
+ num_extra_tokens = model.pos_embed.shape[-2] - num_patches
+ # height (== width) for the checkpoint position embedding
+ orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
+ if rank == 0:
+ print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, H, W))
+ extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
+ # only the position tokens are interpolated
+ pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
+ pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
+ pos_tokens = torch.nn.functional.interpolate(
+ pos_tokens, size=(H, W), mode='bicubic', align_corners=False)
+ pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
+ new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
+ state_dict['pos_embed'] = new_pos_embed
+
+ new_state_dict = copy.deepcopy(state_dict)
+ if part_features is not None:
+ current_keys = list(model.state_dict().keys())
+ for key in current_keys:
+ if "mlp.experts" in key:
+ source_key = re.sub(r'experts.\d+.', 'fc2.', key)
+ new_state_dict[key] = state_dict[source_key][-part_features:]
+ elif 'fc2' in key:
+ new_state_dict[key] = state_dict[key][:-part_features]
+
+ # load state_dict
+ load_state_dict(model, new_state_dict, strict, logger)
+ return checkpoint
+
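+# Usage sketch (illustrative): loading a pretrained ViT backbone whose patch
+# embedding and position embedding need resizing to the current model. The
+# checkpoint path and argument values are placeholders.
+#
+#   ckpt = load_checkpoint(model, 'pretrained/vit_backbone.pth',
+#                          map_location='cpu', strict=False,
+#                          patch_padding='pad', part_features=None)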
+
+def weights_to_cpu(state_dict):
+ """Copy a model state_dict to cpu.
+
+ Args:
+ state_dict (OrderedDict): Model weights on GPU.
+
+ Returns:
+        OrderedDict: Model weights on CPU.
+ """
+ state_dict_cpu = OrderedDict()
+ for key, val in state_dict.items():
+ state_dict_cpu[key] = val.cpu()
+ return state_dict_cpu
+
+
+def _save_to_state_dict(module, destination, prefix, keep_vars):
+ """Saves module state to `destination` dictionary.
+
+ This method is modified from :meth:`torch.nn.Module._save_to_state_dict`.
+
+ Args:
+ module (nn.Module): The module to generate state_dict.
+ destination (dict): A dict where state will be stored.
+ prefix (str): The prefix for parameters and buffers used in this
+ module.
+ """
+ for name, param in module._parameters.items():
+ if param is not None:
+ destination[prefix + name] = param if keep_vars else param.detach()
+ for name, buf in module._buffers.items():
+ # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d
+ if buf is not None:
+ destination[prefix + name] = buf if keep_vars else buf.detach()
+
+
+def get_state_dict(module, destination=None, prefix='', keep_vars=False):
+ """Returns a dictionary containing a whole state of the module.
+
+ Both parameters and persistent buffers (e.g. running averages) are
+ included. Keys are corresponding parameter and buffer names.
+
+ This method is modified from :meth:`torch.nn.Module.state_dict` to
+ recursively check parallel module in case that the model has a complicated
+ structure, e.g., nn.Module(nn.Module(DDP)).
+
+ Args:
+ module (nn.Module): The module to generate state_dict.
+ destination (OrderedDict): Returned dict for the state of the
+ module.
+ prefix (str): Prefix of the key.
+ keep_vars (bool): Whether to keep the variable property of the
+ parameters. Default: False.
+
+ Returns:
+ dict: A dictionary containing a whole state of the module.
+ """
+ # recursively check parallel module in case that the model has a
+ # complicated structure, e.g., nn.Module(nn.Module(DDP))
+ if is_module_wrapper(module):
+ module = module.module
+
+ # below is the same as torch.nn.Module.state_dict()
+ if destination is None:
+ destination = OrderedDict()
+ destination._metadata = OrderedDict()
+ destination._metadata[prefix[:-1]] = local_metadata = dict(
+ version=module._version)
+ _save_to_state_dict(module, destination, prefix, keep_vars)
+ for name, child in module._modules.items():
+ if child is not None:
+ get_state_dict(
+ child, destination, prefix + name + '.', keep_vars=keep_vars)
+ for hook in module._state_dict_hooks.values():
+ hook_result = hook(module, destination, prefix, local_metadata)
+ if hook_result is not None:
+ destination = hook_result
+ return destination
+
+
+def save_checkpoint(model, filename, optimizer=None, meta=None):
+ """Save checkpoint to file.
+
+ The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
+ ``optimizer``. By default ``meta`` will contain version and time info.
+
+ Args:
+ model (Module): Module whose params are to be saved.
+ filename (str): Checkpoint filename.
+ optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
+ meta (dict, optional): Metadata to be saved in checkpoint.
+ """
+ if meta is None:
+ meta = {}
+ elif not isinstance(meta, dict):
+ raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
+ meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
+
+ if is_module_wrapper(model):
+ model = model.module
+
+ if hasattr(model, 'CLASSES') and model.CLASSES is not None:
+ # save class name to the meta
+ meta.update(CLASSES=model.CLASSES)
+
+ checkpoint = {
+ 'meta': meta,
+ 'state_dict': weights_to_cpu(get_state_dict(model))
+ }
+ # save optimizer state dict in the checkpoint
+ if isinstance(optimizer, Optimizer):
+ checkpoint['optimizer'] = optimizer.state_dict()
+ elif isinstance(optimizer, dict):
+ checkpoint['optimizer'] = {}
+ for name, optim in optimizer.items():
+ checkpoint['optimizer'][name] = optim.state_dict()
+
+ if filename.startswith('pavi://'):
+ try:
+ from pavi import modelcloud
+ from pavi.exception import NodeNotFoundError
+ except ImportError:
+ raise ImportError(
+ 'Please install pavi to load checkpoint from modelcloud.')
+ model_path = filename[7:]
+ root = modelcloud.Folder()
+ model_dir, model_name = osp.split(model_path)
+ try:
+ model = modelcloud.get(model_dir)
+ except NodeNotFoundError:
+ model = root.create_training_model(model_dir)
+ with TemporaryDirectory() as tmp_dir:
+ checkpoint_file = osp.join(tmp_dir, model_name)
+ with open(checkpoint_file, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
+ model.create_file(checkpoint_file, name=model_name)
+ else:
+ mmcv.mkdir_or_exist(osp.dirname(filename))
+ # immediately flush buffer
+ with open(filename, 'wb') as f:
+ torch.save(checkpoint, f)
+ f.flush()
diff --git a/vendor/ViTPose/mmcv_custom/layer_decay_optimizer_constructor.py b/vendor/ViTPose/mmcv_custom/layer_decay_optimizer_constructor.py
new file mode 100644
index 0000000000000000000000000000000000000000..1357082e66d0a91c2544ee83440745f0e93b5175
--- /dev/null
+++ b/vendor/ViTPose/mmcv_custom/layer_decay_optimizer_constructor.py
@@ -0,0 +1,78 @@
+import json
+from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
+from mmcv.runner import get_dist_info
+
+
+def get_num_layer_for_vit(var_name, num_max_layer):
+ if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
+ return 0
+ elif var_name.startswith("backbone.patch_embed"):
+ return 0
+ elif var_name.startswith("backbone.blocks"):
+ layer_id = int(var_name.split('.')[2])
+ return layer_id + 1
+ else:
+ return num_max_layer - 1
+
+@OPTIMIZER_BUILDERS.register_module()
+class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
+ def add_params(self, params, module, prefix='', is_dcn_module=None):
+ """Add all parameters of module to the params list.
+ The parameters of the given module will be added to the list of param
+ groups, with specific rules defined by paramwise_cfg.
+ Args:
+ params (list[dict]): A list of param groups, it will be modified
+ in place.
+ module (nn.Module): The module to be added.
+ prefix (str): The prefix of the module
+ is_dcn_module (int|float|None): If the current module is a
+ submodule of DCN, `is_dcn_module` will be passed to
+ control conv_offset layer's learning rate. Defaults to None.
+ """
+ parameter_groups = {}
+ print(self.paramwise_cfg)
+ num_layers = self.paramwise_cfg.get('num_layers') + 2
+ layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
+ print("Build LayerDecayOptimizerConstructor %f - %d" % (layer_decay_rate, num_layers))
+ weight_decay = self.base_wd
+
+ for name, param in module.named_parameters():
+ if not param.requires_grad:
+ continue # frozen weights
+ if len(param.shape) == 1 or name.endswith(".bias") or 'pos_embed' in name:
+ group_name = "no_decay"
+ this_weight_decay = 0.
+ else:
+ group_name = "decay"
+ this_weight_decay = weight_decay
+
+ layer_id = get_num_layer_for_vit(name, num_layers)
+ group_name = "layer_%d_%s" % (layer_id, group_name)
+
+ if group_name not in parameter_groups:
+ scale = layer_decay_rate ** (num_layers - layer_id - 1)
+
+ parameter_groups[group_name] = {
+ "weight_decay": this_weight_decay,
+ "params": [],
+ "param_names": [],
+ "lr_scale": scale,
+ "group_name": group_name,
+ "lr": scale * self.base_lr,
+ }
+
+ parameter_groups[group_name]["params"].append(param)
+ parameter_groups[group_name]["param_names"].append(name)
+ rank, _ = get_dist_info()
+ if rank == 0:
+ to_display = {}
+ for key in parameter_groups:
+ to_display[key] = {
+ "param_names": parameter_groups[key]["param_names"],
+ "lr_scale": parameter_groups[key]["lr_scale"],
+ "lr": parameter_groups[key]["lr"],
+ "weight_decay": parameter_groups[key]["weight_decay"],
+ }
+ print("Param groups = %s" % json.dumps(to_display, indent=2))
+
+ params.extend(parameter_groups.values())
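+
+# Configuration sketch (illustrative): selecting this constructor from an
+# mmcv-style optimizer config. `num_layers` should match the ViT depth; the
+# numbers below are placeholders.
+#
+#   optimizer = dict(
+#       type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
+#       constructor='LayerDecayOptimizerConstructor',
+#       paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.75))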
diff --git a/vendor/ViTPose/mmpose/.mim/model-index.yml b/vendor/ViTPose/mmpose/.mim/model-index.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c5522f6fc18c959f604864464998a1b9ed53f9ef
--- /dev/null
+++ b/vendor/ViTPose/mmpose/.mim/model-index.yml
@@ -0,0 +1,139 @@
+Import:
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.yml
+- configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_dark_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml
+- configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
+- configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml
+- configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml
+- configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml
+- configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml
+- configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.yml
+- configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml
+- configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml
+- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.yml
+- configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.yml
diff --git a/vendor/ViTPose/mmpose/__init__.py b/vendor/ViTPose/mmpose/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e52beb9ddfd6534895ae93bdaa1ab7098f510d81
--- /dev/null
+++ b/vendor/ViTPose/mmpose/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+
+from .version import __version__, short_version
+
+
+def digit_version(version_str):
+ digit_version = []
+ for x in version_str.split('.'):
+ if x.isdigit():
+ digit_version.append(int(x))
+ elif x.find('rc') != -1:
+ patch_version = x.split('rc')
+ digit_version.append(int(patch_version[0]) - 1)
+ digit_version.append(int(patch_version[1]))
+ return digit_version
+
+
+mmcv_minimum_version = '1.3.8'
+mmcv_maximum_version = '1.5.0'
+mmcv_version = digit_version(mmcv.__version__)
+
+
+assert (mmcv_version >= digit_version(mmcv_minimum_version)
+ and mmcv_version <= digit_version(mmcv_maximum_version)), \
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+ f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
+
+__all__ = ['__version__', 'short_version']
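+
+# Examples of the version helper above (illustrative): release candidates sort
+# just below the corresponding release.
+#
+#   digit_version('1.3.8')     -> [1, 3, 8]
+#   digit_version('1.4.0rc1')  -> [1, 4, -1, 1]   # compares below [1, 4, 0]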
diff --git a/vendor/ViTPose/mmpose/apis/__init__.py b/vendor/ViTPose/mmpose/apis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e263edc4d6aa0a3380a3c2e8dc85e1a696bb164
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .inference import (inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_pose_result)
+from .inference_3d import (extract_pose_sequence, inference_interhand_3d_model,
+ inference_mesh_model, inference_pose_lifter_model,
+ vis_3d_mesh_result, vis_3d_pose_result)
+from .inference_tracking import get_track_id, vis_pose_tracking_result
+from .test import multi_gpu_test, single_gpu_test
+from .train import init_random_seed, train_model
+
+__all__ = [
+ 'train_model', 'init_pose_model', 'inference_top_down_pose_model',
+ 'inference_bottom_up_pose_model', 'multi_gpu_test', 'single_gpu_test',
+ 'vis_pose_result', 'get_track_id', 'vis_pose_tracking_result',
+ 'inference_pose_lifter_model', 'vis_3d_pose_result',
+ 'inference_interhand_3d_model', 'extract_pose_sequence',
+ 'inference_mesh_model', 'vis_3d_mesh_result', 'process_mmdet_results',
+ 'init_random_seed'
+]
diff --git a/vendor/ViTPose/mmpose/apis/inference.py b/vendor/ViTPose/mmpose/apis/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..5363d40c3f8680af79b470f59b5144941a0c4436
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/inference.py
@@ -0,0 +1,833 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+from PIL import Image
+
+from mmpose.core.post_processing import oks_nms
+from mmpose.datasets.dataset_info import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+from mmpose.models import build_posenet
+from mmpose.utils.hooks import OutputHook
+
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
+
+def init_pose_model(config, checkpoint=None, device='cuda:0'):
+ """Initialize a pose model from config file.
+
+ Args:
+ config (str or :obj:`mmcv.Config`): Config file path or the config
+ object.
+ checkpoint (str, optional): Checkpoint path. If left as None, the model
+ will not load any weights.
+
+ Returns:
+        nn.Module: The constructed pose model.
+ """
+ if isinstance(config, str):
+ config = mmcv.Config.fromfile(config)
+ elif not isinstance(config, mmcv.Config):
+ raise TypeError('config must be a filename or Config object, '
+ f'but got {type(config)}')
+ config.model.pretrained = None
+ model = build_posenet(config.model)
+ if checkpoint is not None:
+ # load model checkpoint
+ load_checkpoint(model, checkpoint, map_location='cpu')
+ # save the config in the model for convenience
+ model.cfg = config
+ model.to(device)
+ model.eval()
+ return model
+
+
+def _xyxy2xywh(bbox_xyxy):
+ """Transform the bbox format from x1y1x2y2 to xywh.
+
+ Args:
+ bbox_xyxy (np.ndarray): Bounding boxes (with scores), shaped (n, 4) or
+ (n, 5). (left, top, right, bottom, [score])
+
+ Returns:
+ np.ndarray: Bounding boxes (with scores),
+ shaped (n, 4) or (n, 5). (left, top, width, height, [score])
+ """
+ bbox_xywh = bbox_xyxy.copy()
+ bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0] + 1
+ bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1] + 1
+
+ return bbox_xywh
+
+
+def _xywh2xyxy(bbox_xywh):
+ """Transform the bbox format from xywh to x1y1x2y2.
+
+ Args:
+ bbox_xywh (ndarray): Bounding boxes (with scores),
+ shaped (n, 4) or (n, 5). (left, top, width, height, [score])
+ Returns:
+ np.ndarray: Bounding boxes (with scores), shaped (n, 4) or
+ (n, 5). (left, top, right, bottom, [score])
+ """
+ bbox_xyxy = bbox_xywh.copy()
+ bbox_xyxy[:, 2] = bbox_xyxy[:, 2] + bbox_xyxy[:, 0] - 1
+ bbox_xyxy[:, 3] = bbox_xyxy[:, 3] + bbox_xyxy[:, 1] - 1
+
+ return bbox_xyxy
+
+
+def _box2cs(cfg, box):
+    """Encode a bbox (x, y, w, h) into (center, scale).
+
+    Args:
+        cfg (mmcv.Config): Model config, used to read ``data_cfg['image_size']``.
+        box (list | np.ndarray): Bbox in (x, y, w, h, [score]) format.
+
+ Returns:
+ tuple: A tuple containing center and scale.
+
+ - np.ndarray[float32](2,): Center of the bbox (x, y).
+ - np.ndarray[float32](2,): Scale of the bbox w & h.
+ """
+
+ x, y, w, h = box[:4]
+ input_size = cfg.data_cfg['image_size']
+ aspect_ratio = input_size[0] / input_size[1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ scale = scale * 1.25
+
+ return center, scale
+
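+# Worked example for `_box2cs` (illustrative): with image_size = [192, 256]
+# the aspect ratio is 0.75, so a 100x100 box at the origin is padded in height
+# to 100x133.3 before converting to center/scale (pixel std 200, padded 1.25x):
+#
+#   center, scale = _box2cs(cfg, [0, 0, 100, 100])
+#   # center == [50., 50.],  scale ~ [0.625, 0.833]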
+
+def _inference_single_pose_model(model,
+ img_or_path,
+ bboxes,
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ return_heatmap=False):
+ """Inference human bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+ bboxes (list | np.ndarray): All bounding boxes (with scores),
+ shaped (N, 4) or (N, 5). (left, top, width, height, [score])
+ where N is number of bounding boxes.
+ dataset (str): Dataset name. Deprecated.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+        return_heatmap (bool): Flag to return the output heatmap,
+            default: False.
+
+    Returns:
+        tuple:
+        - ndarray[N, K, 3]: Predicted poses (x, y, score).
+        - ndarray[N, K, H, W] | None: Model output heatmaps.
+ """
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset_info is not None:
+ dataset_name = dataset_info.dataset_name
+ flip_pairs = dataset_info.flip_pairs
+ else:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset in ('TopDownCocoDataset', 'TopDownOCHumanDataset',
+ 'AnimalMacaqueDataset'):
+ flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
+ [13, 14], [15, 16]]
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
+ [13, 14], [15, 16]]
+ foot = [[17, 20], [18, 21], [19, 22]]
+
+ face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
+ [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
+ [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
+ [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
+ [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
+
+ hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
+ [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
+ [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
+ [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
+ [111, 132]]
+ flip_pairs = body + foot + face + hand
+ elif dataset == 'TopDownAicDataset':
+ flip_pairs = [[0, 3], [1, 4], [2, 5], [6, 9], [7, 10], [8, 11]]
+ elif dataset == 'TopDownMpiiDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
+ elif dataset == 'TopDownMpiiTrbDataset':
+ flip_pairs = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11],
+ [14, 15], [16, 22], [28, 34], [17, 23], [29, 35],
+ [18, 24], [30, 36], [19, 25], [31, 37], [20, 26],
+ [32, 38], [21, 27], [33, 39]]
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset', 'InterHand2DDataset'):
+ flip_pairs = []
+ elif dataset in 'Face300WDataset':
+ flip_pairs = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11],
+ [6, 10], [7, 9], [17, 26], [18, 25], [19, 24],
+ [20, 23], [21, 22], [31, 35], [32, 34], [36, 45],
+ [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
+ [48, 54], [49, 53], [50, 52], [61, 63], [60, 64],
+ [67, 65], [58, 56], [59, 55]]
+
+ elif dataset in 'FaceAFLWDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9],
+ [12, 14], [15, 17]]
+
+ elif dataset in 'FaceCOFWDataset':
+ flip_pairs = [[0, 1], [4, 6], [2, 3], [5, 7], [8, 9], [10, 11],
+ [12, 14], [16, 17], [13, 15], [18, 19], [22, 23]]
+
+ elif dataset in 'FaceWFLWDataset':
+ flip_pairs = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27],
+ [6, 26], [7, 25], [8, 24], [9, 23], [10, 22],
+ [11, 21], [12, 20], [13, 19], [14, 18], [15, 17],
+ [33, 46], [34, 45], [35, 44], [36, 43], [37, 42],
+ [38, 50], [39, 49], [40, 48], [41, 47], [60, 72],
+ [61, 71], [62, 70], [63, 69], [64, 68], [65, 75],
+ [66, 74], [67, 73], [55, 59], [56, 58], [76, 82],
+ [77, 81], [78, 80], [87, 83], [86, 84], [88, 92],
+ [89, 91], [95, 93], [96, 97]]
+
+ elif dataset in 'AnimalFlyDataset':
+ flip_pairs = [[1, 2], [6, 18], [7, 19], [8, 20], [9, 21], [10, 22],
+ [11, 23], [12, 24], [13, 25], [14, 26], [15, 27],
+ [16, 28], [17, 29], [30, 31]]
+ elif dataset in 'AnimalHorse10Dataset':
+ flip_pairs = []
+
+ elif dataset in 'AnimalLocustDataset':
+ flip_pairs = [[5, 20], [6, 21], [7, 22], [8, 23], [9, 24],
+ [10, 25], [11, 26], [12, 27], [13, 28], [14, 29],
+ [15, 30], [16, 31], [17, 32], [18, 33], [19, 34]]
+
+ elif dataset in 'AnimalZebraDataset':
+ flip_pairs = [[3, 4], [5, 6]]
+
+ elif dataset in 'AnimalPoseDataset':
+ flip_pairs = [[0, 1], [2, 3], [8, 9], [10, 11], [12, 13], [14, 15],
+ [16, 17], [18, 19]]
+ else:
+ raise NotImplementedError()
+ dataset_name = dataset
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+        data = {
+            'center': center,
+            'scale': scale,
+            'bbox_score': bbox[4] if len(bbox) == 5 else 1,
+            'bbox_id': 0,  # need to be assigned if batch_size > 1
+            'dataset': dataset_name,
+            'joints_3d': np.zeros((cfg.data_cfg.num_joints, 3),
+                                  dtype=np.float32),
+            'joints_3d_visible': np.zeros((cfg.data_cfg.num_joints, 3),
+                                          dtype=np.float32),
+            'rotation': 0,
+            'ann_info': {
+                'image_size': np.array(cfg.data_cfg['image_size']),
+                'num_joints': cfg.data_cfg['num_joints'],
+                'flip_pairs': flip_pairs
+            }
+        }
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, [device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False,
+ return_heatmap=return_heatmap)
+
+ return result['preds'], result['output_heatmap']
+
+
+def inference_top_down_pose_model(model,
+ img_or_path,
+ person_results=None,
+ bbox_thr=None,
+ format='xywh',
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ return_heatmap=False,
+ outputs=None):
+ """Inference a single image with a list of person bounding boxes.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str| np.ndarray): Image filename or loaded image.
+ person_results (list(dict), optional): a list of detected persons that
+ contains ``bbox`` and/or ``track_id``:
+
+ - ``bbox`` (4, ) or (5, ): The person bounding box, which contains
+ 4 box coordinates (and score).
+            - ``track_id`` (int): The unique id for each human instance.
+
+            If not provided, a dummy person result with a bbox covering
+            the entire image will be used. Default: None.
+ bbox_thr (float | None): Threshold for bounding boxes. Only bboxes
+ with higher scores will be fed into the pose detector.
+ If bbox_thr is None, all boxes will be used.
+ format (str): bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+
+ - `xyxy` means (left, top, right, bottom),
+ - `xywh` means (left, top, width, height).
+ dataset (str): Dataset name, e.g. 'TopDownCocoDataset'.
+ It is deprecated. Please use dataset_info instead.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ return_heatmap (bool) : Flag to return heatmap, default: False
+ outputs (list(str) | tuple(str)) : Names of layers whose outputs
+ need to be returned. Default: None.
+
+ Returns:
+ tuple:
+ - pose_results (list[dict]): The bbox & pose info. \
+ Each item in the list is a dictionary, \
+ containing the bbox: (left, top, right, bottom, [score]) \
+ and the pose (ndarray[Kx3]): x, y, score.
+ - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
+ torch.Tensor[N, K, H, W]]]): \
+ Output feature maps from layers specified in `outputs`. \
+ Includes 'heatmap' if `return_heatmap` is True.
+ """
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+ if dataset_info is None:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663'
+ ' for details.', DeprecationWarning)
+
+    # only two bbox formats are supported.
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+ returned_outputs = []
+
+ if person_results is None:
+ # create dummy person results
+ if isinstance(img_or_path, str):
+ width, height = Image.open(img_or_path).size
+ else:
+ height, width = img_or_path.shape[:2]
+ person_results = [{'bbox': np.array([0, 0, width, height])}]
+
+ if len(person_results) == 0:
+ return pose_results, returned_outputs
+
+ # Change for-loop preprocess each bbox to preprocess all bboxes at once.
+ bboxes = np.array([box['bbox'] for box in person_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ person_results = [person_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+    # if bbox_thr filtered out all bounding boxes
+ if len(bboxes_xywh) == 0:
+ return [], []
+
+ with OutputHook(model, outputs=outputs, as_tensor=False) as h:
+        # poses is result['preds'], an array shaped (N, K, 3)
+ poses, heatmap = _inference_single_pose_model(
+ model,
+ img_or_path,
+ bboxes_xywh,
+ dataset=dataset,
+ dataset_info=dataset_info,
+ return_heatmap=return_heatmap)
+
+ if return_heatmap:
+ h.layer_outputs['heatmap'] = heatmap
+
+ returned_outputs.append(h.layer_outputs)
+
+    assert len(poses) == len(person_results), (
+        f'got {len(poses)} poses for {len(person_results)} person results '
+        f'({len(bboxes_xyxy)} bboxes)')
+ for pose, person_result, bbox_xyxy in zip(poses, person_results,
+ bboxes_xyxy):
+ pose_result = person_result.copy()
+ pose_result['keypoints'] = pose
+ pose_result['bbox'] = bbox_xyxy
+ pose_results.append(pose_result)
+
+ return pose_results, returned_outputs
+
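+# End-to-end sketch (illustrative; the config/checkpoint paths and the bbox
+# are placeholders): top-down inference on one image with one detected person.
+#
+#   pose_model = init_pose_model('configs/xxx.py', 'checkpoints/xxx.pth',
+#                                device='cuda:0')
+#   person_results = [{'bbox': np.array([50, 40, 250, 400, 0.99])}]
+#   pose_results, _ = inference_top_down_pose_model(
+#       pose_model, 'demo.jpg', person_results, bbox_thr=0.3, format='xyxy',
+#       dataset_info=DatasetInfo(pose_model.cfg.dataset_info))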
+
+def inference_bottom_up_pose_model(model,
+ img_or_path,
+ dataset='BottomUpCocoDataset',
+ dataset_info=None,
+ pose_nms_thr=0.9,
+ return_heatmap=False,
+ outputs=None):
+ """Inference a single image with a bottom-up pose model.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str| np.ndarray): Image filename or loaded image.
+ dataset (str): Dataset name, e.g. 'BottomUpCocoDataset'.
+ It is deprecated. Please use dataset_info instead.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ pose_nms_thr (float): retain oks overlap < pose_nms_thr, default: 0.9.
+ return_heatmap (bool) : Flag to return heatmap, default: False.
+ outputs (list(str) | tuple(str)) : Names of layers whose outputs
+ need to be returned, default: None.
+
+ Returns:
+ tuple:
+ - pose_results (list[np.ndarray]): The predicted pose info. \
+ The length of the list is the number of people (P). \
+ Each item in the list is a ndarray, containing each \
+ person's pose (np.ndarray[Kx3]): x, y, score.
+ - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
+ torch.Tensor[N, K, H, W]]]): \
+ Output feature maps from layers specified in `outputs`. \
+ Includes 'heatmap' if `return_heatmap` is True.
+ """
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+
+ if dataset_info is not None:
+ dataset_name = dataset_info.dataset_name
+ flip_index = dataset_info.flip_index
+ sigmas = getattr(dataset_info, 'sigmas', None)
+ else:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ assert (dataset == 'BottomUpCocoDataset')
+ dataset_name = dataset
+ flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+ sigmas = None
+
+ pose_results = []
+ returned_outputs = []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ # prepare data
+ data = {
+ 'dataset': dataset_name,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_index': flip_index,
+ }
+ }
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ data = collate([data], samples_per_gpu=1)
+ data = scatter(data, [device])[0]
+
+ with OutputHook(model, outputs=outputs, as_tensor=False) as h:
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=data['img'],
+ img_metas=data['img_metas'],
+ return_loss=False,
+ return_heatmap=return_heatmap)
+
+ if return_heatmap:
+ h.layer_outputs['heatmap'] = result['output_heatmap']
+
+ returned_outputs.append(h.layer_outputs)
+
+ for idx, pred in enumerate(result['preds']):
+ area = (np.max(pred[:, 0]) - np.min(pred[:, 0])) * (
+ np.max(pred[:, 1]) - np.min(pred[:, 1]))
+ pose_results.append({
+ 'keypoints': pred[:, :3],
+ 'score': result['scores'][idx],
+ 'area': area,
+ })
+
+ # pose nms
+ score_per_joint = cfg.model.test_cfg.get('score_per_joint', False)
+ keep = oks_nms(
+ pose_results,
+ pose_nms_thr,
+ sigmas,
+ score_per_joint=score_per_joint)
+ pose_results = [pose_results[_keep] for _keep in keep]
+
+ return pose_results, returned_outputs
+
+
+def vis_pose_result(model,
+ img,
+ result,
+ radius=4,
+ thickness=1,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ show=False,
+ out_file=None):
+ """Visualize the detection results on the image.
+
+ Args:
+ model (nn.Module): The loaded detector.
+ img (str | np.ndarray): Image filename or loaded image.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ kpt_score_thr (float): The threshold to visualize the keypoints.
+        bbox_color (str): Color of the bounding boxes. Default: 'green'.
+        dataset_info (DatasetInfo): A class containing all dataset info.
+        show (bool): Whether to show the image. Default: False.
+ out_file (str|None): The filename of the output visualization image.
+ """
+
+ # get dataset info
+ if (dataset_info is None and hasattr(model, 'cfg')
+ and 'dataset_info' in model.cfg):
+ dataset_info = DatasetInfo(model.cfg.dataset_info)
+
+ if dataset_info is not None:
+ skeleton = dataset_info.skeleton
+ pose_kpt_color = dataset_info.pose_kpt_color
+ pose_link_color = dataset_info.pose_link_color
+ else:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255],
+ [255, 0, 0], [255, 255, 255]])
+
+ if dataset in ('TopDownCocoDataset', 'BottomUpCocoDataset',
+ 'TopDownOCHumanDataset', 'AnimalMacaqueDataset'):
+ # show the results
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [4, 6]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+ ]]
+ pose_kpt_color = palette[[
+ 16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0
+ ]]
+
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ # show the results
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2],
+ [1, 3], [2, 4], [3, 5], [4, 6], [15, 17], [15, 18],
+ [15, 19], [16, 20], [16, 21], [16, 22], [91, 92],
+ [92, 93], [93, 94], [94, 95], [91, 96], [96, 97],
+ [97, 98], [98, 99], [91, 100], [100, 101], [101, 102],
+ [102, 103], [91, 104], [104, 105], [105, 106],
+ [106, 107], [91, 108], [108, 109], [109, 110],
+ [110, 111], [112, 113], [113, 114], [114, 115],
+ [115, 116], [112, 117], [117, 118], [118, 119],
+ [119, 120], [112, 121], [121, 122], [122, 123],
+ [123, 124], [112, 125], [125, 126], [126, 127],
+ [127, 128], [112, 129], [129, 130], [130, 131],
+ [131, 132]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+ ] + [16, 16, 16, 16, 16, 16] + [
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ] + [
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ]]
+ pose_kpt_color = palette[
+ [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] +
+ [0, 0, 0, 0, 0, 0] + [19] * (68 + 42)]
+
+ elif dataset == 'TopDownAicDataset':
+ skeleton = [[2, 1], [1, 0], [0, 13], [13, 3], [3, 4], [4, 5],
+ [8, 7], [7, 6], [6, 9], [9, 10], [10, 11], [12, 13],
+ [0, 6], [3, 9]]
+
+ pose_link_color = palette[[
+ 9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 0, 7, 7
+ ]]
+ pose_kpt_color = palette[[
+ 9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 0, 0
+ ]]
+
+ elif dataset == 'TopDownMpiiDataset':
+ skeleton = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5], [6, 7],
+ [7, 8], [8, 9], [8, 12], [12, 11], [11, 10], [8, 13],
+ [13, 14], [14, 15]]
+
+ pose_link_color = palette[[
+ 16, 16, 16, 16, 16, 16, 7, 7, 0, 9, 9, 9, 9, 9, 9
+ ]]
+ pose_kpt_color = palette[[
+ 16, 16, 16, 16, 16, 16, 7, 7, 0, 0, 9, 9, 9, 9, 9, 9
+ ]]
+
+ elif dataset == 'TopDownMpiiTrbDataset':
+ skeleton = [[12, 13], [13, 0], [13, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [0, 6], [1, 7], [6, 7], [6, 8], [7,
+ 9], [8, 10],
+ [9, 11], [14, 15], [16, 17], [18, 19], [20, 21],
+ [22, 23], [24, 25], [26, 27], [28, 29], [30, 31],
+ [32, 33], [34, 35], [36, 37], [38, 39]]
+
+ pose_link_color = palette[[16] * 14 + [19] * 13]
+ pose_kpt_color = palette[[16] * 14 + [0] * 26]
+
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset'):
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7],
+ [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13],
+ [13, 14], [14, 15], [15, 16], [0, 17], [17, 18],
+ [18, 19], [19, 20]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16
+ ]]
+ pose_kpt_color = palette[[
+ 0, 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
+ 16, 16
+ ]]
+
+ elif dataset == 'InterHand2DDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [4, 5], [5, 6], [6, 7], [8, 9],
+ [9, 10], [10, 11], [12, 13], [13, 14], [14, 15],
+ [16, 17], [17, 18], [18, 19], [3, 20], [7, 20],
+ [11, 20], [15, 20], [19, 20]]
+
+ pose_link_color = palette[[
+ 0, 0, 0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 0, 4, 8, 12,
+ 16
+ ]]
+ pose_kpt_color = palette[[
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16,
+ 16, 0
+ ]]
+
+ elif dataset == 'Face300WDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 68]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceAFLWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 19]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceCOFWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 29]
+ kpt_score_thr = 0
+
+ elif dataset == 'FaceWFLWDataset':
+ # show the results
+ skeleton = []
+
+ pose_link_color = palette[[]]
+ pose_kpt_color = palette[[19] * 98]
+ kpt_score_thr = 0
+
+ elif dataset == 'AnimalHorse10Dataset':
+ skeleton = [[0, 1], [1, 12], [12, 16], [16, 21], [21, 17],
+ [17, 11], [11, 10], [10, 8], [8, 9], [9, 12], [2, 3],
+ [3, 4], [5, 6], [6, 7], [13, 14], [14, 15], [18, 19],
+ [19, 20]]
+
+ pose_link_color = palette[[4] * 10 + [6] * 2 + [6] * 2 + [7] * 2 +
+ [7] * 2]
+ pose_kpt_color = palette[[
+ 4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 7, 7, 7, 4, 4, 7, 7, 7,
+ 4
+ ]]
+
+ elif dataset == 'AnimalFlyDataset':
+ skeleton = [[1, 0], [2, 0], [3, 0], [4, 3], [5, 4], [7, 6], [8, 7],
+ [9, 8], [11, 10], [12, 11], [13, 12], [15, 14],
+ [16, 15], [17, 16], [19, 18], [20, 19], [21, 20],
+ [23, 22], [24, 23], [25, 24], [27, 26], [28, 27],
+ [29, 28], [30, 3], [31, 3]]
+
+ pose_link_color = palette[[0] * 25]
+ pose_kpt_color = palette[[0] * 32]
+
+ elif dataset == 'AnimalLocustDataset':
+ skeleton = [[1, 0], [2, 1], [3, 2], [4, 3], [6, 5], [7, 6], [9, 8],
+ [10, 9], [11, 10], [13, 12], [14, 13], [15, 14],
+ [17, 16], [18, 17], [19, 18], [21, 20], [22, 21],
+ [24, 23], [25, 24], [26, 25], [28, 27], [29, 28],
+ [30, 29], [32, 31], [33, 32], [34, 33]]
+
+ pose_link_color = palette[[0] * 26]
+ pose_kpt_color = palette[[0] * 35]
+
+ elif dataset == 'AnimalZebraDataset':
+ skeleton = [[1, 0], [2, 1], [3, 2], [4, 2], [5, 7], [6, 7], [7, 2],
+ [8, 7]]
+
+ pose_link_color = palette[[0] * 8]
+ pose_kpt_color = palette[[0] * 9]
+
+    elif dataset == 'AnimalPoseDataset':
+ skeleton = [[0, 1], [0, 2], [1, 3], [0, 4], [1, 4], [4, 5], [5, 7],
+ [6, 7], [5, 8], [8, 12], [12, 16], [5, 9], [9, 13],
+ [13, 17], [6, 10], [10, 14], [14, 18], [6, 11],
+ [11, 15], [15, 19]]
+
+ pose_link_color = palette[[0] * 20]
+ pose_kpt_color = palette[[0] * 20]
+ else:
+        raise NotImplementedError()
+
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(
+ img,
+ result,
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ kpt_score_thr=kpt_score_thr,
+ bbox_color=bbox_color,
+ show=show,
+ out_file=out_file)
+
+ return img
+
+
+def process_mmdet_results(mmdet_results, cat_id=1):
+ """Process mmdet results, and return a list of bboxes.
+
+ Args:
+ mmdet_results (list|tuple): mmdet results.
+ cat_id (int): category id (default: 1 for human)
+
+ Returns:
+ person_results (list): a list of detected bounding boxes
+ """
+ if isinstance(mmdet_results, tuple):
+ det_results = mmdet_results[0]
+ else:
+ det_results = mmdet_results
+
+ bboxes = det_results[cat_id - 1]
+
+ person_results = []
+ for bbox in bboxes:
+ person = {}
+ person['bbox'] = bbox
+ person_results.append(person)
+
+ return person_results
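+
+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (editor's addition, not part of upstream mmpose).
+# A minimal top-down pipeline assuming mmdet is installed and that
+# `init_pose_model` / `inference_top_down_pose_model` are available from
+# `mmpose.apis`; all config/checkpoint/image paths below are placeholders.
+# ---------------------------------------------------------------------------
+def _example_top_down_pipeline(img_path='demo.jpg'):
+    from mmdet.apis import inference_detector, init_detector
+    from mmpose.apis import inference_top_down_pose_model, init_pose_model
+
+    det_model = init_detector('det_config.py', 'det_checkpoint.pth')
+    pose_model = init_pose_model('pose_config.py', 'pose_checkpoint.pth')
+
+    # 1. detect people, 2. convert mmdet output to a list of bbox dicts,
+    # 3. run top-down pose estimation, 4. draw the result.
+    mmdet_results = inference_detector(det_model, img_path)
+    person_results = process_mmdet_results(mmdet_results, cat_id=1)
+    pose_results, _ = inference_top_down_pose_model(
+        pose_model, img_path, person_results, format='xywh')
+    return vis_pose_result(
+        pose_model, img_path, pose_results, out_file='vis_result.jpg')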
diff --git a/vendor/ViTPose/mmpose/apis/inference_3d.py b/vendor/ViTPose/mmpose/apis/inference_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..f59f20a1d0794f542c60c2bcfc20bfa4a014a55a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/inference_3d.py
@@ -0,0 +1,791 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+import torch
+from mmcv.parallel import collate, scatter
+
+from mmpose.datasets.pipelines import Compose
+from .inference import _box2cs, _xywh2xyxy, _xyxy2xywh
+
+
+def extract_pose_sequence(pose_results, frame_idx, causal, seq_len, step=1):
+ """Extract the target frame from 2D pose results, and pad the sequence to a
+ fixed length.
+
+ Args:
+ pose_results (list[list[dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required \
+ when ``with_track_id==True``.
+            - bbox ((4, ) or (5, )): left, top, right, bottom, [score]
+
+ frame_idx (int): The index of the frame in the original video.
+ causal (bool): If True, the target frame is the last frame in
+ a sequence. Otherwise, the target frame is in the middle of
+ a sequence.
+ seq_len (int): The number of frames in the input sequence.
+ step (int): Step size to extract frames from the video.
+
+ Returns:
+ list[list[dict]]: Multi-frame pose detection results stored \
+ in a nested list with a length of seq_len.
+ """
+
+ if causal:
+ frames_left = seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (seq_len - 1) // 2
+ frames_right = frames_left
+ num_frames = len(pose_results)
+
+ # get the padded sequence
+ pad_left = max(0, frames_left - frame_idx // step)
+ pad_right = max(0, frames_right - (num_frames - 1 - frame_idx) // step)
+ start = max(frame_idx % step, frame_idx - frames_left * step)
+ end = min(num_frames - (num_frames - 1 - frame_idx) % step,
+ frame_idx + frames_right * step + 1)
+ pose_results_seq = [pose_results[0]] * pad_left + \
+ pose_results[start:end:step] + [pose_results[-1]] * pad_right
+ return pose_results_seq
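+
+
+# Editor's note: a small worked sketch of the padding behaviour above. With 3
+# frames, frame_idx=1, causal=False and seq_len=5, the target frame sits in
+# the middle and the boundary frames are replicated to fill the sequence.
+def _example_extract_pose_sequence():
+    frames = [[{'keypoints': None, 'track_id': i}] for i in range(3)]
+    seq = extract_pose_sequence(frames, frame_idx=1, causal=False, seq_len=5)
+    assert [frame[0]['track_id'] for frame in seq] == [0, 0, 1, 2, 2]
+    return seq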
+
+
+def _gather_pose_lifter_inputs(pose_results,
+ bbox_center,
+ bbox_scale,
+ norm_pose_2d=False):
+ """Gather input data (keypoints and track_id) for pose lifter model.
+
+ Note:
+ - The temporal length of the pose detection results: T
+ - The number of the person instances: N
+ - The number of the keypoints: K
+ - The channel number of each keypoint: C
+
+ Args:
+ pose_results (List[List[Dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required when
+ ``with_track_id==True```
+            - bbox ((4, ) or (5, )): left, top, right, bottom, [score]
+
+ bbox_center (ndarray[1, 2]): x, y. The average center coordinate of the
+ bboxes in the dataset.
+ bbox_scale (int|float): The average scale of the bboxes in the dataset.
+ norm_pose_2d (bool): If True, scale the bbox (along with the 2D
+ pose) to bbox_scale, and move the bbox (along with the 2D pose) to
+ bbox_center. Default: False.
+
+ Returns:
+ list[list[dict]]: Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required when
+ ``with_track_id==True``
+ """
+ sequence_inputs = []
+ for frame in pose_results:
+ frame_inputs = []
+ for res in frame:
+ inputs = dict()
+
+ if norm_pose_2d:
+ bbox = res['bbox']
+ center = np.array([[(bbox[0] + bbox[2]) / 2,
+ (bbox[1] + bbox[3]) / 2]])
+ scale = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
+ inputs['keypoints'] = (res['keypoints'][:, :2] - center) \
+ / scale * bbox_scale + bbox_center
+ else:
+ inputs['keypoints'] = res['keypoints'][:, :2]
+
+ if res['keypoints'].shape[1] == 3:
+ inputs['keypoints'] = np.concatenate(
+ [inputs['keypoints'], res['keypoints'][:, 2:]], axis=1)
+
+ if 'track_id' in res:
+ inputs['track_id'] = res['track_id']
+ frame_inputs.append(inputs)
+ sequence_inputs.append(frame_inputs)
+ return sequence_inputs
+
+
+def _collate_pose_sequence(pose_results, with_track_id=True, target_frame=-1):
+ """Reorganize multi-frame pose detection results into individual pose
+ sequences.
+
+ Note:
+ - The temporal length of the pose detection results: T
+ - The number of the person instances: N
+ - The number of the keypoints: K
+ - The channel number of each keypoint: C
+
+ Args:
+ pose_results (List[List[Dict]]): Multi-frame pose detection results
+ stored in a nested list. Each element of the outer list is the
+ pose detection results of a single frame, and each element of the
+ inner list is the pose information of one person, which contains:
+
+ - keypoints (ndarray[K, 2 or 3]): x, y, [score]
+ - track_id (int): unique id of each person, required when
+                ``with_track_id==True``.
+
+ with_track_id (bool): If True, the element in pose_results is expected
+ to contain "track_id", which will be used to gather the pose
+ sequence of a person from multiple frames. Otherwise, the pose
+ results in each frame are expected to have a consistent number and
+ order of identities. Default is True.
+ target_frame (int): The index of the target frame. Default: -1.
+ """
+ T = len(pose_results)
+ assert T > 0
+
+ target_frame = (T + target_frame) % T # convert negative index to positive
+
+ N = len(pose_results[target_frame]) # use identities in the target frame
+ if N == 0:
+ return []
+
+ K, C = pose_results[target_frame][0]['keypoints'].shape
+
+ track_ids = None
+ if with_track_id:
+ track_ids = [res['track_id'] for res in pose_results[target_frame]]
+
+ pose_sequences = []
+ for idx in range(N):
+ pose_seq = dict()
+ # gather static information
+ for k, v in pose_results[target_frame][idx].items():
+ if k != 'keypoints':
+ pose_seq[k] = v
+ # gather keypoints
+ if not with_track_id:
+ pose_seq['keypoints'] = np.stack(
+ [frame[idx]['keypoints'] for frame in pose_results])
+ else:
+ keypoints = np.zeros((T, K, C), dtype=np.float32)
+ keypoints[target_frame] = pose_results[target_frame][idx][
+ 'keypoints']
+            # find the leftmost frame containing track_ids[idx]
+ for frame_idx in range(target_frame - 1, -1, -1):
+ contains_idx = False
+ for res in pose_results[frame_idx]:
+ if res['track_id'] == track_ids[idx]:
+ keypoints[frame_idx] = res['keypoints']
+ contains_idx = True
+ break
+ if not contains_idx:
+ # replicate the left most frame
+ keypoints[:frame_idx + 1] = keypoints[frame_idx + 1]
+ break
+            # find the rightmost frame containing track_ids[idx]
+ for frame_idx in range(target_frame + 1, T):
+ contains_idx = False
+ for res in pose_results[frame_idx]:
+ if res['track_id'] == track_ids[idx]:
+ keypoints[frame_idx] = res['keypoints']
+ contains_idx = True
+ break
+ if not contains_idx:
+ # replicate the right most frame
+ keypoints[frame_idx + 1:] = keypoints[frame_idx]
+ break
+ pose_seq['keypoints'] = keypoints
+ pose_sequences.append(pose_seq)
+
+ return pose_sequences
+
+
+def inference_pose_lifter_model(model,
+ pose_results_2d,
+ dataset=None,
+ dataset_info=None,
+ with_track_id=True,
+ image_size=None,
+ norm_pose_2d=False):
+ """Inference 3D pose from 2D pose sequences using a pose lifter model.
+
+ Args:
+ model (nn.Module): The loaded pose lifter model
+ pose_results_2d (list[list[dict]]): The 2D pose sequences stored in a
+ nested list. Each element of the outer list is the 2D pose results
+ of a single frame, and each element of the inner list is the 2D
+ pose of one person, which contains:
+
+ - "keypoints" (ndarray[K, 2 or 3]): x, y, [score]
+ - "track_id" (int)
+        dataset (str): Dataset name, e.g. 'Body3DH36MDataset'.
+        dataset_info (DatasetInfo|None): Dataset meta information (skeleton,
+            flip pairs and bbox statistics). Default: None.
+        with_track_id (bool): If True, the element in pose_results_2d is
+            expected to contain "track_id", which will be used to gather the
+            pose sequence of a person from multiple frames. Otherwise, the
+            pose results in each frame are expected to have a consistent
+            number and order of identities. Default is True.
+ image_size (tuple|list): image width, image height. If None, image size
+ will not be contained in dict ``data``.
+ norm_pose_2d (bool): If True, scale the bbox (along with the 2D
+ pose) to the average bbox scale of the dataset, and move the bbox
+ (along with the 2D pose) to the average bbox center of the dataset.
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element is the result of \
+ an instance, which contains:
+
+ - "keypoints_3d" (ndarray[K, 3]): predicted 3D keypoints
+ - "keypoints" (ndarray[K, 2 or 3]): from the last frame in \
+ ``pose_results_2d``.
+ - "track_id" (int): from the last frame in ``pose_results_2d``. \
+ If there is no valid instance, an empty list will be \
+ returned.
+ """
+ cfg = model.cfg
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ if dataset_info is not None:
+ flip_pairs = dataset_info.flip_pairs
+ assert 'stats_info' in dataset_info._dataset_info
+ bbox_center = dataset_info._dataset_info['stats_info']['bbox_center']
+ bbox_scale = dataset_info._dataset_info['stats_info']['bbox_scale']
+ else:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset == 'Body3DH36MDataset':
+ flip_pairs = [[1, 4], [2, 5], [3, 6], [11, 14], [12, 15], [13, 16]]
+ bbox_center = np.array([[528, 427]], dtype=np.float32)
+ bbox_scale = 400
+ else:
+ raise NotImplementedError()
+
+ target_idx = -1 if model.causal else len(pose_results_2d) // 2
+ pose_lifter_inputs = _gather_pose_lifter_inputs(pose_results_2d,
+ bbox_center, bbox_scale,
+ norm_pose_2d)
+ pose_sequences_2d = _collate_pose_sequence(pose_lifter_inputs,
+ with_track_id, target_idx)
+
+ if not pose_sequences_2d:
+ return []
+
+ batch_data = []
+ for seq in pose_sequences_2d:
+ pose_2d = seq['keypoints'].astype(np.float32)
+ T, K, C = pose_2d.shape
+
+ input_2d = pose_2d[..., :2]
+        if C > 2:
+            input_2d_visible = pose_2d[..., 2:3]
+        else:
+            input_2d_visible = np.ones((T, K, 1), dtype=np.float32)
+
+ # TODO: Will be removed in the later versions
+ # Dummy 3D input
+ # This is for compatibility with configs in mmpose<=v0.14.0, where a
+ # 3D input is required to generate denormalization parameters. This
+ # part will be removed in the future.
+ target = np.zeros((K, 3), dtype=np.float32)
+ target_visible = np.ones((K, 1), dtype=np.float32)
+
+ # Dummy image path
+ # This is for compatibility with configs in mmpose<=v0.14.0, where
+ # target_image_path is required. This part will be removed in the
+ # future.
+ target_image_path = None
+
+ data = {
+ 'input_2d': input_2d,
+ 'input_2d_visible': input_2d_visible,
+ 'target': target,
+ 'target_visible': target_visible,
+ 'target_image_path': target_image_path,
+ 'ann_info': {
+ 'num_joints': K,
+ 'flip_pairs': flip_pairs
+ }
+ }
+
+ if image_size is not None:
+ assert len(image_size) == 2
+ data['image_width'] = image_size[0]
+ data['image_height'] = image_size[1]
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, target_gpus=[device])[0]
+
+ with torch.no_grad():
+ result = model(
+ input=batch_data['input'],
+ metas=batch_data['metas'],
+ return_loss=False)
+
+ poses_3d = result['preds']
+ if poses_3d.shape[-1] != 4:
+ assert poses_3d.shape[-1] == 3
+ dummy_score = np.ones(
+ poses_3d.shape[:-1] + (1, ), dtype=poses_3d.dtype)
+ poses_3d = np.concatenate((poses_3d, dummy_score), axis=-1)
+ pose_results = []
+ for pose_2d, pose_3d in zip(pose_sequences_2d, poses_3d):
+ pose_result = pose_2d.copy()
+ pose_result['keypoints_3d'] = pose_3d
+ pose_results.append(pose_result)
+
+ return pose_results
+
+
+def vis_3d_pose_result(model,
+ result,
+ img=None,
+ dataset='Body3DH36MDataset',
+ dataset_info=None,
+ kpt_score_thr=0.3,
+ radius=8,
+ thickness=2,
+ num_instances=-1,
+ show=False,
+ out_file=None):
+ """Visualize the 3D pose estimation results.
+
+ Args:
+ model (nn.Module): The loaded model.
+        result (list[dict]): The 3D pose estimation results to visualize.
+ """
+
+ if dataset_info is not None:
+ skeleton = dataset_info.skeleton
+ pose_kpt_color = dataset_info.pose_kpt_color
+ pose_link_color = dataset_info.pose_link_color
+ else:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255],
+ [255, 0, 0], [255, 255, 255]])
+
+ if dataset == 'Body3DH36MDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7],
+ [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13],
+ [8, 14], [14, 15], [15, 16]]
+
+ pose_kpt_color = palette[[
+ 9, 0, 0, 0, 16, 16, 16, 9, 9, 9, 9, 16, 16, 16, 0, 0, 0
+ ]]
+ pose_link_color = palette[[
+ 0, 0, 0, 16, 16, 16, 9, 9, 9, 9, 16, 16, 16, 0, 0, 0
+ ]]
+
+ elif dataset == 'InterHand3DDataset':
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 20], [4, 5], [5, 6],
+ [6, 7], [7, 20], [8, 9], [9, 10], [10, 11], [11, 20],
+ [12, 13], [13, 14], [14, 15], [15, 20], [16, 17],
+ [17, 18], [18, 19], [19, 20], [21, 22], [22, 23],
+ [23, 24], [24, 41], [25, 26], [26, 27], [27, 28],
+ [28, 41], [29, 30], [30, 31], [31, 32], [32, 41],
+ [33, 34], [34, 35], [35, 36], [36, 41], [37, 38],
+ [38, 39], [39, 40], [40, 41]]
+
+ pose_kpt_color = [[14, 128, 250], [14, 128, 250], [14, 128, 250],
+ [14, 128, 250], [80, 127, 255], [80, 127, 255],
+ [80, 127, 255], [80, 127, 255], [71, 99, 255],
+ [71, 99, 255], [71, 99, 255], [71, 99, 255],
+ [0, 36, 255], [0, 36, 255], [0, 36, 255],
+ [0, 36, 255], [0, 0, 230], [0, 0, 230],
+ [0, 0, 230], [0, 0, 230], [0, 0, 139],
+ [237, 149, 100], [237, 149, 100],
+ [237, 149, 100], [237, 149, 100], [230, 128, 77],
+ [230, 128, 77], [230, 128, 77], [230, 128, 77],
+ [255, 144, 30], [255, 144, 30], [255, 144, 30],
+ [255, 144, 30], [153, 51, 0], [153, 51, 0],
+ [153, 51, 0], [153, 51, 0], [255, 51, 13],
+ [255, 51, 13], [255, 51, 13], [255, 51, 13],
+ [103, 37, 8]]
+
+ pose_link_color = [[14, 128, 250], [14, 128, 250], [14, 128, 250],
+ [14, 128, 250], [80, 127, 255], [80, 127, 255],
+ [80, 127, 255], [80, 127, 255], [71, 99, 255],
+ [71, 99, 255], [71, 99, 255], [71, 99, 255],
+ [0, 36, 255], [0, 36, 255], [0, 36, 255],
+ [0, 36, 255], [0, 0, 230], [0, 0, 230],
+ [0, 0, 230], [0, 0, 230], [237, 149, 100],
+ [237, 149, 100], [237, 149, 100],
+ [237, 149, 100], [230, 128, 77], [230, 128, 77],
+ [230, 128, 77], [230, 128, 77], [255, 144, 30],
+ [255, 144, 30], [255, 144, 30], [255, 144, 30],
+ [153, 51, 0], [153, 51, 0], [153, 51, 0],
+ [153, 51, 0], [255, 51, 13], [255, 51, 13],
+ [255, 51, 13], [255, 51, 13]]
+ else:
+ raise NotImplementedError
+
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(
+ result,
+ img,
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ num_instances=num_instances,
+ show=show,
+ out_file=out_file)
+
+ return img
+
+
+def inference_interhand_3d_model(model,
+ img_or_path,
+ det_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset='InterHand3DDataset'):
+ """Inference a single image with a list of hand bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+ det_results (list[dict]): The 2D bbox sequences stored in a list.
+            Each element of the list is the bbox of one hand instance, stored
+            as an ndarray of shape (4,) or (5,), containing 4 box coordinates
+            (and an optional score).
+ dataset (str): Dataset name.
+ format: bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+ 'xyxy' means (left, top, right, bottom),
+ 'xywh' means (left, top, width, height).
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element is the result \
+ of an instance, which contains the predicted 3D keypoints with \
+ shape (ndarray[K,3]). If there is no valid instance, an \
+ empty list will be returned.
+ """
+
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+
+ if len(det_results) == 0:
+ return pose_results
+
+    # Preprocess all bboxes at once instead of looping over them one by one.
+ bboxes = np.array([box['bbox'] for box in det_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ det_results = [det_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+    # return early if bbox_thr filtered out all bounding boxes
+ if len(bboxes_xywh) == 0:
+ return []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset == 'InterHand3DDataset':
+ flip_pairs = [[i, 21 + i] for i in range(21)]
+ else:
+ raise NotImplementedError()
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+ data = {
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'bbox_score':
+ bbox[4] if len(bbox) == 5 else 1,
+ 'bbox_id':
+ 0, # need to be assigned if batch_size > 1
+ 'dataset':
+ dataset,
+ 'joints_3d':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'rotation':
+ 0,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_pairs': flip_pairs,
+ 'heatmap3d_depth_bound': cfg.data_cfg['heatmap3d_depth_bound'],
+ 'heatmap_size_root': cfg.data_cfg['heatmap_size_root'],
+ 'root_depth_bound': cfg.data_cfg['root_depth_bound']
+ }
+ }
+
+ if isinstance(img_or_path, np.ndarray):
+ data['img'] = img_or_path
+ else:
+ data['image_file'] = img_or_path
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, [device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ result = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False)
+
+ poses_3d = result['preds']
+ rel_root_depth = result['rel_root_depth']
+ hand_type = result['hand_type']
+ if poses_3d.shape[-1] != 4:
+ assert poses_3d.shape[-1] == 3
+ dummy_score = np.ones(
+ poses_3d.shape[:-1] + (1, ), dtype=poses_3d.dtype)
+ poses_3d = np.concatenate((poses_3d, dummy_score), axis=-1)
+
+ # add relative root depth to left hand joints
+ poses_3d[:, 21:, 2] += rel_root_depth
+
+ # set joint scores according to hand type
+ poses_3d[:, :21, 3] *= hand_type[:, [0]]
+ poses_3d[:, 21:, 3] *= hand_type[:, [1]]
+
+ pose_results = []
+ for pose_3d, person_res, bbox_xyxy in zip(poses_3d, det_results,
+ bboxes_xyxy):
+ pose_res = person_res.copy()
+ pose_res['keypoints_3d'] = pose_3d
+ pose_res['bbox'] = bbox_xyxy
+ pose_results.append(pose_res)
+
+ return pose_results
+
+
+def inference_mesh_model(model,
+ img_or_path,
+ det_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset='MeshH36MDataset'):
+ """Inference a single image with a list of bounding boxes.
+
+ Note:
+ - num_bboxes: N
+ - num_keypoints: K
+ - num_vertices: V
+ - num_faces: F
+
+ Args:
+ model (nn.Module): The loaded pose model.
+ img_or_path (str | np.ndarray): Image filename or loaded image.
+        det_results (list[dict]): The detection results stored in a list.
+            Each element of the list is the result for one person, whose
+            "bbox" (ndarray[4 or 5]) contains 4 box coordinates
+            (and an optional score).
+ bbox_thr (float | None): Threshold for bounding boxes.
+ Only bboxes with higher scores will be fed into the pose
+ detector. If bbox_thr is None, all boxes will be used.
+ format (str): bbox format ('xyxy' | 'xywh'). Default: 'xywh'.
+
+ - 'xyxy' means (left, top, right, bottom),
+ - 'xywh' means (left, top, width, height).
+ dataset (str): Dataset name.
+
+ Returns:
+ list[dict]: 3D pose inference results. Each element \
+ is the result of an instance, which contains:
+
+ - 'bbox' (ndarray[4]): instance bounding bbox
+ - 'center' (ndarray[2]): bbox center
+ - 'scale' (ndarray[2]): bbox scale
+ - 'keypoints_3d' (ndarray[K,3]): predicted 3D keypoints
+ - 'camera' (ndarray[3]): camera parameters
+ - 'vertices' (ndarray[V, 3]): predicted 3D vertices
+ - 'faces' (ndarray[F, 3]): mesh faces
+
+ If there is no valid instance, an empty list
+ will be returned.
+ """
+
+ assert format in ['xyxy', 'xywh']
+
+ pose_results = []
+
+ if len(det_results) == 0:
+ return pose_results
+
+    # Preprocess all bboxes at once instead of looping over them one by one.
+ bboxes = np.array([box['bbox'] for box in det_results])
+
+ # Select bboxes by score threshold
+ if bbox_thr is not None:
+ assert bboxes.shape[1] == 5
+ valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
+ bboxes = bboxes[valid_idx]
+ det_results = [det_results[i] for i in valid_idx]
+
+ if format == 'xyxy':
+ bboxes_xyxy = bboxes
+ bboxes_xywh = _xyxy2xywh(bboxes)
+ else:
+ # format is already 'xywh'
+ bboxes_xywh = bboxes
+ bboxes_xyxy = _xywh2xyxy(bboxes)
+
+    # return early if bbox_thr filtered out all bounding boxes
+ if len(bboxes_xywh) == 0:
+ return []
+
+ cfg = model.cfg
+ device = next(model.parameters()).device
+ if device.type == 'cpu':
+ device = -1
+
+ # build the data pipeline
+ test_pipeline = Compose(cfg.test_pipeline)
+
+ assert len(bboxes[0]) in [4, 5]
+
+ if dataset == 'MeshH36MDataset':
+ flip_pairs = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9],
+ [20, 21], [22, 23]]
+ else:
+ raise NotImplementedError()
+
+ batch_data = []
+ for bbox in bboxes:
+ center, scale = _box2cs(cfg, bbox)
+
+ # prepare data
+ data = {
+ 'image_file':
+ img_or_path,
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'rotation':
+ 0,
+ 'bbox_score':
+ bbox[4] if len(bbox) == 5 else 1,
+ 'dataset':
+ dataset,
+ 'joints_2d':
+ np.zeros((cfg.data_cfg.num_joints, 2), dtype=np.float32),
+ 'joints_2d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 1), dtype=np.float32),
+ 'joints_3d':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'pose':
+ np.zeros(72, dtype=np.float32),
+ 'beta':
+ np.zeros(10, dtype=np.float32),
+ 'has_smpl':
+ 0,
+ 'ann_info': {
+ 'image_size': np.array(cfg.data_cfg['image_size']),
+ 'num_joints': cfg.data_cfg['num_joints'],
+ 'flip_pairs': flip_pairs,
+ }
+ }
+
+ data = test_pipeline(data)
+ batch_data.append(data)
+
+ batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+ batch_data = scatter(batch_data, target_gpus=[device])[0]
+
+ # forward the model
+ with torch.no_grad():
+ preds = model(
+ img=batch_data['img'],
+ img_metas=batch_data['img_metas'],
+ return_loss=False,
+ return_vertices=True,
+ return_faces=True)
+
+ for idx in range(len(det_results)):
+ pose_res = det_results[idx].copy()
+ pose_res['bbox'] = bboxes_xyxy[idx]
+ pose_res['center'] = batch_data['img_metas'][idx]['center']
+ pose_res['scale'] = batch_data['img_metas'][idx]['scale']
+ pose_res['keypoints_3d'] = preds['keypoints_3d'][idx]
+ pose_res['camera'] = preds['camera'][idx]
+ pose_res['vertices'] = preds['vertices'][idx]
+ pose_res['faces'] = preds['faces']
+ pose_results.append(pose_res)
+ return pose_results
+
+
+def vis_3d_mesh_result(model, result, img=None, show=False, out_file=None):
+ """Visualize the 3D mesh estimation results.
+
+ Args:
+ model (nn.Module): The loaded model.
+ result (list[dict]): 3D mesh estimation results.
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+
+ img = model.show_result(result, img, show=show, out_file=out_file)
+
+ return img
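+
+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (editor's addition, not part of upstream mmpose).
+# It assumes per-frame 2D pose results with track ids have been collected
+# elsewhere (e.g. with a top-down model plus `get_track_id`); `lifter_model`,
+# `dataset_info` and `seq_len` are placeholders supplied by the caller, and
+# seq_len should match the temporal window the lifter was trained with.
+# ---------------------------------------------------------------------------
+def _example_lift_to_3d(pose_results_2d_all_frames, frame_idx, lifter_model,
+                        dataset_info, seq_len=27):
+    # pad/slice the 2D sequence around the target frame, lift it to 3D and
+    # visualize the target frame
+    pose_seq_2d = extract_pose_sequence(
+        pose_results_2d_all_frames, frame_idx,
+        causal=lifter_model.causal, seq_len=seq_len)
+    pose_results_3d = inference_pose_lifter_model(
+        lifter_model, pose_seq_2d, dataset_info=dataset_info,
+        with_track_id=True, norm_pose_2d=True)
+    return vis_3d_pose_result(
+        lifter_model, pose_results_3d, dataset_info=dataset_info)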
diff --git a/vendor/ViTPose/mmpose/apis/inference_tracking.py b/vendor/ViTPose/mmpose/apis/inference_tracking.py
new file mode 100644
index 0000000000000000000000000000000000000000..9494fbaa75ca54840bd2c3f8bbbfcc7955e3a05d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/inference_tracking.py
@@ -0,0 +1,347 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+
+from mmpose.core import OneEuroFilter, oks_iou
+
+
+def _compute_iou(bboxA, bboxB):
+ """Compute the Intersection over Union (IoU) between two boxes .
+
+ Args:
+ bboxA (list): The first bbox info (left, top, right, bottom, score).
+ bboxB (list): The second bbox info (left, top, right, bottom, score).
+
+ Returns:
+ float: The IoU value.
+ """
+
+ x1 = max(bboxA[0], bboxB[0])
+ y1 = max(bboxA[1], bboxB[1])
+ x2 = min(bboxA[2], bboxB[2])
+ y2 = min(bboxA[3], bboxB[3])
+
+ inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+
+ bboxA_area = (bboxA[2] - bboxA[0]) * (bboxA[3] - bboxA[1])
+ bboxB_area = (bboxB[2] - bboxB[0]) * (bboxB[3] - bboxB[1])
+ union_area = float(bboxA_area + bboxB_area - inter_area)
+ if union_area == 0:
+ union_area = 1e-5
+ warnings.warn('union_area=0 is unexpected')
+
+ iou = inter_area / union_area
+
+ return iou
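+
+
+# Editor's note: a small worked example of the IoU above. Two 10x10 boxes that
+# overlap in a 5x5 region share 25 px, their union covers 100 + 100 - 25 = 175
+# px, so the IoU is 25 / 175 (about 0.143).
+def _example_compute_iou():
+    iou = _compute_iou([0, 0, 10, 10, 1.0], [5, 5, 15, 15, 1.0])
+    assert abs(iou - 25.0 / 175.0) < 1e-6
+    return iou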
+
+
+def _track_by_iou(res, results_last, thr):
+ """Get track id using IoU tracking greedily.
+
+ Args:
+ res (dict): The bbox & pose results of the person instance.
+ results_last (list[dict]): The bbox & pose & track_id info of the
+ last frame (bbox_result, pose_result, track_id).
+ thr (float): The threshold for iou tracking.
+
+ Returns:
+ int: The track id for the new person instance.
+ list[dict]: The bbox & pose & track_id info of the persons
+ that have not been matched on the last frame.
+ dict: The matched person instance on the last frame.
+ """
+
+ bbox = list(res['bbox'])
+
+ max_iou_score = -1
+ max_index = -1
+ match_result = {}
+ for index, res_last in enumerate(results_last):
+ bbox_last = list(res_last['bbox'])
+
+ iou_score = _compute_iou(bbox, bbox_last)
+ if iou_score > max_iou_score:
+ max_iou_score = iou_score
+ max_index = index
+
+ if max_iou_score > thr:
+ track_id = results_last[max_index]['track_id']
+ match_result = results_last[max_index]
+ del results_last[max_index]
+ else:
+ track_id = -1
+
+ return track_id, results_last, match_result
+
+
+def _track_by_oks(res, results_last, thr):
+ """Get track id using OKS tracking greedily.
+
+ Args:
+ res (dict): The pose results of the person instance.
+ results_last (list[dict]): The pose & track_id info of the
+ last frame (pose_result, track_id).
+ thr (float): The threshold for oks tracking.
+
+ Returns:
+ int: The track id for the new person instance.
+ list[dict]: The pose & track_id info of the persons
+ that have not been matched on the last frame.
+ dict: The matched person instance on the last frame.
+ """
+ pose = res['keypoints'].reshape((-1))
+ area = res['area']
+ max_index = -1
+ match_result = {}
+
+ if len(results_last) == 0:
+ return -1, results_last, match_result
+
+ pose_last = np.array(
+ [res_last['keypoints'].reshape((-1)) for res_last in results_last])
+ area_last = np.array([res_last['area'] for res_last in results_last])
+
+ oks_score = oks_iou(pose, pose_last, area, area_last)
+
+ max_index = np.argmax(oks_score)
+
+ if oks_score[max_index] > thr:
+ track_id = results_last[max_index]['track_id']
+ match_result = results_last[max_index]
+ del results_last[max_index]
+ else:
+ track_id = -1
+
+ return track_id, results_last, match_result
+
+
+def _get_area(results):
+ """Get bbox for each person instance on the current frame.
+
+ Args:
+ results (list[dict]): The pose results of the current frame
+ (pose_result).
+ Returns:
+ list[dict]: The bbox & pose info of the current frame
+ (bbox_result, pose_result, area).
+ """
+ for result in results:
+ if 'bbox' in result:
+ result['area'] = ((result['bbox'][2] - result['bbox'][0]) *
+ (result['bbox'][3] - result['bbox'][1]))
+ else:
+ xmin = np.min(
+ result['keypoints'][:, 0][result['keypoints'][:, 0] > 0],
+ initial=1e10)
+ xmax = np.max(result['keypoints'][:, 0])
+ ymin = np.min(
+ result['keypoints'][:, 1][result['keypoints'][:, 1] > 0],
+ initial=1e10)
+ ymax = np.max(result['keypoints'][:, 1])
+ result['area'] = (xmax - xmin) * (ymax - ymin)
+ result['bbox'] = np.array([xmin, ymin, xmax, ymax])
+ return results
+
+
+def _temporal_refine(result, match_result, fps=None):
+ """Refine koypoints using tracked person instance on last frame.
+
+ Args:
+ results (dict): The pose results of the current frame
+ (pose_result).
+ match_result (dict): The pose results of the last frame
+ (match_result)
+ Returns:
+ (array): The person keypoints after refine.
+ """
+ if 'one_euro' in match_result:
+ result['keypoints'][:, :2] = match_result['one_euro'](
+ result['keypoints'][:, :2])
+ result['one_euro'] = match_result['one_euro']
+ else:
+ result['one_euro'] = OneEuroFilter(result['keypoints'][:, :2], fps=fps)
+ return result['keypoints']
+
+
+def get_track_id(results,
+ results_last,
+ next_id,
+ min_keypoints=3,
+ use_oks=False,
+ tracking_thr=0.3,
+ use_one_euro=False,
+ fps=None):
+ """Get track id for each person instance on the current frame.
+
+ Args:
+ results (list[dict]): The bbox & pose results of the current frame
+ (bbox_result, pose_result).
+ results_last (list[dict]): The bbox & pose & track_id info of the
+ last frame (bbox_result, pose_result, track_id).
+ next_id (int): The track id for the new person instance.
+        min_keypoints (int): Minimum number of detected keypoints required to
+            register a new instance. Default: 3.
+        use_oks (bool): Whether to use OKS-based tracking. Default: False.
+        tracking_thr (float): The threshold for tracking.
+        use_one_euro (bool): Whether to smooth keypoints with a one-euro
+            filter. Default: False.
+        fps (float, optional): Frame rate of the video input, used to set the
+            d_cutoff parameter of the one-euro filter. Default: None.
+
+ Returns:
+ tuple:
+ - results (list[dict]): The bbox & pose & track_id info of the \
+ current frame (bbox_result, pose_result, track_id).
+ - next_id (int): The track id for the new person instance.
+ """
+ results = _get_area(results)
+
+ if use_oks:
+ _track = _track_by_oks
+ else:
+ _track = _track_by_iou
+
+ for result in results:
+ track_id, results_last, match_result = _track(result, results_last,
+ tracking_thr)
+ if track_id == -1:
+ if np.count_nonzero(result['keypoints'][:, 1]) > min_keypoints:
+ result['track_id'] = next_id
+ next_id += 1
+ else:
+ # If the number of keypoints detected is small,
+ # delete that person instance.
+ result['keypoints'][:, 1] = -10
+ result['bbox'] *= 0
+ result['track_id'] = -1
+ else:
+ result['track_id'] = track_id
+ if use_one_euro:
+ result['keypoints'] = _temporal_refine(
+ result, match_result, fps=fps)
+ del match_result
+
+ return results, next_id
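+
+
+# Editor's note: a minimal sketch of how the tracking entry point above is
+# typically driven over a video. `pose_results_per_frame` is assumed to be a
+# list of per-frame pose results (each a list[dict] with a 'keypoints' field),
+# e.g. produced by a top-down pose model.
+def _example_track_video(pose_results_per_frame):
+    results_last = []
+    next_id = 0
+    tracked = []
+    for results in pose_results_per_frame:
+        # reuse ids from the previous frame where possible, otherwise assign
+        # a fresh id
+        results, next_id = get_track_id(
+            results, results_last, next_id, use_oks=False, tracking_thr=0.3)
+        tracked.append(results)
+        results_last = results
+    return tracked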
+
+
+def vis_pose_tracking_result(model,
+ img,
+ result,
+ radius=4,
+ thickness=1,
+ kpt_score_thr=0.3,
+ dataset='TopDownCocoDataset',
+ dataset_info=None,
+ show=False,
+ out_file=None):
+ """Visualize the pose tracking results on the image.
+
+ Args:
+ model (nn.Module): The loaded detector.
+ img (str | np.ndarray): Image filename or loaded image.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+        kpt_score_thr (float): The score threshold to visualize keypoints.
+        dataset (str): Dataset name, used to pick a default skeleton when
+            `dataset_info` is not given.
+        dataset_info (DatasetInfo|None): Dataset meta information. Default: None.
+        show (bool): Whether to show the image. Default: False.
+ out_file (str|None): The filename of the output visualization image.
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+
+ palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+ [230, 230, 0], [255, 153, 255], [153, 204, 255],
+ [255, 102, 255], [255, 51, 255], [102, 178, 255],
+ [51, 153, 255], [255, 153, 153], [255, 102, 102],
+ [255, 51, 51], [153, 255, 153], [102, 255, 102],
+ [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
+ [255, 255, 255]])
+
+ if dataset_info is None and dataset is not None:
+ warnings.warn(
+            'dataset is deprecated. '
+            'Please set `dataset_info` in the config. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
+ DeprecationWarning)
+ # TODO: These will be removed in the later versions.
+ if dataset in ('TopDownCocoDataset', 'BottomUpCocoDataset',
+ 'TopDownOCHumanDataset'):
+ kpt_num = 17
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
+ [3, 5], [4, 6]]
+
+ elif dataset == 'TopDownCocoWholeBodyDataset':
+ kpt_num = 133
+ skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
+ [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
+ [8, 10], [1, 2], [0, 1], [0, 2],
+ [1, 3], [2, 4], [3, 5], [4, 6], [15, 17], [15, 18],
+ [15, 19], [16, 20], [16, 21], [16, 22], [91, 92],
+ [92, 93], [93, 94], [94, 95], [91, 96], [96, 97],
+ [97, 98], [98, 99], [91, 100], [100, 101], [101, 102],
+ [102, 103], [91, 104], [104, 105], [105, 106],
+ [106, 107], [91, 108], [108, 109], [109, 110],
+ [110, 111], [112, 113], [113, 114], [114, 115],
+ [115, 116], [112, 117], [117, 118], [118, 119],
+ [119, 120], [112, 121], [121, 122], [122, 123],
+ [123, 124], [112, 125], [125, 126], [126, 127],
+ [127, 128], [112, 129], [129, 130], [130, 131],
+ [131, 132]]
+ radius = 1
+
+ elif dataset == 'TopDownAicDataset':
+ kpt_num = 14
+ skeleton = [[2, 1], [1, 0], [0, 13], [13, 3], [3, 4], [4, 5],
+ [8, 7], [7, 6], [6, 9], [9, 10], [10, 11], [12, 13],
+ [0, 6], [3, 9]]
+
+ elif dataset == 'TopDownMpiiDataset':
+ kpt_num = 16
+ skeleton = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5], [6, 7],
+ [7, 8], [8, 9], [8, 12], [12, 11], [11, 10], [8, 13],
+ [13, 14], [14, 15]]
+
+ elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
+ 'PanopticDataset'):
+ kpt_num = 21
+ skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7],
+ [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13],
+ [13, 14], [14, 15], [15, 16], [0, 17], [17, 18],
+ [18, 19], [19, 20]]
+
+ elif dataset == 'InterHand2DDataset':
+ kpt_num = 21
+ skeleton = [[0, 1], [1, 2], [2, 3], [4, 5], [5, 6], [6, 7], [8, 9],
+ [9, 10], [10, 11], [12, 13], [13, 14], [14, 15],
+ [16, 17], [17, 18], [18, 19], [3, 20], [7, 20],
+ [11, 20], [15, 20], [19, 20]]
+
+ else:
+ raise NotImplementedError()
+
+ elif dataset_info is not None:
+ kpt_num = dataset_info.keypoint_num
+ skeleton = dataset_info.skeleton
+
+ for res in result:
+ track_id = res['track_id']
+ bbox_color = palette[track_id % len(palette)]
+ pose_kpt_color = palette[[track_id % len(palette)] * kpt_num]
+ pose_link_color = palette[[track_id % len(palette)] * len(skeleton)]
+ img = model.show_result(
+ img, [res],
+ skeleton,
+ radius=radius,
+ thickness=thickness,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ bbox_color=tuple(bbox_color.tolist()),
+ kpt_score_thr=kpt_score_thr,
+ show=show,
+ out_file=out_file)
+
+ return img
diff --git a/vendor/ViTPose/mmpose/apis/test.py b/vendor/ViTPose/mmpose/apis/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3843b5a594c03cf82144f6c3b3805a9221f16d72
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/test.py
@@ -0,0 +1,191 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.runner import get_dist_info
+
+
+def single_gpu_test(model, data_loader):
+ """Test model with a single gpu.
+
+    This method tests the model with a single gpu and displays a progress bar.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+
+
+ Returns:
+ list: The prediction results.
+ """
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for data in data_loader:
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.append(result)
+
+ # use the first key as main key to calculate the batch size
+ batch_size = len(next(iter(data.values())))
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+    This method tests the model with multiple gpus and collects the results
+    in one of two modes: gpu or cpu. With 'gpu_collect=True', it encodes
+    results to gpu tensors and uses gpu communication to collect them. In cpu
+    mode, it saves the results of the different gpus to 'tmpdir' and the rank
+    0 worker collects them.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for data in data_loader:
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.append(result)
+
+ if rank == 0:
+ # use the first key as main key to calculate the batch size
+ batch_size = len(next(iter(data.values())))
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ """Collect results in cpu mode.
+
+    It saves the results of the different gpus to 'tmpdir', and the rank 0
+    worker collects them.
+
+ Args:
+ result_part (list): Results to be collected
+ size (int): Result size.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode. Default: None
+
+ Returns:
+ list: Ordered results.
+ """
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ mmcv.mkdir_or_exist('.dist_test')
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # synchronizes all processes to make sure tmpdir exist
+ dist.barrier()
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
+ # synchronizes all processes for loading pickle file
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ """Collect results in gpu mode.
+
+    It encodes results to gpu tensors and uses gpu communication to collect
+    them.
+
+ Args:
+ result_part (list): Results to be collected
+ size (int): Result size.
+
+ Returns:
+ list: Ordered results.
+ """
+
+ rank, world_size = get_dist_info()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+ part_send[:shape_tensor[0]] = part_tensor
+ part_recv_list = [
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
+ ]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_list.append(
+ pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
+ return None
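+
+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (editor's addition). It assumes the model has
+# already been built and wrapped (e.g. with MMDataParallel or
+# MMDistributedDataParallel) and that `data_loader` comes from
+# `mmpose.datasets.build_dataloader`.
+# ---------------------------------------------------------------------------
+def _example_run_test(model, data_loader, distributed=False, gpu_collect=False):
+    if distributed:
+        # every rank runs inference on its shard; rank 0 gathers the results
+        return multi_gpu_test(
+            model, data_loader, tmpdir=None, gpu_collect=gpu_collect)
+    # single-process, single-gpu (or cpu) evaluation
+    return single_gpu_test(model, data_loader)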
diff --git a/vendor/ViTPose/mmpose/apis/train.py b/vendor/ViTPose/mmpose/apis/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c31f8b0b1ace6d27feb14b8d441fec6436ad9e2
--- /dev/null
+++ b/vendor/ViTPose/mmpose/apis/train.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
+ get_dist_info)
+from mmcv.utils import digit_version
+
+from mmpose.core import DistEvalHook, EvalHook, build_optimizers
+from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.utils import get_root_logger
+
+try:
+ from mmcv.runner import Fp16OptimizerHook
+except ImportError:
+ warnings.warn(
+ 'Fp16OptimizerHook from mmpose will be deprecated from '
+ 'v0.15.0. Please install mmcv>=1.1.4', DeprecationWarning)
+ from mmpose.core import Fp16OptimizerHook
+
+
+def init_random_seed(seed=None, device='cuda'):
+ """Initialize random seed.
+
+ If the seed is not set, the seed will be automatically randomized,
+ and then broadcast to all processes to prevent some potential bugs.
+
+ Args:
+ seed (int, Optional): The seed. Default to None.
+ device (str): The device where the seed will be put on.
+ Default to 'cuda'.
+
+ Returns:
+ int: Seed to be used.
+ """
+ if seed is not None:
+ return seed
+
+ # Make sure all ranks share the same random seed to prevent
+ # some potential bugs. Please refer to
+ # https://github.com/open-mmlab/mmdetection/issues/6339
+ rank, world_size = get_dist_info()
+ seed = np.random.randint(2**31)
+ if world_size == 1:
+ return seed
+
+ if rank == 0:
+ random_num = torch.tensor(seed, dtype=torch.int32, device=device)
+ else:
+ random_num = torch.tensor(0, dtype=torch.int32, device=device)
+ dist.broadcast(random_num, src=0)
+ return random_num.item()
+
+
+def train_model(model,
+ dataset,
+ cfg,
+ distributed=False,
+ validate=False,
+ timestamp=None,
+ meta=None):
+ """Train model entry function.
+
+ Args:
+ model (nn.Module): The model to be trained.
+ dataset (Dataset): Train dataset.
+ cfg (dict): The config dict for training.
+ distributed (bool): Whether to use distributed training.
+ Default: False.
+ validate (bool): Whether to do evaluation. Default: False.
+ timestamp (str | None): Local time for runner. Default: None.
+ meta (dict | None): Meta dict to record some important information.
+ Default: None
+ """
+ logger = get_root_logger(cfg.log_level)
+
+ # prepare data loaders
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+    # step 1: give default values and override them (if present) from cfg.data
+ loader_cfg = {
+ **dict(
+ seed=cfg.get('seed'),
+ drop_last=False,
+ dist=distributed,
+ num_gpus=len(cfg.gpu_ids)),
+ **({} if torch.__version__ != 'parrots' else dict(
+ prefetch_num=2,
+ pin_memory=False,
+ )),
+ **dict((k, cfg.data[k]) for k in [
+ 'samples_per_gpu',
+ 'workers_per_gpu',
+ 'shuffle',
+ 'seed',
+ 'drop_last',
+ 'prefetch_num',
+ 'pin_memory',
+ 'persistent_workers',
+ ] if k in cfg.data)
+ }
+
+ # step 2: cfg.data.train_dataloader has highest priority
+ train_loader_cfg = dict(loader_cfg, **cfg.data.get('train_dataloader', {}))
+
+ data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]
+
+    # determine whether to use adversarial training or not
+ use_adverserial_train = cfg.get('use_adversarial_train', False)
+
+ # put model on gpus
+ if distributed:
+ find_unused_parameters = cfg.get('find_unused_parameters', False)
+ # Sets the `find_unused_parameters` parameter in
+ # torch.nn.parallel.DistributedDataParallel
+
+ if use_adverserial_train:
+ # Use DistributedDataParallelWrapper for adversarial training
+ model = DistributedDataParallelWrapper(
+ model,
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters)
+ else:
+ model = MMDistributedDataParallel(
+ model.cuda(),
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters)
+ else:
+ if digit_version(mmcv.__version__) >= digit_version(
+ '1.4.4') or torch.cuda.is_available():
+ model = MMDataParallel(model, device_ids=cfg.gpu_ids)
+ else:
+ warnings.warn(
+ 'We recommend to use MMCV >= 1.4.4 for CPU training. '
+ 'See https://github.com/open-mmlab/mmpose/pull/1157 for '
+ 'details.')
+
+ # build runner
+ optimizer = build_optimizers(model, cfg.optimizer)
+
+ runner = EpochBasedRunner(
+ model,
+ optimizer=optimizer,
+ work_dir=cfg.work_dir,
+ logger=logger,
+ meta=meta)
+ # an ugly workaround to make .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ if use_adverserial_train:
+ # The optimizer step process is included in the train_step function
+ # of the model, so the runner should NOT include optimizer hook.
+ optimizer_config = None
+ else:
+ # fp16 setting
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ optimizer_config = Fp16OptimizerHook(
+ **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
+ elif distributed and 'type' not in cfg.optimizer_config:
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
+ else:
+ optimizer_config = cfg.optimizer_config
+
+ # register hooks
+ runner.register_training_hooks(cfg.lr_config, optimizer_config,
+ cfg.checkpoint_config, cfg.log_config,
+ cfg.get('momentum_config', None))
+ if distributed:
+ runner.register_hook(DistSamplerSeedHook())
+
+ # register eval hooks
+ if validate:
+ eval_cfg = cfg.get('evaluation', {})
+ val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+ dataloader_setting = dict(
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
+ # cfg.gpus will be ignored if distributed
+ num_gpus=len(cfg.gpu_ids),
+ dist=distributed,
+ drop_last=False,
+ shuffle=False)
+ dataloader_setting = dict(dataloader_setting,
+ **cfg.data.get('val_dataloader', {}))
+ val_dataloader = build_dataloader(val_dataset, **dataloader_setting)
+ eval_hook = DistEvalHook if distributed else EvalHook
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
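+
+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (editor's addition). `build_posenet` comes from
+# mmpose.models and `Config` from mmcv; the config path is a placeholder and
+# the config is assumed to define the usual mmpose fields (data, optimizer,
+# lr_config, log_config, total_epochs, workflow, ...). Single-GPU,
+# non-distributed training is assumed.
+# ---------------------------------------------------------------------------
+def _example_train(config_path='configs/some_pose_config.py'):
+    from mmcv import Config
+    from mmpose.models import build_posenet
+
+    cfg = Config.fromfile(config_path)
+    cfg.gpu_ids = [0]
+    cfg.work_dir = './work_dirs/example'
+    cfg.resume_from = None
+    cfg.load_from = None
+
+    model = build_posenet(cfg.model)
+    datasets = [build_dataset(cfg.data.train)]
+    train_model(model, datasets, cfg, distributed=False, validate=True)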
diff --git a/vendor/ViTPose/mmpose/core/__init__.py b/vendor/ViTPose/mmpose/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..66185b72c47c99a0d296bf65c72f50a47f2d080c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .camera import * # noqa: F401, F403
+from .evaluation import * # noqa: F401, F403
+from .fp16 import * # noqa: F401, F403
+from .optimizer import * # noqa: F401, F403
+from .post_processing import * # noqa: F401, F403
+from .utils import * # noqa: F401, F403
+from .visualization import * # noqa: F401, F403
diff --git a/vendor/ViTPose/mmpose/core/camera/__init__.py b/vendor/ViTPose/mmpose/core/camera/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4a3c5526560996791a85f0d84a72a66286486ca
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/camera/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .camera_base import CAMERAS
+from .single_camera import SimpleCamera
+from .single_camera_torch import SimpleCameraTorch
+
+__all__ = ['CAMERAS', 'SimpleCamera', 'SimpleCameraTorch']
diff --git a/vendor/ViTPose/mmpose/core/camera/camera_base.py b/vendor/ViTPose/mmpose/core/camera/camera_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..28b23e7c6279e3613265a949df91f6ced0413b99
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/camera/camera_base.py
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+
+from mmcv.utils import Registry
+
+CAMERAS = Registry('camera')
+
+
+class SingleCameraBase(metaclass=ABCMeta):
+ """Base class for single camera model.
+
+ Args:
+ param (dict): Camera parameters
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_world: Project points from camera coordinates to world
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ @abstractmethod
+ def __init__(self, param):
+ """Load camera parameters and check validity."""
+
+ def world_to_camera(self, X):
+ """Project points from world coordinates to camera coordinates."""
+ raise NotImplementedError
+
+ def camera_to_world(self, X):
+ """Project points from camera coordinates to world coordinates."""
+ raise NotImplementedError
+
+ def camera_to_pixel(self, X):
+ """Project points from camera coordinates to pixel coordinates."""
+ raise NotImplementedError
+
+ def world_to_pixel(self, X):
+ """Project points from world coordinates to pixel coordinates."""
+ _X = self.world_to_camera(X)
+ return self.camera_to_pixel(_X)
diff --git a/vendor/ViTPose/mmpose/core/camera/single_camera.py b/vendor/ViTPose/mmpose/core/camera/single_camera.py
new file mode 100644
index 0000000000000000000000000000000000000000..cabd79941af5c81110876e94ce6103cc02ea5078
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/camera/single_camera.py
@@ -0,0 +1,123 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from .camera_base import CAMERAS, SingleCameraBase
+
+
+@CAMERAS.register_module()
+class SimpleCamera(SingleCameraBase):
+ """Camera model to calculate coordinate transformation with given
+ intrinsic/extrinsic camera parameters.
+
+ Note:
+ The keypoint coordinate should be an np.ndarray with a shape of
+ [...,J, C] where J is the keypoint number of an instance, and C is
+ the coordinate dimension. For example:
+
+ [J, C]: shape of joint coordinates of a person with J joints.
+ [N, J, C]: shape of a batch of person joint coordinates.
+ [N, T, J, C]: shape of a batch of pose sequences.
+
+ Args:
+ param (dict): camera parameters including:
+ - R: 3x3, camera rotation matrix (camera-to-world)
+ - T: 3x1, camera translation (camera-to-world)
+ - K: (optional) 2x3, camera intrinsic matrix
+ - k: (optional) nx1, camera radial distortion coefficients
+ - p: (optional) mx1, camera tangential distortion coefficients
+ - f: (optional) 2x1, camera focal length
+ - c: (optional) 2x1, camera center
+ if K is not provided, it will be calculated from f and c.
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ def __init__(self, param):
+
+ self.param = {}
+ # extrinsic param
+ R = np.array(param['R'], dtype=np.float32)
+ T = np.array(param['T'], dtype=np.float32)
+ assert R.shape == (3, 3)
+ assert T.shape == (3, 1)
+ # The camera matrices are transposed in advance because the joint
+ # coordinates are stored as row vectors.
+ self.param['R_c2w'] = R.T
+ self.param['T_c2w'] = T.T
+ self.param['R_w2c'] = R
+ self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']
+
+ # intrinsic param
+ if 'K' in param:
+ K = np.array(param['K'], dtype=np.float32)
+ assert K.shape == (2, 3)
+ self.param['K'] = K.T
+ self.param['f'] = np.array([K[0, 0], K[1, 1]])[:, np.newaxis]
+ self.param['c'] = np.array([K[0, 2], K[1, 2]])[:, np.newaxis]
+ elif 'f' in param and 'c' in param:
+ f = np.array(param['f'], dtype=np.float32)
+ c = np.array(param['c'], dtype=np.float32)
+ assert f.shape == (2, 1)
+ assert c.shape == (2, 1)
+ self.param['K'] = np.concatenate((np.diagflat(f), c), axis=-1).T
+ self.param['f'] = f
+ self.param['c'] = c
+ else:
+ raise ValueError('Camera intrinsic parameters are missing. '
+ 'Either "K" or "f"&"c" should be provided.')
+
+ # distortion param
+ if 'k' in param and 'p' in param:
+ self.undistortion = True
+ self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
+ self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
+ assert self.param['k'].size in {3, 6}
+ assert self.param['p'].size == 2
+ else:
+ self.undistortion = False
+
+ def world_to_camera(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_w2c'] + self.param['T_w2c']
+
+ def camera_to_world(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_c2w'] + self.param['T_c2w']
+
+ def camera_to_pixel(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+
+ _X = X / X[..., 2:]
+
+ if self.undistortion:
+ k = self.param['k']
+ p = self.param['p']
+ _X_2d = _X[..., :2]
+ r2 = (_X_2d**2).sum(-1)
+ radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
+ if k.size == 6:
+ radial /= 1 + sum(
+ (ki * r2**(i + 1) for i, ki in enumerate(k[3:])))
+
+ tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])
+
+ _X[..., :2] = _X_2d * (radial + tangential)[..., None] + np.outer(
+ r2, p[::-1]).reshape(_X_2d.shape)
+ return _X @ self.param['K']
+
+ def pixel_to_camera(self, X):
+ assert isinstance(X, np.ndarray)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ _X = X.copy()
+        _X[:, :2] = ((X[:, :2] - self.param['c'].T) / self.param['f'].T *
+                     X[:, [2]])
+ return _X
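A short usage sketch for SimpleCamera (assuming the vendored mmpose package is importable; the camera parameters below are arbitrary and only illustrate the expected shapes):

```python
import numpy as np
from mmpose.core.camera import SimpleCamera

# Arbitrary example parameters: identity rotation, camera 2 m behind the
# world origin along -z, 1000 px focal length, 640x480 principal point.
param = dict(
    R=np.eye(3),                      # camera-to-world rotation, 3x3
    T=np.array([[0.], [0.], [-2.]]),  # camera-to-world translation, 3x1
    f=np.array([[1000.], [1000.]]),   # focal length, 2x1
    c=np.array([[320.], [240.]]),     # principal point, 2x1
)
camera = SimpleCamera(param)

joints_world = np.random.rand(17, 3).astype(np.float32)  # [J, 3]
joints_cam = camera.world_to_camera(joints_world)         # [J, 3]
joints_px = camera.camera_to_pixel(joints_cam)            # [J, 2]
assert np.allclose(camera.world_to_pixel(joints_world), joints_px)
```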
diff --git a/vendor/ViTPose/mmpose/core/camera/single_camera_torch.py b/vendor/ViTPose/mmpose/core/camera/single_camera_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..22eb72f23d6eecf1b5c5a9b570a4f142fcf6e02a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/camera/single_camera_torch.py
@@ -0,0 +1,118 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from .camera_base import CAMERAS, SingleCameraBase
+
+
+@CAMERAS.register_module()
+class SimpleCameraTorch(SingleCameraBase):
+ """Camera model to calculate coordinate transformation with given
+ intrinsic/extrinsic camera parameters.
+
+ Notes:
+ The keypoint coordinate should be an np.ndarray with a shape of
+ [...,J, C] where J is the keypoint number of an instance, and C is
+ the coordinate dimension. For example:
+
+ [J, C]: shape of joint coordinates of a person with J joints.
+ [N, J, C]: shape of a batch of person joint coordinates.
+ [N, T, J, C]: shape of a batch of pose sequences.
+
+ Args:
+ param (dict): camera parameters including:
+ - R: 3x3, camera rotation matrix (camera-to-world)
+ - T: 3x1, camera translation (camera-to-world)
+ - K: (optional) 2x3, camera intrinsic matrix
+ - k: (optional) nx1, camera radial distortion coefficients
+ - p: (optional) mx1, camera tangential distortion coefficients
+ - f: (optional) 2x1, camera focal length
+ - c: (optional) 2x1, camera center
+ if K is not provided, it will be calculated from f and c.
+
+ Methods:
+ world_to_camera: Project points from world coordinates to camera
+ coordinates
+ camera_to_pixel: Project points from camera coordinates to pixel
+ coordinates
+ world_to_pixel: Project points from world coordinates to pixel
+ coordinates
+ """
+
+ def __init__(self, param, device):
+
+ self.param = {}
+ # extrinsic param
+ R = torch.tensor(param['R'], device=device)
+ T = torch.tensor(param['T'], device=device)
+
+ assert R.shape == (3, 3)
+ assert T.shape == (3, 1)
+ # The camera matrices are transposed in advance because the joint
+ # coordinates are stored as row vectors.
+ self.param['R_c2w'] = R.T
+ self.param['T_c2w'] = T.T
+ self.param['R_w2c'] = R
+ self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']
+
+ # intrinsic param
+ if 'K' in param:
+ K = torch.tensor(param['K'], device=device)
+ assert K.shape == (2, 3)
+ self.param['K'] = K.T
+ self.param['f'] = torch.tensor([[K[0, 0]], [K[1, 1]]],
+ device=device)
+ self.param['c'] = torch.tensor([[K[0, 2]], [K[1, 2]]],
+ device=device)
+ elif 'f' in param and 'c' in param:
+ f = torch.tensor(param['f'], device=device)
+ c = torch.tensor(param['c'], device=device)
+ assert f.shape == (2, 1)
+ assert c.shape == (2, 1)
+ self.param['K'] = torch.cat([torch.diagflat(f), c], dim=-1).T
+ self.param['f'] = f
+ self.param['c'] = c
+ else:
+ raise ValueError('Camera intrinsic parameters are missing. '
+ 'Either "K" or "f"&"c" should be provided.')
+
+ # distortion param
+ if 'k' in param and 'p' in param:
+ self.undistortion = True
+ self.param['k'] = torch.tensor(param['k'], device=device).view(-1)
+ self.param['p'] = torch.tensor(param['p'], device=device).view(-1)
+ assert len(self.param['k']) in {3, 6}
+ assert len(self.param['p']) == 2
+ else:
+ self.undistortion = False
+
+ def world_to_camera(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_w2c'] + self.param['T_w2c']
+
+ def camera_to_world(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+ return X @ self.param['R_c2w'] + self.param['T_c2w']
+
+ def camera_to_pixel(self, X):
+ assert isinstance(X, torch.Tensor)
+ assert X.ndim >= 2 and X.shape[-1] == 3
+
+ _X = X / X[..., 2:]
+
+ if self.undistortion:
+ k = self.param['k']
+ p = self.param['p']
+ _X_2d = _X[..., :2]
+ r2 = (_X_2d**2).sum(-1)
+ radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
+            if k.numel() == 6:  # `.size` is a method on torch.Tensor
+ radial /= 1 + sum(
+ (ki * r2**(i + 1) for i, ki in enumerate(k[3:])))
+
+ tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])
+
+ _X[..., :2] = _X_2d * (radial + tangential)[..., None] + torch.ger(
+ r2, p.flip([0])).reshape(_X_2d.shape)
+ return _X @ self.param['K']
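A small consistency sketch (again assuming the vendored mmpose is importable; CPU is sufficient here): the torch camera should agree with its numpy counterpart.

```python
import numpy as np
import torch
from mmpose.core.camera import SimpleCamera, SimpleCameraTorch

param = dict(
    R=np.eye(3, dtype=np.float32),
    T=np.array([[0.], [0.], [-2.]], dtype=np.float32),
    f=np.array([[1000.], [1000.]], dtype=np.float32),
    c=np.array([[320.], [240.]], dtype=np.float32),
)
cam_np = SimpleCamera(param)
cam_pt = SimpleCameraTorch(param, device='cpu')

joints = np.random.rand(17, 3).astype(np.float32)
px_np = cam_np.world_to_pixel(joints)
px_pt = cam_pt.world_to_pixel(torch.from_numpy(joints))
assert np.allclose(px_np, px_pt.numpy(), atol=1e-4)
```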
diff --git a/vendor/ViTPose/mmpose/core/distributed_wrapper.py b/vendor/ViTPose/mmpose/core/distributed_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..c67aceec992085e9952ea70c62009e9ec1db30ca
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/distributed_wrapper.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.parallel import MODULE_WRAPPERS as MMCV_MODULE_WRAPPERS
+from mmcv.parallel import MMDistributedDataParallel
+from mmcv.parallel.scatter_gather import scatter_kwargs
+from mmcv.utils import Registry
+from torch.cuda._utils import _get_device_index
+
+MODULE_WRAPPERS = Registry('module wrapper', parent=MMCV_MODULE_WRAPPERS)
+
+
+@MODULE_WRAPPERS.register_module()
+class DistributedDataParallelWrapper(nn.Module):
+ """A DistributedDataParallel wrapper for models in 3D mesh estimation task.
+
+ In 3D mesh estimation task, there is a need to wrap different modules in
+ the models with separate DistributedDataParallel. Otherwise, it will cause
+ errors for GAN training.
+    More specifically, a GAN model usually has two sub-modules: a generator
+    and a discriminator. If we wrap both of them in one standard
+    DistributedDataParallel, it will cause errors during training, because
+    when we update the parameters of the generator (or discriminator), the
+    parameters of the discriminator (or generator) are not updated, which is
+    not allowed for DistributedDataParallel.
+ So we design this wrapper to separately wrap DistributedDataParallel
+ for generator and discriminator.
+
+ In this wrapper, we perform two operations:
+ 1. Wrap the modules in the models with separate MMDistributedDataParallel.
+ Note that only modules with parameters will be wrapped.
+ 2. Do scatter operation for 'forward', 'train_step' and 'val_step'.
+
+    Note that the arguments of this wrapper are the same as those in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+
+ Args:
+ module (nn.Module): Module that needs to be wrapped.
+ device_ids (list[int | `torch.device`]): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ dim (int, optional): Same as that in the official scatter function in
+ pytorch. Defaults to 0.
+ broadcast_buffers (bool): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ Defaults to False.
+ find_unused_parameters (bool, optional): Same as that in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ Traverse the autograd graph of all tensors contained in returned
+ value of the wrapped module’s forward function. Defaults to False.
+ kwargs (dict): Other arguments used in
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
+ """
+
+ def __init__(self,
+ module,
+ device_ids,
+ dim=0,
+ broadcast_buffers=False,
+ find_unused_parameters=False,
+ **kwargs):
+ super().__init__()
+ assert len(device_ids) == 1, (
+            'Currently, DistributedDataParallelWrapper only supports a '
+            'single CUDA device for each process. '
+ f'The length of device_ids must be 1, but got {len(device_ids)}.')
+ self.module = module
+ self.dim = dim
+ self.to_ddp(
+ device_ids=device_ids,
+ dim=dim,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ **kwargs)
+ self.output_device = _get_device_index(device_ids[0], True)
+
+ def to_ddp(self, device_ids, dim, broadcast_buffers,
+ find_unused_parameters, **kwargs):
+ """Wrap models with separate MMDistributedDataParallel.
+
+ It only wraps the modules with parameters.
+ """
+ for name, module in self.module._modules.items():
+ if next(module.parameters(), None) is None:
+ module = module.cuda()
+ elif all(not p.requires_grad for p in module.parameters()):
+ module = module.cuda()
+ else:
+ module = MMDistributedDataParallel(
+ module.cuda(),
+ device_ids=device_ids,
+ dim=dim,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ **kwargs)
+ self.module._modules[name] = module
+
+ def scatter(self, inputs, kwargs, device_ids):
+ """Scatter function.
+
+ Args:
+ inputs (Tensor): Input Tensor.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ device_ids (int): Device id.
+ """
+ return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
+
+ def forward(self, *inputs, **kwargs):
+ """Forward function.
+
+ Args:
+ inputs (tuple): Input data.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ return self.module(*inputs[0], **kwargs[0])
+
+ def train_step(self, *inputs, **kwargs):
+ """Train step function.
+
+ Args:
+ inputs (Tensor): Input Tensor.
+ kwargs (dict): Args for
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ output = self.module.train_step(*inputs[0], **kwargs[0])
+ return output
+
+ def val_step(self, *inputs, **kwargs):
+ """Validation step function.
+
+ Args:
+ inputs (tuple): Input data.
+ kwargs (dict): Args for ``scatter_kwargs``.
+ """
+ inputs, kwargs = self.scatter(inputs, kwargs,
+ [torch.cuda.current_device()])
+ output = self.module.val_step(*inputs[0], **kwargs[0])
+ return output
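The sketch below is an assumption-laden illustration rather than a runnable test: it presumes a single-GPU process in which torch.distributed has already been initialized. It shows the intended behaviour of the wrapper: parameter-free sub-modules are only moved to the GPU, while trainable sub-modules each get their own MMDistributedDataParallel.

```python
import torch
import torch.nn as nn

from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper


class ToyGAN(nn.Module):
    """Toy model with the generator/discriminator structure described above."""

    def __init__(self):
        super().__init__()
        self.generator = nn.Linear(8, 8)      # has parameters -> wrapped
        self.discriminator = nn.Linear(8, 1)  # has parameters -> wrapped
        self.renderer = nn.Identity()         # parameter-free -> only .cuda()


# Assumes torch.distributed.init_process_group(...) was already called and
# exactly one visible GPU is assigned to this process.
model = DistributedDataParallelWrapper(
    ToyGAN(), device_ids=[torch.cuda.current_device()])
# model.module.generator and model.module.discriminator are now separate
# MMDistributedDataParallel instances; forward/train_step/val_step scatter
# their inputs onto the local device before delegating to the wrapped module.
```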
diff --git a/vendor/ViTPose/mmpose/core/evaluation/__init__.py b/vendor/ViTPose/mmpose/core/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f9378429c8ddaa15f7ac17446bc9d484987df16
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_eval import (aggregate_scale, aggregate_stage_flip,
+ flip_feature_maps, get_group_preds,
+ split_ae_outputs)
+from .eval_hooks import DistEvalHook, EvalHook
+from .mesh_eval import compute_similarity_transform
+from .pose3d_eval import keypoint_3d_auc, keypoint_3d_pck, keypoint_mpjpe
+from .top_down_eval import (keypoint_auc, keypoint_epe, keypoint_pck_accuracy,
+ keypoints_from_heatmaps, keypoints_from_heatmaps3d,
+ keypoints_from_regression,
+ multilabel_classification_accuracy,
+ pose_pck_accuracy, post_dark_udp)
+
+__all__ = [
+ 'EvalHook', 'DistEvalHook', 'pose_pck_accuracy', 'keypoints_from_heatmaps',
+ 'keypoints_from_regression', 'keypoint_pck_accuracy', 'keypoint_3d_pck',
+ 'keypoint_3d_auc', 'keypoint_auc', 'keypoint_epe', 'get_group_preds',
+ 'split_ae_outputs', 'flip_feature_maps', 'aggregate_stage_flip',
+ 'aggregate_scale', 'compute_similarity_transform', 'post_dark_udp',
+ 'keypoint_mpjpe', 'keypoints_from_heatmaps3d',
+ 'multilabel_classification_accuracy'
+]
diff --git a/vendor/ViTPose/mmpose/core/evaluation/bottom_up_eval.py b/vendor/ViTPose/mmpose/core/evaluation/bottom_up_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b37d7c98e684284e3863922e7c7d2abedce0e24
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/bottom_up_eval.py
@@ -0,0 +1,333 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.core.post_processing import (get_warp_matrix, transform_preds,
+ warp_affine_joints)
+
+
+def split_ae_outputs(outputs, num_joints, with_heatmaps, with_ae,
+ select_output_index):
+ """Split multi-stage outputs into heatmaps & tags.
+
+ Args:
+ outputs (list(Tensor)): Outputs of network
+ num_joints (int): Number of joints
+ with_heatmaps (list[bool]): Option to output
+ heatmaps for different stages.
+ with_ae (list[bool]): Option to output
+ ae tags for different stages.
+ select_output_index (list[int]): Output keep the selected index
+
+ Returns:
+ tuple: A tuple containing multi-stage outputs.
+
+ - list[Tensor]: multi-stage heatmaps.
+ - list[Tensor]: multi-stage tags.
+ """
+
+ heatmaps = []
+ tags = []
+
+ # aggregate heatmaps from different stages
+ for i, output in enumerate(outputs):
+ if i not in select_output_index:
+ continue
+        # starting index of the associative embeddings
+ offset_feat = num_joints if with_heatmaps[i] else 0
+ if with_heatmaps[i]:
+ heatmaps.append(output[:, :num_joints])
+ if with_ae[i]:
+ tags.append(output[:, offset_feat:])
+
+ return heatmaps, tags
+
+
+def flip_feature_maps(feature_maps, flip_index=None):
+ """Flip the feature maps and swap the channels.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ flip_index (list[int] | None): Channel-flip indexes.
+ If None, do not flip channels.
+
+ Returns:
+ list[Tensor]: Flipped feature_maps.
+ """
+ flipped_feature_maps = []
+ for feature_map in feature_maps:
+ feature_map = torch.flip(feature_map, [3])
+ if flip_index is not None:
+ flipped_feature_maps.append(feature_map[:, flip_index, :, :])
+ else:
+ flipped_feature_maps.append(feature_map)
+
+ return flipped_feature_maps
+
+
+def _resize_average(feature_maps, align_corners, index=-1, resize_size=None):
+ """Resize the feature maps and compute the average.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+ list[Tensor]: Averaged feature_maps.
+ """
+
+ if feature_maps is None:
+ return None
+ feature_maps_avg = 0
+
+ feature_map_list = _resize_concate(
+ feature_maps, align_corners, index=index, resize_size=resize_size)
+ for feature_map in feature_map_list:
+ feature_maps_avg += feature_map
+
+ feature_maps_avg /= len(feature_map_list)
+ return [feature_maps_avg]
+
+
+def _resize_unsqueeze_concat(feature_maps,
+ align_corners,
+ index=-1,
+ resize_size=None):
+ """Resize, unsqueeze and concatenate the feature_maps.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+        list[Tensor]: Unsqueezed and concatenated feature_maps.
+ """
+ if feature_maps is None:
+ return None
+ feature_map_list = _resize_concate(
+ feature_maps, align_corners, index=index, resize_size=resize_size)
+
+ feat_dim = len(feature_map_list[0].shape) - 1
+ output_feature_maps = torch.cat(
+ [torch.unsqueeze(fmap, dim=feat_dim + 1) for fmap in feature_map_list],
+ dim=feat_dim + 1)
+ return [output_feature_maps]
+
+
+def _resize_concate(feature_maps, align_corners, index=-1, resize_size=None):
+ """Resize and concatenate the feature_maps.
+
+ Args:
+ feature_maps (list[Tensor]): Feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ index (int): Only used when `resize_size' is None.
+ If `resize_size' is None, the target size is the size
+ of the indexed feature maps.
+ resize_size (list[int, int]): The target size [w, h].
+
+ Returns:
+        list[Tensor]: Resized feature_maps.
+ """
+ if feature_maps is None:
+ return None
+
+ feature_map_list = []
+
+ if index < 0:
+ index += len(feature_maps)
+
+ if resize_size is None:
+ resize_size = (feature_maps[index].size(2),
+ feature_maps[index].size(3))
+
+ for feature_map in feature_maps:
+ ori_size = (feature_map.size(2), feature_map.size(3))
+ if ori_size != resize_size:
+ feature_map = torch.nn.functional.interpolate(
+ feature_map,
+ size=resize_size,
+ mode='bilinear',
+ align_corners=align_corners)
+
+ feature_map_list.append(feature_map)
+
+ return feature_map_list
+
+
+def aggregate_stage_flip(feature_maps,
+ feature_maps_flip,
+ index=-1,
+ project2image=True,
+ size_projected=None,
+ align_corners=False,
+ aggregate_stage='concat',
+ aggregate_flip='average'):
+ """Inference the model to get multi-stage outputs (heatmaps & tags), and
+ resize them to base sizes.
+
+ Args:
+ feature_maps (list[Tensor]): feature_maps can be heatmaps,
+ tags, and pafs.
+ feature_maps_flip (list[Tensor] | None): flipped feature_maps.
+ feature maps can be heatmaps, tags, and pafs.
+ project2image (bool): Option to resize to base scale.
+ size_projected (list[int, int]): Base size of heatmaps [w, h].
+ align_corners (bool): Align corners when performing interpolation.
+ aggregate_stage (str): Methods to aggregate multi-stage feature maps.
+            Options: 'concat', 'average'. Default: 'concat'.
+
+ - 'concat': Concatenate the original and the flipped feature maps.
+ - 'average': Get the average of the original and the flipped
+ feature maps.
+ aggregate_flip (str): Methods to aggregate the original and
+ the flipped feature maps. Options: 'concat', 'average', 'none'.
+        Default: 'average'.
+
+ - 'concat': Concatenate the original and the flipped feature maps.
+ - 'average': Get the average of the original and the flipped
+            feature maps.
+ - 'none': no flipped feature maps.
+
+ Returns:
+ list[Tensor]: Aggregated feature maps with shape [NxKxWxH].
+ """
+
+ if feature_maps_flip is None:
+ aggregate_flip = 'none'
+
+ output_feature_maps = []
+
+ if aggregate_stage == 'average':
+ _aggregate_stage_func = _resize_average
+ elif aggregate_stage == 'concat':
+ _aggregate_stage_func = _resize_concate
+ else:
+        raise NotImplementedError()
+
+ if project2image and size_projected:
+ _origin = _aggregate_stage_func(
+ feature_maps,
+ align_corners,
+ index=index,
+ resize_size=(size_projected[1], size_projected[0]))
+
+ _flipped = _aggregate_stage_func(
+ feature_maps_flip,
+ align_corners,
+ index=index,
+ resize_size=(size_projected[1], size_projected[0]))
+ else:
+ _origin = _aggregate_stage_func(
+ feature_maps, align_corners, index=index, resize_size=None)
+ _flipped = _aggregate_stage_func(
+ feature_maps_flip, align_corners, index=index, resize_size=None)
+
+ if aggregate_flip == 'average':
+ assert feature_maps_flip is not None
+ for _ori, _fli in zip(_origin, _flipped):
+ output_feature_maps.append((_ori + _fli) / 2.0)
+
+ elif aggregate_flip == 'concat':
+ assert feature_maps_flip is not None
+ output_feature_maps.append(*_origin)
+ output_feature_maps.append(*_flipped)
+
+ elif aggregate_flip == 'none':
+ if isinstance(_origin, list):
+ output_feature_maps.append(*_origin)
+ else:
+ output_feature_maps.append(_origin)
+ else:
+        raise NotImplementedError()
+
+ return output_feature_maps
+
+
+def aggregate_scale(feature_maps_list,
+ align_corners=False,
+ aggregate_scale='average'):
+ """Aggregate multi-scale outputs.
+
+ Note:
+ batch size: N
+ keypoints num : K
+ heatmap width: W
+ heatmap height: H
+
+ Args:
+ feature_maps_list (list[Tensor]): Aggregated feature maps.
+ align_corners (bool): Align corners when performing interpolation.
+ aggregate_scale (str): Methods to aggregate multi-scale feature maps.
+ Options: 'average', 'unsqueeze_concat'.
+
+ - 'average': Get the average of the feature maps.
+ - 'unsqueeze_concat': Concatenate the feature maps along new axis.
+            Default: 'average'.
+
+ Returns:
+ Tensor: Aggregated feature maps.
+ """
+
+ if aggregate_scale == 'average':
+ output_feature_maps = _resize_average(
+ feature_maps_list, align_corners, index=0, resize_size=None)
+
+ elif aggregate_scale == 'unsqueeze_concat':
+ output_feature_maps = _resize_unsqueeze_concat(
+ feature_maps_list, align_corners, index=0, resize_size=None)
+ else:
+        raise NotImplementedError()
+
+ return output_feature_maps[0]
+
+
+def get_group_preds(grouped_joints,
+ center,
+ scale,
+ heatmap_size,
+ use_udp=False):
+ """Transform the grouped joints back to the image.
+
+ Args:
+ grouped_joints (list): Grouped person joints.
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ heatmap_size (np.ndarray[2, ]): Size of the destination heatmaps.
+ use_udp (bool): Unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR'2020).
+
+ Returns:
+ list: List of the pose result for each person.
+ """
+ if len(grouped_joints) == 0:
+ return []
+
+ if use_udp:
+ if grouped_joints[0].shape[0] > 0:
+ heatmap_size_t = np.array(heatmap_size, dtype=np.float32) - 1.0
+ trans = get_warp_matrix(
+ theta=0,
+ size_input=heatmap_size_t,
+ size_dst=scale,
+ size_target=heatmap_size_t)
+ grouped_joints[0][..., :2] = \
+ warp_affine_joints(grouped_joints[0][..., :2], trans)
+ results = [person for person in grouped_joints[0]]
+ else:
+ results = []
+ for person in grouped_joints[0]:
+ joints = transform_preds(person, center, scale, heatmap_size)
+ results.append(joints)
+
+ return results
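A short usage sketch of the flip-test aggregation path (assuming the vendored mmpose and its dependencies are importable; the tensors are random stand-ins for real heatmaps):

```python
import torch
from mmpose.core.evaluation import (aggregate_scale, aggregate_stage_flip,
                                    flip_feature_maps)

heatmaps = [torch.rand(1, 17, 64, 48)]            # one stage, [N, K, H, W]
flipped = flip_feature_maps(heatmaps, flip_index=list(range(17)))

combined = aggregate_stage_flip(
    heatmaps, flipped,
    project2image=True,
    size_projected=(192, 256),                    # input image size [w, h]
    aggregate_stage='concat',
    aggregate_flip='average')
print(combined[0].shape)                          # torch.Size([1, 17, 256, 192])

final = aggregate_scale(combined, aggregate_scale='average')
print(final.shape)                                # torch.Size([1, 17, 256, 192])
```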
diff --git a/vendor/ViTPose/mmpose/core/evaluation/eval_hooks.py b/vendor/ViTPose/mmpose/core/evaluation/eval_hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf36a038859ee7d7a77b68706ee96c2154fc39cc
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/eval_hooks.py
@@ -0,0 +1,98 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv.runner import DistEvalHook as _DistEvalHook
+from mmcv.runner import EvalHook as _EvalHook
+
+MMPOSE_GREATER_KEYS = [
+ 'acc', 'ap', 'ar', 'pck', 'auc', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc'
+]
+MMPOSE_LESS_KEYS = ['loss', 'epe', 'nme', 'mpjpe', 'p-mpjpe', 'n-mpjpe']
+
+
+class EvalHook(_EvalHook):
+
+ def __init__(self,
+ dataloader,
+ start=None,
+ interval=1,
+ by_epoch=True,
+ save_best=None,
+ rule=None,
+ test_fn=None,
+ greater_keys=MMPOSE_GREATER_KEYS,
+ less_keys=MMPOSE_LESS_KEYS,
+ **eval_kwargs):
+
+ if test_fn is None:
+ from mmpose.apis import single_gpu_test
+ test_fn = single_gpu_test
+
+ # to be compatible with the config before v0.16.0
+
+ # remove "gpu_collect" from eval_kwargs
+ if 'gpu_collect' in eval_kwargs:
+ warnings.warn(
+ '"gpu_collect" will be deprecated in EvalHook.'
+ 'Please remove it from the config.', DeprecationWarning)
+ _ = eval_kwargs.pop('gpu_collect')
+
+ # update "save_best" according to "key_indicator" and remove the
+ # latter from eval_kwargs
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
+ warnings.warn(
+ '"key_indicator" will be deprecated in EvalHook.'
+ 'Please use "save_best" to specify the metric key,'
+ 'e.g., save_best="AP".', DeprecationWarning)
+
+ key_indicator = eval_kwargs.pop('key_indicator', 'AP')
+ if save_best is True and key_indicator is None:
+ raise ValueError('key_indicator should not be None, when '
+ 'save_best is set to True.')
+ save_best = key_indicator
+
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
+ rule, test_fn, greater_keys, less_keys, **eval_kwargs)
+
+
+class DistEvalHook(_DistEvalHook):
+
+ def __init__(self,
+ dataloader,
+ start=None,
+ interval=1,
+ by_epoch=True,
+ save_best=None,
+ rule=None,
+ test_fn=None,
+ greater_keys=MMPOSE_GREATER_KEYS,
+ less_keys=MMPOSE_LESS_KEYS,
+ broadcast_bn_buffer=True,
+ tmpdir=None,
+ gpu_collect=False,
+ **eval_kwargs):
+
+ if test_fn is None:
+ from mmpose.apis import multi_gpu_test
+ test_fn = multi_gpu_test
+
+ # to be compatible with the config before v0.16.0
+
+ # update "save_best" according to "key_indicator" and remove the
+ # latter from eval_kwargs
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
+ warnings.warn(
+ '"key_indicator" will be deprecated in EvalHook.'
+ 'Please use "save_best" to specify the metric key,'
+ 'e.g., save_best="AP".', DeprecationWarning)
+
+ key_indicator = eval_kwargs.pop('key_indicator', 'AP')
+ if save_best is True and key_indicator is None:
+ raise ValueError('key_indicator should not be None, when '
+ 'save_best is set to True.')
+ save_best = key_indicator
+
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
+ rule, test_fn, greater_keys, less_keys,
+ broadcast_bn_buffer, tmpdir, gpu_collect,
+ **eval_kwargs)
diff --git a/vendor/ViTPose/mmpose/core/evaluation/mesh_eval.py b/vendor/ViTPose/mmpose/core/evaluation/mesh_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..683b4539b29d1829a324de424c6d9f85a7037e5d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/mesh_eval.py
@@ -0,0 +1,66 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/akanazawa/hmr
+# Original licence: Copyright (c) 2018 akanazawa, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+
+
+def compute_similarity_transform(source_points, target_points):
+ """Computes a similarity transform (sR, t) that takes a set of 3D points
+ source_points (N x 3) closest to a set of 3D points target_points, where R
+ is an 3x3 rotation matrix, t 3x1 translation, s scale. And return the
+ transformed 3D points source_points_hat (N x 3). i.e. solves the orthogonal
+ Procrutes problem.
+
+ Note:
+ Points number: N
+
+ Args:
+ source_points (np.ndarray): Source point set with shape [N, 3].
+ target_points (np.ndarray): Target point set with shape [N, 3].
+
+ Returns:
+ np.ndarray: Transformed source point set with shape [N, 3].
+ """
+
+ assert target_points.shape[0] == source_points.shape[0]
+ assert target_points.shape[1] == 3 and source_points.shape[1] == 3
+
+ source_points = source_points.T
+ target_points = target_points.T
+
+ # 1. Remove mean.
+ mu1 = source_points.mean(axis=1, keepdims=True)
+ mu2 = target_points.mean(axis=1, keepdims=True)
+ X1 = source_points - mu1
+ X2 = target_points - mu2
+
+ # 2. Compute variance of X1 used for scale.
+ var1 = np.sum(X1**2)
+
+ # 3. The outer product of X1 and X2.
+ K = X1.dot(X2.T)
+
+ # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are
+ # singular vectors of K.
+ U, _, Vh = np.linalg.svd(K)
+ V = Vh.T
+ # Construct Z that fixes the orientation of R to get det(R)=1.
+ Z = np.eye(U.shape[0])
+ Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T)))
+ # Construct R.
+ R = V.dot(Z.dot(U.T))
+
+ # 5. Recover scale.
+ scale = np.trace(R.dot(K)) / var1
+
+ # 6. Recover translation.
+ t = mu2 - scale * (R.dot(mu1))
+
+ # 7. Transform the source points:
+ source_points_hat = scale * R.dot(source_points) + t
+
+ source_points_hat = source_points_hat.T
+
+ return source_points_hat
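A quick sanity check (assuming the vendored mmpose is importable): points that differ from the source only by a rotation, a scale and a translation should be recovered almost exactly.

```python
import numpy as np
from mmpose.core.evaluation import compute_similarity_transform

rng = np.random.default_rng(0)
source = rng.standard_normal((21, 3))

# Apply an arbitrary rotation about z, a scale and a translation.
theta = np.pi / 5
R = np.array([[np.cos(theta), -np.sin(theta), 0.],
              [np.sin(theta),  np.cos(theta), 0.],
              [0.,             0.,            1.]])
target = 1.7 * source @ R.T + np.array([0.3, -0.2, 0.5])

aligned = compute_similarity_transform(source, target)
print(np.abs(aligned - target).max())  # close to machine precision
```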
diff --git a/vendor/ViTPose/mmpose/core/evaluation/pose3d_eval.py b/vendor/ViTPose/mmpose/core/evaluation/pose3d_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..545778ca7441c2d3e8ec58449c8ca7b162322e9e
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/pose3d_eval.py
@@ -0,0 +1,171 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from .mesh_eval import compute_similarity_transform
+
+
+def keypoint_mpjpe(pred, gt, mask, alignment='none'):
+ """Calculate the mean per-joint position error (MPJPE) and the error after
+ rigid alignment with the ground truth (P-MPJPE).
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - keypoint_dims: C
+
+ Args:
+ pred (np.ndarray): Predicted keypoint location with shape [N, K, C].
+ gt (np.ndarray): Groundtruth keypoint location with shape [N, K, C].
+ mask (np.ndarray): Visibility of the target with shape [N, K].
+ False for invisible joints, and True for visible.
+ Invisible joints will be ignored for accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in
+ scale, rotation and translation.
+    Returns:
+        float: mean per-joint position error under the chosen alignment
+            ('none' -> MPJPE, 'scale' -> N-MPJPE, 'procrustes' -> P-MPJPE).
+    """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)[mask].mean()
+
+ return error
+
+
+def keypoint_3d_pck(pred, gt, mask, alignment='none', threshold=0.15):
+ """Calculate the Percentage of Correct Keypoints (3DPCK) w. or w/o rigid
+ alignment.
+
+    Paper ref: Monocular 3D Human Pose Estimation In The Wild Using Improved
+    CNN Supervision, 3DV'2017.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - keypoint_dims: C
+
+ Args:
+ pred (np.ndarray[N, K, C]): Predicted keypoint location.
+ gt (np.ndarray[N, K, C]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in scale,
+ rotation and translation.
+
+        threshold: If the L2 distance between the prediction and the
+            groundtruth is less than the threshold, the prediction is
+            considered correct. Default: 0.15 (m).
+
+ Returns:
+ pck: percentage of correct keypoints.
+ """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)
+ pck = (error < threshold).astype(np.float32)[mask].mean() * 100
+
+ return pck
+
+
+def keypoint_3d_auc(pred, gt, mask, alignment='none'):
+ """Calculate the Area Under the Curve (3DAUC) computed for a range of 3DPCK
+ thresholds.
+
+    Paper ref: Monocular 3D Human Pose Estimation In The Wild Using Improved
+    CNN Supervision, 3DV'2017.
+ This implementation is derived from mpii_compute_3d_pck.m, which is
+ provided as part of the MPI-INF-3DHP test data release.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ keypoint_dims: C
+
+ Args:
+ pred (np.ndarray[N, K, C]): Predicted keypoint location.
+ gt (np.ndarray[N, K, C]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ alignment (str, optional): method to align the prediction with the
+ groundtruth. Supported options are:
+
+ - ``'none'``: no alignment will be applied
+ - ``'scale'``: align in the least-square sense in scale
+ - ``'procrustes'``: align in the least-square sense in scale,
+ rotation and translation.
+
+ Returns:
+ auc: AUC computed for a range of 3DPCK thresholds.
+ """
+ assert mask.any()
+
+ if alignment == 'none':
+ pass
+ elif alignment == 'procrustes':
+ pred = np.stack([
+ compute_similarity_transform(pred_i, gt_i)
+ for pred_i, gt_i in zip(pred, gt)
+ ])
+ elif alignment == 'scale':
+ pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
+ pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
+ scale_factor = pred_dot_gt / pred_dot_pred
+ pred = pred * scale_factor[:, None, None]
+ else:
+ raise ValueError(f'Invalid value for alignment: {alignment}')
+
+ error = np.linalg.norm(pred - gt, ord=2, axis=-1)
+
+ thresholds = np.linspace(0., 0.15, 31)
+ pck_values = np.zeros(len(thresholds))
+ for i in range(len(thresholds)):
+ pck_values[i] = (error < thresholds[i]).astype(np.float32)[mask].mean()
+
+ auc = pck_values.mean() * 100
+
+ return auc
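A synthetic example (assuming the vendored mmpose is importable): a constant 5 cm offset on every joint yields an MPJPE of 0.05 m and, with the default 0.15 m threshold, a 3DPCK of 100.

```python
import numpy as np
from mmpose.core.evaluation import keypoint_3d_pck, keypoint_mpjpe

gt = np.random.rand(4, 17, 3)              # N=4 poses, K=17 joints, in metres
pred = gt + np.array([0.05, 0.0, 0.0])     # constant 5 cm error along x
mask = np.ones((4, 17), dtype=bool)

print(keypoint_mpjpe(pred, gt, mask))      # ~0.05
print(keypoint_3d_pck(pred, gt, mask))     # 100.0
```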
diff --git a/vendor/ViTPose/mmpose/core/evaluation/top_down_eval.py b/vendor/ViTPose/mmpose/core/evaluation/top_down_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee6a2501cf1eec1b16f7d58bf9fd62da0fa48ccf
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/evaluation/top_down_eval.py
@@ -0,0 +1,684 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import transform_preds
+
+
+def _calc_distances(preds, targets, mask, normalize):
+ """Calculate the normalized distances between preds and target.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (normally, D=2 or D=3)
+
+ Args:
+ preds (np.ndarray[N, K, D]): Predicted keypoint location.
+ targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize (np.ndarray[N, D]): Typical value is heatmap_size
+
+ Returns:
+ np.ndarray[K, N]: The normalized distances. \
+ If target keypoints are missing, the distance is -1.
+ """
+ N, K, _ = preds.shape
+ # set mask=0 when normalize==0
+ _mask = mask.copy()
+ _mask[np.where((normalize == 0).sum(1))[0], :] = False
+ distances = np.full((N, K), -1, dtype=np.float32)
+ # handle invalid values
+ normalize[np.where(normalize <= 0)] = 1e6
+ distances[_mask] = np.linalg.norm(
+ ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
+ return distances.T
+
+
+def _distance_acc(distances, thr=0.5):
+ """Return the percentage below the distance threshold, while ignoring
+ distances values with -1.
+
+ Note:
+ batch_size: N
+ Args:
+ distances (np.ndarray[N, ]): The normalized distances.
+ thr (float): Threshold of the distances.
+
+ Returns:
+ float: Percentage of distances below the threshold. \
+ If all target keypoints are missing, return -1.
+ """
+ distance_valid = distances != -1
+ num_distance_valid = distance_valid.sum()
+ if num_distance_valid > 0:
+ return (distances[distance_valid] < thr).sum() / num_distance_valid
+ return -1
+
+
+def _get_max_preds(heatmaps):
+ """Get keypoint predictions from score maps.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+
+ Returns:
+ tuple: A tuple containing aggregated results.
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ assert isinstance(heatmaps,
+ np.ndarray), ('heatmaps should be numpy.ndarray')
+ assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+
+ N, K, _, W = heatmaps.shape
+ heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+ idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+ maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+
+ preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+ preds[:, :, 0] = preds[:, :, 0] % W
+ preds[:, :, 1] = preds[:, :, 1] // W
+
+ preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
+ return preds, maxvals
+
+
+def _get_max_preds_3d(heatmaps):
+ """Get keypoint predictions from 3D score maps.
+
+ Note:
+ batch size: N
+ num keypoints: K
+ heatmap depth size: D
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
+
+ Returns:
+ tuple: A tuple containing aggregated results.
+
+ - preds (np.ndarray[N, K, 3]): Predicted keypoint location.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ assert isinstance(heatmaps, np.ndarray), \
+ ('heatmaps should be numpy.ndarray')
+ assert heatmaps.ndim == 5, 'heatmaps should be 5-ndim'
+
+ N, K, D, H, W = heatmaps.shape
+ heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+ idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+ maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+
+ preds = np.zeros((N, K, 3), dtype=np.float32)
+ _idx = idx[..., 0]
+ preds[..., 2] = _idx // (H * W)
+ preds[..., 1] = (_idx // W) % H
+ preds[..., 0] = _idx % W
+
+ preds = np.where(maxvals > 0.0, preds, -1)
+ return preds, maxvals
+
+
+def pose_pck_accuracy(output, target, mask, thr=0.05, normalize=None):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints from heatmaps.
+
+ Note:
+ PCK metric measures accuracy of the localization of the body joints.
+ The distances between predicted positions and the ground-truth ones
+ are typically normalized by the bounding box size.
+ The threshold (thr) of the normalized distance is commonly set
+ as 0.05, 0.1 or 0.2 etc.
+
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ output (np.ndarray[N, K, H, W]): Model output heatmaps.
+ target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ thr (float): Threshold of PCK calculation. Default 0.05.
+ normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+
+ Returns:
+ tuple: A tuple containing keypoint accuracy.
+
+ - np.ndarray[K]: Accuracy of each keypoint.
+ - float: Averaged accuracy across all keypoints.
+ - int: Number of valid keypoints.
+ """
+ N, K, H, W = output.shape
+ if K == 0:
+ return None, 0, 0
+ if normalize is None:
+ normalize = np.tile(np.array([[H, W]]), (N, 1))
+
+ pred, _ = _get_max_preds(output)
+ gt, _ = _get_max_preds(target)
+ return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)
+
+
+def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ PCK metric measures accuracy of the localization of the body joints.
+ The distances between predicted positions and the ground-truth ones
+ are typically normalized by the bounding box size.
+ The threshold (thr) of the normalized distance is commonly set
+ as 0.05, 0.1 or 0.2 etc.
+
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ thr (float): Threshold of PCK calculation.
+ normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+
+ Returns:
+ tuple: A tuple containing keypoint accuracy.
+
+ - acc (np.ndarray[K]): Accuracy of each keypoint.
+ - avg_acc (float): Averaged accuracy across all keypoints.
+ - cnt (int): Number of valid keypoints.
+ """
+ distances = _calc_distances(pred, gt, mask, normalize)
+
+ acc = np.array([_distance_acc(d, thr) for d in distances])
+ valid_acc = acc[acc >= 0]
+ cnt = len(valid_acc)
+ avg_acc = valid_acc.mean() if cnt > 0 else 0
+ return acc, avg_acc, cnt
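A small worked example of the PCK computation above (assuming the vendored mmpose is importable): a uniform 1-pixel offset, normalized by a 48x64 heatmap size, stays well below the 0.05 threshold.

```python
import numpy as np
from mmpose.core.evaluation import keypoint_pck_accuracy

gt = np.random.rand(2, 17, 2) * np.array([48, 64])  # keypoints in a 48x64 map
pred = gt + 1.0                                      # 1-pixel offset everywhere
mask = np.ones((2, 17), dtype=bool)
normalize = np.tile(np.array([[48., 64.]]), (2, 1))

acc, avg_acc, cnt = keypoint_pck_accuracy(pred, gt, mask, thr=0.05,
                                          normalize=normalize)
print(acc.shape, avg_acc, cnt)                       # (17,) 1.0 17
```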
+
+
+def keypoint_auc(pred, gt, mask, normalize, num_step=20):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+        normalize (float): Normalization factor.
+        num_step (int): Number of threshold steps used to sample the PCK
+            curve. Default: 20.
+
+ Returns:
+ float: Area under curve.
+ """
+ nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
+ x = [1.0 * i / num_step for i in range(num_step)]
+ y = []
+ for thr in x:
+ _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
+ y.append(avg_acc)
+
+ auc = 0
+ for i in range(num_step):
+ auc += 1.0 / num_step * y[i]
+ return auc
+
+
+def keypoint_nme(pred, gt, mask, normalize_factor):
+ """Calculate the normalized mean error (NME).
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize_factor (np.ndarray[N, 2]): Normalization factor.
+
+ Returns:
+ float: normalized mean error
+ """
+ distances = _calc_distances(pred, gt, mask, normalize_factor)
+ distance_valid = distances[distances != -1]
+ return distance_valid.sum() / max(1, len(distance_valid))
+
+
+def keypoint_epe(pred, gt, mask):
+ """Calculate the end-point error.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+
+ Returns:
+ float: Average end-point error.
+ """
+
+ distances = _calc_distances(
+ pred, gt, mask,
+ np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
+ distance_valid = distances[distances != -1]
+ return distance_valid.sum() / max(1, len(distance_valid))
+
+
+def _taylor(heatmap, coord):
+ """Distribution aware coordinate decoding method.
+
+ Note:
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmap (np.ndarray[H, W]): Heatmap of a particular joint type.
+ coord (np.ndarray[2,]): Coordinates of the predicted keypoints.
+
+ Returns:
+ np.ndarray[2,]: Updated coordinates.
+ """
+ H, W = heatmap.shape[:2]
+ px, py = int(coord[0]), int(coord[1])
+ if 1 < px < W - 2 and 1 < py < H - 2:
+ dx = 0.5 * (heatmap[py][px + 1] - heatmap[py][px - 1])
+ dy = 0.5 * (heatmap[py + 1][px] - heatmap[py - 1][px])
+ dxx = 0.25 * (
+ heatmap[py][px + 2] - 2 * heatmap[py][px] + heatmap[py][px - 2])
+ dxy = 0.25 * (
+ heatmap[py + 1][px + 1] - heatmap[py - 1][px + 1] -
+ heatmap[py + 1][px - 1] + heatmap[py - 1][px - 1])
+ dyy = 0.25 * (
+ heatmap[py + 2 * 1][px] - 2 * heatmap[py][px] +
+ heatmap[py - 2 * 1][px])
+ derivative = np.array([[dx], [dy]])
+ hessian = np.array([[dxx, dxy], [dxy, dyy]])
+ if dxx * dyy - dxy**2 != 0:
+ hessianinv = np.linalg.inv(hessian)
+ offset = -hessianinv @ derivative
+ offset = np.squeeze(np.array(offset.T), axis=0)
+ coord += offset
+ return coord
+
+
+def post_dark_udp(coords, batch_heatmaps, kernel=3):
+ """DARK post-pocessing. Implemented by udp. Paper ref: Huang et al. The
+ Devil is in the Details: Delving into Unbiased Data Processing for Human
+ Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+
+ Note:
+ - batch size: B
+ - num keypoints: K
+ - num persons: N
+ - height of heatmaps: H
+ - width of heatmaps: W
+
+ B=1 for bottom_up paradigm where all persons share the same heatmap.
+ B=N for top_down paradigm where each person has its own heatmaps.
+
+ Args:
+ coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
+ batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
+ kernel (int): Gaussian kernel size (K) for modulation.
+
+ Returns:
+ np.ndarray([N, K, 2]): Refined coordinates.
+ """
+ if not isinstance(batch_heatmaps, np.ndarray):
+ batch_heatmaps = batch_heatmaps.cpu().numpy()
+ B, K, H, W = batch_heatmaps.shape
+ N = coords.shape[0]
+ assert (B == 1 or B == N)
+ for heatmaps in batch_heatmaps:
+ for heatmap in heatmaps:
+ cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
+ np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
+ np.log(batch_heatmaps, batch_heatmaps)
+
+ batch_heatmaps_pad = np.pad(
+ batch_heatmaps, ((0, 0), (0, 0), (1, 1), (1, 1)),
+ mode='edge').flatten()
+
+ index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
+ index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
+ index = index.astype(int).reshape(-1, 1)
+ i_ = batch_heatmaps_pad[index]
+ ix1 = batch_heatmaps_pad[index + 1]
+ iy1 = batch_heatmaps_pad[index + W + 2]
+ ix1y1 = batch_heatmaps_pad[index + W + 3]
+ ix1_y1_ = batch_heatmaps_pad[index - W - 3]
+ ix1_ = batch_heatmaps_pad[index - 1]
+ iy1_ = batch_heatmaps_pad[index - 2 - W]
+
+ dx = 0.5 * (ix1 - ix1_)
+ dy = 0.5 * (iy1 - iy1_)
+ derivative = np.concatenate([dx, dy], axis=1)
+ derivative = derivative.reshape(N, K, 2, 1)
+ dxx = ix1 - 2 * i_ + ix1_
+ dyy = iy1 - 2 * i_ + iy1_
+ dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
+ hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
+ hessian = hessian.reshape(N, K, 2, 2)
+ hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
+ coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
+ return coords
+
+
+def _gaussian_blur(heatmaps, kernel=11):
+ """Modulate heatmap distribution with Gaussian.
+ sigma = 0.3*((kernel_size-1)*0.5-1)+0.8
+ sigma~=3 if k=17
+ sigma=2 if k=11;
+ sigma~=1.5 if k=7;
+ sigma~=1 if k=3;
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ kernel (int): Gaussian kernel size (K) for modulation, which should
+ match the heatmap gaussian sigma when training.
+ K=17 for sigma=3 and k=11 for sigma=2.
+
+ Returns:
+ np.ndarray ([N, K, H, W]): Modulated heatmap distribution.
+ """
+ assert kernel % 2 == 1
+
+ border = (kernel - 1) // 2
+ batch_size = heatmaps.shape[0]
+ num_joints = heatmaps.shape[1]
+ height = heatmaps.shape[2]
+ width = heatmaps.shape[3]
+ for i in range(batch_size):
+ for j in range(num_joints):
+ origin_max = np.max(heatmaps[i, j])
+ dr = np.zeros((height + 2 * border, width + 2 * border),
+ dtype=np.float32)
+ dr[border:-border, border:-border] = heatmaps[i, j].copy()
+ dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
+ heatmaps[i, j] = dr[border:-border, border:-border].copy()
+ heatmaps[i, j] *= origin_max / np.max(heatmaps[i, j])
+ return heatmaps
+
+
+def keypoints_from_regression(regression_preds, center, scale, img_size):
+ """Get final keypoint predictions from regression vectors and transform
+ them back to the image.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ regression_preds (np.ndarray[N, K, 2]): model prediction.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+ img_size (list(img_width, img_height)): model input image size.
+
+ Returns:
+ tuple:
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+ N, K, _ = regression_preds.shape
+ preds, maxvals = regression_preds, np.ones((N, K, 1), dtype=np.float32)
+
+ preds = preds * img_size
+
+ # Transform back to the image
+ for i in range(N):
+ preds[i] = transform_preds(preds[i], center[i], scale[i], img_size)
+
+ return preds, maxvals
+
+
+def keypoints_from_heatmaps(heatmaps,
+ center,
+ scale,
+ unbiased=False,
+ post_process='default',
+ kernel=11,
+ valid_radius_factor=0.0546875,
+ use_udp=False,
+ target_type='GaussianHeatmap'):
+ """Get final keypoint predictions from heatmaps and transform them back to
+ the image.
+
+ Note:
+ - batch size: N
+ - num keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+ post_process (str/None): Choice of methods to post-process
+ heatmaps. Currently supported: None, 'default', 'unbiased',
+ 'megvii'.
+ unbiased (bool): Option to use unbiased decoding. Mutually
+ exclusive with megvii.
+ Note: this arg is deprecated and unbiased=True can be replaced
+ by post_process='unbiased'
+ Paper ref: Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+ kernel (int): Gaussian kernel size (K) for modulation, which should
+ match the heatmap gaussian sigma when training.
+ K=17 for sigma=3 and k=11 for sigma=2.
+ valid_radius_factor (float): The radius factor of the positive area
+ in classification heatmap for UDP.
+ use_udp (bool): Use unbiased data processing.
+ target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
+ GaussianHeatmap: Classification target with gaussian distribution.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Returns:
+ tuple: A tuple containing keypoint predictions and scores.
+
+ - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+ """
+    # Avoid modifying the input heatmaps in-place.
+ heatmaps = heatmaps.copy()
+
+ # detect conflicts
+ if unbiased:
+ assert post_process not in [False, None, 'megvii']
+ if post_process in ['megvii', 'unbiased']:
+ assert kernel > 0
+ if use_udp:
+ assert not post_process == 'megvii'
+
+ # normalize configs
+ if post_process is False:
+ warnings.warn(
+ 'post_process=False is deprecated, '
+ 'please use post_process=None instead', DeprecationWarning)
+ post_process = None
+ elif post_process is True:
+ if unbiased is True:
+ warnings.warn(
+ 'post_process=True, unbiased=True is deprecated,'
+ " please use post_process='unbiased' instead",
+ DeprecationWarning)
+ post_process = 'unbiased'
+ else:
+ warnings.warn(
+ 'post_process=True, unbiased=False is deprecated, '
+ "please use post_process='default' instead",
+ DeprecationWarning)
+ post_process = 'default'
+ elif post_process == 'default':
+ if unbiased is True:
+ warnings.warn(
+ 'unbiased=True is deprecated, please use '
+ "post_process='unbiased' instead", DeprecationWarning)
+ post_process = 'unbiased'
+
+ # start processing
+ if post_process == 'megvii':
+ heatmaps = _gaussian_blur(heatmaps, kernel=kernel)
+
+ N, K, H, W = heatmaps.shape
+ if use_udp:
+ if target_type.lower() == 'GaussianHeatMap'.lower():
+ preds, maxvals = _get_max_preds(heatmaps)
+ preds = post_dark_udp(preds, heatmaps, kernel=kernel)
+ elif target_type.lower() == 'CombinedTarget'.lower():
+ for person_heatmaps in heatmaps:
+ for i, heatmap in enumerate(person_heatmaps):
+ kt = 2 * kernel + 1 if i % 3 == 0 else kernel
+ cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
+ # valid radius is in direct proportion to the height of heatmap.
+ valid_radius = valid_radius_factor * H
+ offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
+ offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
+ heatmaps = heatmaps[:, ::3, :]
+ preds, maxvals = _get_max_preds(heatmaps)
+ index = preds[..., 0] + preds[..., 1] * W
+ index += W * H * np.arange(0, N * K / 3)
+ index = index.astype(int).reshape(N, K // 3, 1)
+ preds += np.concatenate((offset_x[index], offset_y[index]), axis=2)
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+ else:
+ preds, maxvals = _get_max_preds(heatmaps)
+ if post_process == 'unbiased': # alleviate biased coordinate
+ # apply Gaussian distribution modulation.
+ heatmaps = np.log(
+ np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
+ for n in range(N):
+ for k in range(K):
+ preds[n][k] = _taylor(heatmaps[n][k], preds[n][k])
+ elif post_process is not None:
+ # add +/-0.25 shift to the predicted locations for higher acc.
+ for n in range(N):
+ for k in range(K):
+ heatmap = heatmaps[n][k]
+ px = int(preds[n][k][0])
+ py = int(preds[n][k][1])
+ if 1 < px < W - 1 and 1 < py < H - 1:
+ diff = np.array([
+ heatmap[py][px + 1] - heatmap[py][px - 1],
+ heatmap[py + 1][px] - heatmap[py - 1][px]
+ ])
+ preds[n][k] += np.sign(diff) * .25
+ if post_process == 'megvii':
+ preds[n][k] += 0.5
+
+ # Transform back to the image
+ for i in range(N):
+ preds[i] = transform_preds(
+ preds[i], center[i], scale[i], [W, H], use_udp=use_udp)
+
+ if post_process == 'megvii':
+ maxvals = maxvals / 255.0 + 0.5
+
+ return preds, maxvals
+
+
+def keypoints_from_heatmaps3d(heatmaps, center, scale):
+ """Get final keypoint predictions from 3d heatmaps and transform them back
+ to the image.
+
+ Note:
+ - batch size: N
+ - num keypoints: K
+ - heatmap depth size: D
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
+ center (np.ndarray[N, 2]): Center of the bounding box (x, y).
+ scale (np.ndarray[N, 2]): Scale of the bounding box
+ wrt height/width.
+
+ Returns:
+ tuple: A tuple containing keypoint predictions and scores.
+
+ - preds (np.ndarray[N, K, 3]): Predicted 3d keypoint location \
+ in images.
+ - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
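+
+ Example (a minimal illustrative sketch with random 3D heatmaps; only
+ the output shapes are meaningful):
+ >>> heatmaps = np.random.rand(1, 17, 8, 64, 48).astype(np.float32)
+ >>> center = np.array([[128., 128.]])
+ >>> scale = np.array([[1., 1.]])
+ >>> preds, maxvals = keypoints_from_heatmaps3d(heatmaps, center, scale)
+ >>> preds.shape, maxvals.shape
+ ((1, 17, 3), (1, 17, 1))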
+ """
+ N, K, D, H, W = heatmaps.shape
+ preds, maxvals = _get_max_preds_3d(heatmaps)
+ # Transform back to the image
+ for i in range(N):
+ preds[i, :, :2] = transform_preds(preds[i, :, :2], center[i], scale[i],
+ [W, H])
+ return preds, maxvals
+
+
+def multilabel_classification_accuracy(pred, gt, mask, thr=0.5):
+ """Get multi-label classification accuracy.
+
+ Note:
+ - batch size: N
+ - label number: L
+
+ Args:
+ pred (np.ndarray[N, L, 2]): model predicted labels.
+ gt (np.ndarray[N, L, 2]): ground-truth labels.
+ mask (np.ndarray[N, 1] or np.ndarray[N, L] ): reliability of
+ ground-truth labels.
+
+ Returns:
+ float: multi-label classification accuracy.
+ """
+ # Only compute accuracy on samples that have ground truth for all labels.
+ valid = (mask > 0).min(axis=1) if mask.ndim == 2 else (mask > 0)
+ pred, gt = pred[valid], gt[valid]
+
+ if pred.shape[0] == 0:
+ acc = 0.0  # if no sample has complete gt labels, set acc to 0.
+ else:
+ # The classification of a sample is regarded as correct
+ # only if it's correct for all labels.
+ acc = (((pred - thr) * (gt - thr)) > 0).all(axis=1).mean()
+ return acc
diff --git a/vendor/ViTPose/mmpose/core/fp16/__init__.py b/vendor/ViTPose/mmpose/core/fp16/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cb054810870626496ab4145446b17cf2c2e0b5d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/fp16/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .decorators import auto_fp16, force_fp32
+from .hooks import Fp16OptimizerHook, wrap_fp16_model
+from .utils import cast_tensor_type
+
+__all__ = [
+ 'auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model',
+ 'cast_tensor_type'
+]
diff --git a/vendor/ViTPose/mmpose/core/fp16/decorators.py b/vendor/ViTPose/mmpose/core/fp16/decorators.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d70ddf533c069b26f08ef3a973328790843def5
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/fp16/decorators.py
@@ -0,0 +1,175 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools
+import warnings
+from inspect import getfullargspec
+
+import torch
+
+from .utils import cast_tensor_type
+
+
+def auto_fp16(apply_to=None, out_fp32=False):
+ """Decorator to enable fp16 training automatically.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If inputs arguments are fp32 tensors, they will
+ be converted to fp16 automatically. Arguments other than fp32 tensors are
+ ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp32 (bool): Whether to convert the output back to fp32.
+
+ Example:
+
+ >>> import torch.nn as nn
+ >>> class MyModule1(nn.Module):
+ >>>
+ >>> # Convert x and y to fp16
+ >>> @auto_fp16()
+ >>> def forward(self, x, y):
+ >>> pass
+
+ >>> import torch.nn as nn
+ >>> class MyModule2(nn.Module):
+ >>>
+ >>> # convert pred to fp16
+ >>> @auto_fp16(apply_to=('pred', ))
+ >>> def do_something(self, pred, others):
+ >>> pass
+ """
+
+ warnings.warn(
+ 'auto_fp16 in mmpose will be deprecated in the next release.'
+ 'Please use mmcv.runner.auto_fp16 instead (mmcv>=1.3.1).',
+ DeprecationWarning)
+
+ def auto_fp16_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@auto_fp16 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be casted
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ # NOTE: default args are not taken into consideration
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.float, torch.half))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = {}
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.float, torch.half)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp32 if necessary
+ if out_fp32:
+ output = cast_tensor_type(output, torch.half, torch.float)
+ return output
+
+ return new_func
+
+ return auto_fp16_wrapper
+
+
+def force_fp32(apply_to=None, out_fp16=False):
+ """Decorator to convert input arguments to fp32 in force.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If there are some inputs that must be processed
+ in fp32 mode, then this decorator can handle it. If inputs arguments are
+ fp16 tensors, they will be converted to fp32 automatically. Arguments other
+ than fp16 tensors are ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp16 (bool): Whether to convert the output back to fp16.
+
+ Example:
+
+ >>> import torch.nn as nn
+ >>> class MyModule1(nn.Module):
+ >>>
+ >>> # Convert x and y to fp32
+ >>> @force_fp32()
+ >>> def loss(self, x, y):
+ >>> pass
+
+ >>> import torch.nn as nn
+ >>> class MyModule2(nn.Module):
+ >>>
+ >>> # convert pred to fp32
+ >>> @force_fp32(apply_to=('pred', ))
+ >>> def post_process(self, pred, others):
+ >>> pass
+ """
+ warnings.warn(
+ 'force_fp32 in mmpose will be deprecated in the next release.'
+ 'Please use mmcv.runner.force_fp32 instead (mmcv>=1.3.1).',
+ DeprecationWarning)
+
+ def force_fp32_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@force_fp32 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be casted
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.half, torch.float))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = dict()
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.half, torch.float)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp32 if necessary
+ if out_fp16:
+ output = cast_tensor_type(output, torch.float, torch.half)
+ return output
+
+ return new_func
+
+ return force_fp32_wrapper
diff --git a/vendor/ViTPose/mmpose/core/fp16/hooks.py b/vendor/ViTPose/mmpose/core/fp16/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..74081a9b73b95ebb20cabf07cfaeab86cc874780
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/fp16/hooks.py
@@ -0,0 +1,167 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+from mmcv.runner import OptimizerHook
+from mmcv.utils import _BatchNorm
+
+from ..utils.dist_utils import allreduce_grads
+from .utils import cast_tensor_type
+
+
+class Fp16OptimizerHook(OptimizerHook):
+ """FP16 optimizer hook.
+
+ The steps of the fp16 optimizer are as follows.
+ 1. Scale the loss value.
+ 2. Backpropagate through the fp16 model.
+ 3. Copy gradients from the fp16 model to the fp32 weights.
+ 4. Update the fp32 weights.
+ 5. Copy the updated parameters from the fp32 weights back to the fp16 model.
+
+ Refer to https://arxiv.org/abs/1710.03740 for more details.
+
+ Args:
+ loss_scale (float): Scale factor multiplied with loss.
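+
+ Example (a minimal sketch; assumes the hook is handed to the usual
+ mmcv runner in place of the default OptimizerHook):
+ >>> optimizer_config = Fp16OptimizerHook(
+ ... grad_clip=dict(max_norm=1.0), loss_scale=512., distributed=False)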
+ """
+
+ def __init__(self,
+ grad_clip=None,
+ coalesce=True,
+ bucket_size_mb=-1,
+ loss_scale=512.,
+ distributed=True):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+ self.loss_scale = loss_scale
+ self.distributed = distributed
+
+ def before_run(self, runner):
+ """Preparing steps before Mixed Precision Training.
+
+ 1. Make a master copy of fp32 weights for optimization.
+ 2. Convert the main model from fp32 to fp16.
+
+ Args:
+ runner (:obj:`mmcv.Runner`): The underlying training runner.
+ """
+ # keep a copy of fp32 weights
+ runner.optimizer.param_groups = copy.deepcopy(
+ runner.optimizer.param_groups)
+ # convert model to fp16
+ wrap_fp16_model(runner.model)
+
+ @staticmethod
+ def copy_grads_to_fp32(fp16_net, fp32_weights):
+ """Copy gradients from fp16 model to fp32 weight copy."""
+ for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()):
+ if fp16_param.grad is not None:
+ if fp32_param.grad is None:
+ fp32_param.grad = fp32_param.data.new(fp32_param.size())
+ fp32_param.grad.copy_(fp16_param.grad)
+
+ @staticmethod
+ def copy_params_to_fp16(fp16_net, fp32_weights):
+ """Copy updated params from fp32 weight copy to fp16 model."""
+ for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights):
+ fp16_param.data.copy_(fp32_param.data)
+
+ def after_train_iter(self, runner):
+ """Backward optimization steps for Mixed Precision Training.
+
+ 1. Scale the loss by a scale factor.
+ 2. Backward the loss to obtain the gradients (fp16).
+ 3. Copy gradients from the model to the fp32 weight copy.
+ 4. Scale the gradients back and update the fp32 weight copy.
+ 5. Copy back the params from fp32 weight copy to the fp16 model.
+
+ Args:
+ runner (:obj:`mmcv.Runner`): The underlying training runner.
+ """
+ # clear grads of last iteration
+ runner.model.zero_grad()
+ runner.optimizer.zero_grad()
+ # scale the loss value
+ scaled_loss = runner.outputs['loss'] * self.loss_scale
+ scaled_loss.backward()
+ # copy fp16 grads in the model to fp32 params in the optimizer
+ fp32_weights = []
+ for param_group in runner.optimizer.param_groups:
+ fp32_weights += param_group['params']
+ self.copy_grads_to_fp32(runner.model, fp32_weights)
+ # allreduce grads
+ if self.distributed:
+ allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb)
+ # scale the gradients back
+ for param in fp32_weights:
+ if param.grad is not None:
+ param.grad.div_(self.loss_scale)
+ if self.grad_clip is not None:
+ self.clip_grads(fp32_weights)
+ # update fp32 params
+ runner.optimizer.step()
+ # copy fp32 params to the fp16 model
+ self.copy_params_to_fp16(runner.model, fp32_weights)
+
+
+def wrap_fp16_model(model):
+ """Wrap the FP32 model to FP16.
+
+ 1. Convert FP32 model to FP16.
+ 2. Keep some necessary layers in FP32, e.g., normalization layers.
+
+ Args:
+ model (nn.Module): Model in FP32.
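+
+ Example (a minimal illustrative sketch; normalization layers stay in
+ FP32 while the rest of the model is cast to FP16):
+ >>> import torch.nn as nn
+ >>> model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
+ >>> wrap_fp16_model(model)
+ >>> model[0].weight.dtype
+ torch.float16
+ >>> model[1].weight.dtype
+ torch.float32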
+ """
+ # convert model to fp16
+ model.half()
+ # patch the normalization layers to make it work in fp32 mode
+ patch_norm_fp32(model)
+ # set `fp16_enabled` flag
+ for m in model.modules():
+ if hasattr(m, 'fp16_enabled'):
+ m.fp16_enabled = True
+
+
+def patch_norm_fp32(module):
+ """Recursively convert normalization layers from FP16 to FP32.
+
+ Args:
+ module (nn.Module): The FP16 module whose normalization layers are
+ to be converted back to FP32.
+
+ Returns:
+ nn.Module: The converted module, the normalization layers have been
+ converted to FP32.
+ """
+ if isinstance(module, (_BatchNorm, nn.GroupNorm)):
+ module.float()
+ module.forward = patch_forward_method(module.forward, torch.half,
+ torch.float)
+ for child in module.children():
+ patch_norm_fp32(child)
+ return module
+
+
+def patch_forward_method(func, src_type, dst_type, convert_output=True):
+ """Patch the forward method of a module.
+
+ Args:
+ func (callable): The original forward method.
+ src_type (torch.dtype): Type of input arguments to be converted from.
+ dst_type (torch.dtype): Type of input arguments to be converted to.
+ convert_output (bool): Whether to convert the output back to src_type.
+
+ Returns:
+ callable: The patched forward method.
+ """
+
+ def new_forward(*args, **kwargs):
+ output = func(*cast_tensor_type(args, src_type, dst_type),
+ **cast_tensor_type(kwargs, src_type, dst_type))
+ if convert_output:
+ output = cast_tensor_type(output, dst_type, src_type)
+ return output
+
+ return new_forward
diff --git a/vendor/ViTPose/mmpose/core/fp16/utils.py b/vendor/ViTPose/mmpose/core/fp16/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1ec3d328328560c7959ae5e77621feb77692068
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/fp16/utils.py
@@ -0,0 +1,34 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import abc
+
+import numpy as np
+import torch
+
+
+def cast_tensor_type(inputs, src_type, dst_type):
+ """Recursively convert Tensor in inputs from src_type to dst_type.
+
+ Args:
+ inputs: Inputs to be cast.
+ src_type (torch.dtype): Source type.
+ dst_type (torch.dtype): Destination type.
+
+ Returns:
+ The same type as inputs, with all contained Tensors cast to dst_type.
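+
+ Example (a minimal illustrative sketch; non-tensor values such as
+ strings pass through unchanged):
+ >>> import torch
+ >>> out = cast_tensor_type(
+ ... {'x': torch.zeros(2), 'path': 'img.jpg'}, torch.float, torch.half)
+ >>> out['x'].dtype, out['path']
+ (torch.float16, 'img.jpg')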
+ """
+ if isinstance(inputs, torch.Tensor):
+ return inputs.to(dst_type)
+ elif isinstance(inputs, str):
+ return inputs
+ elif isinstance(inputs, np.ndarray):
+ return inputs
+ elif isinstance(inputs, abc.Mapping):
+ return type(inputs)({
+ k: cast_tensor_type(v, src_type, dst_type)
+ for k, v in inputs.items()
+ })
+ elif isinstance(inputs, abc.Iterable):
+ return type(inputs)(
+ cast_tensor_type(item, src_type, dst_type) for item in inputs)
+
+ return inputs
diff --git a/vendor/ViTPose/mmpose/core/optimizer/__init__.py b/vendor/ViTPose/mmpose/core/optimizer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4340ffc075afdcdf3d9f7a398ead394ca5a168a1
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/optimizer/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import OPTIMIZERS, build_optimizers
+
+__all__ = ['build_optimizers', 'OPTIMIZERS']
diff --git a/vendor/ViTPose/mmpose/core/optimizer/builder.py b/vendor/ViTPose/mmpose/core/optimizer/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d6accd707db0728142dbcfccee15d902e3632a3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/optimizer/builder.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.runner import build_optimizer
+from mmcv.utils import Registry
+
+OPTIMIZERS = Registry('optimizers')
+
+
+def build_optimizers(model, cfgs):
+ """Build multiple optimizers from configs.
+
+ If `cfgs` contains several dicts for optimizers, then a dict of the
+ constructed optimizers will be returned.
+ If `cfgs` only contains one optimizer config, the constructed optimizer
+ itself will be returned.
+
+ For example,
+
+ 1) Multiple optimizer configs:
+
+ .. code-block:: python
+
+ optimizer_cfg = dict(
+ model1=dict(type='SGD', lr=lr),
+ model2=dict(type='SGD', lr=lr))
+
+ The return dict is
+ ``dict('model1': torch.optim.Optimizer, 'model2': torch.optim.Optimizer)``
+
+ 2) Single optimizer config:
+
+ .. code-block:: python
+
+ optimizer_cfg = dict(type='SGD', lr=lr)
+
+ The return is ``torch.optim.Optimizer``.
+
+ Args:
+ model (:obj:`nn.Module`): The model with parameters to be optimized.
+ cfgs (dict): The config dict of the optimizer.
+
+ Returns:
+ dict[:obj:`torch.optim.Optimizer`] | :obj:`torch.optim.Optimizer`:
+ The initialized optimizers.
+ """
+ optimizers = {}
+ if hasattr(model, 'module'):
+ model = model.module
+ # determine whether 'cfgs' has several dicts for optimizers
+ if all(isinstance(v, dict) for v in cfgs.values()):
+ for key, cfg in cfgs.items():
+ cfg_ = cfg.copy()
+ module = getattr(model, key)
+ optimizers[key] = build_optimizer(module, cfg_)
+ return optimizers
+
+ return build_optimizer(model, cfgs)
diff --git a/vendor/ViTPose/mmpose/core/post_processing/__init__.py b/vendor/ViTPose/mmpose/core/post_processing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ee6858d953134a9b870b1a3635968729a4762ea
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/post_processing/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .nms import oks_iou, oks_nms, soft_oks_nms
+from .one_euro_filter import OneEuroFilter
+from .post_transforms import (affine_transform, flip_back, fliplr_joints,
+ fliplr_regression, get_affine_transform,
+ get_warp_matrix, rotate_point, transform_preds,
+ warp_affine_joints)
+
+__all__ = [
+ 'oks_nms', 'soft_oks_nms', 'affine_transform', 'rotate_point', 'flip_back',
+ 'fliplr_joints', 'fliplr_regression', 'transform_preds',
+ 'get_affine_transform', 'get_warp_matrix', 'warp_affine_joints',
+ 'OneEuroFilter', 'oks_iou'
+]
diff --git a/vendor/ViTPose/mmpose/core/post_processing/group.py b/vendor/ViTPose/mmpose/core/post_processing/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..6235dbc111eae55e8bc1d34671db84152bc7c542
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/post_processing/group.py
@@ -0,0 +1,410 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/princeton-vl/pose-ae-train/
+# Original licence: Copyright (c) 2017, umich-vl, under BSD 3-Clause License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+import torch
+from munkres import Munkres
+
+from mmpose.core.evaluation import post_dark_udp
+
+
+def _py_max_match(scores):
+ """Apply munkres algorithm to get the best match.
+
+ Args:
+ scores(np.ndarray): cost matrix.
+
+ Returns:
+ np.ndarray: best match.
+ """
+ m = Munkres()
+ tmp = m.compute(scores)
+ tmp = np.array(tmp).astype(int)
+ return tmp
+
+
+def _match_by_tag(inp, params):
+ """Match joints by tags. Use Munkres algorithm to calculate the best match
+ for keypoints grouping.
+
+ Note:
+ number of keypoints: K
+ max number of people in an image: M (M=30 by default)
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ inp(tuple):
+ tag_k (np.ndarray[KxMxL]): tag corresponding to the
+ top k values of feature map per keypoint.
+ loc_k (np.ndarray[KxMx2]): top k locations of the
+ feature maps for keypoint.
+ val_k (np.ndarray[KxM]): top k value of the
+ feature maps per keypoint.
+ params(Params): class Params().
+
+ Returns:
+ np.ndarray: result of pose groups.
+ """
+ assert isinstance(params, _Params), 'params should be class _Params()'
+
+ tag_k, loc_k, val_k = inp
+
+ default_ = np.zeros((params.num_joints, 3 + tag_k.shape[2]),
+ dtype=np.float32)
+
+ joint_dict = {}
+ tag_dict = {}
+ for i in range(params.num_joints):
+ idx = params.joint_order[i]
+
+ tags = tag_k[idx]
+ joints = np.concatenate((loc_k[idx], val_k[idx, :, None], tags), 1)
+ mask = joints[:, 2] > params.detection_threshold
+ tags = tags[mask]
+ joints = joints[mask]
+
+ if joints.shape[0] == 0:
+ continue
+
+ if i == 0 or len(joint_dict) == 0:
+ for tag, joint in zip(tags, joints):
+ key = tag[0]
+ joint_dict.setdefault(key, np.copy(default_))[idx] = joint
+ tag_dict[key] = [tag]
+ else:
+ grouped_keys = list(joint_dict.keys())[:params.max_num_people]
+ grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys]
+
+ if (params.ignore_too_much
+ and len(grouped_keys) == params.max_num_people):
+ continue
+
+ diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :]
+ diff_normed = np.linalg.norm(diff, ord=2, axis=2)
+ diff_saved = np.copy(diff_normed)
+
+ if params.use_detection_val:
+ diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3]
+
+ num_added = diff.shape[0]
+ num_grouped = diff.shape[1]
+
+ if num_added > num_grouped:
+ diff_normed = np.concatenate(
+ (diff_normed,
+ np.zeros((num_added, num_added - num_grouped),
+ dtype=np.float32) + 1e10),
+ axis=1)
+
+ pairs = _py_max_match(diff_normed)
+ for row, col in pairs:
+ if (row < num_added and col < num_grouped
+ and diff_saved[row][col] < params.tag_threshold):
+ key = grouped_keys[col]
+ joint_dict[key][idx] = joints[row]
+ tag_dict[key].append(tags[row])
+ else:
+ key = tags[row][0]
+ joint_dict.setdefault(key, np.copy(default_))[idx] = \
+ joints[row]
+ tag_dict[key] = [tags[row]]
+
+ results = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32)
+ return results
+
+
+class _Params:
+ """A class of parameter.
+
+ Args:
+ cfg(Config): config.
+ """
+
+ def __init__(self, cfg):
+ self.num_joints = cfg['num_joints']
+ self.max_num_people = cfg['max_num_people']
+
+ self.detection_threshold = cfg['detection_threshold']
+ self.tag_threshold = cfg['tag_threshold']
+ self.use_detection_val = cfg['use_detection_val']
+ self.ignore_too_much = cfg['ignore_too_much']
+
+ if self.num_joints == 17:
+ self.joint_order = [
+ i - 1 for i in
+ [1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17]
+ ]
+ else:
+ self.joint_order = list(np.arange(self.num_joints))
+
+
+class HeatmapParser:
+ """The heatmap parser for post processing."""
+
+ def __init__(self, cfg):
+ self.params = _Params(cfg)
+ self.tag_per_joint = cfg['tag_per_joint']
+ self.pool = torch.nn.MaxPool2d(cfg['nms_kernel'], 1,
+ cfg['nms_padding'])
+ self.use_udp = cfg.get('use_udp', False)
+ self.score_per_joint = cfg.get('score_per_joint', False)
+
+ def nms(self, heatmaps):
+ """Non-Maximum Suppression for heatmaps.
+
+ Args:
+ heatmaps (torch.Tensor): Heatmaps before nms.
+
+ Returns:
+ torch.Tensor: Heatmaps after nms.
+ """
+
+ maxm = self.pool(heatmaps)
+ maxm = torch.eq(maxm, heatmaps).float()
+ heatmaps = heatmaps * maxm
+
+ return heatmaps
+
+ def match(self, tag_k, loc_k, val_k):
+ """Group keypoints to human poses in a batch.
+
+ Args:
+ tag_k (np.ndarray[NxKxMxL]): tag corresponding to the
+ top k values of feature map per keypoint.
+ loc_k (np.ndarray[NxKxMx2]): top k locations of the
+ feature maps for keypoint.
+ val_k (np.ndarray[NxKxM]): top k value of the
+ feature maps per keypoint.
+
+ Returns:
+ list
+ """
+
+ def _match(x):
+ return _match_by_tag(x, self.params)
+
+ return list(map(_match, zip(tag_k, loc_k, val_k)))
+
+ def top_k(self, heatmaps, tags):
+ """Find top_k values in an image.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ max number of people: M
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmaps (torch.Tensor[NxKxHxW])
+ tags (torch.Tensor[NxKxHxWxL])
+
+ Returns:
+ dict: A dict containing top_k values.
+
+ - tag_k (np.ndarray[NxKxMxL]):
+ tag corresponding to the top k values of
+ feature map per keypoint.
+ - loc_k (np.ndarray[NxKxMx2]):
+ top k location of feature map per keypoint.
+ - val_k (np.ndarray[NxKxM]):
+ top k value of feature map per keypoint.
+ """
+ heatmaps = self.nms(heatmaps)
+ N, K, H, W = heatmaps.size()
+ heatmaps = heatmaps.view(N, K, -1)
+ val_k, ind = heatmaps.topk(self.params.max_num_people, dim=2)
+
+ tags = tags.view(tags.size(0), tags.size(1), W * H, -1)
+ if not self.tag_per_joint:
+ tags = tags.expand(-1, self.params.num_joints, -1, -1)
+
+ tag_k = torch.stack(
+ [torch.gather(tags[..., i], 2, ind) for i in range(tags.size(3))],
+ dim=3)
+
+ x = ind % W
+ y = ind // W
+
+ ind_k = torch.stack((x, y), dim=3)
+
+ results = {
+ 'tag_k': tag_k.cpu().numpy(),
+ 'loc_k': ind_k.cpu().numpy(),
+ 'val_k': val_k.cpu().numpy()
+ }
+
+ return results
+
+ @staticmethod
+ def adjust(results, heatmaps):
+ """Adjust the coordinates for better accuracy.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+
+ Args:
+ results (list(np.ndarray)): Keypoint predictions.
+ heatmaps (torch.Tensor[NxKxHxW]): Heatmaps.
+ """
+ _, _, H, W = heatmaps.shape
+ for batch_id, people in enumerate(results):
+ for people_id, people_i in enumerate(people):
+ for joint_id, joint in enumerate(people_i):
+ if joint[2] > 0:
+ x, y = joint[0:2]
+ xx, yy = int(x), int(y)
+ tmp = heatmaps[batch_id][joint_id]
+ if tmp[min(H - 1, yy + 1), xx] > tmp[max(0, yy - 1),
+ xx]:
+ y += 0.25
+ else:
+ y -= 0.25
+
+ if tmp[yy, min(W - 1, xx + 1)] > tmp[yy,
+ max(0, xx - 1)]:
+ x += 0.25
+ else:
+ x -= 0.25
+ results[batch_id][people_id, joint_id,
+ 0:2] = (x + 0.5, y + 0.5)
+ return results
+
+ @staticmethod
+ def refine(heatmap, tag, keypoints, use_udp=False):
+ """Given initial keypoint predictions, we identify missing joints.
+
+ Note:
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmap: np.ndarray(K, H, W).
+ tag: np.ndarray(K, H, W) | np.ndarray(K, H, W, L)
+ keypoints: np.ndarray of size (K, 3 + L)
+ last dim is (x, y, score, tag).
+ use_udp (bool): Use unbiased data processing.
+
+ Returns:
+ np.ndarray: The refined keypoints.
+ """
+
+ K, H, W = heatmap.shape
+ if len(tag.shape) == 3:
+ tag = tag[..., None]
+
+ tags = []
+ for i in range(K):
+ if keypoints[i, 2] > 0:
+ # save tag value of detected keypoint
+ x, y = keypoints[i][:2].astype(int)
+ x = np.clip(x, 0, W - 1)
+ y = np.clip(y, 0, H - 1)
+ tags.append(tag[i, y, x])
+
+ # mean tag of current detected people
+ prev_tag = np.mean(tags, axis=0)
+ results = []
+
+ for _heatmap, _tag in zip(heatmap, tag):
+ # distance of all tag values with mean tag of
+ # current detected people
+ distance_tag = (((_tag -
+ prev_tag[None, None, :])**2).sum(axis=2)**0.5)
+ norm_heatmap = _heatmap - np.round(distance_tag)
+
+ # find maximum position
+ y, x = np.unravel_index(np.argmax(norm_heatmap), _heatmap.shape)
+ xx = x.copy()
+ yy = y.copy()
+ # detection score at maximum position
+ val = _heatmap[y, x]
+ if not use_udp:
+ # offset by 0.5
+ x += 0.5
+ y += 0.5
+
+ # add a quarter offset
+ if _heatmap[yy, min(W - 1, xx + 1)] > _heatmap[yy, max(0, xx - 1)]:
+ x += 0.25
+ else:
+ x -= 0.25
+
+ if _heatmap[min(H - 1, yy + 1), xx] > _heatmap[max(0, yy - 1), xx]:
+ y += 0.25
+ else:
+ y -= 0.25
+
+ results.append((x, y, val))
+ results = np.array(results)
+
+ if results is not None:
+ for i in range(K):
+ # add keypoint if it is not detected
+ if results[i, 2] > 0 and keypoints[i, 2] == 0:
+ keypoints[i, :3] = results[i, :3]
+
+ return keypoints
+
+ def parse(self, heatmaps, tags, adjust=True, refine=True):
+ """Group keypoints into poses given heatmap and tag.
+
+ Note:
+ batch size: N
+ number of keypoints: K
+ heatmap height: H
+ heatmap width: W
+ dim of tags: L
+ If use flip testing, L=2; else L=1.
+
+ Args:
+ heatmaps (torch.Tensor[NxKxHxW]): model output heatmaps.
+ tags (torch.Tensor[NxKxHxWxL]): model output tagmaps.
+
+ Returns:
+ tuple: A tuple containing keypoint grouping results.
+
+ - results (list(np.ndarray)): Pose results.
+ - scores (list/list(np.ndarray)): Score of people.
+ """
+ results = self.match(**self.top_k(heatmaps, tags))
+
+ if adjust:
+ if self.use_udp:
+ for i in range(len(results)):
+ if results[i].shape[0] > 0:
+ results[i][..., :2] = post_dark_udp(
+ results[i][..., :2].copy(), heatmaps[i:i + 1, :])
+ else:
+ results = self.adjust(results, heatmaps)
+
+ if self.score_per_joint:
+ scores = [i[:, 2] for i in results[0]]
+ else:
+ scores = [i[:, 2].mean() for i in results[0]]
+
+ if refine:
+ results = results[0]
+ # for every detected person
+ for i in range(len(results)):
+ heatmap_numpy = heatmaps[0].cpu().numpy()
+ tag_numpy = tags[0].cpu().numpy()
+ if not self.tag_per_joint:
+ tag_numpy = np.tile(tag_numpy,
+ (self.params.num_joints, 1, 1, 1))
+ results[i] = self.refine(
+ heatmap_numpy, tag_numpy, results[i], use_udp=self.use_udp)
+ results = [results]
+
+ return results, scores
diff --git a/vendor/ViTPose/mmpose/core/post_processing/nms.py b/vendor/ViTPose/mmpose/core/post_processing/nms.py
new file mode 100644
index 0000000000000000000000000000000000000000..86a0ab35e0e26d27bb0bb55071018ffc5ac9af1d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/post_processing/nms.py
@@ -0,0 +1,207 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import numpy as np
+
+
+def nms(dets, thr):
+ """Greedily select boxes with high confidence and overlap <= thr.
+
+ Args:
+ dets: [[x1, y1, x2, y2, score]].
+ thr: Retain overlap < thr.
+
+ Returns:
+ list: Indexes to keep.
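+
+ Example (a minimal illustrative sketch with three hand-written boxes;
+ the second box heavily overlaps the first and is suppressed):
+ >>> dets = np.array([[0., 0., 10., 10., 0.9],
+ ... [1., 1., 10., 10., 0.8],
+ ... [50., 50., 60., 60., 0.7]])
+ >>> [int(i) for i in nms(dets, thr=0.5)]
+ [0, 2]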
+ """
+ if len(dets) == 0:
+ return []
+
+ x1 = dets[:, 0]
+ y1 = dets[:, 1]
+ x2 = dets[:, 2]
+ y2 = dets[:, 3]
+ scores = dets[:, 4]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while len(order) > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= thr)[0]
+ order = order[inds + 1]
+
+ return keep
+
+
+def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None):
+ """Calculate oks ious.
+
+ Args:
+ g: Ground truth keypoints.
+ d: Detected keypoints.
+ a_g: Area of the ground truth object.
+ a_d: Area of the detected object.
+ sigmas: standard deviation of keypoint labelling.
+ vis_thr: threshold of the keypoint visibility.
+
+ Returns:
+ list: The oks ious.
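+
+ Example (a minimal illustrative sketch; identical 17-keypoint poses
+ give an OKS of 1):
+ >>> g = np.array([10., 10., 1.] * 17)
+ >>> d = np.array([[10., 10., 1.] * 17])
+ >>> oks_iou(g, d, a_g=100., a_d=np.array([100.]))
+ array([1.], dtype=float32)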
+ """
+ if sigmas is None:
+ sigmas = np.array([
+ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
+ .87, .87, .89, .89
+ ]) / 10.0
+ vars = (sigmas * 2)**2
+ xg = g[0::3]
+ yg = g[1::3]
+ vg = g[2::3]
+ ious = np.zeros(len(d), dtype=np.float32)
+ for n_d in range(0, len(d)):
+ xd = d[n_d, 0::3]
+ yd = d[n_d, 1::3]
+ vd = d[n_d, 2::3]
+ dx = xd - xg
+ dy = yd - yg
+ e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
+ if vis_thr is not None:
+ ind = list(vg > vis_thr) and list(vd > vis_thr)
+ e = e[ind]
+ ious[n_d] = np.sum(np.exp(-e)) / len(e) if len(e) != 0 else 0.0
+ return ious
+
+
+def oks_nms(kpts_db, thr, sigmas=None, vis_thr=None, score_per_joint=False):
+ """OKS NMS implementations.
+
+ Args:
+ kpts_db: keypoints, scores and areas of the detected poses.
+ thr: Retain overlap < thr.
+ sigmas: standard deviation of keypoint labelling.
+ vis_thr: threshold of the keypoint visibility.
+ score_per_joint: the input scores (in kpts_db) are per joint scores
+
+ Returns:
+ np.ndarray: indexes to keep.
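+
+ Example (a minimal illustrative sketch; two identical poses, so only
+ the higher-scoring one is kept):
+ >>> kpts_db = [
+ ... dict(keypoints=np.ones((17, 3)), score=0.9, area=100.),
+ ... dict(keypoints=np.ones((17, 3)), score=0.8, area=100.)]
+ >>> oks_nms(kpts_db, thr=0.9)
+ array([0])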
+ """
+ if len(kpts_db) == 0:
+ return []
+
+ if score_per_joint:
+ scores = np.array([k['score'].mean() for k in kpts_db])
+ else:
+ scores = np.array([k['score'] for k in kpts_db])
+
+ kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
+ areas = np.array([k['area'] for k in kpts_db])
+
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while len(order) > 0:
+ i = order[0]
+ keep.append(i)
+
+ oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
+ sigmas, vis_thr)
+
+ inds = np.where(oks_ovr <= thr)[0]
+ order = order[inds + 1]
+
+ keep = np.array(keep)
+
+ return keep
+
+
+def _rescore(overlap, scores, thr, type='gaussian'):
+ """Rescoring mechanism gaussian or linear.
+
+ Args:
+ overlap: calculated ious
+ scores: target scores.
+ thr: retain oks overlap < thr.
+ type: 'gaussian' or 'linear'
+
+ Returns:
+ np.ndarray: indexes to keep
+ """
+ assert len(overlap) == len(scores)
+ assert type in ['gaussian', 'linear']
+
+ if type == 'linear':
+ inds = np.where(overlap >= thr)[0]
+ scores[inds] = scores[inds] * (1 - overlap[inds])
+ else:
+ scores = scores * np.exp(-overlap**2 / thr)
+
+ return scores
+
+
+def soft_oks_nms(kpts_db,
+ thr,
+ max_dets=20,
+ sigmas=None,
+ vis_thr=None,
+ score_per_joint=False):
+ """Soft OKS NMS implementations.
+
+ Args:
+ kpts_db: keypoints, scores and areas of the detected poses.
+ thr: retain oks overlap < thr.
+ max_dets: max number of detections to keep.
+ sigmas: Keypoint labelling uncertainty.
+ vis_thr: threshold of the keypoint visibility.
+ score_per_joint: the input scores (in kpts_db) are per-joint scores.
+
+ Returns:
+ np.ndarray: indexes to keep.
+ """
+ if len(kpts_db) == 0:
+ return []
+
+ if score_per_joint:
+ scores = np.array([k['score'].mean() for k in kpts_db])
+ else:
+ scores = np.array([k['score'] for k in kpts_db])
+
+ kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
+ areas = np.array([k['area'] for k in kpts_db])
+
+ order = scores.argsort()[::-1]
+ scores = scores[order]
+
+ keep = np.zeros(max_dets, dtype=np.intp)
+ keep_cnt = 0
+ while len(order) > 0 and keep_cnt < max_dets:
+ i = order[0]
+
+ oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
+ sigmas, vis_thr)
+
+ order = order[1:]
+ scores = _rescore(oks_ovr, scores[1:], thr)
+
+ tmp = scores.argsort()[::-1]
+ order = order[tmp]
+ scores = scores[tmp]
+
+ keep[keep_cnt] = i
+ keep_cnt += 1
+
+ keep = keep[:keep_cnt]
+
+ return keep
diff --git a/vendor/ViTPose/mmpose/core/post_processing/one_euro_filter.py b/vendor/ViTPose/mmpose/core/post_processing/one_euro_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..01ffa5fda9b1669e3611f14643ed731669b3b421
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/post_processing/one_euro_filter.py
@@ -0,0 +1,102 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HoBeom/OneEuroFilter-Numpy
+# Original licence: Copyright (c) HoBeom Jeon, under the MIT License.
+# ------------------------------------------------------------------------------
+from time import time
+
+import numpy as np
+
+
+def smoothing_factor(t_e, cutoff):
+ r = 2 * np.pi * cutoff * t_e
+ return r / (r + 1)
+
+
+def exponential_smoothing(a, x, x_prev):
+ return a * x + (1 - a) * x_prev
+
+
+class OneEuroFilter:
+
+ def __init__(self,
+ x0,
+ dx0=0.0,
+ min_cutoff=1.7,
+ beta=0.3,
+ d_cutoff=30.0,
+ fps=None):
+ """One Euro Filter for keypoints smoothing.
+
+ Args:
+ x0 (np.ndarray[K, 2]): Initial keypoint values.
+ dx0 (float): Initial derivative value. Default: 0.0.
+ min_cutoff (float): Minimum cutoff frequency of the one euro filter.
+ beta (float): Speed coefficient of the one euro filter.
+ d_cutoff (float): Derivative cutoff frequency (input data FPS).
+ fps (float): Video FPS for video inference.
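+
+ Example (a minimal illustrative sketch with random keypoints; in
+ practice consecutive video frames are fed to the filter):
+ >>> kpts = np.random.rand(17, 2).astype(np.float32)
+ >>> smoother = OneEuroFilter(kpts, fps=30)
+ >>> kpts_next = np.random.rand(17, 2).astype(np.float32)
+ >>> smoother(kpts_next).shape
+ (17, 2)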
+ """
+
+ # The parameters.
+ self.data_shape = x0.shape
+ self.min_cutoff = np.full(x0.shape, min_cutoff)
+ self.beta = np.full(x0.shape, beta)
+ self.d_cutoff = np.full(x0.shape, d_cutoff)
+ # Previous values.
+ self.x_prev = x0.astype(np.float32)
+ self.dx_prev = np.full(x0.shape, dx0)
+ self.mask_prev = np.ma.masked_where(x0 <= 0, x0)
+ self.realtime = True
+ if fps is None:
+ # Using in realtime inference
+ self.t_e = None
+ self.skip_frame_factor = d_cutoff
+ else:
+ # fps using video inference
+ self.realtime = False
+ self.d_cutoff = np.full(x0.shape, float(fps))
+ self.t_prev = time()
+
+ def __call__(self, x, t_e=1.0):
+ """Compute the filtered signal.
+
+ Hyper-parameters (cutoff, beta) are taken from the VNect paper.
+
+ The realtime camera fps (d_cutoff) defaults to 30.0.
+
+ Args:
+ x (np.ndarray[K, 2]): keypoints results in frame
+ t_e (Optional): video skip frame count for posetrack
+ evaluation
+ """
+ assert x.shape == self.data_shape
+
+ t = 0
+ if self.realtime:
+ t = time()
+ t_e = (t - self.t_prev) * self.skip_frame_factor
+ t_e = np.full(x.shape, t_e)
+
+ # missing keypoints mask
+ mask = np.ma.masked_where(x <= 0, x)
+
+ # The filtered derivative of the signal.
+ a_d = smoothing_factor(t_e, self.d_cutoff)
+ dx = (x - self.x_prev) / t_e
+ dx_hat = exponential_smoothing(a_d, dx, self.dx_prev)
+
+ # The filtered signal.
+ cutoff = self.min_cutoff + self.beta * np.abs(dx_hat)
+ a = smoothing_factor(t_e, cutoff)
+ x_hat = exponential_smoothing(a, x, self.x_prev)
+
+ # missing keypoints remove
+ np.copyto(x_hat, -10, where=mask.mask)
+
+ # Memorize the previous values.
+ self.x_prev = x_hat
+ self.dx_prev = dx_hat
+ self.t_prev = t
+ self.mask_prev = mask
+
+ return x_hat
diff --git a/vendor/ViTPose/mmpose/core/post_processing/post_transforms.py b/vendor/ViTPose/mmpose/core/post_processing/post_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..93063fb1c1a60519a527037795654b0278a880e4
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/post_processing/post_transforms.py
@@ -0,0 +1,366 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import math
+
+import cv2
+import numpy as np
+import torch
+
+
+def fliplr_joints(joints_3d, joints_3d_visible, img_width, flip_pairs):
+ """Flip human joints horizontally.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ joints_3d_visible (np.ndarray([K, 1])): Visibility of keypoints.
+ img_width (int): Image width.
+ flip_pairs (list[tuple]): Pairs of keypoints which are mirrored
+ (for example, left ear and right ear).
+
+ Returns:
+ tuple: Flipped human joints.
+
+ - joints_3d_flipped (np.ndarray([K, 3])): Flipped joints.
+ - joints_3d_visible_flipped (np.ndarray([K, 1])): Joint visibility.
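+
+ Example (a minimal illustrative sketch; all joints sit at x=0, so after
+ flipping on a 192-pixel-wide image they land at x=191):
+ >>> joints = np.zeros((17, 3))
+ >>> vis = np.ones((17, 1))
+ >>> flipped, vis_f = fliplr_joints(joints, vis, img_width=192,
+ ... flip_pairs=[(1, 2), (3, 4)])
+ >>> float(flipped[:, 0].max())
+ 191.0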
+ """
+
+ assert len(joints_3d) == len(joints_3d_visible)
+ assert img_width > 0
+
+ joints_3d_flipped = joints_3d.copy()
+ joints_3d_visible_flipped = joints_3d_visible.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ joints_3d_flipped[left, :] = joints_3d[right, :]
+ joints_3d_flipped[right, :] = joints_3d[left, :]
+
+ joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
+ joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]
+
+ # Flip horizontally
+ joints_3d_flipped[:, 0] = img_width - 1 - joints_3d_flipped[:, 0]
+ joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped
+
+ return joints_3d_flipped, joints_3d_visible_flipped
+
+
+def fliplr_regression(regression,
+ flip_pairs,
+ center_mode='static',
+ center_x=0.5,
+ center_index=0):
+ """Flip human joints horizontally.
+
+ Note:
+ - batch_size: N
+ - num_keypoint: K
+
+ Args:
+ regression (np.ndarray([..., K, C])): Coordinates of keypoints, where K
+ is the joint number and C is the dimension. Example shapes are:
+
+ - [N, K, C]: a batch of keypoints where N is the batch size.
+ - [N, T, K, C]: a batch of pose sequences, where T is the frame
+ number.
+ flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+ center_mode (str): The mode to set the center location on the x-axis
+ to flip around. Options are:
+
+ - static: use a static x value (see center_x also)
+ - root: use a root joint (see center_index also)
+ center_x (float): Set the x-axis location of the flip center. Only used
+ when center_mode=static.
+ center_index (int): Set the index of the root joint, whose x location
+ will be used as the flip center. Only used when center_mode=root.
+
+ Returns:
+ np.ndarray([..., K, C]): Flipped joints.
+ """
+ assert regression.ndim >= 2, f'Invalid pose shape {regression.shape}'
+
+ allowed_center_mode = {'static', 'root'}
+ assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \
+ f'{center_mode}, allowed choices are {allowed_center_mode}'
+
+ if center_mode == 'static':
+ x_c = center_x
+ elif center_mode == 'root':
+ assert regression.shape[-2] > center_index
+ x_c = regression[..., center_index:center_index + 1, 0]
+
+ regression_flipped = regression.copy()
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ regression_flipped[..., left, :] = regression[..., right, :]
+ regression_flipped[..., right, :] = regression[..., left, :]
+
+ # Flip horizontally
+ regression_flipped[..., 0] = x_c * 2 - regression_flipped[..., 0]
+ return regression_flipped
+
+
+def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatmap'):
+ """Flip the flipped heatmaps back to the original form.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ output_flipped (np.ndarray[N, K, H, W]): The output heatmaps obtained
+ from the flipped images.
+ flip_pairs (list[tuple()): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+ target_type (str): GaussianHeatmap or CombinedTarget
+
+ Returns:
+ np.ndarray: heatmaps flipped back to match the original image
+ """
+ assert output_flipped.ndim == 4, \
+ 'output_flipped should be [batch_size, num_keypoints, height, width]'
+ shape_ori = output_flipped.shape
+ channels = 1
+ if target_type.lower() == 'CombinedTarget'.lower():
+ channels = 3
+ output_flipped[:, 1::3, ...] = -output_flipped[:, 1::3, ...]
+ output_flipped = output_flipped.reshape(shape_ori[0], -1, channels,
+ shape_ori[2], shape_ori[3])
+ output_flipped_back = output_flipped.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ output_flipped_back[:, left, ...] = output_flipped[:, right, ...]
+ output_flipped_back[:, right, ...] = output_flipped[:, left, ...]
+ output_flipped_back = output_flipped_back.reshape(shape_ori)
+ # Flip horizontally
+ output_flipped_back = output_flipped_back[..., ::-1]
+ return output_flipped_back
+
+
+def transform_preds(coords, center, scale, output_size, use_udp=False):
+ """Get final keypoint predictions from heatmaps and apply scaling and
+ translation to map them back to the image.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ coords (np.ndarray[K, ndims]):
+
+ * If ndims=2, coords are predicted keypoint locations.
+ * If ndims=4, coords are composed of (x, y, scores, tags)
+ * If ndims=5, coords are composed of (x, y, scores, tags,
+ flipped_tags)
+
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ output_size (np.ndarray[2, ] | list(2,)): Size of the
+ destination heatmaps.
+ use_udp (bool): Use unbiased data processing
+
+ Returns:
+ np.ndarray: Predicted coordinates in the images.
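+
+ Example (a minimal illustrative sketch; a keypoint at the centre of a
+ 48x64 heatmap maps back to the bbox centre):
+ >>> coords = np.array([[24., 32.]])
+ >>> center = np.array([128., 128.])
+ >>> scale = np.array([1., 1.])  # normalized by 200
+ >>> transform_preds(coords, center, scale, [48, 64])
+ array([[128., 128.]])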
+ """
+ assert coords.shape[1] in (2, 4, 5)
+ assert len(center) == 2
+ assert len(scale) == 2
+ assert len(output_size) == 2
+
+ # Recover the scale which is normalized by a factor of 200.
+ scale = scale * 200.0
+
+ if use_udp:
+ scale_x = scale[0] / (output_size[0] - 1.0)
+ scale_y = scale[1] / (output_size[1] - 1.0)
+ else:
+ scale_x = scale[0] / output_size[0]
+ scale_y = scale[1] / output_size[1]
+
+ target_coords = np.ones_like(coords)
+ target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5
+ target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5
+
+ return target_coords
+
+
+def get_affine_transform(center,
+ scale,
+ rot,
+ output_size,
+ shift=(0., 0.),
+ inv=False):
+ """Get the affine transform matrix, given the center/scale/rot/output_size.
+
+ Args:
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ rot (float): Rotation angle (degree).
+ output_size (np.ndarray[2, ] | list(2,)): Size of the
+ destination heatmaps.
+ shift (0-100%): Shift translation ratio wrt the width/height.
+ Default (0., 0.).
+ inv (bool): Option to inverse the affine transform direction.
+ (inv=False: src->dst or inv=True: dst->src)
+
+ Returns:
+ np.ndarray: The transform matrix.
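+
+ Example (a minimal illustrative sketch; the returned 2x3 matrix can be
+ passed directly to cv2.warpAffine):
+ >>> trans = get_affine_transform(center=np.array([128., 128.]),
+ ... scale=np.array([1., 1.]),
+ ... rot=0.,
+ ... output_size=np.array([192, 256]))
+ >>> trans.shape
+ (2, 3)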
+ """
+ assert len(center) == 2
+ assert len(scale) == 2
+ assert len(output_size) == 2
+ assert len(shift) == 2
+
+ # pixel_std is 200.
+ scale_tmp = scale * 200.0
+
+ shift = np.array(shift)
+ src_w = scale_tmp[0]
+ dst_w = output_size[0]
+ dst_h = output_size[1]
+
+ rot_rad = np.pi * rot / 180
+ src_dir = rotate_point([0., src_w * -0.5], rot_rad)
+ dst_dir = np.array([0., dst_w * -0.5])
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ src[0, :] = center + scale_tmp * shift
+ src[1, :] = center + src_dir + scale_tmp * shift
+ src[2, :] = _get_3rd_point(src[0, :], src[1, :])
+
+ dst = np.zeros((3, 2), dtype=np.float32)
+ dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
+ dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+ dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
+
+ if inv:
+ trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+ else:
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ return trans
+
+
+def affine_transform(pt, trans_mat):
+ """Apply an affine transformation to the points.
+
+ Args:
+ pt (np.ndarray): a 2 dimensional point to be transformed
+ trans_mat (np.ndarray): 2x3 matrix of an affine transform
+
+ Returns:
+ np.ndarray: Transformed points.
+ """
+ assert len(pt) == 2
+ new_pt = np.array(trans_mat) @ np.array([pt[0], pt[1], 1.])
+
+ return new_pt
+
+
+def _get_3rd_point(a, b):
+ """To calculate the affine matrix, three pairs of points are required. This
+ function is used to get the 3rd point, given 2D points a & b.
+
+ The 3rd point is defined by rotating vector `a - b` by 90 degrees
+ anticlockwise, using b as the rotation center.
+
+ Args:
+ a (np.ndarray): point(x,y)
+ b (np.ndarray): point(x,y)
+
+ Returns:
+ np.ndarray: The 3rd point.
+ """
+ assert len(a) == 2
+ assert len(b) == 2
+ direction = a - b
+ third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
+
+ return third_pt
+
+
+def rotate_point(pt, angle_rad):
+ """Rotate a point by an angle.
+
+ Args:
+ pt (list[float]): 2 dimensional point to be rotated
+ angle_rad (float): rotation angle by radian
+
+ Returns:
+ list[float]: Rotated point.
+ """
+ assert len(pt) == 2
+ sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+ new_x = pt[0] * cs - pt[1] * sn
+ new_y = pt[0] * sn + pt[1] * cs
+ rotated_pt = [new_x, new_y]
+
+ return rotated_pt
+
+
+def get_warp_matrix(theta, size_input, size_dst, size_target):
+ """Calculate the transformation matrix under the constraint of unbiased.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
+ Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Args:
+ theta (float): Rotation angle in degrees.
+ size_input (np.ndarray): Size of input image [w, h].
+ size_dst (np.ndarray): Size of output image [w, h].
+ size_target (np.ndarray): Size of ROI in input plane [w, h].
+
+ Returns:
+ np.ndarray: A matrix for transformation.
+ """
+ theta = np.deg2rad(theta)
+ matrix = np.zeros((2, 3), dtype=np.float32)
+ scale_x = size_dst[0] / size_target[0]
+ scale_y = size_dst[1] / size_target[1]
+ matrix[0, 0] = math.cos(theta) * scale_x
+ matrix[0, 1] = -math.sin(theta) * scale_x
+ matrix[0, 2] = scale_x * (-0.5 * size_input[0] * math.cos(theta) +
+ 0.5 * size_input[1] * math.sin(theta) +
+ 0.5 * size_target[0])
+ matrix[1, 0] = math.sin(theta) * scale_y
+ matrix[1, 1] = math.cos(theta) * scale_y
+ matrix[1, 2] = scale_y * (-0.5 * size_input[0] * math.sin(theta) -
+ 0.5 * size_input[1] * math.cos(theta) +
+ 0.5 * size_target[1])
+ return matrix
+
+
+def warp_affine_joints(joints, mat):
+ """Apply affine transformation defined by the transform matrix on the
+ joints.
+
+ Args:
+ joints (np.ndarray[..., 2]): Origin coordinate of joints.
+ mat (np.ndarray[2, 3]): The affine matrix.
+
+ Returns:
+ np.ndarray[..., 2]: Result coordinate of joints.
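+
+ Example (a minimal illustrative sketch; uses get_warp_matrix from this
+ module to map an image-space joint into 48x64 heatmap space):
+ >>> mat = get_warp_matrix(theta=0., size_input=np.array([192, 256]),
+ ... size_dst=np.array([48, 64]),
+ ... size_target=np.array([192, 256]))
+ >>> warp_affine_joints(np.array([[96., 128.]]), mat)
+ array([[24., 32.]])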
+ """
+ joints = np.array(joints)
+ shape = joints.shape
+ joints = joints.reshape(-1, 2)
+ return np.dot(
+ np.concatenate((joints, joints[:, 0:1] * 0 + 1), axis=1),
+ mat.T).reshape(shape)
+
+
+def affine_transform_torch(pts, t):
+ npts = pts.shape[0]
+ pts_homo = torch.cat([pts, torch.ones(npts, 1, device=pts.device)], dim=1)
+ out = torch.mm(t, torch.t(pts_homo))
+ return torch.t(out[:2, :])
diff --git a/vendor/ViTPose/mmpose/core/utils/__init__.py b/vendor/ViTPose/mmpose/core/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd6c0277a0647e605eaf29ccac41c1f9a37a05ac
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/utils/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .dist_utils import allreduce_grads
+from .regularizations import WeightNormClipHook
+
+__all__ = ['allreduce_grads', 'WeightNormClipHook']
diff --git a/vendor/ViTPose/mmpose/core/utils/dist_utils.py b/vendor/ViTPose/mmpose/core/utils/dist_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e76e591050284b1e9c541ea4ee8ee66708b8e7fb
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/utils/dist_utils.py
@@ -0,0 +1,51 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+
+import torch.distributed as dist
+from torch._utils import (_flatten_dense_tensors, _take_tensors,
+ _unflatten_dense_tensors)
+
+
+def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
+ """Allreduce parameters as a whole."""
+ if bucket_size_mb > 0:
+ bucket_size_bytes = bucket_size_mb * 1024 * 1024
+ buckets = _take_tensors(tensors, bucket_size_bytes)
+ else:
+ buckets = OrderedDict()
+ for tensor in tensors:
+ tp = tensor.type()
+ if tp not in buckets:
+ buckets[tp] = []
+ buckets[tp].append(tensor)
+ buckets = buckets.values()
+
+ for bucket in buckets:
+ flat_tensors = _flatten_dense_tensors(bucket)
+ dist.all_reduce(flat_tensors)
+ flat_tensors.div_(world_size)
+ for tensor, synced in zip(
+ bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
+ tensor.copy_(synced)
+
+
+def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
+ """Allreduce gradients.
+
+ Args:
+ params (list[torch.Parameters]): List of parameters of a model
+ coalesce (bool, optional): Whether allreduce parameters as a whole.
+ Default: True.
+ bucket_size_mb (int, optional): Size of bucket, the unit is MB.
+ Default: -1.
+ """
+ grads = [
+ param.grad.data for param in params
+ if param.requires_grad and param.grad is not None
+ ]
+ world_size = dist.get_world_size()
+ if coalesce:
+ _allreduce_coalesced(grads, world_size, bucket_size_mb)
+ else:
+ for tensor in grads:
+ dist.all_reduce(tensor.div_(world_size))
diff --git a/vendor/ViTPose/mmpose/core/utils/regularizations.py b/vendor/ViTPose/mmpose/core/utils/regularizations.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8c7449038066016f6efb60e126111ace962fe98
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/utils/regularizations.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod, abstractproperty
+
+import torch
+
+
+class PytorchModuleHook(metaclass=ABCMeta):
+ """Base class for PyTorch module hook registers.
+
+ An instance of a subclass of PytorchModuleHook can be used to
+ register hook to a pytorch module using the `register` method like:
+ hook_register.register(module)
+
+ Subclasses should add/overwrite the following methods:
+ - __init__
+ - hook
+ - hook_type
+ """
+
+ @abstractmethod
+ def hook(self, *args, **kwargs):
+ """Hook function."""
+
+ @abstractproperty
+ def hook_type(self) -> str:
+ """Hook type Subclasses should overwrite this function to return a
+ string value in.
+
+ {`forward`, `forward_pre`, `backward`}
+ """
+
+ def register(self, module):
+ """Register the hook function to the module.
+
+ Args:
+ module (pytorch module): the module to register the hook.
+
+ Returns:
+ handle (torch.utils.hooks.RemovableHandle): a handle to remove
+ the hook by calling handle.remove()
+ """
+ assert isinstance(module, torch.nn.Module)
+
+ if self.hook_type == 'forward':
+ h = module.register_forward_hook(self.hook)
+ elif self.hook_type == 'forward_pre':
+ h = module.register_forward_pre_hook(self.hook)
+ elif self.hook_type == 'backward':
+ h = module.register_backward_hook(self.hook)
+ else:
+ raise ValueError(f'Invalid hook type {self.hook}')
+
+ return h
+
+
+class WeightNormClipHook(PytorchModuleHook):
+ """Apply weight norm clip regularization.
+
+ The module's parameters will be clipped to a given maximum norm before
+ each forward pass.
+
+ Args:
+ max_norm (float): The maximum norm of the parameter.
+ module_param_names (str|list): The parameter name (or name list) to
+ apply weight norm clip.
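+
+ Example (a minimal illustrative sketch; the clip runs in a forward-pre
+ hook, so it is applied automatically on every forward call):
+ >>> import torch
+ >>> import torch.nn as nn
+ >>> layer = nn.Linear(4, 4)
+ >>> handle = WeightNormClipHook(max_norm=1.0).register(layer)
+ >>> out = layer(torch.randn(2, 4))
+ >>> float(layer.weight.norm()) <= 1.0
+ True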
+ """
+
+ def __init__(self, max_norm=1.0, module_param_names='weight'):
+ self.module_param_names = module_param_names if isinstance(
+ module_param_names, list) else [module_param_names]
+ self.max_norm = max_norm
+
+ @property
+ def hook_type(self):
+ return 'forward_pre'
+
+ def hook(self, module, _input):
+ for name in self.module_param_names:
+ assert name in module._parameters, f'{name} is not a parameter' \
+ f' of the module {type(module)}'
+ param = module._parameters[name]
+
+ with torch.no_grad():
+ m = param.norm().item()
+ if m > self.max_norm:
+ param.mul_(self.max_norm / (m + 1e-6))
diff --git a/vendor/ViTPose/mmpose/core/visualization/__init__.py b/vendor/ViTPose/mmpose/core/visualization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9705494bc8ef4dfb49e6a8db21ab6f243f3bb6d2
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/visualization/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .effects import apply_bugeye_effect, apply_sunglasses_effect
+from .image import (imshow_bboxes, imshow_keypoints, imshow_keypoints_3d,
+ imshow_mesh_3d)
+
+__all__ = [
+ 'imshow_keypoints',
+ 'imshow_keypoints_3d',
+ 'imshow_bboxes',
+ 'apply_bugeye_effect',
+ 'apply_sunglasses_effect',
+ 'imshow_mesh_3d',
+]
diff --git a/vendor/ViTPose/mmpose/core/visualization/effects.py b/vendor/ViTPose/mmpose/core/visualization/effects.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3add7d95dafe4d072b7945823aaa75664622994
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/visualization/effects.py
@@ -0,0 +1,111 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+
+def apply_bugeye_effect(img,
+ pose_results,
+ left_eye_index,
+ right_eye_index,
+ kpt_thr=0.5):
+ """Apply bug-eye effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "bbox" ([K, 4(or 5)]): detection bbox in
+ [x1, y1, x2, y2, (score)]
+ - "keypoints" ([K,3]): keypoint detection result in [x, y, score]
+ left_eye_index (int): Keypoint index of left eye
+ right_eye_index (int): Keypoint index of right eye
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ xx, yy = np.meshgrid(np.arange(img.shape[1]), np.arange(img.shape[0]))
+ xx = xx.astype(np.float32)
+ yy = yy.astype(np.float32)
+
+ for pose in pose_results:
+ bbox = pose['bbox']
+ kpts = pose['keypoints']
+
+ if kpts[left_eye_index, 2] < kpt_thr or kpts[right_eye_index,
+ 2] < kpt_thr:
+ continue
+
+ kpt_leye = kpts[left_eye_index, :2]
+ kpt_reye = kpts[right_eye_index, :2]
+ for xc, yc in [kpt_leye, kpt_reye]:
+
+ # distortion parameters
+ k1 = 0.001
+ epe = 1e-5
+
+ scale = (bbox[2] - bbox[0])**2 + (bbox[3] - bbox[1])**2
+ r2 = ((xx - xc)**2 + (yy - yc)**2)
+ r2 = (r2 + epe) / scale # normalized by bbox scale
+
+ xx = (xx - xc) / (1 + k1 / r2) + xc
+ yy = (yy - yc) / (1 + k1 / r2) + yc
+
+ img = cv2.remap(
+ img,
+ xx,
+ yy,
+ interpolation=cv2.INTER_AREA,
+ borderMode=cv2.BORDER_REPLICATE)
+ return img
+
+
+def apply_sunglasses_effect(img,
+ pose_results,
+ sunglasses_img,
+ left_eye_index,
+ right_eye_index,
+ kpt_thr=0.5):
+ """Apply sunglasses effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result in [x, y, score]
+ sunglasses_img (np.ndarray): Sunglasses image with white background.
+ left_eye_index (int): Keypoint index of left eye
+ right_eye_index (int): Keypoint index of right eye
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ hm, wm = sunglasses_img.shape[:2]
+ # anchor points in the sunglasses mask
+ pts_src = np.array([[0.3 * wm, 0.3 * hm], [0.3 * wm, 0.7 * hm],
+ [0.7 * wm, 0.3 * hm], [0.7 * wm, 0.7 * hm]],
+ dtype=np.float32)
+
+ for pose in pose_results:
+ kpts = pose['keypoints']
+
+ if kpts[left_eye_index, 2] < kpt_thr or kpts[right_eye_index,
+ 2] < kpt_thr:
+ continue
+
+ kpt_leye = kpts[left_eye_index, :2]
+ kpt_reye = kpts[right_eye_index, :2]
+ # orthogonal vector to the left-to-right eyes
+ vo = 0.5 * (kpt_reye - kpt_leye)[::-1] * [-1, 1]
+
+ # anchor points in the image by eye positions
+ pts_tar = np.vstack(
+ [kpt_reye + vo, kpt_reye - vo, kpt_leye + vo, kpt_leye - vo])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ sunglasses_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # mask the white background area in the patch with a threshold 200
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 200).astype(np.uint8)
+ img = cv2.copyTo(patch, mask, img)
+
+ return img
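+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. Both effects expect `pose_results` as a list of dicts holding a
+# [K, 3] 'keypoints' array (plus a 'bbox' for the bug-eye effect). The eye
+# indices (1, 2) follow the COCO keypoint order and all arrays below are
+# synthetic placeholders.
+if __name__ == '__main__':
+    img = np.full((480, 640, 3), 128, dtype=np.uint8)
+    kpts = np.zeros((17, 3), dtype=np.float32)
+    kpts[1] = [300, 200, 0.9]  # left eye
+    kpts[2] = [340, 200, 0.9]  # right eye
+    pose_results = [{
+        'bbox': np.array([250, 150, 390, 330], dtype=np.float32),
+        'keypoints': kpts
+    }]
+    out = apply_bugeye_effect(img.copy(), pose_results, 1, 2, kpt_thr=0.5)
+    sunglasses = np.full((100, 200, 3), 255, dtype=np.uint8)  # stand-in image
+    out = apply_sunglasses_effect(out, pose_results, sunglasses, 1, 2)
+    print(out.shape)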
diff --git a/vendor/ViTPose/mmpose/core/visualization/image.py b/vendor/ViTPose/mmpose/core/visualization/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..9414877fa7b53c5b1c10d29430dd53715cc22ce3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/core/visualization/image.py
@@ -0,0 +1,442 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+import os
+import warnings
+
+import cv2
+import mmcv
+import numpy as np
+from matplotlib import pyplot as plt
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.color import color_val
+
+try:
+ import trimesh
+ has_trimesh = True
+except (ImportError, ModuleNotFoundError):
+ has_trimesh = False
+
+try:
+ #os.environ['PYOPENGL_PLATFORM'] = 'osmesa'
+ import pyrender
+ has_pyrender = True
+except (ImportError, ModuleNotFoundError):
+ has_pyrender = False
+
+
+def imshow_bboxes(img,
+ bboxes,
+ labels=None,
+ colors='green',
+ text_color='white',
+ thickness=1,
+ font_scale=0.5,
+ show=True,
+ win_name='',
+ wait_time=0,
+ out_file=None):
+ """Draw bboxes with labels (optional) on an image. This is a wrapper of
+ mmcv.imshow_bboxes.
+
+ Args:
+ img (str or ndarray): The image to be displayed.
+ bboxes (ndarray): ndarray of shape (k, 4), each row is a bbox in
+ format [x1, y1, x2, y2].
+ labels (str or list[str], optional): labels of each bbox.
+ colors (list[str or tuple or :obj:`Color`]): A list of colors.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ show (bool): Whether to show the image.
+ win_name (str): The window name.
+ wait_time (int): Value of waitKey param.
+ out_file (str, optional): The filename to write the image.
+
+ Returns:
+ ndarray: The image with bboxes drawn on it.
+ """
+
+ # adapt to mmcv.imshow_bboxes input format
+ bboxes = np.split(
+ bboxes, bboxes.shape[0], axis=0) if bboxes.shape[0] > 0 else []
+ if not isinstance(colors, list):
+ colors = [colors for _ in range(len(bboxes))]
+ colors = [mmcv.color_val(c) for c in colors]
+ assert len(bboxes) == len(colors)
+
+ img = mmcv.imshow_bboxes(
+ img,
+ bboxes,
+ colors,
+ top_k=-1,
+ thickness=thickness,
+ show=False,
+ out_file=None)
+
+ if labels is not None:
+ if not isinstance(labels, list):
+ labels = [labels for _ in range(len(bboxes))]
+ assert len(labels) == len(bboxes)
+
+ for bbox, label, color in zip(bboxes, labels, colors):
+ if label is None:
+ continue
+ bbox_int = bbox[0, :4].astype(np.int32)
+ # roughly estimate the proper font size
+ text_size, text_baseline = cv2.getTextSize(label,
+ cv2.FONT_HERSHEY_DUPLEX,
+ font_scale, thickness)
+ text_x1 = bbox_int[0]
+ text_y1 = max(0, bbox_int[1] - text_size[1] - text_baseline)
+ text_x2 = bbox_int[0] + text_size[0]
+ text_y2 = text_y1 + text_size[1] + text_baseline
+ cv2.rectangle(img, (text_x1, text_y1), (text_x2, text_y2), color,
+ cv2.FILLED)
+ cv2.putText(img, label, (text_x1, text_y2 - text_baseline),
+ cv2.FONT_HERSHEY_DUPLEX, font_scale,
+ mmcv.color_val(text_color), thickness)
+
+ if show:
+ mmcv.imshow(img, win_name, wait_time)
+ if out_file is not None:
+ mmcv.imwrite(img, out_file)
+ return img
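+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It draws two labelled boxes on a blank canvas without opening a
+# window; all values are synthetic.
+if __name__ == '__main__':
+    canvas = np.zeros((240, 320, 3), dtype=np.uint8)
+    demo_boxes = np.array([[20, 20, 120, 120], [150, 60, 300, 200]],
+                          dtype=np.float32)
+    canvas = imshow_bboxes(canvas, demo_boxes, labels=['hand', 'face'],
+                           colors=['green', 'red'], show=False)
+    print(canvas.shape)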
+
+
+@deprecated_api_warning({'pose_limb_color': 'pose_link_color'})
+def imshow_keypoints(img,
+ pose_result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=4,
+ thickness=1,
+ show_keypoint_weight=False):
+ """Draw keypoints and links on an image.
+
+ Args:
+        img (str or np.ndarray): The image to draw poses on. If an image
+            array is given, it will be modified in-place.
+        pose_result (list[kpts]): The poses to draw. Each element kpts is
+            a Kx3 numpy.ndarray, where each keypoint is represented as
+            [x, y, score].
+        skeleton (list[[idx_i, idx_j]]): Skeleton described by a list of
+            links, each is a pair of joint indices.
+        kpt_score_thr (float, optional): Minimum score of keypoints
+            to be shown. Default: 0.3.
+        pose_kpt_color (np.ndarray[Nx3]): Color of N keypoints. If None,
+            the keypoints will not be drawn.
+        pose_link_color (np.ndarray[Mx3]): Color of M links. If None, the
+            links will not be drawn.
+        radius (int): Radius of keypoint circles.
+        thickness (int): Thickness of lines.
+        show_keypoint_weight (bool): Whether to adjust the transparency
+            of keypoints and links according to their confidence scores.
+ """
+
+ img = mmcv.imread(img)
+ img_h, img_w, _ = img.shape
+
+ for kpts in pose_result:
+
+ kpts = np.array(kpts, copy=False)
+
+ # draw each point on image
+ if pose_kpt_color is not None:
+ assert len(pose_kpt_color) == len(kpts)
+ for kid, kpt in enumerate(kpts):
+ x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
+ if kpt_score > kpt_score_thr:
+ color = tuple(int(c) for c in pose_kpt_color[kid])
+ if show_keypoint_weight:
+ img_copy = img.copy()
+ cv2.circle(img_copy, (int(x_coord), int(y_coord)),
+ radius, color, -1)
+ transparency = max(0, min(1, kpt_score))
+ cv2.addWeighted(
+ img_copy,
+ transparency,
+ img,
+ 1 - transparency,
+ 0,
+ dst=img)
+ else:
+ cv2.circle(img, (int(x_coord), int(y_coord)), radius,
+ color, -1)
+
+ # draw links
+ if skeleton is not None and pose_link_color is not None:
+ assert len(pose_link_color) == len(skeleton)
+ for sk_id, sk in enumerate(skeleton):
+ pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
+ pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
+ if (pos1[0] > 0 and pos1[0] < img_w and pos1[1] > 0
+ and pos1[1] < img_h and pos2[0] > 0 and pos2[0] < img_w
+ and pos2[1] > 0 and pos2[1] < img_h
+ and kpts[sk[0], 2] > kpt_score_thr
+ and kpts[sk[1], 2] > kpt_score_thr):
+ color = tuple(int(c) for c in pose_link_color[sk_id])
+ if show_keypoint_weight:
+ img_copy = img.copy()
+ X = (pos1[0], pos2[0])
+ Y = (pos1[1], pos2[1])
+ mX = np.mean(X)
+ mY = np.mean(Y)
+ length = ((Y[0] - Y[1])**2 + (X[0] - X[1])**2)**0.5
+ angle = math.degrees(
+ math.atan2(Y[0] - Y[1], X[0] - X[1]))
+ stickwidth = 2
+ polygon = cv2.ellipse2Poly(
+ (int(mX), int(mY)),
+ (int(length / 2), int(stickwidth)), int(angle), 0,
+ 360, 1)
+ cv2.fillConvexPoly(img_copy, polygon, color)
+ transparency = max(
+ 0, min(1, 0.5 * (kpts[sk[0], 2] + kpts[sk[1], 2])))
+ cv2.addWeighted(
+ img_copy,
+ transparency,
+ img,
+ 1 - transparency,
+ 0,
+ dst=img)
+ else:
+ cv2.line(img, pos1, pos2, color, thickness=thickness)
+
+ return img
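+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It draws a three-keypoint "pose" with two candidate links; the
+# third keypoint falls below kpt_score_thr and is skipped. Colors are BGR
+# and all values are synthetic.
+if __name__ == '__main__':
+    canvas2 = np.zeros((200, 200, 3), dtype=np.uint8)
+    demo_pose = np.array([[50., 50., 0.9], [150., 50., 0.8], [100., 150., 0.2]])
+    canvas2 = imshow_keypoints(
+        canvas2, [demo_pose],
+        skeleton=[[0, 1], [1, 2]],
+        pose_kpt_color=np.array([[0, 255, 0]] * 3),
+        pose_link_color=np.array([[255, 0, 0]] * 2),
+        radius=3,
+        thickness=2)
+    print(canvas2.shape)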
+
+
+def imshow_keypoints_3d(
+ pose_result,
+ img=None,
+ skeleton=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ vis_height=400,
+ kpt_score_thr=0.3,
+ num_instances=-1,
+ *,
+ axis_azimuth=70,
+ axis_limit=1.7,
+ axis_dist=10.0,
+ axis_elev=15.0,
+):
+ """Draw 3D keypoints and links in 3D coordinates.
+
+ Args:
+ pose_result (list[dict]): 3D pose results containing:
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "title" (str): Optional. A string to specify the title of the
+ visualization of this pose result
+        img (str|np.ndarray): Optional. The image or image path to show the
+            input image and/or 2D pose. Note that the image should be given
+            in BGR channel order.
+        skeleton (list of [idx_i, idx_j]): Skeleton described by a list of
+            links, each is a pair of joint indices.
+        pose_kpt_color (np.ndarray[Nx3]): Color of N keypoints. If None, do
+            not draw keypoints.
+        pose_link_color (np.ndarray[Mx3]): Color of M links. If None, do not
+            draw links.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ kpt_score_thr (float): Minimum score of keypoints to be shown.
+ Default: 0.3.
+ num_instances (int): Number of instances to be shown in 3D. If smaller
+ than 0, all the instances in the pose_result will be shown.
+ Otherwise, pad or truncate the pose_result to a length of
+ num_instances.
+ axis_azimuth (float): axis azimuth angle for 3D visualizations.
+ axis_dist (float): axis distance for 3D visualizations.
+ axis_elev (float): axis elevation view angle for 3D visualizations.
+ axis_limit (float): The axis limit to visualize 3d pose. The xyz
+ range will be set as:
+ - x: [x_c - axis_limit/2, x_c + axis_limit/2]
+ - y: [y_c - axis_limit/2, y_c + axis_limit/2]
+ - z: [0, axis_limit]
+                where x_c, y_c are the mean values of the x and y coordinates.
+ """
+
+ show_img = img is not None
+ if num_instances < 0:
+ num_instances = len(pose_result)
+ else:
+ if len(pose_result) > num_instances:
+ pose_result = pose_result[:num_instances]
+ elif len(pose_result) < num_instances:
+ pose_result += [dict()] * (num_instances - len(pose_result))
+ num_axis = num_instances + 1 if show_img else num_instances
+
+ plt.ioff()
+ fig = plt.figure(figsize=(vis_height * num_axis * 0.01, vis_height * 0.01))
+
+ if show_img:
+ img = mmcv.imread(img, channel_order='bgr')
+ img = mmcv.bgr2rgb(img)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ ax_img = fig.add_subplot(1, num_axis, 1)
+ ax_img.get_xaxis().set_visible(False)
+ ax_img.get_yaxis().set_visible(False)
+ ax_img.set_axis_off()
+ ax_img.set_title('Input')
+ ax_img.imshow(img, aspect='equal')
+
+ for idx, res in enumerate(pose_result):
+ dummy = len(res) == 0
+ kpts = np.zeros((1, 3)) if dummy else res['keypoints_3d']
+ if kpts.shape[1] == 3:
+ kpts = np.concatenate([kpts, np.ones((kpts.shape[0], 1))], axis=1)
+ valid = kpts[:, 3] >= kpt_score_thr
+
+ ax_idx = idx + 2 if show_img else idx + 1
+ ax = fig.add_subplot(1, num_axis, ax_idx, projection='3d')
+ ax.view_init(
+ elev=axis_elev,
+ azim=axis_azimuth,
+ )
+ x_c = np.mean(kpts[valid, 0]) if sum(valid) > 0 else 0
+ y_c = np.mean(kpts[valid, 1]) if sum(valid) > 0 else 0
+ ax.set_xlim3d([x_c - axis_limit / 2, x_c + axis_limit / 2])
+ ax.set_ylim3d([y_c - axis_limit / 2, y_c + axis_limit / 2])
+ ax.set_zlim3d([0, axis_limit])
+ ax.set_aspect('auto')
+ ax.set_xticks([])
+ ax.set_yticks([])
+ ax.set_zticks([])
+ ax.set_xticklabels([])
+ ax.set_yticklabels([])
+ ax.set_zticklabels([])
+ ax.dist = axis_dist
+
+ if not dummy and pose_kpt_color is not None:
+ pose_kpt_color = np.array(pose_kpt_color)
+ assert len(pose_kpt_color) == len(kpts)
+ x_3d, y_3d, z_3d = np.split(kpts[:, :3], [1, 2], axis=1)
+ # matplotlib uses RGB color in [0, 1] value range
+ _color = pose_kpt_color[..., ::-1] / 255.
+ ax.scatter(
+ x_3d[valid],
+ y_3d[valid],
+ z_3d[valid],
+ marker='o',
+ color=_color[valid],
+ )
+
+ if not dummy and skeleton is not None and pose_link_color is not None:
+ pose_link_color = np.array(pose_link_color)
+ assert len(pose_link_color) == len(skeleton)
+ for link, link_color in zip(skeleton, pose_link_color):
+ link_indices = [_i for _i in link]
+ xs_3d = kpts[link_indices, 0]
+ ys_3d = kpts[link_indices, 1]
+ zs_3d = kpts[link_indices, 2]
+ kpt_score = kpts[link_indices, 3]
+ if kpt_score.min() > kpt_score_thr:
+ # matplotlib uses RGB color in [0, 1] value range
+ _color = link_color[::-1] / 255.
+ ax.plot(xs_3d, ys_3d, zs_3d, color=_color, zdir='z')
+
+ if 'title' in res:
+ ax.set_title(res['title'])
+
+ # convert figure to numpy array
+ fig.tight_layout()
+ fig.canvas.draw()
+ img_w, img_h = fig.canvas.get_width_height()
+ img_vis = np.frombuffer(
+ fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(img_h, img_w, -1)
+ img_vis = mmcv.rgb2bgr(img_vis)
+
+ plt.close(fig)
+
+ return img_vis
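+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It renders one synthetic 3D pose (keypoints as [x, y, z, score])
+# to a BGR image array through matplotlib; no window is opened.
+if __name__ == '__main__':
+    demo_kpts_3d = np.array([[0.0, 0.0, 1.0, 1.0],
+                             [0.2, 0.0, 1.2, 1.0],
+                             [0.2, 0.2, 1.4, 1.0]])
+    vis_3d = imshow_keypoints_3d(
+        [dict(keypoints_3d=demo_kpts_3d, title='demo')],
+        skeleton=[[0, 1], [1, 2]],
+        pose_kpt_color=np.array([[0, 255, 0]] * 3),
+        pose_link_color=np.array([[255, 0, 0]] * 2),
+        vis_height=300)
+    print(vis_3d.shape)  # (H, W, 3)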
+
+
+def imshow_mesh_3d(img,
+ vertices,
+ faces,
+ camera_center,
+ focal_length,
+ colors=(76, 76, 204)):
+ """Render 3D meshes on background image.
+
+ Args:
+        img (np.ndarray): Background image.
+        vertices (list of np.ndarray): Vertex coordinates in camera space.
+ faces (list of np.ndarray): Faces of meshes.
+ camera_center ([2]): Center pixel.
+ focal_length ([2]): Focal length of camera.
+ colors (list[str or tuple or Color]): A list of mesh colors.
+ """
+
+ H, W, C = img.shape
+
+ if not has_pyrender:
+ warnings.warn('pyrender package is not installed.')
+ return img
+
+ if not has_trimesh:
+ warnings.warn('trimesh package is not installed.')
+ return img
+
+ try:
+ renderer = pyrender.OffscreenRenderer(
+ viewport_width=W, viewport_height=H)
+ except (ImportError, RuntimeError):
+ warnings.warn('pyrender package is not installed correctly.')
+ return img
+
+ if not isinstance(colors, list):
+ colors = [colors for _ in range(len(vertices))]
+ colors = [color_val(c) for c in colors]
+
+ depth_map = np.ones([H, W]) * np.inf
+ output_img = img
+ for idx in range(len(vertices)):
+ color = colors[idx]
+ color = [c / 255.0 for c in color]
+ color.append(1.0)
+ vert = vertices[idx]
+ face = faces[idx]
+
+ material = pyrender.MetallicRoughnessMaterial(
+ metallicFactor=0.2, alphaMode='OPAQUE', baseColorFactor=color)
+
+ mesh = trimesh.Trimesh(vert, face)
+ rot = trimesh.transformations.rotation_matrix(
+ np.radians(180), [1, 0, 0])
+ mesh.apply_transform(rot)
+ mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
+
+ scene = pyrender.Scene(ambient_light=(0.5, 0.5, 0.5))
+ scene.add(mesh, 'mesh')
+
+ camera_pose = np.eye(4)
+ camera = pyrender.IntrinsicsCamera(
+ fx=focal_length[0],
+ fy=focal_length[1],
+ cx=camera_center[0],
+ cy=camera_center[1],
+ zfar=1e5)
+ scene.add(camera, pose=camera_pose)
+
+ light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1)
+ light_pose = np.eye(4)
+
+ light_pose[:3, 3] = np.array([0, -1, 1])
+ scene.add(light, pose=light_pose)
+
+ light_pose[:3, 3] = np.array([0, 1, 1])
+ scene.add(light, pose=light_pose)
+
+ light_pose[:3, 3] = np.array([1, 1, 2])
+ scene.add(light, pose=light_pose)
+
+ color, rend_depth = renderer.render(
+ scene, flags=pyrender.RenderFlags.RGBA)
+
+ valid_mask = (rend_depth < depth_map) * (rend_depth > 0)
+ depth_map[valid_mask] = rend_depth[valid_mask]
+ valid_mask = valid_mask[:, :, None]
+ output_img = (
+ valid_mask * color[:, :, :3] + (1 - valid_mask) * output_img)
+
+ return output_img
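+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It renders a single synthetic triangle over a blank background; if
+# pyrender/trimesh or an offscreen GL context is unavailable, the function
+# simply returns the input image. All values are placeholders.
+if __name__ == '__main__':
+    bg = np.zeros((240, 320, 3), dtype=np.uint8)
+    demo_verts = np.array([[0.0, 0.0, 2.0], [0.1, 0.0, 2.0], [0.0, 0.1, 2.0]])
+    demo_faces = np.array([[0, 1, 2]])
+    rendered = imshow_mesh_3d(
+        bg, [demo_verts], [demo_faces],
+        camera_center=[160, 120],
+        focal_length=[500, 500])
+    print(rendered.shape)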
diff --git a/vendor/ViTPose/mmpose/datasets/__init__.py b/vendor/ViTPose/mmpose/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b9e7cf035e1e7621d82ce98eb8ab372ce8cfc98
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/__init__.py
@@ -0,0 +1,42 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
+from .dataset_info import DatasetInfo
+from .pipelines import Compose
+from .samplers import DistributedSampler
+
+from .datasets import ( # isort:skip
+ AnimalATRWDataset, AnimalFlyDataset, AnimalHorse10Dataset,
+ AnimalLocustDataset, AnimalMacaqueDataset, AnimalPoseDataset,
+ AnimalZebraDataset, Body3DH36MDataset, BottomUpAicDataset,
+ BottomUpCocoDataset, BottomUpCocoWholeBodyDataset,
+ BottomUpCrowdPoseDataset, BottomUpMhpDataset, DeepFashionDataset,
+ Face300WDataset, FaceAFLWDataset, FaceCocoWholeBodyDataset,
+ FaceCOFWDataset, FaceWFLWDataset, FreiHandDataset,
+ HandCocoWholeBodyDataset, InterHand2DDataset, InterHand3DDataset,
+ MeshAdversarialDataset, MeshH36MDataset, MeshMixDataset, MoshDataset,
+ OneHand10KDataset, PanopticDataset, TopDownAicDataset, TopDownCocoDataset,
+ TopDownCocoWholeBodyDataset, TopDownCrowdPoseDataset,
+ TopDownFreiHandDataset, TopDownH36MDataset, TopDownJhmdbDataset,
+ TopDownMhpDataset, TopDownMpiiDataset, TopDownMpiiTrbDataset,
+ TopDownOCHumanDataset, TopDownOneHand10KDataset, TopDownPanopticDataset,
+ TopDownPoseTrack18Dataset, TopDownPoseTrack18VideoDataset)
+
+__all__ = [
+ 'TopDownCocoDataset', 'BottomUpCocoDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset', 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset', 'OneHand10KDataset', 'PanopticDataset',
+ 'HandCocoWholeBodyDataset', 'FreiHandDataset', 'InterHand2DDataset',
+ 'InterHand3DDataset', 'TopDownOCHumanDataset', 'TopDownAicDataset',
+ 'TopDownCocoWholeBodyDataset', 'MeshH36MDataset', 'MeshMixDataset',
+ 'MoshDataset', 'MeshAdversarialDataset', 'TopDownCrowdPoseDataset',
+ 'BottomUpCrowdPoseDataset', 'TopDownFreiHandDataset',
+ 'TopDownOneHand10KDataset', 'TopDownPanopticDataset',
+ 'TopDownPoseTrack18Dataset', 'TopDownJhmdbDataset', 'TopDownMhpDataset',
+ 'DeepFashionDataset', 'Face300WDataset', 'FaceAFLWDataset',
+ 'FaceWFLWDataset', 'FaceCOFWDataset', 'FaceCocoWholeBodyDataset',
+ 'Body3DH36MDataset', 'AnimalHorse10Dataset', 'AnimalMacaqueDataset',
+ 'AnimalFlyDataset', 'AnimalLocustDataset', 'AnimalZebraDataset',
+ 'AnimalATRWDataset', 'AnimalPoseDataset', 'TopDownH36MDataset',
+ 'TopDownPoseTrack18VideoDataset', 'build_dataloader', 'build_dataset',
+ 'Compose', 'DistributedSampler', 'DATASETS', 'PIPELINES', 'DatasetInfo'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/builder.py b/vendor/ViTPose/mmpose/datasets/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..990ba859e010064377f805e6aa3826984cf25b55
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/builder.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import platform
+import random
+from functools import partial
+
+import numpy as np
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from mmcv.utils import Registry, build_from_cfg, is_seq_of
+from mmcv.utils.parrots_wrapper import _get_dataloader
+from torch.utils.data.dataset import ConcatDataset
+
+from .samplers import DistributedSampler
+
+if platform.system() != 'Windows':
+ # https://github.com/pytorch/pytorch/issues/973
+ import resource
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+ base_soft_limit = rlimit[0]
+ hard_limit = rlimit[1]
+ soft_limit = min(max(4096, base_soft_limit), hard_limit)
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+
+DATASETS = Registry('dataset')
+PIPELINES = Registry('pipeline')
+
+
+def _concat_dataset(cfg, default_args=None):
+ types = cfg['type']
+ ann_files = cfg['ann_file']
+ img_prefixes = cfg.get('img_prefix', None)
+ dataset_infos = cfg.get('dataset_info', None)
+
+ num_joints = cfg['data_cfg'].get('num_joints', None)
+ dataset_channel = cfg['data_cfg'].get('dataset_channel', None)
+
+ datasets = []
+ num_dset = len(ann_files)
+ for i in range(num_dset):
+ cfg_copy = copy.deepcopy(cfg)
+ cfg_copy['ann_file'] = ann_files[i]
+
+ if isinstance(types, (list, tuple)):
+ cfg_copy['type'] = types[i]
+ if isinstance(img_prefixes, (list, tuple)):
+ cfg_copy['img_prefix'] = img_prefixes[i]
+ if isinstance(dataset_infos, (list, tuple)):
+ cfg_copy['dataset_info'] = dataset_infos[i]
+
+ if isinstance(num_joints, (list, tuple)):
+ cfg_copy['data_cfg']['num_joints'] = num_joints[i]
+
+ if is_seq_of(dataset_channel, list):
+ cfg_copy['data_cfg']['dataset_channel'] = dataset_channel[i]
+
+ datasets.append(build_dataset(cfg_copy, default_args))
+
+ return ConcatDataset(datasets)
+
+
+def build_dataset(cfg, default_args=None):
+ """Build a dataset from config dict.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+ default_args (dict, optional): Default initialization arguments.
+ Default: None.
+
+ Returns:
+ Dataset: The constructed dataset.
+ """
+ from .dataset_wrappers import RepeatDataset
+
+ if isinstance(cfg, (list, tuple)):
+ dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
+ elif cfg['type'] == 'ConcatDataset':
+ dataset = ConcatDataset(
+ [build_dataset(c, default_args) for c in cfg['datasets']])
+ elif cfg['type'] == 'RepeatDataset':
+ dataset = RepeatDataset(
+ build_dataset(cfg['dataset'], default_args), cfg['times'])
+ elif isinstance(cfg.get('ann_file'), (list, tuple)):
+ dataset = _concat_dataset(cfg, default_args)
+ else:
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
+ return dataset
+
+
+def build_dataloader(dataset,
+ samples_per_gpu,
+ workers_per_gpu,
+ num_gpus=1,
+ dist=True,
+ shuffle=True,
+ seed=None,
+ drop_last=True,
+ pin_memory=True,
+ **kwargs):
+ """Build PyTorch DataLoader.
+
+ In distributed training, each GPU/process has a dataloader.
+ In non-distributed training, there is only one dataloader for all GPUs.
+
+ Args:
+ dataset (Dataset): A PyTorch dataset.
+ samples_per_gpu (int): Number of training samples on each GPU, i.e.,
+ batch size of each GPU.
+ workers_per_gpu (int): How many subprocesses to use for data loading
+ for each GPU.
+ num_gpus (int): Number of GPUs. Only used in non-distributed training.
+ dist (bool): Distributed training/test or not. Default: True.
+ shuffle (bool): Whether to shuffle the data at every epoch.
+ Default: True.
+ drop_last (bool): Whether to drop the last incomplete batch in epoch.
+ Default: True
+ pin_memory (bool): Whether to use pin_memory in DataLoader.
+ Default: True
+ kwargs: any keyword argument to be used to initialize DataLoader
+
+ Returns:
+ DataLoader: A PyTorch dataloader.
+ """
+ rank, world_size = get_dist_info()
+ if dist:
+ sampler = DistributedSampler(
+ dataset, world_size, rank, shuffle=shuffle, seed=seed)
+ shuffle = False
+ batch_size = samples_per_gpu
+ num_workers = workers_per_gpu
+ else:
+ sampler = None
+ batch_size = num_gpus * samples_per_gpu
+ num_workers = num_gpus * workers_per_gpu
+
+ init_fn = partial(
+ worker_init_fn, num_workers=num_workers, rank=rank,
+ seed=seed) if seed is not None else None
+
+ _, DataLoader = _get_dataloader()
+ data_loader = DataLoader(
+ dataset,
+ batch_size=batch_size,
+ sampler=sampler,
+ num_workers=num_workers,
+ collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ **kwargs)
+
+ return data_loader
+
+
+def worker_init_fn(worker_id, num_workers, rank, seed):
+ """Init the random seed for various workers."""
+ # The seed of each worker equals to
+ # num_worker * rank + worker_id + user_seed
+ worker_seed = num_workers * rank + worker_id + seed
+ np.random.seed(worker_seed)
+ random.seed(worker_seed)
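+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It registers a tiny in-memory dataset and builds a non-distributed
+# dataloader for it. ToyDataset is a placeholder, not an mmpose dataset, and
+# the module must be executed with package context (e.g.
+# `python -m mmpose.datasets.builder`) for the relative imports to resolve.
+if __name__ == '__main__':
+    from torch.utils.data import Dataset
+
+    @DATASETS.register_module()
+    class ToyDataset(Dataset):
+
+        def __init__(self, size=8):
+            self.size = size
+
+        def __len__(self):
+            return self.size
+
+        def __getitem__(self, idx):
+            return dict(x=np.float32(idx))
+
+    toy_set = build_dataset(dict(type='ToyDataset', size=8))
+    toy_loader = build_dataloader(
+        toy_set, samples_per_gpu=4, workers_per_gpu=0, dist=False, seed=0)
+    for batch in toy_loader:
+        print(batch['x'])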
diff --git a/vendor/ViTPose/mmpose/datasets/dataset_info.py b/vendor/ViTPose/mmpose/datasets/dataset_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef0d62e43089770797ef565d2153c8d42e4956c5
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/dataset_info.py
@@ -0,0 +1,104 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+
+class DatasetInfo:
+
+ def __init__(self, dataset_info):
+ self._dataset_info = dataset_info
+ self.dataset_name = self._dataset_info['dataset_name']
+ self.paper_info = self._dataset_info['paper_info']
+ self.keypoint_info = self._dataset_info['keypoint_info']
+ self.skeleton_info = self._dataset_info['skeleton_info']
+ self.joint_weights = np.array(
+ self._dataset_info['joint_weights'], dtype=np.float32)[:, None]
+
+ self.sigmas = np.array(self._dataset_info['sigmas'])
+
+ self._parse_keypoint_info()
+ self._parse_skeleton_info()
+
+ def _parse_skeleton_info(self):
+ """Parse skeleton information.
+
+ - link_num (int): number of links.
+ - skeleton (list((2,))): list of links (id).
+ - skeleton_name (list((2,))): list of links (name).
+ - pose_link_color (np.ndarray): the color of the link for
+ visualization.
+ """
+ self.link_num = len(self.skeleton_info.keys())
+ self.pose_link_color = []
+
+ self.skeleton_name = []
+ self.skeleton = []
+ for skid in self.skeleton_info.keys():
+ link = self.skeleton_info[skid]['link']
+ self.skeleton_name.append(link)
+ self.skeleton.append([
+ self.keypoint_name2id[link[0]], self.keypoint_name2id[link[1]]
+ ])
+ self.pose_link_color.append(self.skeleton_info[skid].get(
+ 'color', [255, 128, 0]))
+ self.pose_link_color = np.array(self.pose_link_color)
+
+ def _parse_keypoint_info(self):
+ """Parse keypoint information.
+
+ - keypoint_num (int): number of keypoints.
+ - keypoint_id2name (dict): mapping keypoint id to keypoint name.
+ - keypoint_name2id (dict): mapping keypoint name to keypoint id.
+ - upper_body_ids (list): a list of keypoints that belong to the
+ upper body.
+ - lower_body_ids (list): a list of keypoints that belong to the
+ lower body.
+ - flip_index (list): list of flip index (id)
+ - flip_pairs (list((2,))): list of flip pairs (id)
+ - flip_index_name (list): list of flip index (name)
+ - flip_pairs_name (list((2,))): list of flip pairs (name)
+ - pose_kpt_color (np.ndarray): the color of the keypoint for
+ visualization.
+ """
+
+ self.keypoint_num = len(self.keypoint_info.keys())
+ self.keypoint_id2name = {}
+ self.keypoint_name2id = {}
+
+ self.pose_kpt_color = []
+ self.upper_body_ids = []
+ self.lower_body_ids = []
+
+ self.flip_index_name = []
+ self.flip_pairs_name = []
+
+ for kid in self.keypoint_info.keys():
+
+ keypoint_name = self.keypoint_info[kid]['name']
+ self.keypoint_id2name[kid] = keypoint_name
+ self.keypoint_name2id[keypoint_name] = kid
+ self.pose_kpt_color.append(self.keypoint_info[kid].get(
+ 'color', [255, 128, 0]))
+
+ type = self.keypoint_info[kid].get('type', '')
+ if type == 'upper':
+ self.upper_body_ids.append(kid)
+ elif type == 'lower':
+ self.lower_body_ids.append(kid)
+ else:
+ pass
+
+ swap_keypoint = self.keypoint_info[kid].get('swap', '')
+ if swap_keypoint == keypoint_name or swap_keypoint == '':
+ self.flip_index_name.append(keypoint_name)
+ else:
+ self.flip_index_name.append(swap_keypoint)
+ if [swap_keypoint, keypoint_name] not in self.flip_pairs_name:
+ self.flip_pairs_name.append([keypoint_name, swap_keypoint])
+
+ self.flip_pairs = [[
+ self.keypoint_name2id[pair[0]], self.keypoint_name2id[pair[1]]
+ ] for pair in self.flip_pairs_name]
+ self.flip_index = [
+ self.keypoint_name2id[name] for name in self.flip_index_name
+ ]
+ self.pose_kpt_color = np.array(self.pose_kpt_color)
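+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. It builds a minimal two-keypoint dataset_info dict and prints the
+# lookup tables derived by DatasetInfo. All field values are synthetic.
+if __name__ == '__main__':
+    demo_info = DatasetInfo(
+        dict(
+            dataset_name='toy_hand',
+            paper_info=dict(author='n/a', title='n/a'),
+            keypoint_info={
+                0: dict(name='left_tip', id=0, color=[0, 255, 0],
+                        type='upper', swap='right_tip'),
+                1: dict(name='right_tip', id=1, color=[255, 128, 0],
+                        type='upper', swap='left_tip'),
+            },
+            skeleton_info={
+                0: dict(link=('left_tip', 'right_tip'), id=0,
+                        color=[0, 0, 255]),
+            },
+            joint_weights=[1.0, 1.0],
+            sigmas=[0.025, 0.025]))
+    print(demo_info.keypoint_num)   # 2
+    print(demo_info.flip_pairs)     # [[0, 1]]
+    print(demo_info.skeleton)       # [[0, 1]]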
diff --git a/vendor/ViTPose/mmpose/datasets/dataset_wrappers.py b/vendor/ViTPose/mmpose/datasets/dataset_wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..aaaa173b91f2ad63dc7d80b793fa3d9619a4630c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/dataset_wrappers.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class RepeatDataset:
+ """A wrapper of repeated dataset.
+
+ The length of repeated dataset will be `times` larger than the original
+ dataset. This is useful when the data loading time is long but the dataset
+ is small. Using RepeatDataset can reduce the data loading time between
+ epochs.
+
+ Args:
+ dataset (:obj:`Dataset`): The dataset to be repeated.
+ times (int): Repeat times.
+ """
+
+ def __init__(self, dataset, times):
+ self.dataset = dataset
+ self.times = times
+
+ self._ori_len = len(self.dataset)
+
+ def __getitem__(self, idx):
+ """Get data."""
+ return self.dataset[idx % self._ori_len]
+
+ def __len__(self):
+ """Length after repetition."""
+ return self.times * self._ori_len
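+
+
+# Illustrative usage sketch appended by the editors; not part of the original
+# module. A plain list stands in for an mmpose dataset; run with package
+# context (e.g. `python -m mmpose.datasets.dataset_wrappers`) so the relative
+# import of the registry resolves.
+if __name__ == '__main__':
+    wrapped = RepeatDataset(['a', 'b', 'c'], times=4)
+    print(len(wrapped))  # 12
+    print(wrapped[7])    # 'b', since 7 % 3 == 1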
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3839e5eaa0c068fec5e86804ce9d75c9e85ae4b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/__init__.py
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ...deprecated import (TopDownFreiHandDataset, TopDownOneHand10KDataset,
+ TopDownPanopticDataset)
+from .animal import (AnimalATRWDataset, AnimalFlyDataset, AnimalHorse10Dataset,
+ AnimalLocustDataset, AnimalMacaqueDataset,
+ AnimalPoseDataset, AnimalZebraDataset)
+from .body3d import Body3DH36MDataset, Body3DMviewDirectPanopticDataset
+from .bottom_up import (BottomUpAicDataset, BottomUpCocoDataset,
+ BottomUpCocoWholeBodyDataset, BottomUpCrowdPoseDataset,
+ BottomUpMhpDataset)
+from .face import (Face300WDataset, FaceAFLWDataset, FaceCocoWholeBodyDataset,
+ FaceCOFWDataset, FaceWFLWDataset)
+from .fashion import DeepFashionDataset
+from .hand import (FreiHandDataset, HandCocoWholeBodyDataset,
+ InterHand2DDataset, InterHand3DDataset, OneHand10KDataset,
+ PanopticDataset)
+from .mesh import (MeshAdversarialDataset, MeshH36MDataset, MeshMixDataset,
+ MoshDataset)
+from .top_down import (TopDownAicDataset, TopDownCocoDataset,
+ TopDownCocoWholeBodyDataset, TopDownCrowdPoseDataset,
+ TopDownH36MDataset, TopDownHalpeDataset,
+ TopDownJhmdbDataset, TopDownMhpDataset,
+ TopDownMpiiDataset, TopDownMpiiTrbDataset,
+ TopDownOCHumanDataset, TopDownPoseTrack18Dataset,
+ TopDownPoseTrack18VideoDataset)
+
+__all__ = [
+ 'TopDownCocoDataset', 'BottomUpCocoDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset', 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset', 'OneHand10KDataset', 'PanopticDataset',
+ 'HandCocoWholeBodyDataset', 'FreiHandDataset', 'InterHand2DDataset',
+ 'InterHand3DDataset', 'TopDownOCHumanDataset', 'TopDownAicDataset',
+ 'TopDownCocoWholeBodyDataset', 'MeshH36MDataset', 'MeshMixDataset',
+ 'MoshDataset', 'MeshAdversarialDataset', 'TopDownCrowdPoseDataset',
+ 'BottomUpCrowdPoseDataset', 'TopDownFreiHandDataset',
+ 'TopDownOneHand10KDataset', 'TopDownPanopticDataset',
+ 'TopDownPoseTrack18Dataset', 'TopDownJhmdbDataset', 'TopDownMhpDataset',
+ 'DeepFashionDataset', 'Face300WDataset', 'FaceAFLWDataset',
+ 'FaceWFLWDataset', 'FaceCOFWDataset', 'FaceCocoWholeBodyDataset',
+ 'Body3DH36MDataset', 'AnimalHorse10Dataset', 'AnimalMacaqueDataset',
+ 'AnimalFlyDataset', 'AnimalLocustDataset', 'AnimalZebraDataset',
+ 'AnimalATRWDataset', 'AnimalPoseDataset', 'TopDownH36MDataset',
+ 'TopDownHalpeDataset', 'TopDownPoseTrack18VideoDataset',
+ 'Body3DMviewDirectPanopticDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..185b935ced4cf072975ec37701b5e8a3aa1d7939
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .animal_ap10k_dataset import AnimalAP10KDataset
+from .animal_atrw_dataset import AnimalATRWDataset
+from .animal_fly_dataset import AnimalFlyDataset
+from .animal_horse10_dataset import AnimalHorse10Dataset
+from .animal_locust_dataset import AnimalLocustDataset
+from .animal_macaque_dataset import AnimalMacaqueDataset
+from .animal_pose_dataset import AnimalPoseDataset
+from .animal_zebra_dataset import AnimalZebraDataset
+
+__all__ = [
+ 'AnimalHorse10Dataset', 'AnimalMacaqueDataset', 'AnimalFlyDataset',
+ 'AnimalLocustDataset', 'AnimalZebraDataset', 'AnimalATRWDataset',
+ 'AnimalPoseDataset', 'AnimalAP10KDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..11a1e73ed0c72f5c3fc4ccdab010b53acd2a57c4
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_ap10k_dataset.py
@@ -0,0 +1,367 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalAP10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AP-10K dataset for animal pose estimation.
+
+ "AP-10K: A Benchmark for Animal Pose Estimation in the Wild"
+ Neurips Dataset Track'2021.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and apply specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AP-10K keypoint indexes::
+
+ 0: 'L_Eye',
+ 1: 'R_Eye',
+ 2: 'Nose',
+ 3: 'Neck',
+ 4: 'root of tail',
+ 5: 'L_Shoulder',
+ 6: 'L_Elbow',
+ 7: 'L_F_Paw',
+ 8: 'R_Shoulder',
+ 9: 'R_Elbow',
+        10: 'R_F_Paw',
+ 11: 'L_Hip',
+ 12: 'L_Knee',
+ 13: 'L_B_Paw',
+ 14: 'R_Hip',
+ 15: 'R_Knee',
+ 16: 'R_B_Paw'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/ap10k.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db, self.id2Cat = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db, id2Cat = self._load_coco_keypoint_annotations()
+ return gt_db, id2Cat
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db, id2Cat = [], dict()
+ for img_id in self.img_ids:
+ db_tmp, id2Cat_tmp = self._load_coco_keypoint_annotation_kernel(
+ img_id)
+ gt_db.extend(db_tmp)
+ id2Cat.update({img_id: id2Cat_tmp})
+ return gt_db, id2Cat
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+            tuple: A pair (rec, id2Cat) containing the db entries and the
+                bbox_id-to-category records for the image.
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ id2Cat = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ category = obj['category_id']
+ id2Cat.append({
+ 'image_file': image_file,
+ 'bbox_id': bbox_id,
+ 'category': category,
+ })
+ bbox_id = bbox_id + 1
+
+ return rec, id2Cat
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+            metric (str | list[str]): Metric to be performed. Default: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ cat = self.id2Cat[image_id][bbox_ids[i]]['category']
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i],
+ 'category': cat
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': img_kpt['category'],
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
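+
+
+# Illustrative configuration sketch appended by the editors; not part of the
+# original module. It lists the data_cfg keys this dataset reads in
+# __init__; the values are typical placeholders and the dict is only
+# printed here, since actually building AnimalAP10KDataset requires the real
+# AP-10K annotation file and images.
+if __name__ == '__main__':
+    demo_data_cfg = dict(
+        image_size=[256, 256],
+        heatmap_size=[64, 64],
+        num_joints=17,
+        use_gt_bbox=True,
+        bbox_file='',
+        det_bbox_thr=0.0,
+        use_nms=True,
+        soft_nms=False,
+        nms_thr=1.0,
+        oks_thr=0.9,
+        vis_thr=0.2)
+    print(sorted(demo_data_cfg))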
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_atrw_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_atrw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..edfd3f96c6571cda4bd39b223c3382f8cff17f51
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_atrw_dataset.py
@@ -0,0 +1,353 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalATRWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """ATRW dataset for animal pose estimation.
+
+ "ATRW: A Benchmark for Amur Tiger Re-identification in the Wild"
+ ACM MM'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and apply specified transforms
+ to return a dict containing the image tensors and other information.
+
+ ATRW keypoint indexes::
+
+ 0: "left_ear",
+ 1: "right_ear",
+ 2: "nose",
+ 3: "right_shoulder",
+ 4: "right_front_paw",
+ 5: "left_shoulder",
+ 6: "left_front_paw",
+ 7: "right_hip",
+ 8: "right_knee",
+ 9: "right_back_paw",
+ 10: "left_hip",
+ 11: "left_knee",
+ 12: "left_back_paw",
+ 13: "tail",
+ 14: "center"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/atrw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4], padding=1.0)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+            metric (str | list[str]): Metric to be performed. Default: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e191882f3424167e9bd07693498f36cd57905fd0
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class AnimalBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'AnimalBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgTopDownDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_fly_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_fly_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4141176142e0d12c1c65b772f4e48c873f04c47
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_fly_dataset.py
@@ -0,0 +1,215 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalFlyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalFlyDataset for animal pose estimation.
+
+ "Fast animal pose estimation using deep neural networks"
+ Nature methods'2019. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and apply specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Vinegar Fly keypoint indexes::
+
+ 0: "head",
+ 1: "eyeL",
+ 2: "eyeR",
+ 3: "neck",
+ 4: "thorax",
+ 5: "abdomen",
+ 6: "forelegR1",
+ 7: "forelegR2",
+ 8: "forelegR3",
+ 9: "forelegR4",
+ 10: "midlegR1",
+ 11: "midlegR2",
+ 12: "midlegR3",
+ 13: "midlegR4",
+ 14: "hindlegR1",
+ 15: "hindlegR2",
+ 16: "hindlegR3",
+ 17: "hindlegR4",
+ 18: "forelegL1",
+ 19: "forelegL2",
+ 20: "forelegL3",
+ 21: "forelegL4",
+ 22: "midlegL1",
+ 23: "midlegL2",
+ 24: "midlegL3",
+ 25: "midlegL4",
+ 26: "hindlegL1",
+ 27: "hindlegL2",
+ 28: "hindlegL3",
+ 29: "hindlegL4",
+ 30: "wingL",
+ 31: "wingR"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/fly.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 192x192
+ center, scale = self._xywh2cs(0, 0, 192, 192, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate Fly keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+            res_folder (str, optional): Path of directory to save the results.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
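
For reference, a minimal sketch (synthetic values and a hypothetical `dataset` instance; not part of the patch) of the `results` structure that `AnimalFlyDataset.evaluate()` expects, matching the shapes described in its docstring:

```python
import numpy as np

# Synthetic stand-in for the output of a test loop: N=2 samples, K=32 keypoints.
num_samples, num_keypoints = 2, 32
results = [{
    'preds': np.random.rand(num_samples, num_keypoints, 3),  # x, y, score
    'boxes': np.random.rand(num_samples, 6),  # cx, cy, sx, sy, area, score
    'image_paths': ['Test/source/0.jpg', 'Test/source/1.jpg'],
    'bbox_ids': [0, 1],
}]
# With a constructed dataset instance, evaluation would then be:
# name_value = dataset.evaluate(results, metric=['PCK', 'AUC', 'EPE'])
```
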
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_horse10_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_horse10_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2bf1986edb75f8f5e60c4ddd45bfb45d5e38d9c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_horse10_dataset.py
@@ -0,0 +1,220 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalHorse10Dataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalHorse10Dataset for animal pose estimation.
+
+ "Pretraining boosts out-of-domain robustness for pose estimation"
+    WACV'2021. More details can be found in the
+    paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Horse-10 keypoint indexes::
+
+ 0: 'Nose',
+ 1: 'Eye',
+ 2: 'Nearknee',
+ 3: 'Nearfrontfetlock',
+ 4: 'Nearfrontfoot',
+ 5: 'Offknee',
+ 6: 'Offfrontfetlock',
+ 7: 'Offfrontfoot',
+ 8: 'Shoulder',
+ 9: 'Midshoulder',
+ 10: 'Elbow',
+ 11: 'Girth',
+ 12: 'Wither',
+ 13: 'Nearhindhock',
+ 14: 'Nearhindfetlock',
+ 15: 'Nearhindfoot',
+ 16: 'Hip',
+ 17: 'Stifle',
+ 18: 'Offhindhock',
+ 19: 'Offhindfetlock',
+ 20: 'Offhindfoot',
+ 21: 'Ischium'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/horse10.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 0, :] - gts[:, 1, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate horse-10 keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
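
As a worked illustration (synthetic keypoints, not part of the vendored code) of `_get_normalize_factor()` above: the NME normalizer is the per-instance distance between keypoints 0 and 1, tiled to shape (N, 2) so it can scale both error components:

```python
import numpy as np

gts = np.random.rand(4, 22, 2)  # N=4 instances, K=22 Horse-10 joints, (x, y)

# Distance between keypoint 0 ('Nose') and keypoint 1 ('Eye') per instance.
interocular = np.linalg.norm(gts[:, 0, :] - gts[:, 1, :], axis=1, keepdims=True)
norm_factor = np.tile(interocular, [1, 2])
print(norm_factor.shape)  # (4, 2)
```
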
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_locust_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_locust_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..95fb6ac896e7d0553efb6c479fca92684d87ac22
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_locust_dataset.py
@@ -0,0 +1,218 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalLocustDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalLocustDataset for animal pose estimation.
+
+ "DeepPoseKit, a software toolkit for fast and robust animal
+ pose estimation using deep learning" Elife'2019.
+ More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Desert Locust keypoint indexes::
+
+ 0: "head",
+ 1: "neck",
+ 2: "thorax",
+ 3: "abdomen1",
+ 4: "abdomen2",
+ 5: "anttipL",
+ 6: "antbaseL",
+ 7: "eyeL",
+ 8: "forelegL1",
+ 9: "forelegL2",
+ 10: "forelegL3",
+ 11: "forelegL4",
+ 12: "midlegL1",
+ 13: "midlegL2",
+ 14: "midlegL3",
+ 15: "midlegL4",
+ 16: "hindlegL1",
+ 17: "hindlegL2",
+ 18: "hindlegL3",
+ 19: "hindlegL4",
+ 20: "anttipR",
+ 21: "antbaseR",
+ 22: "eyeR",
+ 23: "forelegR1",
+ 24: "forelegR2",
+ 25: "forelegR3",
+ 26: "forelegR4",
+ 27: "midlegR1",
+ 28: "midlegR2",
+ 29: "midlegR3",
+ 30: "midlegR4",
+ 31: "hindlegR1",
+ 32: "hindlegR2",
+ 33: "hindlegR3",
+ 34: "hindlegR4"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/locust.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 160x160
+ center, scale = self._xywh2cs(0, 0, 160, 160, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+        """Evaluate Desert Locust keypoint results. The pose prediction
+        results will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_macaque_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_macaque_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..359fecaa2b6e29f24e2bdb01a3a8715f12c5925f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_macaque_dataset.py
@@ -0,0 +1,355 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalMacaqueDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MacaquePose dataset for animal pose estimation.
+
+ "MacaquePose: A novel ‘in the wild’ macaque monkey pose dataset
+ for markerless motion capture" bioRxiv'2020.
+    More details can be found in the
+    paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Macaque keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/macaque.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+            - batch_size: N
+            - num_keypoints: K
+            - heatmap height: H
+            - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
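
A small numeric sketch (made-up scores, not part of the patch) of the rescoring step that `evaluate()` above performs before OKS-NMS: a detection's score becomes the mean confidence of its joints above `vis_thr`, multiplied by the box score:

```python
import numpy as np

keypoint_scores = np.array([0.9, 0.8, 0.05, 0.7])  # per-joint confidences
box_score = 0.95
vis_thr = 0.2

visible = keypoint_scores[keypoint_scores > vis_thr]
kpt_score = visible.mean() if visible.size else 0.0  # (0.9 + 0.8 + 0.7) / 3 = 0.8
rescored = kpt_score * box_score                     # 0.8 * 0.95 = 0.76
print(round(rescored, 2))
```
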
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_pose_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_pose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ced5703f3771597f21123b44c77a53a02a48e78
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_pose_dataset.py
@@ -0,0 +1,359 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalPoseDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Animal-Pose dataset for animal pose estimation.
+
+    "Cross-domain Adaptation For Animal Pose Estimation" ICCV'2019.
+    More details can be found in the
+    paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Animal-Pose keypoint indexes::
+
+ 0: 'L_Eye',
+ 1: 'R_Eye',
+ 2: 'L_EarBase',
+ 3: 'R_EarBase',
+ 4: 'Nose',
+ 5: 'Throat',
+ 6: 'TailBase',
+ 7: 'Withers',
+ 8: 'L_F_Elbow',
+ 9: 'R_F_Elbow',
+ 10: 'L_B_Elbow',
+ 11: 'R_B_Elbow',
+ 12: 'L_F_Knee',
+ 13: 'R_F_Knee',
+ 14: 'L_B_Knee',
+ 15: 'R_B_Knee',
+ 16: 'L_F_Paw',
+ 17: 'R_F_Paw',
+ 18: 'L_B_Paw',
+ 19: 'R_B_Paw'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/animalpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+
+ Args:
+ img_id: coco image id
+
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
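
For clarity, a sketch (synthetic values, not part of the patch) of the COCO-style entry that `_write_coco_keypoint_results()` above serializes per detection; the (K, 3) keypoint array is flattened to a K*3 list:

```python
import numpy as np

num_joints = 20  # Animal-Pose has 20 keypoints
kpts = np.random.rand(num_joints, 3)  # x, y, score per joint

entry = {
    'image_id': 42,                          # synthetic id
    'category_id': 1,
    'keypoints': kpts.reshape(-1).tolist(),  # [x1, y1, s1, x2, y2, s2, ...]
    'score': 0.87,
    'center': [128.0, 96.0],
    'scale': [1.2, 1.2],
}
print(len(entry['keypoints']))  # 60
```
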
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_zebra_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_zebra_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c5e3b73c885f86c13e7a5ebf02b03441b2dc93d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/animal/animal_zebra_dataset.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class AnimalZebraDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """AnimalZebraDataset for animal pose estimation.
+
+ "DeepPoseKit, a software toolkit for fast and robust animal
+ pose estimation using deep learning" Elife'2019.
+ More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+    Zebra keypoint indexes::
+
+ 0: "snout",
+ 1: "head",
+ 2: "neck",
+ 3: "forelegL1",
+ 4: "forelegR1",
+ 5: "hindlegL1",
+ 6: "hindlegR1",
+ 7: "tailbase",
+ 8: "tailtip"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/zebra.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 160x160
+ center, scale = self._xywh2cs(0, 0, 160, 160, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+        """Evaluate Zebra keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+                scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/base/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5f9a0899cdfde4132b068e6408ca721a59dc9b4
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .kpt_2d_sview_rgb_img_bottom_up_dataset import \
+ Kpt2dSviewRgbImgBottomUpDataset
+from .kpt_2d_sview_rgb_img_top_down_dataset import \
+ Kpt2dSviewRgbImgTopDownDataset
+from .kpt_2d_sview_rgb_vid_top_down_dataset import \
+ Kpt2dSviewRgbVidTopDownDataset
+from .kpt_3d_mview_rgb_img_direct_dataset import Kpt3dMviewRgbImgDirectDataset
+from .kpt_3d_sview_kpt_2d_dataset import Kpt3dSviewKpt2dDataset
+from .kpt_3d_sview_rgb_img_top_down_dataset import \
+ Kpt3dSviewRgbImgTopDownDataset
+
+__all__ = [
+ 'Kpt3dMviewRgbImgDirectDataset', 'Kpt2dSviewRgbImgTopDownDataset',
+ 'Kpt3dSviewRgbImgTopDownDataset', 'Kpt2dSviewRgbImgBottomUpDataset',
+ 'Kpt3dSviewKpt2dDataset', 'Kpt2dSviewRgbVidTopDownDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..99306214db3a36465bdc8a24ebec41db58a6ca68
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_bottom_up_dataset.py
@@ -0,0 +1,188 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import xtcocotools
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbImgBottomUpDataset(Dataset, metaclass=ABCMeta):
+ """Base class for bottom-up datasets.
+
+ All datasets should subclass it.
+ All subclasses should overwrite:
+        Methods: `_get_single`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ # bottom-up
+ self.base_size = data_cfg['base_size']
+ self.base_sigma = data_cfg['base_sigma']
+ self.int_sigma = False
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ self.ann_info['num_scales'] = data_cfg['num_scales']
+ self.ann_info['scale_aware_sigma'] = data_cfg['scale_aware_sigma']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.use_nms = data_cfg.get('use_nms', False)
+ self.soft_nms = data_cfg.get('soft_nms', True)
+ self.oks_thr = data_cfg.get('oks_thr', 0.9)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ if not test_mode:
+ self.img_ids = [
+ img_id for img_id in self.img_ids if
+ len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
+ ]
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _get_mask(self, anno, idx):
+ """Get ignore masks to mask out losses."""
+ coco = self.coco
+ img_info = coco.loadImgs(self.img_ids[idx])[0]
+
+ m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
+
+ for obj in anno:
+ if 'segmentation' in obj:
+ if obj['iscrowd']:
+ rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
+ img_info['height'],
+ img_info['width'])
+ m += xtcocotools.mask.decode(rle)
+ elif obj['num_keypoints'] == 0:
+ rles = xtcocotools.mask.frPyObjects(
+ obj['segmentation'], img_info['height'],
+ img_info['width'])
+ for rle in rles:
+ m += xtcocotools.mask.decode(rle)
+
+ return m < 0.5
+
+ @abstractmethod
+ def _get_single(self, idx):
+ """Get anno for a single image."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ def prepare_train_img(self, idx):
+ """Prepare image for training given the index."""
+ results = copy.deepcopy(self._get_single(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def prepare_test_img(self, idx):
+ """Prepare image for testing given the index."""
+ results = copy.deepcopy(self._get_single(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def __len__(self):
+ """Get dataset length."""
+ return len(self.img_ids)
+
+ def __getitem__(self, idx):
+ """Get the sample for either training or testing given index."""
+ if self.test_mode:
+ return self.prepare_test_img(idx)
+
+ return self.prepare_train_img(idx)
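
A minimal sketch (synthetic ids and file names) of what `_get_mapping_id_name()` above produces; the datasets later use `name2id` to recover an image id from a predicted image path:

```python
imgs = {  # COCO-style image records, as stored in self.coco.imgs
    1: {'file_name': 'val/000001.jpg'},
    2: {'file_name': 'val/000002.jpg'},
}

id2name = {image_id: info['file_name'] for image_id, info in imgs.items()}
name2id = {info['file_name']: image_id for image_id, info in imgs.items()}
print(id2name[1], name2id['val/000002.jpg'])  # val/000001.jpg 2
```
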
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb281f1bcf1a3771aea4fb5335487b17d5994168
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
@@ -0,0 +1,287 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.core.evaluation.top_down_eval import (keypoint_auc, keypoint_epe,
+ keypoint_nme,
+ keypoint_pck_accuracy)
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbImgTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 2D top-down pose estimation with single-view RGB
+ image as the input.
+
+    All top-down image datasets should subclass it.
+    All subclasses should overwrite:
+        Methods: `_get_db`, `evaluate`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.ann_info['max_num_joints'] = data_cfg.get('max_num_joints', None)
+ self.ann_info['dataset_idx'] = data_cfg.get('dataset_idx', 0)
+
+ self.ann_info['use_different_joint_weights'] = data_cfg.get(
+ 'use_different_joint_weights', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+        """This encodes bbox (x, y, w, h) into (center, scale).
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+        """Get the normalization factor. Generally, the inter-ocular
+        distance, measured as the Euclidean distance between the outer
+        corners of the eyes, is used. Override this function to measure NME.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+ return np.ones([gts.shape[0], 2], dtype=np.float32)
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self,
+ res_file,
+ metrics,
+ pck_thr=0.2,
+ pckh_thr=0.7,
+ auc_nor=30):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE', 'NME'.
+ pck_thr (float): PCK threshold, default as 0.2.
+ pckh_thr (float): PCKh threshold, default as 0.7.
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ outputs = []
+ gts = []
+ masks = []
+ box_sizes = []
+ threshold_bbox = []
+ threshold_head_box = []
+
+ for pred, item in zip(preds, self.db):
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
+ gts.append(np.array(item['joints_3d'])[:, :-1])
+ masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
+ if 'PCK' in metrics:
+ bbox = np.array(item['bbox'])
+ bbox_thr = np.max(bbox[2:])
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+ if 'PCKh' in metrics:
+ head_box_thr = item['head_size']
+ threshold_head_box.append(
+ np.array([head_box_thr, head_box_thr]))
+ box_sizes.append(item.get('box_size', 1))
+
+ outputs = np.array(outputs)
+ gts = np.array(gts)
+ masks = np.array(masks)
+ threshold_bbox = np.array(threshold_bbox)
+ threshold_head_box = np.array(threshold_head_box)
+ box_sizes = np.array(box_sizes).reshape([-1, 1])
+
+ if 'PCK' in metrics:
+ _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_bbox)
+ info_str.append(('PCK', pck))
+
+ if 'PCKh' in metrics:
+ _, pckh, _ = keypoint_pck_accuracy(outputs, gts, masks, pckh_thr,
+ threshold_head_box)
+ info_str.append(('PCKh', pckh))
+
+ if 'AUC' in metrics:
+ info_str.append(('AUC', keypoint_auc(outputs, gts, masks,
+ auc_nor)))
+
+ if 'EPE' in metrics:
+ info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
+
+ if 'NME' in metrics:
+ normalize_factor = self._get_normalize_factor(
+ gts=gts, box_sizes=box_sizes)
+ info_str.append(
+ ('NME', keypoint_nme(outputs, gts, masks, normalize_factor)))
+
+ return info_str
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e52927032d87e93021307804dfabe08a5b7ee3b6
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_vid_top_down_dataset.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt2dSviewRgbVidTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 2D top-down pose estimation with single-view RGB
+ video as the input.
+
+    All single-view RGB video datasets should subclass it.
+    All subclasses should overwrite:
+    Methods: `_get_db`, `evaluate`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where videos/images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ self.ann_info['use_different_joint_weights'] = data_cfg.get(
+ 'use_different_joint_weights', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ @abstractmethod
+ def _write_keypoint_results(keypoint_results, gt_folder, pred_folder):
+ """Write results into a json file."""
+
+ @abstractmethod
+ def _do_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation.
+ Args:
+ gt_folder (str): The folder of the json files storing
+ ground truth keypoint annotations.
+ pred_folder (str): The folder of the json files storing
+ prediction results.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..94cc1c22e97b8e5e798e366dfc69b611fa742d6e
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dMviewRgbImgDirectDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 3D top-down pose estimation with multi-view RGB
+ images as the input.
+
+ All subclasses should overwrite:
+ Methods:`_get_db`, 'evaluate'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['space_size'] = data_cfg['space_size']
+ self.ann_info['space_center'] = data_cfg['space_center']
+ self.ann_info['cube_size'] = data_cfg['cube_size']
+ self.ann_info['scale_aware_sigma'] = data_cfg.get(
+ 'scale_aware_sigma', False)
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] <= dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['num_scales'] = 1
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ self.load_config(data_cfg)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+ self.num_joints = data_cfg['num_joints']
+ self.num_cameras = data_cfg['num_cameras']
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.subset = data_cfg.get('subset', 'train')
+ self.need_2d_label = data_cfg.get('need_2d_label', False)
+ self.need_camera_param = True
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def __len__(self):
+ """Get the size of the dataset."""
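+        # self.db holds one entry per camera view, so the number of samples
+        # is the number of multi-view groups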
+ return len(self.db) // self.num_cameras
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = {}
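+        # collect the num_cameras views of this frame into one dict,
+        # keyed by camera index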
+ for c in range(self.num_cameras):
+ result = copy.deepcopy(self.db[self.num_cameras * idx + c])
+ result['ann_info'] = self.ann_info
+ results[c] = result
+
+ return self.pipeline(results)
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbdb9989e83d9b8ff91cfd99f2fec6d87b13aceb
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_kpt_2d_dataset.py
@@ -0,0 +1,226 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dSviewKpt2dDataset(Dataset, metaclass=ABCMeta):
+ """Base class for 3D human pose datasets.
+
+ Subclasses should consider overwriting following methods:
+ - load_config
+ - load_annotations
+ - build_sample_indices
+ - evaluate
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ - num_joints: Number of joints.
+ - seq_len: Number of frames in a sequence. Default: 1.
+ - seq_frame_interval: Extract frames from the video at certain
+ intervals. Default: 1.
+ - causal: If set to True, the rightmost input frame will be the
+ target frame. Otherwise, the middle input frame will be the
+ target frame. Default: True.
+ - temporal_padding: Whether to pad the video so that poses will be
+ predicted for every frame in the video. Default: False
+ - subset: Reduce dataset size by fraction. Default: 1.
+ - need_2d_label: Whether need 2D joint labels or not.
+ Default: False.
+
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.data_cfg = copy.deepcopy(data_cfg)
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+ self.ann_info = {}
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ self.load_config(self.data_cfg)
+
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ self.data_info = self.load_annotations()
+ self.sample_indices = self.build_sample_indices()
+ self.pipeline = Compose(pipeline)
+
+ self.name2id = {
+ name: i
+ for i, name in enumerate(self.data_info['imgnames'])
+ }
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+
+ self.num_joints = data_cfg['num_joints']
+ self.seq_len = data_cfg.get('seq_len', 1)
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.causal = data_cfg.get('causal', True)
+ self.temporal_padding = data_cfg.get('temporal_padding', False)
+ self.subset = data_cfg.get('subset', 1)
+ self.need_2d_label = data_cfg.get('need_2d_label', False)
+ self.need_camera_param = False
+
+ def load_annotations(self):
+ """Load data annotation."""
+ data = np.load(self.ann_file)
+
+ # get image info
+ _imgnames = data['imgname']
+ num_imgs = len(_imgnames)
+ num_joints = self.ann_info['num_joints']
+
+ if 'scale' in data:
+ _scales = data['scale'].astype(np.float32)
+ else:
+ _scales = np.zeros(num_imgs, dtype=np.float32)
+
+ if 'center' in data:
+ _centers = data['center'].astype(np.float32)
+ else:
+ _centers = np.zeros((num_imgs, 2), dtype=np.float32)
+
+ # get 3D pose
+ if 'S' in data.keys():
+ _joints_3d = data['S'].astype(np.float32)
+ else:
+ _joints_3d = np.zeros((num_imgs, num_joints, 4), dtype=np.float32)
+
+ # get 2D pose
+ if 'part' in data.keys():
+ _joints_2d = data['part'].astype(np.float32)
+ else:
+ _joints_2d = np.zeros((num_imgs, num_joints, 3), dtype=np.float32)
+
+ data_info = {
+ 'imgnames': _imgnames,
+ 'joints_3d': _joints_3d,
+ 'joints_2d': _joints_2d,
+ 'scales': _scales,
+ 'centers': _centers,
+ }
+
+ return data_info
+
+ def build_sample_indices(self):
+ """Build sample indices.
+
+        The default method creates sample indices such that each sample is a
+        single frame (i.e. seq_len=1). Override this method in the subclass to
+        define how frames are sampled to form data samples.
+
+ Outputs:
+ sample_indices [list(tuple)]: the frame indices of each sample.
+ For a sample, all frames will be treated as an input sequence,
+ and the ground-truth pose of the last frame will be the target.
+ """
+ sample_indices = []
+ if self.seq_len == 1:
+            num_imgs = len(self.data_info['imgnames'])
+ sample_indices = [(idx, ) for idx in range(num_imgs)]
+ else:
+ raise NotImplementedError('Multi-frame data sample unsupported!')
+ return sample_indices
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ def prepare_data(self, idx):
+ """Get data sample."""
+ data = self.data_info
+
+ frame_ids = self.sample_indices[idx]
+ assert len(frame_ids) == self.seq_len
+
+ # get the 3D/2D pose sequence
+ _joints_3d = data['joints_3d'][frame_ids]
+ _joints_2d = data['joints_2d'][frame_ids]
+
+ # get the image info
+ _imgnames = data['imgnames'][frame_ids]
+ _centers = data['centers'][frame_ids]
+ _scales = data['scales'][frame_ids]
+ if _scales.ndim == 1:
+ _scales = np.stack([_scales, _scales], axis=1)
+
+ target_idx = -1 if self.causal else int(self.seq_len) // 2
+
+ results = {
+ 'input_2d': _joints_2d[:, :, :2],
+ 'input_2d_visible': _joints_2d[:, :, -1:],
+ 'input_3d': _joints_3d[:, :, :3],
+ 'input_3d_visible': _joints_3d[:, :, -1:],
+ 'target': _joints_3d[target_idx, :, :3],
+ 'target_visible': _joints_3d[target_idx, :, -1:],
+ 'image_paths': _imgnames,
+ 'target_image_path': _imgnames[target_idx],
+ 'scales': _scales,
+ 'centers': _centers,
+ }
+
+ if self.need_2d_label:
+ results['target_2d'] = _joints_2d[target_idx, :, :2]
+
+ if self.need_camera_param:
+ _cam_param = self.get_camera_param(_imgnames[0])
+ results['camera_param'] = _cam_param
+ # get image size from camera parameters
+ if 'w' in _cam_param and 'h' in _cam_param:
+ results['image_width'] = _cam_param['w']
+ results['image_height'] = _cam_param['h']
+
+ return results
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.sample_indices)
+
+ def __getitem__(self, idx):
+ """Get a sample with given index."""
+ results = copy.deepcopy(self.prepare_data(idx))
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ raise NotImplementedError
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..af01e81868d0a918da474be896525cbe47ef006d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/base/kpt_3d_sview_rgb_img_top_down_dataset.py
@@ -0,0 +1,256 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from abc import ABCMeta, abstractmethod
+
+import json_tricks as json
+import numpy as np
+from torch.utils.data import Dataset
+from xtcocotools.coco import COCO
+
+from mmpose.datasets import DatasetInfo
+from mmpose.datasets.pipelines import Compose
+
+
+class Kpt3dSviewRgbImgTopDownDataset(Dataset, metaclass=ABCMeta):
+ """Base class for keypoint 3D top-down pose estimation with single-view RGB
+ image as the input.
+
+    All single-view RGB image datasets for 3D keypoints should subclass it.
+    All subclasses should overwrite:
+    Methods: `_get_db`, `evaluate`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ coco_style (bool): Whether the annotation json is coco-style.
+ Default: True
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ coco_style=True,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
+
+ if dataset_info is None:
+ raise ValueError(
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.')
+
+ dataset_info = DatasetInfo(dataset_info)
+
+ assert self.ann_info['num_joints'] == dataset_info.keypoint_num
+ self.ann_info['flip_pairs'] = dataset_info.flip_pairs
+ self.ann_info['flip_index'] = dataset_info.flip_index
+ self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
+ self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
+ self.ann_info['joint_weights'] = dataset_info.joint_weights
+ self.ann_info['skeleton'] = dataset_info.skeleton
+ self.sigmas = dataset_info.sigmas
+ self.dataset_name = dataset_info.dataset_name
+
+ if coco_style:
+ self.coco = COCO(ann_file)
+ if 'categories' in self.coco.dataset:
+ cats = [
+ cat['name']
+ for cat in self.coco.loadCats(self.coco.getCatIds())
+ ]
+ self.classes = ['__background__'] + cats
+ self.num_classes = len(self.classes)
+ self._class_to_ind = dict(
+ zip(self.classes, range(self.num_classes)))
+ self._class_to_coco_ind = dict(
+ zip(cats, self.coco.getCatIds()))
+ self._coco_ind_to_class_ind = dict(
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
+ for cls in self.classes[1:])
+ self.img_ids = self.coco.getImgIds()
+ self.num_images = len(self.img_ids)
+ self.id2name, self.name2id = self._get_mapping_id_name(
+ self.coco.imgs)
+
+ self.db = []
+
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _cam2pixel(cam_coord, f, c):
+ """Transform the joints from their camera coordinates to their pixel
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ img_coord (ndarray[N, 3]): the coordinates (x, y, 0)
+ in the image plane.
+ """
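+        # pinhole projection: u = fx * X / Z + cx, v = fy * Y / Z + cy
+        # (1e-8 avoids division by zero); the output z is set to 0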
+ x = cam_coord[:, 0] / (cam_coord[:, 2] + 1e-8) * f[0] + c[0]
+ y = cam_coord[:, 1] / (cam_coord[:, 2] + 1e-8) * f[1] + c[1]
+ z = np.zeros_like(x)
+ img_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return img_coord
+
+ @staticmethod
+ def _world2cam(world_coord, R, T):
+ """Transform the joints from their world coordinates to their camera
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ world_coord (ndarray[3, N]): 3D joints coordinates
+ in the world coordinate system
+ R (ndarray[3, 3]): camera rotation matrix
+ T (ndarray[3, 1]): camera position (x, y, z)
+
+ Returns:
+ cam_coord (ndarray[3, N]): 3D joints coordinates
+ in the camera coordinate system
+ """
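+        # rigid transform from world to camera frame: X_cam = R @ (X_world - T)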
+ cam_coord = np.dot(R, world_coord - T)
+ return cam_coord
+
+ @staticmethod
+ def _pixel2cam(pixel_coord, f, c):
+ """Transform the joints from their pixel coordinates to their camera
+ coordinates.
+
+ Note:
+ N: number of joints
+
+ Args:
+ pixel_coord (ndarray[N, 3]): 3D joints coordinates
+ in the pixel coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ """
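+        # back-project with the depth from the third column:
+        # X = (u - cx) / fx * Z, Y = (v - cy) / fy * Z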
+ x = (pixel_coord[:, 0] - c[0]) / f[0] * pixel_coord[:, 2]
+ y = (pixel_coord[:, 1] - c[1]) / f[1] * pixel_coord[:, 2]
+ z = pixel_coord[:, 2]
+ cam_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return cam_coord
+
+ @staticmethod
+ def _get_mapping_id_name(imgs):
+ """
+ Args:
+ imgs (dict): dict of image info.
+
+ Returns:
+ tuple: Image name & id mapping dicts.
+
+ - id2name (dict): Mapping image id to name.
+ - name2id (dict): Mapping image name to id.
+ """
+ id2name = {}
+ name2id = {}
+ for image_id, image in imgs.items():
+ file_name = image['file_name']
+ id2name[image_id] = file_name
+ name2id[file_name] = image_id
+
+ return id2name, name2id
+
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
+ """This encodes bbox(x,y,w,h) into (center, scale)
+
+ Args:
+ x, y, w, h (float): left, top, width and height
+ padding (float): bounding box padding factor
+
+ Returns:
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
+ """
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
+ 'image_size'][1]
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
+
+ if (not self.test_mode) and np.random.rand() < 0.3:
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ # pixel std is 200.0
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ # padding to include proper amount of context
+ scale = scale * padding
+
+ return center, scale
+
+ @abstractmethod
+ def _get_db(self):
+ """Load dataset."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def evaluate(self, results, *args, **kwargs):
+ """Evaluate keypoint results."""
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = copy.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bc25a9ebbbeb936a304c9a0416fb9892b79cbef
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .body3d_h36m_dataset import Body3DH36MDataset
+from .body3d_mpi_inf_3dhp_dataset import Body3DMpiInf3dhpDataset
+from .body3d_mview_direct_panoptic_dataset import \
+ Body3DMviewDirectPanopticDataset
+from .body3d_semi_supervision_dataset import Body3DSemiSupervisionDataset
+
+__all__ = [
+ 'Body3DH36MDataset', 'Body3DSemiSupervisionDataset',
+ 'Body3DMpiInf3dhpDataset', 'Body3DMviewDirectPanopticDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c29232cf74e4af2cf5b60cd71bd301e4dca7f3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class Body3DBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt3dSviewKpt2dDataset."""
+
+    def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'Body3DBaseDataset has been replaced by Kpt3dSviewKpt2dDataset. '
+            'Check https://github.com/open-mmlab/mmpose/pull/663 '
+            'for details.')
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae4949d5c5a869bfd37a2f19d47afafc3c1c3eea
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_h36m_dataset.py
@@ -0,0 +1,343 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation import keypoint_mpjpe
+from mmpose.datasets.datasets.base import Kpt3dSviewKpt2dDataset
+from ...builder import DATASETS
+
+
+@DATASETS.register_module()
+class Body3DH36MDataset(Kpt3dSviewKpt2dDataset):
+ """Human3.6M dataset for 3D human pose estimation.
+
+ "Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human
+ Sensing in Natural Environments", TPAMI`2014.
+ More details can be found in the `paper
+ `__.
+
+ Human3.6M keypoint indexes::
+
+ 0: 'root (pelvis)',
+ 1: 'right_hip',
+ 2: 'right_knee',
+ 3: 'right_foot',
+ 4: 'left_hip',
+ 5: 'left_knee',
+ 6: 'left_foot',
+ 7: 'spine',
+ 8: 'thorax',
+ 9: 'neck_base',
+ 10: 'head',
+ 11: 'left_shoulder',
+ 12: 'left_elbow',
+ 13: 'left_wrist',
+ 14: 'right_shoulder',
+ 15: 'right_elbow',
+ 16: 'right_wrist'
+
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ JOINT_NAMES = [
+ 'Root', 'RHip', 'RKnee', 'RFoot', 'LHip', 'LKnee', 'LFoot', 'Spine',
+ 'Thorax', 'NeckBase', 'Head', 'LShoulder', 'LElbow', 'LWrist',
+ 'RShoulder', 'RElbow', 'RWrist'
+ ]
+
+ # 2D joint source options:
+ # "gt": from the annotation file
+ # "detection": from a detection result file of 2D keypoint
+    # "pipeline": will be generated by the pipeline
+ SUPPORTED_JOINT_2D_SRC = {'gt', 'detection', 'pipeline'}
+
+ # metric
+ ALLOWED_METRICS = {'mpjpe', 'p-mpjpe', 'n-mpjpe'}
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/h36m.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ def load_config(self, data_cfg):
+ super().load_config(data_cfg)
+ # h36m specific attributes
+ self.joint_2d_src = data_cfg.get('joint_2d_src', 'gt')
+ if self.joint_2d_src not in self.SUPPORTED_JOINT_2D_SRC:
+ raise ValueError(
+ f'Unsupported joint_2d_src "{self.joint_2d_src}". '
+ f'Supported options are {self.SUPPORTED_JOINT_2D_SRC}')
+
+ self.joint_2d_det_file = data_cfg.get('joint_2d_det_file', None)
+
+ self.need_camera_param = data_cfg.get('need_camera_param', False)
+ if self.need_camera_param:
+ assert 'camera_param_file' in data_cfg
+ self.camera_param = self._load_camera_param(
+ data_cfg['camera_param_file'])
+
+ # h36m specific annotation info
+ ann_info = {}
+ ann_info['use_different_joint_weights'] = False
+ # action filter
+ actions = data_cfg.get('actions', '_all_')
+ self.actions = set(
+ actions if isinstance(actions, (list, tuple)) else [actions])
+
+ # subject filter
+ subjects = data_cfg.get('subjects', '_all_')
+ self.subjects = set(
+ subjects if isinstance(subjects, (list, tuple)) else [subjects])
+
+ self.ann_info.update(ann_info)
+
+ def load_annotations(self):
+ data_info = super().load_annotations()
+
+ # get 2D joints
+ if self.joint_2d_src == 'gt':
+ data_info['joints_2d'] = data_info['joints_2d']
+ elif self.joint_2d_src == 'detection':
+ data_info['joints_2d'] = self._load_joint_2d_detection(
+ self.joint_2d_det_file)
+ assert data_info['joints_2d'].shape[0] == data_info[
+ 'joints_3d'].shape[0]
+ assert data_info['joints_2d'].shape[2] == 3
+ elif self.joint_2d_src == 'pipeline':
+ # joint_2d will be generated in the pipeline
+ pass
+ else:
+ raise NotImplementedError(
+ f'Unhandled joint_2d_src option {self.joint_2d_src}')
+
+ return data_info
+
+ @staticmethod
+ def _parse_h36m_imgname(imgname):
+ """Parse imgname to get information of subject, action and camera.
+
+ A typical h36m image filename is like:
+ S1_Directions_1.54138969_000001.jpg
+ """
+ subj, rest = osp.basename(imgname).split('_', 1)
+ action, rest = rest.split('.', 1)
+ camera, rest = rest.split('_', 1)
+
+ return subj, action, camera
+
+ def build_sample_indices(self):
+ """Split original videos into sequences and build frame indices.
+
+ This method overrides the default one in the base class.
+ """
+
+ # Group frames into videos. Assume that self.data_info is
+ # chronological.
+ video_frames = defaultdict(list)
+ for idx, imgname in enumerate(self.data_info['imgnames']):
+ subj, action, camera = self._parse_h36m_imgname(imgname)
+
+ if '_all_' not in self.actions and action not in self.actions:
+ continue
+
+ if '_all_' not in self.subjects and subj not in self.subjects:
+ continue
+
+ video_frames[(subj, action, camera)].append(idx)
+
+ # build sample indices
+ sample_indices = []
+ _len = (self.seq_len - 1) * self.seq_frame_interval + 1
+ _step = self.seq_frame_interval
+ for _, _indices in sorted(video_frames.items()):
+ n_frame = len(_indices)
+
+ if self.temporal_padding:
+ # Pad the sequence so that every frame in the sequence will be
+ # predicted.
+ if self.causal:
+ frames_left = self.seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (self.seq_len - 1) // 2
+ frames_right = frames_left
+ for i in range(n_frame):
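+                    # when the temporal window extends past the start/end of
+                    # the video, repeat the first/last frame index as padding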
+ pad_left = max(0, frames_left - i // _step)
+ pad_right = max(0,
+ frames_right - (n_frame - 1 - i) // _step)
+ start = max(i % _step, i - frames_left * _step)
+ end = min(n_frame - (n_frame - 1 - i) % _step,
+ i + frames_right * _step + 1)
+ sample_indices.append([_indices[0]] * pad_left +
+ _indices[start:end:_step] +
+ [_indices[-1]] * pad_right)
+ else:
+ seqs_from_video = [
+ _indices[i:(i + _len):_step]
+ for i in range(0, n_frame - _len + 1)
+ ]
+ sample_indices.extend(seqs_from_video)
+
+ # reduce dataset size if self.subset < 1
+ assert 0 < self.subset <= 1
+ subset_size = int(len(sample_indices) * self.subset)
+ start = np.random.randint(0, len(sample_indices) - subset_size + 1)
+ end = start + subset_size
+
+ return sample_indices[start:end]
+
+ def _load_joint_2d_detection(self, det_file):
+        """Load 2D joint detection results from file."""
+ joints_2d = np.load(det_file).astype(np.float32)
+
+ return joints_2d
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+                    f'Unsupported metric "{_metric}" for the Human3.6M '
+                    f'dataset. Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
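+        # flatten batched outputs into one record per target frame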
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ image_paths = result['target_image_paths']
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ target_id = self.name2id[image_paths[i]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'target_id': target_id,
+ })
+
+ mmcv.dump(kpts, res_file)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts)
+ elif _metric == 'p-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='p-mpjpe')
+ elif _metric == 'n-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='n-mpjpe')
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(_nv_tuples)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ def _report_mpjpe(self, keypoint_results, mode='mpjpe'):
+        """Calculate the mean per joint position error (MPJPE) or its variants
+        P-MPJPE and N-MPJPE.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DH36MDataset.evaluate' for details.
+ mode (str): Specify mpjpe variants. Supported options are:
+
+ - ``'mpjpe'``: Standard MPJPE.
+ - ``'p-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ - ``'n-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ in scale only.
+ """
+
+ preds = []
+ gts = []
+ masks = []
+ action_category_indices = defaultdict(list)
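+        # group sample indices by action so per-action errors can be reported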
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+ masks.append(gt_visible)
+
+ action = self._parse_h36m_imgname(
+ self.data_info['imgnames'][target_id])[1]
+ action_category = action.split('_')[0]
+ action_category_indices[action_category].append(idx)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.stack(masks).squeeze(-1) > 0
+
+ err_name = mode.upper()
+ if mode == 'mpjpe':
+ alignment = 'none'
+ elif mode == 'p-mpjpe':
+ alignment = 'procrustes'
+ elif mode == 'n-mpjpe':
+ alignment = 'scale'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_mpjpe(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ for action_category, indices in action_category_indices.items():
+ _error = keypoint_mpjpe(preds[indices], gts[indices],
+ masks[indices])
+ name_value_tuples.append((f'{err_name}_{action_category}', _error))
+
+ return name_value_tuples
+
+ def _load_camera_param(self, camera_param_file):
+ """Load camera parameters from file."""
+ return mmcv.load(camera_param_file)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ assert hasattr(self, 'camera_param')
+ subj, _, camera = self._parse_h36m_imgname(imgname)
+ return self.camera_param[(subj, camera)]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d06fcd2f200e8c5c3d4174be90551990cc6886e
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mpi_inf_3dhp_dataset.py
@@ -0,0 +1,417 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation import (keypoint_3d_auc, keypoint_3d_pck,
+ keypoint_mpjpe)
+from mmpose.datasets.datasets.base import Kpt3dSviewKpt2dDataset
+from ...builder import DATASETS
+
+
+@DATASETS.register_module()
+class Body3DMpiInf3dhpDataset(Kpt3dSviewKpt2dDataset):
+ """MPI-INF-3DHP dataset for 3D human pose estimation.
+
+ "Monocular 3D Human Pose Estimation In The Wild Using Improved CNN
+ Supervision", 3DV'2017.
+ More details can be found in the `paper
+ `__.
+
+    MPI-INF-3DHP keypoint indexes::
+
+ 0: 'head_top',
+ 1: 'neck',
+ 2: 'right_shoulder',
+ 3: 'right_elbow',
+ 4: 'right_wrist',
+        5: 'left_shoulder',
+ 6: 'left_elbow',
+ 7: 'left_wrist',
+ 8: 'right_hip',
+ 9: 'right_knee',
+ 10: 'right_ankle',
+ 11: 'left_hip',
+ 12: 'left_knee',
+ 13: 'left_ankle',
+ 14: 'root (pelvis)',
+ 15: 'spine',
+ 16: 'head'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+        data_cfg (dict): Data configurations. Please refer to the docstring of
+            Kpt3dSviewKpt2dDataset for common data attributes. Here are the
+            MPI-INF-3DHP specific attributes.
+ - joint_2d_src: 2D joint source. Options include:
+ "gt": from the annotation file
+ "detection": from a detection result file of 2D keypoint
+                "pipeline": will be generated by the pipeline
+ Default: "gt".
+ - joint_2d_det_file: Path to the detection result file of 2D
+ keypoint. Only used when joint_2d_src == "detection".
+ - need_camera_param: Whether need camera parameters or not.
+ Default: False.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ JOINT_NAMES = [
+ 'HeadTop', 'Neck', 'RShoulder', 'RElbow', 'RWrist', 'LShoulder',
+ 'LElbow', 'LWrist', 'RHip', 'RKnee', 'RAnkle', 'LHip', 'LKnee',
+ 'LAnkle', 'Root', 'Spine', 'Head'
+ ]
+
+ # 2D joint source options:
+ # "gt": from the annotation file
+ # "detection": from a detection result file of 2D keypoint
+    # "pipeline": will be generated by the pipeline
+ SUPPORTED_JOINT_2D_SRC = {'gt', 'detection', 'pipeline'}
+
+ # metric
+ ALLOWED_METRICS = {
+ 'mpjpe', 'p-mpjpe', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc'
+ }
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpi_inf_3dhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ def load_config(self, data_cfg):
+ super().load_config(data_cfg)
+ # mpi-inf-3dhp specific attributes
+ self.joint_2d_src = data_cfg.get('joint_2d_src', 'gt')
+ if self.joint_2d_src not in self.SUPPORTED_JOINT_2D_SRC:
+ raise ValueError(
+ f'Unsupported joint_2d_src "{self.joint_2d_src}". '
+ f'Supported options are {self.SUPPORTED_JOINT_2D_SRC}')
+
+ self.joint_2d_det_file = data_cfg.get('joint_2d_det_file', None)
+
+ self.need_camera_param = data_cfg.get('need_camera_param', False)
+ if self.need_camera_param:
+ assert 'camera_param_file' in data_cfg
+ self.camera_param = self._load_camera_param(
+ data_cfg['camera_param_file'])
+
+ # mpi-inf-3dhp specific annotation info
+ ann_info = {}
+ ann_info['use_different_joint_weights'] = False
+
+ self.ann_info.update(ann_info)
+
+ def load_annotations(self):
+ data_info = super().load_annotations()
+
+ # get 2D joints
+ if self.joint_2d_src == 'gt':
+ data_info['joints_2d'] = data_info['joints_2d']
+ elif self.joint_2d_src == 'detection':
+ data_info['joints_2d'] = self._load_joint_2d_detection(
+ self.joint_2d_det_file)
+ assert data_info['joints_2d'].shape[0] == data_info[
+ 'joints_3d'].shape[0]
+ assert data_info['joints_2d'].shape[2] == 3
+ elif self.joint_2d_src == 'pipeline':
+ # joint_2d will be generated in the pipeline
+ pass
+ else:
+ raise NotImplementedError(
+ f'Unhandled joint_2d_src option {self.joint_2d_src}')
+
+ return data_info
+
+ @staticmethod
+ def _parse_mpi_inf_3dhp_imgname(imgname):
+ """Parse imgname to get information of subject, sequence and camera.
+
+ A typical mpi-inf-3dhp training image filename is like:
+ S1_Seq1_Cam0_000001.jpg. A typical mpi-inf-3dhp testing image filename
+ is like: TS1_000001.jpg
+ """
+ if imgname[0] == 'S':
+ subj, rest = imgname.split('_', 1)
+ seq, rest = rest.split('_', 1)
+ camera, rest = rest.split('_', 1)
+ return subj, seq, camera
+ else:
+ subj, rest = imgname.split('_', 1)
+ return subj, None, None
+
+ def build_sample_indices(self):
+ """Split original videos into sequences and build frame indices.
+
+ This method overrides the default one in the base class.
+ """
+
+ # Group frames into videos. Assume that self.data_info is
+ # chronological.
+ video_frames = defaultdict(list)
+ for idx, imgname in enumerate(self.data_info['imgnames']):
+ subj, seq, camera = self._parse_mpi_inf_3dhp_imgname(imgname)
+ if seq is not None:
+ video_frames[(subj, seq, camera)].append(idx)
+ else:
+ video_frames[subj].append(idx)
+
+ # build sample indices
+ sample_indices = []
+ _len = (self.seq_len - 1) * self.seq_frame_interval + 1
+ _step = self.seq_frame_interval
+ for _, _indices in sorted(video_frames.items()):
+ n_frame = len(_indices)
+
+ if self.temporal_padding:
+ # Pad the sequence so that every frame in the sequence will be
+ # predicted.
+ if self.causal:
+ frames_left = self.seq_len - 1
+ frames_right = 0
+ else:
+ frames_left = (self.seq_len - 1) // 2
+ frames_right = frames_left
+ for i in range(n_frame):
+ pad_left = max(0, frames_left - i // _step)
+ pad_right = max(0,
+ frames_right - (n_frame - 1 - i) // _step)
+ start = max(i % _step, i - frames_left * _step)
+ end = min(n_frame - (n_frame - 1 - i) % _step,
+ i + frames_right * _step + 1)
+ sample_indices.append([_indices[0]] * pad_left +
+ _indices[start:end:_step] +
+ [_indices[-1]] * pad_right)
+ else:
+ seqs_from_video = [
+ _indices[i:(i + _len):_step]
+ for i in range(0, n_frame - _len + 1)
+ ]
+ sample_indices.extend(seqs_from_video)
+
+ # reduce dataset size if self.subset < 1
+ assert 0 < self.subset <= 1
+ subset_size = int(len(sample_indices) * self.subset)
+ start = np.random.randint(0, len(sample_indices) - subset_size + 1)
+ end = start + subset_size
+
+ return sample_indices[start:end]
+
+ def _load_joint_2d_detection(self, det_file):
+        """Load 2D joint detection results from file."""
+ joints_2d = np.load(det_file).astype(np.float32)
+
+ return joints_2d
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+                    f'Unsupported metric "{_metric}" for the MPI-INF-3DHP '
+                    f'dataset. Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ image_paths = result['target_image_paths']
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ target_id = self.name2id[image_paths[i]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'target_id': target_id,
+ })
+
+ mmcv.dump(kpts, res_file)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts)
+ elif _metric == 'p-mpjpe':
+ _nv_tuples = self._report_mpjpe(kpts, mode='p-mpjpe')
+ elif _metric == '3dpck':
+ _nv_tuples = self._report_3d_pck(kpts)
+ elif _metric == 'p-3dpck':
+ _nv_tuples = self._report_3d_pck(kpts, mode='p-3dpck')
+ elif _metric == '3dauc':
+ _nv_tuples = self._report_3d_auc(kpts)
+ elif _metric == 'p-3dauc':
+ _nv_tuples = self._report_3d_auc(kpts, mode='p-3dauc')
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(_nv_tuples)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ def _report_mpjpe(self, keypoint_results, mode='mpjpe'):
+        """Calculate the mean per joint position error (MPJPE) or its variant
+        P-MPJPE.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+ mode (str): Specify mpjpe variants. Supported options are:
+ - ``'mpjpe'``: Standard MPJPE.
+ - ``'p-mpjpe'``: MPJPE after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == 'mpjpe':
+ alignment = 'none'
+ elif mode == 'p-mpjpe':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_mpjpe(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+ def _report_3d_pck(self, keypoint_results, mode='3dpck'):
+        """Calculate the Percentage of Correct Keypoints (3DPCK) with or
+        without Procrustes alignment.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+            mode (str): Specify the 3DPCK variant. Supported options are:
+ - ``'3dpck'``: Standard 3DPCK.
+ - ``'p-3dpck'``: 3DPCK after aligning prediction to groundtruth
+ via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == '3dpck':
+ alignment = 'none'
+ elif mode == 'p-3dpck':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_3d_pck(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+ def _report_3d_auc(self, keypoint_results, mode='3dauc'):
+        """Calculate the Area Under the Curve (AUC) of 3DPCK computed over a
+        range of thresholds.
+
+ Args:
+ keypoint_results (list): Keypoint predictions. See
+ 'Body3DMpiInf3dhpDataset.evaluate' for details.
+            mode (str): Specify the 3DAUC variant. Supported options are:
+
+ - ``'3dauc'``: Standard 3DAUC.
+ - ``'p-3dauc'``: 3DAUC after aligning prediction to
+ groundtruth via a rigid transformation (scale, rotation and
+ translation).
+ """
+
+ preds = []
+ gts = []
+ for idx, result in enumerate(keypoint_results):
+ pred = result['keypoints']
+ target_id = result['target_id']
+ gt, gt_visible = np.split(
+ self.data_info['joints_3d'][target_id], [3], axis=-1)
+ preds.append(pred)
+ gts.append(gt)
+
+ preds = np.stack(preds)
+ gts = np.stack(gts)
+ masks = np.ones_like(gts[:, :, 0], dtype=bool)
+
+ err_name = mode.upper()
+ if mode == '3dauc':
+ alignment = 'none'
+ elif mode == 'p-3dauc':
+ alignment = 'procrustes'
+ else:
+ raise ValueError(f'Invalid mode: {mode}')
+
+ error = keypoint_3d_auc(preds, gts, masks, alignment)
+ name_value_tuples = [(err_name, error)]
+
+ return name_value_tuples
+
+    def _load_camera_param(self, camera_param_file):
+        """Load camera parameters from file."""
+        return mmcv.load(camera_param_file)
+
+ def get_camera_param(self, imgname):
+ """Get camera parameters of a frame by its image name."""
+ assert hasattr(self, 'camera_param')
+ return self.camera_param[imgname[:-11]]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5bf92d182b972cd1821990bb3fc673d99f624e3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_mview_direct_panoptic_dataset.py
@@ -0,0 +1,493 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import glob
+import json
+import os.path as osp
+import pickle
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.camera import SimpleCamera
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.datasets.base import Kpt3dMviewRgbImgDirectDataset
+
+
+@DATASETS.register_module()
+class Body3DMviewDirectPanopticDataset(Kpt3dMviewRgbImgDirectDataset):
+ """Panoptic dataset for direct multi-view human pose estimation.
+
+ `Panoptic Studio: A Massively Multiview System for Social Motion
+ Capture' ICCV'2015
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads both 2D and 3D annotations as well as camera parameters.
+
+ Panoptic keypoint indexes::
+
+ 'neck': 0,
+ 'nose': 1,
+ 'mid-hip': 2,
+ 'l-shoulder': 3,
+ 'l-elbow': 4,
+ 'l-wrist': 5,
+ 'l-hip': 6,
+ 'l-knee': 7,
+ 'l-ankle': 8,
+ 'r-shoulder': 9,
+ 'r-elbow': 10,
+ 'r-wrist': 11,
+ 'r-hip': 12,
+ 'r-knee': 13,
+ 'r-ankle': 14,
+ 'l-eye': 15,
+ 'l-ear': 16,
+ 'r-eye': 17,
+ 'r-ear': 18,
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+ ALLOWED_METRICS = {'mpjpe', 'mAP'}
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/panoptic_body3d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.load_config(data_cfg)
+ self.ann_info['use_different_joint_weights'] = False
+
+ if ann_file is None:
+ self.db_file = osp.join(
+ img_prefix, f'group_{self.subset}_cam{self.num_cameras}.pkl')
+ else:
+ self.db_file = ann_file
+
+ if osp.exists(self.db_file):
+ with open(self.db_file, 'rb') as f:
+ info = pickle.load(f)
+ assert info['sequence_list'] == self.seq_list
+ assert info['interval'] == self.seq_frame_interval
+ assert info['cam_list'] == self.cam_list
+ self.db = info['db']
+ else:
+ self.db = self._get_db()
+ info = {
+ 'sequence_list': self.seq_list,
+ 'interval': self.seq_frame_interval,
+ 'cam_list': self.cam_list,
+ 'db': self.db
+ }
+ with open(self.db_file, 'wb') as f:
+ pickle.dump(info, f)
+
+ self.db_size = len(self.db)
+
+        print(f'=> loaded {len(self.db)} samples')
+
+ def load_config(self, data_cfg):
+ """Initialize dataset attributes according to the config.
+
+ Override this method to set dataset specific attributes.
+ """
+ self.num_joints = data_cfg['num_joints']
+ assert self.num_joints <= 19
+ self.seq_list = data_cfg['seq_list']
+ self.cam_list = data_cfg['cam_list']
+ self.num_cameras = data_cfg['num_cameras']
+ assert self.num_cameras == len(self.cam_list)
+ self.seq_frame_interval = data_cfg.get('seq_frame_interval', 1)
+ self.subset = data_cfg.get('subset', 'train')
+ self.need_camera_param = True
+ self.root_id = data_cfg.get('root_id', 0)
+ self.max_persons = data_cfg.get('max_num', 10)
+
+ def _get_scale(self, raw_image_size):
+ heatmap_size = self.ann_info['heatmap_size']
+ image_size = self.ann_info['image_size']
+ assert heatmap_size[0][0] / heatmap_size[0][1] \
+ == image_size[0] / image_size[1]
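+        # pad the raw image to the aspect ratio of the network input; the
+        # returned scale is the padded size in pixels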
+ w, h = raw_image_size
+ w_resized, h_resized = image_size
+ if w / w_resized < h / h_resized:
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ else:
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+
+ scale = np.array([w_pad, h_pad], dtype=np.float32)
+
+ return scale
+
+ def _get_cam(self, seq):
+ """Get camera parameters.
+
+ Args:
+ seq (str): Sequence name.
+
+ Returns: Camera parameters.
+ """
+ cam_file = osp.join(self.img_prefix, seq,
+ 'calibration_{:s}.json'.format(seq))
+ with open(cam_file) as cfile:
+ calib = json.load(cfile)
+
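+        # Axis permutation (with a sign flip) that converts the Panoptic
+        # world frame to the convention used here; it is applied to the
+        # camera extrinsics below and to the 3D poses in `_get_db`.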
+ M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
+ cameras = {}
+ for cam in calib['cameras']:
+ if (cam['panel'], cam['node']) in self.cam_list:
+ sel_cam = {}
+ R_w2c = np.array(cam['R']).dot(M)
+ T_w2c = np.array(cam['t']).reshape((3, 1)) * 10.0 # cm to mm
+ R_c2w = R_w2c.T
+ T_c2w = -R_w2c.T @ T_w2c
+ sel_cam['R'] = R_c2w.tolist()
+ sel_cam['T'] = T_c2w.tolist()
+ sel_cam['K'] = cam['K'][:2]
+ distCoef = cam['distCoef']
+ sel_cam['k'] = [distCoef[0], distCoef[1], distCoef[4]]
+ sel_cam['p'] = [distCoef[2], distCoef[3]]
+ cameras[(cam['panel'], cam['node'])] = sel_cam
+
+ return cameras
+
+ def _get_db(self):
+ """Get dataset base.
+
+ Returns:
+ dict: the dataset base (2D and 3D information)
+ """
+ width = 1920
+ height = 1080
+ db = []
+ sample_id = 0
+ for seq in self.seq_list:
+ cameras = self._get_cam(seq)
+ curr_anno = osp.join(self.img_prefix, seq,
+ 'hdPose3d_stage1_coco19')
+ anno_files = sorted(glob.iglob('{:s}/*.json'.format(curr_anno)))
+ print(f'load sequence: {seq}', flush=True)
+ for i, file in enumerate(anno_files):
+ if i % self.seq_frame_interval == 0:
+ with open(file) as dfile:
+ bodies = json.load(dfile)['bodies']
+ if len(bodies) == 0:
+ continue
+
+ for k, cam_param in cameras.items():
+ single_view_camera = SimpleCamera(cam_param)
+ postfix = osp.basename(file).replace('body3DScene', '')
+ prefix = '{:02d}_{:02d}'.format(k[0], k[1])
+ image_file = osp.join(seq, 'hdImgs', prefix,
+ prefix + postfix)
+ image_file = image_file.replace('json', 'jpg')
+
+ all_poses_3d = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+ all_poses_vis_3d = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+ all_roots_3d = np.zeros((self.max_persons, 3),
+ dtype=np.float32)
+ all_poses = np.zeros(
+ (self.max_persons, self.num_joints, 3),
+ dtype=np.float32)
+
+ cnt = 0
+                        person_ids = -np.ones(self.max_persons, dtype=int)
+ for body in bodies:
+ if cnt >= self.max_persons:
+ break
+ pose3d = np.array(body['joints19']).reshape(
+ (-1, 4))
+ pose3d = pose3d[:self.num_joints]
+
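+                            # Joints with confidence above 0.1 are treated
+                            # as visible.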
+ joints_vis = pose3d[:, -1] > 0.1
+
+ if not joints_vis[self.root_id]:
+ continue
+
+ # Coordinate transformation
+ M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0],
+ [0.0, 1.0, 0.0]])
+ pose3d[:, 0:3] = pose3d[:, 0:3].dot(M) * 10.0
+
+ all_poses_3d[cnt] = pose3d[:, :3]
+ all_roots_3d[cnt] = pose3d[self.root_id, :3]
+ all_poses_vis_3d[cnt] = np.repeat(
+ np.reshape(joints_vis, (-1, 1)), 3, axis=1)
+
+ pose2d = np.zeros((pose3d.shape[0], 3))
+ # get pose_2d from pose_3d
+ pose2d[:, :2] = single_view_camera.world_to_pixel(
+ pose3d[:, :3])
+ x_check = np.bitwise_and(pose2d[:, 0] >= 0,
+ pose2d[:, 0] <= width - 1)
+ y_check = np.bitwise_and(
+ pose2d[:, 1] >= 0, pose2d[:, 1] <= height - 1)
+ check = np.bitwise_and(x_check, y_check)
+ joints_vis[np.logical_not(check)] = 0
+ pose2d[:, -1] = joints_vis
+
+ all_poses[cnt] = pose2d
+ person_ids[cnt] = body['id']
+ cnt += 1
+
+ if cnt > 0:
+ db.append({
+ 'image_file':
+ osp.join(self.img_prefix, image_file),
+ 'joints_3d':
+ all_poses_3d,
+ 'person_ids':
+ person_ids,
+ 'joints_3d_visible':
+ all_poses_vis_3d,
+ 'joints': [all_poses],
+ 'roots_3d':
+ all_roots_3d,
+ 'camera':
+ cam_param,
+ 'num_persons':
+ cnt,
+ 'sample_id':
+ sample_id,
+ 'center':
+ np.array((width / 2, height / 2),
+ dtype=np.float32),
+ 'scale':
+ self._get_scale((width, height))
+ })
+ sample_id += 1
+ return db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mpjpe', **kwargs):
+ """
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+ - pose_3d (np.ndarray): predicted 3D human pose
+ - sample_id (np.ndarray): sample id of a frame.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Defaults: 'mpjpe'.
+ **kwargs:
+
+ Returns:
+
+ """
+ pose_3ds = np.concatenate([result['pose_3d'] for result in results],
+ axis=0)
+ sample_ids = []
+ for result in results:
+ sample_ids.extend(result['sample_id'])
+
+ _results = [
+ dict(sample_id=sample_id, pose_3d=pose_3d)
+ for (sample_id, pose_3d) in zip(sample_ids, pose_3ds)
+ ]
+ _results = self._sort_and_unique_outputs(_results, key='sample_id')
+
+ metrics = metric if isinstance(metric, list) else [metric]
+ for _metric in metrics:
+ if _metric not in self.ALLOWED_METRICS:
+ raise ValueError(
+                    f'Unsupported metric "{_metric}". '
+                    f'Supported metrics are {self.ALLOWED_METRICS}')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ mmcv.dump(_results, res_file)
+
+ eval_list = []
+ gt_num = self.db_size // self.num_cameras
+ assert len(
+ _results) == gt_num, f'number mismatch: {len(_results)}, {gt_num}'
+
+ total_gt = 0
+ for i in range(gt_num):
+ index = self.num_cameras * i
+ db_rec = copy.deepcopy(self.db[index])
+ joints_3d = db_rec['joints_3d']
+ joints_3d_vis = db_rec['joints_3d_visible']
+
+ if joints_3d_vis.sum() < 1:
+ continue
+
+ pred = _results[i]['pose_3d'].copy()
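+            # Keep only valid detections (channel 3 acts as a validity
+            # flag); channel 4 holds the detection score used below.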
+ pred = pred[pred[:, 0, 3] >= 0]
+ for pose in pred:
+ mpjpes = []
+ for (gt, gt_vis) in zip(joints_3d, joints_3d_vis):
+ vis = gt_vis[:, 0] > 0
+ if vis.sum() < 1:
+ break
+ mpjpe = np.mean(
+ np.sqrt(
+ np.sum((pose[vis, 0:3] - gt[vis])**2, axis=-1)))
+ mpjpes.append(mpjpe)
+ min_gt = np.argmin(mpjpes)
+ min_mpjpe = np.min(mpjpes)
+ score = pose[0, 4]
+ eval_list.append({
+ 'mpjpe': float(min_mpjpe),
+ 'score': float(score),
+ 'gt_id': int(total_gt + min_gt)
+ })
+
+ total_gt += (joints_3d_vis[:, :, 0].sum(-1) >= 1).sum()
+
+ mpjpe_threshold = np.arange(25, 155, 25)
+ aps = []
+ ars = []
+ for t in mpjpe_threshold:
+ ap, ar = self._eval_list_to_ap(eval_list, total_gt, t)
+ aps.append(ap)
+ ars.append(ar)
+
+ name_value_tuples = []
+ for _metric in metrics:
+ if _metric == 'mpjpe':
+ stats_names = ['RECALL 500mm', 'MPJPE 500mm']
+ info_str = list(
+ zip(stats_names, [
+ self._eval_list_to_recall(eval_list, total_gt),
+ self._eval_list_to_mpjpe(eval_list)
+ ]))
+ elif _metric == 'mAP':
+ stats_names = [
+ 'AP 25', 'AP 50', 'AP 75', 'AP 100', 'AP 125', 'AP 150',
+ 'mAP', 'AR 25', 'AR 50', 'AR 75', 'AR 100', 'AR 125',
+ 'AR 150', 'mAR'
+ ]
+ mAP = np.array(aps).mean()
+ mAR = np.array(ars).mean()
+ info_str = list(zip(stats_names, aps + [mAP] + ars + [mAR]))
+ else:
+ raise NotImplementedError
+ name_value_tuples.extend(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return OrderedDict(name_value_tuples)
+
+ @staticmethod
+ def _eval_list_to_ap(eval_list, total_gt, threshold):
+ """Get Average Precision (AP) and Average Recall at a certain
+ threshold."""
+
+ eval_list.sort(key=lambda k: k['score'], reverse=True)
+ total_num = len(eval_list)
+
+ tp = np.zeros(total_num)
+ fp = np.zeros(total_num)
+ gt_det = []
+ for i, item in enumerate(eval_list):
+ if item['mpjpe'] < threshold and item['gt_id'] not in gt_det:
+ tp[i] = 1
+ gt_det.append(item['gt_id'])
+ else:
+ fp[i] = 1
+ tp = np.cumsum(tp)
+ fp = np.cumsum(fp)
+ recall = tp / (total_gt + 1e-5)
+ precise = tp / (tp + fp + 1e-5)
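+        # Enforce a monotonically non-increasing precision envelope before
+        # integrating the area under the precision-recall curve.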
+ for n in range(total_num - 2, -1, -1):
+ precise[n] = max(precise[n], precise[n + 1])
+
+ precise = np.concatenate(([0], precise, [0]))
+ recall = np.concatenate(([0], recall, [1]))
+ index = np.where(recall[1:] != recall[:-1])[0]
+ ap = np.sum((recall[index + 1] - recall[index]) * precise[index + 1])
+
+ return ap, recall[-2]
+
+ @staticmethod
+ def _eval_list_to_mpjpe(eval_list, threshold=500):
+ """Get MPJPE within a certain threshold."""
+ eval_list.sort(key=lambda k: k['score'], reverse=True)
+ gt_det = []
+
+ mpjpes = []
+ for i, item in enumerate(eval_list):
+ if item['mpjpe'] < threshold and item['gt_id'] not in gt_det:
+ mpjpes.append(item['mpjpe'])
+ gt_det.append(item['gt_id'])
+
+ return np.mean(mpjpes) if len(mpjpes) > 0 else np.inf
+
+ @staticmethod
+ def _eval_list_to_recall(eval_list, total_gt, threshold=500):
+ """Get Recall at a certain threshold."""
+ gt_ids = [e['gt_id'] for e in eval_list if e['mpjpe'] < threshold]
+
+ return len(np.unique(gt_ids)) / total_gt
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = {}
+ for c in range(self.num_cameras):
+ result = copy.deepcopy(self.db[self.num_cameras * idx + c])
+ result['ann_info'] = self.ann_info
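+            # Panoptic HD cameras record at a fixed 1920x1080 resolution.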
+ width = 1920
+ height = 1080
+ result['mask'] = [np.ones((height, width), dtype=np.float32)]
+ results[c] = result
+
+ return self.pipeline(results)
+
+ @staticmethod
+ def _sort_and_unique_outputs(outputs, key='sample_id'):
+ """sort outputs and remove the repeated ones."""
+ outputs = sorted(outputs, key=lambda x: x[key])
+ num_outputs = len(outputs)
+ for i in range(num_outputs - 1, 0, -1):
+ if outputs[i][key] == outputs[i - 1][key]:
+ del outputs[i]
+
+ return outputs
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..491d54914d5838a1759b7da7fb16ad2b205ba83c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/body3d/body3d_semi_supervision_dataset.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS, build_dataset
+
+
+@DATASETS.register_module()
+class Body3DSemiSupervisionDataset(Dataset):
+ """Mix Dataset for semi-supervised training in 3D human pose estimation
+ task.
+
+    The dataset combines data from two datasets (a labeled one and an
+    unlabeled one) and returns a dict containing data from both.
+
+ Args:
+ labeled_dataset (Dataset): Dataset with 3D keypoint annotations.
+ unlabeled_dataset (Dataset): Dataset without 3D keypoint annotations.
+ """
+
+ def __init__(self, labeled_dataset, unlabeled_dataset):
+ super().__init__()
+ self.labeled_dataset = build_dataset(labeled_dataset)
+ self.unlabeled_dataset = build_dataset(unlabeled_dataset)
+ self.length = len(self.unlabeled_dataset)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, i):
+ """Given index, get the data from unlabeled dataset and randomly sample
+ an item from labeled dataset.
+
+ Return a dict containing data from labeled and unlabeled dataset.
+ """
+ data = self.unlabeled_dataset[i]
+ rand_ind = np.random.randint(0, len(self.labeled_dataset))
+ labeled_data = self.labeled_dataset[rand_ind]
+ data.update(labeled_data)
+ return data
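+
+
+# Illustrative usage (placeholder names: `labeled_cfg` and `unlabeled_cfg`
+# stand for ordinary mmpose dataset config dicts):
+#
+#   semi_dataset = Body3DSemiSupervisionDataset(labeled_cfg, unlabeled_cfg)
+#   sample = semi_dataset[0]  # an unlabeled sample merged with a randomly
+#                             # drawn labeled one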
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ac79377f8ef8c66f279e8c68c44c8bd61d87dbb
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_aic import BottomUpAicDataset
+from .bottom_up_coco import BottomUpCocoDataset
+from .bottom_up_coco_wholebody import BottomUpCocoWholeBodyDataset
+from .bottom_up_crowdpose import BottomUpCrowdPoseDataset
+from .bottom_up_mhp import BottomUpMhpDataset
+
+__all__ = [
+ 'BottomUpCocoDataset', 'BottomUpCrowdPoseDataset', 'BottomUpMhpDataset',
+ 'BottomUpAicDataset', 'BottomUpCocoWholeBodyDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..e56b72586f36bc0758876fa5d0ce3016efad3802
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_aic.py
@@ -0,0 +1,105 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpAicDataset(BottomUpCocoDataset):
+ """Aic dataset for bottom-up pose estimation.
+
+ "AI Challenger : A Large-scale Dataset for Going Deeper
+ in Image Understanding", arXiv'2017.
+ More details can be found in the `paper
+ `__
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AIC keypoint indexes::
+
+ 0: "right_shoulder",
+ 1: "right_elbow",
+ 2: "right_wrist",
+ 3: "left_shoulder",
+ 4: "left_elbow",
+ 5: "left_wrist",
+ 6: "right_hip",
+ 7: "right_knee",
+ 8: "right_ankle",
+ 9: "left_hip",
+ 10: "left_knee",
+ 11: "left_ankle",
+ 12: "head_top",
+ 13: "neck"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aic.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a2fea5d34b208b0d3703fe9dff1294e053ec950
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from torch.utils.data import Dataset
+
+
+class BottomUpBaseDataset(Dataset):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgBottomUpDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'BottomUpBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgBottomUpDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa2967fe22db1427975568aec40e7f1313d1de2d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py
@@ -0,0 +1,305 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.core.post_processing import oks_nms, soft_oks_nms
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgBottomUpDataset
+
+
+@DATASETS.register_module()
+class BottomUpCocoDataset(Kpt2dSviewRgbImgBottomUpDataset):
+ """COCO dataset for bottom-up pose estimation.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _get_single(self, idx):
+ """Get anno for a single image.
+
+ Args:
+ idx (int): image idx
+
+ Returns:
+ dict: info for model training
+ """
+ coco = self.coco
+ img_id = self.img_ids[idx]
+ ann_ids = coco.getAnnIds(imgIds=img_id)
+ anno = coco.loadAnns(ann_ids)
+
+ mask = self._get_mask(anno, idx)
+ anno = [
+ obj.copy() for obj in anno
+ if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+ ]
+
+ joints = self._get_joints(anno)
+ mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])]
+ joints_list = [
+ joints.copy() for _ in range(self.ann_info['num_scales'])
+ ]
+
+ db_rec = {}
+ db_rec['dataset'] = self.dataset_name
+ db_rec['image_file'] = osp.join(self.img_prefix, self.id2name[img_id])
+ db_rec['mask'] = mask_list
+ db_rec['joints'] = joints_list
+
+ return db_rec
+
+ def _get_joints(self, anno):
+ """Get joints for all people in an image."""
+ num_people = len(anno)
+
+ if self.ann_info['scale_aware_sigma']:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 4),
+ dtype=np.float32)
+ else:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+
+ for i, obj in enumerate(anno):
+ joints[i, :, :3] = \
+ np.array(obj['keypoints']).reshape([-1, 3])
+ if self.ann_info['scale_aware_sigma']:
+ # get person box
+ box = obj['bbox']
+ size = max(box[2], box[3])
+ sigma = size / self.base_size * self.base_sigma
+ if self.int_sigma:
+ sigma = int(np.ceil(sigma))
+ assert sigma > 0, sigma
+ joints[i, :, 3] = sigma
+
+ return joints
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_people: P
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (list[np.ndarray(P, K, 3+tag_num)]): \
+ Pose predictions for all people in images.
+ - scores (list[P]): List of person scores.
+ - image_path (list[str]): For example, ['coco/images/\
+ val2017/000000397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model outputs.
+
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ preds = []
+ scores = []
+ image_paths = []
+
+ for result in results:
+ preds.append(result['preds'])
+ scores.append(result['scores'])
+ image_paths.append(result['image_paths'][0])
+
+ kpts = defaultdict(list)
+ # iterate over images
+ for idx, _preds in enumerate(preds):
+ str_image_path = image_paths[idx]
+ image_id = self.name2id[osp.basename(str_image_path)]
+ # iterate over people
+ for idx_person, kpt in enumerate(_preds):
+ # use bbox area
+ area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (
+ np.max(kpt[:, 1]) - np.min(kpt[:, 1]))
+
+ kpts[image_id].append({
+ 'keypoints': kpt[:, 0:3],
+ 'score': scores[idx][idx_person],
+ 'tags': kpt[:, 3],
+ 'image_id': image_id,
+ 'area': area,
+ })
+
+ valid_kpts = []
+ for img in kpts.keys():
+ img_kpts = kpts[img]
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, self.oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ for img_kpt, key_point in zip(img_kpts, key_points):
+ kpt = key_point.reshape((self.ann_info['num_joints'], 3))
+ left_top = np.amin(kpt, axis=0)
+ right_bottom = np.amax(kpt, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ cat_results.append({
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': img_kpt['score'],
+ 'bbox': [left_top[0], left_top[1], w, h]
+ })
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py
new file mode 100644
index 0000000000000000000000000000000000000000..363d2efb2ec93dedb8abbe78430af52970c4afc3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py
@@ -0,0 +1,238 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpCocoWholeBodyDataset(BottomUpCocoDataset):
+ """CocoWholeBodyDataset dataset for bottom-up pose estimation.
+
+ `Whole-Body Human Pose Estimation in the Wild', ECCV'2020.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ In total, we have 133 keypoints for wholebody pose estimation.
+
+ COCO-WholeBody keypoint indexes::
+
+ 0-16: 17 body keypoints,
+ 17-22: 6 foot keypoints,
+ 23-90: 68 face keypoints,
+ 91-132: 42 hand keypoints
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco_wholebody.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.body_num = 17
+ self.foot_num = 6
+ self.face_num = 68
+ self.left_hand_num = 21
+ self.right_hand_num = 21
+
+ print(f'=> num_images: {self.num_images}')
+
+ def _get_joints(self, anno):
+ """Get joints for all people in an image."""
+ num_people = len(anno)
+
+ if self.ann_info['scale_aware_sigma']:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 4),
+ dtype=np.float32)
+ else:
+ joints = np.zeros((num_people, self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+
+ for i, obj in enumerate(anno):
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+
+ joints[i, :self.ann_info['num_joints'], :3] = keypoints
+ if self.ann_info['scale_aware_sigma']:
+ # get person box
+ box = obj['bbox']
+ size = max(box[2], box[3])
+ sigma = size / self.base_size * self.base_sigma
+ if self.int_sigma:
+ sigma = int(np.ceil(sigma))
+ assert sigma > 0, sigma
+ joints[i, :, 3] = sigma
+
+ return joints
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num,
+ self.left_hand_num, self.right_hand_num
+ ]) * 3
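+            # `cuts` holds cumulative offsets (in flattened x, y, score
+            # triplets) separating body, foot, face, left-hand and
+            # right-hand keypoints within each 133-point prediction.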
+
+ for img_kpt, key_point in zip(img_kpts, key_points):
+ kpt = key_point.reshape((self.ann_info['num_joints'], 3))
+ left_top = np.amin(kpt, axis=0)
+ right_bottom = np.amax(kpt, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ cat_results.append({
+ 'image_id':
+ img_kpt['image_id'],
+ 'category_id':
+ cat_id,
+ 'keypoints':
+ key_point[cuts[0]:cuts[1]].tolist(),
+ 'foot_kpts':
+ key_point[cuts[1]:cuts[2]].tolist(),
+ 'face_kpts':
+ key_point[cuts[2]:cuts[3]].tolist(),
+ 'lefthand_kpts':
+ key_point[cuts[3]:cuts[4]].tolist(),
+ 'righthand_kpts':
+ key_point[cuts[4]:cuts[5]].tolist(),
+ 'score':
+ img_kpt['score'],
+ 'bbox': [left_top[0], left_top[1], w, h]
+ })
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num, self.left_hand_num,
+ self.right_hand_num
+ ])
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_body',
+ self.sigmas[cuts[0]:cuts[1]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_foot',
+ self.sigmas[cuts[1]:cuts[2]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_face',
+ self.sigmas[cuts[2]:cuts[3]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_lefthand',
+ self.sigmas[cuts[3]:cuts[4]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_righthand',
+ self.sigmas[cuts[4]:cuts[5]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_wholebody',
+ self.sigmas,
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebabf3e1ddddd96de8aea9bfe00a095480b3112f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_crowdpose.py
@@ -0,0 +1,109 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpCrowdPoseDataset(BottomUpCocoDataset):
+ """CrowdPose dataset for bottom-up pose estimation.
+
+ "CrowdPose: Efficient Crowded Scenes Pose Estimation and
+ A New Benchmark", CVPR'2019.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ CrowdPose keypoint indexes::
+
+ 0: 'left_shoulder',
+ 1: 'right_shoulder',
+ 2: 'left_elbow',
+ 3: 'right_elbow',
+ 4: 'left_wrist',
+ 5: 'right_wrist',
+ 6: 'left_hip',
+ 7: 'right_hip',
+ 8: 'left_knee',
+ 9: 'right_knee',
+ 10: 'left_ankle',
+ 11: 'right_ankle',
+ 12: 'top_head',
+ 13: 'neck'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/crowdpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)',
+ 'AP(H)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_crowd',
+ self.sigmas,
+ use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..143812332512e56e6962a780d8900d6ca8823c96
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/bottom_up/bottom_up_mhp.py
@@ -0,0 +1,108 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import json_tricks as json
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from mmpose.datasets.builder import DATASETS
+from .bottom_up_coco import BottomUpCocoDataset
+
+
+@DATASETS.register_module()
+class BottomUpMhpDataset(BottomUpCocoDataset):
+ """MHPv2.0 dataset for top-down pose estimation.
+
+ "Understanding Humans in Crowded Scenes: Deep Nested Adversarial
+ Learning and A New Benchmark for Multi-Human Parsing", ACM MM'2018.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MHP keypoint indexes::
+
+ 0: "right ankle",
+ 1: "right knee",
+ 2: "right hip",
+ 3: "left hip",
+ 4: "left knee",
+ 5: "left ankle",
+ 6: "pelvis",
+ 7: "thorax",
+ 8: "upper neck",
+ 9: "head top",
+ 10: "right wrist",
+ 11: "right elbow",
+ 12: "right shoulder",
+ 13: "left shoulder",
+ 14: "left elbow",
+ 15: "left wrist",
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(BottomUpCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ print(f'=> num_images: {self.num_images}')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ with open(res_file, 'r') as file:
+ res_json = json.load(file)
+ if not res_json:
+ info_str = list(zip(stats_names, [
+ 0,
+ ] * len(stats_names)))
+ return info_str
+
+ coco_det = self.coco.loadRes(res_file)
+
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/face/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ba42d4413a657080bddf6224850e49a5a24601b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .face_300w_dataset import Face300WDataset
+from .face_aflw_dataset import FaceAFLWDataset
+from .face_coco_wholebody_dataset import FaceCocoWholeBodyDataset
+from .face_cofw_dataset import FaceCOFWDataset
+from .face_wflw_dataset import FaceWFLWDataset
+
+__all__ = [
+ 'Face300WDataset', 'FaceAFLWDataset', 'FaceWFLWDataset', 'FaceCOFWDataset',
+ 'FaceCocoWholeBodyDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_300w_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_300w_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5b602e09c2df2469444bec306342dc97a9c3d8d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_300w_dataset.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class Face300WDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face300W dataset for top-down face keypoint localization.
+
+ "300 faces In-the-wild challenge: Database and results",
+ Image and Vision Computing (IMAVIS) 2019.
+
+    The dataset loads raw images and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The landmark annotations follow the 68 points mark-up. The definition
+ can be found in `https://ibug.doc.ic.ac.uk/resources/300-W/`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/300w.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 36, :] - gts[:, 45, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_path (list[str]): For example, ['300W/ibug/\
+ image_018.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_aflw_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_aflw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..292d9eece7e33e97467088b8710bd2c7c272fe52
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_aflw_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceAFLWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face AFLW dataset for top-down face keypoint localization.
+
+ "Annotated Facial Landmarks in the Wild: A Large-scale,
+ Real-world Database for Facial Landmark Localization".
+ In Proc. First IEEE International Workshop on Benchmarking
+ Facial Image Analysis Technologies, 2011.
+
+    The dataset loads raw images and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The landmark annotations follow the 19 points mark-up. The definition
+ can be found in `https://www.tugraz.at/institute/icg/research`
+ `/team-bischof/lrs/downloads/aflw/`
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aflw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if self.test_mode:
+ # 'box_size' is used as normalization factor
+ assert 'box_size' in obj
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'box_size': obj['box_size'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, box_sizes, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ box_sizes (np.ndarray[N, 1]): box size
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ return np.tile(box_sizes, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_path (list[str]): For example, ['aflw/images/flickr/ \
+ 0/image00002.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..466fabbfcbeaa8ba3abe976269ab8a1de56e4e51
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class FaceBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+        raise ImportError(
+            'FaceBaseDataset has been replaced by '
+            'Kpt2dSviewRgbImgTopDownDataset, '
+            'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef5117a8a06626cb5bc520795cca06e788bf198d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_coco_wholebody_dataset.py
@@ -0,0 +1,198 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceCocoWholeBodyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoWholeBodyDataset for face keypoint localization.
+
+ `Whole-Body Human Pose Estimation in the Wild', ECCV'2020.
+    More details can be found in the paper.
+
+    The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The face landmark annotations follow the 68 points mark-up.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/'
+ 'coco_wholebody_face.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if obj['face_valid'] and max(obj['face_kpts']) > 0:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+
+ keypoints = np.array(obj['face_kpts']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['face_box'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['face_box'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get inter-ocular distance as the normalize factor, measured as the
+ Euclidean distance between the outer corners of the eyes.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalized factor
+ """
+
+ interocular = np.linalg.norm(
+ gts[:, 36, :] - gts[:, 45, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate COCO-WholeBody Face keypoint results. The pose prediction
+ results will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_path (list[str]): For example, ['coco/train2017/\
+ 000000000009.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_cofw_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_cofw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..456ea0e9adbbadb6ecf4dffb3b5ff5e48cf92123
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_cofw_dataset.py
@@ -0,0 +1,198 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceCOFWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face COFW dataset for top-down face keypoint localization.
+
+ "Robust face landmark estimation under occlusion", ICCV'2013.
+
+ The dataset loads raw images and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The landmark annotations follow the 29 points mark-up. The definition
+ can be found in `http://www.vision.caltech.edu/xpburgos/ICCV13/`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/cofw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalization factor
+ """
+
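+ # The distance between landmarks 8 and 9 (the outer eye corners in the
+ # 29-point COFW markup) serves as the NME normalization factor.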
+ interocular = np.linalg.norm(
+ gts[:, 8, :] - gts[:, 9, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['cofw/images/\
+ 000001.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/face/face_wflw_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/face/face_wflw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4611e197bd334a3864d8af99f1778af94c51d16
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/face/face_wflw_dataset.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FaceWFLWDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Face WFLW dataset for top-down face keypoint localization.
+
+ "Look at Boundary: A Boundary-Aware Face Alignment Algorithm",
+ CVPR'2018.
+
+ The dataset loads raw images and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The landmark annotations follow the 98 points mark-up. The definition
+ can be found in `https://wywu.github.io/projects/LAB/WFLW.html`.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/wflw.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ if 'center' in obj and 'scale' in obj:
+ center = np.array(obj['center'])
+ scale = np.array([obj['scale'], obj['scale']]) * 1.25
+ else:
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _get_normalize_factor(self, gts, *args, **kwargs):
+ """Get normalize factor for evaluation.
+
+ Args:
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+
+ Returns:
+ np.ndarray[N, 2]: normalization factor
+ """
+
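+ # WFLW uses a 98-point markup; landmarks 60 and 72 are the outer eye
+ # corners, whose distance normalizes the NME metric.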
+ interocular = np.linalg.norm(
+ gts[:, 60, :] - gts[:, 72, :], axis=1, keepdims=True)
+ return np.tile(interocular, [1, 2])
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='NME', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[1,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[1,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['wflw/images/\
+ 0--Parade/0_Parade_marchingband_1_1015.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'NME'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['NME']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/fashion/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/fashion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..575d6ed4af94686a87443f5938ed8b0d0809540f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/fashion/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .deepfashion_dataset import DeepFashionDataset
+
+__all__ = ['DeepFashionDataset']
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/fashion/deepfashion_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/fashion/deepfashion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fef65528c27e4f4bb6c77100b5fd4e398c9129f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/fashion/deepfashion_dataset.py
@@ -0,0 +1,225 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class DeepFashionDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """DeepFashion dataset (full-body clothes) for fashion landmark detection.
+
+ "DeepFashion: Powering Robust Clothes Recognition
+ and Retrieval with Rich Annotations", CVPR'2016.
+ "Fashion Landmark Detection in the Wild", ECCV'2016.
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ The dataset contains 3 categories for full-body, upper-body and lower-body.
+
+ Fashion landmark indexes for upper-body clothes::
+
+ 0: 'left collar',
+ 1: 'right collar',
+ 2: 'left sleeve',
+ 3: 'right sleeve',
+ 4: 'left hem',
+ 5: 'right hem'
+
+ Fashion landmark indexes for lower-body clothes::
+
+ 0: 'left waistline',
+ 1: 'right waistline',
+ 2: 'left hem',
+ 3: 'right hem'
+
+ Fashion landmark indexes for full-body clothes::
+
+ 0: 'left collar',
+ 1: 'right collar',
+ 2: 'left sleeve',
+ 3: 'right sleeve',
+ 4: 'left waistline',
+ 5: 'right waistline',
+ 6: 'left hem',
+ 7: 'right hem'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ subset='',
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ if subset != '':
+ warnings.warn(
+ 'subset is deprecated. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ if subset == 'upper':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_upper.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+ elif subset == 'lower':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_lower.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+ elif subset == 'full':
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_full.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['img_00000001.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/fashion/fashion_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/fashion/fashion_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4e5860a478f5b9fb8d7a30873b6a4b0a32c3533
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/fashion/fashion_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class FashionBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'FashionBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgTopDownDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
+ )
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..49159afa6027e82ead87053f7f807267288b7a94
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .freihand_dataset import FreiHandDataset
+from .hand_coco_wholebody_dataset import HandCocoWholeBodyDataset
+from .interhand2d_dataset import InterHand2DDataset
+from .interhand3d_dataset import InterHand3DDataset
+from .onehand10k_dataset import OneHand10KDataset
+from .panoptic_hand2d_dataset import PanopticDataset
+from .rhd2d_dataset import Rhd2DDataset
+
+__all__ = [
+ 'FreiHandDataset', 'InterHand2DDataset', 'InterHand3DDataset',
+ 'OneHand10KDataset', 'PanopticDataset', 'Rhd2DDataset',
+ 'HandCocoWholeBodyDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/freihand_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/freihand_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ceeff2ef61619fa42909526218740dbb89027a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/freihand_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class FreiHandDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """FreiHand dataset for top-down hand pose estimation.
+
+ "FreiHAND: A Dataset for Markerless Capture of Hand Pose
+ and Shape from Single RGB Images", ICCV'2019.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ FreiHand keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/freihand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 224x224
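+ # FreiHAND frames are already hand-centered crops, so the whole image is
+ # taken as the box and tightened with a 0.8 padding factor instead of
+ # using a per-instance bounding box.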
+ center, scale = self._xywh2cs(0, 0, 224, 224, 0.8)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate freihand keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['training/rgb/\
+ 00031426.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
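+
+
+ if __name__ == '__main__':
+     # Illustrative sketch only (not part of the upstream module): it shows
+     # the `results` layout consumed by FreiHandDataset.evaluate, filled
+     # with dummy arrays of the documented shapes. `dataset` is assumed to
+     # be an already-built FreiHandDataset, so the call is left commented.
+     results = [{
+         'preds': np.zeros((1, 21, 3), dtype=np.float32),
+         'boxes': np.array([[112., 112., 1., 1., 1., 1.]], dtype=np.float32),
+         'image_paths': ['training/rgb/00031426.jpg'],
+         'bbox_ids': [0],
+     }]
+     # name_value = dataset.evaluate(results, metric=['PCK', 'AUC', 'EPE'])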
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd20846d40ec8f7d9520902d6a289ebedcb07cae
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class HandBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'HandBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgTopDownDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
+ )
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c95cc09fbbe61b16bc36646cff4d394b72a1711
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
@@ -0,0 +1,211 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class HandCocoWholeBodyDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoWholeBodyDataset for top-down hand pose estimation.
+
+ "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO-WholeBody Hand keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody_hand.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
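+ # Each person annotation can contribute up to two samples, one per
+ # annotated and valid hand side.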
+ for type in ['left', 'right']:
+ if obj[f'{type}hand_valid'] and max(
+ obj[f'{type}hand_kpts']) > 0:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+
+ keypoints = np.array(obj[f'{type}hand_kpts']).reshape(
+ -1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(
+ 1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(
+ *obj[f'{type}hand_box'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj[f'{type}hand_box'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate COCO-WholeBody Hand keypoint results. The pose prediction
+ results will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand2d_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fea17fa59aa75ea9846c401a3ad2276fb2b525cc
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand2d_dataset.py
@@ -0,0 +1,306 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class InterHand2DDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """InterHand2.6M 2D dataset for top-down hand pose estimation.
+
+ "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+ Estimation from a Single RGB Image", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ InterHand2.6M keypoint indexes::
+
+ 0: 'thumb4',
+ 1: 'thumb3',
+ 2: 'thumb2',
+ 3: 'thumb1',
+ 4: 'forefinger4',
+ 5: 'forefinger3',
+ 6: 'forefinger2',
+ 7: 'forefinger1',
+ 8: 'middle_finger4',
+ 9: 'middle_finger3',
+ 10: 'middle_finger2',
+ 11: 'middle_finger1',
+ 12: 'ring_finger4',
+ 13: 'ring_finger3',
+ 14: 'ring_finger2',
+ 15: 'ring_finger1',
+ 16: 'pinky_finger4',
+ 17: 'pinky_finger3',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger1',
+ 20: 'wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ camera_file (str): Path to the camera file.
+ joint_file (str): Path to the joint file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ camera_file,
+ joint_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/interhand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.camera_file = camera_file
+ self.joint_file = joint_file
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @staticmethod
+ def _cam2pixel(cam_coord, f, c):
+ """Transform the joints from their camera coordinates to their pixel
+ coordinates.
+
+ Note:
+ - N: number of joints
+
+ Args:
+ cam_coord (ndarray[N, 3]): 3D joints coordinates
+ in the camera coordinate system
+ f (ndarray[2]): focal length of x and y axis
+ c (ndarray[2]): principal point of x and y axis
+
+ Returns:
+ img_coord (ndarray[N, 3]): the coordinates (x, y, 0)
+ in the image plane.
+ """
+ x = cam_coord[:, 0] / (cam_coord[:, 2] + 1e-8) * f[0] + c[0]
+ y = cam_coord[:, 1] / (cam_coord[:, 2] + 1e-8) * f[1] + c[1]
+ z = np.zeros_like(x)
+ img_coord = np.concatenate((x[:, None], y[:, None], z[:, None]), 1)
+ return img_coord
+
+ @staticmethod
+ def _world2cam(world_coord, R, T):
+ """Transform the joints from their world coordinates to their camera
+ coordinates.
+
+ Note:
+ - N: number of joints
+
+ Args:
+ world_coord (ndarray[3, N]): 3D joints coordinates
+ in the world coordinate system
+ R (ndarray[3, 3]): camera rotation matrix
+ T (ndarray[3]): camera position (x, y, z)
+
+ Returns:
+ cam_coord (ndarray[3, N]): 3D joints coordinates
+ in the camera coordinate system
+ """
+ cam_coord = np.dot(R, world_coord - T)
+ return cam_coord
+
+ def _get_db(self):
+ """Load dataset.
+
+ Adapted from 'https://github.com/facebookresearch/InterHand2.6M/'
+ 'blob/master/data/InterHand2.6M/dataset.py'
+ Copyright (c) FaceBook Research, under CC-BY-NC 4.0 license.
+ """
+ with open(self.camera_file, 'r') as f:
+ cameras = json.load(f)
+ with open(self.joint_file, 'r') as f:
+ joints = json.load(f)
+ gt_db = []
+ bbox_id = 0
+ for img_id in self.img_ids:
+ num_joints = self.ann_info['num_joints']
+
+ ann_id = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ ann = self.coco.loadAnns(ann_id)[0]
+ img = self.coco.loadImgs(img_id)[0]
+
+ capture_id = str(img['capture'])
+ camera_name = img['camera']
+ frame_idx = str(img['frame_idx'])
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ camera_pos, camera_rot = np.array(
+ cameras[capture_id]['campos'][camera_name],
+ dtype=np.float32), np.array(
+ cameras[capture_id]['camrot'][camera_name],
+ dtype=np.float32)
+ focal, principal_pt = np.array(
+ cameras[capture_id]['focal'][camera_name],
+ dtype=np.float32), np.array(
+ cameras[capture_id]['princpt'][camera_name],
+ dtype=np.float32)
+ joint_world = np.array(
+ joints[capture_id][frame_idx]['world_coord'], dtype=np.float32)
+ joint_cam = self._world2cam(
+ joint_world.transpose(1, 0), camera_rot,
+ camera_pos.reshape(3, 1)).transpose(1, 0)
+ joint_img = self._cam2pixel(joint_cam, focal, principal_pt)[:, :2]
+ joint_img = joint_img.reshape(2, -1, 2)
+
+ joint_valid = np.array(
+ ann['joint_valid'], dtype=np.float32).reshape(2, -1)
+ # if root is not valid -> root-relative 3D pose is also not valid.
+ # Therefore, mark all joints as invalid
+ for hand in range(2):
+ joint_valid[hand, :] *= joint_valid[hand][-1]
+
+ if np.sum(joint_valid[hand, :]) > 11:
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3),
+ dtype=np.float32)
+ joints_3d[:, :2] = joint_img[hand, :, :]
+ joints_3d_visible[:, :2] = np.minimum(
+ 1, joint_valid[hand, :].reshape(-1, 1))
+
+ # use the tightest bbox enclosing all keypoints as bbox
+ bbox = [img['width'], img['height'], 0, 0]
+ for i in range(num_joints):
+ if joints_3d_visible[i][0]:
+ bbox[0] = min(bbox[0], joints_3d[i][0])
+ bbox[1] = min(bbox[1], joints_3d[i][1])
+ bbox[2] = max(bbox[2], joints_3d[i][0])
+ bbox[3] = max(bbox[3], joints_3d[i][1])
+
+ bbox[2] -= bbox[0]
+ bbox[3] -= bbox[1]
+
+ # use 1.5bbox as input
+ center, scale = self._xywh2cs(*bbox, 1.5)
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': bbox,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate interhand2d keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['Capture12/\
+ 0390_dh_touchROM/cam410209/image62434.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand3d_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand3d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..318d73fbd561c215aa31c83b4df786030400a4d9
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/interhand3d_dataset.py
@@ -0,0 +1,505 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation.top_down_eval import keypoint_epe
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt3dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class InterHand3DDataset(Kpt3dSviewRgbImgTopDownDataset):
+ """InterHand2.6M 3D dataset for top-down hand pose estimation.
+
+ "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+ Estimation from a Single RGB Image", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ InterHand2.6M keypoint indexes::
+
+ 0: 'r_thumb4',
+ 1: 'r_thumb3',
+ 2: 'r_thumb2',
+ 3: 'r_thumb1',
+ 4: 'r_index4',
+ 5: 'r_index3',
+ 6: 'r_index2',
+ 7: 'r_index1',
+ 8: 'r_middle4',
+ 9: 'r_middle3',
+ 10: 'r_middle2',
+ 11: 'r_middle1',
+ 12: 'r_ring4',
+ 13: 'r_ring3',
+ 14: 'r_ring2',
+ 15: 'r_ring1',
+ 16: 'r_pinky4',
+ 17: 'r_pinky3',
+ 18: 'r_pinky2',
+ 19: 'r_pinky1',
+ 20: 'r_wrist',
+ 21: 'l_thumb4',
+ 22: 'l_thumb3',
+ 23: 'l_thumb2',
+ 24: 'l_thumb1',
+ 25: 'l_index4',
+ 26: 'l_index3',
+ 27: 'l_index2',
+ 28: 'l_index1',
+ 29: 'l_middle4',
+ 30: 'l_middle3',
+ 31: 'l_middle2',
+ 32: 'l_middle1',
+ 33: 'l_ring4',
+ 34: 'l_ring3',
+ 35: 'l_ring2',
+ 36: 'l_ring1',
+ 37: 'l_pinky4',
+ 38: 'l_pinky3',
+ 39: 'l_pinky2',
+ 40: 'l_pinky1',
+ 41: 'l_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ camera_file (str): Path to the camera file.
+ joint_file (str): Path to the joint file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ use_gt_root_depth (bool): Whether to use the ground-truth depth of the
+ wrist; if False, the depth from rootnet_result_file is used instead.
+ rootnet_result_file (str): Path to the wrist depth file.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ camera_file,
+ joint_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ use_gt_root_depth=True,
+ rootnet_result_file=None,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/interhand3d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['heatmap3d_depth_bound'] = data_cfg[
+ 'heatmap3d_depth_bound']
+ self.ann_info['heatmap_size_root'] = data_cfg['heatmap_size_root']
+ self.ann_info['root_depth_bound'] = data_cfg['root_depth_bound']
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.camera_file = camera_file
+ self.joint_file = joint_file
+
+ self.use_gt_root_depth = use_gt_root_depth
+ if not self.use_gt_root_depth:
+ assert rootnet_result_file is not None
+ self.rootnet_result_file = rootnet_result_file
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @staticmethod
+ def _encode_handtype(hand_type):
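+ # Two-element indicator [right, left]; interacting hands activate both.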
+ if hand_type == 'right':
+ return np.array([1, 0], dtype=np.float32)
+ elif hand_type == 'left':
+ return np.array([0, 1], dtype=np.float32)
+ elif hand_type == 'interacting':
+ return np.array([1, 1], dtype=np.float32)
+ else:
+ assert 0, f'Unsupported hand type: {hand_type}'
+
+ def _get_db(self):
+ """Load dataset.
+
+ Adapted from 'https://github.com/facebookresearch/InterHand2.6M/'
+ 'blob/master/data/InterHand2.6M/dataset.py'
+ Copyright (c) FaceBook Research, under CC-BY-NC 4.0 license.
+ """
+ with open(self.camera_file, 'r') as f:
+ cameras = json.load(f)
+ with open(self.joint_file, 'r') as f:
+ joints = json.load(f)
+
+ if not self.use_gt_root_depth:
+ rootnet_result = {}
+ with open(self.rootnet_result_file, 'r') as f:
+ rootnet_annot = json.load(f)
+ for i in range(len(rootnet_annot)):
+ rootnet_result[str(
+ rootnet_annot[i]['annot_id'])] = rootnet_annot[i]
+
+ gt_db = []
+ bbox_id = 0
+ for img_id in self.img_ids:
+ num_joints = self.ann_info['num_joints']
+
+ ann_id = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ ann = self.coco.loadAnns(ann_id)[0]
+ img = self.coco.loadImgs(img_id)[0]
+
+ capture_id = str(img['capture'])
+ camera_name = img['camera']
+ frame_idx = str(img['frame_idx'])
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ camera_pos = np.array(
+ cameras[capture_id]['campos'][camera_name], dtype=np.float32)
+ camera_rot = np.array(
+ cameras[capture_id]['camrot'][camera_name], dtype=np.float32)
+ focal = np.array(
+ cameras[capture_id]['focal'][camera_name], dtype=np.float32)
+ principal_pt = np.array(
+ cameras[capture_id]['princpt'][camera_name], dtype=np.float32)
+ joint_world = np.array(
+ joints[capture_id][frame_idx]['world_coord'], dtype=np.float32)
+ joint_cam = self._world2cam(
+ joint_world.transpose(1, 0), camera_rot,
+ camera_pos.reshape(3, 1)).transpose(1, 0)
+ joint_img = self._cam2pixel(joint_cam, focal, principal_pt)[:, :2]
+
+ joint_valid = np.array(
+ ann['joint_valid'], dtype=np.float32).flatten()
+ hand_type = self._encode_handtype(ann['hand_type'])
+ hand_type_valid = ann['hand_type_valid']
+
+ if self.use_gt_root_depth:
+ bbox = np.array(ann['bbox'], dtype=np.float32)
+ # extend the bbox to include some context
+ center, scale = self._xywh2cs(*bbox, 1.25)
+ abs_depth = [joint_cam[20, 2], joint_cam[41, 2]]
+ else:
+ rootnet_ann_data = rootnet_result[str(ann_id[0])]
+ bbox = np.array(rootnet_ann_data['bbox'], dtype=np.float32)
+ # the bboxes have been extended
+ center, scale = self._xywh2cs(*bbox, 1.0)
+ abs_depth = rootnet_ann_data['abs_depth']
+ # 41: 'l_wrist', left hand root
+ # 20: 'r_wrist', right hand root
+ rel_root_depth = joint_cam[41, 2] - joint_cam[20, 2]
+ # if root is not valid, root-relative 3D depth is also invalid.
+ rel_root_valid = joint_valid[20] * joint_valid[41]
+
+ # if root is not valid -> root-relative 3D pose is also not valid.
+ # Therefore, mark all joints as invalid
+ joint_valid[:20] *= joint_valid[20]
+ joint_valid[21:] *= joint_valid[41]
+
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d[:, :2] = joint_img
+ joints_3d[:21, 2] = joint_cam[:21, 2] - joint_cam[20, 2]
+ joints_3d[21:, 2] = joint_cam[21:, 2] - joint_cam[41, 2]
+ joints_3d_visible[...] = np.minimum(1, joint_valid.reshape(-1, 1))
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'hand_type': hand_type,
+ 'hand_type_valid': hand_type_valid,
+ 'rel_root_depth': rel_root_depth,
+ 'rel_root_valid': rel_root_valid,
+ 'abs_depth': abs_depth,
+ 'joints_cam': joint_cam,
+ 'focal': focal,
+ 'princpt': principal_pt,
+ 'dataset': self.dataset_name,
+ 'bbox': bbox,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='MPJPE', **kwargs):
+ """Evaluate interhand2d keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - hand_type (np.ndarray[N, 4]): The first two dimensions are \
+ hand type, scores is the last two dimensions.
+ - rel_root_depth (np.ndarray[N]): The relative depth of left \
+ wrist and right wrist.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['Capture6/\
+ 0012_aokay_upright/cam410061/image4996.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'MRRPE', 'MPJPE', 'Handedness_acc'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['MRRPE', 'MPJPE', 'Handedness_acc']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result.get('preds')
+ if preds is None and 'MPJPE' in metrics:
+ raise KeyError('metric MPJPE is not supported')
+
+ hand_type = result.get('hand_type')
+ if hand_type is None and 'Handedness_acc' in metrics:
+ raise KeyError('metric Handedness_acc is not supported')
+
+ rel_root_depth = result.get('rel_root_depth')
+ if rel_root_depth is None and 'MRRPE' in metrics:
+ raise KeyError('metric MRRPE is not supported')
+
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpt = {
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ }
+
+ if preds is not None:
+ kpt['keypoints'] = preds[i, :, :3].tolist()
+ if hand_type is not None:
+ kpt['hand_type'] = hand_type[i][0:2].tolist()
+ kpt['hand_type_score'] = hand_type[i][2:4].tolist()
+ if rel_root_depth is not None:
+ kpt['rel_root_depth'] = float(rel_root_depth[i])
+
+ kpts.append(kpt)
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _get_accuracy(outputs, gts, masks):
+ """Get accuracy of multi-label classification.
+
+ Note:
+ - batch_size: N
+ - label_num: C
+
+ Args:
+ outputs (np.array[N, C]): predicted multi-label.
+ gts (np.array[N, C]): Groundtruth multi-label.
+ masks (np.array[N, ]): masked outputs will be ignored for
+ accuracy calculation.
+
+ Returns:
+ float: mean accuracy
+ """
+ acc = (outputs == gts).all(axis=1)
+ return np.mean(acc[masks])
+
+ def _report_metric(self, res_file, metrics):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'MRRPE', 'MPJPE', 'Handedness_acc'.
+
+ Returns:
+ list: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ gts_rel_root = []
+ preds_rel_root = []
+ rel_root_masks = []
+ gts_joint_coord_cam = []
+ preds_joint_coord_cam = []
+ single_masks = []
+ interacting_masks = []
+ all_masks = []
+ gts_hand_type = []
+ preds_hand_type = []
+ hand_type_masks = []
+
+ for pred, item in zip(preds, self.db):
+ # mrrpe
+ if 'MRRPE' in metrics:
+ if item['hand_type'].all() and item['joints_3d_visible'][
+ 20, 0] and item['joints_3d_visible'][41, 0]:
+ rel_root_masks.append(True)
+
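+ # Reconstruct both wrist roots in camera space: the left-wrist depth is
+ # the right wrist's absolute depth plus the predicted left-right relative
+ # depth; MRRPE then compares the predicted root offset with the GT one.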
+ pred_left_root_img = np.array(
+ pred['keypoints'][41], dtype=np.float32)[None, :]
+ pred_left_root_img[:, 2] += item['abs_depth'][0] + pred[
+ 'rel_root_depth']
+ pred_left_root_cam = self._pixel2cam(
+ pred_left_root_img, item['focal'], item['princpt'])
+
+ pred_right_root_img = np.array(
+ pred['keypoints'][20], dtype=np.float32)[None, :]
+ pred_right_root_img[:, 2] += item['abs_depth'][0]
+ pred_right_root_cam = self._pixel2cam(
+ pred_right_root_img, item['focal'], item['princpt'])
+
+ preds_rel_root.append(pred_left_root_cam -
+ pred_right_root_cam)
+ gts_rel_root.append(
+ [item['joints_cam'][41] - item['joints_cam'][20]])
+ else:
+ rel_root_masks.append(False)
+ preds_rel_root.append([[0., 0., 0.]])
+ gts_rel_root.append([[0., 0., 0.]])
+
+ if 'MPJPE' in metrics:
+ pred_joint_coord_img = np.array(
+ pred['keypoints'], dtype=np.float32)
+ gt_joint_coord_cam = item['joints_cam'].copy()
+
+ pred_joint_coord_img[:21, 2] += item['abs_depth'][0]
+ pred_joint_coord_img[21:, 2] += item['abs_depth'][1]
+ pred_joint_coord_cam = self._pixel2cam(pred_joint_coord_img,
+ item['focal'],
+ item['princpt'])
+
+ pred_joint_coord_cam[:21] -= pred_joint_coord_cam[20]
+ pred_joint_coord_cam[21:] -= pred_joint_coord_cam[41]
+ gt_joint_coord_cam[:21] -= gt_joint_coord_cam[20]
+ gt_joint_coord_cam[21:] -= gt_joint_coord_cam[41]
+
+ preds_joint_coord_cam.append(pred_joint_coord_cam)
+ gts_joint_coord_cam.append(gt_joint_coord_cam)
+
+ mask = (np.array(item['joints_3d_visible'])[:, 0]) > 0
+
+ if item['hand_type'].all():
+ single_masks.append(
+ np.zeros(self.ann_info['num_joints'], dtype=bool))
+ interacting_masks.append(mask)
+ all_masks.append(mask)
+ else:
+ single_masks.append(mask)
+ interacting_masks.append(
+ np.zeros(self.ann_info['num_joints'], dtype=bool))
+ all_masks.append(mask)
+
+ if 'Handedness_acc' in metrics:
+ pred_hand_type = np.array(pred['hand_type'], dtype=int)
+ preds_hand_type.append(pred_hand_type)
+ gts_hand_type.append(item['hand_type'])
+ hand_type_masks.append(item['hand_type_valid'] > 0)
+
+ gts_rel_root = np.array(gts_rel_root, dtype=np.float32)
+ preds_rel_root = np.array(preds_rel_root, dtype=np.float32)
+ rel_root_masks = np.array(rel_root_masks, dtype=bool)[:, None]
+ gts_joint_coord_cam = np.array(gts_joint_coord_cam, dtype=np.float32)
+ preds_joint_coord_cam = np.array(
+ preds_joint_coord_cam, dtype=np.float32)
+ single_masks = np.array(single_masks, dtype=bool)
+ interacting_masks = np.array(interacting_masks, dtype=bool)
+ all_masks = np.array(all_masks, dtype=bool)
+ gts_hand_type = np.array(gts_hand_type, dtype=int)
+ preds_hand_type = np.array(preds_hand_type, dtype=int)
+ hand_type_masks = np.array(hand_type_masks, dtype=bool)
+
+ if 'MRRPE' in metrics:
+ info_str.append(('MRRPE',
+ keypoint_epe(preds_rel_root, gts_rel_root,
+ rel_root_masks)))
+
+ if 'MPJPE' in metrics:
+ info_str.append(('MPJPE_all',
+ keypoint_epe(preds_joint_coord_cam,
+ gts_joint_coord_cam, all_masks)))
+ info_str.append(('MPJPE_single',
+ keypoint_epe(preds_joint_coord_cam,
+ gts_joint_coord_cam, single_masks)))
+ info_str.append(
+ ('MPJPE_interacting',
+ keypoint_epe(preds_joint_coord_cam, gts_joint_coord_cam,
+ interacting_masks)))
+
+ if 'Handedness_acc' in metrics:
+ info_str.append(('Handedness_acc',
+ self._get_accuracy(preds_hand_type, gts_hand_type,
+ hand_type_masks)))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/onehand10k_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/onehand10k_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9783cab16c7e3c3a9600005008e985d112e71a07
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/onehand10k_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class OneHand10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """OneHand10K dataset for top-down hand pose estimation.
+
+ "Mask-pose Cascaded CNN for 2D Hand Pose Estimation from
+ Single Color Images", TCSVT'2019.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ OneHand10K keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/onehand10k.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate onehand10k keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['Test/source/0.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
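+
+ Example (shape sketch of one ``results`` item; the arrays are
+ dummies and the image path is hypothetical):
+ >>> result = dict(
+ >>> preds=np.zeros((1, 21, 3), dtype=np.float32),
+ >>> boxes=np.zeros((1, 6), dtype=np.float32),
+ >>> image_paths=['Test/source/0.jpg'],
+ >>> bbox_ids=[0])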
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1d7fc6af1ec0dee22a81e2dff8819827062a3d5
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/panoptic_hand2d_dataset.py
@@ -0,0 +1,208 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class PanopticDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Panoptic dataset for top-down hand pose estimation.
+
+ "Hand Keypoint Detection in Single Images using Multiview
+ Bootstrapping", CVPR'2017.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Panoptic keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/panoptic_hand2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # The bbox is the tightest bbox enclosing keypoints.
+ # The paper uses a 2.2x-padded bbox as input, while
+ # we use a 1.76x (2.2 * 0.8) padding.
+ center, scale = self._xywh2cs(*obj['bbox'][:4], 1.76)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
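+ # head_size is stored because the PCKh metric normalizes errors by it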
+ 'head_size': obj['head_size'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate panoptic keypoint results. The pose prediction results will
+ be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['hand_labels/\
+ manual_test/000648952_02_l.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCKh', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/hand/rhd2d_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/hand/rhd2d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..3667f5fb672f71b08331706656049734cdfa790d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/hand/rhd2d_dataset.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class Rhd2DDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Rendered Handpose Dataset for top-down hand pose estimation.
+
+ "Learning to Estimate 3D Hand Pose from Single RGB Images",
+ ICCV'2017.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Rhd keypoint indexes::
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/rhd2d.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.ann_info['use_different_joint_weights'] = False
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # the ori image is 224x224
+ center, scale = self._xywh2cs(*obj['bbox'][:4], padding=1.25)
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate rhd keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example,
+ ['training/rgb/00031426.jpg']
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'AUC', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..14297c7261aed14f814e2e986f315dedd51702be
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .mesh_adv_dataset import MeshAdversarialDataset
+from .mesh_h36m_dataset import MeshH36MDataset
+from .mesh_mix_dataset import MeshMixDataset
+from .mosh_dataset import MoshDataset
+
+__all__ = [
+ 'MeshH36MDataset', 'MoshDataset', 'MeshMixDataset',
+ 'MeshAdversarialDataset'
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd9ba39d50415d2897cd14e32435feee397c2963
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_adv_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS, build_dataset
+
+
+@DATASETS.register_module()
+class MeshAdversarialDataset(Dataset):
+ """Mix Dataset for the adversarial training in 3D human mesh estimation
+ task.
+
+ The dataset combines data from two datasets and
+ returns a dict containing data from both datasets.
+
+ Args:
+ train_dataset (Dataset): Dataset for 3D human mesh estimation.
+ adversarial_dataset (Dataset): Dataset for adversarial learning,
+ provides real SMPL parameters.
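+
+ Example (hypothetical configs; the annotation paths are placeholders
+ and both dataset types must be registered in ``DATASETS``):
+ >>> data_cfg = dict(
+ >>> image_size=[256, 256],
+ >>> iuv_size=[64, 64],
+ >>> num_joints=24,
+ >>> use_IUV=False,
+ >>> uv_type='BF')
+ >>> adv_dataset = MeshAdversarialDataset(
+ >>> train_dataset=dict(
+ >>> type='MeshH36MDataset',
+ >>> ann_file='path/to/h36m_train.npz',
+ >>> img_prefix='path/to/h36m',
+ >>> data_cfg=data_cfg,
+ >>> pipeline=[]),
+ >>> adversarial_dataset=dict(
+ >>> type='MoshDataset',
+ >>> ann_file='path/to/mosh_train.npz',
+ >>> pipeline=[]))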
+ """
+
+ def __init__(self, train_dataset, adversarial_dataset):
+ super().__init__()
+ self.train_dataset = build_dataset(train_dataset)
+ self.adversarial_dataset = build_dataset(adversarial_dataset)
+ self.length = len(self.train_dataset)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, i):
+ """Given index, get the data from train dataset and randomly sample an
+ item from adversarial dataset.
+
+ Return a dict containing data from train and adversarial dataset.
+ """
+ data = self.train_dataset[i]
+ ind_adv = np.random.randint(
+ low=0, high=len(self.adversarial_dataset), dtype=int)
+ data.update(self.adversarial_dataset[ind_adv %
+ len(self.adversarial_dataset)])
+ return data
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..79c8a8ac9040463152cb779ffff146ef5391b241
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_base_dataset.py
@@ -0,0 +1,155 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+import os
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.pipelines import Compose
+
+
+class MeshBaseDataset(Dataset, metaclass=ABCMeta):
+ """Base dataset for 3D human mesh estimation task. In 3D humamesh
+ estimation task, all datasets share this BaseDataset for training and have
+ their own evaluate function.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ This dataset can only be used for training.
+ For evaluation, subclass should write an extra evaluate function.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
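+
+ Example (layout sketch of a minimal annotation ``.npz``; only the keys
+ read unconditionally by ``_get_db`` are shown, and the file name is
+ hypothetical):
+ >>> np.savez(
+ >>> 'toy_mesh_annotations.npz',
+ >>> imgname=np.array(['images/000001.jpg']),
+ >>> center=np.array([[112., 112.]], dtype=np.float32),
+ >>> scale=np.array([1.0], dtype=np.float32))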
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ test_mode=False):
+
+ self.image_info = {}
+ self.ann_info = {}
+
+ self.ann_file = ann_file
+ self.img_prefix = img_prefix
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
+ self.ann_info['iuv_size'] = np.array(data_cfg['iuv_size'])
+ self.ann_info['num_joints'] = data_cfg['num_joints']
+ self.ann_info['flip_pairs'] = None
+ self.db = []
+ self.pipeline = Compose(self.pipeline)
+
+ # flip_pairs
+ # For all mesh dataset, we use 24 joints as CMR and SPIN.
+ self.ann_info['flip_pairs'] = [[0, 5], [1, 4], [2, 3], [6, 11],
+ [7, 10], [8, 9], [20, 21], [22, 23]]
+ self.ann_info['use_different_joint_weights'] = False
+ assert self.ann_info['num_joints'] == 24
+ self.ann_info['joint_weights'] = np.ones([24, 1], dtype=np.float32)
+
+ self.ann_info['uv_type'] = data_cfg['uv_type']
+ self.ann_info['use_IUV'] = data_cfg['use_IUV']
+ uv_type = self.ann_info['uv_type']
+ self.iuv_prefix = os.path.join(self.img_prefix, f'{uv_type}_IUV_gt')
+ self.db = self._get_db(ann_file)
+
+ def _get_db(self, ann_file):
+ """Load dataset."""
+ data = np.load(ann_file)
+ tmpl = dict(
+ image_file=None,
+ center=None,
+ scale=None,
+ rotation=0,
+ joints_2d=None,
+ joints_2d_visible=None,
+ joints_3d=None,
+ joints_3d_visible=None,
+ gender=None,
+ pose=None,
+ beta=None,
+ has_smpl=0,
+ iuv_file=None,
+ has_iuv=0)
+ gt_db = []
+
+ _imgnames = data['imgname']
+ _scales = data['scale'].astype(np.float32)
+ _centers = data['center'].astype(np.float32)
+ dataset_len = len(_imgnames)
+
+ # Get 2D keypoints
+ if 'part' in data.keys():
+ _keypoints = data['part'].astype(np.float32)
+ else:
+ _keypoints = np.zeros((dataset_len, 24, 3), dtype=np.float32)
+
+ # Get gt 3D joints, if available
+ if 'S' in data.keys():
+ _joints_3d = data['S'].astype(np.float32)
+ else:
+ _joints_3d = np.zeros((dataset_len, 24, 4), dtype=np.float32)
+
+ # Get gt SMPL parameters, if available
+ if 'pose' in data.keys() and 'shape' in data.keys():
+ _poses = data['pose'].astype(np.float32)
+ _betas = data['shape'].astype(np.float32)
+ has_smpl = 1
+ else:
+ _poses = np.zeros((dataset_len, 72), dtype=np.float32)
+ _betas = np.zeros((dataset_len, 10), dtype=np.float32)
+ has_smpl = 0
+
+ # Get gender data, if available
+ if 'gender' in data.keys():
+ _genders = data['gender']
+ _genders = np.array([str(g) != 'm' for g in _genders]).astype(int)
+ else:
+ _genders = -1 * np.ones(dataset_len).astype(int)
+
+ # Get IUV image, if available
+ if 'iuv_names' in data.keys():
+ _iuv_names = data['iuv_names']
+ has_iuv = has_smpl
+ else:
+ _iuv_names = [''] * dataset_len
+ has_iuv = 0
+
+ for i in range(len(_imgnames)):
+ newitem = cp.deepcopy(tmpl)
+ newitem['image_file'] = os.path.join(self.img_prefix, _imgnames[i])
+ newitem['scale'] = np.array([_scales[i], _scales[i]])
+ newitem['center'] = _centers[i]
+ newitem['joints_2d'] = _keypoints[i, :, :2]
+ newitem['joints_2d_visible'] = _keypoints[i, :, -1][:, None]
+ newitem['joints_3d'] = _joints_3d[i, :, :3]
+ newitem['joints_3d_visible'] = _joints_3d[i, :, -1][:, None]
+ newitem['pose'] = _poses[i]
+ newitem['beta'] = _betas[i]
+ newitem['has_smpl'] = has_smpl
+ newitem['gender'] = _genders[i]
+ newitem['iuv_file'] = os.path.join(self.iuv_prefix, _iuv_names[i])
+ newitem['has_iuv'] = has_iuv
+ gt_db.append(newitem)
+ return gt_db
+
+ def __len__(self, ):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ results = cp.deepcopy(self.db[idx])
+ results['ann_info'] = self.ann_info
+ return self.pipeline(results)
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac9ead1f5c1c1de40604c6830f6b0c762ad70eb
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_h36m_dataset.py
@@ -0,0 +1,101 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+
+from mmpose.core.evaluation import keypoint_mpjpe
+from mmpose.datasets.builder import DATASETS
+from .mesh_base_dataset import MeshBaseDataset
+
+
+@DATASETS.register_module()
+class MeshH36MDataset(MeshBaseDataset):
+ """Human3.6M Dataset for 3D human mesh estimation. It inherits all function
+ from MeshBaseDataset and has its own evaluate function.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def evaluate(self, outputs, res_folder, metric='joint_error', logger=None):
+ """Evaluate 3D keypoint results."""
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['joint_error']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ res_file = os.path.join(res_folder, 'result_keypoints.json')
+ kpts = []
+ for out in outputs:
+ for (keypoints, image_path) in zip(out['keypoints_3d'],
+ out['image_path']):
+ kpts.append({
+ 'keypoints': keypoints.tolist(),
+ 'image': image_path,
+ })
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file)
+ name_value = OrderedDict(info_str)
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file):
+ """Keypoint evaluation.
+
+ Report mean per joint position error (MPJPE) and mean per joint
+ position error after rigid alignment (MPJPE-PA)
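+
+ Example (synthetic arrays showing how ``keypoint_mpjpe`` is called
+ below; values are illustrative):
+ >>> pred = np.random.rand(4, 14, 3)
+ >>> gt = pred.copy()
+ >>> mask = np.ones((4, 14), dtype=bool)
+ >>> err = keypoint_mpjpe(pred, gt, mask) # 0.0 for identical joints
+ >>> err_pa = keypoint_mpjpe(pred, gt, mask, alignment='procrustes')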
+ """
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ pred_joints_3d = [pred['keypoints'] for pred in preds]
+ gt_joints_3d = [item['joints_3d'] for item in self.db]
+ gt_joints_visible = [item['joints_3d_visible'] for item in self.db]
+
+ pred_joints_3d = np.array(pred_joints_3d)
+ gt_joints_3d = np.array(gt_joints_3d)
+ gt_joints_visible = np.array(gt_joints_visible)
+
+ # we only evaluate on 14 lsp joints
+ joint_mapper = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18]
+ pred_joints_3d = pred_joints_3d[:, joint_mapper, :]
+ pred_pelvis = (pred_joints_3d[:, 2] + pred_joints_3d[:, 3]) / 2
+ pred_joints_3d = pred_joints_3d - pred_pelvis[:, None, :]
+
+ gt_joints_3d = gt_joints_3d[:, joint_mapper, :]
+ gt_pelvis = (gt_joints_3d[:, 2] + gt_joints_3d[:, 3]) / 2
+ gt_joints_3d = gt_joints_3d - gt_pelvis[:, None, :]
+ gt_joints_visible = gt_joints_visible[:, joint_mapper, 0] > 0
+
+ mpjpe = keypoint_mpjpe(pred_joints_3d, gt_joints_3d, gt_joints_visible)
+ mpjpe_pa = keypoint_mpjpe(
+ pred_joints_3d,
+ gt_joints_3d,
+ gt_joints_visible,
+ alignment='procrustes')
+
+ info_str = []
+ info_str.append(('MPJPE', mpjpe * 1000))
+ info_str.append(('MPJPE-PA', mpjpe_pa * 1000))
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..244a7c323c6c69aa2a00e9adfb0a11e08182c004
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mesh_mix_dataset.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import ConcatDataset, Dataset, WeightedRandomSampler
+
+from mmpose.datasets.builder import DATASETS
+from .mesh_base_dataset import MeshBaseDataset
+
+
+@DATASETS.register_module()
+class MeshMixDataset(Dataset, metaclass=ABCMeta):
+ """Mix Dataset for 3D human mesh estimation.
+
+ The dataset combines data from multiple datasets (MeshBaseDataset) and
+ samples the data from the different datasets with the provided proportions.
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Args:
+ configs (list): List of configs for multiple datasets.
+ partition (list): Sample proportions of the multiple datasets. The length
+ of partition should be the same as that of configs. The elements
+ should be non-negative and need not sum to one.
+
+ Example:
+ >>> from mmpose.datasets import MeshMixDataset
+ >>> data_cfg = dict(
+ >>> image_size=[256, 256],
+ >>> iuv_size=[64, 64],
+ >>> num_joints=24,
+ >>> use_IUV=True,
+ >>> uv_type='BF')
+ >>>
+ >>> mix_dataset = MeshMixDataset(
+ >>> configs=[
+ >>> dict(
+ >>> ann_file='tests/data/h36m/test_h36m.npz',
+ >>> img_prefix='tests/data/h36m',
+ >>> data_cfg=data_cfg,
+ >>> pipeline=[]),
+ >>> dict(
+ >>> ann_file='tests/data/h36m/test_h36m.npz',
+ >>> img_prefix='tests/data/h36m',
+ >>> data_cfg=data_cfg,
+ >>> pipeline=[]),
+ >>> ],
+ >>> partition=[0.6, 0.4])
+ """
+
+ def __init__(self, configs, partition):
+ """Load data from multiple datasets."""
+ assert min(partition) >= 0
+ datasets = [MeshBaseDataset(**cfg) for cfg in configs]
+ self.dataset = ConcatDataset(datasets)
+ self.length = max(len(ds) for ds in datasets)
+ weights = [
+ np.ones(len(ds)) * p / len(ds)
+ for (p, ds) in zip(partition, datasets)
+ ]
+ weights = np.concatenate(weights, axis=0)
+ self.sampler = WeightedRandomSampler(weights, 1)
+
+ def __len__(self):
+ """Get the size of the dataset."""
+ return self.length
+
+ def __getitem__(self, idx):
+ """Given index, sample the data from multiple datasets with the given
+ proportion."""
+ idx_new = list(self.sampler)[0]
+ return self.dataset[idx_new]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/mesh/mosh_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mosh_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..3185265e7d6e666d8c9096244c3df4104bcdb020
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/mesh/mosh_dataset.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+from abc import ABCMeta
+
+import numpy as np
+from torch.utils.data import Dataset
+
+from mmpose.datasets.builder import DATASETS
+from mmpose.datasets.pipelines import Compose
+
+
+@DATASETS.register_module()
+class MoshDataset(Dataset, metaclass=ABCMeta):
+ """Mosh Dataset for the adversarial training in 3D human mesh estimation
+ task.
+
+ The dataset returns a dict containing real-world SMPL parameters.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self, ann_file, pipeline, test_mode=False):
+
+ self.ann_file = ann_file
+ self.pipeline = pipeline
+ self.test_mode = test_mode
+
+ self.db = self._get_db(ann_file)
+ self.pipeline = Compose(self.pipeline)
+
+ @staticmethod
+ def _get_db(ann_file):
+ """Load dataset."""
+ data = np.load(ann_file)
+ _betas = data['shape'].astype(np.float32)
+ _poses = data['pose'].astype(np.float32)
+ tmpl = dict(
+ pose=None,
+ beta=None,
+ )
+ gt_db = []
+ dataset_len = len(_betas)
+
+ for i in range(dataset_len):
+ newitem = cp.deepcopy(tmpl)
+ newitem['pose'] = _poses[i]
+ newitem['beta'] = _betas[i]
+ gt_db.append(newitem)
+ return gt_db
+
+ def __len__(self, ):
+ """Get the size of the dataset."""
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """Get the sample given index."""
+ item = cp.deepcopy(self.db[idx])
+ trivial, pose, beta = \
+ np.zeros(3, dtype=np.float32), item['pose'], item['beta']
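+ # mosh_theta layout: 3 placeholder zeros followed by the pose vector
+ # (typically 72 SMPL axis-angle values) and the shape betas (typically 10)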
+ results = {
+ 'mosh_theta':
+ np.concatenate((trivial, pose, beta), axis=0).astype(np.float32)
+ }
+ return self.pipeline(results)
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/__init__.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc5b46a8b1e3d68cda6ab6564eb748987a9a9e8d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/__init__.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .topdown_aic_dataset import TopDownAicDataset
+from .topdown_coco_dataset import TopDownCocoDataset
+from .topdown_coco_wholebody_dataset import TopDownCocoWholeBodyDataset
+from .topdown_crowdpose_dataset import TopDownCrowdPoseDataset
+from .topdown_h36m_dataset import TopDownH36MDataset
+from .topdown_halpe_dataset import TopDownHalpeDataset
+from .topdown_jhmdb_dataset import TopDownJhmdbDataset
+from .topdown_mhp_dataset import TopDownMhpDataset
+from .topdown_mpii_dataset import TopDownMpiiDataset
+from .topdown_mpii_trb_dataset import TopDownMpiiTrbDataset
+from .topdown_ochuman_dataset import TopDownOCHumanDataset
+from .topdown_posetrack18_dataset import TopDownPoseTrack18Dataset
+from .topdown_posetrack18_video_dataset import TopDownPoseTrack18VideoDataset
+
+__all__ = [
+ 'TopDownAicDataset',
+ 'TopDownCocoDataset',
+ 'TopDownCocoWholeBodyDataset',
+ 'TopDownCrowdPoseDataset',
+ 'TopDownMpiiDataset',
+ 'TopDownMpiiTrbDataset',
+ 'TopDownOCHumanDataset',
+ 'TopDownPoseTrack18Dataset',
+ 'TopDownJhmdbDataset',
+ 'TopDownMhpDataset',
+ 'TopDownH36MDataset',
+ 'TopDownHalpeDataset',
+ 'TopDownPoseTrack18VideoDataset',
+]
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..13c41dfea92189e113dd291afa3771547881efbc
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_aic_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownAicDataset(TopDownCocoDataset):
+ """AicDataset dataset for top-down pose estimation.
+
+ "AI Challenger : A Large-scale Dataset for Going Deeper
+ in Image Understanding", arXiv'2017.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ AIC keypoint indexes::
+
+ 0: "right_shoulder",
+ 1: "right_elbow",
+ 2: "right_wrist",
+ 3: "left_shoulder",
+ 4: "left_elbow",
+ 5: "left_wrist",
+ 6: "right_hip",
+ 7: "right_knee",
+ 8: "right_ankle",
+ 9: "left_hip",
+ 10: "left_knee",
+ 11: "left_ankle",
+ 12: "head_top",
+ 13: "neck"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/aic.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_base_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc99576716ea5fc77af277e3e764c2c9b5dd158f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_base_dataset.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta
+
+from torch.utils.data import Dataset
+
+
+class TopDownBaseDataset(Dataset, metaclass=ABCMeta):
+ """This class has been deprecated and replaced by
+ Kpt2dSviewRgbImgTopDownDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'TopDownBaseDataset has been replaced by '
+ 'Kpt2dSviewRgbImgTopDownDataset,'
+ 'check https://github.com/open-mmlab/mmpose/pull/663 for details.')
+ )
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..664c88149634bb63966438508af52f6d746e9aef
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
@@ -0,0 +1,405 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownCocoDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """CocoDataset dataset for top-down pose estimation.
+
+ "Microsoft COCO: Common Objects in Context", ECCV'2014.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO keypoint indexes::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
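+
+ Example (sketch of a typical ``data_cfg``; the bbox/NMS keys are the
+ ones read in ``__init__``, the remaining values mirror a common COCO
+ 256x192 top-down config and are illustrative only):
+ >>> data_cfg = dict(
+ >>> image_size=[192, 256],
+ >>> heatmap_size=[48, 64],
+ >>> num_output_channels=17,
+ >>> num_joints=17,
+ >>> use_gt_bbox=True,
+ >>> bbox_file='',
+ >>> det_bbox_thr=0.0,
+ >>> use_nms=True,
+ >>> soft_nms=False,
+ >>> nms_thr=1.0,
+ >>> oks_thr=0.9,
+ >>> vis_thr=0.2)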
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ if (not self.test_mode) or self.use_gt_bbox:
+ # use ground truth bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ else:
+ # use bbox from detection
+ gt_db = self._load_coco_person_detection_results()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+
+ Args:
+ img_id: coco image id
+
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _load_coco_person_detection_results(self):
+ """Load coco person detection results."""
+ num_joints = self.ann_info['num_joints']
+ all_boxes = None
+ with open(self.bbox_file, 'r') as f:
+ all_boxes = json.load(f)
+
+ if not all_boxes:
+ raise ValueError('=> Failed to load %s!' % self.bbox_file)
+
+ print(f'=> Total boxes: {len(all_boxes)}')
+
+ kpt_db = []
+ bbox_id = 0
+ for det_res in all_boxes:
+ if det_res['category_id'] != 1:
+ continue
+
+ image_file = osp.join(self.img_prefix,
+ self.id2name[det_res['image_id']])
+ box = det_res['bbox']
+ score = det_res['score']
+
+ if score < self.det_bbox_thr:
+ continue
+
+ center, scale = self._xywh2cs(*box[:4])
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
+ kpt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'bbox': box[:4],
+ 'bbox_score': score,
+ 'dataset': self.dataset_name,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ print(f'=> Total boxes after filter '
+ f'low score@{self.det_bbox_thr}: {bbox_id}')
+ return kpt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate coco keypoint results. The pose prediction results will be
+ saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017\
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = []
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts.append([img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts.append(img_kpts)
+
+ self._write_coco_keypoint_results(valid_kpts, res_file)
+
+ info_str = self._do_python_keypoint_eval(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _write_coco_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+ data_pack = [{
+ 'cat_id': self._class_to_coco_ind[cls],
+ 'cls_ind': cls_ind,
+ 'cls': cls,
+ 'ann_type': 'keypoints',
+ 'keypoints': keypoints
+ } for cls_ind, cls in enumerate(self.classes)
+ if not cls == '__background__']
+
+ results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
+
+ with open(res_file, 'w') as f:
+ json.dump(results, f, sort_keys=True, indent=4)
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point.tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+ coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ for img_id, persons in kpts.items():
+ num = len(persons)
+ kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+ for i in range(num - 1, 0, -1):
+ if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+ del kpts[img_id][i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..791a3c5790d68ef480bc54d94cf377c06e5f0383
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_coco_wholebody_dataset.py
@@ -0,0 +1,274 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import warnings
+
+import numpy as np
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownCocoWholeBodyDataset(TopDownCocoDataset):
+ """CocoWholeBodyDataset dataset for top-down pose estimation.
+
+ "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO-WholeBody keypoint indexes::
+
+ 0-16: 17 body keypoints,
+ 17-22: 6 foot keypoints,
+ 23-90: 68 face keypoints,
+ 91-132: 42 hand keypoints
+
+ In total, we have 133 keypoints for wholebody pose estimation.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/coco_wholebody.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.body_num = 17
+ self.foot_num = 6
+ self.face_num = 68
+ self.left_hand_num = 21
+ self.right_hand_num = 21
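+ # 17 body + 6 foot + 68 face + 21 + 21 hand keypoints = 133 in total,
+ # matching the COCO-WholeBody keypoint layout described in the class
+ # docstring.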
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ rec = []
+ bbox_id = 0
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
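+ # Concatenate the body, foot, face and hand annotation fields into a
+ # single (133, 3) keypoint array; visibility (first two columns) is set
+ # to 1 wherever the annotated visibility flag is positive.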
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _coco_keypoint_results_one_category_kernel(self, data_pack):
+ """Get coco keypoint results."""
+ cat_id = data_pack['cat_id']
+ keypoints = data_pack['keypoints']
+ cat_results = []
+
+ for img_kpts in keypoints:
+ if len(img_kpts) == 0:
+ continue
+
+ _key_points = np.array(
+ [img_kpt['keypoints'] for img_kpt in img_kpts])
+ key_points = _key_points.reshape(-1,
+ self.ann_info['num_joints'] * 3)
+
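+ # Each keypoint contributes (x, y, score), so the cumulative group
+ # sizes are multiplied by 3 to obtain slice boundaries into the
+ # flattened prediction vector.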
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num,
+ self.left_hand_num, self.right_hand_num
+ ]) * 3
+
+ result = [{
+ 'image_id': img_kpt['image_id'],
+ 'category_id': cat_id,
+ 'keypoints': key_point[cuts[0]:cuts[1]].tolist(),
+ 'foot_kpts': key_point[cuts[1]:cuts[2]].tolist(),
+ 'face_kpts': key_point[cuts[2]:cuts[3]].tolist(),
+ 'lefthand_kpts': key_point[cuts[3]:cuts[4]].tolist(),
+ 'righthand_kpts': key_point[cuts[4]:cuts[5]].tolist(),
+ 'score': float(img_kpt['score']),
+ 'center': img_kpt['center'].tolist(),
+ 'scale': img_kpt['scale'].tolist()
+ } for img_kpt, key_point in zip(img_kpts, key_points)]
+
+ cat_results.extend(result)
+
+ return cat_results
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
+
+ cuts = np.cumsum([
+ 0, self.body_num, self.foot_num, self.face_num, self.left_hand_num,
+ self.right_hand_num
+ ])
+
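+ # Run COCO keypoint evaluation separately for each part (body, foot,
+ # face, left/right hand) with the matching subset of sigmas, then once
+ # more for the full 133-keypoint set; only the whole-body stats are
+ # collected into the returned info_str.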
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_body',
+ self.sigmas[cuts[0]:cuts[1]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_foot',
+ self.sigmas[cuts[1]:cuts[2]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_face',
+ self.sigmas[cuts[2]:cuts[3]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_lefthand',
+ self.sigmas[cuts[3]:cuts[4]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_righthand',
+ self.sigmas[cuts[4]:cuts[5]],
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_wholebody',
+ self.sigmas,
+ use_area=True)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9b196f744aa67d46c420612f9476b1d73c68cf3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_crowdpose_dataset.py
@@ -0,0 +1,110 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownCrowdPoseDataset(TopDownCocoDataset):
+ """CrowdPoseDataset dataset for top-down pose estimation.
+
+ "CrowdPose: Efficient Crowded Scenes Pose Estimation and
+ A New Benchmark", CVPR'2019.
+ More details can be found in the `paper
+ `__.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ CrowdPose keypoint indexes::
+
+ 0: 'left_shoulder',
+ 1: 'right_shoulder',
+ 2: 'left_elbow',
+ 3: 'right_elbow',
+ 4: 'left_wrist',
+ 5: 'right_wrist',
+ 6: 'left_hip',
+ 7: 'right_hip',
+ 8: 'left_knee',
+ 9: 'right_knee',
+ 10: 'left_ankle',
+ 11: 'right_ankle',
+ 12: 'top_head',
+ 13: 'neck'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/crowdpose.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
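+ # CrowdPose uses the 'keypoints_crowd' variant of the COCO metric,
+ # which reports AP over easy/medium/hard crowd-index splits (the
+ # AP(E)/AP(M)/AP(H) entries below); use_area=False is assumed to mean
+ # OKS is not normalised by an annotated segmentation area.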
+ coco_eval = COCOeval(
+ self.coco,
+ coco_det,
+ 'keypoints_crowd',
+ self.sigmas,
+ use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)',
+ 'AP(H)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bc49e3a2994037993bdb44a6ba59e44eeef0270
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_h36m_dataset.py
@@ -0,0 +1,206 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownH36MDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Human3.6M dataset for top-down 2D pose estimation.
+
+ "Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human
+ Sensing in Natural Environments", TPAMI'2014.
+ More details can be found in the `paper
+ `__.
+
+ Human3.6M keypoint indexes::
+
+ 0: 'root (pelvis)',
+ 1: 'right_hip',
+ 2: 'right_knee',
+ 3: 'right_foot',
+ 4: 'left_hip',
+ 5: 'left_knee',
+ 6: 'left_foot',
+ 7: 'spine',
+ 8: 'thorax',
+ 9: 'neck_base',
+ 10: 'head',
+ 11: 'left_shoulder',
+ 12: 'left_elbow',
+ 13: 'left_wrist',
+ 14: 'right_shoulder',
+ 15: 'right_elbow',
+ 16: 'right_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/h36m.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ gt_db = []
+ bbox_id = 0
+ num_joints = self.ann_info['num_joints']
+ for img_id in self.img_ids:
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ if max(obj['keypoints']) == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ # use 1.25 padded bbox as input
+ center, scale = self._xywh2cs(*obj['bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox': obj['bbox'],
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate human3.6m 2d keypoint results. The pose prediction results
+ will be saved in `${res_folder}/result_keypoints.json`.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0],
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['data/coco/val2017
+ /000000393226.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap
+ - bbox_id (list(int)).
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'PCK'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'EPE']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7042daa29ec2b2b8eafb16a1404be32cf761d678
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_halpe_dataset.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownHalpeDataset(TopDownCocoDataset):
+ """HalpeDataset for top-down pose estimation.
+
+ 'https://github.com/Fang-Haoshu/Halpe-FullBody'
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ Halpe keypoint indexes::
+
+ 0-19: 20 body keypoints,
+ 20-25: 6 foot keypoints,
+ 26-93: 68 face keypoints,
+ 94-135: 42 hand keypoints
+
+ In total, we have 136 keypoints for wholebody pose estimation.
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/halpe.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.ann_info['use_different_joint_weights'] = False
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..5204f04d869c59b9fe9b9f337714d1aa6f555c9e
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_jhmdb_dataset.py
@@ -0,0 +1,361 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.core.evaluation.top_down_eval import keypoint_pck_accuracy
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownJhmdbDataset(TopDownCocoDataset):
+ """JhmdbDataset dataset for top-down pose estimation.
+
+ "Towards understanding action recognition", ICCV'2013.
+ More details can be found in the `paper
+ `__
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ sub-JHMDB keypoint indexes::
+
+ 0: "neck",
+ 1: "belly",
+ 2: "head",
+ 3: "right_shoulder",
+ 4: "left_shoulder",
+ 5: "right_hip",
+ 6: "left_hip",
+ 7: "right_elbow",
+ 8: "left_elbow",
+ 9: "right_knee",
+ 10: "left_knee",
+ 11: "right_wrist",
+ 12: "left_wrist",
+ 13: "right_ankle",
+ 14: "left_ankle"
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/jhmdb.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ # JHMDB uses matlab format, index is 1-based,
+ # we should first convert to 0-based index
+ x -= 1
+ y -= 1
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ rec = []
+ bbox_id = 0
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+
+ # JHMDB uses matlab format, index is 1-based,
+ # we should first convert to 0-based index
+ joints_3d[:, :2] = keypoints[:, :2] - 1
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ rec.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': f'{img_id}_{bbox_id:03}'
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _write_keypoint_results(self, keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file, metrics, pck_thr=0.2):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file storing the prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'tPCK'.
+ pck_thr (float): PCK threshold, default as 0.2.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ outputs = []
+ gts = []
+ masks = []
+ threshold_bbox = []
+ threshold_torso = []
+
+ for pred, item in zip(preds, self.db):
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
+ gts.append(np.array(item['joints_3d'])[:, :-1])
+ masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
+ if 'PCK' in metrics:
+ bbox = np.array(item['bbox'])
+ bbox_thr = np.max(bbox[2:])
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+
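+ # tPCK normalises the keypoint error by the torso size, taken as the
+ # distance between ground-truth joints 4 and 5 (left_shoulder and
+ # right_hip in the index list above); if the ground-truth torso is
+ # degenerate (< 1), the predicted torso length is used instead and a
+ # warning is issued.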
+ if 'tPCK' in metrics:
+ torso_thr = np.linalg.norm(item['joints_3d'][4, :2] -
+ item['joints_3d'][5, :2])
+ if torso_thr < 1:
+ torso_thr = np.linalg.norm(
+ np.array(pred['keypoints'])[4, :2] -
+ np.array(pred['keypoints'])[5, :2])
+ warnings.warn('Torso Size < 1.')
+ threshold_torso.append(np.array([torso_thr, torso_thr]))
+
+ outputs = np.array(outputs)
+ gts = np.array(gts)
+ masks = np.array(masks)
+ threshold_bbox = np.array(threshold_bbox)
+ threshold_torso = np.array(threshold_torso)
+
+ if 'PCK' in metrics:
+ pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_bbox)
+
+ stats_names = [
+ 'Head PCK', 'Sho PCK', 'Elb PCK', 'Wri PCK', 'Hip PCK',
+ 'Knee PCK', 'Ank PCK', 'Mean PCK'
+ ]
+
+ stats = [
+ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4],
+ 0.5 * pck_p[7] + 0.5 * pck_p[8],
+ 0.5 * pck_p[11] + 0.5 * pck_p[12],
+ 0.5 * pck_p[5] + 0.5 * pck_p[6],
+ 0.5 * pck_p[9] + 0.5 * pck_p[10],
+ 0.5 * pck_p[13] + 0.5 * pck_p[14], pck
+ ]
+
+ info_str.extend(list(zip(stats_names, stats)))
+
+ if 'tPCK' in metrics:
+ pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_torso)
+
+ stats_names = [
+ 'Head tPCK', 'Sho tPCK', 'Elb tPCK', 'Wri tPCK', 'Hip tPCK',
+ 'Knee tPCK', 'Ank tPCK', 'Mean tPCK'
+ ]
+
+ stats = [
+ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4],
+ 0.5 * pck_p[7] + 0.5 * pck_p[8],
+ 0.5 * pck_p[11] + 0.5 * pck_p[12],
+ 0.5 * pck_p[5] + 0.5 * pck_p[6],
+ 0.5 * pck_p[9] + 0.5 * pck_p[10],
+ 0.5 * pck_p[13] + 0.5 * pck_p[14], pck
+ ]
+
+ info_str.extend(list(zip(stats_names, stats)))
+
+ return info_str
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCK', **kwargs):
+ """Evaluate onehand10k keypoint results. The pose prediction results
+ will be saved in `${res_folder}/result_keypoints.json`.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_path (list[str])
+ - output_heatmap (np.ndarray[N, K, H, W]): model outputs.
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'tPCK'.
+ PCK means normalized by the bounding boxes, while tPCK
+ means normalized by the torso size.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCK', 'tPCK']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ # convert 0-based index to 1-based index,
+ # and get the first two dimensions.
+ preds[..., :2] += 1.0
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts.append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file, metrics)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..050824a88ab520ad44feafd4a8553582689b1fab
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mhp_dataset.py
@@ -0,0 +1,125 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+from xtcocotools.cocoeval import COCOeval
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownMhpDataset(TopDownCocoDataset):
+ """MHPv2.0 dataset for top-down pose estimation.
+
+ "Understanding Humans in Crowded Scenes: Deep Nested Adversarial
+ Learning and A New Benchmark for Multi-Human Parsing", ACM MM'2018.
+ More details can be found in the `paper
+ `__
+
+ Note that the evaluation metric used here is mAP (adapted from COCO),
+ which may differ from the official evaluation code:
+ https://github.com/ZhaoJ9014/Multi-Human-Parsing/tree/master/Evaluation/Multi-Human-Pose
+ Please be cautious if you use these results in papers.
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MHP keypoint indexes::
+
+ 0: "right ankle",
+ 1: "right knee",
+ 2: "right hip",
+ 3: "left hip",
+ 4: "left knee",
+ 5: "left ankle",
+ 6: "pelvis",
+ 7: "thorax",
+ 8: "upper neck",
+ 9: "head top",
+ 10: "right wrist",
+ 11: "right elbow",
+ 12: "right shoulder",
+ 13: "left shoulder",
+ 14: "left elbow",
+ 15: "left wrist",
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mhp.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ if 'image_thr' in data_cfg:
+ warnings.warn(
+ 'image_thr is deprecated, '
+ 'please use det_bbox_thr instead', DeprecationWarning)
+ self.det_bbox_thr = data_cfg['image_thr']
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
+
+ def _do_python_keypoint_eval(self, res_file):
+ """Keypoint evaluation using COCOAPI."""
+ coco_det = self.coco.loadRes(res_file)
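+ # Standard COCO keypoint mAP; use_area=False so OKS normalisation does
+ # not rely on annotated segmentation areas (assumed unavailable in the
+ # MHP annotations).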
+ coco_eval = COCOeval(
+ self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
+ coco_eval.params.useSegm = None
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ stats_names = [
+ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+ 'AR .75', 'AR (M)', 'AR (L)'
+ ]
+
+ info_str = list(zip(stats_names, coco_eval.stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..751046aa683dd6304b97f639d85cc9489027a6ef
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
@@ -0,0 +1,275 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os.path as osp
+import warnings
+from collections import OrderedDict
+
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+from scipy.io import loadmat, savemat
+
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownMpiiDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MPII Dataset for top-down pose estimation.
+
+ "2D Human Pose Estimation: New Benchmark and State of the Art Analysis"
+ ,CVPR'2014. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MPII keypoint indexes::
+
+ 0: 'right_ankle'
+ 1: 'right_knee',
+ 2: 'right_hip',
+ 3: 'left_hip',
+ 4: 'left_knee',
+ 5: 'left_ankle',
+ 6: 'pelvis',
+ 7: 'thorax',
+ 8: 'upper_neck',
+ 9: 'head_top',
+ 10: 'right_wrist',
+ 11: 'right_elbow',
+ 12: 'right_shoulder',
+ 13: 'left_shoulder',
+ 14: 'left_elbow',
+ 15: 'left_wrist'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpii.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ coco_style=False,
+ test_mode=test_mode)
+
+ self.db = self._get_db()
+ self.image_set = set(x['image_file'] for x in self.db)
+ self.num_images = len(self.image_set)
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ # create train/val split
+ with open(self.ann_file) as anno_file:
+ anno = json.load(anno_file)
+
+ gt_db = []
+ bbox_id = 0
+ for a in anno:
+ image_name = a['image']
+
+ center = np.array(a['center'], dtype=np.float32)
+ scale = np.array([a['scale'], a['scale']], dtype=np.float32)
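+ # MPII 'scale' is a single person-scale value (conventionally relative
+ # to a 200 px person height); duplicate it so downstream code can treat
+ # scale as an (x, y) pair.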
+
+ # Adjust center/scale slightly to avoid cropping limbs
+ if center[0] != -1:
+ center[1] = center[1] + 15 * scale[1]
+ # padding to include proper amount of context
+ scale = scale * 1.25
+
+ # MPII uses matlab format, index is 1-based,
+ # we should first convert to 0-based index
+ center = center - 1
+
+ joints_3d = np.zeros((self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+ joints_3d_visible = np.zeros((self.ann_info['num_joints'], 3),
+ dtype=np.float32)
+ if not self.test_mode:
+ joints = np.array(a['joints'])
+ joints_vis = np.array(a['joints_vis'])
+ assert len(joints) == self.ann_info['num_joints'], \
+ f'joint num diff: {len(joints)}' + \
+ f' vs {self.ann_info["num_joints"]}'
+
+ joints_3d[:, 0:2] = joints[:, 0:2] - 1
+ joints_3d_visible[:, :2] = joints_vis[:, None]
+ image_file = osp.join(self.img_prefix, image_name)
+ gt_db.append({
+ 'image_file': image_file,
+ 'bbox_id': bbox_id,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1
+ })
+ bbox_id = bbox_id + 1
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate PCKh for MPII dataset. Adapted from
+ https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+ Copyright (c) Microsoft, under the MIT License.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['/val2017/000000\
+ 397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ res_folder (str, optional): The folder to save the testing
+ results. Default: None.
+ metric (str | list[str]): Metrics to be performed.
+ Defaults: 'PCKh'.
+
+ Returns:
+ dict: PCKh for each joint
+ """
+
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ bbox_ids = result['bbox_ids']
+ batch_size = len(bbox_ids)
+ for i in range(batch_size):
+ kpts.append({'keypoints': preds[i], 'bbox_id': bbox_ids[i]})
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ preds = np.stack([kpt['keypoints'] for kpt in kpts])
+
+ # convert 0-based index to 1-based index,
+ # and get the first two dimensions.
+ preds = preds[..., :2] + 1.0
+
+ if res_folder:
+ pred_file = osp.join(res_folder, 'pred.mat')
+ savemat(pred_file, mdict={'preds': preds})
+
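+ # PCKh setup: the head size is 0.6 x the head-box diagonal (SC_BIAS),
+ # and a prediction counts as correct when its error is within 0.5 head
+ # sizes, following the standard MPII protocol.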
+ SC_BIAS = 0.6
+ threshold = 0.5
+
+ gt_file = osp.join(osp.dirname(self.ann_file), 'mpii_gt_val.mat')
+ gt_dict = loadmat(gt_file)
+ dataset_joints = gt_dict['dataset_joints']
+ jnt_missing = gt_dict['jnt_missing']
+ pos_gt_src = gt_dict['pos_gt_src']
+ headboxes_src = gt_dict['headboxes_src']
+
+ pos_pred_src = np.transpose(preds, [1, 2, 0])
+
+ head = np.where(dataset_joints == 'head')[1][0]
+ lsho = np.where(dataset_joints == 'lsho')[1][0]
+ lelb = np.where(dataset_joints == 'lelb')[1][0]
+ lwri = np.where(dataset_joints == 'lwri')[1][0]
+ lhip = np.where(dataset_joints == 'lhip')[1][0]
+ lkne = np.where(dataset_joints == 'lkne')[1][0]
+ lank = np.where(dataset_joints == 'lank')[1][0]
+
+ rsho = np.where(dataset_joints == 'rsho')[1][0]
+ relb = np.where(dataset_joints == 'relb')[1][0]
+ rwri = np.where(dataset_joints == 'rwri')[1][0]
+ rkne = np.where(dataset_joints == 'rkne')[1][0]
+ rank = np.where(dataset_joints == 'rank')[1][0]
+ rhip = np.where(dataset_joints == 'rhip')[1][0]
+
+ jnt_visible = 1 - jnt_missing
+ uv_error = pos_pred_src - pos_gt_src
+ uv_err = np.linalg.norm(uv_error, axis=1)
+ headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
+ headsizes = np.linalg.norm(headsizes, axis=0)
+ headsizes *= SC_BIAS
+ scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
+ scaled_uv_err = uv_err / scale
+ scaled_uv_err = scaled_uv_err * jnt_visible
+ jnt_count = np.sum(jnt_visible, axis=1)
+ less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
+ PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count
+
+ # save
+ rng = np.arange(0, 0.5 + 0.01, 0.01)
+ pckAll = np.zeros((len(rng), 16), dtype=np.float32)
+
+ for r, threshold in enumerate(rng):
+ less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
+ pckAll[r, :] = 100. * np.sum(
+ less_than_threshold, axis=1) / jnt_count
+
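+ # Mask out joints 6 and 7 (pelvis and thorax per the index list above)
+ # so they do not contribute to the reported per-joint or mean PCKh.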
+ PCKh = np.ma.array(PCKh, mask=False)
+ PCKh.mask[6:8] = True
+
+ jnt_count = np.ma.array(jnt_count, mask=False)
+ jnt_count.mask[6:8] = True
+ jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
+
+ name_value = [('Head', PCKh[head]),
+ ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
+ ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
+ ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
+ ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
+ ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
+ ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
+ ('PCKh', np.sum(PCKh * jnt_ratio)),
+ ('PCKh@0.1', np.sum(pckAll[10, :] * jnt_ratio))]
+ name_value = OrderedDict(name_value)
+
+ return name_value
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0da65b47a27074fac6dc1bfbd98309f75e359a3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
@@ -0,0 +1,310 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from mmpose.datasets.builder import DATASETS
+from ..base import Kpt2dSviewRgbImgTopDownDataset
+
+
+@DATASETS.register_module()
+class TopDownMpiiTrbDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """MPII-TRB Dataset dataset for top-down pose estimation.
+
+ "TRB: A Novel Triplet Representation for Understanding 2D Human Body",
+ ICCV'2019. More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ MPII-TRB keypoint indexes::
+
+ 0: 'left_shoulder'
+ 1: 'right_shoulder'
+ 2: 'left_elbow'
+ 3: 'right_elbow'
+ 4: 'left_wrist'
+ 5: 'right_wrist'
+ 6: 'left_hip'
+ 7: 'right_hip'
+ 8: 'left_knee'
+ 9: 'right_knee'
+ 10: 'left_ankle'
+ 11: 'right_ankle'
+ 12: 'head'
+ 13: 'neck'
+
+ 14: 'right_neck'
+ 15: 'left_neck'
+ 16: 'medial_right_shoulder'
+ 17: 'lateral_right_shoulder'
+ 18: 'medial_right_bow'
+ 19: 'lateral_right_bow'
+ 20: 'medial_right_wrist'
+ 21: 'lateral_right_wrist'
+ 22: 'medial_left_shoulder'
+ 23: 'lateral_left_shoulder'
+ 24: 'medial_left_bow'
+ 25: 'lateral_left_bow'
+ 26: 'medial_left_wrist'
+ 27: 'lateral_left_wrist'
+ 28: 'medial_right_hip'
+ 29: 'lateral_right_hip'
+ 30: 'medial_right_knee'
+ 31: 'lateral_right_knee'
+ 32: 'medial_right_ankle'
+ 33: 'lateral_right_ankle'
+ 34: 'medial_left_hip'
+ 35: 'lateral_left_hip'
+ 36: 'medial_left_knee'
+ 37: 'lateral_left_knee'
+ 38: 'medial_left_ankle'
+ 39: 'lateral_left_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/mpii_trb.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.db = self._get_db(ann_file)
+ self.image_set = set(x['image_file'] for x in self.db)
+ self.num_images = len(self.image_set)
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self, ann_file):
+ """Load dataset."""
+ with open(ann_file, 'r') as f:
+ data = json.load(f)
+ tmpl = dict(
+ image_file=None,
+ bbox_id=None,
+ center=None,
+ scale=None,
+ rotation=0,
+ joints_3d=None,
+ joints_3d_visible=None,
+ dataset=self.dataset_name)
+
+ imid2info = {
+ int(osp.splitext(x['file_name'])[0]): x
+ for x in data['images']
+ }
+
+ num_joints = self.ann_info['num_joints']
+ gt_db = []
+
+ for anno in data['annotations']:
+ newitem = cp.deepcopy(tmpl)
+ image_id = anno['image_id']
+ newitem['bbox_id'] = anno['id']
+ newitem['image_file'] = osp.join(self.img_prefix,
+ imid2info[image_id]['file_name'])
+
+ if max(anno['keypoints']) == 0:
+ continue
+
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ for ipt in range(num_joints):
+ joints_3d[ipt, 0] = anno['keypoints'][ipt * 3 + 0]
+ joints_3d[ipt, 1] = anno['keypoints'][ipt * 3 + 1]
+ joints_3d[ipt, 2] = 0
+ t_vis = min(anno['keypoints'][ipt * 3 + 2], 1)
+ joints_3d_visible[ipt, :] = (t_vis, t_vis, 0)
+
+ center = np.array(anno['center'], dtype=np.float32)
+ scale = self.ann_info['image_size'] / anno['scale'] / 200.0
+ newitem['center'] = center
+ newitem['scale'] = scale
+ newitem['joints_3d'] = joints_3d
+ newitem['joints_3d_visible'] = joints_3d_visible
+ if 'headbox' in anno:
+ newitem['headbox'] = anno['headbox']
+ gt_db.append(newitem)
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
+
+ return gt_db
+
+ def _evaluate_kernel(self, pred, joints_3d, joints_3d_visible, headbox):
+ """Evaluate one example."""
+ num_joints = self.ann_info['num_joints']
+ headbox = np.array(headbox)
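+ # A keypoint counts as a hit when the joint is visible and the
+ # prediction lies within 0.3 x the head-box diagonal of the ground
+ # truth.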
+ threshold = np.linalg.norm(headbox[:2] - headbox[2:]) * 0.3
+ hit = np.zeros(num_joints, dtype=np.float32)
+ exist = np.zeros(num_joints, dtype=np.float32)
+
+ for i in range(num_joints):
+ pred_pt = pred[i]
+ gt_pt = joints_3d[i]
+ vis = joints_3d_visible[i][0]
+ if vis:
+ exist[i] = 1
+ else:
+ continue
+ distance = np.linalg.norm(pred_pt[:2] - gt_pt[:2])
+ if distance < threshold:
+ hit[i] = 1
+ return hit, exist
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='PCKh', **kwargs):
+ """Evaluate PCKh for MPII-TRB dataset.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['/val2017/\
+ 000000397133.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_ids (list[str]): For example, ['27407'].
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metrics to be performed.
+ Defaults: 'PCKh'.
+
+ Returns:
+ dict: PCKh for each joint
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['PCKh']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ res_file = osp.join(res_folder, 'result_keypoints.json')
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
+
+ kpts = []
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ str_image_path = image_paths[i]
+ image_id = int(osp.basename(osp.splitext(str_image_path)[0]))
+
+ kpts.append({
+ 'keypoints': preds[i].tolist(),
+ 'center': boxes[i][0:2].tolist(),
+ 'scale': boxes[i][2:4].tolist(),
+ 'area': float(boxes[i][4]),
+ 'score': float(boxes[i][5]),
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ self._write_keypoint_results(kpts, res_file)
+ info_str = self._report_metric(res_file)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoints, res_file):
+ """Write results into a json file."""
+
+ with open(res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+
+ def _report_metric(self, res_file):
+ """Keypoint evaluation.
+
+ Report Mean Acc of skeleton, contour and all joints.
+ """
+ num_joints = self.ann_info['num_joints']
+ hit = np.zeros(num_joints, dtype=np.float32)
+ exist = np.zeros(num_joints, dtype=np.float32)
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+
+ assert len(preds) == len(
+ self.db), f'len(preds)={len(preds)}, len(self.db)={len(self.db)}'
+ for pred, item in zip(preds, self.db):
+ h, e = self._evaluate_kernel(pred['keypoints'], item['joints_3d'],
+ item['joints_3d_visible'],
+ item['headbox'])
+ hit += h
+ exist += e
+ skeleton = np.sum(hit[:14]) / np.sum(exist[:14])
+ contour = np.sum(hit[14:]) / np.sum(exist[14:])
+ mean = np.sum(hit) / np.sum(exist)
+
+ info_str = []
+ info_str.append(('Skeleton_acc', skeleton.item()))
+ info_str.append(('Contour_acc', contour.item()))
+ info_str.append(('PCKh', mean.item()))
+ return info_str
+
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+ """sort kpts and remove the repeated ones."""
+ kpts = sorted(kpts, key=lambda x: x[key])
+ num = len(kpts)
+ for i in range(num - 1, 0, -1):
+ if kpts[i][key] == kpts[i - 1][key]:
+ del kpts[i]
+
+ return kpts
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ad6b81405e2411bae1a531521208d2cc272fbf3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_ochuman_dataset.py
@@ -0,0 +1,97 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from mmcv import Config
+
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+
+@DATASETS.register_module()
+class TopDownOCHumanDataset(TopDownCocoDataset):
+ """OChuman dataset for top-down pose estimation.
+
+ "Pose2Seg: Detection Free Human Instance Segmentation", CVPR'2019.
+ More details can be found in the `paper
+ `__ .
+
+ "Occluded Human (OCHuman)" dataset contains 8110 heavily occluded
+ human instances within 4731 images. OCHuman dataset is designed for
+ validation and testing. To evaluate on OCHuman, the model should be
+ trained on COCO training set, and then test the robustness of the
+ model to occlusion using OCHuman.
+
+ OCHuman keypoint indexes (same as COCO)::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/ochuman.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ assert self.use_gt_bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ return gt_db
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..c690860ac7a11129c9eee50c19eda05279e9ace1
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_dataset.py
@@ -0,0 +1,312 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import Config, deprecated_api_warning
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from .topdown_coco_dataset import TopDownCocoDataset
+
+try:
+ from poseval import eval_helpers
+ from poseval.evaluateAP import evaluateAP
+ has_poseval = True
+except (ImportError, ModuleNotFoundError):
+ has_poseval = False
+
+
+@DATASETS.register_module()
+class TopDownPoseTrack18Dataset(TopDownCocoDataset):
+ """PoseTrack18 dataset for top-down pose estimation.
+
+ "Posetrack: A benchmark for human pose estimation and tracking", CVPR'2018.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies the specified transforms
+ to return a dict containing the image tensors and other information.
+
+ PoseTrack2018 keypoint indexes::
+
+ 0: 'nose',
+ 1: 'head_bottom',
+ 2: 'head_top',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where images are held.
+ Default: None.
+ data_cfg (dict): config
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Store True when building test or
+ validation dataset. Default: False.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False):
+
+ if dataset_info is None:
+ warnings.warn(
+ 'dataset_info is missing. '
+ 'Check https://github.com/open-mmlab/mmpose/pull/663 '
+ 'for details.', DeprecationWarning)
+ cfg = Config.fromfile('configs/_base_/datasets/posetrack18.py')
+ dataset_info = cfg._cfg_dict['dataset_info']
+
+ super(TopDownCocoDataset, self).__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate posetrack keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1],area, score]
+ - image_paths (list[str]): For example, ['val/010016_mpii_test\
+ /000024.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_id (list(int))
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_folder = tmp_folder.name
+
+ gt_folder = osp.join(
+ osp.dirname(self.ann_file),
+ osp.splitext(self.ann_file.split('_')[-1])[0])
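+ # The per-sequence ground-truth folder is derived from the annotation
+ # file name: the part after the last underscore, minus its extension,
+ # is used as a sub-directory next to the annotation file (e.g. an
+ # annotation named ..._val.json maps to a sibling 'val/' folder).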
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = defaultdict(list)
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts[image_id].append(
+ [img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts[image_id].append(img_kpts)
+
+ self._write_posetrack18_keypoint_results(valid_kpts, gt_folder,
+ res_folder)
+
+ info_str = self._do_python_keypoint_eval(gt_folder, res_folder)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_posetrack18_keypoint_results(keypoint_results, gt_folder,
+ pred_folder):
+ """Write results into a json file.
+
+ Args:
+ keypoint_results (dict): keypoint results organized by image_id.
+ gt_folder (str): Path of directory for official gt files.
+ pred_folder (str): Path of directory to save the results.
+ """
+ categories = []
+
+ cat = {}
+ cat['supercategory'] = 'person'
+ cat['id'] = 1
+ cat['name'] = 'person'
+ cat['keypoints'] = [
+ 'nose', 'head_bottom', 'head_top', 'left_ear', 'right_ear',
+ 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
+ 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee',
+ 'right_knee', 'left_ankle', 'right_ankle'
+ ]
+ cat['skeleton'] = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13],
+ [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10],
+ [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
+ [4, 6], [5, 7]]
+ categories.append(cat)
+
+ json_files = [
+ pos for pos in os.listdir(gt_folder) if pos.endswith('.json')
+ ]
+ for json_file in json_files:
+
+ with open(osp.join(gt_folder, json_file), 'r') as f:
+ gt = json.load(f)
+
+ annotations = []
+ images = []
+
+ for image in gt['images']:
+ im = {}
+ im['id'] = image['id']
+ im['file_name'] = image['file_name']
+ images.append(im)
+
+ img_kpts = keypoint_results[im['id']]
+
+ if len(img_kpts) == 0:
+ continue
+ for track_id, img_kpt in enumerate(img_kpts[0]):
+ ann = {}
+ ann['image_id'] = img_kpt['image_id']
+ ann['keypoints'] = np.array(
+ img_kpt['keypoints']).reshape(-1).tolist()
+ ann['scores'] = np.array(ann['keypoints']).reshape(
+ [-1, 3])[:, 2].tolist()
+ ann['score'] = float(img_kpt['score'])
+ ann['track_id'] = track_id
+ annotations.append(ann)
+
+ info = {}
+ info['images'] = images
+ info['categories'] = categories
+ info['annotations'] = annotations
+
+ with open(osp.join(pred_folder, json_file), 'w') as f:
+ json.dump(info, f, sort_keys=True, indent=4)
+
+ def _do_python_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation using poseval."""
+
+ if not has_poseval:
+ raise ImportError('Please install the poseval package for evaluation '
+ 'on PoseTrack dataset '
+ '(see requirements/optional.txt)')
+
+ argv = ['', gt_folder + '/', pred_folder + '/']
+
+ print('Loading data')
+ gtFramesAll, prFramesAll = eval_helpers.load_data_dir(argv)
+
+ print('# gt frames :', len(gtFramesAll))
+ print('# pred frames:', len(prFramesAll))
+
+ # evaluate per-frame multi-person pose estimation (AP)
+ # compute AP
+ print('Evaluation of per-frame multi-person pose estimation')
+ apAll, _, _ = evaluateAP(gtFramesAll, prFramesAll, None, False, False)
+
+ # print AP
+ print('Average Precision (AP) metric:')
+ eval_helpers.printTable(apAll)
+
+ stats = eval_helpers.getCum(apAll)
+
+ stats_names = [
+ 'Head AP', 'Shou AP', 'Elb AP', 'Wri AP', 'Hip AP', 'Knee AP',
+ 'Ankl AP', 'Total AP'
+ ]
+
+ info_str = list(zip(stats_names, stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..045148d3e01ed513d9514ee81a85efaba9a72287
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/datasets/top_down/topdown_posetrack18_video_dataset.py
@@ -0,0 +1,549 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import tempfile
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from mmcv import deprecated_api_warning
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...builder import DATASETS
+from ..base import Kpt2dSviewRgbVidTopDownDataset
+
+try:
+ from poseval import eval_helpers
+ from poseval.evaluateAP import evaluateAP
+ has_poseval = True
+except (ImportError, ModuleNotFoundError):
+ has_poseval = False
+
+
+@DATASETS.register_module()
+class TopDownPoseTrack18VideoDataset(Kpt2dSviewRgbVidTopDownDataset):
+ """PoseTrack18 dataset for top-down pose estimation.
+
+ "Posetrack: A benchmark for human pose estimation and tracking", CVPR'2018.
+ More details can be found in the `paper
+ `__ .
+
+ The dataset loads raw features and applies specified transforms
+ to return a dict containing the image tensors and other information.
+
+ PoseTrack2018 keypoint indexes::
+
+ 0: 'nose',
+ 1: 'head_bottom',
+ 2: 'head_top',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Path to the annotation file.
+ img_prefix (str): Path to a directory where videos/images are held.
+ Default: None.
+ data_cfg (dict): Dataset configuration dict.
+ pipeline (list[dict | callable]): A sequence of data transforms.
+ dataset_info (DatasetInfo): A class containing all dataset info.
+ test_mode (bool): Set True when building a test or
+ validation dataset. Default: False.
+ ph_fill_len (int): The length of the placeholder to fill in the
+ image filenames, default: 6 in PoseTrack18.
+ """
+
+ def __init__(self,
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=None,
+ test_mode=False,
+ ph_fill_len=6):
+ super().__init__(
+ ann_file,
+ img_prefix,
+ data_cfg,
+ pipeline,
+ dataset_info=dataset_info,
+ test_mode=test_mode)
+
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
+ self.bbox_file = data_cfg['bbox_file']
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+ self.use_nms = data_cfg.get('use_nms', True)
+ self.soft_nms = data_cfg['soft_nms']
+ self.nms_thr = data_cfg['nms_thr']
+ self.oks_thr = data_cfg['oks_thr']
+ self.vis_thr = data_cfg['vis_thr']
+ self.frame_weight_train = data_cfg['frame_weight_train']
+ self.frame_weight_test = data_cfg['frame_weight_test']
+ self.frame_weight = self.frame_weight_test \
+ if self.test_mode else self.frame_weight_train
+
+ self.ph_fill_len = ph_fill_len
+
+ # select the frame indices
+ self.frame_index_rand = data_cfg.get('frame_index_rand', True)
+ self.frame_index_range = data_cfg.get('frame_index_range', [-2, 2])
+ self.num_adj_frames = data_cfg.get('num_adj_frames', 1)
+ self.frame_indices_train = data_cfg.get('frame_indices_train', None)
+ self.frame_indices_test = data_cfg.get('frame_indices_test',
+ [-2, -1, 0, 1, 2])
+
+ if self.frame_indices_train is not None:
+ self.frame_indices_train.sort()
+ self.frame_indices_test.sort()
+
+ self.db = self._get_db()
+
+ print(f'=> num_images: {self.num_images}')
+ print(f'=> load {len(self.db)} samples')
+
+ def _get_db(self):
+ """Load dataset."""
+ if (not self.test_mode) or self.use_gt_bbox:
+ # use ground truth bbox
+ gt_db = self._load_coco_keypoint_annotations()
+ else:
+ # use bbox from detection
+ gt_db = self._load_posetrack_person_detection_results()
+ return gt_db
+
+ def _load_coco_keypoint_annotations(self):
+ """Ground truth bbox and keypoints."""
+ gt_db = []
+ for img_id in self.img_ids:
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
+ return gt_db
+
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
+ """load annotation from COCOAPI.
+
+ Note:
+ bbox:[x1, y1, w, h]
+ Args:
+ img_id: coco image id
+ Returns:
+ dict: db entry
+ """
+ img_ann = self.coco.loadImgs(img_id)[0]
+ width = img_ann['width']
+ height = img_ann['height']
+ num_joints = self.ann_info['num_joints']
+
+ file_name = img_ann['file_name']
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = self.coco.loadAnns(ann_ids)
+
+ # sanitize bboxes
+ valid_objs = []
+ for obj in objs:
+ if 'bbox' not in obj:
+ continue
+ x, y, w, h = obj['bbox']
+ x1 = max(0, x)
+ y1 = max(0, y)
+ x2 = min(width - 1, x1 + max(0, w - 1))
+ y2 = min(height - 1, y1 + max(0, h - 1))
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
+ valid_objs.append(obj)
+ objs = valid_objs
+
+ bbox_id = 0
+ rec = []
+ for obj in objs:
+ if 'keypoints' not in obj:
+ continue
+ if max(obj['keypoints']) == 0:
+ continue
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
+ continue
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
+ joints_3d[:, :2] = keypoints[:, :2]
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
+
+ image_files = []
+ cur_image_file = osp.join(self.img_prefix, self.id2name[img_id])
+ image_files.append(cur_image_file)
+
+ # "images/val/012834_mpii_test/000000.jpg" -->> "000000.jpg"
+ cur_image_name = file_name.split('/')[-1]
+ ref_idx = int(cur_image_name.replace('.jpg', ''))
+
+ # select the frame indices
+ if not self.test_mode and self.frame_indices_train is not None:
+ indices = self.frame_indices_train
+ elif not self.test_mode and self.frame_index_rand:
+ low, high = self.frame_index_range
+ indices = np.random.randint(low, high + 1, self.num_adj_frames)
+ else:
+ indices = self.frame_indices_test
+
+ for index in indices:
+ if self.test_mode and index == 0:
+ continue
+ # the supporting frame index
+ support_idx = ref_idx + index
+ support_idx = np.clip(support_idx, 0, nframes - 1)
+ sup_image_file = cur_image_file.replace(
+ cur_image_name,
+ str(support_idx).zfill(self.ph_fill_len) + '.jpg')
+
+ if osp.exists(sup_image_file):
+ image_files.append(sup_image_file)
+ else:
+ warnings.warn(
+ f'{sup_image_file} does not exist, '
+ f'use {cur_image_file} instead.', UserWarning)
+ image_files.append(cur_image_file)
+ rec.append({
+ 'image_file': image_files,
+ 'center': center,
+ 'scale': scale,
+ 'bbox': obj['clean_bbox'][:4],
+ 'rotation': 0,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'dataset': self.dataset_name,
+ 'bbox_score': 1,
+ 'bbox_id': bbox_id,
+ 'nframes': nframes,
+ 'frame_id': frame_id,
+ 'frame_weight': self.frame_weight
+ })
+ bbox_id = bbox_id + 1
+
+ return rec
+
+ def _load_posetrack_person_detection_results(self):
+ """Load Posetrack person detection results.
+
+ Only in test mode.
+ """
+ num_joints = self.ann_info['num_joints']
+ all_boxes = None
+ with open(self.bbox_file, 'r') as f:
+ all_boxes = json.load(f)
+
+ if not all_boxes:
+ raise ValueError(f'=> Failed to load {self.bbox_file}')
+
+ print(f'=> Total boxes: {len(all_boxes)}')
+
+ kpt_db = []
+ bbox_id = 0
+ for det_res in all_boxes:
+ if det_res['category_id'] != 1:
+ continue
+
+ score = det_res['score']
+ if score < self.det_bbox_thr:
+ continue
+
+ box = det_res['bbox']
+
+ # deal with different bbox file formats
+ if 'nframes' in det_res and 'frame_id' in det_res:
+ nframes = int(det_res['nframes'])
+ frame_id = int(det_res['frame_id'])
+ elif 'image_name' in det_res:
+ img_id = self.name2id[det_res['image_name']]
+ img_ann = self.coco.loadImgs(img_id)[0]
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+ else:
+ img_id = det_res['image_id']
+ img_ann = self.coco.loadImgs(img_id)[0]
+ nframes = int(img_ann['nframes'])
+ frame_id = int(img_ann['frame_id'])
+
+ image_files = []
+ if 'image_name' in det_res:
+ file_name = det_res['image_name']
+ else:
+ file_name = self.id2name[det_res['image_id']]
+
+ cur_image_file = osp.join(self.img_prefix, file_name)
+ image_files.append(cur_image_file)
+
+ # "images/val/012834_mpii_test/000000.jpg" -->> "000000.jpg"
+ cur_image_name = file_name.split('/')[-1]
+ ref_idx = int(cur_image_name.replace('.jpg', ''))
+
+ indices = self.frame_indices_test
+ for index in indices:
+ if self.test_mode and index == 0:
+ continue
+ # the supporting frame index
+ support_idx = ref_idx + index
+ support_idx = np.clip(support_idx, 0, nframes - 1)
+ sup_image_file = cur_image_file.replace(
+ cur_image_name,
+ str(support_idx).zfill(self.ph_fill_len) + '.jpg')
+
+ if osp.exists(sup_image_file):
+ image_files.append(sup_image_file)
+ else:
+ warnings.warn(f'{sup_image_file} does not exist, '
+ f'use {cur_image_file} instead.')
+ image_files.append(cur_image_file)
+
+ center, scale = self._xywh2cs(*box[:4])
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
+ kpt_db.append({
+ 'image_file': image_files,
+ 'center': center,
+ 'scale': scale,
+ 'rotation': 0,
+ 'bbox': box[:4],
+ 'bbox_score': score,
+ 'dataset': self.dataset_name,
+ 'joints_3d': joints_3d,
+ 'joints_3d_visible': joints_3d_visible,
+ 'bbox_id': bbox_id,
+ 'nframes': nframes,
+ 'frame_id': frame_id,
+ 'frame_weight': self.frame_weight
+ })
+ bbox_id = bbox_id + 1
+ print(f'=> Total boxes after filtering '
+ f'low score@{self.det_bbox_thr}: {bbox_id}')
+ return kpt_db
+
+ @deprecated_api_warning(name_dict=dict(outputs='results'))
+ def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
+ """Evaluate PoseTrack keypoint results. The pose prediction results
+ will be saved in ``${res_folder}/result_keypoints.json``.
+
+ Note:
+ - num_keypoints: K
+
+ Args:
+ results (list[dict]): Testing results containing the following
+ items:
+
+ - preds (np.ndarray[N,K,3]): The first two dimensions are \
+ coordinates, score is the third dimension of the array.
+ - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
+ scale[1], area, score]
+ - image_paths (list[str]): For example, ['val/010016_mpii_test\
+ /000024.jpg']
+ - heatmap (np.ndarray[N, K, H, W]): model output heatmap.
+ - bbox_id (list(int))
+ res_folder (str, optional): The folder to save the testing
+ results. If not specified, a temp folder will be created.
+ Default: None.
+ metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
+
+ Returns:
+ dict: Evaluation results for evaluation metric.
+ """
+ metrics = metric if isinstance(metric, list) else [metric]
+ allowed_metrics = ['mAP']
+ for metric in metrics:
+ if metric not in allowed_metrics:
+ raise KeyError(f'metric {metric} is not supported')
+
+ if res_folder is not None:
+ tmp_folder = None
+ else:
+ tmp_folder = tempfile.TemporaryDirectory()
+ res_folder = tmp_folder.name
+
+ gt_folder = osp.join(
+ osp.dirname(self.ann_file),
+ osp.splitext(self.ann_file.split('_')[-1])[0])
+
+ kpts = defaultdict(list)
+
+ for result in results:
+ preds = result['preds']
+ boxes = result['boxes']
+ image_paths = result['image_paths']
+ bbox_ids = result['bbox_ids']
+
+ batch_size = len(image_paths)
+ for i in range(batch_size):
+ if not isinstance(image_paths[i], list):
+ image_id = self.name2id[image_paths[i]
+ [len(self.img_prefix):]]
+ else:
+ image_id = self.name2id[image_paths[i][0]
+ [len(self.img_prefix):]]
+
+ kpts[image_id].append({
+ 'keypoints': preds[i],
+ 'center': boxes[i][0:2],
+ 'scale': boxes[i][2:4],
+ 'area': boxes[i][4],
+ 'score': boxes[i][5],
+ 'image_id': image_id,
+ 'bbox_id': bbox_ids[i]
+ })
+ kpts = self._sort_and_unique_bboxes(kpts)
+
+ # rescoring and oks nms
+ num_joints = self.ann_info['num_joints']
+ vis_thr = self.vis_thr
+ oks_thr = self.oks_thr
+ valid_kpts = defaultdict(list)
+ for image_id in kpts.keys():
+ img_kpts = kpts[image_id]
+ for n_p in img_kpts:
+ box_score = n_p['score']
+ kpt_score = 0
+ valid_num = 0
+ for n_jt in range(0, num_joints):
+ t_s = n_p['keypoints'][n_jt][2]
+ if t_s > vis_thr:
+ kpt_score = kpt_score + t_s
+ valid_num = valid_num + 1
+ if valid_num != 0:
+ kpt_score = kpt_score / valid_num
+ # rescoring
+ n_p['score'] = kpt_score * box_score
+
+ if self.use_nms:
+ nms = soft_oks_nms if self.soft_nms else oks_nms
+ keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
+ valid_kpts[image_id].append(
+ [img_kpts[_keep] for _keep in keep])
+ else:
+ valid_kpts[image_id].append(img_kpts)
+
+ self._write_keypoint_results(valid_kpts, gt_folder, res_folder)
+
+ info_str = self._do_keypoint_eval(gt_folder, res_folder)
+ name_value = OrderedDict(info_str)
+
+ if tmp_folder is not None:
+ tmp_folder.cleanup()
+
+ return name_value
+
+ @staticmethod
+ def _write_keypoint_results(keypoint_results, gt_folder, pred_folder):
+ """Write results into a json file.
+
+ Args:
+ keypoint_results (dict): keypoint results organized by image_id.
+ gt_folder (str): Path of directory for official gt files.
+ pred_folder (str): Path of directory to save the results.
+ """
+ categories = []
+
+ cat = {}
+ cat['supercategory'] = 'person'
+ cat['id'] = 1
+ cat['name'] = 'person'
+ cat['keypoints'] = [
+ 'nose', 'head_bottom', 'head_top', 'left_ear', 'right_ear',
+ 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
+ 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee',
+ 'right_knee', 'left_ankle', 'right_ankle'
+ ]
+ cat['skeleton'] = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13],
+ [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10],
+ [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
+ [4, 6], [5, 7]]
+ categories.append(cat)
+
+ json_files = [
+ pos for pos in os.listdir(gt_folder) if pos.endswith('.json')
+ ]
+ for json_file in json_files:
+
+ with open(osp.join(gt_folder, json_file), 'r') as f:
+ gt = json.load(f)
+
+ annotations = []
+ images = []
+
+ for image in gt['images']:
+ im = {}
+ im['id'] = image['id']
+ im['file_name'] = image['file_name']
+ images.append(im)
+
+ img_kpts = keypoint_results[im['id']]
+
+ if len(img_kpts) == 0:
+ continue
+ for track_id, img_kpt in enumerate(img_kpts[0]):
+ ann = {}
+ ann['image_id'] = img_kpt['image_id']
+ ann['keypoints'] = np.array(
+ img_kpt['keypoints']).reshape(-1).tolist()
+ ann['scores'] = np.array(ann['keypoints']).reshape(
+ [-1, 3])[:, 2].tolist()
+ ann['score'] = float(img_kpt['score'])
+ ann['track_id'] = track_id
+ annotations.append(ann)
+
+ info = {}
+ info['images'] = images
+ info['categories'] = categories
+ info['annotations'] = annotations
+
+ with open(osp.join(pred_folder, json_file), 'w') as f:
+ json.dump(info, f, sort_keys=True, indent=4)
+
+ def _do_keypoint_eval(self, gt_folder, pred_folder):
+ """Keypoint evaluation using poseval."""
+
+ if not has_poseval:
+ raise ImportError('Please install the poseval package for evaluation '
+ 'on PoseTrack dataset '
+ '(see requirements/optional.txt)')
+
+ argv = ['', gt_folder + '/', pred_folder + '/']
+
+ print('Loading data')
+ gtFramesAll, prFramesAll = eval_helpers.load_data_dir(argv)
+
+ print('# gt frames :', len(gtFramesAll))
+ print('# pred frames:', len(prFramesAll))
+
+ # evaluate per-frame multi-person pose estimation (AP)
+ # compute AP
+ print('Evaluation of per-frame multi-person pose estimation')
+ apAll, _, _ = evaluateAP(gtFramesAll, prFramesAll, None, False, False)
+
+ # print AP
+ print('Average Precision (AP) metric:')
+ eval_helpers.printTable(apAll)
+
+ stats = eval_helpers.getCum(apAll)
+
+ stats_names = [
+ 'Head AP', 'Shou AP', 'Elb AP', 'Wri AP', 'Hip AP', 'Knee AP',
+ 'Ankl AP', 'Total AP'
+ ]
+
+ info_str = list(zip(stats_names, stats))
+
+ return info_str
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/__init__.py b/vendor/ViTPose/mmpose/datasets/pipelines/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf06db1c9d0656627ed91670d9a91ede66e0254f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .bottom_up_transform import * # noqa
+from .hand_transform import * # noqa
+from .loading import LoadImageFromFile # noqa
+from .mesh_transform import * # noqa
+from .pose3d_transform import * # noqa
+from .shared_transform import * # noqa
+from .top_down_transform import * # noqa
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/bottom_up_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/bottom_up_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..032ce4548f5c6c142771405bf84b3a647641b460
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/bottom_up_transform.py
@@ -0,0 +1,816 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import (get_affine_transform, get_warp_matrix,
+ warp_affine_joints)
+from mmpose.datasets.builder import PIPELINES
+from .shared_transform import Compose
+
+
+def _ceil_to_multiples_of(x, base=64):
+ """Transform x to the integral multiple of the base."""
+ return int(np.ceil(x / base)) * base
+
+
+def _get_multi_scale_size(image,
+ input_size,
+ current_scale,
+ min_scale,
+ use_udp=False):
+ """Get the size for multi-scale training.
+
+ Args:
+ image: Input image.
+ input_size (np.ndarray[2]): Size (w, h) of the image input.
+ current_scale (float): Scale factor.
+ min_scale (float): Minimal scale.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Returns:
+ tuple: A tuple containing multi-scale sizes.
+
+ - (w_resized, h_resized) (tuple(int)): resized width/height
+ - center (np.ndarray): image center
+ - scale (np.ndarray): scales wrt width/height
+ """
+ assert len(input_size) == 2
+ h, w, _ = image.shape
+
+ # calculate the size for min_scale
+ min_input_w = _ceil_to_multiples_of(min_scale * input_size[0], 64)
+ min_input_h = _ceil_to_multiples_of(min_scale * input_size[1], 64)
+ if w < h:
+ w_resized = int(min_input_w * current_scale / min_scale)
+ h_resized = int(
+ _ceil_to_multiples_of(min_input_w / w * h, 64) * current_scale /
+ min_scale)
+ if use_udp:
+ scale_w = w - 1.0
+ scale_h = (h_resized - 1.0) / (w_resized - 1.0) * (w - 1.0)
+ else:
+ scale_w = w / 200.0
+ scale_h = h_resized / w_resized * w / 200.0
+ else:
+ h_resized = int(min_input_h * current_scale / min_scale)
+ w_resized = int(
+ _ceil_to_multiples_of(min_input_h / h * w, 64) * current_scale /
+ min_scale)
+ if use_udp:
+ scale_h = h - 1.0
+ scale_w = (w_resized - 1.0) / (h_resized - 1.0) * (h - 1.0)
+ else:
+ scale_h = h / 200.0
+ scale_w = w_resized / h_resized * h / 200.0
+ if use_udp:
+ center = (scale_w / 2.0, scale_h / 2.0)
+ else:
+ center = np.array([round(w / 2.0), round(h / 2.0)])
+ return (w_resized, h_resized), center, np.array([scale_w, scale_h])
+
+
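+# A hedged usage sketch of the helper above; the 480x640 image and the
+# (512, 512) input size are illustrative values, not taken from a real config.
+def _demo_multi_scale_size():
+    image = np.zeros((480, 640, 3), dtype=np.uint8)
+    # With current_scale == min_scale == 1 the resized size is rounded up to
+    # multiples of 64, giving (704, 512) for this image.
+    size_resized, center, scale = _get_multi_scale_size(
+        image, input_size=(512, 512), current_scale=1, min_scale=1)
+    return size_resized, center, scale
+
+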
+def _resize_align_multi_scale(image, input_size, current_scale, min_scale):
+ """Resize the images for multi-scale training.
+
+ Args:
+ image: Input image
+ input_size (np.ndarray[2]): Size (w, h) of the image input
+ current_scale (float): Current scale
+ min_scale (float): Minimal scale
+
+ Returns:
+ tuple: A tuple containing image info.
+
+ - image_resized (np.ndarray): resized image
+ - center (np.ndarray): center of image
+ - scale (np.ndarray): scale
+ """
+ assert len(input_size) == 2
+ size_resized, center, scale = _get_multi_scale_size(
+ image, input_size, current_scale, min_scale)
+
+ trans = get_affine_transform(center, scale, 0, size_resized)
+ image_resized = cv2.warpAffine(image, trans, size_resized)
+
+ return image_resized, center, scale
+
+
+def _resize_align_multi_scale_udp(image, input_size, current_scale, min_scale):
+ """Resize the images for multi-scale training.
+
+ Args:
+ image: Input image
+ input_size (np.ndarray[2]): Size (w, h) of the image input
+ current_scale (float): Current scale
+ min_scale (float): Minimal scale
+
+ Returns:
+ tuple: A tuple containing image info.
+
+ - image_resized (np.ndarray): resized image
+ - center (np.ndarray): center of image
+ - scale (np.ndarray): scale
+ """
+ assert len(input_size) == 2
+ size_resized, _, _ = _get_multi_scale_size(image, input_size,
+ current_scale, min_scale, True)
+
+ _, center, scale = _get_multi_scale_size(image, input_size, min_scale,
+ min_scale, True)
+
+ trans = get_warp_matrix(
+ theta=0,
+ size_input=np.array(scale, dtype=np.float32),
+ size_dst=np.array(size_resized, dtype=np.float32) - 1.0,
+ size_target=np.array(scale, dtype=np.float32))
+ image_resized = cv2.warpAffine(
+ image.copy(), trans, size_resized, flags=cv2.INTER_LINEAR)
+
+ return image_resized, center, scale
+
+
+class HeatmapGenerator:
+ """Generate heatmaps for bottom-up models.
+
+ Args:
+ num_joints (int): Number of keypoints
+ output_size (np.ndarray): Size (w, h) of feature map
+ sigma (int): Sigma of the heatmaps.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, output_size, num_joints, sigma=-1, use_udp=False):
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+ self.output_size = np.array([output_size, output_size],
+ dtype=int)
+ self.num_joints = num_joints
+ if sigma < 0:
+ sigma = self.output_size.prod()**0.5 / 64
+ self.sigma = sigma
+ size = 6 * sigma + 3
+ self.use_udp = use_udp
+ if use_udp:
+ self.x = np.arange(0, size, 1, np.float32)
+ self.y = self.x[:, None]
+ else:
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+ x0, y0 = 3 * sigma + 1, 3 * sigma + 1
+ self.g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
+
+ def __call__(self, joints):
+ """Generate heatmaps."""
+ hms = np.zeros(
+ (self.num_joints, self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ sigma = self.sigma
+ for p in joints:
+ for idx, pt in enumerate(p):
+ if pt[2] > 0:
+ x, y = int(pt[0]), int(pt[1])
+ if x < 0 or y < 0 or \
+ x >= self.output_size[0] or y >= self.output_size[1]:
+ continue
+
+ if self.use_udp:
+ x0 = 3 * sigma + 1 + pt[0] - x
+ y0 = 3 * sigma + 1 + pt[1] - y
+ g = np.exp(-((self.x - x0)**2 + (self.y - y0)**2) /
+ (2 * sigma**2))
+ else:
+ g = self.g
+
+ ul = int(np.round(x - 3 * sigma -
+ 1)), int(np.round(y - 3 * sigma - 1))
+ br = int(np.round(x + 3 * sigma +
+ 2)), int(np.round(y + 3 * sigma + 2))
+
+ c, d = max(0,
+ -ul[0]), min(br[0], self.output_size[0]) - ul[0]
+ a, b = max(0,
+ -ul[1]), min(br[1], self.output_size[1]) - ul[1]
+
+ cc, dd = max(0, ul[0]), min(br[0], self.output_size[0])
+ aa, bb = max(0, ul[1]), min(br[1], self.output_size[1])
+ hms[idx, aa:bb,
+ cc:dd] = np.maximum(hms[idx, aa:bb, cc:dd], g[a:b,
+ c:d])
+ return hms
+
+
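+# A hedged usage sketch of HeatmapGenerator; the 128x128 map, 17 joints and the
+# keypoint below are arbitrary illustrative values.
+def _demo_heatmap_generator():
+    gen = HeatmapGenerator(output_size=128, num_joints=17, sigma=2)
+    joints = np.zeros((1, 17, 3), dtype=np.float32)  # (num_people, num_joints, 3)
+    joints[0, 0] = (64.0, 64.0, 1.0)  # one visible joint at the map centre
+    return gen(joints)  # -> (17, 128, 128); channel 0 holds a Gaussian blob
+
+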
+class JointsEncoder:
+ """Encode the visible joints into flattened (coordinate, score) pairs, both
+ of `int` type:
+
+ (idx * output_size**2 + y * output_size + x, 1) if visible, else (0, 0).
+
+ Args:
+ max_num_people(int): Max number of people in an image
+ num_joints(int): Number of keypoints
+ output_size(np.ndarray): Size (w, h) of feature map
+ tag_per_joint(bool): Option to use one tag map per joint.
+ """
+
+ def __init__(self, max_num_people, num_joints, output_size, tag_per_joint):
+ self.max_num_people = max_num_people
+ self.num_joints = num_joints
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+ self.output_size = np.array([output_size, output_size],
+ dtype=int)
+ self.tag_per_joint = tag_per_joint
+
+ def __call__(self, joints):
+ """
+ Note:
+ - number of people in image: N
+ - number of keypoints: K
+ - max number of people in an image: M
+
+ Args:
+ joints (np.ndarray[N,K,3])
+
+ Returns:
+ visible_kpts (np.ndarray[M,K,2]).
+ """
+ visible_kpts = np.zeros((self.max_num_people, self.num_joints, 2),
+ dtype=np.float32)
+ for i in range(len(joints)):
+ tot = 0
+ for idx, pt in enumerate(joints[i]):
+ x, y = int(pt[0]), int(pt[1])
+ if (pt[2] > 0 and 0 <= y < self.output_size[1]
+ and 0 <= x < self.output_size[0]):
+ if self.tag_per_joint:
+ visible_kpts[i][tot] = \
+ (idx * self.output_size.prod()
+ + y * self.output_size[0] + x, 1)
+ else:
+ visible_kpts[i][tot] = (y * self.output_size[0] + x, 1)
+ tot += 1
+ return visible_kpts
+
+
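+# A hedged usage sketch of JointsEncoder, mirroring the encoding formula in its
+# docstring; sizes and the single visible joint are illustrative only.
+def _demo_joints_encoder():
+    enc = JointsEncoder(
+        max_num_people=30, num_joints=17, output_size=128, tag_per_joint=True)
+    joints = np.zeros((1, 17, 3), dtype=np.float32)
+    joints[0, 0] = (64.0, 64.0, 1.0)
+    # -> (30, 17, 2); entry [0, 0] is (0 * 128 * 128 + 64 * 128 + 64, 1)
+    return enc(joints)
+
+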
+class PAFGenerator:
+ """Generate part affinity fields.
+
+ Args:
+ output_size (np.ndarray): Size (w, h) of feature map.
+ limb_width (int): Limb width of part affinity fields.
+ skeleton (list[list]): connections of joints.
+ """
+
+ def __init__(self, output_size, limb_width, skeleton):
+ if not isinstance(output_size, np.ndarray):
+ output_size = np.array(output_size)
+ if output_size.size > 1:
+ assert len(output_size) == 2
+ self.output_size = output_size
+ else:
+ self.output_size = np.array([output_size, output_size],
+ dtype=int)
+ self.limb_width = limb_width
+ self.skeleton = skeleton
+
+ def _accumulate_paf_map_(self, pafs, src, dst, count):
+ """Accumulate part affinity fields between two given joints.
+
+ Args:
+ pafs (np.ndarray[2,H,W]): paf maps (2 dimensions:x axis and
+ y axis) for a certain limb connection. This argument will
+ be modified inplace.
+ src (np.ndarray[2,]): coordinates of the source joint.
+ dst (np.ndarray[2,]): coordinates of the destination joint.
+ count (np.ndarray[H,W]): count map that preserves the number
+ of non-zero vectors at each point. This argument will be
+ modified inplace.
+ """
+ limb_vec = dst - src
+ norm = np.linalg.norm(limb_vec)
+ if norm == 0:
+ unit_limb_vec = np.zeros(2)
+ else:
+ unit_limb_vec = limb_vec / norm
+
+ min_x = max(np.floor(min(src[0], dst[0]) - self.limb_width), 0)
+ max_x = min(
+ np.ceil(max(src[0], dst[0]) + self.limb_width),
+ self.output_size[0] - 1)
+ min_y = max(np.floor(min(src[1], dst[1]) - self.limb_width), 0)
+ max_y = min(
+ np.ceil(max(src[1], dst[1]) + self.limb_width),
+ self.output_size[1] - 1)
+
+ range_x = list(range(int(min_x), int(max_x + 1), 1))
+ range_y = list(range(int(min_y), int(max_y + 1), 1))
+
+ mask = np.zeros_like(count, dtype=bool)
+ if len(range_x) > 0 and len(range_y) > 0:
+ xx, yy = np.meshgrid(range_x, range_y)
+ delta_x = xx - src[0]
+ delta_y = yy - src[1]
+ dist = np.abs(delta_x * unit_limb_vec[1] -
+ delta_y * unit_limb_vec[0])
+ mask_local = (dist < self.limb_width)
+ mask[yy, xx] = mask_local
+
+ pafs[0, mask] += unit_limb_vec[0]
+ pafs[1, mask] += unit_limb_vec[1]
+ count += mask
+
+ return pafs, count
+
+ def __call__(self, joints):
+ """Generate the target part affinity fields."""
+ pafs = np.zeros(
+ (len(self.skeleton) * 2, self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ for idx, sk in enumerate(self.skeleton):
+ count = np.zeros((self.output_size[1], self.output_size[0]),
+ dtype=np.float32)
+
+ for p in joints:
+ src = p[sk[0]]
+ dst = p[sk[1]]
+ if src[2] > 0 and dst[2] > 0:
+ self._accumulate_paf_map_(pafs[2 * idx:2 * idx + 2],
+ src[:2], dst[:2], count)
+
+ pafs[2 * idx:2 * idx + 2] /= np.maximum(count, 1)
+
+ return pafs
+
+
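+# A hedged usage sketch of PAFGenerator for a single horizontal limb; the
+# two-joint skeleton and the coordinates are made up for illustration.
+def _demo_paf_generator():
+    paf_gen = PAFGenerator(output_size=128, limb_width=1, skeleton=[[0, 1]])
+    joints = np.zeros((1, 2, 3), dtype=np.float32)
+    joints[0] = [[32, 64, 1], [96, 64, 1]]  # limb from (32, 64) to (96, 64)
+    # -> (2, 128, 128); the x-channel is ~1 along the limb, the y-channel ~0
+    return paf_gen(joints)
+
+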
+@PIPELINES.register_module()
+class BottomUpRandomFlip:
+ """Data augmentation with random image flip for bottom-up.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ image, mask, joints = results['img'], results['mask'], results[
+ 'joints']
+ self.flip_index = results['ann_info']['flip_index']
+ self.output_size = results['ann_info']['heatmap_size']
+
+ assert isinstance(mask, list)
+ assert isinstance(joints, list)
+ assert len(mask) == len(joints)
+ assert len(mask) == len(self.output_size)
+
+ if np.random.random() < self.flip_prob:
+ image = image[:, ::-1].copy() - np.zeros_like(image)
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+ _output_size = np.array([_output_size, _output_size],
+ dtype=int)
+ mask[i] = mask[i][:, ::-1].copy()
+ joints[i] = joints[i][:, self.flip_index]
+ joints[i][:, :, 0] = _output_size[0] - joints[i][:, :, 0] - 1
+ results['img'], results['mask'], results[
+ 'joints'] = image, mask, joints
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpRandomAffine:
+ """Data augmentation with random scaling & rotating.
+
+ Args:
+ rot_factor (int): Rotation range [-rot_factor, rot_factor].
+ scale_factor (list[float]): Scaling range [min_scale, max_scale].
+ scale_type (str): Pad wrt the ``long`` or ``short`` side of the image.
+ trans_factor: Translation factor.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self,
+ rot_factor,
+ scale_factor,
+ scale_type,
+ trans_factor,
+ use_udp=False):
+ self.max_rotation = rot_factor
+ self.min_scale = scale_factor[0]
+ self.max_scale = scale_factor[1]
+ self.scale_type = scale_type
+ self.trans_factor = trans_factor
+ self.use_udp = use_udp
+
+ def _get_scale(self, image_size, resized_size):
+ w, h = image_size
+ w_resized, h_resized = resized_size
+ if w / w_resized < h / h_resized:
+ if self.scale_type == 'long':
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ elif self.scale_type == 'short':
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+ else:
+ raise ValueError(f'Unknown scale type: {self.scale_type}')
+ else:
+ if self.scale_type == 'long':
+ w_pad = w
+ h_pad = w / w_resized * h_resized
+ elif self.scale_type == 'short':
+ w_pad = h / h_resized * w_resized
+ h_pad = h
+ else:
+ raise ValueError(f'Unknown scale type: {self.scale_type}')
+
+ scale = np.array([w_pad, h_pad], dtype=np.float32)
+
+ return scale
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ image, mask, joints = results['img'], results['mask'], results[
+ 'joints']
+
+ self.input_size = results['ann_info']['image_size']
+ if not isinstance(self.input_size, np.ndarray):
+ self.input_size = np.array(self.input_size)
+ if self.input_size.size > 1:
+ assert len(self.input_size) == 2
+ else:
+ self.input_size = [self.input_size, self.input_size]
+ self.output_size = results['ann_info']['heatmap_size']
+
+ assert isinstance(mask, list)
+ assert isinstance(joints, list)
+ assert len(mask) == len(joints)
+ assert len(mask) == len(self.output_size), (len(mask),
+ len(self.output_size),
+ self.output_size)
+
+ height, width = image.shape[:2]
+ if self.use_udp:
+ center = np.array(((width - 1.0) / 2, (height - 1.0) / 2))
+ else:
+ center = np.array((width / 2, height / 2))
+
+ img_scale = np.array([width, height], dtype=np.float32)
+ aug_scale = np.random.random() * (self.max_scale - self.min_scale) \
+ + self.min_scale
+ img_scale *= aug_scale
+ aug_rot = (np.random.random() * 2 - 1) * self.max_rotation
+
+ if self.trans_factor > 0:
+ dx = np.random.randint(-self.trans_factor * img_scale[0] / 200.0,
+ self.trans_factor * img_scale[0] / 200.0)
+ dy = np.random.randint(-self.trans_factor * img_scale[1] / 200.0,
+ self.trans_factor * img_scale[1] / 200.0)
+
+ center[0] += dx
+ center[1] += dy
+ if self.use_udp:
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+ _output_size = [_output_size, _output_size]
+
+ scale = self._get_scale(img_scale, _output_size)
+
+ trans = get_warp_matrix(
+ theta=aug_rot,
+ size_input=center * 2.0,
+ size_dst=np.array(
+ (_output_size[0], _output_size[1]), dtype=np.float32) -
+ 1.0,
+ size_target=scale)
+ mask[i] = cv2.warpAffine(
+ (mask[i] * 255).astype(np.uint8),
+ trans, (int(_output_size[0]), int(_output_size[1])),
+ flags=cv2.INTER_LINEAR) / 255
+ mask[i] = (mask[i] > 0.5).astype(np.float32)
+ joints[i][:, :, 0:2] = \
+ warp_affine_joints(joints[i][:, :, 0:2].copy(), trans)
+ if results['ann_info']['scale_aware_sigma']:
+ joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale
+ scale = self._get_scale(img_scale, self.input_size)
+ mat_input = get_warp_matrix(
+ theta=aug_rot,
+ size_input=center * 2.0,
+ size_dst=np.array((self.input_size[0], self.input_size[1]),
+ dtype=np.float32) - 1.0,
+ size_target=scale)
+ image = cv2.warpAffine(
+ image,
+ mat_input, (int(self.input_size[0]), int(self.input_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ for i, _output_size in enumerate(self.output_size):
+ if not isinstance(_output_size, np.ndarray):
+ _output_size = np.array(_output_size)
+ if _output_size.size > 1:
+ assert len(_output_size) == 2
+ else:
+ _output_size = [_output_size, _output_size]
+ scale = self._get_scale(img_scale, _output_size)
+ mat_output = get_affine_transform(
+ center=center,
+ scale=scale / 200.0,
+ rot=aug_rot,
+ output_size=_output_size)
+ mask[i] = cv2.warpAffine(
+ (mask[i] * 255).astype(np.uint8), mat_output,
+ (int(_output_size[0]), int(_output_size[1]))) / 255
+ mask[i] = (mask[i] > 0.5).astype(np.float32)
+
+ joints[i][:, :, 0:2] = \
+ warp_affine_joints(joints[i][:, :, 0:2], mat_output)
+ if results['ann_info']['scale_aware_sigma']:
+ joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale
+
+ scale = self._get_scale(img_scale, self.input_size)
+ mat_input = get_affine_transform(
+ center=center,
+ scale=scale / 200.0,
+ rot=aug_rot,
+ output_size=self.input_size)
+ image = cv2.warpAffine(image, mat_input, (int(
+ self.input_size[0]), int(self.input_size[1])))
+
+ results['img'], results['mask'], results[
+ 'joints'] = image, mask, joints
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGenerateHeatmapTarget:
+ """Generate multi-scale heatmap target for bottom-up.
+
+ Args:
+ sigma (int): Sigma of the heatmap Gaussian.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, sigma, use_udp=False):
+ self.sigma = sigma
+ self.use_udp = use_udp
+
+ def _generate(self, num_joints, heatmap_size):
+ """Get heatmap generator."""
+ heatmap_generator = [
+ HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp)
+ for output_size in heatmap_size
+ ]
+ return heatmap_generator
+
+ def __call__(self, results):
+ """Generate multi-scale heatmap target for bottom-up."""
+ heatmap_generator = \
+ self._generate(results['ann_info']['num_joints'],
+ results['ann_info']['heatmap_size'])
+ target_list = list()
+ joints_list = results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ heatmaps = heatmap_generator[scale_id](joints_list[scale_id])
+ target_list.append(heatmaps.astype(np.float32))
+ results['target'] = target_list
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGenerateTarget:
+ """Generate multi-scale heatmap target for associate embedding.
+
+ Args:
+ sigma (int): Sigma of heatmap Gaussian
+ max_num_people (int): Maximum number of people in an image
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, sigma, max_num_people, use_udp=False):
+ self.sigma = sigma
+ self.max_num_people = max_num_people
+ self.use_udp = use_udp
+
+ def _generate(self, num_joints, heatmap_size):
+ """Get heatmap generator and joint encoder."""
+ heatmap_generator = [
+ HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp)
+ for output_size in heatmap_size
+ ]
+ joints_encoder = [
+ JointsEncoder(self.max_num_people, num_joints, output_size, True)
+ for output_size in heatmap_size
+ ]
+ return heatmap_generator, joints_encoder
+
+ def __call__(self, results):
+ """Generate multi-scale heatmap target for bottom-up."""
+ heatmap_generator, joints_encoder = \
+ self._generate(results['ann_info']['num_joints'],
+ results['ann_info']['heatmap_size'])
+ target_list = list()
+ mask_list, joints_list = results['mask'], results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ target_t = heatmap_generator[scale_id](joints_list[scale_id])
+ joints_t = joints_encoder[scale_id](joints_list[scale_id])
+
+ target_list.append(target_t.astype(np.float32))
+ mask_list[scale_id] = mask_list[scale_id].astype(np.float32)
+ joints_list[scale_id] = joints_t.astype(np.int32)
+
+ results['masks'], results['joints'] = mask_list, joints_list
+ results['targets'] = target_list
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGeneratePAFTarget:
+ """Generate multi-scale heatmaps and part affinity fields (PAF) target for
+ bottom-up. Paper ref: Cao et al. Realtime Multi-Person 2D Human Pose
+ Estimation using Part Affinity Fields (CVPR 2017).
+
+ Args:
+ limb_width (int): Limb width of part affinity fields.
+ skeleton (list[list], optional): Connections of joints. If None, it is
+ read from ``results['ann_info']['skeleton']`` at call time.
+ """
+
+ def __init__(self, limb_width, skeleton=None):
+ self.limb_width = limb_width
+ self.skeleton = skeleton
+
+ def _generate(self, heatmap_size, skeleton):
+ """Get PAF generator."""
+ paf_generator = [
+ PAFGenerator(output_size, self.limb_width, skeleton)
+ for output_size in heatmap_size
+ ]
+ return paf_generator
+
+ def __call__(self, results):
+ """Generate multi-scale part affinity fields for bottom-up."""
+ if self.skeleton is None:
+ assert results['ann_info']['skeleton'] is not None
+ self.skeleton = results['ann_info']['skeleton']
+
+ paf_generator = \
+ self._generate(results['ann_info']['heatmap_size'],
+ self.skeleton)
+ target_list = list()
+ joints_list = results['joints']
+
+ for scale_id in range(results['ann_info']['num_scales']):
+ pafs = paf_generator[scale_id](joints_list[scale_id])
+ target_list.append(pafs.astype(np.float32))
+
+ results['target'] = target_list
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpGetImgSize:
+ """Get multi-scale image sizes for bottom-up, including base_size and
+ test_scale_factor. The aspect ratio is kept, and the image is resized to
+ `results['ann_info']['image_size'] × current_scale`.
+
+ Args:
+ test_scale_factor (List[float]): Multi-scale test factors.
+ current_scale (int): The current scale factor. Default: 1.
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, test_scale_factor, current_scale=1, use_udp=False):
+ self.test_scale_factor = test_scale_factor
+ self.min_scale = min(test_scale_factor)
+ self.current_scale = current_scale
+ self.use_udp = use_udp
+
+ def __call__(self, results):
+ """Get multi-scale image sizes for bottom-up."""
+ input_size = results['ann_info']['image_size']
+ if not isinstance(input_size, np.ndarray):
+ input_size = np.array(input_size)
+ if input_size.size > 1:
+ assert len(input_size) == 2
+ else:
+ input_size = np.array([input_size, input_size], dtype=int)
+ img = results['img']
+
+ h, w, _ = img.shape
+
+ # calculate the size for min_scale
+ min_input_w = _ceil_to_multiples_of(self.min_scale * input_size[0], 64)
+ min_input_h = _ceil_to_multiples_of(self.min_scale * input_size[1], 64)
+ if w < h:
+ w_resized = int(min_input_w * self.current_scale / self.min_scale)
+ h_resized = int(
+ _ceil_to_multiples_of(min_input_w / w * h, 64) *
+ self.current_scale / self.min_scale)
+ if self.use_udp:
+ scale_w = w - 1.0
+ scale_h = (h_resized - 1.0) / (w_resized - 1.0) * (w - 1.0)
+ else:
+ scale_w = w / 200.0
+ scale_h = h_resized / w_resized * w / 200.0
+ else:
+ h_resized = int(min_input_h * self.current_scale / self.min_scale)
+ w_resized = int(
+ _ceil_to_multiples_of(min_input_h / h * w, 64) *
+ self.current_scale / self.min_scale)
+ if self.use_udp:
+ scale_h = h - 1.0
+ scale_w = (w_resized - 1.0) / (h_resized - 1.0) * (h - 1.0)
+ else:
+ scale_h = h / 200.0
+ scale_w = w_resized / h_resized * h / 200.0
+ if self.use_udp:
+ center = (scale_w / 2.0, scale_h / 2.0)
+ else:
+ center = np.array([round(w / 2.0), round(h / 2.0)])
+ results['ann_info']['test_scale_factor'] = self.test_scale_factor
+ results['ann_info']['base_size'] = (w_resized, h_resized)
+ results['ann_info']['center'] = center
+ results['ann_info']['scale'] = np.array([scale_w, scale_h])
+
+ return results
+
+
+@PIPELINES.register_module()
+class BottomUpResizeAlign:
+ """Resize multi-scale size and align transform for bottom-up.
+
+ Args:
+ transforms (List): ToTensor & Normalize
+ use_udp (bool): To use unbiased data processing.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, transforms, use_udp=False):
+ self.transforms = Compose(transforms)
+ if use_udp:
+ self._resize_align_multi_scale = _resize_align_multi_scale_udp
+ else:
+ self._resize_align_multi_scale = _resize_align_multi_scale
+
+ def __call__(self, results):
+ """Resize multi-scale size and align transform for bottom-up."""
+ input_size = results['ann_info']['image_size']
+ if not isinstance(input_size, np.ndarray):
+ input_size = np.array(input_size)
+ if input_size.size > 1:
+ assert len(input_size) == 2
+ else:
+ input_size = np.array([input_size, input_size], dtype=int)
+ test_scale_factor = results['ann_info']['test_scale_factor']
+ aug_data = []
+
+ for _, s in enumerate(sorted(test_scale_factor, reverse=True)):
+ _results = results.copy()
+ image_resized, _, _ = self._resize_align_multi_scale(
+ _results['img'], input_size, s, min(test_scale_factor))
+ _results['img'] = image_resized
+ _results = self.transforms(_results)
+ transformed_img = _results['img'].unsqueeze(0)
+ aug_data.append(transformed_img)
+
+ results['ann_info']['aug_data'] = aug_data
+
+ return results
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/hand_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/hand_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..b83e399c4e7a5e5b07650cb01e9426da9d8cee4b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/hand_transform.py
@@ -0,0 +1,63 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from mmpose.datasets.builder import PIPELINES
+from .top_down_transform import TopDownRandomFlip
+
+
+@PIPELINES.register_module()
+class HandRandomFlip(TopDownRandomFlip):
+ """Data augmentation with random image flip. A child class of
+ TopDownRandomFlip.
+
+ Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+ 'hand_type', 'rel_root_depth' and 'ann_info'.
+
+ Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+ 'hand_type', 'rel_root_depth'.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ # base flip augmentation
+ super().__call__(results)
+
+ # flip hand type and root depth
+ hand_type = results['hand_type']
+ rel_root_depth = results['rel_root_depth']
+ flipped = results['flipped']
+ if flipped:
+ hand_type[0], hand_type[1] = hand_type[1], hand_type[0]
+ rel_root_depth = -rel_root_depth
+ results['hand_type'] = hand_type
+ results['rel_root_depth'] = rel_root_depth
+ return results
+
+
+@PIPELINES.register_module()
+class HandGenerateRelDepthTarget:
+ """Generate the target relative root depth.
+
+ Required keys: 'rel_root_depth', 'rel_root_valid', 'ann_info'.
+
+ Modified keys: 'target', 'target_weight'.
+ """
+
+ def __init__(self):
+ pass
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ rel_root_depth = results['rel_root_depth']
+ rel_root_valid = results['rel_root_valid']
+ cfg = results['ann_info']
+ D = cfg['heatmap_size_root']
+ root_depth_bound = cfg['root_depth_bound']
+ target = (rel_root_depth / root_depth_bound + 0.5) * D
+ target_weight = rel_root_valid * (target >= 0) * (target <= D)
+ results['target'] = target * np.ones(1, dtype=np.float32)
+ results['target_weight'] = target_weight * np.ones(1, dtype=np.float32)
+ return results
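+
+
+# A hedged usage sketch of HandGenerateRelDepthTarget; heatmap_size_root=64 and
+# root_depth_bound=400 are hypothetical config values, not from a real dataset.
+def _demo_rel_depth_target():
+    gen = HandGenerateRelDepthTarget()
+    results = dict(
+        rel_root_depth=0.0,
+        rel_root_valid=1.0,
+        ann_info=dict(heatmap_size_root=64, root_depth_bound=400.0))
+    results = gen(results)
+    # target == (0 / 400 + 0.5) * 64 == 32.0; target_weight == 1.0 (32 is in [0, 64])
+    return results['target'], results['target_weight']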
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/loading.py b/vendor/ViTPose/mmpose/datasets/pipelines/loading.py
new file mode 100644
index 0000000000000000000000000000000000000000..64750056438e8c06bcc4083dc1e8164f0671cd0f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/loading.py
@@ -0,0 +1,91 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class LoadImageFromFile:
+ """Loading image(s) from file.
+
+ Required key: "image_file".
+
+ Added key: "img".
+
+ Args:
+ to_float32 (bool): Whether to convert the loaded image to a float32
+ numpy array. If set to False, the loaded image is an uint8 array.
+ Defaults to False.
+ color_type (str): Flags specifying the color type of a loaded image,
+ candidates are 'color', 'grayscale' and 'unchanged'.
+ channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmcv.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ """
+
+ def __init__(self,
+ to_float32=False,
+ color_type='color',
+ channel_order='rgb',
+ file_client_args=dict(backend='disk')):
+ self.to_float32 = to_float32
+ self.color_type = color_type
+ self.channel_order = channel_order
+ self.file_client_args = file_client_args.copy()
+ self.file_client = None
+
+ def _read_image(self, path):
+ img_bytes = self.file_client.get(path)
+ img = mmcv.imfrombytes(
+ img_bytes, flag=self.color_type, channel_order=self.channel_order)
+ if img is None:
+ raise ValueError(f'Failed to read {path}')
+ if self.to_float32:
+ img = img.astype(np.float32)
+ return img
+
+ def __call__(self, results):
+ """Loading image(s) from file."""
+ if self.file_client is None:
+ self.file_client = mmcv.FileClient(**self.file_client_args)
+
+ image_file = results.get('image_file', None)
+
+ if isinstance(image_file, (list, tuple)):
+ # Load images from a list of paths
+ results['img'] = [self._read_image(path) for path in image_file]
+ elif image_file is not None:
+ # Load single image from path
+ results['img'] = self._read_image(image_file)
+ else:
+ if 'img' not in results:
+ # If `image_file` is not in results, check that `img` exists
+ # and format the image. This is for compatibility when the image
+ # is manually set outside the pipeline.
+ raise KeyError('Either `image_file` or `img` should exist in '
+ 'results.')
+ assert isinstance(results['img'], np.ndarray)
+ if self.color_type == 'color' and self.channel_order == 'rgb':
+ # The original results['img'] is assumed to be image(s) in BGR
+ # order, so we convert the color according to the arguments.
+ if results['img'].ndim == 3:
+ results['img'] = mmcv.bgr2rgb(results['img'])
+ elif results['img'].ndim == 4:
+ results['img'] = np.concatenate(
+ [mmcv.bgr2rgb(img) for img in results['img']], axis=0)
+ else:
+ raise ValueError('results["img"] has invalid shape '
+ f'{results["img"].shape}')
+
+ results['image_file'] = None
+
+ return results
+
+ def __repr__(self):
+ repr_str = (f'{self.__class__.__name__}('
+ f'to_float32={self.to_float32}, '
+ f"color_type='{self.color_type}', "
+ f'file_client_args={self.file_client_args})')
+ return repr_str
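+
+
+# A hedged usage sketch of LoadImageFromFile; 'demo/person.jpg' is a hypothetical
+# path, any readable image file would do.
+def _demo_load_image():
+    load = LoadImageFromFile(channel_order='rgb')
+    results = dict(image_file='demo/person.jpg')
+    results = load(results)  # results['img'] is an HxWx3 uint8 array in RGB order
+    return results['img']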
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/mesh_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/mesh_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3f32febcf01f37daa4957bfb0f17b8478773d59
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/mesh_transform.py
@@ -0,0 +1,399 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import mmcv
+import numpy as np
+import torch
+
+from mmpose.core.post_processing import (affine_transform, fliplr_joints,
+ get_affine_transform)
+from mmpose.datasets.builder import PIPELINES
+
+
+def _flip_smpl_pose(pose):
+ """Flip SMPL pose parameters horizontally.
+
+ Args:
+ pose (np.ndarray([72])): SMPL pose parameters
+
+ Returns:
+ pose_flipped
+ """
+
+ flippedParts = [
+ 0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13, 14, 18, 19,
+ 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33, 34, 35, 30, 31, 32, 36, 37,
+ 38, 42, 43, 44, 39, 40, 41, 45, 46, 47, 51, 52, 53, 48, 49, 50, 57, 58,
+ 59, 54, 55, 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 67, 68
+ ]
+ pose_flipped = pose[flippedParts]
+ # Negate the second and the third dimension of the axis-angle
+ pose_flipped[1::3] = -pose_flipped[1::3]
+ pose_flipped[2::3] = -pose_flipped[2::3]
+ return pose_flipped
+
+
+def _flip_iuv(iuv, uv_type='BF'):
+ """Flip IUV image horizontally.
+
+ Note:
+ IUV image height: H
+ IUV image width: W
+
+ Args:
+ iuv np.ndarray([H, W, 3]): IUV image
+ uv_type (str): The type of the UV map.
+ Candidate values:
+ 'DP': The UV map used in DensePose project.
+ 'SMPL': The default UV map of SMPL model.
+ 'BF': The UV map used in DecoMR project.
+ Default: 'BF'
+
+ Returns:
+ iuv_flipped np.ndarray([H, W, 3]): Flipped IUV image
+ """
+ assert uv_type in ['DP', 'SMPL', 'BF']
+ if uv_type == 'BF':
+ iuv_flipped = iuv[:, ::-1, :]
+ iuv_flipped[:, :, 1] = 255 - iuv_flipped[:, :, 1]
+ else:
+ # Flipping other UV map types is more involved and not implemented yet.
+ raise NotImplementedError(
+ f'The flip of {uv_type} UV map is not implemented yet.')
+
+ return iuv_flipped
+
+
+def _construct_rotation_matrix(rot, size=3):
+ """Construct the in-plane rotation matrix.
+
+ Args:
+ rot (float): Rotation angle (degree).
+ size (int): The size of the rotation matrix.
+ Candidate Values: 2, 3. Defaults to 3.
+
+ Returns:
+ rot_mat (np.ndarray([size, size]): Rotation matrix.
+ """
+ rot_mat = np.eye(size, dtype=np.float32)
+ if rot != 0:
+ rot_rad = np.deg2rad(rot)
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+ rot_mat[0, :2] = [cs, -sn]
+ rot_mat[1, :2] = [sn, cs]
+
+ return rot_mat
+
+
+def _rotate_joints_3d(joints_3d, rot):
+ """Rotate the 3D joints in the local coordinates.
+
+ Note:
+ Joints number: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ rot (float): Rotation angle (degree).
+
+ Returns:
+ joints_3d_rotated
+ """
+ # in-plane rotation
+ # 3D joints are rotated counterclockwise,
+ # so the rotation angle is inverted.
+ rot_mat = _construct_rotation_matrix(-rot, 3)
+
+ joints_3d_rotated = np.einsum('ij,kj->ki', rot_mat, joints_3d)
+ joints_3d_rotated = joints_3d_rotated.astype('float32')
+ return joints_3d_rotated
+
+
+def _rotate_smpl_pose(pose, rot):
+ """Rotate SMPL pose parameters. SMPL (https://smpl.is.tue.mpg.de/) is a 3D
+ human model.
+
+ Args:
+ pose (np.ndarray([72])): SMPL pose parameters
+ rot (float): Rotation angle (degree).
+
+ Returns:
+ pose_rotated
+ """
+ pose_rotated = pose.copy()
+ if rot != 0:
+ rot_mat = _construct_rotation_matrix(-rot)
+ orient = pose[:3]
+ # find the rotation of the body in camera frame
+ per_rdg, _ = cv2.Rodrigues(orient)
+ # apply the global rotation to the global orientation
+ res_rot, _ = cv2.Rodrigues(np.dot(rot_mat, per_rdg))
+ pose_rotated[:3] = (res_rot.T)[0]
+
+ return pose_rotated
+
+
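+# A hedged sketch of _rotate_smpl_pose on an all-zero pose: only the global
+# orientation (first 3 axis-angle entries) changes, becoming roughly
+# [0, 0, -0.5236], i.e. a -30 degree rotation about the z axis.
+def _demo_rotate_smpl_pose():
+    pose = np.zeros(72, dtype=np.float32)
+    return _rotate_smpl_pose(pose, 30)
+
+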
+def _flip_joints_3d(joints_3d, joints_3d_visible, flip_pairs):
+ """Flip human joints in 3D space horizontally.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ joints_3d (np.ndarray([K, 3])): Coordinates of keypoints.
+ joints_3d_visible (np.ndarray([K, 1])): Visibility of keypoints.
+ flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+
+ Returns:
+ joints_3d_flipped, joints_3d_visible_flipped
+ """
+
+ assert len(joints_3d) == len(joints_3d_visible)
+
+ joints_3d_flipped = joints_3d.copy()
+ joints_3d_visible_flipped = joints_3d_visible.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ joints_3d_flipped[left, :] = joints_3d[right, :]
+ joints_3d_flipped[right, :] = joints_3d[left, :]
+
+ joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
+ joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]
+
+ # Flip horizontally
+ joints_3d_flipped[:, 0] = -joints_3d_flipped[:, 0]
+ joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped
+
+ return joints_3d_flipped, joints_3d_visible_flipped
+
+
+@PIPELINES.register_module()
+class LoadIUVFromFile:
+ """Loading IUV image from file."""
+
+ def __init__(self, to_float32=False):
+ self.to_float32 = to_float32
+ self.color_type = 'color'
+ # channel relations: iuv->bgr
+ self.channel_order = 'bgr'
+
+ def __call__(self, results):
+ """Loading image from file."""
+ has_iuv = results['has_iuv']
+ use_iuv = results['ann_info']['use_IUV']
+ if has_iuv and use_iuv:
+ iuv_file = results['iuv_file']
+ iuv = mmcv.imread(iuv_file, self.color_type, self.channel_order)
+ if iuv is None:
+ raise ValueError(f'Fail to read {iuv_file}')
+ else:
+ has_iuv = 0
+ iuv = None
+
+ results['has_iuv'] = has_iuv
+ results['iuv'] = iuv
+ return results
+
+
+@PIPELINES.register_module()
+class IUVToTensor:
+ """Transform an IUV image into a part-index mask and a UV-coordinate image.
+ The 3 channels of the IUV image are: part index, u coordinate, v coordinate.
+
+ Required key: 'iuv', 'ann_info'.
+ Modifies key: 'part_index', 'uv_coordinates'.
+
+ Args:
+ results (dict): contains all information about training.
+ """
+
+ def __call__(self, results):
+ iuv = results['iuv']
+ if iuv is None:
+ H, W = results['ann_info']['iuv_size']
+ part_index = torch.zeros([1, H, W], dtype=torch.long)
+ uv_coordinates = torch.zeros([2, H, W], dtype=torch.float32)
+ else:
+ part_index = torch.LongTensor(iuv[:, :, 0])[None, :, :]
+ uv_coordinates = torch.FloatTensor(iuv[:, :, 1:]) / 255
+ uv_coordinates = uv_coordinates.permute(2, 0, 1)
+ results['part_index'] = part_index
+ results['uv_coordinates'] = uv_coordinates
+ return results
+
+
+@PIPELINES.register_module()
+class MeshRandomChannelNoise:
+ """Data augmentation with random channel noise.
+
+ Required keys: 'img'
+ Modifies key: 'img'
+
+ Args:
+ noise_factor (float): Each channel is multiplied by a factor sampled
+ uniformly from ``[1 - noise_factor, 1 + noise_factor]``.
+ """
+
+ def __init__(self, noise_factor=0.4):
+ self.noise_factor = noise_factor
+
+ def __call__(self, results):
+ """Perform data augmentation with random channel noise."""
+ img = results['img']
+
+ # Each channel is multiplied with a number
+ # in the area [1-self.noise_factor, 1+self.noise_factor]
+ pn = np.random.uniform(1 - self.noise_factor, 1 + self.noise_factor,
+ (1, 3))
+ img = cv2.multiply(img, pn)
+
+ results['img'] = img
+ return results
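+# Usage sketch (assumption, not part of the vendored configs): in an
+# mmpose-style pipeline this transform would typically appear as
+#   dict(type='MeshRandomChannelNoise', noise_factor=0.4)
+# and each BGR channel of results['img'] is then scaled independently.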
+
+
+@PIPELINES.register_module()
+class MeshRandomFlip:
+ """Data augmentation with random image flip.
+
+ Required keys: 'img', 'joints_2d','joints_2d_visible', 'joints_3d',
+ 'joints_3d_visible', 'center', 'pose', 'iuv' and 'ann_info'.
+ Modifies key: 'img', 'joints_2d','joints_2d_visible', 'joints_3d',
+ 'joints_3d_visible', 'center', 'pose', 'iuv'.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ if np.random.rand() > self.flip_prob:
+ return results
+
+ img = results['img']
+ joints_2d = results['joints_2d']
+ joints_2d_visible = results['joints_2d_visible']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ pose = results['pose']
+ center = results['center']
+
+ img = img[:, ::-1, :]
+ pose = _flip_smpl_pose(pose)
+
+ joints_2d, joints_2d_visible = fliplr_joints(
+ joints_2d, joints_2d_visible, img.shape[1],
+ results['ann_info']['flip_pairs'])
+
+ joints_3d, joints_3d_visible = _flip_joints_3d(
+ joints_3d, joints_3d_visible, results['ann_info']['flip_pairs'])
+ center[0] = img.shape[1] - center[0] - 1
+
+ if 'iuv' in results.keys():
+ iuv = results['iuv']
+ if iuv is not None:
+ iuv = _flip_iuv(iuv, results['ann_info']['uv_type'])
+ results['iuv'] = iuv
+
+ results['img'] = img
+ results['joints_2d'] = joints_2d
+ results['joints_2d_visible'] = joints_2d_visible
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+ results['pose'] = pose
+ results['center'] = center
+ return results
+
+
+@PIPELINES.register_module()
+class MeshGetRandomScaleRotation:
+ """Data augmentation with random scaling & rotating.
+
+ Required key: 'scale'. Modifies key: 'scale' and 'rotation'.
+
+ Args:
+ rot_factor (int): Rotation angle is sampled from
+ ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor (float): Scale factor is sampled from
+ ``[1-scale_factor, 1+scale_factor]``.
+ rot_prob (float): Probability of random rotation.
+ """
+
+ def __init__(self, rot_factor=30, scale_factor=0.25, rot_prob=0.6):
+ self.rot_factor = rot_factor
+ self.scale_factor = scale_factor
+ self.rot_prob = rot_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ s = results['scale']
+
+ sf = self.scale_factor
+ rf = self.rot_factor
+
+ s_factor = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ s = s * s_factor
+
+ r_factor = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
+ r = r_factor if np.random.rand() <= self.rot_prob else 0
+
+ results['scale'] = s
+ results['rotation'] = r
+
+ return results
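+# Worked numbers (illustrative): with the defaults rot_factor=30 and
+# scale_factor=0.25, the scale multiplier is clipped to [0.75, 1.25] and the
+# rotation, applied with probability rot_prob=0.6, is clipped to
+# [-60, 60] degrees.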
+
+
+@PIPELINES.register_module()
+class MeshAffine:
+ """Affine-transform the image to produce the network input, and apply the
+ same transform to the 2D keypoints, 3D keypoints and IUV image.
+
+ Required keys: 'img', 'joints_2d','joints_2d_visible', 'joints_3d',
+ 'joints_3d_visible', 'pose', 'iuv', 'ann_info','scale', 'rotation' and
+ 'center'. Modifies key: 'img', 'joints_2d','joints_2d_visible',
+ 'joints_3d', 'pose', 'iuv'.
+ """
+
+ def __call__(self, results):
+ image_size = results['ann_info']['image_size']
+
+ img = results['img']
+ joints_2d = results['joints_2d']
+ joints_2d_visible = results['joints_2d_visible']
+ joints_3d = results['joints_3d']
+ pose = results['pose']
+
+ c = results['center']
+ s = results['scale']
+ r = results['rotation']
+ trans = get_affine_transform(c, s, r, image_size)
+
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+
+ for i in range(results['ann_info']['num_joints']):
+ if joints_2d_visible[i, 0] > 0.0:
+ joints_2d[i] = affine_transform(joints_2d[i], trans)
+
+ joints_3d = _rotate_joints_3d(joints_3d, r)
+ pose = _rotate_smpl_pose(pose, r)
+
+ results['img'] = img
+ results['joints_2d'] = joints_2d
+ results['joints_2d_visible'] = joints_2d_visible
+ results['joints_3d'] = joints_3d
+ results['pose'] = pose
+
+ if 'iuv' in results.keys():
+ iuv = results['iuv']
+ if iuv is not None:
+ iuv_size = results['ann_info']['iuv_size']
+ iuv = cv2.warpAffine(
+ iuv,
+ trans, (int(iuv_size[0]), int(iuv_size[1])),
+ flags=cv2.INTER_NEAREST)
+ results['iuv'] = iuv
+
+ return results
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/pose3d_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/pose3d_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..124937861f71bf8148641d59dbb42bd47457c902
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/pose3d_transform.py
@@ -0,0 +1,643 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.utils import build_from_cfg
+
+from mmpose.core.camera import CAMERAS
+from mmpose.core.post_processing import fliplr_regression
+from mmpose.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class GetRootCenteredPose:
+ """Zero-center the pose around a given root joint. Optionally, the root
+ joint can be removed from the original pose and stored as a separate item.
+
+ Note that the root-centered joints may no longer align with some annotation
+ information (e.g. flip_pairs, num_joints, inference_channel, etc.) due to
+ the removal of the root joint.
+
+ Args:
+ item (str): The name of the pose to apply root-centering.
+ root_index (int): Root joint index in the pose.
+ visible_item (str): The name of the visibility item.
+ remove_root (bool): If true, remove the root joint from the pose
+ root_name (str): Optional. If not none, it will be used as the key to
+ store the root position separated from the original pose.
+
+ Required keys:
+ item
+
+ Modified keys:
+ item, visible_item, root_name
+ """
+
+ def __init__(self,
+ item,
+ root_index,
+ visible_item=None,
+ remove_root=False,
+ root_name=None):
+ self.item = item
+ self.root_index = root_index
+ self.remove_root = remove_root
+ self.root_name = root_name
+ self.visible_item = visible_item
+
+ def __call__(self, results):
+ assert self.item in results
+ joints = results[self.item]
+ root_idx = self.root_index
+
+ assert joints.ndim >= 2 and joints.shape[-2] > root_idx,\
+ f'Got invalid joint shape {joints.shape}'
+
+ root = joints[..., root_idx:root_idx + 1, :]
+ joints = joints - root
+
+ results[self.item] = joints
+ if self.root_name is not None:
+ results[self.root_name] = root
+
+ if self.remove_root:
+ results[self.item] = np.delete(
+ results[self.item], root_idx, axis=-2)
+ if self.visible_item is not None:
+ assert self.visible_item in results
+ results[self.visible_item] = np.delete(
+ results[self.visible_item], root_idx, axis=-2)
+ # Add a flag so that later transforms that rely on the root
+ # joint or the original joint indices can detect the removal
+ results[f'{self.item}_root_removed'] = True
+
+ # Save the root index which is necessary to restore the global pose
+ if self.root_name is not None:
+ results[f'{self.root_name}_index'] = self.root_index
+
+ return results
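+# Config sketch (key names are illustrative assumptions):
+#   dict(type='GetRootCenteredPose', item='target', root_index=0,
+#        root_name='global_position', remove_root=False)
+# zero-centers results['target'] around joint 0 and stores the root position
+# under results['global_position'].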
+
+
+@PIPELINES.register_module()
+class NormalizeJointCoordinate:
+ """Normalize the joint coordinate with given mean and std.
+
+ Args:
+ item (str): The name of the pose to normalize.
+ mean (array): Mean values of joint coordinates in shape [K, C].
+ std (array): Std values of joint coordinates in shape [K, C].
+ norm_param_file (str): Optionally load a dict containing `mean` and
+ `std` from a file using `mmcv.load`.
+
+ Required keys:
+ item
+
+ Modified keys:
+ item
+ """
+
+ def __init__(self, item, mean=None, std=None, norm_param_file=None):
+ self.item = item
+ self.norm_param_file = norm_param_file
+ if norm_param_file is not None:
+ norm_param = mmcv.load(norm_param_file)
+ assert 'mean' in norm_param and 'std' in norm_param
+ mean = norm_param['mean']
+ std = norm_param['std']
+ else:
+ assert mean is not None
+ assert std is not None
+
+ self.mean = np.array(mean, dtype=np.float32)
+ self.std = np.array(std, dtype=np.float32)
+
+ def __call__(self, results):
+ assert self.item in results
+ results[self.item] = (results[self.item] - self.mean) / self.std
+ results[f'{self.item}_mean'] = self.mean.copy()
+ results[f'{self.item}_std'] = self.std.copy()
+ return results
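+# Config sketch (the file name is a placeholder assumption):
+#   dict(type='NormalizeJointCoordinate', item='target',
+#        norm_param_file='joint_norm_param.pkl')
+# loads a dict with 'mean' and 'std' via mmcv.load and standardizes
+# results['target'] per joint coordinate.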
+
+
+@PIPELINES.register_module()
+class ImageCoordinateNormalization:
+ """Normalize the 2D joint coordinate with image width and height. Range [0,
+ w] is mapped to [-1, 1], while preserving the aspect ratio.
+
+ Args:
+ item (str|list[str]): The name of the pose to normalize.
+ norm_camera (bool): Whether to normalize camera intrinsics.
+ Default: False.
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+ item
+
+ Modified keys:
+ item (, camera_param)
+ """
+
+ def __init__(self, item, norm_camera=False, camera_param=None):
+ self.item = item
+ if isinstance(self.item, str):
+ self.item = [self.item]
+
+ self.norm_camera = norm_camera
+
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+
+ def __call__(self, results):
+ center = np.array(
+ [0.5 * results['image_width'], 0.5 * results['image_height']],
+ dtype=np.float32)
+ scale = np.array(0.5 * results['image_width'], dtype=np.float32)
+
+ for item in self.item:
+ results[item] = (results[item] - center) / scale
+
+ if self.norm_camera:
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, \
+ 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'f' in camera_param and 'c' in camera_param
+ camera_param['f'] = camera_param['f'] / scale
+ camera_param['c'] = (camera_param['c'] - center[:, None]) / scale
+ if 'camera_param' not in results:
+ results['camera_param'] = dict()
+ results['camera_param'].update(camera_param)
+
+ return results
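+# Worked example (illustrative): for image_width=1000 and image_height=1002,
+# a 2D joint at (1000, 1001) maps to ((1000 - 500) / 500, (1001 - 501) / 500)
+# = (1.0, 1.0); both axes are divided by half the image *width*, which is how
+# the aspect ratio is preserved.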
+
+
+@PIPELINES.register_module()
+class CollectCameraIntrinsics:
+ """Store camera intrinsics in a 1-dim array, including f, c, k, p.
+
+ Args:
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+ need_distortion (bool): Whether the distortion parameters k and p are needed.
+ Default: True.
+
+ Required keys:
+ camera_param (if camera parameters are not given in initialization)
+
+ Modified keys:
+ intrinsics
+ """
+
+ def __init__(self, camera_param=None, need_distortion=True):
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+ self.need_distortion = need_distortion
+
+ def __call__(self, results):
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'f' in camera_param and 'c' in camera_param
+ intrinsics = np.concatenate(
+ [camera_param['f'].reshape(2), camera_param['c'].reshape(2)])
+ if self.need_distortion:
+ assert 'k' in camera_param and 'p' in camera_param
+ intrinsics = np.concatenate([
+ intrinsics, camera_param['k'].reshape(3),
+ camera_param['p'].reshape(2)
+ ])
+ results['intrinsics'] = intrinsics
+
+ return results
+
+
+@PIPELINES.register_module()
+class CameraProjection:
+ """Apply camera projection to joint coordinates.
+
+ Args:
+ item (str): The name of the pose to apply camera projection.
+ mode (str): The type of camera projection, supported options are
+
+ - world_to_camera
+ - world_to_pixel
+ - camera_to_world
+ - camera_to_pixel
+ output_name (str|None): The name of the projected pose. If None
+ (default) is given, the projected pose will be stored in place.
+ camera_type (str): The camera class name (should be registered in
+ CAMERA).
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+
+ - item
+ - camera_param (if camera parameters are not given in initialization)
+
+ Modified keys:
+ output_name
+ """
+
+ def __init__(self,
+ item,
+ mode,
+ output_name=None,
+ camera_type='SimpleCamera',
+ camera_param=None):
+ self.item = item
+ self.mode = mode
+ self.output_name = output_name
+ self.camera_type = camera_type
+ allowed_mode = {
+ 'world_to_camera',
+ 'world_to_pixel',
+ 'camera_to_world',
+ 'camera_to_pixel',
+ }
+ if mode not in allowed_mode:
+ raise ValueError(
+ f'Got invalid mode: {mode}, allowed modes are {allowed_mode}')
+
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera = self._build_camera(camera_param)
+
+ def _build_camera(self, param):
+ cfgs = dict(type=self.camera_type, param=param)
+ return build_from_cfg(cfgs, CAMERAS)
+
+ def __call__(self, results):
+ assert self.item in results
+ joints = results[self.item]
+
+ if self.static_camera:
+ camera = self.camera
+ else:
+ assert 'camera_param' in results, 'Camera parameters are missing.'
+ camera = self._build_camera(results['camera_param'])
+
+ if self.mode == 'world_to_camera':
+ output = camera.world_to_camera(joints)
+ elif self.mode == 'world_to_pixel':
+ output = camera.world_to_pixel(joints)
+ elif self.mode == 'camera_to_world':
+ output = camera.camera_to_world(joints)
+ elif self.mode == 'camera_to_pixel':
+ output = camera.camera_to_pixel(joints)
+ else:
+ raise NotImplementedError
+
+ output_name = self.output_name
+ if output_name is None:
+ output_name = self.item
+
+ results[output_name] = output
+ return results
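+# Config sketch ('joints_3d_cam' is an illustrative output key, not one
+# required elsewhere in this file):
+#   dict(type='CameraProjection', item='joints_3d', mode='world_to_camera',
+#        output_name='joints_3d_cam', camera_type='SimpleCamera')
+# The camera parameters are read from results['camera_param'] unless a static
+# camera_param dict is passed at construction time.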
+
+
+@PIPELINES.register_module()
+class RelativeJointRandomFlip:
+ """Data augmentation with random horizontal joint flip around a root joint.
+
+ Args:
+ item (str|list[str]): The name of the pose to flip.
+ flip_cfg (dict|list[dict]): Configurations of the fliplr_regression
+ function. It should contain the following arguments:
+
+ - ``center_mode``: The mode to set the center location on the \
+ x-axis to flip around.
+ - ``center_x`` or ``center_index``: Set the x-axis location or \
+ the root joint's index to define the flip center.
+
+ Please refer to the docstring of the fliplr_regression function for
+ more details.
+ visible_item (str|list[str]): The name of the visibility item which
+ will be flipped accordingly along with the pose.
+ flip_prob (float): Probability of flip.
+ flip_camera (bool): Whether to flip horizontal distortion coefficients.
+ camera_param (dict|None): The camera parameter dict. See the camera
+ class definition for more details. If None is given, the camera
+ parameter will be obtained during processing of each data sample
+ with the key "camera_param".
+
+ Required keys:
+ item
+
+ Modified keys:
+ item (, camera_param)
+ """
+
+ def __init__(self,
+ item,
+ flip_cfg,
+ visible_item=None,
+ flip_prob=0.5,
+ flip_camera=False,
+ camera_param=None):
+ self.item = item
+ self.flip_cfg = flip_cfg
+ self.vis_item = visible_item
+ self.flip_prob = flip_prob
+ self.flip_camera = flip_camera
+ if camera_param is None:
+ self.static_camera = False
+ else:
+ self.static_camera = True
+ self.camera_param = camera_param
+
+ if isinstance(self.item, str):
+ self.item = [self.item]
+ if isinstance(self.flip_cfg, dict):
+ self.flip_cfg = [self.flip_cfg] * len(self.item)
+ assert len(self.item) == len(self.flip_cfg)
+ if isinstance(self.vis_item, str):
+ self.vis_item = [self.vis_item]
+
+ def __call__(self, results):
+
+ if results.get(f'{self.item}_root_removed', False):
+ raise RuntimeError('The transform RelativeJointRandomFlip should '
+ f'not be applied to {self.item} whose root '
+ 'joint has been removed and joint indices have '
+ 'been changed')
+
+ if np.random.rand() <= self.flip_prob:
+
+ flip_pairs = results['ann_info']['flip_pairs']
+
+ # flip joint coordinates
+ for i, item in enumerate(self.item):
+ assert item in results
+ joints = results[item]
+
+ joints_flipped = fliplr_regression(joints, flip_pairs,
+ **self.flip_cfg[i])
+
+ results[item] = joints_flipped
+
+ # flip joint visibility
+ for vis_item in self.vis_item:
+ assert vis_item in results
+ visible = results[vis_item]
+ visible_flipped = visible.copy()
+ for left, right in flip_pairs:
+ visible_flipped[..., left, :] = visible[..., right, :]
+ visible_flipped[..., right, :] = visible[..., left, :]
+ results[vis_item] = visible_flipped
+
+ # flip horizontal distortion coefficients
+ if self.flip_camera:
+ if self.static_camera:
+ camera_param = copy.deepcopy(self.camera_param)
+ else:
+ assert 'camera_param' in results, \
+ 'Camera parameters are missing.'
+ camera_param = results['camera_param']
+ assert 'c' in camera_param
+ camera_param['c'][0] *= -1
+
+ if 'p' in camera_param:
+ camera_param['p'][0] *= -1
+
+ if 'camera_param' not in results:
+ results['camera_param'] = dict()
+ results['camera_param'].update(camera_param)
+
+ return results
+
+
+@PIPELINES.register_module()
+class PoseSequenceToTensor:
+ """Convert pose sequence from numpy array to Tensor.
+
+ The original pose sequence should have a shape of [T, K, C] or [K, C],
+ where T is the sequence length, K is the number of keypoints and C is the
+ coordinate dimension. The converted pose sequence has a shape of [K*C, T].
+
+ Args:
+ item (str): The name of the pose sequence
+
+ Required keys:
+ item
+
+ Modified keys:
+ item
+ """
+
+ def __init__(self, item):
+ self.item = item
+
+ def __call__(self, results):
+ assert self.item in results
+ seq = results[self.item]
+
+ assert isinstance(seq, np.ndarray)
+ assert seq.ndim in {2, 3}
+
+ if seq.ndim == 2:
+ seq = seq[None, ...]
+
+ T = seq.shape[0]
+ seq = seq.transpose(1, 2, 0).reshape(-1, T)
+ results[self.item] = torch.from_numpy(seq)
+
+ return results
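+# Shape illustration: a sequence of shape [T=27, K=17, C=2] becomes a tensor
+# of shape [K*C, T] = [34, 27]; a single pose of shape [K, C] is treated as a
+# length-1 sequence and becomes [K*C, 1].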
+
+
+@PIPELINES.register_module()
+class Generate3DHeatmapTarget:
+ """Generate the target 3d heatmap.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+ Modified keys: 'target', and 'target_weight'.
+
+ Args:
+ sigma: Sigma of heatmap gaussian.
+ joint_indices (list): Indices of joints used for heatmap generation.
+ If None (default) is given, all joints will be used.
+ max_bound (float): The maximal value of heatmap.
+ """
+
+ def __init__(self, sigma=2, joint_indices=None, max_bound=1.0):
+ self.sigma = sigma
+ self.joint_indices = joint_indices
+ self.max_bound = max_bound
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ cfg = results['ann_info']
+ image_size = cfg['image_size']
+ W, H, D = cfg['heatmap_size']
+ heatmap3d_depth_bound = cfg['heatmap3d_depth_bound']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ # select the joints used for target generation
+ if self.joint_indices is not None:
+ joints_3d = joints_3d[self.joint_indices, ...]
+ joints_3d_visible = joints_3d_visible[self.joint_indices, ...]
+ joint_weights = joint_weights[self.joint_indices, ...]
+ num_joints = joints_3d.shape[0]
+
+ # get the joint location in heatmap coordinates
+ mu_x = joints_3d[:, 0] * W / image_size[0]
+ mu_y = joints_3d[:, 1] * H / image_size[1]
+ mu_z = (joints_3d[:, 2] / heatmap3d_depth_bound + 0.5) * D
+
+ target = np.zeros([num_joints, D, H, W], dtype=np.float32)
+
+ target_weight = joints_3d_visible[:, 0].astype(np.float32)
+ target_weight = target_weight * (mu_z >= 0) * (mu_z < D)
+ if use_different_joint_weights:
+ target_weight = target_weight * joint_weights
+ target_weight = target_weight[:, None]
+
+ # only compute the voxel value near the joints location
+ tmp_size = 3 * self.sigma
+
+ # get neighboring voxels coordinates
+ x = y = z = np.arange(2 * tmp_size + 1, dtype=np.float32) - tmp_size
+ zz, yy, xx = np.meshgrid(z, y, x)
+ xx = xx[None, ...].astype(np.float32)
+ yy = yy[None, ...].astype(np.float32)
+ zz = zz[None, ...].astype(np.float32)
+ mu_x = mu_x[..., None, None, None]
+ mu_y = mu_y[..., None, None, None]
+ mu_z = mu_z[..., None, None, None]
+ xx, yy, zz = xx + mu_x, yy + mu_y, zz + mu_z
+
+ # round the coordinates
+ xx = xx.round().clip(0, W - 1)
+ yy = yy.round().clip(0, H - 1)
+ zz = zz.round().clip(0, D - 1)
+
+ # compute the target value near joints
+ local_target = \
+ np.exp(-((xx - mu_x)**2 + (yy - mu_y)**2 + (zz - mu_z)**2) /
+ (2 * self.sigma**2))
+
+ # put the local target value to the full target heatmap
+ local_size = xx.shape[1]
+ idx_joints = np.tile(
+ np.arange(num_joints)[:, None, None, None],
+ [1, local_size, local_size, local_size])
+ idx = np.stack([idx_joints, zz, yy, xx],
+ axis=-1).astype(int).reshape(-1, 4)
+ target[idx[:, 0], idx[:, 1], idx[:, 2],
+ idx[:, 3]] = local_target.reshape(-1)
+ target = target * self.max_bound
+ results['target'] = target
+ results['target_weight'] = target_weight
+ return results
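+# Worked example (config values are illustrative assumptions): with a depth
+# resolution D=64 and heatmap3d_depth_bound=400, a joint at relative depth
+# z=0 maps to mu_z = (0 / 400 + 0.5) * 64 = 32, the centre of the depth axis;
+# depths outside the bound are given zero target weight.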
+
+
+@PIPELINES.register_module()
+class GenerateVoxel3DHeatmapTarget:
+ """Generate the target 3d heatmap.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info_3d'.
+ Modified keys: 'target', and 'target_weight'.
+
+ Args:
+ sigma: Sigma of heatmap gaussian (mm).
+ joint_indices (list): Indices of joints used for heatmap generation.
+ If None (default) is given, all joints will be used.
+ """
+
+ def __init__(self, sigma=200.0, joint_indices=None):
+ self.sigma = sigma # mm
+ self.joint_indices = joint_indices
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ cfg = results['ann_info']
+
+ num_people = len(joints_3d)
+ num_joints = joints_3d[0].shape[0]
+
+ if self.joint_indices is not None:
+ num_joints = len(self.joint_indices)
+ joint_indices = self.joint_indices
+ else:
+ joint_indices = list(range(num_joints))
+
+ space_size = cfg['space_size']
+ space_center = cfg['space_center']
+ cube_size = cfg['cube_size']
+ grids_x = np.linspace(-space_size[0] / 2, space_size[0] / 2,
+ cube_size[0]) + space_center[0]
+ grids_y = np.linspace(-space_size[1] / 2, space_size[1] / 2,
+ cube_size[1]) + space_center[1]
+ grids_z = np.linspace(-space_size[2] / 2, space_size[2] / 2,
+ cube_size[2]) + space_center[2]
+
+ target = np.zeros(
+ (num_joints, cube_size[0], cube_size[1], cube_size[2]),
+ dtype=np.float32)
+
+ for n in range(num_people):
+ for idx, joint_id in enumerate(joint_indices):
+ mu_x = joints_3d[n][joint_id][0]
+ mu_y = joints_3d[n][joint_id][1]
+ mu_z = joints_3d[n][joint_id][2]
+ vis = joints_3d_visible[n][joint_id][0]
+ if vis < 1:
+ continue
+ i_x = [
+ np.searchsorted(grids_x, mu_x - 3 * self.sigma),
+ np.searchsorted(grids_x, mu_x + 3 * self.sigma, 'right')
+ ]
+ i_y = [
+ np.searchsorted(grids_y, mu_y - 3 * self.sigma),
+ np.searchsorted(grids_y, mu_y + 3 * self.sigma, 'right')
+ ]
+ i_z = [
+ np.searchsorted(grids_z, mu_z - 3 * self.sigma),
+ np.searchsorted(grids_z, mu_z + 3 * self.sigma, 'right')
+ ]
+ if i_x[0] >= i_x[1] or i_y[0] >= i_y[1] or i_z[0] >= i_z[1]:
+ continue
+ kernel_xs, kernel_ys, kernel_zs = np.meshgrid(
+ grids_x[i_x[0]:i_x[1]],
+ grids_y[i_y[0]:i_y[1]],
+ grids_z[i_z[0]:i_z[1]],
+ indexing='ij')
+ g = np.exp(-((kernel_xs - mu_x)**2 + (kernel_ys - mu_y)**2 +
+ (kernel_zs - mu_z)**2) / (2 * self.sigma**2))
+ target[idx, i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]] \
+ = np.maximum(target[idx, i_x[0]:i_x[1],
+ i_y[0]:i_y[1], i_z[0]:i_z[1]], g)
+
+ target = np.clip(target, 0, 1)
+ if target.shape[0] == 1:
+ target = target[0]
+
+ results['targets_3d'] = target
+
+ return results
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/shared_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/shared_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4fea806ce84b0484cabb7b44ba09c34cc109be0
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/shared_transform.py
@@ -0,0 +1,527 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+from collections.abc import Sequence
+
+import mmcv
+import numpy as np
+from mmcv.parallel import DataContainer as DC
+from mmcv.utils import build_from_cfg
+from numpy import random
+from torchvision.transforms import functional as F
+
+from ..builder import PIPELINES
+
+try:
+ import albumentations
+except ImportError:
+ albumentations = None
+
+
+@PIPELINES.register_module()
+class ToTensor:
+ """Transform image to Tensor.
+
+ Required key: 'img'. Modifies key: 'img'.
+
+ Args:
+ results (dict): contains all information about training.
+ """
+
+ def __call__(self, results):
+ if isinstance(results['img'], (list, tuple)):
+ results['img'] = [F.to_tensor(img) for img in results['img']]
+ else:
+ results['img'] = F.to_tensor(results['img'])
+
+ return results
+
+
+@PIPELINES.register_module()
+class NormalizeTensor:
+ """Normalize the Tensor image (CxHxW), with mean and std.
+
+ Required key: 'img'. Modifies key: 'img'.
+
+ Args:
+ mean (list[float]): Mean values of 3 channels.
+ std (list[float]): Std values of 3 channels.
+ """
+
+ def __init__(self, mean, std):
+ self.mean = mean
+ self.std = std
+
+ def __call__(self, results):
+ if isinstance(results['img'], (list, tuple)):
+ results['img'] = [
+ F.normalize(img, mean=self.mean, std=self.std)
+ for img in results['img']
+ ]
+ else:
+ results['img'] = F.normalize(
+ results['img'], mean=self.mean, std=self.std)
+
+ return results
+
+
+@PIPELINES.register_module()
+class Compose:
+ """Compose a data pipeline with a sequence of transforms.
+
+ Args:
+ transforms (list[dict | callable]): Either config
+ dicts of transforms or transform objects.
+ """
+
+ def __init__(self, transforms):
+ assert isinstance(transforms, Sequence)
+ self.transforms = []
+ for transform in transforms:
+ if isinstance(transform, dict):
+ transform = build_from_cfg(transform, PIPELINES)
+ self.transforms.append(transform)
+ elif callable(transform):
+ self.transforms.append(transform)
+ else:
+ raise TypeError('transform must be callable or a dict, but got'
+ f' {type(transform)}')
+
+ def __call__(self, data):
+ """Call function to apply transforms sequentially.
+
+ Args:
+ data (dict): A result dict contains the data to transform.
+
+ Returns:
+ dict: Transformed data.
+ """
+ for t in self.transforms:
+ data = t(data)
+ if data is None:
+ return None
+ return data
+
+ def __repr__(self):
+ """Compute the string representation."""
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += f'\n {t}'
+ format_string += '\n)'
+ return format_string
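+# Usage sketch using transforms defined in this file (the mean/std values are
+# example numbers):
+#   pipeline = Compose([
+#       dict(type='ToTensor'),
+#       dict(type='NormalizeTensor',
+#            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#   ])
+#   results = pipeline(dict(img=img))  # img: HxWx3 array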
+
+
+@PIPELINES.register_module()
+class Collect:
+ """Collect data from the loader relevant to the specific task.
+
+ This keeps the items in ``keys`` as they are, and collects the items in
+ ``meta_keys`` into a meta item called ``meta_name``. This is usually the
+ last stage of the data loading pipeline.
+ For example, when keys='imgs', meta_keys=('filename', 'label',
+ 'original_shape'), meta_name='img_metas', the results will be a dict with
+ keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of
+ another dict with keys 'filename', 'label', 'original_shape'.
+
+ Args:
+ keys (Sequence[str|tuple]): Required keys to be collected. If a tuple
+ (key, key_new) is given as an element, the item retrieved by key will
+ be renamed as key_new in collected data.
+ meta_name (str): The name of the key that contains meta information.
+ This key is always populated. Default: "img_metas".
+ meta_keys (Sequence[str|tuple]): Keys that are collected under
+ meta_name. The contents of the ``meta_name`` dictionary depend
+ on ``meta_keys``.
+ """
+
+ def __init__(self, keys, meta_keys, meta_name='img_metas'):
+ self.keys = keys
+ self.meta_keys = meta_keys
+ self.meta_name = meta_name
+
+ def __call__(self, results):
+ """Performs the Collect formatting.
+
+ Args:
+ results (dict): The resulting dict to be modified and passed
+ to the next transform in pipeline.
+ """
+ if 'ann_info' in results:
+ results.update(results['ann_info'])
+
+ data = {}
+ for key in self.keys:
+ if isinstance(key, tuple):
+ assert len(key) == 2
+ key_src, key_tgt = key[:2]
+ else:
+ key_src = key_tgt = key
+ data[key_tgt] = results[key_src]
+
+ meta = {}
+ if len(self.meta_keys) != 0:
+ for key in self.meta_keys:
+ if isinstance(key, tuple):
+ assert len(key) == 2
+ key_src, key_tgt = key[:2]
+ else:
+ key_src = key_tgt = key
+ meta[key_tgt] = results[key_src]
+ if 'bbox_id' in results:
+ meta['bbox_id'] = results['bbox_id']
+ data[self.meta_name] = DC(meta, cpu_only=True)
+
+ return data
+
+ def __repr__(self):
+ """Compute the string representation."""
+ return (f'{self.__class__.__name__}('
+ f'keys={self.keys}, meta_keys={self.meta_keys})')
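+# Usage sketch (key names are illustrative assumptions):
+#   Collect(keys=['img', ('target', 'heatmap')],
+#           meta_keys=['image_file', 'center', 'scale'])
+# returns a dict with 'img', 'heatmap' (renamed from 'target') and an
+# 'img_metas' DataContainer holding the three meta items.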
+
+
+@PIPELINES.register_module()
+class Albumentation:
+ """Albumentation augmentation (pixel-level transforms only). Adds custom
+ pixel-level transformations from Albumentations library. Please visit
+ `https://albumentations.readthedocs.io` to get more information.
+
+ Note: we only support pixel-level transforms. Please visit
+ https://github.com/albumentations-team/albumentations#pixel-level-transforms
+ to get more information about pixel-level transforms.
+
+ An example of ``transforms`` is as followed:
+
+ .. code-block:: python
+
+ [
+ dict(
+ type='RandomBrightnessContrast',
+ brightness_limit=[0.1, 0.3],
+ contrast_limit=[0.1, 0.3],
+ p=0.2),
+ dict(type='ChannelShuffle', p=0.1),
+ dict(
+ type='OneOf',
+ transforms=[
+ dict(type='Blur', blur_limit=3, p=1.0),
+ dict(type='MedianBlur', blur_limit=3, p=1.0)
+ ],
+ p=0.1),
+ ]
+
+ Args:
+ transforms (list[dict]): A list of Albumentation transformations
+ keymap (dict): Contains {'input key':'albumentation-style key'},
+ e.g., {'img': 'image'}.
+ """
+
+ def __init__(self, transforms, keymap=None):
+ if albumentations is None:
+ raise RuntimeError('albumentations is not installed')
+
+ self.transforms = transforms
+ self.filter_lost_elements = False
+
+ self.aug = albumentations.Compose(
+ [self.albu_builder(t) for t in self.transforms])
+
+ if not keymap:
+ self.keymap_to_albu = {
+ 'img': 'image',
+ }
+ else:
+ self.keymap_to_albu = keymap
+ self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}
+
+ def albu_builder(self, cfg):
+ """Import a module from albumentations.
+
+ It resembles some of :func:`build_from_cfg` logic.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+
+ Returns:
+ obj: The constructed object.
+ """
+
+ assert isinstance(cfg, dict) and 'type' in cfg
+ args = cfg.copy()
+
+ obj_type = args.pop('type')
+ if mmcv.is_str(obj_type):
+ if albumentations is None:
+ raise RuntimeError('albumentations is not installed')
+ if not hasattr(albumentations.augmentations.transforms, obj_type):
+ warnings.warn(f'{obj_type} is not a pixel-level transform. '
+ 'Please use it with caution.')
+ obj_cls = getattr(albumentations, obj_type)
+ else:
+ raise TypeError(f'type must be a str, but got {type(obj_type)}')
+
+ if 'transforms' in args:
+ args['transforms'] = [
+ self.albu_builder(transform)
+ for transform in args['transforms']
+ ]
+
+ return obj_cls(**args)
+
+ @staticmethod
+ def mapper(d, keymap):
+ """Dictionary mapper.
+
+ Renames keys according to keymap provided.
+
+ Args:
+ d (dict): old dict
+ keymap (dict): {'old_key':'new_key'}
+
+ Returns:
+ dict: new dict.
+ """
+
+ updated_dict = {keymap.get(k, k): v for k, v in d.items()}
+ return updated_dict
+
+ def __call__(self, results):
+ # dict to albumentations format
+ results = self.mapper(results, self.keymap_to_albu)
+
+ results = self.aug(**results)
+ # back to the original format
+ results = self.mapper(results, self.keymap_back)
+
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__ + f'(transforms={self.transforms})'
+ return repr_str
+
+
+@PIPELINES.register_module()
+class PhotometricDistortion:
+ """Apply photometric distortions to the image sequentially; every
+ transformation is applied with a probability of 0.5. Random contrast is
+ applied either second or second to last.
+
+ 1. random brightness
+ 2. random contrast (mode 0)
+ 3. convert color from BGR to HSV
+ 4. random saturation
+ 5. random hue
+ 6. convert color from HSV to BGR
+ 7. random contrast (mode 1)
+ 8. randomly swap channels
+
+ Args:
+ brightness_delta (int): delta of brightness.
+ contrast_range (tuple): range of contrast.
+ saturation_range (tuple): range of saturation.
+ hue_delta (int): delta of hue.
+ """
+
+ def __init__(self,
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18):
+ self.brightness_delta = brightness_delta
+ self.contrast_lower, self.contrast_upper = contrast_range
+ self.saturation_lower, self.saturation_upper = saturation_range
+ self.hue_delta = hue_delta
+
+ def convert(self, img, alpha=1, beta=0):
+ """Multiply by alpha, add beta, and clip to [0, 255]."""
+ img = img.astype(np.float32) * alpha + beta
+ img = np.clip(img, 0, 255)
+ return img.astype(np.uint8)
+
+ def brightness(self, img):
+ """Brightness distortion."""
+ if random.randint(2):
+ return self.convert(
+ img,
+ beta=random.uniform(-self.brightness_delta,
+ self.brightness_delta))
+ return img
+
+ def contrast(self, img):
+ """Contrast distortion."""
+ if random.randint(2):
+ return self.convert(
+ img,
+ alpha=random.uniform(self.contrast_lower, self.contrast_upper))
+ return img
+
+ def saturation(self, img):
+ # Apply saturation distortion to hsv-formatted img
+ img[:, :, 1] = self.convert(
+ img[:, :, 1],
+ alpha=random.uniform(self.saturation_lower, self.saturation_upper))
+ return img
+
+ def hue(self, img):
+ # Apply hue distortion to hsv-formatted img
+ img[:, :, 0] = (img[:, :, 0].astype(int) +
+ random.randint(-self.hue_delta, self.hue_delta)) % 180
+ return img
+
+ def swap_channels(self, img):
+ # Apply channel swap
+ if random.randint(2):
+ img = img[..., random.permutation(3)]
+ return img
+
+ def __call__(self, results):
+ """Call function to perform photometric distortion on images.
+
+ Args:
+ results (dict): Result dict from loading pipeline.
+
+ Returns:
+ dict: Result dict with images distorted.
+ """
+
+ img = results['img']
+ # random brightness
+ img = self.brightness(img)
+
+ # mode == 0 --> do random contrast first
+ # mode == 1 --> do random contrast last
+ mode = random.randint(2)
+ if mode == 1:
+ img = self.contrast(img)
+
+ hsv_mode = random.randint(4)
+ if hsv_mode:
+ # random saturation/hue distortion
+ img = mmcv.bgr2hsv(img)
+ if hsv_mode == 1 or hsv_mode == 3:
+ img = self.saturation(img)
+ if hsv_mode == 2 or hsv_mode == 3:
+ img = self.hue(img)
+ img = mmcv.hsv2bgr(img)
+
+ # random contrast
+ if mode == 0:
+ img = self.contrast(img)
+
+ # randomly swap channels (the permuted image must be re-assigned,
+ # since fancy indexing returns a copy)
+ img = self.swap_channels(img)
+
+ results['img'] = img
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += (f'(brightness_delta={self.brightness_delta}, '
+ f'contrast_range=({self.contrast_lower}, '
+ f'{self.contrast_upper}), '
+ f'saturation_range=({self.saturation_lower}, '
+ f'{self.saturation_upper}), '
+ f'hue_delta={self.hue_delta})')
+ return repr_str
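+# Config sketch (the values shown are the defaults above):
+#   dict(type='PhotometricDistortion', brightness_delta=32,
+#        contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18)
+# The transform expects a BGR uint8 image under results['img'].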
+
+
+@PIPELINES.register_module()
+class MultiItemProcess:
+ """Process each item and merge multi-item results to lists.
+
+ Args:
+ pipeline (dict): Dictionary to construct pipeline for a single item.
+ """
+
+ def __init__(self, pipeline):
+ self.pipeline = Compose(pipeline)
+
+ def __call__(self, results):
+ results_ = {}
+ for idx, result in results.items():
+ single_result = self.pipeline(result)
+ for k, v in single_result.items():
+ if k in results_:
+ results_[k].append(v)
+ else:
+ results_[k] = [v]
+
+ return results_
+
+
+@PIPELINES.register_module()
+class DiscardDuplicatedItems:
+
+ def __init__(self, keys_list):
+ """Discard duplicated single-item results.
+
+ Args:
+ keys_list (list): List of keys that need to be deduplicated.
+ """
+ self.keys_list = keys_list
+
+ def __call__(self, results):
+ for k, v in results.items():
+ if k in self.keys_list:
+ assert isinstance(v, Sequence)
+ results[k] = v[0]
+
+ return results
+
+
+@PIPELINES.register_module()
+class MultitaskGatherTarget:
+ """Gather the targets for multitask heads.
+
+ Args:
+ pipeline_list (list[list]): List of pipelines for all heads.
+ pipeline_indices (list[int]): Pipeline index of each head.
+ """
+
+ def __init__(self,
+ pipeline_list,
+ pipeline_indices=None,
+ keys=('target', 'target_weight')):
+ self.keys = keys
+ self.pipelines = []
+ for pipeline in pipeline_list:
+ self.pipelines.append(Compose(pipeline))
+ if pipeline_indices is None:
+ self.pipeline_indices = list(range(len(pipeline_list)))
+ else:
+ self.pipeline_indices = pipeline_indices
+
+ def __call__(self, results):
+ # generate target and target weights using all pipelines
+ pipeline_outputs = []
+ for pipeline in self.pipelines:
+ pipeline_output = pipeline(results)
+ pipeline_outputs.append(pipeline_output.copy())
+
+ for key in self.keys:
+ result_key = []
+ for ind in self.pipeline_indices:
+ result_key.append(pipeline_outputs[ind].get(key, None))
+ results[key] = result_key
+ return results
+
+
+@PIPELINES.register_module()
+class RenameKeys:
+ """Rename the keys.
+
+ Args:
+ key_pairs (Sequence[tuple]): Required keys to be renamed.
+ If a tuple (key_src, key_tgt) is given as an element,
+ the item retrieved by key_src will be renamed as key_tgt.
+ """
+
+ def __init__(self, key_pairs):
+ self.key_pairs = key_pairs
+
+ def __call__(self, results):
+ """Rename keys."""
+ for key_pair in self.key_pairs:
+ assert len(key_pair) == 2
+ key_src, key_tgt = key_pair
+ results[key_tgt] = results.pop(key_src)
+ return results
diff --git a/vendor/ViTPose/mmpose/datasets/pipelines/top_down_transform.py b/vendor/ViTPose/mmpose/datasets/pipelines/top_down_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..1af1ea92d0cc5f973356ab72f300661e30b5d439
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/pipelines/top_down_transform.py
@@ -0,0 +1,736 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import numpy as np
+
+from mmpose.core.post_processing import (affine_transform, fliplr_joints,
+ get_affine_transform, get_warp_matrix,
+ warp_affine_joints)
+from mmpose.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class TopDownRandomFlip:
+ """Data augmentation with random image flip.
+
+ Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
+ 'ann_info'.
+
+ Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
+ 'flipped'.
+
+ Args:
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_prob=0.5):
+ self.flip_prob = flip_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ img = results['img']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ center = results['center']
+
+ # A flag indicating whether the image is flipped,
+ # which can be used by child class.
+ flipped = False
+ if np.random.rand() <= self.flip_prob:
+ flipped = True
+ if not isinstance(img, list):
+ img = img[:, ::-1, :]
+ else:
+ img = [i[:, ::-1, :] for i in img]
+ if not isinstance(img, list):
+ joints_3d, joints_3d_visible = fliplr_joints(
+ joints_3d, joints_3d_visible, img.shape[1],
+ results['ann_info']['flip_pairs'])
+ center[0] = img.shape[1] - center[0] - 1
+ else:
+ joints_3d, joints_3d_visible = fliplr_joints(
+ joints_3d, joints_3d_visible, img[0].shape[1],
+ results['ann_info']['flip_pairs'])
+ center[0] = img[0].shape[1] - center[0] - 1
+
+ results['img'] = img
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+ results['center'] = center
+ results['flipped'] = flipped
+
+ return results
+
+
+@PIPELINES.register_module()
+class TopDownHalfBodyTransform:
+ """Data augmentation with half-body transform. Keep only the upper body or
+ the lower body at random.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', and 'ann_info'.
+
+ Modifies key: 'scale' and 'center'.
+
+ Args:
+ num_joints_half_body (int): Threshold for performing the half-body
+ transform. If the number of visible joints does not exceed
+ num_joints_half_body, this step is skipped.
+ prob_half_body (float): Probability of half-body transform.
+ """
+
+ def __init__(self, num_joints_half_body=8, prob_half_body=0.3):
+ self.num_joints_half_body = num_joints_half_body
+ self.prob_half_body = prob_half_body
+
+ @staticmethod
+ def half_body_transform(cfg, joints_3d, joints_3d_visible):
+ """Get center&scale for half-body transform."""
+ upper_joints = []
+ lower_joints = []
+ for joint_id in range(cfg['num_joints']):
+ if joints_3d_visible[joint_id][0] > 0:
+ if joint_id in cfg['upper_body_ids']:
+ upper_joints.append(joints_3d[joint_id])
+ else:
+ lower_joints.append(joints_3d[joint_id])
+
+ if np.random.randn() < 0.5 and len(upper_joints) > 2:
+ selected_joints = upper_joints
+ elif len(lower_joints) > 2:
+ selected_joints = lower_joints
+ else:
+ selected_joints = upper_joints
+
+ if len(selected_joints) < 2:
+ return None, None
+
+ selected_joints = np.array(selected_joints, dtype=np.float32)
+ center = selected_joints.mean(axis=0)[:2]
+
+ left_top = np.amin(selected_joints, axis=0)
+
+ right_bottom = np.amax(selected_joints, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ aspect_ratio = cfg['image_size'][0] / cfg['image_size'][1]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
+ scale = scale * 1.5
+ return center, scale
+
+ def __call__(self, results):
+ """Perform data augmentation with half-body transform."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ if (np.sum(joints_3d_visible[:, 0]) > self.num_joints_half_body
+ and np.random.rand() < self.prob_half_body):
+
+ c_half_body, s_half_body = self.half_body_transform(
+ results['ann_info'], joints_3d, joints_3d_visible)
+
+ if c_half_body is not None and s_half_body is not None:
+ results['center'] = c_half_body
+ results['scale'] = s_half_body
+
+ return results
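+# Worked example (illustrative): if the selected joints span a 100x300 px box
+# and image_size is (192, 256) (aspect ratio 0.75), the box is widened to
+# 225x300 px, so scale = [225/200, 300/200] * 1.5 = [1.6875, 2.25] and the
+# center is the mean of the selected joints.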
+
+
+@PIPELINES.register_module()
+class TopDownGetRandomScaleRotation:
+ """Data augmentation with random scaling & rotating.
+
+ Required key: 'scale'.
+
+ Modifies key: 'scale' and 'rotation'.
+
+ Args:
+ rot_factor (int): Rotation angle is sampled from
+ ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor (float): Scale factor is sampled from
+ ``[1-scale_factor, 1+scale_factor]``.
+ rot_prob (float): Probability of random rotation.
+ """
+
+ def __init__(self, rot_factor=40, scale_factor=0.5, rot_prob=0.6):
+ self.rot_factor = rot_factor
+ self.scale_factor = scale_factor
+ self.rot_prob = rot_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ s = results['scale']
+
+ sf = self.scale_factor
+ rf = self.rot_factor
+
+ s_factor = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ s = s * s_factor
+
+ r_factor = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
+ r = r_factor if np.random.rand() <= self.rot_prob else 0
+
+ results['scale'] = s
+ results['rotation'] = r
+
+ return results
+
+
+@PIPELINES.register_module()
+class TopDownAffine:
+ """Affine-transform the image to produce the network input.
+
+ Required keys:'img', 'joints_3d', 'joints_3d_visible', 'ann_info','scale',
+ 'rotation' and 'center'.
+
+ Modified keys:'img', 'joints_3d', and 'joints_3d_visible'.
+
+ Args:
+ use_udp (bool): Whether to use unbiased data processing (UDP).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self, use_udp=False):
+ self.use_udp = use_udp
+
+ def __call__(self, results):
+ image_size = results['ann_info']['image_size']
+
+ img = results['img']
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+ c = results['center']
+ s = results['scale']
+ r = results['rotation']
+
+ if self.use_udp:
+ trans = get_warp_matrix(r, c * 2.0, image_size - 1.0, s * 200.0)
+ if not isinstance(img, list):
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ img = [
+ cv2.warpAffine(
+ i,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR) for i in img
+ ]
+
+ joints_3d[:, 0:2] = \
+ warp_affine_joints(joints_3d[:, 0:2].copy(), trans)
+
+ else:
+ trans = get_affine_transform(c, s, r, image_size)
+ if not isinstance(img, list):
+ img = cv2.warpAffine(
+ img,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR)
+ else:
+ img = [
+ cv2.warpAffine(
+ i,
+ trans, (int(image_size[0]), int(image_size[1])),
+ flags=cv2.INTER_LINEAR) for i in img
+ ]
+ for i in range(results['ann_info']['num_joints']):
+ if joints_3d_visible[i, 0] > 0.0:
+ joints_3d[i,
+ 0:2] = affine_transform(joints_3d[i, 0:2], trans)
+
+ results['img'] = img
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+
+ return results
+
+
+@PIPELINES.register_module()
+class TopDownGenerateTarget:
+ """Generate the target heatmap.
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+
+ Modified keys: 'target', and 'target_weight'.
+
+ Args:
+ sigma: Sigma of heatmap gaussian for 'MSRA' approach.
+ kernel: Kernel of heatmap gaussian for 'Megvii' approach.
+ encoding (str): Approach to generate target heatmaps.
+ Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
+ Default: 'MSRA'.
+ unbiased_encoding (bool): Option to use unbiased
+ encoding methods.
+ Paper ref: Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+ valid_radius_factor (float): Valid radius factor for the
+ 'CombinedTarget' type of the 'UDP' approach.
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ target_type (str): Supported targets: 'GaussianHeatmap',
+ 'CombinedTarget'. Default: 'GaussianHeatmap'.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+ """
+
+ def __init__(self,
+ sigma=2,
+ kernel=(11, 11),
+ valid_radius_factor=0.0546875,
+ target_type='GaussianHeatmap',
+ encoding='MSRA',
+ unbiased_encoding=False):
+ self.sigma = sigma
+ self.unbiased_encoding = unbiased_encoding
+ self.kernel = kernel
+ self.valid_radius_factor = valid_radius_factor
+ self.target_type = target_type
+ self.encoding = encoding
+
+ def _msra_generate_target(self, cfg, joints_3d, joints_3d_visible, sigma):
+ """Generate the target heatmap via "MSRA" approach.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray ([num_joints, 3])
+ joints_3d_visible: np.ndarray ([num_joints, 3])
+ sigma: Sigma of heatmap gaussian
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target: Target heatmaps.
+ - target_weight: (1: visible, 0: invisible)
+ """
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ W, H = cfg['heatmap_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ target_weight = np.zeros((num_joints, 1), dtype=np.float32)
+ target = np.zeros((num_joints, H, W), dtype=np.float32)
+
+ # 3-sigma rule
+ tmp_size = sigma * 3
+
+ if self.unbiased_encoding:
+ for joint_id in range(num_joints):
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
+
+ feat_stride = image_size / [W, H]
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
+ # Check that any part of the gaussian is in-bounds
+ ul = [mu_x - tmp_size, mu_y - tmp_size]
+ br = [mu_x + tmp_size + 1, mu_y + tmp_size + 1]
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
+ target_weight[joint_id] = 0
+
+ if target_weight[joint_id] == 0:
+ continue
+
+ x = np.arange(0, W, 1, np.float32)
+ y = np.arange(0, H, 1, np.float32)
+ y = y[:, None]
+
+ if target_weight[joint_id] > 0.5:
+ target[joint_id] = np.exp(-((x - mu_x)**2 +
+ (y - mu_y)**2) /
+ (2 * sigma**2))
+ else:
+ for joint_id in range(num_joints):
+ target_weight[joint_id] = joints_3d_visible[joint_id, 0]
+
+ feat_stride = image_size / [W, H]
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
+ if ul[0] >= W or ul[1] >= H or br[0] < 0 or br[1] < 0:
+ target_weight[joint_id] = 0
+
+ if target_weight[joint_id] > 0.5:
+ size = 2 * tmp_size + 1
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+ x0 = y0 = size // 2
+ # The gaussian is not normalized,
+ # we want the center value to equal 1
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
+
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], W) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], H) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], W)
+ img_y = max(0, ul[1]), min(br[1], H)
+
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
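+ # Worked example (illustrative): with image_size=[192, 256],
+ # heatmap_size=[48, 64] and sigma=2, feat_stride is [4, 4]; a joint at
+ # pixel (96, 128) lands at heatmap cell (24, 32) and its gaussian covers a
+ # 13x13 patch (2 * 3 * sigma + 1) centred there.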
+
+ def _megvii_generate_target(self, cfg, joints_3d, joints_3d_visible,
+ kernel):
+ """Generate the target heatmap via "Megvii" approach.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray ([num_joints, 3])
+ joints_3d_visible: np.ndarray ([num_joints, 3])
+ kernel: Kernel of heatmap gaussian
+
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target: Target heatmaps.
+ - target_weight: (1: visible, 0: invisible)
+ """
+
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ W, H = cfg['heatmap_size']
+ heatmaps = np.zeros((num_joints, H, W), dtype='float32')
+ target_weight = np.zeros((num_joints, 1), dtype=np.float32)
+
+ for i in range(num_joints):
+ target_weight[i] = joints_3d_visible[i, 0]
+
+ if target_weight[i] < 1:
+ continue
+
+ target_y = int(joints_3d[i, 1] * H / image_size[1])
+ target_x = int(joints_3d[i, 0] * W / image_size[0])
+
+ if (target_x >= W or target_x < 0) \
+ or (target_y >= H or target_y < 0):
+ target_weight[i] = 0
+ continue
+
+ heatmaps[i, target_y, target_x] = 1
+ heatmaps[i] = cv2.GaussianBlur(heatmaps[i], kernel, 0)
+ maxi = heatmaps[i, target_y, target_x]
+
+ heatmaps[i] /= maxi / 255
+
+ return heatmaps, target_weight
+
+ def _udp_generate_target(self, cfg, joints_3d, joints_3d_visible, factor,
+ target_type):
+ """Generate the target heatmap via 'UDP' approach. Paper ref: Huang et
+ al. The Devil is in the Details: Delving into Unbiased Data Processing
+ for Human Pose Estimation (CVPR 2020).
+
+ Note:
+ - num keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+ - num target channels: C
+ - C = K if target_type=='GaussianHeatmap'
+ - C = 3*K if target_type=='CombinedTarget'
+
+ Args:
+ cfg (dict): data config
+ joints_3d (np.ndarray[K, 3]): Annotated keypoints.
+ joints_3d_visible (np.ndarray[K, 3]): Visibility of keypoints.
+ factor (float): kernel factor for GaussianHeatmap target or
+ valid radius factor for CombinedTarget.
+ target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
+ GaussianHeatmap: Heatmap target with gaussian distribution.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+
+ Returns:
+ tuple: A tuple containing targets.
+
+ - target (np.ndarray[C, H, W]): Target heatmaps.
+ - target_weight (np.ndarray[K, 1]): (1: visible, 0: invisible)
+ """
+ num_joints = cfg['num_joints']
+ image_size = cfg['image_size']
+ heatmap_size = cfg['heatmap_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ target_weight = np.ones((num_joints, 1), dtype=np.float32)
+ target_weight[:, 0] = joints_3d_visible[:, 0]
+
+ if target_type.lower() == 'GaussianHeatmap'.lower():
+ target = np.zeros((num_joints, heatmap_size[1], heatmap_size[0]),
+ dtype=np.float32)
+
+ tmp_size = factor * 3
+
+ # prepare for gaussian
+ size = 2 * tmp_size + 1
+ x = np.arange(0, size, 1, np.float32)
+ y = x[:, None]
+
+ for joint_id in range(num_joints):
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
+ mu_x = int(joints_3d[joint_id][0] / feat_stride[0] + 0.5)
+ mu_y = int(joints_3d[joint_id][1] / feat_stride[1] + 0.5)
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
+ br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
+ if ul[0] >= heatmap_size[0] or ul[1] >= heatmap_size[1] \
+ or br[0] < 0 or br[1] < 0:
+ # Gaussian lies entirely outside the heatmap; skip this joint
+ target_weight[joint_id] = 0
+ continue
+
+ # # Generate gaussian
+ mu_x_ac = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y_ac = joints_3d[joint_id][1] / feat_stride[1]
+ x0 = y0 = size // 2
+ x0 += mu_x_ac - mu_x
+ y0 += mu_y_ac - mu_y
+ g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * factor**2))
+
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
+ img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
+
+ v = target_weight[joint_id]
+ if v > 0.5:
+ target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
+ g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+
+ elif target_type.lower() == 'CombinedTarget'.lower():
+ target = np.zeros(
+ (num_joints, 3, heatmap_size[1] * heatmap_size[0]),
+ dtype=np.float32)
+ feat_width = heatmap_size[0]
+ feat_height = heatmap_size[1]
+ feat_x_int = np.arange(0, feat_width)
+ feat_y_int = np.arange(0, feat_height)
+ feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
+ feat_x_int = feat_x_int.flatten()
+ feat_y_int = feat_y_int.flatten()
+ # Calculate the radius of the positive area in classification
+ # heatmap.
+ valid_radius = factor * heatmap_size[1]
+ feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
+ for joint_id in range(num_joints):
+ mu_x = joints_3d[joint_id][0] / feat_stride[0]
+ mu_y = joints_3d[joint_id][1] / feat_stride[1]
+ x_offset = (mu_x - feat_x_int) / valid_radius
+ y_offset = (mu_y - feat_y_int) / valid_radius
+ dis = x_offset**2 + y_offset**2
+ keep_pos = np.where(dis <= 1)[0]
+ v = target_weight[joint_id]
+ if v > 0.5:
+ target[joint_id, 0, keep_pos] = 1
+ target[joint_id, 1, keep_pos] = x_offset[keep_pos]
+ target[joint_id, 2, keep_pos] = y_offset[keep_pos]
+ target = target.reshape(num_joints * 3, heatmap_size[1],
+ heatmap_size[0])
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ assert self.encoding in ['MSRA', 'Megvii', 'UDP']
+
+ if self.encoding == 'MSRA':
+ if isinstance(self.sigma, list):
+ num_sigmas = len(self.sigma)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ heatmap_size = cfg['heatmap_size']
+
+ target = np.empty(
+ (0, num_joints, heatmap_size[1], heatmap_size[0]),
+ dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_sigmas):
+ target_i, target_weight_i = self._msra_generate_target(
+ cfg, joints_3d, joints_3d_visible, self.sigma[i])
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._msra_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible,
+ self.sigma)
+
+ elif self.encoding == 'Megvii':
+ if isinstance(self.kernel, list):
+ num_kernels = len(self.kernel)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ W, H = cfg['heatmap_size']
+
+ target = np.empty((0, num_joints, H, W), dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_kernels):
+ target_i, target_weight_i = self._megvii_generate_target(
+ cfg, joints_3d, joints_3d_visible, self.kernel[i])
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._megvii_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible,
+ self.kernel)
+
+ elif self.encoding == 'UDP':
+ if self.target_type.lower() == 'CombinedTarget'.lower():
+ factors = self.valid_radius_factor
+ channel_factor = 3
+ elif self.target_type.lower() == 'GaussianHeatmap'.lower():
+ factors = self.sigma
+ channel_factor = 1
+ else:
+ raise ValueError('target_type should be either '
+ "'GaussianHeatmap' or 'CombinedTarget'")
+ if isinstance(factors, list):
+ num_factors = len(factors)
+ cfg = results['ann_info']
+ num_joints = cfg['num_joints']
+ W, H = cfg['heatmap_size']
+
+ target = np.empty((0, channel_factor * num_joints, H, W),
+ dtype=np.float32)
+ target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
+ for i in range(num_factors):
+ target_i, target_weight_i = self._udp_generate_target(
+ cfg, joints_3d, joints_3d_visible, factors[i],
+ self.target_type)
+ target = np.concatenate([target, target_i[None]], axis=0)
+ target_weight = np.concatenate(
+ [target_weight, target_weight_i[None]], axis=0)
+ else:
+ target, target_weight = self._udp_generate_target(
+ results['ann_info'], joints_3d, joints_3d_visible, factors,
+ self.target_type)
+ else:
+ raise ValueError(
+ f'Encoding approach {self.encoding} is not supported!')
+
+ if results['ann_info'].get('max_num_joints', None) is not None:
+ W, H = results['ann_info']['heatmap_size']
+ padded_length = int(results['ann_info'].get('max_num_joints') - results['ann_info'].get('num_joints'))
+ target_weight = np.concatenate([target_weight, np.zeros((padded_length, 1), dtype=np.float32)], 0)
+ target = np.concatenate([target, np.zeros((padded_length, H, W), dtype=np.float32)], 0)
+
+ results['target'] = target
+ results['target_weight'] = target_weight
+
+ results['dataset_idx'] = results['ann_info'].get('dataset_idx', 0)
+
+ return results
+
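For reference, here is a minimal NumPy sketch of the UDP-style Gaussian target that `_udp_generate_target` above builds, using made-up sizes (192x256 input, 48x64 heatmap, sigma 2). It evaluates the Gaussian over the whole map for clarity, whereas the pipeline only fills a window of radius `3 * factor` around the keypoint.

```python
# Standalone sketch with hypothetical sizes; not part of the patched pipeline.
import numpy as np

image_size = np.array([192, 256])      # (w, h) of the network input
heatmap_size = np.array([48, 64])      # (w, h) of the target heatmap
sigma = 2.0
joint = np.array([100.0, 120.0])       # keypoint in input-image pixels

# UDP maps coordinates with the unbiased stride (input_size - 1) / (heatmap_size - 1).
feat_stride = (image_size - 1.0) / (heatmap_size - 1.0)
mu = joint / feat_stride               # sub-pixel centre on the heatmap grid

xs = np.arange(heatmap_size[0], dtype=np.float32)
ys = np.arange(heatmap_size[1], dtype=np.float32)[:, None]
heatmap = np.exp(-((xs - mu[0]) ** 2 + (ys - mu[1]) ** 2) / (2 * sigma ** 2))

print(heatmap.shape)                                      # (64, 48) == (H, W)
print(np.unravel_index(heatmap.argmax(), heatmap.shape))  # peak near (round(mu[1]), round(mu[0]))
```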
+
+@PIPELINES.register_module()
+class TopDownGenerateTargetRegression:
+ """Generate the target regression vector (coordinates).
+
+ Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'. Modified keys:
+ 'target', and 'target_weight'.
+ """
+
+ def __init__(self):
+ pass
+
+ def _generate_target(self, cfg, joints_3d, joints_3d_visible):
+ """Generate the target regression vector.
+
+ Args:
+ cfg (dict): data config
+ joints_3d: np.ndarray([num_joints, 3])
+ joints_3d_visible: np.ndarray([num_joints, 3])
+
+ Returns:
+ target, target_weight(1: visible, 0: invisible)
+ """
+ image_size = cfg['image_size']
+ joint_weights = cfg['joint_weights']
+ use_different_joint_weights = cfg['use_different_joint_weights']
+
+ mask = (joints_3d[:, 0] >= 0) * (
+ joints_3d[:, 0] <= image_size[0] - 1) * (joints_3d[:, 1] >= 0) * (
+ joints_3d[:, 1] <= image_size[1] - 1)
+
+ target = joints_3d[:, :2] / image_size
+
+ target = target.astype(np.float32)
+ target_weight = joints_3d_visible[:, :2] * mask[:, None]
+
+ if use_different_joint_weights:
+ target_weight = np.multiply(target_weight, joint_weights)
+
+ return target, target_weight
+
+ def __call__(self, results):
+ """Generate the target heatmap."""
+ joints_3d = results['joints_3d']
+ joints_3d_visible = results['joints_3d_visible']
+
+ target, target_weight = self._generate_target(results['ann_info'],
+ joints_3d,
+ joints_3d_visible)
+
+ results['target'] = target
+ results['target_weight'] = target_weight
+
+ return results
+
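A quick sketch of what `_generate_target` above produces: joint coordinates normalized by the input size, with joints that fall outside the image zeroed out of the weight. The sizes and joints below are made up.

```python
# Sketch only; mirrors the masking and normalization in _generate_target.
import numpy as np

image_size = np.array([192, 256])
joints = np.array([[96.0, 128.0, 0.0], [300.0, 40.0, 0.0]])  # second joint is outside
vis = np.ones((2, 3))

mask = ((joints[:, 0] >= 0) & (joints[:, 0] <= image_size[0] - 1)
        & (joints[:, 1] >= 0) & (joints[:, 1] <= image_size[1] - 1))
target = (joints[:, :2] / image_size).astype(np.float32)
target_weight = vis[:, :2] * mask[:, None]

print(target)          # [[0.5, 0.5], [1.5625, 0.15625]]
print(target_weight)   # [[1., 1.], [0., 0.]]
```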
+
+@PIPELINES.register_module()
+class TopDownRandomTranslation:
+ """Data augmentation with random translation.
+
+ Required key: 'scale' and 'center'.
+
+ Modifies key: 'center'.
+
+ Note:
+ - bbox height: H
+ - bbox width: W
+
+ Args:
+ trans_factor (float): Translating center to
+ ``[-trans_factor, trans_factor] * [W, H] + center``.
+ trans_prob (float): Probability of random translation.
+ """
+
+ def __init__(self, trans_factor=0.15, trans_prob=1.0):
+ self.trans_factor = trans_factor
+ self.trans_prob = trans_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random translation."""
+ center = results['center']
+ scale = results['scale']
+ if np.random.rand() <= self.trans_prob:
+ # reference bbox size is [200, 200] pixels
+ center += self.trans_factor * np.random.uniform(
+ -1, 1, size=2) * scale * 200
+ results['center'] = center
+ return results
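To make the jitter magnitude concrete, the sketch below reproduces the center update performed by `TopDownRandomTranslation`; the `scale * 200` term reflects mmpose's convention that `scale` stores the bbox size in units of 200 px. The numbers are illustrative.

```python
# Sketch of the center jitter; values are hypothetical.
import numpy as np

np.random.seed(0)
center = np.array([128.0, 128.0])
scale = np.array([1.2, 1.2])        # bbox size in units of 200 px
trans_factor = 0.15

offset = trans_factor * np.random.uniform(-1, 1, size=2) * scale * 200
print(center + offset)              # shifted by at most 0.15 * 240 = 36 px per axis
```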
diff --git a/vendor/ViTPose/mmpose/datasets/registry.py b/vendor/ViTPose/mmpose/datasets/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba3cc49e452eb4bceefa3bbb1b994d7f2ab7fff9
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/registry.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .builder import DATASETS, PIPELINES
+
+__all__ = ['DATASETS', 'PIPELINES']
+
+warnings.simplefilter('once', DeprecationWarning)
+warnings.warn(
+ 'Registries (DATASETS, PIPELINES) have been moved to '
+ 'mmpose.datasets.builder. Importing from '
+ 'mmpose.datasets.registry will be deprecated in the future.',
+ DeprecationWarning)
diff --git a/vendor/ViTPose/mmpose/datasets/samplers/__init__.py b/vendor/ViTPose/mmpose/datasets/samplers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..da09effaf20fefe1a102277672b98db7d884f002
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/samplers/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .distributed_sampler import DistributedSampler
+
+__all__ = ['DistributedSampler']
diff --git a/vendor/ViTPose/mmpose/datasets/samplers/distributed_sampler.py b/vendor/ViTPose/mmpose/datasets/samplers/distributed_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcb5f522a2252678250385f9b37463ce3a0e24f5
--- /dev/null
+++ b/vendor/ViTPose/mmpose/datasets/samplers/distributed_sampler.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.utils.data import DistributedSampler as _DistributedSampler
+
+
+class DistributedSampler(_DistributedSampler):
+ """DistributedSampler inheriting from
+ `torch.utils.data.DistributedSampler`.
+
+ In older versions of PyTorch, `DistributedSampler` has no `shuffle`
+ argument. This subclass adds one.
+ """
+
+ def __init__(self,
+ dataset,
+ num_replicas=None,
+ rank=None,
+ shuffle=True,
+ seed=0):
+ super().__init__(
+ dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
+ # for compatibility with PyTorch 1.3+
+ self.seed = seed if seed is not None else 0
+
+ def __iter__(self):
+ """Deterministically shuffle based on epoch."""
+ if self.shuffle:
+ g = torch.Generator()
+ g.manual_seed(self.epoch + self.seed)
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
+ else:
+ indices = torch.arange(len(self.dataset)).tolist()
+
+ # add extra samples to make it evenly divisible
+ indices += indices[:(self.total_size - len(indices))]
+ assert len(indices) == self.total_size
+
+ # subsample
+ indices = indices[self.rank:self.total_size:self.num_replicas]
+ assert len(indices) == self.num_samples
+ return iter(indices)
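The padding-and-interleaving step of the sampler can be checked in isolation; the sketch below mirrors the logic of `__iter__` for a hypothetical 10-sample dataset split across 4 replicas at epoch 0 with seed 0.

```python
# Sketch only: reproduces the pad + interleave logic of __iter__ above.
import torch

dataset_len, num_replicas = 10, 4
num_samples = (dataset_len + num_replicas - 1) // num_replicas   # 3 per rank
total_size = num_samples * num_replicas                          # 12 after padding

g = torch.Generator()
g.manual_seed(0 + 0)                                  # epoch + seed
indices = torch.randperm(dataset_len, generator=g).tolist()
indices += indices[:total_size - len(indices)]        # repeat the first few to pad

for rank in range(num_replicas):
    # every rank takes a strided slice, so the ranks see disjoint (padded) indices
    print(rank, indices[rank:total_size:num_replicas])
```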
diff --git a/vendor/ViTPose/mmpose/deprecated.py b/vendor/ViTPose/mmpose/deprecated.py
new file mode 100644
index 0000000000000000000000000000000000000000..b930901722ab8fe57455f8eaf9e7c1c728b4b4f8
--- /dev/null
+++ b/vendor/ViTPose/mmpose/deprecated.py
@@ -0,0 +1,199 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .datasets.builder import DATASETS
+from .datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset
+from .models.builder import HEADS, POSENETS
+from .models.detectors import AssociativeEmbedding
+from .models.heads import (AEHigherResolutionHead, AESimpleHead,
+ DeepposeRegressionHead, HMRMeshHead,
+ TopdownHeatmapMSMUHead,
+ TopdownHeatmapMultiStageHead,
+ TopdownHeatmapSimpleHead)
+
+
+@DATASETS.register_module()
+class TopDownFreiHandDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownFreiHandDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'TopDownFreiHandDataset has been renamed into FreiHandDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@DATASETS.register_module()
+class TopDownOneHand10KDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownOneHand10KDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'TopDownOneHand10KDataset has been renamed into OneHand10KDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@DATASETS.register_module()
+class TopDownPanopticDataset(Kpt2dSviewRgbImgTopDownDataset):
+ """Deprecated TopDownPanopticDataset."""
+
+ def __init__(self, *args, **kwargs):
+ raise (ImportError(
+ 'TopDownPanopticDataset has been renamed into PanopticDataset, '
+ 'check https://github.com/open-mmlab/mmpose/pull/202 for details.')
+ )
+
+ def _get_db(self):
+ return []
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ return None
+
+
+@HEADS.register_module()
+class BottomUpHigherResolutionHead(AEHigherResolutionHead):
+ """Bottom-up head for Higher Resolution.
+
+ BottomUpHigherResolutionHead has been renamed into AEHigherResolutionHead,
+ check https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'BottomUpHigherResolutionHead has been renamed into '
+ 'AEHigherResolutionHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class BottomUpSimpleHead(AESimpleHead):
+ """Bottom-up simple head.
+
+ BottomUpSimpleHead has been renamed into AESimpleHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'BottomUpSimpleHead has been renamed into '
+ 'AESimpleHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownSimpleHead(TopdownHeatmapSimpleHead):
+ """Top-down heatmap simple head.
+
+ TopDownSimpleHead has been renamed into TopdownHeatmapSimpleHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownSimpleHead has been renamed into '
+ 'TopdownHeatmapSimpleHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownMultiStageHead(TopdownHeatmapMultiStageHead):
+ """Top-down heatmap multi-stage head.
+
+ TopDownMultiStageHead has been renamed into TopdownHeatmapMultiStageHead,
+ check https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownMultiStageHead has been renamed into '
+ 'TopdownHeatmapMultiStageHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class TopDownMSMUHead(TopdownHeatmapMSMUHead):
+ """Heads for multi-stage multi-unit heads.
+
+ TopDownMSMUHead has been renamed into TopdownHeatmapMSMUHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'TopDownMSMUHead has been renamed into '
+ 'TopdownHeatmapMSMUHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class MeshHMRHead(HMRMeshHead):
+ """SMPL parameters regressor head.
+
+ MeshHMRHead has been renamed into HMRMeshHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'MeshHMRHead has been renamed into '
+ 'HMRMeshHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@HEADS.register_module()
+class FcHead(DeepposeRegressionHead):
+ """FcHead (deprecated).
+
+ FcHead has been renamed into DeepposeRegressionHead, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'FcHead has been renamed into '
+ 'DeepposeRegressionHead, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
+
+
+@POSENETS.register_module()
+class BottomUp(AssociativeEmbedding):
+ """Associative Embedding.
+
+ BottomUp has been renamed into AssociativeEmbedding, check
+ https://github.com/open-mmlab/mmpose/pull/656 for details.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ 'BottomUp has been renamed into '
+ 'AssociativeEmbedding, check '
+ 'https://github.com/open-mmlab/mmpose/pull/656 for details.',
+ DeprecationWarning)
diff --git a/vendor/ViTPose/mmpose/models/__init__.py b/vendor/ViTPose/mmpose/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbec55e439201119145ebb7423f9281b63f0ec07
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .backbones import * # noqa
+from .builder import (BACKBONES, HEADS, LOSSES, MESH_MODELS, NECKS, POSENETS,
+ build_backbone, build_head, build_loss, build_mesh_model,
+ build_neck, build_posenet)
+from .detectors import * # noqa
+from .heads import * # noqa
+from .losses import * # noqa
+from .necks import * # noqa
+from .utils import * # noqa
+
+__all__ = [
+ 'BACKBONES', 'HEADS', 'NECKS', 'LOSSES', 'POSENETS', 'MESH_MODELS',
+ 'build_backbone', 'build_head', 'build_loss', 'build_posenet',
+ 'build_neck', 'build_mesh_model'
+]
diff --git a/vendor/ViTPose/mmpose/models/backbones/__init__.py b/vendor/ViTPose/mmpose/models/backbones/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b8efcfbb5ac55e0f3b2de78e96bb799f54eab39
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .alexnet import AlexNet
+from .cpm import CPM
+from .hourglass import HourglassNet
+from .hourglass_ae import HourglassAENet
+from .hrformer import HRFormer
+from .hrnet import HRNet
+from .litehrnet import LiteHRNet
+from .mobilenet_v2 import MobileNetV2
+from .mobilenet_v3 import MobileNetV3
+from .mspn import MSPN
+from .regnet import RegNet
+from .resnest import ResNeSt
+from .resnet import ResNet, ResNetV1d
+from .resnext import ResNeXt
+from .rsn import RSN
+from .scnet import SCNet
+from .seresnet import SEResNet
+from .seresnext import SEResNeXt
+from .shufflenet_v1 import ShuffleNetV1
+from .shufflenet_v2 import ShuffleNetV2
+from .tcn import TCN
+from .v2v_net import V2VNet
+from .vgg import VGG
+from .vipnas_mbv3 import ViPNAS_MobileNetV3
+from .vipnas_resnet import ViPNAS_ResNet
+from .vit import ViT
+from .vit_moe import ViTMoE
+
+__all__ = [
+ 'AlexNet', 'HourglassNet', 'HourglassAENet', 'HRNet', 'MobileNetV2',
+ 'MobileNetV3', 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SCNet',
+ 'SEResNet', 'SEResNeXt', 'ShuffleNetV1', 'ShuffleNetV2', 'CPM', 'RSN',
+ 'MSPN', 'ResNeSt', 'VGG', 'TCN', 'ViPNAS_ResNet', 'ViPNAS_MobileNetV3',
+ 'LiteHRNet', 'V2VNet', 'HRFormer', 'ViT', 'ViTMoE'
+]
diff --git a/vendor/ViTPose/mmpose/models/backbones/alexnet.py b/vendor/ViTPose/mmpose/models/backbones/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8efd74d118f5abe4d9c880ebe80ce7cbd58c6b2
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/alexnet.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+@BACKBONES.register_module()
+class AlexNet(BaseBackbone):
+ """`AlexNet `__ backbone.
+
+ The input for AlexNet is a 224x224 RGB image.
+
+ Args:
+ num_classes (int): number of classes for classification.
+ The default value is -1, which uses the backbone as
+ a feature extractor without the top classifier.
+ """
+
+ def __init__(self, num_classes=-1):
+ super().__init__()
+ self.num_classes = num_classes
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(64, 192, kernel_size=5, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(192, 384, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(384, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ )
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Dropout(),
+ nn.Linear(256 * 6 * 6, 4096),
+ nn.ReLU(inplace=True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(inplace=True),
+ nn.Linear(4096, num_classes),
+ )
+
+ def forward(self, x):
+
+ x = self.features(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), 256 * 6 * 6)
+ x = self.classifier(x)
+
+ return x
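Assuming the vendored `mmpose` package (and its mmcv dependency) is importable, the backbone can be used as a plain feature extractor; the import path below is an assumption of this sketch.

```python
import torch
from mmpose.models import AlexNet   # assumes the vendored mmpose is on sys.path

model = AlexNet(num_classes=-1)     # -1: feature extractor, no classifier head
model.eval()
with torch.no_grad():
    feats = model(torch.rand(1, 3, 224, 224))
print(feats.shape)                  # torch.Size([1, 256, 6, 6])
```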
diff --git a/vendor/ViTPose/mmpose/models/backbones/base_backbone.py b/vendor/ViTPose/mmpose/models/backbones/base_backbone.py
new file mode 100644
index 0000000000000000000000000000000000000000..d64dca1da1380aca4521bc1066c76e8a6f56c18c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/base_backbone.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+from abc import ABCMeta, abstractmethod
+
+import torch.nn as nn
+
+# from .utils import load_checkpoint
+from mmcv_custom.checkpoint import load_checkpoint
+
+class BaseBackbone(nn.Module, metaclass=ABCMeta):
+ """Base backbone.
+
+ This class defines the basic functions of a backbone. Any backbone that
+ inherits this class should at least define its own `forward` function.
+ """
+
+ def init_weights(self, pretrained=None, patch_padding='pad', part_features=None):
+ """Init backbone weights.
+
+ Args:
+ pretrained (str | None): If pretrained is a string, then it
+ initializes backbone weights by loading the pretrained
+ checkpoint. If pretrained is None, then it follows default
+ initializer or customized initializer in subclasses.
+ patch_padding (str): Option forwarded to ``load_checkpoint`` when
+ adapting a pretrained checkpoint. Defaults to 'pad'.
+ part_features: Option forwarded to ``load_checkpoint``.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger, patch_padding=patch_padding, part_features=part_features)
+ elif pretrained is None:
+ # use default initializer or customized initializer in subclasses
+ pass
+ else:
+ raise TypeError('pretrained must be a str or None.'
+ f' But received {type(pretrained)}.')
+
+ @abstractmethod
+ def forward(self, x):
+ """Forward function.
+
+ Args:
+ x (Tensor | tuple[Tensor]): x could be a torch.Tensor or a tuple of
+ torch.Tensor, containing input data for forward computation.
+ """
diff --git a/vendor/ViTPose/mmpose/models/backbones/cpm.py b/vendor/ViTPose/mmpose/models/backbones/cpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..458245d755f930f4ff625a754aadbab5c13494a6
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/cpm.py
@@ -0,0 +1,186 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint
+
+
+class CpmBlock(nn.Module):
+ """CpmBlock for Convolutional Pose Machine.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ channels (list): Output channels of each conv module.
+ kernels (list): Kernel sizes of each conv module.
+ """
+
+ def __init__(self,
+ in_channels,
+ channels=(128, 128, 128),
+ kernels=(11, 11, 11),
+ norm_cfg=None):
+ super().__init__()
+
+ assert len(channels) == len(kernels)
+ layers = []
+ for i in range(len(channels)):
+ if i == 0:
+ input_channels = in_channels
+ else:
+ input_channels = channels[i - 1]
+ layers.append(
+ ConvModule(
+ input_channels,
+ channels[i],
+ kernels[i],
+ padding=(kernels[i] - 1) // 2,
+ norm_cfg=norm_cfg))
+ self.model = nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Model forward function."""
+ out = self.model(x)
+ return out
+
+
+@BACKBONES.register_module()
+class CPM(BaseBackbone):
+ """CPM backbone.
+
+ Convolutional Pose Machines.
+ More details can be found in the `paper
+ <https://arxiv.org/abs/1602.00134>`__ .
+
+ Args:
+ in_channels (int): The input channels of the CPM.
+ out_channels (int): The output channels of the CPM.
+ feat_channels (int): Feature channel of each CPM stage.
+ middle_channels (int): Feature channel of conv after the middle stage.
+ num_stages (int): Number of stages.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import CPM
+ >>> import torch
+ >>> self = CPM(3, 17)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 368, 368)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ (1, 17, 46, 46)
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ feat_channels=128,
+ middle_channels=32,
+ num_stages=6,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ assert in_channels == 3
+
+ self.num_stages = num_stages
+ assert self.num_stages >= 1
+
+ self.stem = nn.Sequential(
+ ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 32, 5, padding=2, norm_cfg=norm_cfg),
+ ConvModule(32, 512, 9, padding=4, norm_cfg=norm_cfg),
+ ConvModule(512, 512, 1, padding=0, norm_cfg=norm_cfg),
+ ConvModule(512, out_channels, 1, padding=0, act_cfg=None))
+
+ self.middle = nn.Sequential(
+ ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+ ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
+ nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ self.cpm_stages = nn.ModuleList([
+ CpmBlock(
+ middle_channels + out_channels,
+ channels=[feat_channels, feat_channels, feat_channels],
+ kernels=[11, 11, 11],
+ norm_cfg=norm_cfg) for _ in range(num_stages - 1)
+ ])
+
+ self.middle_conv = nn.ModuleList([
+ nn.Sequential(
+ ConvModule(
+ 128, middle_channels, 5, padding=2, norm_cfg=norm_cfg))
+ for _ in range(num_stages - 1)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ nn.Sequential(
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 1,
+ padding=0,
+ norm_cfg=norm_cfg),
+ ConvModule(feat_channels, out_channels, 1, act_cfg=None))
+ for _ in range(num_stages - 1)
+ ])
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ stage1_out = self.stem(x)
+ middle_out = self.middle(x)
+ out_feats = []
+
+ out_feats.append(stage1_out)
+
+ for ind in range(self.num_stages - 1):
+ single_stage = self.cpm_stages[ind]
+ out_conv = self.out_convs[ind]
+
+ inp_feat = torch.cat(
+ [out_feats[-1], self.middle_conv[ind](middle_out)], 1)
+ cpm_feat = single_stage(inp_feat)
+ out_feat = out_conv(cpm_feat)
+ out_feats.append(out_feat)
+
+ return out_feats
diff --git a/vendor/ViTPose/mmpose/models/backbones/hourglass.py b/vendor/ViTPose/mmpose/models/backbones/hourglass.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf75fad9895ebfd3f3c2a6bffedb3d7e4cc77cba
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/hourglass.py
@@ -0,0 +1,212 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .resnet import BasicBlock, ResLayer
+from .utils import load_checkpoint
+
+
+class HourglassModule(nn.Module):
+ """Hourglass Module for HourglassNet backbone.
+
+ Generate module recursively and use BasicBlock as the base unit.
+
+ Args:
+ depth (int): Depth of current HourglassModule.
+ stage_channels (list[int]): Feature channels of sub-modules in current
+ and follow-up HourglassModule.
+ stage_blocks (list[int]): Number of sub-modules stacked in current and
+ follow-up HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ """
+
+ def __init__(self,
+ depth,
+ stage_channels,
+ stage_blocks,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.depth = depth
+
+ cur_block = stage_blocks[0]
+ next_block = stage_blocks[1]
+
+ cur_channel = stage_channels[0]
+ next_channel = stage_channels[1]
+
+ self.up1 = ResLayer(
+ BasicBlock, cur_block, cur_channel, cur_channel, norm_cfg=norm_cfg)
+
+ self.low1 = ResLayer(
+ BasicBlock,
+ cur_block,
+ cur_channel,
+ next_channel,
+ stride=2,
+ norm_cfg=norm_cfg)
+
+ if self.depth > 1:
+ self.low2 = HourglassModule(depth - 1, stage_channels[1:],
+ stage_blocks[1:])
+ else:
+ self.low2 = ResLayer(
+ BasicBlock,
+ next_block,
+ next_channel,
+ next_channel,
+ norm_cfg=norm_cfg)
+
+ self.low3 = ResLayer(
+ BasicBlock,
+ cur_block,
+ next_channel,
+ cur_channel,
+ norm_cfg=norm_cfg,
+ downsample_first=False)
+
+ self.up2 = nn.Upsample(scale_factor=2)
+
+ def forward(self, x):
+ """Model forward function."""
+ up1 = self.up1(x)
+ low1 = self.low1(x)
+ low2 = self.low2(low1)
+ low3 = self.low3(low2)
+ up2 = self.up2(low3)
+ return up1 + up2
+
+
+@BACKBONES.register_module()
+class HourglassNet(BaseBackbone):
+ """HourglassNet backbone.
+
+ Stacked Hourglass Networks for Human Pose Estimation.
+ More details can be found in the `paper
+ <https://arxiv.org/abs/1603.06937>`__ .
+
+ Args:
+ downsample_times (int): Downsample times in a HourglassModule.
+ num_stacks (int): Number of HourglassModule modules stacked,
+ 1 for Hourglass-52, 2 for Hourglass-104.
+ stage_channels (list[int]): Feature channel of each sub-module in a
+ HourglassModule.
+ stage_blocks (list[int]): Number of sub-modules stacked in a
+ HourglassModule.
+ feat_channel (int): Feature channel of conv after a HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import HourglassNet
+ >>> import torch
+ >>> self = HourglassNet()
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 256, 128, 128)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ downsample_times=5,
+ num_stacks=2,
+ stage_channels=(256, 256, 384, 384, 384, 512),
+ stage_blocks=(2, 2, 2, 2, 2, 4),
+ feat_channel=256,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.num_stacks = num_stacks
+ assert self.num_stacks >= 1
+ assert len(stage_channels) == len(stage_blocks)
+ assert len(stage_channels) > downsample_times
+
+ cur_channel = stage_channels[0]
+
+ self.stem = nn.Sequential(
+ ConvModule(3, 128, 7, padding=3, stride=2, norm_cfg=norm_cfg),
+ ResLayer(BasicBlock, 1, 128, 256, stride=2, norm_cfg=norm_cfg))
+
+ self.hourglass_modules = nn.ModuleList([
+ HourglassModule(downsample_times, stage_channels, stage_blocks)
+ for _ in range(num_stacks)
+ ])
+
+ self.inters = ResLayer(
+ BasicBlock,
+ num_stacks - 1,
+ cur_channel,
+ cur_channel,
+ norm_cfg=norm_cfg)
+
+ self.conv1x1s = nn.ModuleList([
+ ConvModule(
+ cur_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
+ for _ in range(num_stacks - 1)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ ConvModule(
+ cur_channel, feat_channel, 3, padding=1, norm_cfg=norm_cfg)
+ for _ in range(num_stacks)
+ ])
+
+ self.remap_convs = nn.ModuleList([
+ ConvModule(
+ feat_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
+ for _ in range(num_stacks - 1)
+ ])
+
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ inter_feat = self.stem(x)
+ out_feats = []
+
+ for ind in range(self.num_stacks):
+ single_hourglass = self.hourglass_modules[ind]
+ out_conv = self.out_convs[ind]
+
+ hourglass_feat = single_hourglass(inter_feat)
+ out_feat = out_conv(hourglass_feat)
+ out_feats.append(out_feat)
+
+ if ind < self.num_stacks - 1:
+ inter_feat = self.conv1x1s[ind](
+ inter_feat) + self.remap_convs[ind](
+ out_feat)
+ inter_feat = self.inters[ind](self.relu(inter_feat))
+
+ return out_feats
diff --git a/vendor/ViTPose/mmpose/models/backbones/hourglass_ae.py b/vendor/ViTPose/mmpose/models/backbones/hourglass_ae.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a700e5cb2157fd1dc16771145f065e991b270ea
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/hourglass_ae.py
@@ -0,0 +1,212 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, MaxPool2d, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint
+
+
+class HourglassAEModule(nn.Module):
+ """Modified Hourglass Module for HourglassNet_AE backbone.
+
+ Generate module recursively and use ConvModule as the base unit.
+
+ Args:
+ depth (int): Depth of current HourglassModule.
+ stage_channels (list[int]): Feature channels of sub-modules in current
+ and follow-up HourglassModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ """
+
+ def __init__(self,
+ depth,
+ stage_channels,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.depth = depth
+
+ cur_channel = stage_channels[0]
+ next_channel = stage_channels[1]
+
+ self.up1 = ConvModule(
+ cur_channel, cur_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.pool1 = MaxPool2d(2, 2)
+
+ self.low1 = ConvModule(
+ cur_channel, next_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ if self.depth > 1:
+ self.low2 = HourglassAEModule(depth - 1, stage_channels[1:])
+ else:
+ self.low2 = ConvModule(
+ next_channel, next_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.low3 = ConvModule(
+ next_channel, cur_channel, 3, padding=1, norm_cfg=norm_cfg)
+
+ self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
+
+ def forward(self, x):
+ """Model forward function."""
+ up1 = self.up1(x)
+ pool1 = self.pool1(x)
+ low1 = self.low1(pool1)
+ low2 = self.low2(low1)
+ low3 = self.low3(low2)
+ up2 = self.up2(low3)
+ return up1 + up2
+
+
+@BACKBONES.register_module()
+class HourglassAENet(BaseBackbone):
+ """Hourglass-AE Network proposed by Newell et al.
+
+ Associative Embedding: End-to-End Learning for Joint
+ Detection and Grouping.
+
+ More details can be found in the `paper
+ <https://arxiv.org/abs/1611.05424>`__ .
+
+ Args:
+ downsample_times (int): Downsample times in a HourglassAEModule.
+ num_stacks (int): Number of HourglassAEModule modules stacked.
+ out_channels (int): Number of output channels of each prediction
+ stage. Default: 34.
+ stage_channels (list[int]): Feature channel of each sub-module in a
+ HourglassAEModule.
+ feat_channels (int): Feature channel of conv after a
+ HourglassAEModule.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+
+ Example:
+ >>> from mmpose.models import HourglassAENet
+ >>> import torch
+ >>> self = HourglassAENet()
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 512, 512)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... print(tuple(level_output.shape))
+ (1, 34, 128, 128)
+ """
+
+ def __init__(self,
+ downsample_times=4,
+ num_stacks=1,
+ out_channels=34,
+ stage_channels=(256, 384, 512, 640, 768),
+ feat_channels=256,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.num_stacks = num_stacks
+ assert self.num_stacks >= 1
+ assert len(stage_channels) > downsample_times
+
+ cur_channels = stage_channels[0]
+
+ self.stem = nn.Sequential(
+ ConvModule(3, 64, 7, padding=3, stride=2, norm_cfg=norm_cfg),
+ ConvModule(64, 128, 3, padding=1, norm_cfg=norm_cfg),
+ MaxPool2d(2, 2),
+ ConvModule(128, 128, 3, padding=1, norm_cfg=norm_cfg),
+ ConvModule(128, feat_channels, 3, padding=1, norm_cfg=norm_cfg),
+ )
+
+ self.hourglass_modules = nn.ModuleList([
+ nn.Sequential(
+ HourglassAEModule(
+ downsample_times, stage_channels, norm_cfg=norm_cfg),
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 3,
+ padding=1,
+ norm_cfg=norm_cfg),
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 3,
+ padding=1,
+ norm_cfg=norm_cfg)) for _ in range(num_stacks)
+ ])
+
+ self.out_convs = nn.ModuleList([
+ ConvModule(
+ cur_channels,
+ out_channels,
+ 1,
+ padding=0,
+ norm_cfg=None,
+ act_cfg=None) for _ in range(num_stacks)
+ ])
+
+ self.remap_out_convs = nn.ModuleList([
+ ConvModule(
+ out_channels,
+ feat_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for _ in range(num_stacks - 1)
+ ])
+
+ self.remap_feature_convs = nn.ModuleList([
+ ConvModule(
+ feat_channels,
+ feat_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for _ in range(num_stacks - 1)
+ ])
+
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Model forward function."""
+ inter_feat = self.stem(x)
+ out_feats = []
+
+ for ind in range(self.num_stacks):
+ single_hourglass = self.hourglass_modules[ind]
+ out_conv = self.out_convs[ind]
+
+ hourglass_feat = single_hourglass(inter_feat)
+ out_feat = out_conv(hourglass_feat)
+ out_feats.append(out_feat)
+
+ if ind < self.num_stacks - 1:
+ inter_feat = inter_feat + self.remap_out_convs[ind](
+ out_feat) + self.remap_feature_convs[ind](
+ hourglass_feat)
+
+ return out_feats
diff --git a/vendor/ViTPose/mmpose/models/backbones/hrformer.py b/vendor/ViTPose/mmpose/models/backbones/hrformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b843300a9fdb85908678c5a3fd45ce19e97ce2fe
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/hrformer.py
@@ -0,0 +1,746 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import math
+
+import torch
+import torch.nn as nn
+# from timm.models.layers import to_2tuple, trunc_normal_
+from mmcv.cnn import (build_activation_layer, build_conv_layer,
+ build_norm_layer, trunc_normal_init)
+from mmcv.cnn.bricks.transformer import build_dropout
+from mmcv.runner import BaseModule
+from torch.nn.functional import pad
+
+from ..builder import BACKBONES
+from .hrnet import Bottleneck, HRModule, HRNet
+
+
+def nlc_to_nchw(x, hw_shape):
+ """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.
+
+ Args:
+ x (Tensor): The input tensor of shape [N, L, C] before conversion.
+ hw_shape (Sequence[int]): The height and width of output feature map.
+
+ Returns:
+ Tensor: The output tensor of shape [N, C, H, W] after conversion.
+ """
+ H, W = hw_shape
+ assert len(x.shape) == 3
+ B, L, C = x.shape
+ assert L == H * W, 'The seq_len doesn\'t match H, W'
+ return x.transpose(1, 2).reshape(B, C, H, W)
+
+
+def nchw_to_nlc(x):
+ """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.
+
+ Args:
+ x (Tensor): The input tensor of shape [N, C, H, W] before conversion.
+
+ Returns:
+ Tensor: The output tensor of shape [N, L, C] after conversion.
+ """
+ assert len(x.shape) == 4
+ return x.flatten(2).transpose(1, 2).contiguous()
+
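Assuming the two helpers above are in scope, the layout conversion is an exact round trip:

```python
# Round-trip check between token layout [N, L, C] and map layout [N, C, H, W].
import torch

N, C, H, W = 2, 32, 8, 12
x_map = torch.rand(N, C, H, W)

x_tokens = nchw_to_nlc(x_map)              # [2, 96, 32], i.e. [N, H*W, C]
assert x_tokens.shape == (N, H * W, C)

x_back = nlc_to_nchw(x_tokens, (H, W))     # back to [2, 32, 8, 12]
assert torch.equal(x_back, x_map)
```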
+
+def build_drop_path(drop_path_rate):
+ """Build drop path layer."""
+ return build_dropout(dict(type='DropPath', drop_prob=drop_path_rate))
+
+
+class WindowMSA(BaseModule):
+ """Window based multi-head self-attention (W-MSA) module with relative
+ position bias.
+
+ Args:
+ embed_dims (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+ window_size (tuple[int]): The height and width of the window.
+ qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.
+ Default: True.
+ qk_scale (float | None, optional): Override default qk scale of
+ head_dim ** -0.5 if set. Default: None.
+ attn_drop_rate (float, optional): Dropout ratio of attention weight.
+ Default: 0.0
+ proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.
+ with_rpe (bool, optional): If True, use relative position bias.
+ Default: True.
+ init_cfg (dict | None, optional): The Config for initialization.
+ Default: None.
+ """
+
+ def __init__(self,
+ embed_dims,
+ num_heads,
+ window_size,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop_rate=0.,
+ proj_drop_rate=0.,
+ with_rpe=True,
+ init_cfg=None):
+
+ super().__init__(init_cfg=init_cfg)
+ self.embed_dims = embed_dims
+ self.window_size = window_size # Wh, Ww
+ self.num_heads = num_heads
+ head_embed_dims = embed_dims // num_heads
+ self.scale = qk_scale or head_embed_dims**-0.5
+
+ self.with_rpe = with_rpe
+ if self.with_rpe:
+ # define a parameter table of relative position bias
+ self.relative_position_bias_table = nn.Parameter(
+ torch.zeros(
+ (2 * window_size[0] - 1) * (2 * window_size[1] - 1),
+ num_heads)) # 2*Wh-1 * 2*Ww-1, nH
+
+ Wh, Ww = self.window_size
+ rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww)
+ rel_position_index = rel_index_coords + rel_index_coords.T
+ rel_position_index = rel_position_index.flip(1).contiguous()
+ self.register_buffer('relative_position_index', rel_position_index)
+
+ self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias)
+ self.attn_drop = nn.Dropout(attn_drop_rate)
+ self.proj = nn.Linear(embed_dims, embed_dims)
+ self.proj_drop = nn.Dropout(proj_drop_rate)
+
+ self.softmax = nn.Softmax(dim=-1)
+
+ def init_weights(self):
+ trunc_normal_init(self.relative_position_bias_table, std=0.02)
+
+ def forward(self, x, mask=None):
+ """
+ Args:
+
+ x (tensor): input features with shape of (B*num_windows, N, C)
+ mask (tensor | None, Optional): mask with shape of (num_windows,
+ Wh*Ww, Wh*Ww), value should be between (-inf, 0].
+ """
+ B, N, C = x.shape
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
+ C // self.num_heads).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2]
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ if self.with_rpe:
+ relative_position_bias = self.relative_position_bias_table[
+ self.relative_position_index.view(-1)].view(
+ self.window_size[0] * self.window_size[1],
+ self.window_size[0] * self.window_size[1],
+ -1) # Wh*Ww,Wh*Ww,nH
+ relative_position_bias = relative_position_bias.permute(
+ 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
+ attn = attn + relative_position_bias.unsqueeze(0)
+
+ if mask is not None:
+ nW = mask.shape[0]
+ attn = attn.view(B // nW, nW, self.num_heads, N,
+ N) + mask.unsqueeze(1).unsqueeze(0)
+ attn = attn.view(-1, self.num_heads, N, N)
+ attn = self.softmax(attn)
+
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+ return x
+
+ @staticmethod
+ def double_step_seq(step1, len1, step2, len2):
+ seq1 = torch.arange(0, step1 * len1, step1)
+ seq2 = torch.arange(0, step2 * len2, step2)
+ return (seq1[:, None] + seq2[None, :]).reshape(1, -1)
+
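`WindowMSA` operates on already-partitioned windows, so its input is `(num_windows * B, Wh * Ww, C)` and the output keeps that shape. A shape-only sketch, assuming the class above is in scope and mmcv is installed:

```python
import torch

attn = WindowMSA(embed_dims=32, num_heads=2, window_size=(7, 7))
x = torch.rand(6, 49, 32)            # (num_windows * B, Wh * Ww, C)
print(attn(x).shape)                 # torch.Size([6, 49, 32])
```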
+
+class LocalWindowSelfAttention(BaseModule):
+ r""" Local-window Self Attention (LSA) module with relative position bias.
+
+ This module is the short-range self-attention module in the
+ Interlaced Sparse Self-Attention <https://arxiv.org/abs/1907.12273>`_.
+
+ Args:
+ embed_dims (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+ window_size (tuple[int] | int): The height and width of the window.
+ qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.
+ Default: True.
+ qk_scale (float | None, optional): Override default qk scale of
+ head_dim ** -0.5 if set. Default: None.
+ attn_drop_rate (float, optional): Dropout ratio of attention weight.
+ Default: 0.0
+ proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.
+ with_rpe (bool, optional): If True, use relative position bias.
+ Default: True.
+ with_pad_mask (bool, optional): If True, mask out the padded tokens in
+ the attention process. Default: False.
+ init_cfg (dict | None, optional): The Config for initialization.
+ Default: None.
+ """
+
+ def __init__(self,
+ embed_dims,
+ num_heads,
+ window_size,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop_rate=0.,
+ proj_drop_rate=0.,
+ with_rpe=True,
+ with_pad_mask=False,
+ init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+ if isinstance(window_size, int):
+ window_size = (window_size, window_size)
+ self.window_size = window_size
+ self.with_pad_mask = with_pad_mask
+ self.attn = WindowMSA(
+ embed_dims=embed_dims,
+ num_heads=num_heads,
+ window_size=window_size,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ attn_drop_rate=attn_drop_rate,
+ proj_drop_rate=proj_drop_rate,
+ with_rpe=with_rpe,
+ init_cfg=init_cfg)
+
+ def forward(self, x, H, W, **kwargs):
+ """Forward function."""
+ B, N, C = x.shape
+ x = x.view(B, H, W, C)
+ Wh, Ww = self.window_size
+
+ # center-pad the feature on H and W axes
+ pad_h = math.ceil(H / Wh) * Wh - H
+ pad_w = math.ceil(W / Ww) * Ww - W
+ x = pad(x, (0, 0, pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+ pad_h - pad_h // 2))
+
+ # permute
+ x = x.view(B, math.ceil(H / Wh), Wh, math.ceil(W / Ww), Ww, C)
+ x = x.permute(0, 1, 3, 2, 4, 5)
+ x = x.reshape(-1, Wh * Ww, C) # (B*num_window, Wh*Ww, C)
+
+ # attention
+ if self.with_pad_mask and pad_h > 0 and pad_w > 0:
+ pad_mask = x.new_zeros(1, H, W, 1)
+ pad_mask = pad(
+ pad_mask, [
+ 0, 0, pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+ pad_h - pad_h // 2
+ ],
+ value=-float('inf'))
+ pad_mask = pad_mask.view(1, math.ceil(H / Wh), Wh,
+ math.ceil(W / Ww), Ww, 1)
+ pad_mask = pad_mask.permute(1, 3, 0, 2, 4, 5)
+ pad_mask = pad_mask.reshape(-1, Wh * Ww)
+ pad_mask = pad_mask[:, None, :].expand([-1, Wh * Ww, -1])
+ out = self.attn(x, pad_mask, **kwargs)
+ else:
+ out = self.attn(x, **kwargs)
+
+ # reverse permutation
+ out = out.reshape(B, math.ceil(H / Wh), math.ceil(W / Ww), Wh, Ww, C)
+ out = out.permute(0, 1, 3, 2, 4, 5)
+ out = out.reshape(B, H + pad_h, W + pad_w, C)
+
+ # de-pad
+ out = out[:, pad_h // 2:H + pad_h // 2, pad_w // 2:W + pad_w // 2]
+ return out.reshape(B, N, C)
+
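The forward pass above center-pads the feature map so both sides become multiples of the window size before partitioning. A quick sketch of that bookkeeping for a hypothetical 17x23 map with 7x7 windows:

```python
# Padding and window count, mirroring the arithmetic in forward() above.
import math

H, W, Wh, Ww = 17, 23, 7, 7
pad_h = math.ceil(H / Wh) * Wh - H              # 4 rows of centre padding
pad_w = math.ceil(W / Ww) * Ww - W              # 5 columns of centre padding
num_windows = ((H + pad_h) // Wh) * ((W + pad_w) // Ww)
print(pad_h, pad_w, num_windows)                # 4 5 12
```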
+
+class CrossFFN(BaseModule):
+ r"""FFN with Depthwise Conv of HRFormer.
+
+ Args:
+ in_features (int): The feature dimension.
+ hidden_features (int, optional): The hidden dimension of FFNs.
+ Defaults: The same as in_features.
+ act_cfg (dict, optional): Config of activation layer.
+ Default: dict(type='GELU').
+ dw_act_cfg (dict, optional): Config of activation layer appended
+ right after DW Conv. Default: dict(type='GELU').
+ norm_cfg (dict, optional): Config of norm layer.
+ Default: dict(type='SyncBN').
+ init_cfg (dict | list | None, optional): The init config.
+ Default: None.
+ """
+
+ def __init__(self,
+ in_features,
+ hidden_features=None,
+ out_features=None,
+ act_cfg=dict(type='GELU'),
+ dw_act_cfg=dict(type='GELU'),
+ norm_cfg=dict(type='SyncBN'),
+ init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Conv2d(in_features, hidden_features, kernel_size=1)
+ self.act1 = build_activation_layer(act_cfg)
+ self.norm1 = build_norm_layer(norm_cfg, hidden_features)[1]
+ self.dw3x3 = nn.Conv2d(
+ hidden_features,
+ hidden_features,
+ kernel_size=3,
+ stride=1,
+ groups=hidden_features,
+ padding=1)
+ self.act2 = build_activation_layer(dw_act_cfg)
+ self.norm2 = build_norm_layer(norm_cfg, hidden_features)[1]
+ self.fc2 = nn.Conv2d(hidden_features, out_features, kernel_size=1)
+ self.act3 = build_activation_layer(act_cfg)
+ self.norm3 = build_norm_layer(norm_cfg, out_features)[1]
+
+ # put the modules together
+ self.layers = [
+ self.fc1, self.norm1, self.act1, self.dw3x3, self.norm2, self.act2,
+ self.fc2, self.norm3, self.act3
+ ]
+
+ def forward(self, x, H, W):
+ """Forward function."""
+ x = nlc_to_nchw(x, (H, W))
+ for layer in self.layers:
+ x = layer(x)
+ x = nchw_to_nlc(x)
+ return x
+
+
+class HRFormerBlock(BaseModule):
+ """High-Resolution Block for HRFormer.
+
+ Args:
+ in_features (int): The input dimension.
+ out_features (int): The output dimension.
+ num_heads (int): The number of head within each LSA.
+ window_size (int, optional): The window size for the LSA.
+ Default: 7
+ mlp_ratio (float, optional): The expansion ratio of FFN.
+ Default: 4
+ act_cfg (dict, optional): Config of activation layer.
+ Default: dict(type='GELU').
+ norm_cfg (dict, optional): Config of norm layer.
+ Default: dict(type='SyncBN').
+ transformer_norm_cfg (dict, optional): Config of transformer norm
+ layer. Default: dict(type='LN', eps=1e-6).
+ init_cfg (dict | list | None, optional): The init config.
+ Default: None.
+ """
+
+ expansion = 1
+
+ def __init__(self,
+ in_features,
+ out_features,
+ num_heads,
+ window_size=7,
+ mlp_ratio=4.0,
+ drop_path=0.0,
+ act_cfg=dict(type='GELU'),
+ norm_cfg=dict(type='SyncBN'),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ init_cfg=None,
+ **kwargs):
+ super(HRFormerBlock, self).__init__(init_cfg=init_cfg)
+ self.num_heads = num_heads
+ self.window_size = window_size
+ self.mlp_ratio = mlp_ratio
+
+ self.norm1 = build_norm_layer(transformer_norm_cfg, in_features)[1]
+ self.attn = LocalWindowSelfAttention(
+ in_features,
+ num_heads=num_heads,
+ window_size=window_size,
+ init_cfg=None,
+ **kwargs)
+
+ self.norm2 = build_norm_layer(transformer_norm_cfg, out_features)[1]
+ self.ffn = CrossFFN(
+ in_features=in_features,
+ hidden_features=int(in_features * mlp_ratio),
+ out_features=out_features,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ dw_act_cfg=act_cfg,
+ init_cfg=None)
+
+ self.drop_path = build_drop_path(
+ drop_path) if drop_path > 0.0 else nn.Identity()
+
+ def forward(self, x):
+ """Forward function."""
+ B, C, H, W = x.size()
+ # Attention
+ x = x.view(B, C, -1).permute(0, 2, 1)
+ x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+ # FFN
+ x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
+ x = x.permute(0, 2, 1).view(B, C, H, W)
+ return x
+
+ def extra_repr(self):
+ """(Optional) Set the extra information about this module."""
+ return 'num_heads={}, window_size={}, mlp_ratio={}'.format(
+ self.num_heads, self.window_size, self.mlp_ratio)
+
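`HRFormerBlock` is shape-preserving on `[B, C, H, W]` maps (windowed attention plus the depth-wise FFN, both residual). A small smoke test, assuming the class above is in scope; `SyncBN` is swapped for plain `BN` so it runs in a single process:

```python
import torch

block = HRFormerBlock(in_features=32, out_features=32, num_heads=2,
                      window_size=7, norm_cfg=dict(type='BN'))
block.eval()
with torch.no_grad():
    out = block(torch.rand(1, 32, 16, 16))
print(out.shape)                     # torch.Size([1, 32, 16, 16])
```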
+
+class HRFomerModule(HRModule):
+ """High-Resolution Module for HRFormer.
+
+ Args:
+ num_branches (int): The number of branches in the HRFormerModule.
+ block (nn.Module): The building block of HRFormer.
+ The block should be the HRFormerBlock.
+ num_blocks (tuple): The number of blocks in each branch.
+ The length must be equal to num_branches.
+ num_inchannels (tuple): The number of input channels in each branch.
+ The length must be equal to num_branches.
+ num_channels (tuple): The number of channels in each branch.
+ The length must be equal to num_branches.
+ num_heads (tuple): The number of heads within the LSAs.
+ num_window_sizes (tuple): The window size for the LSAs.
+ num_mlp_ratios (tuple): The expansion ratio for the FFNs.
+ drop_paths (float, optional): The drop path rate of HRFormer.
+ Default: 0.0
+ multiscale_output (bool, optional): Whether to output multi-level
+ features produced by multiple branches. If False, only the first
+ level feature will be output. Default: True.
+ conv_cfg (dict, optional): Config of the conv layers.
+ Default: None.
+ norm_cfg (dict, optional): Config of the norm layers appended
+ right after conv. Default: dict(type='SyncBN', requires_grad=True)
+ transformer_norm_cfg (dict, optional): Config of the norm layers.
+ Default: dict(type='LN', eps=1e-6)
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False
+ upsample_cfg(dict, optional): The config of upsample layers in fuse
+ layers. Default: dict(mode='bilinear', align_corners=False)
+ """
+
+ def __init__(self,
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ num_heads,
+ num_window_sizes,
+ num_mlp_ratios,
+ multiscale_output=True,
+ drop_paths=0.0,
+ with_rpe=True,
+ with_pad_mask=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ with_cp=False,
+ upsample_cfg=dict(mode='bilinear', align_corners=False)):
+
+ self.transformer_norm_cfg = transformer_norm_cfg
+ self.drop_paths = drop_paths
+ self.num_heads = num_heads
+ self.num_window_sizes = num_window_sizes
+ self.num_mlp_ratios = num_mlp_ratios
+ self.with_rpe = with_rpe
+ self.with_pad_mask = with_pad_mask
+
+ super().__init__(num_branches, block, num_blocks, num_inchannels,
+ num_channels, multiscale_output, with_cp, conv_cfg,
+ norm_cfg, upsample_cfg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ """Build one branch."""
+ # HRFormerBlock does not support down sample layer yet.
+ assert stride == 1 and self.in_channels[branch_index] == num_channels[
+ branch_index]
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ num_heads=self.num_heads[branch_index],
+ window_size=self.num_window_sizes[branch_index],
+ mlp_ratio=self.num_mlp_ratios[branch_index],
+ drop_path=self.drop_paths[0],
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ init_cfg=None,
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask))
+
+ self.in_channels[
+ branch_index] = self.in_channels[branch_index] * block.expansion
+ for i in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ num_heads=self.num_heads[branch_index],
+ window_size=self.num_window_sizes[branch_index],
+ mlp_ratio=self.num_mlp_ratios[branch_index],
+ drop_path=self.drop_paths[i],
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ init_cfg=None,
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask))
+ return nn.Sequential(*layers)
+
+ def _make_fuse_layers(self):
+ """Build fuse layers."""
+ if self.num_branches == 1:
+ return None
+ num_branches = self.num_branches
+ num_inchannels = self.in_channels
+ fuse_layers = []
+ for i in range(num_branches if self.multiscale_output else 1):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_inchannels[i],
+ kernel_size=1,
+ stride=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_inchannels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i),
+ mode=self.upsample_cfg['mode'],
+ align_corners=self.
+ upsample_cfg['align_corners'])))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv3x3s = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ num_outchannels_conv3x3 = num_inchannels[i]
+ with_out_act = False
+ else:
+ num_outchannels_conv3x3 = num_inchannels[j]
+ with_out_act = True
+ sub_modules = [
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_inchannels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=num_inchannels[j],
+ bias=False,
+ ),
+ build_norm_layer(self.norm_cfg,
+ num_inchannels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ kernel_size=1,
+ stride=1,
+ bias=False,
+ ),
+ build_norm_layer(self.norm_cfg,
+ num_outchannels_conv3x3)[1]
+ ]
+ if with_out_act:
+ sub_modules.append(nn.ReLU(False))
+ conv3x3s.append(nn.Sequential(*sub_modules))
+ fuse_layer.append(nn.Sequential(*conv3x3s))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def get_num_inchannels(self):
+ """Return the number of input channels."""
+ return self.in_channels
+
+
+@BACKBONES.register_module()
+class HRFormer(HRNet):
+ """HRFormer backbone.
+
+ This backbone is the implementation of `HRFormer: High-Resolution
+ Transformer for Dense Prediction <https://arxiv.org/abs/2110.09408>`_.
+
+ Args:
+ extra (dict): Detailed configuration for each stage of HRNet.
+ There must be 4 stages, the configuration for each stage must have
+ 5 keys:
+
+ - num_modules (int): The number of HRModule in this stage.
+ - num_branches (int): The number of branches in the HRModule.
+ - block (str): The type of block.
+ - num_blocks (tuple): The number of blocks in each branch.
+ The length must be equal to num_branches.
+ - num_channels (tuple): The number of channels in each branch.
+ The length must be equal to num_branches.
+ in_channels (int): Number of input image channels. Normally 3.
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: None.
+ norm_cfg (dict): Config of norm layer.
+ Use `SyncBN` by default.
+ transformer_norm_cfg (dict): Config of transformer norm layer.
+ Use `LN` by default.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ Example:
+ >>> from mmpose.models import HRFormer
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(2, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+ >>> block='HRFORMER',
+ >>> window_sizes=(7, 7),
+ >>> num_heads=(1, 2),
+ >>> mlp_ratios=(4, 4),
+ >>> num_blocks=(2, 2),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+ >>> block='HRFORMER',
+ >>> window_sizes=(7, 7, 7),
+ >>> num_heads=(1, 2, 4),
+ >>> mlp_ratios=(4, 4, 4),
+ >>> num_blocks=(2, 2, 2),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=2,
+ >>> num_branches=4,
+ >>> block='HRFORMER',
+ >>> window_sizes=(7, 7, 7, 7),
+ >>> num_heads=(1, 2, 4, 8),
+ >>> mlp_ratios=(4, 4, 4, 4),
+ >>> num_blocks=(2, 2, 2, 2),
+ >>> num_channels=(32, 64, 128, 256)))
+ >>> self = HRFormer(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ (1, 64, 4, 4)
+ (1, 128, 2, 2)
+ (1, 256, 1, 1)
+ """
+
+ blocks_dict = {'BOTTLENECK': Bottleneck, 'HRFORMERBLOCK': HRFormerBlock}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ transformer_norm_cfg=dict(type='LN', eps=1e-6),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=False,
+ frozen_stages=-1):
+
+ # stochastic depth
+ depths = [
+ extra[stage]['num_blocks'][0] * extra[stage]['num_modules']
+ for stage in ['stage2', 'stage3', 'stage4']
+ ]
+ depth_s2, depth_s3, _ = depths
+ drop_path_rate = extra['drop_path_rate']
+ dpr = [
+ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+ ]
+ extra['stage2']['drop_path_rates'] = dpr[0:depth_s2]
+ extra['stage3']['drop_path_rates'] = dpr[depth_s2:depth_s2 + depth_s3]
+ extra['stage4']['drop_path_rates'] = dpr[depth_s2 + depth_s3:]
+
+ # HRFormer use bilinear upsample as default
+ upsample_cfg = extra.get('upsample', {
+ 'mode': 'bilinear',
+ 'align_corners': False
+ })
+ extra['upsample'] = upsample_cfg
+ self.transformer_norm_cfg = transformer_norm_cfg
+ self.with_rpe = extra.get('with_rpe', True)
+ self.with_pad_mask = extra.get('with_pad_mask', False)
+
+ super().__init__(extra, in_channels, conv_cfg, norm_cfg, norm_eval,
+ with_cp, zero_init_residual, frozen_stages)
+
+ def _make_stage(self,
+ layer_config,
+ num_inchannels,
+ multiscale_output=True):
+ """Make each stage."""
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+ num_heads = layer_config['num_heads']
+ num_window_sizes = layer_config['window_sizes']
+ num_mlp_ratios = layer_config['mlp_ratios']
+ drop_path_rates = layer_config['drop_path_rates']
+
+ modules = []
+ for i in range(num_modules):
+ # multiscale_output is only used at the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ modules.append(
+ HRFomerModule(
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ num_heads,
+ num_window_sizes,
+ num_mlp_ratios,
+ reset_multiscale_output,
+ drop_paths=drop_path_rates[num_blocks[0] *
+ i:num_blocks[0] * (i + 1)],
+ with_rpe=self.with_rpe,
+ with_pad_mask=self.with_pad_mask,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ transformer_norm_cfg=self.transformer_norm_cfg,
+ with_cp=self.with_cp,
+ upsample_cfg=self.upsample_cfg))
+ num_inchannels = modules[-1].get_num_inchannels()
+
+ return nn.Sequential(*modules), num_inchannels
diff --git a/vendor/ViTPose/mmpose/models/backbones/hrnet.py b/vendor/ViTPose/mmpose/models/backbones/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..87dc8cef555b5e8d78fcc69293047b0cbe2ea8a6
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/hrnet.py
@@ -0,0 +1,604 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .resnet import BasicBlock, Bottleneck, get_expansion
+from .utils import load_checkpoint
+
+
+class HRModule(nn.Module):
+ """High-Resolution Module for HRNet.
+
+ In this module, each branch stacks a configurable number of
+ BasicBlocks/Bottlenecks, and the multi-resolution fusion/exchange
+ between branches is performed at the end of the module.
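+
+ Example (an illustrative sketch; the branch channels and input sizes
+ below are arbitrary, and the printed shapes follow from them):
+ >>> import torch
+ >>> from mmpose.models.backbones.resnet import BasicBlock
+ >>> module = HRModule(
+ >>> num_branches=2,
+ >>> blocks=BasicBlock,
+ >>> num_blocks=(4, 4),
+ >>> in_channels=[32, 64],
+ >>> num_channels=(32, 64),
+ >>> multiscale_output=True)
+ >>> inputs = [torch.rand(1, 32, 16, 16), torch.rand(1, 64, 8, 8)]
+ >>> outputs = module(inputs)
+ >>> for out in outputs:
+ ... print(tuple(out.shape))
+ (1, 32, 16, 16)
+ (1, 64, 8, 8)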
+ """
+
+ def __init__(self,
+ num_branches,
+ blocks,
+ num_blocks,
+ in_channels,
+ num_channels,
+ multiscale_output=False,
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ upsample_cfg=dict(mode='nearest', align_corners=None)):
+
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self._check_branches(num_branches, num_blocks, in_channels,
+ num_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.multiscale_output = multiscale_output
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.upsample_cfg = upsample_cfg
+ self.with_cp = with_cp
+ self.branches = self._make_branches(num_branches, blocks, num_blocks,
+ num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(inplace=True)
+
+ @staticmethod
+ def _check_branches(num_branches, num_blocks, in_channels, num_channels):
+ """Check input to avoid ValueError."""
+ if num_branches != len(num_blocks):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_BLOCKS({len(num_blocks)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_CHANNELS({len(num_channels)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(in_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_INCHANNELS({len(in_channels)})'
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ """Make one branch."""
+ downsample = None
+ if stride != 1 or \
+ self.in_channels[branch_index] != \
+ num_channels[branch_index] * get_expansion(block):
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(
+ self.norm_cfg,
+ num_channels[branch_index] * get_expansion(block))[1])
+
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ stride=stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ self.in_channels[branch_index] = \
+ num_channels[branch_index] * get_expansion(block)
+ for _ in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index] * get_expansion(block),
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ """Make branches."""
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(
+ self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ """Make fuse layer."""
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i),
+ mode=self.upsample_cfg['mode'],
+ align_corners=self.upsample_cfg['align_corners'])))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=True)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+ for i in range(len(self.fuse_layers)):
+ y = 0
+ for j in range(self.num_branches):
+ if i == j:
+ y += x[j]
+ else:
+ y += self.fuse_layers[i][j](x[j])
+ x_fuse.append(self.relu(y))
+ return x_fuse
+
+
+@BACKBONES.register_module()
+class HRNet(nn.Module):
+ """HRNet backbone.
+
+ `High-Resolution Representations for Labeling Pixels and Regions
+ <https://arxiv.org/abs/1904.04514>`__
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Default: 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+
+ Example:
+ >>> from mmpose.models import HRNet
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(4, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=3,
+ >>> num_branches=4,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4, 4),
+ >>> num_channels=(32, 64, 128, 256)))
+ >>> self = HRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ """
+
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=False,
+ frozen_stages=-1):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+ self.zero_init_residual = zero_init_residual
+ self.frozen_stages = frozen_stages
+
+ # stem net
+ self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ 64,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.upsample_cfg = self.extra.get('upsample', {
+ 'mode': 'nearest',
+ 'align_corners': None
+ })
+
+ # stage 1
+ self.stage1_cfg = self.extra['stage1']
+ num_channels = self.stage1_cfg['num_channels'][0]
+ block_type = self.stage1_cfg['block']
+ num_blocks = self.stage1_cfg['num_blocks'][0]
+
+ block = self.blocks_dict[block_type]
+ stage1_out_channels = num_channels * get_expansion(block)
+ self.layer1 = self._make_layer(block, 64, stage1_out_channels,
+ num_blocks)
+
+ # stage 2
+ self.stage2_cfg = self.extra['stage2']
+ num_channels = self.stage2_cfg['num_channels']
+ block_type = self.stage2_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition1 = self._make_transition_layer([stage1_out_channels],
+ num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(
+ self.stage2_cfg, num_channels)
+
+ # stage 3
+ self.stage3_cfg = self.extra['stage3']
+ num_channels = self.stage3_cfg['num_channels']
+ block_type = self.stage3_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition2 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(
+ self.stage3_cfg, num_channels)
+
+ # stage 4
+ self.stage4_cfg = self.extra['stage4']
+ num_channels = self.stage4_cfg['num_channels']
+ block_type = self.stage4_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [
+ channel * get_expansion(block) for channel in num_channels
+ ]
+ self.transition3 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg,
+ num_channels,
+ multiscale_output=self.stage4_cfg.get('multiscale_output', False))
+
+ self._freeze_stages()
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ """Make transition layer."""
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU(inplace=True)))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True)))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
+ """Make layer."""
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1])
+
+ layers = []
+ layers.append(
+ block(
+ in_channels,
+ out_channels,
+ stride=stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ for _ in range(1, blocks):
+ layers.append(
+ block(
+ out_channels,
+ out_channels,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, in_channels, multiscale_output=True):
+ """Make stage."""
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+
+ hr_modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used for the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ hr_modules.append(
+ HRModule(
+ num_branches,
+ block,
+ num_blocks,
+ in_channels,
+ num_channels,
+ reset_multiscale_output,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg,
+ upsample_cfg=self.upsample_cfg))
+
+ in_channels = hr_modules[-1].in_channels
+
+ return nn.Sequential(*hr_modules), in_channels
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.norm1.eval()
+ self.norm2.eval()
+
+ for m in [self.conv1, self.norm1, self.conv2, self.norm2]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ if i == 1:
+ m = getattr(self, 'layer1')
+ else:
+ m = getattr(self, f'stage{i}')
+
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if i < 4:
+ m = getattr(self, f'transition{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.norm2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg['num_branches']):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+
+ x_list = []
+ for i in range(self.stage3_cfg['num_branches']):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg['num_branches']):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ return y_list
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/vendor/ViTPose/mmpose/models/backbones/litehrnet.py b/vendor/ViTPose/mmpose/models/backbones/litehrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..954368841eb631e3dc6c77e9810f6980f3739bf3
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/litehrnet.py
@@ -0,0 +1,984 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HRNet/Lite-HRNet
+# Original licence: Apache License 2.0.
+# ------------------------------------------------------------------------------
+
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
+ build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .utils import channel_shuffle, load_checkpoint
+
+
+class SpatialWeighting(nn.Module):
+ """Spatial weighting module.
+
+ Args:
+ channels (int): The channels of the module.
+ ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: None.
+ act_cfg (dict): Config dict for activation layer.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid')).
+ The last ConvModule uses Sigmoid by default.
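+
+ Example (an illustrative sketch; the input size is arbitrary and the
+ module preserves the input shape):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import SpatialWeighting
+ >>> module = SpatialWeighting(channels=64, ratio=16)
+ >>> x = torch.rand(1, 64, 32, 32)
+ >>> out = module(x)
+ >>> print(tuple(out.shape))
+ (1, 64, 32, 32)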
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+ self.conv1 = ConvModule(
+ in_channels=channels,
+ out_channels=int(channels / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(channels / ratio),
+ out_channels=channels,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ out = self.global_avgpool(x)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ return x * out
+
+
+class CrossResolutionWeighting(nn.Module):
+ """Cross-resolution channel weighting module.
+
+ Args:
+ channels (list(int)): Number of channels in each input branch,
+ ordered from high to low resolution.
+ ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: None.
+ act_cfg (dict): Config dict for activation layer.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid')).
+ The last ConvModule uses Sigmoid by default.
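+
+ Example (an illustrative sketch; `channels` must match the per-branch
+ channels of the input list, ordered from high to low resolution, and
+ the per-branch shapes are preserved):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import CrossResolutionWeighting
+ >>> module = CrossResolutionWeighting(channels=[40, 80], ratio=8)
+ >>> x = [torch.rand(1, 40, 16, 16), torch.rand(1, 80, 8, 8)]
+ >>> out = module(x)
+ >>> for o in out:
+ ... print(tuple(o.shape))
+ (1, 40, 16, 16)
+ (1, 80, 8, 8)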
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.channels = channels
+ total_channel = sum(channels)
+ self.conv1 = ConvModule(
+ in_channels=total_channel,
+ out_channels=int(total_channel / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(total_channel / ratio),
+ out_channels=total_channel,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ mini_size = x[-1].size()[-2:]
+ out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
+ out = torch.cat(out, dim=1)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ out = torch.split(out, self.channels, dim=1)
+ out = [
+ s * F.interpolate(a, size=s.size()[-2:], mode='nearest')
+ for s, a in zip(x, out)
+ ]
+ return out
+
+
+class ConditionalChannelWeighting(nn.Module):
+ """Conditional channel weighting block.
+
+ Args:
+ in_channels (list(int)): Number of input channels in each branch.
+ stride (int): Stride of the 3x3 convolution layer.
+ reduce_ratio (int): channel reduction ratio.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
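+
+ Example (an illustrative sketch; each entry in `in_channels` must be
+ even so that it can be split in half, and with stride=1 the shapes
+ are preserved):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import ConditionalChannelWeighting
+ >>> module = ConditionalChannelWeighting(
+ >>> in_channels=[40, 80], stride=1, reduce_ratio=8)
+ >>> x = [torch.rand(1, 40, 16, 16), torch.rand(1, 80, 8, 8)]
+ >>> out = module(x)
+ >>> for o in out:
+ ... print(tuple(o.shape))
+ (1, 40, 16, 16)
+ (1, 80, 8, 8)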
+ """
+
+ def __init__(self,
+ in_channels,
+ stride,
+ reduce_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False):
+ super().__init__()
+ self.with_cp = with_cp
+ self.stride = stride
+ assert stride in [1, 2]
+
+ branch_channels = [channel // 2 for channel in in_channels]
+
+ self.cross_resolution_weighting = CrossResolutionWeighting(
+ branch_channels,
+ ratio=reduce_ratio,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ self.depthwise_convs = nn.ModuleList([
+ ConvModule(
+ channel,
+ channel,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=channel,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None) for channel in branch_channels
+ ])
+
+ self.spatial_weighting = nn.ModuleList([
+ SpatialWeighting(channels=channel, ratio=4)
+ for channel in branch_channels
+ ])
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ x = [s.chunk(2, dim=1) for s in x]
+ x1 = [s[0] for s in x]
+ x2 = [s[1] for s in x]
+
+ x2 = self.cross_resolution_weighting(x2)
+ x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
+ x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
+
+ out = [torch.cat([s1, s2], dim=1) for s1, s2 in zip(x1, x2)]
+ out = [channel_shuffle(s, 2) for s in out]
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+class Stem(nn.Module):
+ """Stem network block.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ stem_channels (int): Output channels of the stem layer.
+ out_channels (int): The output channels of the block.
+ expand_ratio (int): adjusts number of channels of the hidden layer
+ in InvertedResidual by this amount.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
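+
+ Example (an illustrative sketch mirroring the stem settings of the
+ LiteHRNet example further below; the stem downsamples the input by a
+ factor of 4):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import Stem
+ >>> stem = Stem(
+ >>> in_channels=3, stem_channels=32, out_channels=32, expand_ratio=1)
+ >>> x = torch.rand(1, 3, 64, 64)
+ >>> out = stem(x)
+ >>> print(tuple(out.shape))
+ (1, 32, 16, 16)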
+ """
+
+ def __init__(self,
+ in_channels,
+ stem_channels,
+ out_channels,
+ expand_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False):
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+
+ self.conv1 = ConvModule(
+ in_channels=in_channels,
+ out_channels=stem_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'))
+
+ mid_channels = int(round(stem_channels * expand_ratio))
+ branch_channels = stem_channels // 2
+ if stem_channels == self.out_channels:
+ inc_channels = self.out_channels - branch_channels
+ else:
+ inc_channels = self.out_channels - stem_channels
+
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ branch_channels,
+ branch_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=branch_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_channels,
+ inc_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU')),
+ )
+
+ self.expand_conv = ConvModule(
+ branch_channels,
+ mid_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'))
+ self.depthwise_conv = ConvModule(
+ mid_channels,
+ mid_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=mid_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+ self.linear_conv = ConvModule(
+ mid_channels,
+ branch_channels
+ if stem_channels == self.out_channels else stem_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ x = self.conv1(x)
+ x1, x2 = x.chunk(2, dim=1)
+
+ x2 = self.expand_conv(x2)
+ x2 = self.depthwise_conv(x2)
+ x2 = self.linear_conv(x2)
+
+ out = torch.cat((self.branch1(x1), x2), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+class IterativeHead(nn.Module):
+ """Extra iterative head for feature learning.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
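+
+ Example (an illustrative sketch; note that every output except the
+ highest-resolution one is projected to the channel count of the next
+ finer branch, so the output channels differ from the input channels):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import IterativeHead
+ >>> head = IterativeHead(in_channels=[40, 80, 160])
+ >>> x = [
+ >>> torch.rand(1, 40, 16, 16),
+ >>> torch.rand(1, 80, 8, 8),
+ >>> torch.rand(1, 160, 4, 4)]
+ >>> out = head(x)
+ >>> for o in out:
+ ... print(tuple(o.shape))
+ (1, 40, 16, 16)
+ (1, 40, 8, 8)
+ (1, 80, 4, 4)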
+ """
+
+ def __init__(self, in_channels, norm_cfg=dict(type='BN')):
+ super().__init__()
+ projects = []
+ num_branchs = len(in_channels)
+ self.in_channels = in_channels[::-1]
+
+ for i in range(num_branchs):
+ if i != num_branchs - 1:
+ projects.append(
+ DepthwiseSeparableConvModule(
+ in_channels=self.in_channels[i],
+ out_channels=self.in_channels[i + 1],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ dw_act_cfg=None,
+ pw_act_cfg=dict(type='ReLU')))
+ else:
+ projects.append(
+ DepthwiseSeparableConvModule(
+ in_channels=self.in_channels[i],
+ out_channels=self.in_channels[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ dw_act_cfg=None,
+ pw_act_cfg=dict(type='ReLU')))
+ self.projects = nn.ModuleList(projects)
+
+ def forward(self, x):
+ x = x[::-1]
+
+ y = []
+ last_x = None
+ for i, s in enumerate(x):
+ if last_x is not None:
+ last_x = F.interpolate(
+ last_x,
+ size=s.size()[-2:],
+ mode='bilinear',
+ align_corners=True)
+ s = s + last_x
+ s = self.projects[i](s)
+ y.append(s)
+ last_x = s
+
+ return y[::-1]
+
+
+class ShuffleUnit(nn.Module):
+ """InvertedResidual block for ShuffleNetV2 backbone.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ out_channels (int): The output channels of the block.
+ stride (int): Stride of the 3x3 convolution layer. Default: 1
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
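+
+ Example (an illustrative sketch; with stride=1 the unit keeps the
+ input shape, while with stride=2 it halves the resolution and can
+ change the channel count):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import ShuffleUnit
+ >>> unit = ShuffleUnit(in_channels=64, out_channels=64, stride=1)
+ >>> print(tuple(unit(torch.rand(1, 64, 16, 16)).shape))
+ (1, 64, 16, 16)
+ >>> down = ShuffleUnit(in_channels=64, out_channels=128, stride=2)
+ >>> print(tuple(down(torch.rand(1, 64, 16, 16)).shape))
+ (1, 128, 8, 8)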
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride=1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ super().__init__()
+ self.stride = stride
+ self.with_cp = with_cp
+
+ branch_features = out_channels // 2
+ if self.stride == 1:
+ assert in_channels == branch_features * 2, (
+ f'in_channels ({in_channels}) should be equal to '
+ f'branch_features * 2 ({branch_features * 2}) '
+ 'when stride is 1')
+
+ if in_channels != branch_features * 2:
+ assert self.stride != 1, (
+ f'stride ({self.stride}) should not equal 1 when '
+ f'in_channels != branch_features * 2')
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=in_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ in_channels,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ )
+
+ self.branch2 = nn.Sequential(
+ ConvModule(
+ in_channels if (self.stride > 1) else branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=branch_features,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.stride > 1:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+ else:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+class LiteHRModule(nn.Module):
+ """High-Resolution Module for LiteHRNet.
+
+ It contains conditional channel weighting blocks and
+ shuffle blocks.
+
+ Args:
+ num_branches (int): Number of branches in the module.
+ num_blocks (int): Number of blocks in the module.
+ in_channels (list(int)): Number of input image channels.
+ reduce_ratio (int): Channel reduction ratio.
+ module_type (str): 'LITE' or 'NAIVE'
+ multiscale_output (bool): Whether to output multi-scale features.
+ with_fuse (bool): Whether to use fuse layers.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
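+
+ Example (an illustrative sketch of the 'LITE' module type; the branch
+ channels and input sizes below are arbitrary, and the per-branch
+ shapes are preserved):
+ >>> import torch
+ >>> from mmpose.models.backbones.litehrnet import LiteHRModule
+ >>> module = LiteHRModule(
+ >>> num_branches=2,
+ >>> num_blocks=2,
+ >>> in_channels=[40, 80],
+ >>> reduce_ratio=8,
+ >>> module_type='LITE',
+ >>> multiscale_output=True,
+ >>> with_fuse=True)
+ >>> x = [torch.rand(1, 40, 16, 16), torch.rand(1, 80, 8, 8)]
+ >>> out = module(x)
+ >>> for o in out:
+ ... print(tuple(o.shape))
+ (1, 40, 16, 16)
+ (1, 80, 8, 8)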
+ """
+
+ def __init__(
+ self,
+ num_branches,
+ num_blocks,
+ in_channels,
+ reduce_ratio,
+ module_type,
+ multiscale_output=False,
+ with_fuse=True,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ with_cp=False,
+ ):
+ super().__init__()
+ self._check_branches(num_branches, in_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.module_type = module_type
+ self.multiscale_output = multiscale_output
+ self.with_fuse = with_fuse
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.with_cp = with_cp
+
+ if self.module_type.upper() == 'LITE':
+ self.layers = self._make_weighting_blocks(num_blocks, reduce_ratio)
+ elif self.module_type.upper() == 'NAIVE':
+ self.layers = self._make_naive_branches(num_branches, num_blocks)
+ else:
+ raise ValueError("module_type should be either 'LITE' or 'NAIVE'.")
+ if self.with_fuse:
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU()
+
+ def _check_branches(self, num_branches, in_channels):
+ """Check input to avoid ValueError."""
+ if num_branches != len(in_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) ' \
+ f'!= NUM_INCHANNELS({len(in_channels)})'
+ raise ValueError(error_msg)
+
+ def _make_weighting_blocks(self, num_blocks, reduce_ratio, stride=1):
+ """Make channel weighting blocks."""
+ layers = []
+ for i in range(num_blocks):
+ layers.append(
+ ConditionalChannelWeighting(
+ self.in_channels,
+ stride=stride,
+ reduce_ratio=reduce_ratio,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ with_cp=self.with_cp))
+
+ return nn.Sequential(*layers)
+
+ def _make_one_branch(self, branch_index, num_blocks, stride=1):
+ """Make one branch."""
+ layers = []
+ layers.append(
+ ShuffleUnit(
+ self.in_channels[branch_index],
+ self.in_channels[branch_index],
+ stride=stride,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ with_cp=self.with_cp))
+ for i in range(1, num_blocks):
+ layers.append(
+ ShuffleUnit(
+ self.in_channels[branch_index],
+ self.in_channels[branch_index],
+ stride=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type='ReLU'),
+ with_cp=self.with_cp))
+
+ return nn.Sequential(*layers)
+
+ def _make_naive_branches(self, num_branches, num_blocks):
+ """Make branches."""
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(self._make_one_branch(i, num_blocks))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ """Make fuse layer."""
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i), mode='nearest')))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels[j],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels[j],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=True)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.num_branches == 1:
+ return [self.layers[0](x[0])]
+
+ if self.module_type.upper() == 'LITE':
+ out = self.layers(x)
+ elif self.module_type.upper() == 'NAIVE':
+ for i in range(self.num_branches):
+ x[i] = self.layers[i](x[i])
+ out = x
+
+ if self.with_fuse:
+ out_fuse = []
+ for i in range(len(self.fuse_layers)):
+ # `y = 0` will lead to decreased accuracy (0.5~1 mAP)
+ y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
+ for j in range(self.num_branches):
+ if i == j:
+ y += out[j]
+ else:
+ y += self.fuse_layers[i][j](out[j])
+ out_fuse.append(self.relu(y))
+ out = out_fuse
+ if not self.multiscale_output:
+ out = [out[0]]
+ return out
+
+
+@BACKBONES.register_module()
+class LiteHRNet(nn.Module):
+ """Lite-HRNet backbone.
+
+ `Lite-HRNet: A Lightweight High-Resolution Network
+ <https://arxiv.org/abs/2104.06403>`_.
+
+ Code adapted from 'https://github.com/HRNet/Lite-HRNet'.
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Default: 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+
+ Example:
+ >>> from mmpose.models import LiteHRNet
+ >>> import torch
+ >>> extra=dict(
+ >>> stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ >>> num_stages=3,
+ >>> stages_spec=dict(
+ >>> num_modules=(2, 4, 2),
+ >>> num_branches=(2, 3, 4),
+ >>> num_blocks=(2, 2, 2),
+ >>> module_type=('LITE', 'LITE', 'LITE'),
+ >>> with_fuse=(True, True, True),
+ >>> reduce_ratios=(8, 8, 8),
+ >>> num_channels=(
+ >>> (40, 80),
+ >>> (40, 80, 160),
+ >>> (40, 80, 160, 320),
+ >>> )),
+ >>> with_head=False)
+ >>> self = LiteHRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 40, 8, 8)
+ """
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ norm_eval=False,
+ with_cp=False):
+ super().__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.stem = Stem(
+ in_channels,
+ stem_channels=self.extra['stem']['stem_channels'],
+ out_channels=self.extra['stem']['out_channels'],
+ expand_ratio=self.extra['stem']['expand_ratio'],
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+
+ self.num_stages = self.extra['num_stages']
+ self.stages_spec = self.extra['stages_spec']
+
+ num_channels_last = [
+ self.stem.out_channels,
+ ]
+ for i in range(self.num_stages):
+ num_channels = self.stages_spec['num_channels'][i]
+ num_channels = [num_channels[i] for i in range(len(num_channels))]
+ setattr(
+ self, f'transition{i}',
+ self._make_transition_layer(num_channels_last, num_channels))
+
+ stage, num_channels_last = self._make_stage(
+ self.stages_spec, i, num_channels, multiscale_output=True)
+ setattr(self, f'stage{i}', stage)
+
+ self.with_head = self.extra['with_head']
+ if self.with_head:
+ self.head_layer = IterativeHead(
+ in_channels=num_channels_last,
+ norm_cfg=self.norm_cfg,
+ )
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ """Make transition layer."""
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_pre_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=num_channels_pre_layer[i],
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_pre_layer[i])[1],
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU()))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=in_channels,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels)[1],
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU()))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_stage(self,
+ stages_spec,
+ stage_index,
+ in_channels,
+ multiscale_output=True):
+ num_modules = stages_spec['num_modules'][stage_index]
+ num_branches = stages_spec['num_branches'][stage_index]
+ num_blocks = stages_spec['num_blocks'][stage_index]
+ reduce_ratio = stages_spec['reduce_ratios'][stage_index]
+ with_fuse = stages_spec['with_fuse'][stage_index]
+ module_type = stages_spec['module_type'][stage_index]
+
+ modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used for the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ modules.append(
+ LiteHRModule(
+ num_branches,
+ num_blocks,
+ in_channels,
+ reduce_ratio,
+ module_type,
+ multiscale_output=reset_multiscale_output,
+ with_fuse=with_fuse,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ with_cp=self.with_cp))
+ in_channels = modules[-1].in_channels
+
+ return nn.Sequential(*modules), in_channels
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.stem(x)
+
+ y_list = [x]
+ for i in range(self.num_stages):
+ x_list = []
+ transition = getattr(self, f'transition{i}')
+ for j in range(self.stages_spec['num_branches'][i]):
+ if transition[j]:
+ if j >= len(y_list):
+ x_list.append(transition[j](y_list[-1]))
+ else:
+ x_list.append(transition[j](y_list[j]))
+ else:
+ x_list.append(y_list[j])
+ y_list = getattr(self, f'stage{i}')(x_list)
+
+ x = y_list
+ if self.with_head:
+ x = self.head_layer(x)
+
+ return [x[0]]
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/vendor/ViTPose/mmpose/models/backbones/mobilenet_v2.py b/vendor/ViTPose/mmpose/models/backbones/mobilenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dc0cd1b7dfdec2aa751861e39fc1c1a45ec488e
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/mobilenet_v2.py
@@ -0,0 +1,275 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import load_checkpoint, make_divisible
+
+
+class InvertedResidual(nn.Module):
+ """InvertedResidual block for MobileNetV2.
+
+ Args:
+ in_channels (int): The input channels of the InvertedResidual block.
+ out_channels (int): The output channels of the InvertedResidual block.
+ stride (int): Stride of the middle (first) 3x3 convolution.
+ expand_ratio (int): adjusts number of channels of the hidden layer
+ in InvertedResidual by this amount.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU6').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
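+
+ Example (an illustrative sketch; with stride=1 and matching input and
+ output channels the block uses a residual connection and keeps the
+ input shape):
+ >>> import torch
+ >>> from mmpose.models.backbones.mobilenet_v2 import InvertedResidual
+ >>> block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
+ >>> x = torch.rand(1, 32, 56, 56)
+ >>> print(tuple(block(x).shape))
+ (1, 32, 56, 56)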
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride,
+ expand_ratio,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU6'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stride = stride
+ assert stride in [1, 2], f'stride must in [1, 2]. ' \
+ f'But received {stride}.'
+ self.with_cp = with_cp
+ self.use_res_connect = self.stride == 1 and in_channels == out_channels
+ hidden_dim = int(round(in_channels * expand_ratio))
+
+ layers = []
+ if expand_ratio != 1:
+ layers.append(
+ ConvModule(
+ in_channels=in_channels,
+ out_channels=hidden_dim,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+ layers.extend([
+ ConvModule(
+ in_channels=hidden_dim,
+ out_channels=hidden_dim,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ groups=hidden_dim,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ in_channels=hidden_dim,
+ out_channels=out_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+ ])
+ self.conv = nn.Sequential(*layers)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.use_res_connect:
+ return x + self.conv(x)
+ return self.conv(x)
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+@BACKBONES.register_module()
+class MobileNetV2(BaseBackbone):
+ """MobileNetV2 backbone.
+
+ Args:
+ widen_factor (float): Width multiplier, multiply number of
+ channels in each layer by this amount. Default: 1.0.
+ out_indices (None or Sequence[int]): Output from which stages.
+ Default: (7, ).
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU6').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ # Parameters to build layers. 4 parameters are needed to construct a
+ # layer, from left to right: expand_ratio, channel, num_blocks, stride.
+ arch_settings = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2],
+ [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2],
+ [6, 320, 1, 1]]
+
+ def __init__(self,
+ widen_factor=1.,
+ out_indices=(7, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU6'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.widen_factor = widen_factor
+ self.out_indices = out_indices
+ for index in out_indices:
+ if index not in range(0, 8):
+ raise ValueError('the item in out_indices must be in '
+ f'range(0, 8). But received {index}')
+
+ if frozen_stages not in range(-1, 8):
+ raise ValueError('frozen_stages must be in range(-1, 8). '
+ f'But received {frozen_stages}')
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.in_channels = make_divisible(32 * widen_factor, 8)
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+
+ self.layers = []
+
+ for i, layer_cfg in enumerate(self.arch_settings):
+ expand_ratio, channel, num_blocks, stride = layer_cfg
+ out_channels = make_divisible(channel * widen_factor, 8)
+ inverted_res_layer = self.make_layer(
+ out_channels=out_channels,
+ num_blocks=num_blocks,
+ stride=stride,
+ expand_ratio=expand_ratio)
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, inverted_res_layer)
+ self.layers.append(layer_name)
+
+ if widen_factor > 1.0:
+ self.out_channel = int(1280 * widen_factor)
+ else:
+ self.out_channel = 1280
+
+ layer = ConvModule(
+ in_channels=self.in_channels,
+ out_channels=self.out_channel,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+ self.add_module('conv2', layer)
+ self.layers.append('conv2')
+
+ def make_layer(self, out_channels, num_blocks, stride, expand_ratio):
+ """Stack InvertedResidual blocks to build a layer for MobileNetV2.
+
+ Args:
+ out_channels (int): Number of output channels of the block.
+ num_blocks (int): Number of blocks to stack.
+ stride (int): Stride of the first block.
+ expand_ratio (int): Expand the number of channels of the
+ hidden layer in InvertedResidual by this ratio.
+ """
+ layers = []
+ for i in range(num_blocks):
+ if i >= 1:
+ stride = 1
+ layers.append(
+ InvertedResidual(
+ self.in_channels,
+ out_channels,
+ stride,
+ expand_ratio=expand_ratio,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/vendor/ViTPose/mmpose/models/backbones/mobilenet_v3.py b/vendor/ViTPose/mmpose/models/backbones/mobilenet_v3.py
new file mode 100644
index 0000000000000000000000000000000000000000..d640abec79f06d689f2d4bc1e92999946bc07261
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/mobilenet_v3.py
@@ -0,0 +1,188 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import InvertedResidual, load_checkpoint
+
+
+@BACKBONES.register_module()
+class MobileNetV3(BaseBackbone):
+ """MobileNetV3 backbone.
+
+ Args:
+ arch (str): Architecture of MobileNetV3, from {small, big}.
+ Default: small.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ out_indices (None or Sequence[int]): Output from which stages.
+ Default: (-1, ), which means output tensors from final stage.
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save
+ some memory while slowing down the training speed.
+ Default: False.
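+
+ Example (an illustrative sketch; the output shape assumes the 'small'
+ arch and a 224x224 input):
+ >>> from mmpose.models import MobileNetV3
+ >>> import torch
+ >>> self = MobileNetV3(arch='small', out_indices=(-1, ))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> output = self.forward(inputs)
+ >>> print(tuple(output.shape))
+ (1, 96, 7, 7)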
+ """
+ # Parameters to build each block:
+ # [kernel size, mid channels, out channels, with_se, act type, stride]
+ arch_settings = {
+ 'small': [[3, 16, 16, True, 'ReLU', 2],
+ [3, 72, 24, False, 'ReLU', 2],
+ [3, 88, 24, False, 'ReLU', 1],
+ [5, 96, 40, True, 'HSwish', 2],
+ [5, 240, 40, True, 'HSwish', 1],
+ [5, 240, 40, True, 'HSwish', 1],
+ [5, 120, 48, True, 'HSwish', 1],
+ [5, 144, 48, True, 'HSwish', 1],
+ [5, 288, 96, True, 'HSwish', 2],
+ [5, 576, 96, True, 'HSwish', 1],
+ [5, 576, 96, True, 'HSwish', 1]],
+ 'big': [[3, 16, 16, False, 'ReLU', 1],
+ [3, 64, 24, False, 'ReLU', 2],
+ [3, 72, 24, False, 'ReLU', 1],
+ [5, 72, 40, True, 'ReLU', 2],
+ [5, 120, 40, True, 'ReLU', 1],
+ [5, 120, 40, True, 'ReLU', 1],
+ [3, 240, 80, False, 'HSwish', 2],
+ [3, 200, 80, False, 'HSwish', 1],
+ [3, 184, 80, False, 'HSwish', 1],
+ [3, 184, 80, False, 'HSwish', 1],
+ [3, 480, 112, True, 'HSwish', 1],
+ [3, 672, 112, True, 'HSwish', 1],
+ [5, 672, 160, True, 'HSwish', 1],
+ [5, 672, 160, True, 'HSwish', 2],
+ [5, 960, 160, True, 'HSwish', 1]]
+ } # yapf: disable
+
+ def __init__(self,
+ arch='small',
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ out_indices=(-1, ),
+ frozen_stages=-1,
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert arch in self.arch_settings
+ for index in out_indices:
+ if index not in range(-len(self.arch_settings[arch]),
+ len(self.arch_settings[arch])):
+ raise ValueError(
+ 'the item in out_indices must be in '
+ f'range({-len(self.arch_settings[arch])}, '
+ f'{len(self.arch_settings[arch])}). '
+ f'But received {index}')
+
+ if frozen_stages not in range(-1, len(self.arch_settings[arch])):
+ raise ValueError('frozen_stages must be in range(-1, '
+ f'{len(self.arch_settings[arch])}). '
+ f'But received {frozen_stages}')
+ self.arch = arch
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.in_channels = 16
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type='HSwish'))
+
+ self.layers = self._make_layer()
+ self.feat_dim = self.arch_settings[arch][-1][2]
+
+ def _make_layer(self):
+ layers = []
+ layer_setting = self.arch_settings[self.arch]
+ for i, params in enumerate(layer_setting):
+ (kernel_size, mid_channels, out_channels, with_se, act,
+ stride) = params
+ if with_se:
+ se_cfg = dict(
+ channels=mid_channels,
+ ratio=4,
+ act_cfg=(dict(type='ReLU'), dict(type='HSigmoid')))
+ else:
+ se_cfg = None
+
+ layer = InvertedResidual(
+ in_channels=self.in_channels,
+ out_channels=out_channels,
+ mid_channels=mid_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ se_cfg=se_cfg,
+ with_expand_conv=True,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type=act),
+ with_cp=self.with_cp)
+ self.in_channels = out_channels
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, layer)
+ layers.append(layer_name)
+ return layers
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+ if i in self.out_indices or \
+ i - len(self.layers) in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
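+
+
+# The guarded block below is an illustrative smoke test added for this
+# write-up, not part of the upstream OpenMMLab file. It exercises the class
+# defined above and shows how negative ``out_indices`` entries select stages
+# counted from the end, exactly as handled in forward().
+if __name__ == '__main__':
+    import torch
+
+    model = MobileNetV3(arch='small', out_indices=(-2, -1))
+    model.init_weights()
+    model.eval()
+    with torch.no_grad():
+        feats = model(torch.rand(1, 3, 224, 224))
+    # Two maps come back because two indices were requested; index -1 is the
+    # final stage and -2 the one before it.
+    print([tuple(f.shape) for f in feats])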
diff --git a/vendor/ViTPose/mmpose/models/backbones/mspn.py b/vendor/ViTPose/mmpose/models/backbones/mspn.py
new file mode 100644
index 0000000000000000000000000000000000000000..71cee34e399780e8b67eac43d862b65a3ce05412
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/mspn.py
@@ -0,0 +1,513 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+from collections import OrderedDict
+
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (ConvModule, MaxPool2d, constant_init, kaiming_init,
+ normal_init)
+from mmcv.runner.checkpoint import load_state_dict
+
+from mmpose.utils import get_root_logger
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .resnet import Bottleneck as _Bottleneck
+from .utils.utils import get_state_dict
+
+
+class Bottleneck(_Bottleneck):
+ """Bottleneck block for MSPN.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ stride (int): stride of the block. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ expansion = 4
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super().__init__(in_channels, out_channels * 4, **kwargs)
+
+
+class DownsampleModule(nn.Module):
+ """Downsample module for MSPN.
+
+ Args:
+ block (nn.Module): Downsample block.
+ num_blocks (list): Number of blocks in each downsample unit.
+ num_units (int): Number of downsample units. Default: 4
+ has_skip (bool): Whether to take skip connections from the prior
+ upsample module. Default: False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the input feature to
+ downsample module. Default: 64
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ num_units=4,
+ has_skip=False,
+ norm_cfg=dict(type='BN'),
+ in_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.has_skip = has_skip
+ self.in_channels = in_channels
+ assert len(num_blocks) == num_units
+ self.num_blocks = num_blocks
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.layer1 = self._make_layer(block, in_channels, num_blocks[0])
+ for i in range(1, num_units):
+ module_name = f'layer{i + 1}'
+ self.add_module(
+ module_name,
+ self._make_layer(
+ block, in_channels * pow(2, i), num_blocks[i], stride=2))
+
+ def _make_layer(self, block, out_channels, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.in_channels != out_channels * block.expansion:
+ downsample = ConvModule(
+ self.in_channels,
+ out_channels * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ units = list()
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ stride=stride,
+ downsample=downsample,
+ norm_cfg=self.norm_cfg))
+ self.in_channels = out_channels * block.expansion
+ for _ in range(1, blocks):
+ units.append(block(self.in_channels, out_channels))
+
+ return nn.Sequential(*units)
+
+ def forward(self, x, skip1, skip2):
+ out = list()
+ for i in range(self.num_units):
+ module_name = f'layer{i + 1}'
+ module_i = getattr(self, module_name)
+ x = module_i(x)
+ if self.has_skip:
+ x = x + skip1[i] + skip2[i]
+ out.append(x)
+ out.reverse()
+
+ return tuple(out)
+
+
+class UpsampleUnit(nn.Module):
+ """Upsample unit for upsample module.
+
+ Args:
+ ind (int): Indicates whether to interpolate (>0) and whether to
+ generate feature map for the next hourglass-like module.
+ num_units (int): Number of units that form an upsample module. Along
+ with ind and gen_cross_conv, num_units is used to decide whether
+ to generate a feature map for the next hourglass-like module.
+ in_channels (int): Channel number of the skip-in feature maps from
+ the corresponding downsample unit.
+ unit_channels (int): Channel number in this unit. Default: 256.
+ gen_skip (bool): Whether or not to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of the feature output by the
+ upsample module. Must equal in_channels of the downsample module.
+ Default: 64
+ """
+
+ def __init__(self,
+ ind,
+ num_units,
+ in_channels,
+ unit_channels=256,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.in_skip = ConvModule(
+ in_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.ind = ind
+ if self.ind > 0:
+ self.up_conv = ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ self.gen_skip = gen_skip
+ if self.gen_skip:
+ self.out_skip1 = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.out_skip2 = ConvModule(
+ unit_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.gen_cross_conv = gen_cross_conv
+ if self.ind == num_units - 1 and self.gen_cross_conv:
+ self.cross_conv = ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ def forward(self, x, up_x):
+ out = self.in_skip(x)
+
+ if self.ind > 0:
+ up_x = F.interpolate(
+ up_x,
+ size=(x.size(2), x.size(3)),
+ mode='bilinear',
+ align_corners=True)
+ up_x = self.up_conv(up_x)
+ out = out + up_x
+ out = self.relu(out)
+
+ skip1 = None
+ skip2 = None
+ if self.gen_skip:
+ skip1 = self.out_skip1(x)
+ skip2 = self.out_skip2(out)
+
+ cross_conv = None
+ if self.ind == self.num_units - 1 and self.gen_cross_conv:
+ cross_conv = self.cross_conv(out)
+
+ return out, skip1, skip2, cross_conv
+
+
+class UpsampleModule(nn.Module):
+ """Upsample module for MSPN.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units.
+ Default:256.
+ num_units (int): Number of upsample units. Default: 4
+ gen_skip (bool): Whether to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of the feature output by the
+ upsample module. Must equal in_channels of the downsample module.
+ Default: 64
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_units=4,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = list()
+ for i in range(num_units):
+ self.in_channels.append(Bottleneck.expansion * out_channels *
+ pow(2, i))
+ self.in_channels.reverse()
+ self.num_units = num_units
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.norm_cfg = norm_cfg
+ for i in range(num_units):
+ module_name = f'up{i + 1}'
+ self.add_module(
+ module_name,
+ UpsampleUnit(
+ i,
+ self.num_units,
+ self.in_channels[i],
+ unit_channels,
+ self.gen_skip,
+ self.gen_cross_conv,
+ norm_cfg=self.norm_cfg,
+ out_channels=64))
+
+ def forward(self, x):
+ out = list()
+ skip1 = list()
+ skip2 = list()
+ cross_conv = None
+ for i in range(self.num_units):
+ module_i = getattr(self, f'up{i + 1}')
+ if i == 0:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], None)
+ elif i == self.num_units - 1:
+ outi, skip1_i, skip2_i, cross_conv = module_i(x[i], out[i - 1])
+ else:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], out[i - 1])
+ out.append(outi)
+ skip1.append(skip1_i)
+ skip2.append(skip2_i)
+ skip1.reverse()
+ skip2.reverse()
+
+ return out, skip1, skip2, cross_conv
+
+
+class SingleStageNetwork(nn.Module):
+ """Single_stage Network.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units.
+ Default: 256.
+ num_units (int): Number of downsample/upsample units. Default: 4
+ gen_skip (bool): Whether to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ has_skip (bool): Whether to take skip connections from the prior
+ upsample module. Default: False
+ num_blocks (list): Number of blocks in each downsample unit.
+ Default: [2, 2, 2, 2] Note: Make sure num_units==len(num_blocks)
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the feature from ResNetTop.
+ Default: 64.
+ """
+
+ def __init__(self,
+ has_skip=False,
+ gen_skip=False,
+ gen_cross_conv=False,
+ unit_channels=256,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ in_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ assert len(num_blocks) == num_units
+ self.has_skip = has_skip
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.num_units = num_units
+ self.unit_channels = unit_channels
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ self.downsample = DownsampleModule(Bottleneck, num_blocks, num_units,
+ has_skip, norm_cfg, in_channels)
+ self.upsample = UpsampleModule(unit_channels, num_units, gen_skip,
+ gen_cross_conv, norm_cfg, in_channels)
+
+ def forward(self, x, skip1, skip2):
+ mid = self.downsample(x, skip1, skip2)
+ out, skip1, skip2, cross_conv = self.upsample(mid)
+
+ return out, skip1, skip2, cross_conv
+
+
+class ResNetTop(nn.Module):
+ """ResNet top for MSPN.
+
+ Args:
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ channels (int): Number of channels of the feature output by ResNetTop.
+ """
+
+ def __init__(self, norm_cfg=dict(type='BN'), channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.top = nn.Sequential(
+ ConvModule(
+ 3,
+ channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ norm_cfg=norm_cfg,
+ inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ def forward(self, img):
+ return self.top(img)
+
+
+@BACKBONES.register_module()
+class MSPN(BaseBackbone):
+ """MSPN backbone. Paper ref: Li et al. "Rethinking on Multi-Stage Networks
+ for Human Pose Estimation" (CVPR 2020).
+
+ Args:
+ unit_channels (int): Number of Channels in an upsample unit.
+ Default: 256
+ num_stages (int): Number of stages in a multi-stage MSPN. Default: 4
+ num_units (int): Number of downsample/upsample units in a single-stage
+ network. Default: 4
+ Note: Make sure num_units == len(self.num_blocks)
+ num_blocks (list): Number of bottlenecks in each
+ downsample unit. Default: [2, 2, 2, 2]
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ res_top_channels (int): Number of channels of feature from ResNetTop.
+ Default: 64.
+
+ Example:
+ >>> from mmpose.models import MSPN
+ >>> import torch
+ >>> self = MSPN(num_stages=2,num_units=2,num_blocks=[2,2])
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... for feature in level_output:
+ ... print(tuple(feature.shape))
+ ...
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_stages=4,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ res_top_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ assert self.num_stages > 0
+ assert self.num_units > 1
+ assert self.num_units == len(self.num_blocks)
+ self.top = ResNetTop(norm_cfg=norm_cfg)
+ self.multi_stage_mspn = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if i == 0:
+ has_skip = False
+ else:
+ has_skip = True
+ if i != self.num_stages - 1:
+ gen_skip = True
+ gen_cross_conv = True
+ else:
+ gen_skip = False
+ gen_cross_conv = False
+ self.multi_stage_mspn.append(
+ SingleStageNetwork(has_skip, gen_skip, gen_cross_conv,
+ unit_channels, num_units, num_blocks,
+ norm_cfg, res_top_channels))
+
+ def forward(self, x):
+ """Model forward function."""
+ out_feats = []
+ skip1 = None
+ skip2 = None
+ x = self.top(x)
+ for i in range(self.num_stages):
+ out, skip1, skip2, x = self.multi_stage_mspn[i](x, skip1, skip2)
+ out_feats.append(out)
+
+ return out_feats
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ state_dict_tmp = get_state_dict(pretrained)
+ state_dict = OrderedDict()
+ state_dict['top'] = OrderedDict()
+ state_dict['bottlenecks'] = OrderedDict()
+ for k, v in state_dict_tmp.items():
+ if k.startswith('layer'):
+ if 'downsample.0' in k:
+ state_dict['bottlenecks'][k.replace(
+ 'downsample.0', 'downsample.conv')] = v
+ elif 'downsample.1' in k:
+ state_dict['bottlenecks'][k.replace(
+ 'downsample.1', 'downsample.bn')] = v
+ else:
+ state_dict['bottlenecks'][k] = v
+ elif k.startswith('conv1'):
+ state_dict['top'][k.replace('conv1', 'top.0.conv')] = v
+ elif k.startswith('bn1'):
+ state_dict['top'][k.replace('bn1', 'top.0.bn')] = v
+
+ load_state_dict(
+ self.top, state_dict['top'], strict=False, logger=logger)
+ for i in range(self.num_stages):
+ load_state_dict(
+ self.multi_stage_mspn[i].downsample,
+ state_dict['bottlenecks'],
+ strict=False,
+ logger=logger)
+ else:
+ for m in self.multi_stage_mspn.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ for m in self.top.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
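+
+
+# Guarded smoke test added for this write-up (not upstream code): it checks
+# the nesting of the forward() output, i.e. one list per stage, each holding
+# ``num_units`` feature maps ordered from coarsest to finest resolution.
+if __name__ == '__main__':
+    import torch
+
+    model = MSPN(num_stages=2, num_units=2, num_blocks=[2, 2])
+    model.init_weights()
+    model.eval()
+    with torch.no_grad():
+        out_feats = model(torch.rand(1, 3, 256, 256))
+    assert len(out_feats) == 2 and len(out_feats[0]) == 2
+    # The finest map of the last stage, out_feats[-1][-1], is what a top-down
+    # heatmap head would typically consume.
+    print(tuple(out_feats[-1][-1].shape))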
diff --git a/vendor/ViTPose/mmpose/models/backbones/regnet.py b/vendor/ViTPose/mmpose/models/backbones/regnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..693417c2d61066e4e9a90989ad61700448028e58
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/regnet.py
@@ -0,0 +1,317 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import ResNet
+from .resnext import Bottleneck
+
+
+@BACKBONES.register_module()
+class RegNet(ResNet):
+ """RegNet backbone.
+
+ More details can be found in `paper <https://arxiv.org/abs/2003.13678>`__ .
+
+ Args:
+ arch (dict): The parameter of RegNets.
+ - w0 (int): initial width
+ - wa (float): slope of width
+ - wm (float): quantization parameter to quantize the width
+ - depth (int): depth of the backbone
+ - group_w (int): width of group
+ - bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ base_channels (int): Base channels after stem layer.
+ in_channels (int): Number of input image channels. Default: 3.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer. Default: "pytorch".
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters. Default: -1.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN', requires_grad=True).
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import RegNet
+ >>> import torch
+ >>> self = RegNet(
+ arch=dict(
+ w0=88,
+ wa=26.31,
+ wm=2.25,
+ group_w=48,
+ depth=25,
+ bot_mul=1.0),
+ out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 96, 8, 8)
+ (1, 192, 4, 4)
+ (1, 432, 2, 2)
+ (1, 1008, 1, 1)
+ """
+ arch_settings = {
+ 'regnetx_400mf':
+ dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
+ 'regnetx_800mf':
+ dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16, bot_mul=1.0),
+ 'regnetx_1.6gf':
+ dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18, bot_mul=1.0),
+ 'regnetx_3.2gf':
+ dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25, bot_mul=1.0),
+ 'regnetx_4.0gf':
+ dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23, bot_mul=1.0),
+ 'regnetx_6.4gf':
+ dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17, bot_mul=1.0),
+ 'regnetx_8.0gf':
+ dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23, bot_mul=1.0),
+ 'regnetx_12gf':
+ dict(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, bot_mul=1.0),
+ }
+
+ def __init__(self,
+ arch,
+ in_channels=3,
+ stem_channels=32,
+ base_channels=32,
+ strides=(2, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super(ResNet, self).__init__()
+
+ # Generate RegNet parameters first
+ if isinstance(arch, str):
+ assert arch in self.arch_settings, \
+ f'"arch": "{arch}" is not one of the' \
+ ' arch_settings'
+ arch = self.arch_settings[arch]
+ elif not isinstance(arch, dict):
+ raise TypeError('Expect "arch" to be either a string '
+ f'or a dict, got {type(arch)}')
+
+ widths, num_stages = self.generate_regnet(
+ arch['w0'],
+ arch['wa'],
+ arch['wm'],
+ arch['depth'],
+ )
+ # Convert to per stage format
+ stage_widths, stage_blocks = self.get_stages_from_blocks(widths)
+ # Generate group widths and bot muls
+ group_widths = [arch['group_w'] for _ in range(num_stages)]
+ self.bottleneck_ratio = [arch['bot_mul'] for _ in range(num_stages)]
+ # Adjust the compatibility of stage_widths and group_widths
+ stage_widths, group_widths = self.adjust_width_group(
+ stage_widths, self.bottleneck_ratio, group_widths)
+
+ # Group params by stage
+ self.stage_widths = stage_widths
+ self.group_widths = group_widths
+ self.depth = sum(stage_blocks)
+ self.stem_channels = stem_channels
+ self.base_channels = base_channels
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ if self.deep_stem:
+ raise NotImplementedError(
+ 'deep_stem has not been implemented for RegNet')
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.stage_blocks = stage_blocks[:num_stages]
+
+ self._make_stem_layer(in_channels, stem_channels)
+
+ _in_channels = stem_channels
+ self.res_layers = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = self.strides[i]
+ dilation = self.dilations[i]
+ group_width = self.group_widths[i]
+ width = int(round(self.stage_widths[i] * self.bottleneck_ratio[i]))
+ stage_groups = width // group_width
+
+ res_layer = self.make_res_layer(
+ block=Bottleneck,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=self.stage_widths[i],
+ expansion=1,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=self.with_cp,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ base_channels=self.stage_widths[i],
+ groups=stage_groups,
+ width_per_group=group_width)
+ _in_channels = self.stage_widths[i]
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = stage_widths[-1]
+
+ def _make_stem_layer(self, in_channels, base_channels):
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ base_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, base_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+
+ @staticmethod
+ def generate_regnet(initial_width,
+ width_slope,
+ width_parameter,
+ depth,
+ divisor=8):
+ """Generates per block width from RegNet parameters.
+
+ Args:
+ initial_width ([int]): Initial width of the backbone
+ width_slope ([float]): Slope of the quantized linear function
+ width_parameter ([int]): Parameter used to quantize the width.
+ depth ([int]): Depth of the backbone.
+ divisor (int, optional): The divisor of channels. Defaults to 8.
+
+ Returns:
+ tuple(list[int], int): a list of per-block widths and the number of
+ stages (i.e. the number of distinct widths)
+ """
+ assert width_slope >= 0
+ assert initial_width > 0
+ assert width_parameter > 1
+ assert initial_width % divisor == 0
+ widths_cont = np.arange(depth) * width_slope + initial_width
+ ks = np.round(
+ np.log(widths_cont / initial_width) / np.log(width_parameter))
+ widths = initial_width * np.power(width_parameter, ks)
+ widths = np.round(np.divide(widths, divisor)) * divisor
+ num_stages = len(np.unique(widths))
+ widths, widths_cont = widths.astype(int).tolist(), widths_cont.tolist()
+ return widths, num_stages
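+    # Note added for this write-up: generate_regnet() above implements the
+    # quantised linear width rule of the RegNet paper: for block j,
+    # w_j = w0 * wm ** round(log_wm((w0 + wa * j) / w0)), rounded to the
+    # nearest multiple of ``divisor``; the number of distinct w_j values
+    # gives the number of stages.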
+
+ @staticmethod
+ def quantize_float(number, divisor):
+ """Converts a float to closest non-zero int divisible by divior.
+
+ Args:
+ number (int): Original number to be quantized.
+ divisor (int): Divisor used to quantize the number.
+
+ Returns:
+ int: quantized number that is divisible by devisor.
+ """
+ return int(round(number / divisor) * divisor)
+
+ def adjust_width_group(self, widths, bottleneck_ratio, groups):
+ """Adjusts the compatibility of widths and groups.
+
+ Args:
+ widths (list[int]): Width of each stage.
+ bottleneck_ratio (list[float]): Bottleneck ratio of each stage.
+ groups (list[int]): Number of groups in each stage.
+
+ Returns:
+ tuple(list): The adjusted widths and groups of each stage.
+ """
+ bottleneck_width = [
+ int(w * b) for w, b in zip(widths, bottleneck_ratio)
+ ]
+ groups = [min(g, w_bot) for g, w_bot in zip(groups, bottleneck_width)]
+ bottleneck_width = [
+ self.quantize_float(w_bot, g)
+ for w_bot, g in zip(bottleneck_width, groups)
+ ]
+ widths = [
+ int(w_bot / b)
+ for w_bot, b in zip(bottleneck_width, bottleneck_ratio)
+ ]
+ return widths, groups
+
+ def get_stages_from_blocks(self, widths):
+ """Gets widths/stage_blocks of network at each stage.
+
+ Args:
+ widths (list[int]): Width in each stage.
+
+ Returns:
+ tuple(list): width and depth of each stage
+ """
+ width_diff = [
+ width != width_prev
+ for width, width_prev in zip(widths + [0], [0] + widths)
+ ]
+ stage_widths = [
+ width for width, diff in zip(widths, width_diff[:-1]) if diff
+ ]
+ stage_blocks = np.diff([
+ depth for depth, diff in zip(range(len(width_diff)), width_diff)
+ if diff
+ ]).tolist()
+ return stage_widths, stage_blocks
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
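+
+
+# Guarded usage sketch added for this write-up, not upstream code. It builds a
+# RegNet from a named arch and also calls generate_regnet() directly to show
+# the per-block widths produced by the quantised linear rule.
+if __name__ == '__main__':
+    import torch
+
+    widths, num_stages = RegNet.generate_regnet(24, 24.48, 2.54, 22)
+    print(num_stages, sorted(set(widths)))  # stage count and distinct widths
+
+    model = RegNet(arch='regnetx_400mf', out_indices=(0, 1, 2, 3))
+    model.eval()
+    with torch.no_grad():
+        outs = model(torch.rand(1, 3, 64, 64))
+    print([tuple(o.shape) for o in outs])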
diff --git a/vendor/ViTPose/mmpose/models/backbones/resnest.py b/vendor/ViTPose/mmpose/models/backbones/resnest.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a2d4081df1417155f0626646f5fe3d0dbfc2864
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/resnest.py
@@ -0,0 +1,338 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResLayer, ResNetV1d
+
+
+class RSoftmax(nn.Module):
+ """Radix Softmax module in ``SplitAttentionConv2d``.
+
+ Args:
+ radix (int): Radix of input.
+ groups (int): Groups of input.
+ """
+
+ def __init__(self, radix, groups):
+ super().__init__()
+ self.radix = radix
+ self.groups = groups
+
+ def forward(self, x):
+ batch = x.size(0)
+ if self.radix > 1:
+ x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2)
+ x = F.softmax(x, dim=1)
+ x = x.reshape(batch, -1)
+ else:
+ x = torch.sigmoid(x)
+ return x
+
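+# Note added for this write-up: for radix > 1, RSoftmax.forward() above
+# arranges the input as (batch, radix, groups, channels_per_group) via the
+# view/transpose, so the softmax is taken across the radix splits of each
+# channel; with radix == 1 it degenerates to a plain sigmoid gate.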
+
+class SplitAttentionConv2d(nn.Module):
+ """Split-Attention Conv2d.
+
+ Args:
+ in_channels (int): Same as nn.Conv2d.
+ channels (int): Same as the out_channels of nn.Conv2d (per split).
+ kernel_size (int | tuple[int]): Same as nn.Conv2d.
+ stride (int | tuple[int]): Same as nn.Conv2d.
+ padding (int | tuple[int]): Same as nn.Conv2d.
+ dilation (int | tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+ radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ """
+
+ def __init__(self,
+ in_channels,
+ channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ radix=2,
+ reduction_factor=4,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ super().__init__()
+ inter_channels = max(in_channels * radix // reduction_factor, 32)
+ self.radix = radix
+ self.groups = groups
+ self.channels = channels
+ self.conv = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ channels * radix,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups * radix,
+ bias=False)
+ self.norm0_name, norm0 = build_norm_layer(
+ norm_cfg, channels * radix, postfix=0)
+ self.add_module(self.norm0_name, norm0)
+ self.relu = nn.ReLU(inplace=True)
+ self.fc1 = build_conv_layer(
+ None, channels, inter_channels, 1, groups=self.groups)
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, inter_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.fc2 = build_conv_layer(
+ None, inter_channels, channels * radix, 1, groups=self.groups)
+ self.rsoftmax = RSoftmax(radix, groups)
+
+ @property
+ def norm0(self):
+ return getattr(self, self.norm0_name)
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.norm0(x)
+ x = self.relu(x)
+
+ batch, rchannel = x.shape[:2]
+ if self.radix > 1:
+ splits = x.view(batch, self.radix, -1, *x.shape[2:])
+ gap = splits.sum(dim=1)
+ else:
+ gap = x
+ gap = F.adaptive_avg_pool2d(gap, 1)
+ gap = self.fc1(gap)
+
+ gap = self.norm1(gap)
+ gap = self.relu(gap)
+
+ atten = self.fc2(gap)
+ atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
+
+ if self.radix > 1:
+ attens = atten.view(batch, self.radix, -1, *atten.shape[2:])
+ out = torch.sum(attens * splits, dim=1)
+ else:
+ out = atten * x
+ return out.contiguous()
+
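+# Note added for this write-up: SplitAttentionConv2d.forward() above sums the
+# radix splits into one globally pooled descriptor, turns it into per-split
+# channel attention via fc1/fc2 and RSoftmax, and returns the
+# attention-weighted sum of the splits.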
+
+class Bottleneck(_Bottleneck):
+ """Bottleneck block for ResNeSt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+ radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ avg_down_stride (bool): Whether to use average pool for stride in
+ Bottleneck. Default: True.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ groups=1,
+ width_per_group=4,
+ base_channels=64,
+ radix=2,
+ reduction_factor=4,
+ avg_down_stride=True,
+ **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # For ResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for ResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.avg_down_stride = avg_down_stride and self.conv2_stride > 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = SplitAttentionConv2d(
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=1 if self.avg_down_stride else self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ radix=radix,
+ reduction_factor=reduction_factor,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+ delattr(self, self.norm2_name)
+
+ if self.avg_down_stride:
+ self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)
+
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+
+ if self.avg_down_stride:
+ out = self.avd_layer(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ResNeSt(ResNetV1d):
+ """ResNeSt backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/2004.08955>`__
+ for details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152, 200, 269}.
+ groups (int): Groups of conv2 in Bottleneck. Default: 1.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+ radix (int): Radix of SplitAttentionConv2d. Default: 2
+ reduction_factor (int): Reduction factor of SplitAttentionConv2d.
+ Default: 4.
+ avg_down_stride (bool): Whether to use average pool for stride in
+ Bottleneck. Default: True.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+ """
+
+ arch_settings = {
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3)),
+ 200: (Bottleneck, (3, 24, 36, 3)),
+ 269: (Bottleneck, (3, 30, 48, 8))
+ }
+
+ def __init__(self,
+ depth,
+ groups=1,
+ width_per_group=4,
+ radix=2,
+ reduction_factor=4,
+ avg_down_stride=True,
+ **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ self.radix = radix
+ self.reduction_factor = reduction_factor
+ self.avg_down_stride = avg_down_stride
+ super().__init__(depth=depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ radix=self.radix,
+ reduction_factor=self.reduction_factor,
+ avg_down_stride=self.avg_down_stride,
+ **kwargs)
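+
+
+# Guarded smoke test added for this write-up (not upstream code): it builds a
+# ResNeSt-50 with the default single output stage and runs a tiny input
+# through it.
+if __name__ == '__main__':
+    import torch
+
+    model = ResNeSt(depth=50)
+    model.eval()
+    with torch.no_grad():
+        feat = model(torch.rand(1, 3, 64, 64))
+    # For a 64x64 input the final-stage feature should be (1, 2048, 2, 2).
+    print(tuple(feat.shape))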
diff --git a/vendor/ViTPose/mmpose/models/backbones/resnet.py b/vendor/ViTPose/mmpose/models/backbones/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..649496a755020140d94eb32fbe79d1ff135c86ca
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/resnet.py
@@ -0,0 +1,701 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
+ constant_init, kaiming_init)
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class BasicBlock(nn.Module):
+ """BasicBlock for ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the output channels of conv1. This is a
+ reserved argument in BasicBlock and should always be 1. Default: 1.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): `pytorch` or `caffe`. It is unused and reserved for
+ unified API with Bottleneck.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=1,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert self.expansion == 1
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, out_channels, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ 3,
+ stride=stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ 3,
+ padding=1,
+ bias=False)
+ self.add_module(self.norm2_name, norm2)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ """Bottleneck block for ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the input/output channels of conv2. Default: 4.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
+ stride-two layer is the 3x3 conv layer, otherwise the stride-two
+ layer is the first 1x1 conv layer. Default: "pytorch".
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=4,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert style in ['pytorch', 'caffe']
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ if self.style == 'pytorch':
+ self.conv1_stride = 1
+ self.conv2_stride = stride
+ else:
+ self.conv1_stride = stride
+ self.conv2_stride = 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ @property
+ def norm3(self):
+ """nn.Module: the normalization layer named "norm3" """
+ return getattr(self, self.norm3_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def get_expansion(block, expansion=None):
+ """Get the expansion of a residual block.
+
+ The block expansion will be obtained by the following order:
+
+ 1. If ``expansion`` is given, just return it.
+ 2. If ``block`` has the attribute ``expansion``, then return
+ ``block.expansion``.
+ 3. Return the default value according to the block type:
+ 1 for ``BasicBlock`` and 4 for ``Bottleneck``.
+
+ Args:
+ block (class): The block class.
+ expansion (int | None): The given expansion ratio.
+
+ Returns:
+ int: The expansion of the block.
+ """
+ if isinstance(expansion, int):
+ assert expansion > 0
+ elif expansion is None:
+ if hasattr(block, 'expansion'):
+ expansion = block.expansion
+ elif issubclass(block, BasicBlock):
+ expansion = 1
+ elif issubclass(block, Bottleneck):
+ expansion = 4
+ else:
+ raise TypeError(f'expansion is not specified for {block.__name__}')
+ else:
+ raise TypeError('expansion must be an integer or None')
+
+ return expansion
+
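+# For example (note added for this write-up): get_expansion(BasicBlock)
+# returns 1, get_expansion(Bottleneck) returns 4, and an explicit integer such
+# as get_expansion(Bottleneck, 2) is returned as-is.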
+
+class ResLayer(nn.Sequential):
+ """ResLayer to build ResNet style backbone.
+
+ Args:
+ block (nn.Module): Residual block used to build ResLayer.
+ num_blocks (int): Number of blocks.
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int, optional): The expansion for BasicBlock/Bottleneck.
+ If not specified, it will firstly be obtained via
+ ``block.expansion``. If the block has no attribute "expansion",
+ the following default values will be used: 1 for BasicBlock and
+ 4 for Bottleneck. Default: None.
+ stride (int): stride of the first block. Default: 1.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ downsample_first (bool): Downsample at the first block or last block.
+ False for Hourglass, True for ResNet. Default: True
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ in_channels,
+ out_channels,
+ expansion=None,
+ stride=1,
+ avg_down=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ downsample_first=True,
+ **kwargs):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ self.block = block
+ self.expansion = get_expansion(block, expansion)
+
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = []
+ conv_stride = stride
+ if avg_down and stride != 1:
+ conv_stride = 1
+ downsample.append(
+ nn.AvgPool2d(
+ kernel_size=stride,
+ stride=stride,
+ ceil_mode=True,
+ count_include_pad=False))
+ downsample.extend([
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=conv_stride,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1]
+ ])
+ downsample = nn.Sequential(*downsample)
+
+ layers = []
+ if downsample_first:
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ in_channels = out_channels
+ for _ in range(1, num_blocks):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ else: # downsample_first=False is for HourglassModule
+ for i in range(0, num_blocks - 1):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ **kwargs))
+
+ super().__init__(*layers)
+
+
+@BACKBONES.register_module()
+class ResNet(BaseBackbone):
+ """ResNet backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1512.03385>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {18, 34, 50, 101, 152}.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ base_channels (int): Middle channels of the first stage. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import ResNet
+ >>> import torch
+ >>> self = ResNet(depth=18, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 64, 8, 8)
+ (1, 128, 4, 4)
+ (1, 256, 2, 2)
+ (1, 512, 1, 1)
+ """
+
+ arch_settings = {
+ 18: (BasicBlock, (2, 2, 2, 2)),
+ 34: (BasicBlock, (3, 4, 6, 3)),
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self,
+ depth,
+ in_channels=3,
+ stem_channels=64,
+ base_channels=64,
+ expansion=None,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for resnet')
+ self.depth = depth
+ self.stem_channels = stem_channels
+ self.base_channels = base_channels
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.block, stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ self.expansion = get_expansion(self.block, expansion)
+
+ self._make_stem_layer(in_channels, stem_channels)
+
+ self.res_layers = []
+ _in_channels = stem_channels
+ _out_channels = base_channels * self.expansion
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = strides[i]
+ dilation = dilations[i]
+ res_layer = self.make_res_layer(
+ block=self.block,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=_out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+ _in_channels = _out_channels
+ _out_channels *= 2
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = res_layer[-1].out_channels
+
+ def make_res_layer(self, **kwargs):
+ """Make a ResLayer."""
+ return ResLayer(**kwargs)
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ def _make_stem_layer(self, in_channels, stem_channels):
+ """Make stem layer."""
+ if self.deep_stem:
+ self.stem = nn.Sequential(
+ ConvModule(
+ in_channels,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True))
+ else:
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ stem_channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, stem_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ if self.deep_stem:
+ self.stem.eval()
+ for param in self.stem.parameters():
+ param.requires_grad = False
+ else:
+ self.norm1.eval()
+ for m in [self.conv1, self.norm1]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = getattr(self, f'layer{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.deep_stem:
+ x = self.stem(x)
+ else:
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval() only has an effect on BatchNorm layers
+ if isinstance(m, _BatchNorm):
+ m.eval()
+
+
+@BACKBONES.register_module()
+class ResNetV1d(ResNet):
+ r"""ResNetV1d variant described in `Bag of Tricks
+ <https://arxiv.org/abs/1812.01187>`__.
+
+ Compared with the default ResNet (ResNetV1b), ResNetV1d replaces the 7x7
+ conv in the input stem with three 3x3 convs. In the downsampling block, a
+ 2x2 avg_pool with stride 2 is added before the conv, whose stride is
+ changed to 1.
+ """
+
+ def __init__(self, **kwargs):
+ super().__init__(deep_stem=True, avg_down=True, **kwargs)
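
A minimal usage sketch for the ResNetV1d variant defined above, assuming the vendored mmpose package (under vendor/ViTPose) is importable and re-exports ResNetV1d from mmpose.models like the other registered backbones; the input size and expected channel counts follow the docstring examples of this backbone family.

import torch
from mmpose.models import ResNetV1d  # assumed re-export of the class above

model = ResNetV1d(depth=50, out_indices=(0, 1, 2, 3))
model.init_weights()  # random init, since no pretrained path is given
model.eval()
with torch.no_grad():
    feats = model(torch.rand(1, 3, 224, 224))
for feat in feats:
    print(tuple(feat.shape))
# expected per the docstring examples of this family:
# (1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14), (1, 2048, 7, 7)
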
diff --git a/vendor/ViTPose/mmpose/models/backbones/resnext.py b/vendor/ViTPose/mmpose/models/backbones/resnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..c10dc33f98ac3229c77bf306acf19950c295f904
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/resnext.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResLayer, ResNet
+
+
+class Bottleneck(_Bottleneck):
+ """Bottleneck block for ResNeXt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ base_channels=64,
+ groups=32,
+ width_per_group=4,
+ **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # For ResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for ResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+
+@BACKBONES.register_module()
+class ResNeXt(ResNet):
+ """ResNeXt backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1611.05431>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ groups (int): Groups of conv2 in Bottleneck. Default: 32.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise a tuple of tensors (one per stage) will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import ResNeXt
+ >>> import torch
+ >>> self = ResNeXt(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 8, 8)
+ (1, 512, 4, 4)
+ (1, 1024, 2, 2)
+ (1, 2048, 1, 1)
+ """
+
+ arch_settings = {
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ **kwargs)
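
Because ResNeXt is registered via @BACKBONES.register_module(), it can also be built from a config dict; a short sketch, assuming mmpose.models exposes the usual build_backbone helper from its builder module.

from mmpose.models import build_backbone  # assumed builder helper

# ResNeXt-101 (32x4d); the type string matches the registered class name above.
backbone = build_backbone(
    dict(type='ResNeXt', depth=101, groups=32, width_per_group=4,
         out_indices=(3, )))
backbone.init_weights()
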
diff --git a/vendor/ViTPose/mmpose/models/backbones/rsn.py b/vendor/ViTPose/mmpose/models/backbones/rsn.py
new file mode 100644
index 0000000000000000000000000000000000000000..29038afe2a77dcb3d3b027b1549d478916a50727
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/rsn.py
@@ -0,0 +1,616 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (ConvModule, MaxPool2d, constant_init, kaiming_init,
+ normal_init)
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class RSB(nn.Module):
+ """Residual Steps block for RSN. Paper ref: Cai et al. "Learning Delicate
+ Local Representations for Multi-Person Pose Estimation" (ECCV 2020).
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ num_steps (int): Number of steps in the RSB. Default: 4
+ stride (int): stride of the block. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ expand_times (int): Times by which the in_channels are expanded.
+ Default:26.
+ res_top_channels (int): Number of channels of feature output by
+ ResNet_top. Default:64.
+ """
+
+ expansion = 1
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_steps=4,
+ stride=1,
+ downsample=None,
+ with_cp=False,
+ norm_cfg=dict(type='BN'),
+ expand_times=26,
+ res_top_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ assert num_steps > 1
+ self.in_channels = in_channels
+ self.branch_channels = self.in_channels * expand_times
+ self.branch_channels //= res_top_channels
+ self.out_channels = out_channels
+ self.stride = stride
+ self.downsample = downsample
+ self.with_cp = with_cp
+ self.norm_cfg = norm_cfg
+ self.num_steps = num_steps
+ self.conv_bn_relu1 = ConvModule(
+ self.in_channels,
+ self.num_steps * self.branch_channels,
+ kernel_size=1,
+ stride=self.stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ for i in range(self.num_steps):
+ for j in range(i + 1):
+ module_name = f'conv_bn_relu2_{i + 1}_{j + 1}'
+ self.add_module(
+ module_name,
+ ConvModule(
+ self.branch_channels,
+ self.branch_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=self.norm_cfg,
+ inplace=False))
+ self.conv_bn3 = ConvModule(
+ self.num_steps * self.branch_channels,
+ self.out_channels * self.expansion,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ act_cfg=None,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ self.relu = nn.ReLU(inplace=False)
+
+ def forward(self, x):
+ """Forward function."""
+
+ identity = x
+ x = self.conv_bn_relu1(x)
+ spx = torch.split(x, self.branch_channels, 1)
+ outputs = list()
+ outs = list()
+ for i in range(self.num_steps):
+ outputs_i = list()
+ outputs.append(outputs_i)
+ for j in range(i + 1):
+ if j == 0:
+ inputs = spx[i]
+ else:
+ inputs = outputs[i][j - 1]
+ if i > j:
+ inputs = inputs + outputs[i - 1][j]
+ module_name = f'conv_bn_relu2_{i + 1}_{j + 1}'
+ module_i_j = getattr(self, module_name)
+ outputs[i].append(module_i_j(inputs))
+
+ outs.append(outputs[i][i])
+ out = torch.cat(tuple(outs), 1)
+ out = self.conv_bn3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(identity)
+ out = out + identity
+
+ out = self.relu(out)
+
+ return out
+
+
+class Downsample_module(nn.Module):
+ """Downsample module for RSN.
+
+ Args:
+ block (nn.Module): Downsample block.
+ num_blocks (list): Number of blocks in each downsample unit.
+ num_units (int): Number of downsample units. Default: 4
+ has_skip (bool): Whether to add skip connections from the prior
+ upsample module. Default: False
+ num_steps (int): Number of steps in a block. Default:4
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the input feature to
+ downsample module. Default: 64
+ expand_times (int): Times by which the in_channels are expanded.
+ Default:26.
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ num_steps=4,
+ num_units=4,
+ has_skip=False,
+ norm_cfg=dict(type='BN'),
+ in_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.has_skip = has_skip
+ self.in_channels = in_channels
+ assert len(num_blocks) == num_units
+ self.num_blocks = num_blocks
+ self.num_units = num_units
+ self.num_steps = num_steps
+ self.norm_cfg = norm_cfg
+ self.layer1 = self._make_layer(
+ block,
+ in_channels,
+ num_blocks[0],
+ expand_times=expand_times,
+ res_top_channels=in_channels)
+ for i in range(1, num_units):
+ module_name = f'layer{i + 1}'
+ self.add_module(
+ module_name,
+ self._make_layer(
+ block,
+ in_channels * pow(2, i),
+ num_blocks[i],
+ stride=2,
+ expand_times=expand_times,
+ res_top_channels=in_channels))
+
+ def _make_layer(self,
+ block,
+ out_channels,
+ blocks,
+ stride=1,
+ expand_times=26,
+ res_top_channels=64):
+ downsample = None
+ if stride != 1 or self.in_channels != out_channels * block.expansion:
+ downsample = ConvModule(
+ self.in_channels,
+ out_channels * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ units = list()
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ num_steps=self.num_steps,
+ stride=stride,
+ downsample=downsample,
+ norm_cfg=self.norm_cfg,
+ expand_times=expand_times,
+ res_top_channels=res_top_channels))
+ self.in_channels = out_channels * block.expansion
+ for _ in range(1, blocks):
+ units.append(
+ block(
+ self.in_channels,
+ out_channels,
+ num_steps=self.num_steps,
+ expand_times=expand_times,
+ res_top_channels=res_top_channels))
+
+ return nn.Sequential(*units)
+
+ def forward(self, x, skip1, skip2):
+ out = list()
+ for i in range(self.num_units):
+ module_name = f'layer{i + 1}'
+ module_i = getattr(self, module_name)
+ x = module_i(x)
+ if self.has_skip:
+ x = x + skip1[i] + skip2[i]
+ out.append(x)
+ out.reverse()
+
+ return tuple(out)
+
+
+class Upsample_unit(nn.Module):
+ """Upsample unit for upsample module.
+
+ Args:
+ ind (int): Indicates whether to interpolate (>0) and whether to
+ generate feature map for the next hourglass-like module.
+ num_units (int): Number of units that form an upsample module. Along
+ with ind and gen_cross_conv, num_units is used to decide whether
+ to generate a feature map for the next hourglass-like module.
+ in_channels (int): Channel number of the skip-in feature maps from
+ the corresponding downsample unit.
+ unit_channels (int): Channel number in this unit. Default:256.
+ gen_skip (bool): Whether to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of the feature output by the
+ upsample module. Must equal the in_channels of the downsample
+ module. Default: 64
+ """
+
+ def __init__(self,
+ ind,
+ num_units,
+ in_channels,
+ unit_channels=256,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.num_units = num_units
+ self.norm_cfg = norm_cfg
+ self.in_skip = ConvModule(
+ in_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+ self.relu = nn.ReLU(inplace=True)
+
+ self.ind = ind
+ if self.ind > 0:
+ self.up_conv = ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ act_cfg=None,
+ inplace=True)
+
+ self.gen_skip = gen_skip
+ if self.gen_skip:
+ self.out_skip1 = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.out_skip2 = ConvModule(
+ unit_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ self.gen_cross_conv = gen_cross_conv
+ if self.ind == num_units - 1 and self.gen_cross_conv:
+ self.cross_conv = ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=self.norm_cfg,
+ inplace=True)
+
+ def forward(self, x, up_x):
+ out = self.in_skip(x)
+
+ if self.ind > 0:
+ up_x = F.interpolate(
+ up_x,
+ size=(x.size(2), x.size(3)),
+ mode='bilinear',
+ align_corners=True)
+ up_x = self.up_conv(up_x)
+ out = out + up_x
+ out = self.relu(out)
+
+ skip1 = None
+ skip2 = None
+ if self.gen_skip:
+ skip1 = self.out_skip1(x)
+ skip2 = self.out_skip2(out)
+
+ cross_conv = None
+ if self.ind == self.num_units - 1 and self.gen_cross_conv:
+ cross_conv = self.cross_conv(out)
+
+ return out, skip1, skip2, cross_conv
+
+
+class Upsample_module(nn.Module):
+ """Upsample module for RSN.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units.
+ Default:256.
+ num_units (int): Number of upsample units. Default: 4
+ gen_skip (bool): Whether to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ out_channels (int): Number of channels of the feature output by the
+ upsample module. Must equal the in_channels of the downsample
+ module. Default: 64
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_units=4,
+ gen_skip=False,
+ gen_cross_conv=False,
+ norm_cfg=dict(type='BN'),
+ out_channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = list()
+ for i in range(num_units):
+ self.in_channels.append(RSB.expansion * out_channels * pow(2, i))
+ self.in_channels.reverse()
+ self.num_units = num_units
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.norm_cfg = norm_cfg
+ for i in range(num_units):
+ module_name = f'up{i + 1}'
+ self.add_module(
+ module_name,
+ Upsample_unit(
+ i,
+ self.num_units,
+ self.in_channels[i],
+ unit_channels,
+ self.gen_skip,
+ self.gen_cross_conv,
+ norm_cfg=self.norm_cfg,
+ out_channels=64))
+
+ def forward(self, x):
+ out = list()
+ skip1 = list()
+ skip2 = list()
+ cross_conv = None
+ for i in range(self.num_units):
+ module_i = getattr(self, f'up{i + 1}')
+ if i == 0:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], None)
+ elif i == self.num_units - 1:
+ outi, skip1_i, skip2_i, cross_conv = module_i(x[i], out[i - 1])
+ else:
+ outi, skip1_i, skip2_i, _ = module_i(x[i], out[i - 1])
+ out.append(outi)
+ skip1.append(skip1_i)
+ skip2.append(skip2_i)
+ skip1.reverse()
+ skip2.reverse()
+
+ return out, skip1, skip2, cross_conv
+
+
+class Single_stage_RSN(nn.Module):
+ """Single_stage Residual Steps Network.
+
+ Args:
+ unit_channels (int): Channel number in the upsample units. Default:256.
+ num_units (int): Number of downsample/upsample units. Default: 4
+ gen_skip (bool): Whether to generate skips for the posterior
+ downsample module. Default: False
+ gen_cross_conv (bool): Whether to generate a feature map for the next
+ hourglass-like module. Default: False
+ has_skip (bool): Whether to add skip connections from the prior
+ upsample module. Default: False
+ num_steps (int): Number of steps in RSB. Default: 4
+ num_blocks (list): Number of blocks in each downsample unit.
+ Default: [2, 2, 2, 2] Note: Make sure num_units==len(num_blocks)
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ in_channels (int): Number of channels of the feature from ResNet_Top.
+ Default: 64.
+ expand_times (int): Times by which the in_channels are expanded in RSB.
+ Default:26.
+ """
+
+ def __init__(self,
+ has_skip=False,
+ gen_skip=False,
+ gen_cross_conv=False,
+ unit_channels=256,
+ num_units=4,
+ num_steps=4,
+ num_blocks=[2, 2, 2, 2],
+ norm_cfg=dict(type='BN'),
+ in_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ assert len(num_blocks) == num_units
+ self.has_skip = has_skip
+ self.gen_skip = gen_skip
+ self.gen_cross_conv = gen_cross_conv
+ self.num_units = num_units
+ self.num_steps = num_steps
+ self.unit_channels = unit_channels
+ self.num_blocks = num_blocks
+ self.norm_cfg = norm_cfg
+
+ self.downsample = Downsample_module(RSB, num_blocks, num_steps,
+ num_units, has_skip, norm_cfg,
+ in_channels, expand_times)
+ self.upsample = Upsample_module(unit_channels, num_units, gen_skip,
+ gen_cross_conv, norm_cfg, in_channels)
+
+ def forward(self, x, skip1, skip2):
+ mid = self.downsample(x, skip1, skip2)
+ out, skip1, skip2, cross_conv = self.upsample(mid)
+
+ return out, skip1, skip2, cross_conv
+
+
+class ResNet_top(nn.Module):
+ """ResNet top for RSN.
+
+ Args:
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ channels (int): Number of channels of the feature output by ResNet_top.
+ """
+
+ def __init__(self, norm_cfg=dict(type='BN'), channels=64):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.top = nn.Sequential(
+ ConvModule(
+ 3,
+ channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ norm_cfg=norm_cfg,
+ inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+ def forward(self, img):
+ return self.top(img)
+
+
+@BACKBONES.register_module()
+class RSN(BaseBackbone):
+ """Residual Steps Network backbone. Paper ref: Cai et al. "Learning
+ Delicate Local Representations for Multi-Person Pose Estimation" (ECCV
+ 2020).
+
+ Args:
+ unit_channels (int): Number of Channels in an upsample unit.
+ Default: 256
+ num_stages (int): Number of stages in a multi-stage RSN. Default: 4
+ num_units (int): Number of downsample/upsample units in a single-stage
+ RSN. Default: 4 Note: Make sure num_units == len(self.num_blocks)
+ num_blocks (list): Number of RSBs (Residual Steps Block) in each
+ downsample unit. Default: [2, 2, 2, 2]
+ num_steps (int): Number of steps in a RSB. Default:4
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ res_top_channels (int): Number of channels of feature from ResNet_top.
+ Default: 64.
+ expand_times (int): Times by which the in_channels are expanded in RSB.
+ Default:26.
+ Example:
+ >>> from mmpose.models import RSN
+ >>> import torch
+ >>> self = RSN(num_stages=2, num_units=2, num_blocks=[2, 2])
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 511, 511)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_output in level_outputs:
+ ... for feature in level_output:
+ ... print(tuple(feature.shape))
+ ...
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ (1, 256, 64, 64)
+ (1, 256, 128, 128)
+ """
+
+ def __init__(self,
+ unit_channels=256,
+ num_stages=4,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ num_steps=4,
+ norm_cfg=dict(type='BN'),
+ res_top_channels=64,
+ expand_times=26):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ num_blocks = cp.deepcopy(num_blocks)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+ self.num_blocks = num_blocks
+ self.num_steps = num_steps
+ self.norm_cfg = norm_cfg
+
+ assert self.num_stages > 0
+ assert self.num_steps > 1
+ assert self.num_units > 1
+ assert self.num_units == len(self.num_blocks)
+ self.top = ResNet_top(norm_cfg=norm_cfg)
+ self.multi_stage_rsn = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if i == 0:
+ has_skip = False
+ else:
+ has_skip = True
+ if i != self.num_stages - 1:
+ gen_skip = True
+ gen_cross_conv = True
+ else:
+ gen_skip = False
+ gen_cross_conv = False
+ self.multi_stage_rsn.append(
+ Single_stage_RSN(has_skip, gen_skip, gen_cross_conv,
+ unit_channels, num_units, num_steps,
+ num_blocks, norm_cfg, res_top_channels,
+ expand_times))
+
+ def forward(self, x):
+ """Model forward function."""
+ out_feats = []
+ skip1 = None
+ skip2 = None
+ x = self.top(x)
+ for i in range(self.num_stages):
+ out, skip1, skip2, x = self.multi_stage_rsn[i](x, skip1, skip2)
+ out_feats.append(out)
+
+ return out_feats
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ for m in self.multi_stage_rsn.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ for m in self.top.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
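
RSN returns one entry per stage, each holding num_units feature maps ordered coarse to fine, as in the docstring example above. A small sketch, assuming RSN is re-exported from mmpose.models; the 256x256 input size is arbitrary.

import torch
from mmpose.models import RSN

model = RSN(num_stages=2, num_units=2, num_blocks=[2, 2])
model.init_weights()
model.eval()
with torch.no_grad():
    out_feats = model(torch.rand(1, 3, 256, 256))
print(len(out_feats), len(out_feats[0]))  # 2 stages, 2 feature maps per stage
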
diff --git a/vendor/ViTPose/mmpose/models/backbones/scnet.py b/vendor/ViTPose/mmpose/models/backbones/scnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3786c5731d685638cfa64a83e5d4a5e2eee545de
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/scnet.py
@@ -0,0 +1,248 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck, ResNet
+
+
+class SCConv(nn.Module):
+ """SCConv (Self-calibrated Convolution)
+
+ Args:
+ in_channels (int): The input channels of the SCConv.
+ out_channels (int): The output channel of the SCConv.
+ stride (int): stride of SCConv.
+ pooling_r (int): size of pooling for scconv.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride,
+ pooling_r,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', momentum=0.1)):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+
+ assert in_channels == out_channels
+
+ self.k2 = nn.Sequential(
+ nn.AvgPool2d(kernel_size=pooling_r, stride=pooling_r),
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, in_channels)[1],
+ )
+ self.k3 = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, in_channels)[1],
+ )
+ self.k4 = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True),
+ )
+
+ def forward(self, x):
+ """Forward function."""
+ identity = x
+
+ out = torch.sigmoid(
+ torch.add(identity, F.interpolate(self.k2(x),
+ identity.size()[2:])))
+ out = torch.mul(self.k3(x), out)
+ out = self.k4(out)
+
+ return out
+
+
+class SCBottleneck(Bottleneck):
+ """SC(Self-calibrated) Bottleneck.
+
+ Args:
+ in_channels (int): The input channels of the SCBottleneck block.
+ out_channels (int): The output channel of the SCBottleneck block.
+ """
+
+ pooling_r = 4
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.mid_channels = out_channels // self.expansion // 2
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+
+ self.k1 = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, self.mid_channels)[1],
+ nn.ReLU(inplace=True))
+
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm2_name, norm2)
+
+ self.scconv = SCConv(self.mid_channels, self.mid_channels, self.stride,
+ self.pooling_r, self.conv_cfg, self.norm_cfg)
+
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels * 2,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out_a = self.conv1(x)
+ out_a = self.norm1(out_a)
+ out_a = self.relu(out_a)
+
+ out_a = self.k1(out_a)
+
+ out_b = self.conv2(x)
+ out_b = self.norm2(out_b)
+ out_b = self.relu(out_b)
+
+ out_b = self.scconv(out_b)
+
+ out = self.conv3(torch.cat([out_a, out_b], dim=1))
+ out = self.norm3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class SCNet(ResNet):
+ """SCNet backbone.
+
+ Improving Convolutional Networks with Self-Calibrated Convolutions,
+ Jiang-Jiang Liu, Qibin Hou, Ming-Ming Cheng, Changhu Wang, Jiashi Feng,
+ IEEE CVPR, 2020.
+ http://mftp.mmcheng.net/Papers/20cvprSCNet.pdf
+
+ Args:
+ depth (int): Depth of scnet, from {50, 101}.
+ in_channels (int): Number of input image channels. Normally 3.
+ base_channels (int): Number of base channels of hidden layer.
+ num_stages (int): SCNet stages, normally 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmpose.models import SCNet
+ >>> import torch
+ >>> self = SCNet(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SCBottleneck, [3, 4, 6, 3]),
+ 101: (SCBottleneck, [3, 4, 23, 3])
+ }
+
+ def __init__(self, depth, **kwargs):
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for SCNet')
+ super().__init__(depth, **kwargs)
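
A quick shape check for the self-calibrated branch: with stride=1 and pooling_r=4 (the value SCBottleneck uses), SCConv is size-preserving, because k2 pools by pooling_r and interpolates back to the identity resolution. The import path assumes vendor/ViTPose is on sys.path.

import torch
from mmpose.models.backbones.scnet import SCConv  # path of the file above

conv = SCConv(in_channels=64, out_channels=64, stride=1, pooling_r=4)
conv.eval()
with torch.no_grad():
    y = conv(torch.rand(1, 64, 56, 56))
print(tuple(y.shape))  # (1, 64, 56, 56)
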
diff --git a/vendor/ViTPose/mmpose/models/backbones/seresnet.py b/vendor/ViTPose/mmpose/models/backbones/seresnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac2d53b40a4593bce96d5c7c3bb4e06d38353d0b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/seresnet.py
@@ -0,0 +1,125 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.utils.checkpoint as cp
+
+from ..builder import BACKBONES
+from .resnet import Bottleneck, ResLayer, ResNet
+from .utils.se_layer import SELayer
+
+
+class SEBottleneck(Bottleneck):
+ """SEBottleneck block for SEResNet.
+
+ Args:
+ in_channels (int): The input channels of the SEBottleneck block.
+ out_channels (int): The output channel of the SEBottleneck block.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16
+ """
+
+ def __init__(self, in_channels, out_channels, se_ratio=16, **kwargs):
+ super().__init__(in_channels, out_channels, **kwargs)
+ self.se_layer = SELayer(out_channels, ratio=se_ratio)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ out = self.se_layer(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+@BACKBONES.register_module()
+class SEResNet(ResNet):
+ """SEResNet backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1709.01507>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise a tuple of tensors (one per stage) will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import SEResNet
+ >>> import torch
+ >>> self = SEResNet(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SEBottleneck, (3, 4, 6, 3)),
+ 101: (SEBottleneck, (3, 4, 23, 3)),
+ 152: (SEBottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, se_ratio=16, **kwargs):
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for SEResNet')
+ self.se_ratio = se_ratio
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(se_ratio=self.se_ratio, **kwargs)
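
A short construction sketch: se_ratio is forwarded to every SEBottleneck through make_res_layer, so a single argument controls the squeeze-excitation reduction in all stages. Assumes SEResNet is re-exported from mmpose.models.

from mmpose.models import SEResNet

# se_ratio=8 halves the default reduction, widening the SE bottleneck.
model = SEResNet(depth=50, se_ratio=8, out_indices=(3, ))
model.init_weights()
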
diff --git a/vendor/ViTPose/mmpose/models/backbones/seresnext.py b/vendor/ViTPose/mmpose/models/backbones/seresnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5c4e4ce03684f8a9bd0c6166969c01bace54bd2
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/seresnext.py
@@ -0,0 +1,168 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from .resnet import ResLayer
+from .seresnet import SEBottleneck as _SEBottleneck
+from .seresnet import SEResNet
+
+
+class SEBottleneck(_SEBottleneck):
+ """SEBottleneck block for SEResNeXt.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ base_channels (int): Middle channels of the first stage. Default: 64.
+ groups (int): Groups of conv2.
+ width_per_group (int): Width per group of conv2. 64x4d indicates
+ ``groups=64, width_per_group=4`` and 32x8d indicates
+ ``groups=32, width_per_group=8``.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ base_channels=64,
+ groups=32,
+ width_per_group=4,
+ se_ratio=16,
+ **kwargs):
+ super().__init__(in_channels, out_channels, se_ratio, **kwargs)
+ self.groups = groups
+ self.width_per_group = width_per_group
+
+ # We follow the same rationale as ResNeXt to compute mid_channels.
+ # For SEResNet bottleneck, middle channels are determined by expansion
+ # and out_channels, but for SEResNeXt bottleneck, it is determined by
+ # groups and width_per_group and the stage it is located in.
+ if groups != 1:
+ assert self.mid_channels % base_channels == 0
+ self.mid_channels = (
+ groups * width_per_group * self.mid_channels // base_channels)
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ self.mid_channels,
+ self.out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+
+@BACKBONES.register_module()
+class SEResNeXt(SEResNet):
+ """SEResNeXt backbone.
+
+ Please refer to the `paper <https://arxiv.org/abs/1709.01507>`__ for
+ details.
+
+ Args:
+ depth (int): Network depth, from {50, 101, 152}.
+ groups (int): Groups of conv2 in Bottleneck. Default: 32.
+ width_per_group (int): Width per group of conv2 in Bottleneck.
+ Default: 4.
+ se_ratio (int): Squeeze ratio in SELayer. Default: 16.
+ in_channels (int): Number of input image channels. Default: 3.
+ stem_channels (int): Output channels of the stem layer. Default: 64.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise a tuple of tensors (one per stage) will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+
+ Example:
+ >>> from mmpose.models import SEResNeXt
+ >>> import torch
+ >>> self = SEResNeXt(depth=50, out_indices=(0, 1, 2, 3))
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 224, 224)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 56, 56)
+ (1, 512, 28, 28)
+ (1, 1024, 14, 14)
+ (1, 2048, 7, 7)
+ """
+
+ arch_settings = {
+ 50: (SEBottleneck, (3, 4, 6, 3)),
+ 101: (SEBottleneck, (3, 4, 23, 3)),
+ 152: (SEBottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
+ self.groups = groups
+ self.width_per_group = width_per_group
+ super().__init__(depth, **kwargs)
+
+ def make_res_layer(self, **kwargs):
+ return ResLayer(
+ groups=self.groups,
+ width_per_group=self.width_per_group,
+ base_channels=self.base_channels,
+ **kwargs)
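
Like the other registered backbones, SEResNeXt can also be built from a config dict; a sketch assuming mmpose.models exposes build_backbone.

from mmpose.models import build_backbone  # assumed builder helper

backbone = build_backbone(
    dict(type='SEResNeXt', depth=50, groups=32, width_per_group=4,
         se_ratio=16, out_indices=(0, 1, 2, 3)))
backbone.init_weights()
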
diff --git a/vendor/ViTPose/mmpose/models/backbones/shufflenet_v1.py b/vendor/ViTPose/mmpose/models/backbones/shufflenet_v1.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f98cbd2132250ec13adcce6e642c966b0dbd7cc
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/shufflenet_v1.py
@@ -0,0 +1,329 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (ConvModule, build_activation_layer, constant_init,
+ normal_init)
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import channel_shuffle, load_checkpoint, make_divisible
+
+
+class ShuffleUnit(nn.Module):
+ """ShuffleUnit block.
+
+ ShuffleNet unit with pointwise group convolution (GConv) and channel
+ shuffle.
+
+ Args:
+ in_channels (int): The input channels of the ShuffleUnit.
+ out_channels (int): The output channels of the ShuffleUnit.
+ groups (int, optional): The number of groups to be used in grouped 1x1
+ convolutions in each ShuffleUnit. Default: 3
+ first_block (bool, optional): Whether it is the first ShuffleUnit of a
+ sequential ShuffleUnits. Default: True, which means not using the
+ grouped 1x1 convolution.
+ combine (str, optional): The ways to combine the input and output
+ branches. Default: 'add'.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool, optional): Use checkpoint or not. Using checkpoint
+ will save some memory while slowing down the training speed.
+ Default: False.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ groups=3,
+ first_block=True,
+ combine='add',
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.first_block = first_block
+ self.combine = combine
+ self.groups = groups
+ self.bottleneck_channels = self.out_channels // 4
+ self.with_cp = with_cp
+
+ if self.combine == 'add':
+ self.depthwise_stride = 1
+ self._combine_func = self._add
+ assert in_channels == out_channels, (
+ 'in_channels must be equal to out_channels when combine '
+ 'is add')
+ elif self.combine == 'concat':
+ self.depthwise_stride = 2
+ self._combine_func = self._concat
+ self.out_channels -= self.in_channels
+ self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
+ else:
+ raise ValueError(f'Cannot combine tensors with {self.combine}. '
+ 'Only "add" and "concat" are supported')
+
+ self.first_1x1_groups = 1 if first_block else self.groups
+ self.g_conv_1x1_compress = ConvModule(
+ in_channels=self.in_channels,
+ out_channels=self.bottleneck_channels,
+ kernel_size=1,
+ groups=self.first_1x1_groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ self.depthwise_conv3x3_bn = ConvModule(
+ in_channels=self.bottleneck_channels,
+ out_channels=self.bottleneck_channels,
+ kernel_size=3,
+ stride=self.depthwise_stride,
+ padding=1,
+ groups=self.bottleneck_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ self.g_conv_1x1_expand = ConvModule(
+ in_channels=self.bottleneck_channels,
+ out_channels=self.out_channels,
+ kernel_size=1,
+ groups=self.groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ self.act = build_activation_layer(act_cfg)
+
+ @staticmethod
+ def _add(x, out):
+ # residual connection
+ return x + out
+
+ @staticmethod
+ def _concat(x, out):
+ # concatenate along channel axis
+ return torch.cat((x, out), 1)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ residual = x
+
+ out = self.g_conv_1x1_compress(x)
+ out = self.depthwise_conv3x3_bn(out)
+
+ if self.groups > 1:
+ out = channel_shuffle(out, self.groups)
+
+ out = self.g_conv_1x1_expand(out)
+
+ if self.combine == 'concat':
+ residual = self.avgpool(residual)
+ out = self.act(out)
+ out = self._combine_func(residual, out)
+ else:
+ out = self._combine_func(residual, out)
+ out = self.act(out)
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ShuffleNetV1(BaseBackbone):
+ """ShuffleNetV1 backbone.
+
+ Args:
+ groups (int, optional): The number of groups to be used in grouped 1x1
+ convolutions in each ShuffleUnit. Default: 3.
+ widen_factor (float, optional): Width multiplier - adjusts the number
+ of channels in each layer by this amount. Default: 1.0.
+ out_indices (Sequence[int]): Output from which stages.
+ Default: (2, )
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ groups=3,
+ widen_factor=1.0,
+ out_indices=(2, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stage_blocks = [4, 8, 4]
+ self.groups = groups
+
+ for index in out_indices:
+ if index not in range(0, 3):
+ raise ValueError('the item in out_indices must be in '
+ f'range(0, 3). But received {index}')
+
+ if frozen_stages not in range(-1, 3):
+ raise ValueError('frozen_stages must be in range(-1, 3). '
+ f'But received {frozen_stages}')
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ if groups == 1:
+ channels = (144, 288, 576)
+ elif groups == 2:
+ channels = (200, 400, 800)
+ elif groups == 3:
+ channels = (240, 480, 960)
+ elif groups == 4:
+ channels = (272, 544, 1088)
+ elif groups == 8:
+ channels = (384, 768, 1536)
+ else:
+ raise ValueError(f'{groups} groups is not supported for 1x1 '
+ 'Grouped Convolutions')
+
+ channels = [make_divisible(ch * widen_factor, 8) for ch in channels]
+
+ self.in_channels = int(24 * widen_factor)
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layers = nn.ModuleList()
+ for i, num_blocks in enumerate(self.stage_blocks):
+ first_block = (i == 0)
+ layer = self.make_layer(channels[i], num_blocks, first_block)
+ self.layers.append(layer)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(self.frozen_stages):
+ layer = self.layers[i]
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for name, m in self.named_modules():
+ if isinstance(m, nn.Conv2d):
+ if 'conv1' in name:
+ normal_init(m, mean=0, std=0.01)
+ else:
+ normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, val=1, bias=0.0001)
+ if isinstance(m, _BatchNorm):
+ if m.running_mean is not None:
+ nn.init.constant_(m.running_mean, 0)
+ else:
+ raise TypeError('pretrained must be a str or None. But received '
+ f'{type(pretrained)}')
+
+ def make_layer(self, out_channels, num_blocks, first_block=False):
+ """Stack ShuffleUnit blocks to make a layer.
+
+ Args:
+ out_channels (int): out_channels of the block.
+ num_blocks (int): Number of blocks.
+ first_block (bool, optional): Whether it is the first ShuffleUnit of a
+ sequential ShuffleUnits. Default: False, which means using
+ the grouped 1x1 convolution.
+ """
+ layers = []
+ for i in range(num_blocks):
+ first_block = first_block if i == 0 else False
+ combine_mode = 'concat' if i == 0 else 'add'
+ layers.append(
+ ShuffleUnit(
+ self.in_channels,
+ out_channels,
+ groups=self.groups,
+ first_block=first_block,
+ combine=combine_mode,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.maxpool(x)
+
+ outs = []
+ for i, layer in enumerate(self.layers):
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
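
ShuffleNetV1 has no docstring example, so here is a minimal sketch, assuming the class is re-exported from mmpose.models; with groups=3 and widen_factor=1.0 the stage widths are 240/480/960, and a 224x224 input is downsampled by 8/16/32 across the three stages.

import torch
from mmpose.models import ShuffleNetV1

model = ShuffleNetV1(groups=3, widen_factor=1.0, out_indices=(0, 1, 2))
model.init_weights()
model.eval()
with torch.no_grad():
    outs = model(torch.rand(1, 3, 224, 224))
for out in outs:
    print(tuple(out.shape))
# expected: (1, 240, 28, 28), (1, 480, 14, 14), (1, 960, 7, 7)
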
diff --git a/vendor/ViTPose/mmpose/models/backbones/shufflenet_v2.py b/vendor/ViTPose/mmpose/models/backbones/shufflenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..e93533367afe4efa01fa67d14cafcca006c990e8
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/shufflenet_v2.py
@@ -0,0 +1,302 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, constant_init, normal_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import channel_shuffle, load_checkpoint
+
+
+class InvertedResidual(nn.Module):
+ """InvertedResidual block for ShuffleNetV2 backbone.
+
+ Args:
+ in_channels (int): The input channels of the block.
+ out_channels (int): The output channels of the block.
+ stride (int): Stride of the 3x3 convolution layer. Default: 1
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ stride=1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stride = stride
+ self.with_cp = with_cp
+
+ branch_features = out_channels // 2
+ if self.stride == 1:
+ assert in_channels == branch_features * 2, (
+ f'in_channels ({in_channels}) should equal to '
+ f'branch_features * 2 ({branch_features * 2}) '
+ 'when stride is 1')
+
+ if in_channels != branch_features * 2:
+ assert self.stride != 1, (
+ f'stride ({self.stride}) should not equal 1 when '
+ f'in_channels != branch_features * 2')
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=in_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ in_channels,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ )
+
+ self.branch2 = nn.Sequential(
+ ConvModule(
+ in_channels if (self.stride > 1) else branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=3,
+ stride=self.stride,
+ padding=1,
+ groups=branch_features,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None),
+ ConvModule(
+ branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ if self.stride > 1:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+ else:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
+
+
+@BACKBONES.register_module()
+class ShuffleNetV2(BaseBackbone):
+ """ShuffleNetV2 backbone.
+
+ Args:
+ widen_factor (float): Width multiplier - adjusts the number of
+ channels in each layer by this amount. Default: 1.0.
+ out_indices (Sequence[int]): Output from which stages.
+ Default: (0, 1, 2, 3).
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ """
+
+ def __init__(self,
+ widen_factor=1.0,
+ out_indices=(3, ),
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.stage_blocks = [4, 8, 4]
+ for index in out_indices:
+ if index not in range(0, 4):
+ raise ValueError('the item in out_indices must be in '
+ f'range(0, 4). But received {index}')
+
+ if frozen_stages not in range(-1, 4):
+ raise ValueError('frozen_stages must be in range(-1, 4). '
+ f'But received {frozen_stages}')
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ if widen_factor == 0.5:
+ channels = [48, 96, 192, 1024]
+ elif widen_factor == 1.0:
+ channels = [116, 232, 464, 1024]
+ elif widen_factor == 1.5:
+ channels = [176, 352, 704, 1024]
+ elif widen_factor == 2.0:
+ channels = [244, 488, 976, 2048]
+ else:
+ raise ValueError('widen_factor must be in [0.5, 1.0, 1.5, 2.0]. '
+ f'But received {widen_factor}')
+
+ self.in_channels = 24
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.in_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layers = nn.ModuleList()
+ for i, num_blocks in enumerate(self.stage_blocks):
+ layer = self._make_layer(channels[i], num_blocks)
+ self.layers.append(layer)
+
+ output_channels = channels[-1]
+ self.layers.append(
+ ConvModule(
+ in_channels=self.in_channels,
+ out_channels=output_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg))
+
+ def _make_layer(self, out_channels, num_blocks):
+ """Stack blocks to make a layer.
+
+ Args:
+ out_channels (int): out_channels of the block.
+ num_blocks (int): number of blocks.
+ """
+ layers = []
+ for i in range(num_blocks):
+ stride = 2 if i == 0 else 1
+ layers.append(
+ InvertedResidual(
+ in_channels=self.in_channels,
+ out_channels=out_channels,
+ stride=stride,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ with_cp=self.with_cp))
+ self.in_channels = out_channels
+
+ return nn.Sequential(*layers)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+
+ for i in range(self.frozen_stages):
+ m = self.layers[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for name, m in self.named_modules():
+ if isinstance(m, nn.Conv2d):
+ if 'conv1' in name:
+ normal_init(m, mean=0, std=0.01)
+ else:
+ normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, val=1, bias=0.0001)  # constant_init expects the module, not its weight tensor
+ if isinstance(m, _BatchNorm):
+ if m.running_mean is not None:
+ nn.init.constant_(m.running_mean, 0)
+ else:
+ raise TypeError('pretrained must be a str or None. But received '
+ f'{type(pretrained)}')
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.maxpool(x)
+
+ outs = []
+ for i, layer in enumerate(self.layers):
+ x = layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, nn.BatchNorm2d):
+ m.eval()
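+
+
+# Illustrative usage sketch (not part of the upstream file; assumes torch and
+# mmcv are importable). For the 1.0x model and a 224x224 input, the default
+# out_indices=(3, ) returns the output of the final 1x1 conv stage:
+# >>> import torch
+# >>> model = ShuffleNetV2(widen_factor=1.0)
+# >>> model(torch.rand(1, 3, 224, 224)).shape
+# torch.Size([1, 1024, 7, 7])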
diff --git a/vendor/ViTPose/mmpose/models/backbones/tcn.py b/vendor/ViTPose/mmpose/models/backbones/tcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..deca2290aeb1830bc3e241b819157369371aaf27
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/tcn.py
@@ -0,0 +1,267 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule, build_conv_layer, constant_init, kaiming_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.core import WeightNormClipHook
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class BasicTemporalBlock(nn.Module):
+ """Basic block for VideoPose3D.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ mid_channels (int): The output channels of conv1. Default: 1024.
+ kernel_size (int): Size of the convolving kernel. Default: 3.
+ dilation (int): Spacing between kernel elements. Default: 3.
+ dropout (float): Dropout rate. Default: 0.25.
+ causal (bool): Use causal convolutions instead of symmetric
+ convolutions (for real-time applications). Default: False.
+ residual (bool): Use residual connection. Default: True.
+ use_stride_conv (bool): Use an optimized TCN designed specifically
+ for single-frame batching, i.e. where batches have
+ input length = receptive field, and output length = 1. This
+ implementation replaces dilated convolutions with strided
+ convolutions to avoid generating unused intermediate results.
+ Default: False.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: dict(type='Conv1d').
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN1d').
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ mid_channels=1024,
+ kernel_size=3,
+ dilation=3,
+ dropout=0.25,
+ causal=False,
+ residual=True,
+ use_stride_conv=False,
+ conv_cfg=dict(type='Conv1d'),
+ norm_cfg=dict(type='BN1d')):
+ # Protect mutable default arguments
+ conv_cfg = copy.deepcopy(conv_cfg)
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.mid_channels = mid_channels
+ self.kernel_size = kernel_size
+ self.dilation = dilation
+ self.dropout = dropout
+ self.causal = causal
+ self.residual = residual
+ self.use_stride_conv = use_stride_conv
+
+ self.pad = (kernel_size - 1) * dilation // 2
+ if use_stride_conv:
+ self.stride = kernel_size
+ self.causal_shift = kernel_size // 2 if causal else 0
+ self.dilation = 1
+ else:
+ self.stride = 1
+ self.causal_shift = kernel_size // 2 * dilation if causal else 0
+
+ self.conv1 = nn.Sequential(
+ ConvModule(
+ in_channels,
+ mid_channels,
+ kernel_size=kernel_size,
+ stride=self.stride,
+ dilation=self.dilation,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+ self.conv2 = nn.Sequential(
+ ConvModule(
+ mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+
+ if residual and in_channels != out_channels:
+ self.short_cut = build_conv_layer(conv_cfg, in_channels,
+ out_channels, 1)
+ else:
+ self.short_cut = None
+
+ self.dropout = nn.Dropout(dropout) if dropout > 0 else None
+
+ def forward(self, x):
+ """Forward function."""
+ if self.use_stride_conv:
+ assert self.causal_shift + self.kernel_size // 2 < x.shape[2]
+ else:
+ assert 0 <= self.pad + self.causal_shift < x.shape[2] - \
+ self.pad + self.causal_shift <= x.shape[2]
+
+ out = self.conv1(x)
+ if self.dropout is not None:
+ out = self.dropout(out)
+
+ out = self.conv2(out)
+ if self.dropout is not None:
+ out = self.dropout(out)
+
+ if self.residual:
+ if self.use_stride_conv:
+ res = x[:, :, self.causal_shift +
+ self.kernel_size // 2::self.kernel_size]
+ else:
+ res = x[:, :,
+ (self.pad + self.causal_shift):(x.shape[2] - self.pad +
+ self.causal_shift)]
+
+ if self.short_cut is not None:
+ res = self.short_cut(res)
+ out = out + res
+
+ return out
+
+
+@BACKBONES.register_module()
+class TCN(BaseBackbone):
+ """TCN backbone.
+
+ Temporal Convolutional Networks.
+ More details can be found in the
+ `paper `__ .
+
+ Args:
+ in_channels (int): Number of input channels, which equals to
+ num_keypoints * num_features.
+ stem_channels (int): Number of feature channels. Default: 1024.
+ num_blocks (int): Number of basic temporal convolutional blocks.
+ Default: 2.
+ kernel_sizes (Sequence[int]): Sizes of the convolving kernel of
+ each basic block. Default: ``(3, 3, 3)``.
+ dropout (float): Dropout rate. Default: 0.25.
+ causal (bool): Use causal convolutions instead of symmetric
+ convolutions (for real-time applications).
+ Default: False.
+ residual (bool): Use residual connection. Default: True.
+ use_stride_conv (bool): Use TCN backbone optimized for
+ single-frame batching, i.e. where batches have input length =
+ receptive field, and output length = 1. This implementation
+ replaces dilated convolutions with strided convolutions to avoid
+ generating unused intermediate results. The weights are
+ interchangeable with the reference implementation. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: dict(type='Conv1d').
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN1d').
+ max_norm (float|None): if not None, the weight of convolution layers
+ will be clipped to have a maximum norm of max_norm.
+
+ Example:
+ >>> from mmpose.models import TCN
+ >>> import torch
+ >>> self = TCN(in_channels=34)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 34, 243)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 1024, 235)
+ (1, 1024, 217)
+ """
+
+ def __init__(self,
+ in_channels,
+ stem_channels=1024,
+ num_blocks=2,
+ kernel_sizes=(3, 3, 3),
+ dropout=0.25,
+ causal=False,
+ residual=True,
+ use_stride_conv=False,
+ conv_cfg=dict(type='Conv1d'),
+ norm_cfg=dict(type='BN1d'),
+ max_norm=None):
+ # Protect mutable default arguments
+ conv_cfg = copy.deepcopy(conv_cfg)
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.in_channels = in_channels
+ self.stem_channels = stem_channels
+ self.num_blocks = num_blocks
+ self.kernel_sizes = kernel_sizes
+ self.dropout = dropout
+ self.causal = causal
+ self.residual = residual
+ self.use_stride_conv = use_stride_conv
+ self.max_norm = max_norm
+
+ assert num_blocks == len(kernel_sizes) - 1
+ for ks in kernel_sizes:
+ assert ks % 2 == 1, 'Only odd filter widths are supported.'
+
+ self.expand_conv = ConvModule(
+ in_channels,
+ stem_channels,
+ kernel_size=kernel_sizes[0],
+ stride=kernel_sizes[0] if use_stride_conv else 1,
+ bias='auto',
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ dilation = kernel_sizes[0]
+ self.tcn_blocks = nn.ModuleList()
+ for i in range(1, num_blocks + 1):
+ self.tcn_blocks.append(
+ BasicTemporalBlock(
+ in_channels=stem_channels,
+ out_channels=stem_channels,
+ mid_channels=stem_channels,
+ kernel_size=kernel_sizes[i],
+ dilation=dilation,
+ dropout=dropout,
+ causal=causal,
+ residual=residual,
+ use_stride_conv=use_stride_conv,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+ dilation *= kernel_sizes[i]
+
+ if self.max_norm is not None:
+ # Apply weight norm clip to conv layers
+ weight_clip = WeightNormClipHook(self.max_norm)
+ for module in self.modules():
+ if isinstance(module, nn.modules.conv._ConvNd):
+ weight_clip.register(module)
+
+ self.dropout = nn.Dropout(dropout) if dropout > 0 else None
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.expand_conv(x)
+
+ if self.dropout is not None:
+ x = self.dropout(x)
+
+ outs = []
+ for i in range(self.num_blocks):
+ x = self.tcn_blocks[i](x)
+ outs.append(x)
+
+ return tuple(outs)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights."""
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.modules.conv._ConvNd):
+ kaiming_init(m, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
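+
+
+# Reading the Example shapes in the TCN docstring (a sketch of the arithmetic,
+# assuming the default non-strided mode): expand_conv uses kernel_sizes[0]=3
+# with no padding, so T=243 shrinks to 241; block i then shrinks T by
+# (kernel_size - 1) * dilation, with the dilation growing as 3, 9, ...
+# Hence 241 - 6 = 235 and 235 - 18 = 217, and the overall receptive field of a
+# (3, 3, 3) model is 3 * 3 * 3 = 27 frames.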
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/__init__.py b/vendor/ViTPose/mmpose/models/backbones/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..52a30ca9f7c8e90b6c6fa2fd8a9705ca0403b259
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .channel_shuffle import channel_shuffle
+from .inverted_residual import InvertedResidual
+from .make_divisible import make_divisible
+from .se_layer import SELayer
+from .utils import load_checkpoint
+
+__all__ = [
+ 'channel_shuffle', 'make_divisible', 'InvertedResidual', 'SELayer',
+ 'load_checkpoint'
+]
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/channel_shuffle.py b/vendor/ViTPose/mmpose/models/backbones/utils/channel_shuffle.py
new file mode 100644
index 0000000000000000000000000000000000000000..27006a8065db35a14c4207ce6613104374b064ad
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/channel_shuffle.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+
+def channel_shuffle(x, groups):
+ """Channel Shuffle operation.
+
+ This function enables cross-group information flow for multiple groups
+ convolution layers.
+
+ Args:
+ x (Tensor): The input tensor.
+ groups (int): The number of groups to divide the input tensor
+ in the channel dimension.
+
+ Returns:
+ Tensor: The output tensor after channel shuffle operation.
+ """
+
+ batch_size, num_channels, height, width = x.size()
+ assert (num_channels % groups == 0), ('num_channels should be '
+ 'divisible by groups')
+ channels_per_group = num_channels // groups
+
+ x = x.view(batch_size, groups, channels_per_group, height, width)
+ x = torch.transpose(x, 1, 2).contiguous()
+ x = x.view(batch_size, -1, height, width)
+
+ return x
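+
+
+# Worked example (illustrative, not part of the upstream file): with 4 channels
+# split into 2 groups, channel order [0, 1, 2, 3] becomes [0, 2, 1, 3].
+# >>> import torch
+# >>> x = torch.arange(4.).view(1, 4, 1, 1)
+# >>> channel_shuffle(x, 2).flatten().tolist()
+# [0.0, 2.0, 1.0, 3.0]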
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/inverted_residual.py b/vendor/ViTPose/mmpose/models/backbones/utils/inverted_residual.py
new file mode 100644
index 0000000000000000000000000000000000000000..dff762c570550e4a738ae1833a4c82c18777115d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/inverted_residual.py
@@ -0,0 +1,128 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule
+
+from .se_layer import SELayer
+
+
+class InvertedResidual(nn.Module):
+ """Inverted Residual Block.
+
+ Args:
+ in_channels (int): The input channels of this Module.
+ out_channels (int): The output channels of this Module.
+ mid_channels (int): The input channels of the depthwise convolution.
+ kernel_size (int): The kernel size of the depthwise convolution.
+ Default: 3.
+ groups (None or int): The group number of the depthwise convolution.
+ Default: None, which means group number = mid_channels.
+ stride (int): The stride of the depthwise convolution. Default: 1.
+ se_cfg (dict): Config dict for se layer. Default: None, which means no
+ se layer.
+ with_expand_conv (bool): Use expand conv or not. If set False,
+ mid_channels must be the same with in_channels.
+ Default: True.
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ mid_channels,
+ kernel_size=3,
+ groups=None,
+ stride=1,
+ se_cfg=None,
+ with_expand_conv=True,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ act_cfg = copy.deepcopy(act_cfg)
+ super().__init__()
+ self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
+ assert stride in [1, 2]
+ self.with_cp = with_cp
+ self.with_se = se_cfg is not None
+ self.with_expand_conv = with_expand_conv
+
+ if groups is None:
+ groups = mid_channels
+
+ if self.with_se:
+ assert isinstance(se_cfg, dict)
+ if not self.with_expand_conv:
+ assert mid_channels == in_channels
+
+ if self.with_expand_conv:
+ self.expand_conv = ConvModule(
+ in_channels=in_channels,
+ out_channels=mid_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ self.depthwise_conv = ConvModule(
+ in_channels=mid_channels,
+ out_channels=mid_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=kernel_size // 2,
+ groups=groups,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ if self.with_se:
+ self.se = SELayer(**se_cfg)
+ self.linear_conv = ConvModule(
+ in_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ out = x
+
+ if self.with_expand_conv:
+ out = self.expand_conv(out)
+
+ out = self.depthwise_conv(out)
+
+ if self.with_se:
+ out = self.se(out)
+
+ out = self.linear_conv(out)
+
+ if self.with_res_shortcut:
+ return x + out
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ return out
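+
+
+# Illustrative usage sketch (not part of the upstream file): with stride=1 and
+# in_channels == out_channels the block uses a residual shortcut, and the
+# spatial shape is preserved.
+# >>> import torch
+# >>> block = InvertedResidual(in_channels=16, out_channels=16, mid_channels=64)
+# >>> block(torch.rand(1, 16, 32, 32)).shape
+# torch.Size([1, 16, 32, 32])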
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/make_divisible.py b/vendor/ViTPose/mmpose/models/backbones/utils/make_divisible.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7666be65939d5c76057e73927c230029cb1871d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/make_divisible.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
+ """Make divisible function.
+
+ This function rounds the channel number to the nearest value that is
+ divisible by the divisor, while keeping it at or above ``min_ratio`` of
+ the original value.
+
+ Args:
+ value (int): The original channel number.
+ divisor (int): The divisor to fully divide the channel number.
+ min_value (int, optional): The minimum value of the output channel.
+ Default: None, means that the minimum value equal to the divisor.
+ min_ratio (float, optional): The minimum ratio of the rounded channel
+ number to the original channel number. Default: 0.9.
+ Returns:
+ int: The modified output channel number
+ """
+
+ if min_value is None:
+ min_value = divisor
+ new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
+ # Make sure that round down does not go down by more than (1-min_ratio).
+ if new_value < min_ratio * value:
+ new_value += divisor
+ return new_value
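+
+
+# Worked examples (illustrative): rounding is to the nearest multiple of the
+# divisor, never below min_value and never below min_ratio of the original.
+# >>> make_divisible(37, 8)   # nearest multiple of 8
+# 40
+# >>> make_divisible(3, 8)    # clamped up to min_value (= divisor by default)
+# 8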
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/se_layer.py b/vendor/ViTPose/mmpose/models/backbones/utils/se_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..07f70802eb1b98b1f22516ba62b1533557f428ed
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/se_layer.py
@@ -0,0 +1,54 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+
+class SELayer(nn.Module):
+ """Squeeze-and-Excitation Module.
+
+ Args:
+ channels (int): The input (and output) channels of the SE layer.
+ ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
+ ``int(channels/ratio)``. Default: 16.
+ conv_cfg (None or dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ act_cfg (dict or Sequence[dict]): Config dict for activation layer.
+ If act_cfg is a dict, two activation layers will be configured
+ by this dict. If act_cfg is a sequence of dicts, the first
+ activation layer will be configured by the first dict and the
+ second activation layer will be configured by the second dict.
+ Default: (dict(type='ReLU'), dict(type='Sigmoid'))
+ """
+
+ def __init__(self,
+ channels,
+ ratio=16,
+ conv_cfg=None,
+ act_cfg=(dict(type='ReLU'), dict(type='Sigmoid'))):
+ super().__init__()
+ if isinstance(act_cfg, dict):
+ act_cfg = (act_cfg, act_cfg)
+ assert len(act_cfg) == 2
+ assert mmcv.is_tuple_of(act_cfg, dict)
+ self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+ self.conv1 = ConvModule(
+ in_channels=channels,
+ out_channels=int(channels / ratio),
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ act_cfg=act_cfg[0])
+ self.conv2 = ConvModule(
+ in_channels=int(channels / ratio),
+ out_channels=channels,
+ kernel_size=1,
+ stride=1,
+ conv_cfg=conv_cfg,
+ act_cfg=act_cfg[1])
+
+ def forward(self, x):
+ out = self.global_avgpool(x)
+ out = self.conv1(out)
+ out = self.conv2(out)
+ return x * out
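+
+
+# Illustrative usage sketch (not part of the upstream file): the layer rescales
+# channels with a squeeze-excite gate and keeps the input shape.
+# >>> import torch
+# >>> se = SELayer(channels=64, ratio=16)
+# >>> se(torch.rand(1, 64, 8, 8)).shape
+# torch.Size([1, 64, 8, 8])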
diff --git a/vendor/ViTPose/mmpose/models/backbones/utils/utils.py b/vendor/ViTPose/mmpose/models/backbones/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9ac948653adeb849e0f510bc1014664741fe6f9
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/utils/utils.py
@@ -0,0 +1,87 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+
+from mmcv.runner.checkpoint import _load_checkpoint, load_state_dict
+
+
+def load_checkpoint(model,
+ filename,
+ map_location='cpu',
+ strict=False,
+ logger=None):
+ """Load checkpoint from a file or URI.
+
+ Args:
+ model (Module): Module to load checkpoint.
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``.
+ map_location (str): Same as :func:`torch.load`.
+ strict (bool): Whether to allow different params for the model and
+ checkpoint.
+ logger (:mod:`logging.Logger` or None): The logger for error message.
+
+ Returns:
+ dict or OrderedDict: The loaded checkpoint.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict_tmp = checkpoint['state_dict']
+ else:
+ state_dict_tmp = checkpoint
+
+ state_dict = OrderedDict()
+ # strip prefix of state_dict
+ for k, v in state_dict_tmp.items():
+ if k.startswith('module.backbone.'):
+ state_dict[k[16:]] = v
+ elif k.startswith('module.'):
+ state_dict[k[7:]] = v
+ elif k.startswith('backbone.'):
+ state_dict[k[9:]] = v
+ else:
+ state_dict[k] = v
+ # load state_dict
+ load_state_dict(model, state_dict, strict, logger)
+ return checkpoint
+
+
+def get_state_dict(filename, map_location='cpu'):
+ """Get state_dict from a file or URI.
+
+ Args:
+ filename (str): Accept local filepath, URL, ``torchvision://xxx``,
+ ``open-mmlab://xxx``.
+ map_location (str): Same as :func:`torch.load`.
+
+ Returns:
+ OrderedDict: The state_dict.
+ """
+ checkpoint = _load_checkpoint(filename, map_location)
+ # OrderedDict is a subclass of dict
+ if not isinstance(checkpoint, dict):
+ raise RuntimeError(
+ f'No state_dict found in checkpoint file {filename}')
+ # get state_dict from checkpoint
+ if 'state_dict' in checkpoint:
+ state_dict_tmp = checkpoint['state_dict']
+ else:
+ state_dict_tmp = checkpoint
+
+ state_dict = OrderedDict()
+ # strip prefix of state_dict
+ for k, v in state_dict_tmp.items():
+ if k.startswith('module.backbone.'):
+ state_dict[k[16:]] = v
+ elif k.startswith('module.'):
+ state_dict[k[7:]] = v
+ elif k.startswith('backbone.'):
+ state_dict[k[9:]] = v
+ else:
+ state_dict[k] = v
+
+ return state_dict
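+
+
+# Prefix handling sketch (illustrative): state_dict keys are normalised before
+# loading, e.g.
+#   'module.backbone.conv1.weight' -> 'conv1.weight'
+#   'module.conv1.weight'          -> 'conv1.weight'
+#   'backbone.conv1.weight'        -> 'conv1.weight'
+# so checkpoints saved from DataParallel-wrapped or full-model training runs can
+# be loaded directly into a bare backbone.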
diff --git a/vendor/ViTPose/mmpose/models/backbones/v2v_net.py b/vendor/ViTPose/mmpose/models/backbones/v2v_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..99462af711069a34c13628364e2c466163507861
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/v2v_net.py
@@ -0,0 +1,257 @@
+# ------------------------------------------------------------------------------
+# Copyright and License Information
+# Adapted from
+# https://github.com/microsoft/voxelpose-pytorch/blob/main/lib/models/v2v_net.py
+# Original Licence: MIT License
+# ------------------------------------------------------------------------------
+
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class Basic3DBlock(nn.Module):
+ """A basic 3D convolutional block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the convolution operation
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: dict(type='Conv3d')
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN3d')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ conv_cfg=dict(type='Conv3d'),
+ norm_cfg=dict(type='BN3d')):
+ super(Basic3DBlock, self).__init__()
+ self.block = ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ bias=True)
+
+ def forward(self, x):
+ """Forward function."""
+ return self.block(x)
+
+
+class Res3DBlock(nn.Module):
+ """A residual 3D convolutional block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the convolution operation
+ Default: 3
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: dict(type='Conv3d')
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN3d')
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ conv_cfg=dict(type='Conv3d'),
+ norm_cfg=dict(type='BN3d')):
+ super(Res3DBlock, self).__init__()
+ self.res_branch = nn.Sequential(
+ ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ bias=True),
+ ConvModule(
+ out_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=((kernel_size - 1) // 2),
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ bias=True))
+
+ if in_channels == out_channels:
+ self.skip_con = nn.Sequential()
+ else:
+ self.skip_con = ConvModule(
+ in_channels,
+ out_channels,
+ 1,
+ stride=1,
+ padding=0,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ bias=True)
+
+ def forward(self, x):
+ """Forward function."""
+ res = self.res_branch(x)
+ skip = self.skip_con(x)
+ return F.relu(res + skip, True)
+
+
+class Pool3DBlock(nn.Module):
+ """A 3D max-pool block.
+
+ Args:
+ pool_size (int): Pool size of the 3D max-pool layer
+ """
+
+ def __init__(self, pool_size):
+ super(Pool3DBlock, self).__init__()
+ self.pool_size = pool_size
+
+ def forward(self, x):
+ """Forward function."""
+ return F.max_pool3d(
+ x, kernel_size=self.pool_size, stride=self.pool_size)
+
+
+class Upsample3DBlock(nn.Module):
+ """A 3D upsample block.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ kernel_size (int): Kernel size of the transposed convolution operation.
+ Default: 2
+ stride (int): Stride of the transposed convolution operation.
+ Default: 2
+ """
+
+ def __init__(self, in_channels, out_channels, kernel_size=2, stride=2):
+ super(Upsample3DBlock, self).__init__()
+ assert kernel_size == 2
+ assert stride == 2
+ self.block = nn.Sequential(
+ nn.ConvTranspose3d(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=0,
+ output_padding=0), nn.BatchNorm3d(out_channels), nn.ReLU(True))
+
+ def forward(self, x):
+ """Forward function."""
+ return self.block(x)
+
+
+class EncoderDecorder(nn.Module):
+ """An encoder-decoder block.
+
+ Args:
+ in_channels (int): Input channels of this block
+ """
+
+ def __init__(self, in_channels=32):
+ super(EncoderDecorder, self).__init__()
+
+ self.encoder_pool1 = Pool3DBlock(2)
+ self.encoder_res1 = Res3DBlock(in_channels, in_channels * 2)
+ self.encoder_pool2 = Pool3DBlock(2)
+ self.encoder_res2 = Res3DBlock(in_channels * 2, in_channels * 4)
+
+ self.mid_res = Res3DBlock(in_channels * 4, in_channels * 4)
+
+ self.decoder_res2 = Res3DBlock(in_channels * 4, in_channels * 4)
+ self.decoder_upsample2 = Upsample3DBlock(in_channels * 4,
+ in_channels * 2, 2, 2)
+ self.decoder_res1 = Res3DBlock(in_channels * 2, in_channels * 2)
+ self.decoder_upsample1 = Upsample3DBlock(in_channels * 2, in_channels,
+ 2, 2)
+
+ self.skip_res1 = Res3DBlock(in_channels, in_channels)
+ self.skip_res2 = Res3DBlock(in_channels * 2, in_channels * 2)
+
+ def forward(self, x):
+ """Forward function."""
+ skip_x1 = self.skip_res1(x)
+ x = self.encoder_pool1(x)
+ x = self.encoder_res1(x)
+
+ skip_x2 = self.skip_res2(x)
+ x = self.encoder_pool2(x)
+ x = self.encoder_res2(x)
+
+ x = self.mid_res(x)
+
+ x = self.decoder_res2(x)
+ x = self.decoder_upsample2(x)
+ x = x + skip_x2
+
+ x = self.decoder_res1(x)
+ x = self.decoder_upsample1(x)
+ x = x + skip_x1
+
+ return x
+
+
+@BACKBONES.register_module()
+class V2VNet(BaseBackbone):
+ """V2VNet.
+
+ Please refer to the `paper `
+ for details.
+
+ Args:
+ input_channels (int):
+ Number of channels of the input feature volume.
+ output_channels (int):
+ Number of channels of the output volume.
+ mid_channels (int):
+ Input and output channels of the encoder-decoder block.
+ """
+
+ def __init__(self, input_channels, output_channels, mid_channels=32):
+ super(V2VNet, self).__init__()
+
+ self.front_layers = nn.Sequential(
+ Basic3DBlock(input_channels, mid_channels // 2, 7),
+ Res3DBlock(mid_channels // 2, mid_channels),
+ )
+
+ self.encoder_decoder = EncoderDecorder(in_channels=mid_channels)
+
+ self.output_layer = nn.Conv3d(
+ mid_channels, output_channels, kernel_size=1, stride=1, padding=0)
+
+ self._initialize_weights()
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.front_layers(x)
+ x = self.encoder_decoder(x)
+ x = self.output_layer(x)
+
+ return x
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv3d):
+ nn.init.normal_(m.weight, 0, 0.001)
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.ConvTranspose3d):
+ nn.init.normal_(m.weight, 0, 0.001)
+ nn.init.constant_(m.bias, 0)
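+
+
+# Illustrative usage sketch (channel counts are arbitrary, chosen only for the
+# example): the encoder-decoder pools twice by 2, so each spatial side of the
+# input volume should be divisible by 4; the output keeps the input resolution.
+# >>> import torch
+# >>> net = V2VNet(input_channels=17, output_channels=15)
+# >>> net(torch.rand(1, 17, 32, 32, 32)).shape
+# torch.Size([1, 15, 32, 32, 32])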
diff --git a/vendor/ViTPose/mmpose/models/backbones/vgg.py b/vendor/ViTPose/mmpose/models/backbones/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7d467017a5520f399c84b1235ec64c99b805b42
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/vgg.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init, normal_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+def make_vgg_layer(in_channels,
+ out_channels,
+ num_blocks,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ dilation=1,
+ with_norm=False,
+ ceil_mode=False):
+ layers = []
+ for _ in range(num_blocks):
+ layer = ConvModule(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=3,
+ dilation=dilation,
+ padding=dilation,
+ bias=True,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ layers.append(layer)
+ in_channels = out_channels
+ layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))
+
+ return layers
+
+
+@BACKBONES.register_module()
+class VGG(BaseBackbone):
+ """VGG backbone.
+
+ Args:
+ depth (int): Depth of vgg, from {11, 13, 16, 19}.
+ with_norm (bool): Use BatchNorm or not.
+ num_classes (int): number of classes for classification.
+ num_stages (int): VGG stages, normally 5.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. When it is None, the default behavior depends on
+ whether num_classes is specified. If num_classes <= 0, the default
+ value is (4, ), outputting the last feature map before classifier.
+ If num_classes > 0, the default value is (5, ), outputting the
+ classification score. Default: None.
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ ceil_mode (bool): Whether to use ceil_mode of MaxPool. Default: False.
+ with_last_pool (bool): Whether to keep the last pooling before
+ classifier. Default: True.
+ """
+
+ # Parameters to build layers. Each element specifies the number of conv in
+ # each stage. For example, VGG11 contains 11 layers with learnable
+ # parameters. 11 is computed as 11 = (1 + 1 + 2 + 2 + 2) + 3,
+ # where 3 indicates the last three fully-connected layers.
+ arch_settings = {
+ 11: (1, 1, 2, 2, 2),
+ 13: (2, 2, 2, 2, 2),
+ 16: (2, 2, 3, 3, 3),
+ 19: (2, 2, 4, 4, 4)
+ }
+
+ def __init__(self,
+ depth,
+ num_classes=-1,
+ num_stages=5,
+ dilations=(1, 1, 1, 1, 1),
+ out_indices=None,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ norm_eval=False,
+ ceil_mode=False,
+ with_last_pool=True):
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for vgg')
+ assert num_stages >= 1 and num_stages <= 5
+ stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ assert len(dilations) == num_stages
+
+ self.num_classes = num_classes
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ with_norm = norm_cfg is not None
+
+ if out_indices is None:
+ out_indices = (5, ) if num_classes > 0 else (4, )
+ assert max(out_indices) <= num_stages
+ self.out_indices = out_indices
+
+ self.in_channels = 3
+ start_idx = 0
+ vgg_layers = []
+ self.range_sub_modules = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ num_modules = num_blocks + 1
+ end_idx = start_idx + num_modules
+ dilation = dilations[i]
+ out_channels = 64 * 2**i if i < 4 else 512
+ vgg_layer = make_vgg_layer(
+ self.in_channels,
+ out_channels,
+ num_blocks,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ dilation=dilation,
+ with_norm=with_norm,
+ ceil_mode=ceil_mode)
+ vgg_layers.extend(vgg_layer)
+ self.in_channels = out_channels
+ self.range_sub_modules.append([start_idx, end_idx])
+ start_idx = end_idx
+ if not with_last_pool:
+ vgg_layers.pop(-1)
+ self.range_sub_modules[-1][1] -= 1
+ self.module_name = 'features'
+ self.add_module(self.module_name, nn.Sequential(*vgg_layers))
+
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Linear(512 * 7 * 7, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, num_classes),
+ )
+
+ def init_weights(self, pretrained=None):
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+
+ def forward(self, x):
+ outs = []
+ vgg_layers = getattr(self, self.module_name)
+ for i in range(len(self.stage_blocks)):
+ for j in range(*self.range_sub_modules[i]):
+ vgg_layer = vgg_layers[j]
+ x = vgg_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), -1)
+ x = self.classifier(x)
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ else:
+ return tuple(outs)
+
+ def _freeze_stages(self):
+ vgg_layers = getattr(self, self.module_name)
+ for i in range(self.frozen_stages):
+ for j in range(*self.range_sub_modules[i]):
+ m = vgg_layers[j]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval have effect on BatchNorm only
+ if isinstance(m, _BatchNorm):
+ m.eval()
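+
+
+# Illustrative usage sketch (not part of the upstream file): with the default
+# num_classes=-1, out_indices defaults to (4, ), i.e. the last feature map.
+# >>> import torch
+# >>> model = VGG(depth=11)
+# >>> model(torch.rand(1, 3, 224, 224)).shape
+# torch.Size([1, 512, 7, 7])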
diff --git a/vendor/ViTPose/mmpose/models/backbones/vipnas_mbv3.py b/vendor/ViTPose/mmpose/models/backbones/vipnas_mbv3.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed990e3966b27301dbaf081e3ec0e908704dfc8b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/vipnas_mbv3.py
@@ -0,0 +1,179 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import logging
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+from .utils import InvertedResidual, load_checkpoint
+
+
+@BACKBONES.register_module()
+class ViPNAS_MobileNetV3(BaseBackbone):
+ """ViPNAS_MobileNetV3 backbone.
+
+ "ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"
+ More details can be found in the `paper
+ `__ .
+
+ Args:
+ wid (list(int)): Searched width config for each stage.
+ expan (list(int)): Searched expansion ratio config for each stage.
+ dep (list(int)): Searched depth config for each stage.
+ ks (list(int)): Searched kernel size config for each stage.
+ group (list(int)): Searched group number config for each stage.
+ att (list(bool)): Searched attention config for each stage.
+ stride (list(int)): Stride config for each stage.
+ act (list(dict)): Activation config for each stage.
+ conv_cfg (dict): Config dict for convolution layer.
+ Default: None, which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN').
+ frozen_stages (int): Stages to be frozen (all param fixed).
+ Default: -1, which means not freezing any parameters.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save
+ some memory while slowing down the training speed.
+ Default: False.
+ """
+
+ def __init__(self,
+ wid=[16, 16, 24, 40, 80, 112, 160],
+ expan=[None, 1, 5, 4, 5, 5, 6],
+ dep=[None, 1, 4, 4, 4, 4, 4],
+ ks=[3, 3, 7, 7, 5, 7, 5],
+ group=[None, 8, 120, 20, 100, 280, 240],
+ att=[None, True, True, False, True, True, True],
+ stride=[2, 1, 2, 2, 2, 1, 2],
+ act=[
+ 'HSwish', 'ReLU', 'ReLU', 'ReLU', 'HSwish', 'HSwish',
+ 'HSwish'
+ ],
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ frozen_stages=-1,
+ norm_eval=False,
+ with_cp=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ self.wid = wid
+ self.expan = expan
+ self.dep = dep
+ self.ks = ks
+ self.group = group
+ self.att = att
+ self.stride = stride
+ self.act = act
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.frozen_stages = frozen_stages
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+
+ self.conv1 = ConvModule(
+ in_channels=3,
+ out_channels=self.wid[0],
+ kernel_size=self.ks[0],
+ stride=self.stride[0],
+ padding=self.ks[0] // 2,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=dict(type=self.act[0]))
+
+ self.layers = self._make_layer()
+
+ def _make_layer(self):
+ layers = []
+ layer_index = 0
+ for i, dep in enumerate(self.dep[1:]):
+ mid_channels = self.wid[i + 1] * self.expan[i + 1]
+
+ if self.att[i + 1]:
+ se_cfg = dict(
+ channels=mid_channels,
+ ratio=4,
+ act_cfg=(dict(type='ReLU'), dict(type='HSigmoid')))
+ else:
+ se_cfg = None
+
+ if self.expan[i + 1] == 1:
+ with_expand_conv = False
+ else:
+ with_expand_conv = True
+
+ for j in range(dep):
+ if j == 0:
+ stride = self.stride[i + 1]
+ in_channels = self.wid[i]
+ else:
+ stride = 1
+ in_channels = self.wid[i + 1]
+
+ layer = InvertedResidual(
+ in_channels=in_channels,
+ out_channels=self.wid[i + 1],
+ mid_channels=mid_channels,
+ kernel_size=self.ks[i + 1],
+ groups=self.group[i + 1],
+ stride=stride,
+ se_cfg=se_cfg,
+ with_expand_conv=with_expand_conv,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=dict(type=self.act[i + 1]),
+ with_cp=self.with_cp)
+ layer_index += 1
+ layer_name = f'layer{layer_index}'
+ self.add_module(layer_name, layer)
+ layers.append(layer_name)
+ return layers
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.normal_(m.weight, std=0.001)
+ for name, _ in m.named_parameters():
+ if name in ['bias']:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+
+ for i, layer_name in enumerate(self.layers):
+ layer = getattr(self, layer_name)
+ x = layer(x)
+
+ return x
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ layer = getattr(self, f'layer{i}')
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
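+
+
+# Illustrative usage sketch (searched default architecture): the stage strides
+# [2, 1, 2, 2, 2, 1, 2] give a total stride of 32 and a final width of 160.
+# >>> import torch
+# >>> model = ViPNAS_MobileNetV3()
+# >>> model(torch.rand(1, 3, 224, 224)).shape
+# torch.Size([1, 160, 7, 7])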
diff --git a/vendor/ViTPose/mmpose/models/backbones/vipnas_resnet.py b/vendor/ViTPose/mmpose/models/backbones/vipnas_resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..81b028ed5f5caad5f59c68b7f82c1a4661cf4d6f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/vipnas_resnet.py
@@ -0,0 +1,589 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule, build_conv_layer, build_norm_layer
+from mmcv.cnn.bricks import ContextBlock
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+
+class ViPNAS_Bottleneck(nn.Module):
+ """Bottleneck block for ViPNAS_ResNet.
+
+ Args:
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int): The ratio of ``out_channels/mid_channels`` where
+ ``mid_channels`` is the input/output channels of conv2. Default: 4.
+ stride (int): stride of the block. Default: 1
+ dilation (int): dilation of convolution. Default: 1
+ downsample (nn.Module): downsample operation on identity branch.
+ Default: None.
+ style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
+ stride-two layer is the 3x3 conv layer, otherwise the stride-two
+ layer is the first 1x1 conv layer. Default: "pytorch".
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ kernel_size (int): kernel size of conv2 searched in ViPNAS.
+ groups (int): group number of conv2 searched in ViPNAS.
+ attention (bool): whether to use attention module in the end of
+ the block.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ expansion=4,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ kernel_size=3,
+ groups=1,
+ attention=False):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ assert style in ['pytorch', 'caffe']
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.expansion = expansion
+ assert out_channels % expansion == 0
+ self.mid_channels = out_channels // expansion
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ if self.style == 'pytorch':
+ self.conv1_stride = 1
+ self.conv2_stride = stride
+ else:
+ self.conv1_stride = stride
+ self.conv2_stride = 1
+
+ self.norm1_name, norm1 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ norm_cfg, self.mid_channels, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ norm_cfg, out_channels, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ self.mid_channels,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ self.mid_channels,
+ kernel_size=kernel_size,
+ stride=self.conv2_stride,
+ padding=kernel_size // 2,
+ groups=groups,
+ dilation=dilation,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ conv_cfg,
+ self.mid_channels,
+ out_channels,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ if attention:
+ self.attention = ContextBlock(out_channels,
+ max(1.0 / 16, 16.0 / out_channels))
+ else:
+ self.attention = None
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ @property
+ def norm3(self):
+ """nn.Module: the normalization layer named "norm3" """
+ return getattr(self, self.norm3_name)
+
+ def forward(self, x):
+ """Forward function."""
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.attention is not None:
+ out = self.attention(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def get_expansion(block, expansion=None):
+ """Get the expansion of a residual block.
+
+ The block expansion will be obtained by the following order:
+
+ 1. If ``expansion`` is given, just return it.
+ 2. If ``block`` has the attribute ``expansion``, then return
+ ``block.expansion``.
+ 3. Return the default value according to the block type:
+ 1 for ``ViPNAS_Bottleneck``.
+
+ Args:
+ block (class): The block class.
+ expansion (int | None): The given expansion ratio.
+
+ Returns:
+ int: The expansion of the block.
+ """
+ if isinstance(expansion, int):
+ assert expansion > 0
+ elif expansion is None:
+ if hasattr(block, 'expansion'):
+ expansion = block.expansion
+ elif issubclass(block, ViPNAS_Bottleneck):
+ expansion = 1
+ else:
+ raise TypeError(f'expansion is not specified for {block.__name__}')
+ else:
+ raise TypeError('expansion must be an integer or None')
+
+ return expansion
+
+
+class ViPNAS_ResLayer(nn.Sequential):
+ """ViPNAS_ResLayer to build ResNet style backbone.
+
+ Args:
+ block (nn.Module): Residual block used to build ViPNAS ResLayer.
+ num_blocks (int): Number of blocks.
+ in_channels (int): Input channels of this block.
+ out_channels (int): Output channels of this block.
+ expansion (int, optional): The expansion for BasicBlock/Bottleneck.
+ If not specified, it will firstly be obtained via
+ ``block.expansion``. If the block has no attribute "expansion",
+ the following default values will be used: 1 for BasicBlock and
+ 4 for Bottleneck. Default: None.
+ stride (int): stride of the first block. Default: 1.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ Default: None
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ downsample_first (bool): Downsample at the first block or last block.
+ False for Hourglass, True for ResNet. Default: True
+ kernel_size (int): Kernel Size of the corresponding convolution layer
+ searched in the block.
+ groups (int): Group number of the corresponding convolution layer
+ searched in the block.
+ attention (bool): Whether to use attention module in the end of the
+ block.
+ """
+
+ def __init__(self,
+ block,
+ num_blocks,
+ in_channels,
+ out_channels,
+ expansion=None,
+ stride=1,
+ avg_down=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ downsample_first=True,
+ kernel_size=3,
+ groups=1,
+ attention=False,
+ **kwargs):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ self.block = block
+ self.expansion = get_expansion(block, expansion)
+
+ downsample = None
+ if stride != 1 or in_channels != out_channels:
+ downsample = []
+ conv_stride = stride
+ if avg_down and stride != 1:
+ conv_stride = 1
+ downsample.append(
+ nn.AvgPool2d(
+ kernel_size=stride,
+ stride=stride,
+ ceil_mode=True,
+ count_include_pad=False))
+ downsample.extend([
+ build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=conv_stride,
+ bias=False),
+ build_norm_layer(norm_cfg, out_channels)[1]
+ ])
+ downsample = nn.Sequential(*downsample)
+
+ layers = []
+ if downsample_first:
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ in_channels = out_channels
+ for _ in range(1, num_blocks):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ else: # downsample_first=False is for HourglassModule
+ for i in range(0, num_blocks - 1):
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ expansion=self.expansion,
+ stride=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+ layers.append(
+ block(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ expansion=self.expansion,
+ stride=stride,
+ downsample=downsample,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=kernel_size,
+ groups=groups,
+ attention=attention,
+ **kwargs))
+
+ super().__init__(*layers)
+
+
+@BACKBONES.register_module()
+class ViPNAS_ResNet(BaseBackbone):
+ """ViPNAS_ResNet backbone.
+
+ "ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search"
+ More details can be found in the `paper
+ `__ .
+
+ Args:
+ depth (int): Network depth. Only 50 is currently supported
+ (see ``arch_settings``).
+ in_channels (int): Number of input image channels. Default: 3.
+ num_stages (int): Stages of the network. Default: 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ Default: ``(1, 2, 2, 2)``.
+ dilations (Sequence[int]): Dilation of each stage.
+ Default: ``(1, 1, 1, 1)``.
+ out_indices (Sequence[int]): Output from which stages. If only one
+ stage is specified, a single tensor (feature map) is returned,
+ otherwise multiple stages are specified, a tuple of tensors will
+ be returned. Default: ``(3, )``.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
+ Default: False.
+ avg_down (bool): Use AvgPool instead of stride conv when
+ downsampling in the bottleneck. Default: False.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters. Default: -1.
+ conv_cfg (dict | None): The config dict for conv layers. Default: None.
+ norm_cfg (dict): The config dict for norm layers.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed. Default: False.
+ zero_init_residual (bool): Whether to use zero init for last norm layer
+ in resblocks to let them behave as identity. Default: True.
+ wid (list(int)): Searched width config for each stage.
+ expan (list(int)): Searched expansion ratio config for each stage.
+ dep (list(int)): Searched depth config for each stage.
+ ks (list(int)): Searched kernel size config for each stage.
+ group (list(int)): Searched group number config for each stage.
+ att (list(bool)): Searched attention config for each stage.
+ """
+
+ arch_settings = {
+ 50: ViPNAS_Bottleneck,
+ }
+
+ def __init__(self,
+ depth,
+ in_channels=3,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(3, ),
+ style='pytorch',
+ deep_stem=False,
+ avg_down=False,
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=True,
+ wid=[48, 80, 160, 304, 608],
+ expan=[None, 1, 1, 1, 1],
+ dep=[None, 4, 6, 7, 3],
+ ks=[7, 3, 5, 5, 5],
+ group=[None, 16, 16, 16, 16],
+ att=[None, True, False, True, True]):
+ # Protect mutable default arguments
+ norm_cfg = copy.deepcopy(norm_cfg)
+ super().__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for resnet')
+ self.depth = depth
+ self.stem_channels = wid[0]  # stem width comes from the searched widths
+ self.num_stages = num_stages
+ assert 1 <= num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.deep_stem = deep_stem
+ self.avg_down = avg_down
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.zero_init_residual = zero_init_residual
+ self.block = self.arch_settings[depth]
+ self.stage_blocks = dep[1:1 + num_stages]
+
+ self._make_stem_layer(in_channels, wid[0], ks[0])
+
+ self.res_layers = []
+ _in_channels = wid[0]
+ for i, num_blocks in enumerate(self.stage_blocks):
+ expansion = get_expansion(self.block, expan[i + 1])
+ _out_channels = wid[i + 1] * expansion
+ stride = strides[i]
+ dilation = dilations[i]
+ res_layer = self.make_res_layer(
+ block=self.block,
+ num_blocks=num_blocks,
+ in_channels=_in_channels,
+ out_channels=_out_channels,
+ expansion=expansion,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ avg_down=self.avg_down,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ kernel_size=ks[i + 1],
+ groups=group[i + 1],
+ attention=att[i + 1])
+ _in_channels = _out_channels
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = res_layer[-1].out_channels
+
+ def make_res_layer(self, **kwargs):
+ """Make a ViPNAS ResLayer."""
+ return ViPNAS_ResLayer(**kwargs)
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ def _make_stem_layer(self, in_channels, stem_channels, kernel_size):
+ """Make stem layer."""
+ if self.deep_stem:
+ self.stem = nn.Sequential(
+ ConvModule(
+ in_channels,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels // 2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True),
+ ConvModule(
+ stem_channels // 2,
+ stem_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=True))
+ else:
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ stem_channels,
+ kernel_size=kernel_size,
+ stride=2,
+ padding=kernel_size // 2,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, stem_channels, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ if self.deep_stem:
+ self.stem.eval()
+ for param in self.stem.parameters():
+ param.requires_grad = False
+ else:
+ self.norm1.eval()
+ for m in [self.conv1, self.norm1]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = getattr(self, f'layer{i}')
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize model weights."""
+ super().init_weights(pretrained)
+ if pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.normal_(m.weight, std=0.001)
+ for name, _ in m.named_parameters():
+ if name in ['bias']:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ """Forward function."""
+ if self.deep_stem:
+ x = self.stem(x)
+ else:
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ return tuple(outs)
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval have effect on BatchNorm only
+ if isinstance(m, _BatchNorm):
+ m.eval()
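+
+
+# Illustrative usage sketch (searched default architecture, depth=50): the
+# default out_indices=(3, ) returns the last stage, whose searched width is 608,
+# at 1/32 of the input resolution (e.g. a typical 256x192 pose crop).
+# >>> import torch
+# >>> model = ViPNAS_ResNet(depth=50)
+# >>> model(torch.rand(1, 3, 256, 192)).shape
+# torch.Size([1, 608, 8, 6])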
diff --git a/vendor/ViTPose/mmpose/models/backbones/vit.py b/vendor/ViTPose/mmpose/models/backbones/vit.py
new file mode 100644
index 0000000000000000000000000000000000000000..2719d1a6991b67e1b0832247c2f1259bbacda3f6
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/vit.py
@@ -0,0 +1,341 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+    Resize absolute positional embeddings to a new token grid if needed,
+    keeping an optional leading cls_token embedding unchanged.
+    Args:
+        abs_pos (Tensor): absolute positional embeddings with shape
+            (1, num_position, C).
+        h, w (int): target token grid size.
+        ori_h, ori_w (int): token grid size the embeddings were trained for.
+        has_cls_token (bool): If true, abs_pos contains one extra embedding
+            for the cls token, which is passed through unchanged.
+
+    Returns:
+        Tensor: positional embeddings with shape (1, h * w (+1 for cls), C).
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
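+# Illustrative sketch of `get_abs_pos` (it is not used by the forward pass
+# below): the positional-embedding table is bicubically resized to a new token
+# grid while an optional cls-token slot is kept unchanged. Assuming a table
+# built for a 14x14 grid with a cls token and embedding dim 768:
+#
+#     pos = torch.zeros(1, 1 + 14 * 14, 768)
+#     new_pos = get_abs_pos(pos, h=16, w=12, ori_h=14, ori_w=14,
+#                           has_cls_token=True)
+#     # new_pos.shape == (1, 1 + 16 * 12, 768)
+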
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+ def forward(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
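+# Shape sketch (informal): with ratio=1 and patch_size=16 the padding above
+# works out to 2, so e.g. a 256x192 input gives a 16x12 token grid:
+#
+#     emb = PatchEmbed(img_size=(256, 192), patch_size=16, embed_dim=768)
+#     tokens, (Hp, Wp) = emb(torch.zeros(1, 3, 256, 192))
+#     # tokens.shape == (1, 192, 768); (Hp, Wp) == (16, 12)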
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
+@BACKBONES.register_module()
+class ViT(BaseBackbone):
+
+ def __init__(self,
+ img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+ num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+ drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+ frozen_stages=-1, ratio=1, last_norm=True,
+ patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+ ):
+ # Protect mutable default arguments
+ super(ViT, self).__init__()
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+ self.num_classes = num_classes
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.frozen_stages = frozen_stages
+ self.use_checkpoint = use_checkpoint
+ self.patch_padding = patch_padding
+ self.freeze_attn = freeze_attn
+ self.freeze_ffn = freeze_ffn
+ self.depth = depth
+
+ if hybrid_backbone is not None:
+ self.patch_embed = HybridEmbed(
+ hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+ else:
+ self.patch_embed = PatchEmbed(
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+        # the pretrained model has a cls token, so allocate one extra position embedding
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
+
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+ )
+ for i in range(depth)])
+
+ self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=.02)
+
+ self._freeze_stages()
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = self.blocks[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if self.freeze_attn:
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.attn.eval()
+ m.norm1.eval()
+ for param in m.attn.parameters():
+ param.requires_grad = False
+ for param in m.norm1.parameters():
+ param.requires_grad = False
+
+ if self.freeze_ffn:
+ self.pos_embed.requires_grad = False
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.mlp.eval()
+ m.norm2.eval()
+ for param in m.mlp.parameters():
+ param.requires_grad = False
+ for param in m.norm2.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained, patch_padding=self.patch_padding)
+
+ if pretrained is None:
+ def _init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ self.apply(_init_weights)
+
+ def get_num_layers(self):
+ return len(self.blocks)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ def forward_features(self, x):
+ B, C, H, W = x.shape
+ x, (Hp, Wp) = self.patch_embed(x)
+
+ if self.pos_embed is not None:
+            # add the positional embeddings; the cls-token slot is added too so
+            # that all of pos_embed takes part in multi-GPU training, and since
+            # that slot is zero for the sin-cos pretrained embeddings the sum is
+            # unchanged
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ if self.use_checkpoint:
+ x = checkpoint.checkpoint(blk, x)
+ else:
+ x = blk(x)
+
+ x = self.last_norm(x)
+
+ xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+
+ return xp
+
+ def forward(self, x):
+ x = self.forward_features(x)
+ return x
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
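+
+
+# Rough usage sketch (assumes the package is importable; the relative imports
+# above prevent running this file on its own). A ViT-Base style backbone set
+# up roughly the way top-down pose configs do:
+#
+#     backbone = ViT(img_size=(256, 192), patch_size=16, embed_dim=768,
+#                    depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
+#                    drop_path_rate=0.3)
+#     feat = backbone(torch.zeros(1, 3, 256, 192))
+#     # feat.shape == (1, 768, 16, 12): `forward_features` reshapes the tokens
+#     # back into a feature map for the keypoint head.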
diff --git a/vendor/ViTPose/mmpose/models/backbones/vit_moe.py b/vendor/ViTPose/mmpose/models/backbones/vit_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..880a58fbb2ac2892ef6e1e349f4ef98e38c1d274
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/backbones/vit_moe.py
@@ -0,0 +1,385 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+from ..builder import BACKBONES
+from .base_backbone import BaseBackbone
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+    Resize absolute positional embeddings to a new token grid if needed,
+    keeping an optional leading cls_token embedding unchanged.
+    Args:
+        abs_pos (Tensor): absolute positional embeddings with shape
+            (1, num_position, C).
+        h, w (int): target token grid size.
+        ori_h, ori_w (int): token grid size the embeddings were trained for.
+        has_cls_token (bool): If true, abs_pos contains one extra embedding
+            for the cls token, which is passed through unchanged.
+
+    Returns:
+        Tensor: positional embeddings with shape (1, h * w (+1 for cls), C).
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class MoEMlp(nn.Module):
+ def __init__(self, num_expert=1, in_features=1024, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., part_features=256):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.part_features = part_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features - part_features)
+ self.drop = nn.Dropout(drop)
+
+ self.num_expert = num_expert
+ experts = []
+
+ for i in range(num_expert):
+ experts.append(
+ nn.Linear(hidden_features, part_features)
+ )
+ self.experts = nn.ModuleList(experts)
+
+ def forward(self, x, indices):
+
+ expert_x = torch.zeros_like(x[:, :, -self.part_features:], device=x.device, dtype=x.dtype)
+
+ x = self.fc1(x)
+ x = self.act(x)
+ shared_x = self.fc2(x)
+ indices = indices.view(-1, 1, 1)
+
+        # run every expert and mask out the non-selected ones, so that all
+        # expert parameters stay in the autograd graph (needed for DDP training)
+ for i in range(self.num_expert):
+ selectedIndex = (indices == i)
+ current_x = self.experts[i](x) * selectedIndex
+ expert_x = expert_x + current_x
+
+ x = torch.cat([shared_x, expert_x], dim=-1)
+
+ return x
+
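+# Routing sketch (informal): each token's output is the concatenation of the
+# shared `fc2` projection and the output of the expert selected per sample by
+# `indices`. Assuming 2 experts with `in_features=1024`, `part_features=256`:
+#
+#     moe = MoEMlp(num_expert=2, in_features=1024, hidden_features=4096,
+#                  part_features=256)
+#     x = torch.zeros(4, 192, 1024)        # (batch, tokens, channels)
+#     idx = torch.tensor([0, 1, 0, 1])     # expert id per sample
+#     out = moe(x, idx)
+#     # out.shape == (4, 192, 1024): 768 shared + 256 expert channels
+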
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None, num_expert=1, part_features=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = MoEMlp(num_expert=num_expert, in_features=dim, hidden_features=mlp_hidden_dim,
+ act_layer=act_layer, drop=drop, part_features=part_features)
+
+ def forward(self, x, indices=None):
+
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x), indices))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
+@BACKBONES.register_module()
+class ViTMoE(BaseBackbone):
+
+ def __init__(self,
+ img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+ num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+ drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+ frozen_stages=-1, ratio=1, last_norm=True,
+ patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+ num_expert=1, part_features=None
+ ):
+ # Protect mutable default arguments
+ super(ViTMoE, self).__init__()
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+ self.num_classes = num_classes
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.frozen_stages = frozen_stages
+ self.use_checkpoint = use_checkpoint
+ self.patch_padding = patch_padding
+ self.freeze_attn = freeze_attn
+ self.freeze_ffn = freeze_ffn
+ self.depth = depth
+
+ if hybrid_backbone is not None:
+ self.patch_embed = HybridEmbed(
+ hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+ else:
+ self.patch_embed = PatchEmbed(
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+ self.part_features = part_features
+
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
+
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+ num_expert=num_expert, part_features=part_features
+ )
+ for i in range(depth)])
+
+ self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=.02)
+
+ self._freeze_stages()
+
+ def _freeze_stages(self):
+ """Freeze parameters."""
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = self.blocks[i]
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ if self.freeze_attn:
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.attn.eval()
+ m.norm1.eval()
+ for param in m.attn.parameters():
+ param.requires_grad = False
+ for param in m.norm1.parameters():
+ param.requires_grad = False
+
+ if self.freeze_ffn:
+ self.pos_embed.requires_grad = False
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.requires_grad = False
+ for i in range(0, self.depth):
+ m = self.blocks[i]
+ m.mlp.eval()
+ m.norm2.eval()
+ for param in m.mlp.parameters():
+ param.requires_grad = False
+ for param in m.norm2.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ super().init_weights(pretrained, patch_padding=self.patch_padding, part_features=self.part_features)
+
+ if pretrained is None:
+ def _init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ self.apply(_init_weights)
+
+ def get_num_layers(self):
+ return len(self.blocks)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ def forward_features(self, x, dataset_source=None):
+ B, C, H, W = x.shape
+ x, (Hp, Wp) = self.patch_embed(x)
+
+ if self.pos_embed is not None:
+            # add the positional embeddings; the cls-token slot is added too so
+            # that all of pos_embed takes part in multi-GPU training, and since
+            # that slot is zero for the sin-cos pretrained embeddings the sum is
+            # unchanged
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ if self.use_checkpoint:
+ x = checkpoint.checkpoint(blk, x, dataset_source)
+ else:
+ x = blk(x, dataset_source)
+
+ x = self.last_norm(x)
+
+ xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+
+ return xp
+
+ def forward(self, x, dataset_source=None):
+ x = self.forward_features(x, dataset_source)
+ return x
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self._freeze_stages()
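+
+
+# Rough usage sketch (assumes the package is importable). Compared with the
+# plain ViT backbone, the extra pieces are the MoE FFN options and the
+# per-sample `dataset_source` tensor that picks an expert in every block:
+#
+#     backbone = ViTMoE(img_size=(256, 192), patch_size=16, embed_dim=768,
+#                       depth=12, num_heads=12, qkv_bias=True,
+#                       num_expert=6, part_features=192)
+#     imgs = torch.zeros(2, 3, 256, 192)
+#     dataset_source = torch.tensor([0, 3])  # expert id per image in the batch
+#     feat = backbone(imgs, dataset_source)  # -> (2, 768, 16, 12)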
diff --git a/vendor/ViTPose/mmpose/models/builder.py b/vendor/ViTPose/mmpose/models/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..220839d47d6b1e66a06eb143b1f1ef8145c6a3be
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/builder.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import MODELS as MMCV_MODELS
+from mmcv.cnn import build_model_from_cfg
+from mmcv.utils import Registry
+
+MODELS = Registry(
+ 'models', build_func=build_model_from_cfg, parent=MMCV_MODELS)
+
+BACKBONES = MODELS
+NECKS = MODELS
+HEADS = MODELS
+LOSSES = MODELS
+POSENETS = MODELS
+MESH_MODELS = MODELS
+
+
+def build_backbone(cfg):
+ """Build backbone."""
+ return BACKBONES.build(cfg)
+
+
+def build_neck(cfg):
+ """Build neck."""
+ return NECKS.build(cfg)
+
+
+def build_head(cfg):
+ """Build head."""
+ return HEADS.build(cfg)
+
+
+def build_loss(cfg):
+ """Build loss."""
+ return LOSSES.build(cfg)
+
+
+def build_posenet(cfg):
+ """Build posenet."""
+ return POSENETS.build(cfg)
+
+
+def build_mesh_model(cfg):
+ """Build mesh model."""
+ return MESH_MODELS.build(cfg)
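+
+
+# Usage sketch (informal): every alias above points at the same registry, so a
+# component is built from a config dict whose `type` key names a registered
+# class. For the ViT backbone vendored in this tree, that might look like:
+#
+#     cfg = dict(type='ViT', img_size=(256, 192), patch_size=16,
+#                embed_dim=768, depth=12, num_heads=12)
+#     backbone = build_backbone(cfg)  # same as BACKBONES.build(cfg)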
diff --git a/vendor/ViTPose/mmpose/models/detectors/__init__.py b/vendor/ViTPose/mmpose/models/detectors/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0982094c96295f3f8a0e63e1e0a15964c2c286a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .associative_embedding import AssociativeEmbedding
+from .interhand_3d import Interhand3D
+from .mesh import ParametricMesh
+from .multi_task import MultiTask
+from .multiview_pose import (DetectAndRegress, VoxelCenterDetector,
+ VoxelSinglePose)
+from .pose_lifter import PoseLifter
+from .posewarper import PoseWarper
+from .top_down import TopDown
+from .top_down_moe import TopDownMoE
+
+__all__ = [
+ 'TopDown', 'AssociativeEmbedding', 'ParametricMesh', 'MultiTask',
+ 'PoseLifter', 'Interhand3D', 'PoseWarper', 'DetectAndRegress',
+ 'VoxelCenterDetector', 'VoxelSinglePose', 'TopDownMoE'
+]
diff --git a/vendor/ViTPose/mmpose/models/detectors/associative_embedding.py b/vendor/ViTPose/mmpose/models/detectors/associative_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..100c7806d361d323abb720eb8ad5649ddc3c1a03
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/associative_embedding.py
@@ -0,0 +1,420 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import torch
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core.evaluation import (aggregate_scale, aggregate_stage_flip,
+ flip_feature_maps, get_group_preds,
+ split_ae_outputs)
+from mmpose.core.post_processing.group import HeatmapParser
+from mmpose.core.visualization import imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0'
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class AssociativeEmbedding(BasePose):
+ """Associative embedding pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated arguments. Please use
+ ``loss_keypoint`` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ if keypoint_head is not None:
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for BottomUp is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+ self.use_udp = test_cfg.get('use_udp', False)
+ self.parser = HeatmapParser(self.test_cfg)
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img=None,
+ targets=None,
+ masks=None,
+ joints=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss is True.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C
+ - img_width: imgW
+ - img_height: imgH
+            - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input image.
+ targets (list(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (list(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (list(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ img_metas (dict): Information about val & test.
+ By default it includes:
+
+ - "image_file": image path
+ - "aug_data": input
+ - "test_scale_factor": test scale factor
+ - "base_size": base size of input
+ - "center": center of image
+ - "scale": scale of image
+ - "flip_index": flip index of keypoints
+            return_loss (bool): ``return_loss=True`` for training,
+ ``return_loss=False`` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if 'return_loss' is true, then return losses. \
+ Otherwise, return predicted poses, scores, image \
+ paths and heatmaps.
+ """
+
+ if return_loss:
+ return self.forward_train(img, targets, masks, joints, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, img, targets, masks, joints, img_metas, **kwargs):
+ """Forward the bottom-up model and calculate the loss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+            heatmaps width: W
+ heatmaps height: H
+ max_num_people: M
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input image.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+            img_metas (dict): Information about val & test.
+ By default this includes:
+ - "image_file": image path
+ - "aug_data": input
+ - "test_scale_factor": test scale factor
+ - "base_size": base size of input
+ - "center": center of image
+ - "scale": scale of image
+ - "flip_index": flip index of keypoints
+
+ Returns:
+ dict: The total loss for bottom-up
+ """
+
+ output = self.backbone(img)
+
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, targets, masks, joints)
+ losses.update(keypoint_losses)
+
+ return losses
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Outputs.
+ """
+ output = self.backbone(img)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Inference the bottom-up model.
+
+ Note:
+            - Batch size: N (currently only batch size 1 is supported)
+ - num_img_channel: C
+ - img_width: imgW
+ - img_height: imgH
+
+ Args:
+ flip_index (List(int)):
+ aug_data (List(Tensor[NxCximgHximgW])): Multi-scale image
+ test_scale_factor (List(float)): Multi-scale factor
+ base_size (Tuple(int)): Base size of image when scale is 1
+ center (np.ndarray): center of image
+ scale (np.ndarray): the scale of image
+ """
+ assert img.size(0) == 1
+ assert len(img_metas) == 1
+
+ img_metas = img_metas[0]
+
+ aug_data = img_metas['aug_data']
+
+ test_scale_factor = img_metas['test_scale_factor']
+ base_size = img_metas['base_size']
+ center = img_metas['center']
+ scale = img_metas['scale']
+
+ result = {}
+
+ scale_heatmaps_list = []
+ scale_tags_list = []
+
+ for idx, s in enumerate(sorted(test_scale_factor, reverse=True)):
+ image_resized = aug_data[idx].to(img.device)
+
+ features = self.backbone(image_resized)
+ if self.with_keypoint:
+ outputs = self.keypoint_head(features)
+
+ heatmaps, tags = split_ae_outputs(
+ outputs, self.test_cfg['num_joints'],
+ self.test_cfg['with_heatmaps'], self.test_cfg['with_ae'],
+ self.test_cfg.get('select_output_index', range(len(outputs))))
+
+ if self.test_cfg.get('flip_test', True):
+ # use flip test
+ features_flipped = self.backbone(
+ torch.flip(image_resized, [3]))
+ if self.with_keypoint:
+ outputs_flipped = self.keypoint_head(features_flipped)
+
+ heatmaps_flipped, tags_flipped = split_ae_outputs(
+ outputs_flipped, self.test_cfg['num_joints'],
+ self.test_cfg['with_heatmaps'], self.test_cfg['with_ae'],
+ self.test_cfg.get('select_output_index',
+ range(len(outputs))))
+
+ heatmaps_flipped = flip_feature_maps(
+ heatmaps_flipped, flip_index=img_metas['flip_index'])
+ if self.test_cfg['tag_per_joint']:
+ tags_flipped = flip_feature_maps(
+ tags_flipped, flip_index=img_metas['flip_index'])
+ else:
+ tags_flipped = flip_feature_maps(
+ tags_flipped, flip_index=None, flip_output=True)
+
+ else:
+ heatmaps_flipped = None
+ tags_flipped = None
+
+ aggregated_heatmaps = aggregate_stage_flip(
+ heatmaps,
+ heatmaps_flipped,
+ index=-1,
+ project2image=self.test_cfg['project2image'],
+ size_projected=base_size,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_stage='average',
+ aggregate_flip='average')
+
+ aggregated_tags = aggregate_stage_flip(
+ tags,
+ tags_flipped,
+ index=-1,
+ project2image=self.test_cfg['project2image'],
+ size_projected=base_size,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_stage='concat',
+ aggregate_flip='concat')
+
+ if s == 1 or len(test_scale_factor) == 1:
+ if isinstance(aggregated_tags, list):
+ scale_tags_list.extend(aggregated_tags)
+ else:
+ scale_tags_list.append(aggregated_tags)
+
+ if isinstance(aggregated_heatmaps, list):
+ scale_heatmaps_list.extend(aggregated_heatmaps)
+ else:
+ scale_heatmaps_list.append(aggregated_heatmaps)
+
+ aggregated_heatmaps = aggregate_scale(
+ scale_heatmaps_list,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_scale='average')
+
+ aggregated_tags = aggregate_scale(
+ scale_tags_list,
+ align_corners=self.test_cfg.get('align_corners', True),
+ aggregate_scale='unsqueeze_concat')
+
+ heatmap_size = aggregated_heatmaps.shape[2:4]
+ tag_size = aggregated_tags.shape[2:4]
+ if heatmap_size != tag_size:
+ tmp = []
+ for idx in range(aggregated_tags.shape[-1]):
+ tmp.append(
+ torch.nn.functional.interpolate(
+ aggregated_tags[..., idx],
+ size=heatmap_size,
+ mode='bilinear',
+ align_corners=self.test_cfg.get('align_corners',
+ True)).unsqueeze(-1))
+ aggregated_tags = torch.cat(tmp, dim=-1)
+
+ # perform grouping
+ grouped, scores = self.parser.parse(aggregated_heatmaps,
+ aggregated_tags,
+ self.test_cfg['adjust'],
+ self.test_cfg['refine'])
+
+ preds = get_group_preds(
+ grouped,
+ center,
+ scale, [aggregated_heatmaps.size(3),
+ aggregated_heatmaps.size(2)],
+ use_udp=self.use_udp)
+
+ image_paths = []
+ image_paths.append(img_metas['image_file'])
+
+ if return_heatmap:
+ output_heatmap = aggregated_heatmaps.detach().cpu().numpy()
+ else:
+ output_heatmap = None
+
+ result['preds'] = preds
+ result['scores'] = scores
+ result['image_paths'] = image_paths
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='AssociativeEmbedding')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ skeleton (list[list]): The connection of keypoints.
+ skeleton is 0-based indexing.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ pose_kpt_color (np.array[Nx3]`): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized image only if not `show` or `out_file`
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+ img_h, img_w, _ = img.shape
+
+ pose_result = []
+ for res in result:
+ pose_result.append(res['keypoints'])
+
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius, thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
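+
+
+# Config sketch (informal; values are placeholders, not from this repo):
+# `forward_test` above reads a number of keys from `test_cfg`. A placeholder
+# dictionary touching the keys read in this file might look like
+#
+#     test_cfg = dict(
+#         num_joints=17,
+#         with_heatmaps=[True], with_ae=[True],
+#         project2image=True, align_corners=True,
+#         flip_test=True, tag_per_joint=True,
+#         adjust=True, refine=True, use_udp=False)
+#
+# `HeatmapParser(self.test_cfg)` consumes additional grouping-related keys
+# (score thresholds, NMS settings, maximum number of people) from the same
+# dictionary.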
diff --git a/vendor/ViTPose/mmpose/models/detectors/base.py b/vendor/ViTPose/mmpose/models/detectors/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d459b42de66012c88ff37d7d845265d06efebc7
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/base.py
@@ -0,0 +1,131 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+from collections import OrderedDict
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+
+
+class BasePose(nn.Module, metaclass=ABCMeta):
+ """Base class for pose detectors.
+
+    All pose detectors should subclass it.
+    All subclasses should overwrite:
+        `forward_train`, the forward computation used during training.
+        `forward_test`, the forward computation used during testing.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ head (dict): Head modules to give output.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ """
+
+ @abstractmethod
+ def forward_train(self, img, img_metas, **kwargs):
+ """Defines the computation performed at training."""
+
+ @abstractmethod
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at testing."""
+
+ @abstractmethod
+ def forward(self, img, img_metas, return_loss=True, **kwargs):
+ """Forward function."""
+
+ @staticmethod
+ def _parse_losses(losses):
+ """Parse the raw outputs (losses) of the network.
+
+ Args:
+ losses (dict): Raw output of the network, which usually contain
+ losses and other necessary information.
+
+ Returns:
+ tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor \
+ which may be a weighted sum of all losses, log_vars \
+ contains all the variables to be sent to the logger.
+ """
+ log_vars = OrderedDict()
+ for loss_name, loss_value in losses.items():
+ if isinstance(loss_value, torch.Tensor):
+ log_vars[loss_name] = loss_value.mean()
+ elif isinstance(loss_value, float):
+ log_vars[loss_name] = loss_value
+ elif isinstance(loss_value, list):
+ log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
+ else:
+ raise TypeError(
+ f'{loss_name} is not a tensor or list of tensors or float')
+
+ loss = sum(_value for _key, _value in log_vars.items()
+ if 'loss' in _key)
+
+ log_vars['loss'] = loss
+ for loss_name, loss_value in log_vars.items():
+ # reduce loss when distributed training
+ if not isinstance(loss_value, float):
+ if dist.is_available() and dist.is_initialized():
+ loss_value = loss_value.data.clone()
+ dist.all_reduce(loss_value.div_(dist.get_world_size()))
+ log_vars[loss_name] = loss_value.item()
+ else:
+ log_vars[loss_name] = loss_value
+
+ return loss, log_vars
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during training.
+
+ This method defines an iteration step during training, except for the
+ back propagation and optimizer updating, which are done in an optimizer
+ hook. Note that in some complicated cases or models, the whole process
+ including back propagation and optimizer updating is also defined in
+ this method, such as GAN.
+
+ Args:
+ data_batch (dict): The output of dataloader.
+ optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+ runner is passed to ``train_step()``. This argument is unused
+ and reserved.
+
+ Returns:
+ dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+ ``num_samples``.
+ ``loss`` is a tensor for back propagation, which can be a
+ weighted sum of multiple losses.
+ ``log_vars`` contains all the variables to be sent to the
+ logger.
+ ``num_samples`` indicates the batch size (when the model is
+ DDP, it means the batch size on each GPU), which is used for
+ averaging the logs.
+ """
+ losses = self.forward(**data_batch)
+
+ loss, log_vars = self._parse_losses(losses)
+
+ outputs = dict(
+ loss=loss,
+ log_vars=log_vars,
+ num_samples=len(next(iter(data_batch.values()))))
+
+ return outputs
+
+ def val_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during validation.
+
+ This method shares the same signature as :func:`train_step`, but used
+ during val epochs. Note that the evaluation after training epochs is
+ not implemented with this method, but an evaluation hook.
+ """
+ results = self.forward(return_loss=False, **data_batch)
+
+ outputs = dict(results=results)
+
+ return outputs
+
+ @abstractmethod
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
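+
+
+if __name__ == '__main__':
+    # Small self-contained check of `_parse_losses` (illustrative only; it is
+    # a staticmethod, so the abstract class does not need to be instantiated).
+    dummy_losses = dict(
+        heatmap_loss=torch.tensor(0.5),
+        push_loss=[torch.tensor(0.1), torch.tensor(0.2)],
+        acc_pose=0.9)
+    total_loss, log_vars = BasePose._parse_losses(dummy_losses)
+    # Keys containing 'loss' are summed into `total_loss` (0.8 here); plain
+    # metrics such as 'acc_pose' are only logged.
+    print(total_loss, log_vars)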
diff --git a/vendor/ViTPose/mmpose/models/detectors/interhand_3d.py b/vendor/ViTPose/mmpose/models/detectors/interhand_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a4d6bde1b097d1649a65de8075744ac1978ad15
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/interhand_3d.py
@@ -0,0 +1,227 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+from mmcv.utils.misc import deprecated_api_warning
+
+from mmpose.core import imshow_keypoints, imshow_keypoints_3d
+from ..builder import POSENETS
+from .top_down import TopDown
+
+
+@POSENETS.register_module()
+class Interhand3D(TopDown):
+ """Top-down interhand 3D pose detector of paper ref: Gyeongsik Moon.
+
+ "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
+ Estimation from a Single RGB Image". A child class of TopDown detector.
+ """
+
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. list[Tensor], list[list[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (list[torch.Tensor]): Target heatmaps, relative hand
+ root depth and hand type.
+ target_weight (list[torch.Tensor]): Weights for target
+ heatmaps, relative hand root depth and hand type.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ - "heatmap3d_depth_bound": depth bound of hand keypoint 3D
+ heatmap
+ - "root_depth_bound": depth bound of relative root depth 1D
+ heatmap
+            return_loss (bool): ``return_loss=True`` for training,
+                ``return_loss=False`` for validation & test.
+
+ Returns:
+            dict|tuple: if ``return_loss`` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths, \
+ heatmaps, relative hand root depth and hand type.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ features = self.backbone(img)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output = [(out + out_flipped) * 0.5
+ for out, out_flipped in zip(output, output_flipped)]
+
+ if self.with_keypoint:
+ result = self.keypoint_head.decode(
+ img_metas, output, img_size=[img_width, img_height])
+ else:
+ result = {}
+ return result
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='Interhand3D')
+ def show_result(self,
+ result,
+ img=None,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ radius=8,
+ bbox_color='green',
+ thickness=2,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ vis_height=400,
+ num_instances=-1,
+ win_name='',
+ show=False,
+ wait_time=0,
+ out_file=None):
+ """Visualize 3D pose estimation results.
+
+ Args:
+ result (list[dict]): The pose estimation results containing:
+
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
+ 2D inputs. If a sequence is given, only the last frame
+ will be used for visualization
+ - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
+ - "title" (str): title for the subplot
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
+ links, each is a pair of joint indices.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ radius (int): Radius of circles.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ thickness (int): Thickness of lines.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M limbs.
+ If None, do not draw limbs.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ num_instances (int): Number of instances to be shown in 3D. If
+ smaller than 0, all the instances in the pose_result will be
+ shown. Otherwise, pad or truncate the pose_result to a length
+ of num_instances.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ if num_instances < 0:
+ assert len(result) > 0
+ result = sorted(result, key=lambda x: x.get('track_id', 0))
+
+ # draw image and 2d poses
+ if img is not None:
+ img = mmcv.imread(img)
+
+ bbox_result = []
+ pose_2d = []
+ for res in result:
+ if 'bbox' in res:
+ bbox = np.array(res['bbox'])
+ if bbox.ndim != 1:
+ assert bbox.ndim == 2
+ bbox = bbox[-1] # Get bbox from the last frame
+ bbox_result.append(bbox)
+ if 'keypoints' in res:
+ kpts = np.array(res['keypoints'])
+ if kpts.ndim != 2:
+ assert kpts.ndim == 3
+ kpts = kpts[-1] # Get 2D keypoints from the last frame
+ pose_2d.append(kpts)
+
+ if len(bbox_result) > 0:
+ bboxes = np.vstack(bbox_result)
+ mmcv.imshow_bboxes(
+ img,
+ bboxes,
+ colors=bbox_color,
+ top_k=-1,
+ thickness=2,
+ show=False)
+ if len(pose_2d) > 0:
+ imshow_keypoints(
+ img,
+ pose_2d,
+ skeleton,
+ kpt_score_thr=kpt_score_thr,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ radius=radius,
+ thickness=thickness)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ img_vis = imshow_keypoints_3d(
+ result,
+ img,
+ skeleton,
+ pose_kpt_color,
+ pose_link_color,
+ vis_height,
+ axis_limit=300,
+ axis_azimuth=-115,
+ axis_elev=15,
+ kpt_score_thr=kpt_score_thr,
+ num_instances=num_instances)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
diff --git a/vendor/ViTPose/mmpose/models/detectors/mesh.py b/vendor/ViTPose/mmpose/models/detectors/mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..0af18e3844659c7d2a3755ab891819bbf7ef4c22
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/mesh.py
@@ -0,0 +1,438 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import cv2
+import mmcv
+import numpy as np
+import torch
+
+from mmpose.core.visualization.image import imshow_mesh_3d
+from mmpose.models.misc.discriminator import SMPLDiscriminator
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+
+def set_requires_grad(nets, requires_grad=False):
+ """Set requies_grad for all the networks.
+
+ Args:
+ nets (nn.Module | list[nn.Module]): A list of networks or a single
+ network.
+ requires_grad (bool): Whether the networks require gradients or not
+ """
+ if not isinstance(nets, list):
+ nets = [nets]
+ for net in nets:
+ if net is not None:
+ for param in net.parameters():
+ param.requires_grad = requires_grad
+
+
+@POSENETS.register_module()
+class ParametricMesh(BasePose):
+ """Model-based 3D human mesh detector. Take a single color image as input
+ and output 3D joints, SMPL parameters and camera parameters.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ mesh_head (dict): Mesh head to process feature.
+ smpl (dict): Config for SMPL model.
+ disc (dict): Discriminator for SMPL parameters. Default: None.
+ loss_gan (dict): Config for adversarial loss. Default: None.
+ loss_mesh (dict): Config for mesh loss. Default: None.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ """
+
+ def __init__(self,
+ backbone,
+ mesh_head,
+ smpl,
+ disc=None,
+ loss_gan=None,
+ loss_mesh=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super().__init__()
+
+ self.backbone = builder.build_backbone(backbone)
+ self.mesh_head = builder.build_head(mesh_head)
+ self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)
+
+ self.smpl = builder.build_mesh_model(smpl)
+
+ self.with_gan = disc is not None and loss_gan is not None
+ if self.with_gan:
+ self.discriminator = SMPLDiscriminator(**disc)
+ self.loss_gan = builder.build_loss(loss_gan)
+ self.disc_step_count = 0
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ self.loss_mesh = builder.build_loss(loss_mesh)
+ self.init_weights(pretrained=pretrained)
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ self.mesh_head.init_weights()
+ if self.with_gan:
+ self.discriminator.init_weights()
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """Train step function.
+
+ In this function, the detector will finish the train step following
+ the pipeline:
+
+ 1. get fake and real SMPL parameters
+ 2. optimize discriminator (if have)
+ 3. optimize generator
+
+        If `self.train_cfg.disc_step > 1`, each train step optimizes the
+        discriminator on a different input batch, and the generator is only
+        optimized once every `disc_step` iterations.
+
+ Args:
+ data_batch (torch.Tensor): Batch of data as input.
+ optimizer (dict[torch.optim.Optimizer]): Dict with optimizers for
+ generator and discriminator (if have).
+
+ Returns:
+ outputs (dict): Dict with loss, information for logger,
+ the number of samples.
+ """
+
+ img = data_batch['img']
+ pred_smpl = self.generator(img)
+ pred_pose, pred_beta, pred_camera = pred_smpl
+
+ # optimize discriminator (if have)
+ if self.train_cfg['disc_step'] > 0 and self.with_gan:
+ set_requires_grad(self.discriminator, True)
+ fake_data = (pred_camera.detach(), pred_pose.detach(),
+ pred_beta.detach())
+ mosh_theta = data_batch['mosh_theta']
+            real_data = (mosh_theta[:, :3], mosh_theta[:, 3:75],
+                         mosh_theta[:, 75:])
+ fake_score = self.discriminator(fake_data)
+ real_score = self.discriminator(real_data)
+
+ disc_losses = {}
+ disc_losses['real_loss'] = self.loss_gan(
+ real_score, target_is_real=True, is_disc=True)
+ disc_losses['fake_loss'] = self.loss_gan(
+ fake_score, target_is_real=False, is_disc=True)
+ loss_disc, log_vars_d = self._parse_losses(disc_losses)
+
+ optimizer['discriminator'].zero_grad()
+ loss_disc.backward()
+ optimizer['discriminator'].step()
+ self.disc_step_count = \
+ (self.disc_step_count + 1) % self.train_cfg['disc_step']
+
+ if self.disc_step_count != 0:
+ outputs = dict(
+ loss=loss_disc,
+ log_vars=log_vars_d,
+ num_samples=len(next(iter(data_batch.values()))))
+ return outputs
+
+ # optimize generator
+ pred_out = self.smpl(
+ betas=pred_beta,
+ body_pose=pred_pose[:, 1:],
+ global_orient=pred_pose[:, :1])
+ pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
+ 'joints']
+
+ gt_beta = data_batch['beta']
+ gt_pose = data_batch['pose']
+ gt_vertices = self.smpl(
+ betas=gt_beta,
+ body_pose=gt_pose[:, 3:],
+ global_orient=gt_pose[:, :3])['vertices']
+
+ pred = dict(
+ pose=pred_pose,
+ beta=pred_beta,
+ camera=pred_camera,
+ vertices=pred_vertices,
+ joints_3d=pred_joints_3d)
+
+ target = {
+ key: data_batch[key]
+ for key in [
+ 'pose', 'beta', 'has_smpl', 'joints_3d', 'joints_2d',
+ 'joints_3d_visible', 'joints_2d_visible'
+ ]
+ }
+ target['vertices'] = gt_vertices
+
+ losses = self.loss_mesh(pred, target)
+
+ if self.with_gan:
+ set_requires_grad(self.discriminator, False)
+ pred_theta = (pred_camera, pred_pose, pred_beta)
+ pred_score = self.discriminator(pred_theta)
+ loss_adv = self.loss_gan(
+ pred_score, target_is_real=True, is_disc=False)
+ losses['adv_loss'] = loss_adv
+
+ loss, log_vars = self._parse_losses(losses)
+ optimizer['generator'].zero_grad()
+ loss.backward()
+ optimizer['generator'].step()
+
+ outputs = dict(
+ loss=loss,
+ log_vars=log_vars,
+ num_samples=len(next(iter(data_batch.values()))))
+
+ return outputs
+
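+    # Note (informal): `train_step` expects `optimizer` to be a dict holding a
+    # 'generator' optimizer and, when the discriminator is enabled, a
+    # 'discriminator' optimizer, e.g.
+    #
+    #     optimizer = dict(
+    #         generator=torch.optim.Adam(model.generator.parameters()),
+    #         discriminator=torch.optim.Adam(model.discriminator.parameters()))
+    #
+    # (Adam and its defaults here are placeholders, not values from this repo.)
+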
+ def forward_train(self, *args, **kwargs):
+ """Forward function for training.
+
+ For ParametricMesh, we do not use this interface.
+ """
+ raise NotImplementedError('This interface should not be used in '
+ 'current training schedule. Please use '
+ '`train_step` for training.')
+
+ def val_step(self, data_batch, **kwargs):
+ """Forward function for evaluation.
+
+ Args:
+ data_batch (dict): Contain data for forward.
+
+ Returns:
+ dict: Contain the results from model.
+ """
+ output = self.forward_test(**data_batch, **kwargs)
+ return output
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Outputs.
+ """
+ output = self.generator(img)
+ return output
+
+ def forward_test(self,
+ img,
+ img_metas,
+ return_vertices=False,
+ return_faces=False,
+ **kwargs):
+ """Defines the computation performed at every call when testing."""
+
+ pred_smpl = self.generator(img)
+ pred_pose, pred_beta, pred_camera = pred_smpl
+ pred_out = self.smpl(
+ betas=pred_beta,
+ body_pose=pred_pose[:, 1:],
+ global_orient=pred_pose[:, :1])
+ pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
+ 'joints']
+
+ all_preds = {}
+ all_preds['keypoints_3d'] = pred_joints_3d.detach().cpu().numpy()
+ all_preds['smpl_pose'] = pred_pose.detach().cpu().numpy()
+ all_preds['smpl_beta'] = pred_beta.detach().cpu().numpy()
+ all_preds['camera'] = pred_camera.detach().cpu().numpy()
+
+ if return_vertices:
+ all_preds['vertices'] = pred_vertices.detach().cpu().numpy()
+ if return_faces:
+ all_preds['faces'] = self.smpl.get_faces()
+
+ all_boxes = []
+ image_path = []
+ for img_meta in img_metas:
+ box = np.zeros(6, dtype=np.float32)
+ c = img_meta['center']
+ s = img_meta['scale']
+            if 'bbox_score' in img_meta:
+                score = np.array(img_meta['bbox_score']).reshape(-1)
+ else:
+ score = 1.0
+ box[0:2] = c
+ box[2:4] = s
+ box[4] = np.prod(s * 200.0, axis=0)
+ box[5] = score
+ all_boxes.append(box)
+ image_path.append(img_meta['image_file'])
+
+ all_preds['bboxes'] = np.stack(all_boxes, axis=0)
+ all_preds['image_path'] = image_path
+ return all_preds
+
+ def get_3d_joints_from_mesh(self, vertices):
+ """Get 3D joints from 3D mesh using predefined joints regressor."""
+ return torch.matmul(
+ self.joints_regressor.to(vertices.device), vertices)
+
+ def forward(self, img, img_metas=None, return_loss=False, **kwargs):
+ """Forward function.
+
+ Calls either forward_train or forward_test depending on whether
+ return_loss=True.
+
+ Note:
+ - batch_size: N
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+
+ Args:
+ img (torch.Tensor[N x C x imgH x imgW]): Input images.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ Returns:
+ Return predicted 3D joints, SMPL parameters, boxes and image paths.
+ """
+
+ if return_loss:
+ return self.forward_train(img, img_metas, **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def show_result(self,
+ result,
+ img,
+ show=False,
+ out_file=None,
+ win_name='',
+ wait_time=0,
+ bbox_color='green',
+ mesh_color=(76, 76, 204),
+ **kwargs):
+ """Visualize 3D mesh estimation results.
+
+ Args:
+ result (list[dict]): The mesh estimation results containing:
+
+ - "bbox" (ndarray[4]): instance bounding bbox
+ - "center" (ndarray[2]): bbox center
+ - "scale" (ndarray[2]): bbox scale
+ - "keypoints_3d" (ndarray[K,3]): predicted 3D keypoints
+ - "camera" (ndarray[3]): camera parameters
+ - "vertices" (ndarray[V, 3]): predicted 3D vertices
+ - "faces" (ndarray[F, 3]): mesh faces
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ wait_time (int): Value of waitKey param. Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ mesh_color (str or tuple or :obj:`Color`): Color of mesh surface.
+
+ Returns:
+ ndarray: Visualized img, only if not `show` or `out_file`.
+ """
+
+ if img is not None:
+ img = mmcv.imread(img)
+
+ focal_length = self.loss_mesh.focal_length
+ H, W, C = img.shape
+ img_center = np.array([[0.5 * W], [0.5 * H]])
+
+ # show bounding boxes
+ bboxes = [res['bbox'] for res in result]
+ bboxes = np.vstack(bboxes)
+ mmcv.imshow_bboxes(
+ img, bboxes, colors=bbox_color, top_k=-1, thickness=2, show=False)
+
+ vertex_list = []
+ face_list = []
+ for res in result:
+ vertices = res['vertices']
+ faces = res['faces']
+ camera = res['camera']
+ camera_center = res['center']
+ scale = res['scale']
+
+ # predicted vertices are in root-relative space,
+ # we need to translate them to camera space.
+ translation = np.array([
+ camera[1], camera[2],
+ 2 * focal_length / (scale[0] * 200.0 * camera[0] + 1e-9)
+ ])
+ mean_depth = vertices[:, -1].mean() + translation[-1]
+ translation[:2] += (camera_center -
+ img_center[:, 0]) / focal_length * mean_depth
+ vertices += translation[None, :]
+
+ vertex_list.append(vertices)
+ face_list.append(faces)
+
+ # render from front view
+ img_vis = imshow_mesh_3d(
+ img,
+ vertex_list,
+ face_list,
+ img_center, [focal_length, focal_length],
+ colors=mesh_color)
+
+ # render from side view
+ # rotate mesh vertices
+ R = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0]
+ rot_vertex_list = [np.dot(vert, R) for vert in vertex_list]
+
+ # get the 3D bbox containing all meshes
+ rot_vertices = np.concatenate(rot_vertex_list, axis=0)
+ min_corner = rot_vertices.min(0)
+ max_corner = rot_vertices.max(0)
+
+ center_3d = 0.5 * (min_corner + max_corner)
+ ratio = 0.8
+ bbox3d_size = max_corner - min_corner
+
+ # set appropriate translation to make all meshes appear in the image
+ z_x = bbox3d_size[0] * focal_length / (ratio * W) - min_corner[2]
+ z_y = bbox3d_size[1] * focal_length / (ratio * H) - min_corner[2]
+ z = max(z_x, z_y)
+ translation = -center_3d
+ translation[2] = z
+ translation = translation[None, :]
+ rot_vertex_list = [
+ rot_vert + translation for rot_vert in rot_vertex_list
+ ]
+
+ # render from side view
+ img_side = imshow_mesh_3d(
+ np.ones_like(img) * 255, rot_vertex_list, face_list, img_center,
+ [focal_length, focal_length])
+
+ # merge the front-view and side-view images
+ img_vis = np.concatenate([img_vis, img_side], axis=1)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
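A minimal numpy sketch of the root-relative-to-camera-space translation used in `show_result` above, converting the weak-perspective camera output `(s, tx, ty)` into a 3D offset; the bbox and camera values are hypothetical, and the mean vertex depth is approximated by the recovered `tz`:

```python
import numpy as np

def weak_perspective_to_translation(camera, scale, bbox_center, img_center,
                                    focal_length=5000.0):
    """Convert a weak-perspective camera (s, tx, ty) predicted for a bbox
    into a camera-space translation, mirroring show_result above."""
    s, tx, ty = camera
    # depth recovered from the weak-perspective scale: larger s means closer
    tz = 2.0 * focal_length / (scale * 200.0 * s + 1e-9)
    translation = np.array([tx, ty, tz], dtype=np.float32)
    # compensate for the bbox center being offset from the image center
    translation[:2] += (bbox_center - img_center) / focal_length * tz
    return translation

# hypothetical bbox/camera values, for illustration only
print(weak_perspective_to_translation(
    camera=np.array([0.9, 0.05, -0.02]),
    scale=1.2,
    bbox_center=np.array([330.0, 250.0]),
    img_center=np.array([320.0, 240.0])))
```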
diff --git a/vendor/ViTPose/mmpose/models/detectors/multi_task.py b/vendor/ViTPose/mmpose/models/detectors/multi_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b6f3178a4b0413f5118eee27b535f46a1baaf84
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/multi_task.py
@@ -0,0 +1,187 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+
+from .. import builder
+from ..builder import POSENETS
+
+
+@POSENETS.register_module()
+class MultiTask(nn.Module):
+ """Multi-task detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ heads (list[dict]): heads to output predictions.
+ necks (list[dict] | None): necks to process feature.
+ head2neck (dict{int:int}): head index to neck index.
+ pretrained (str): Path to the pretrained models.
+ """
+
+ def __init__(self,
+ backbone,
+ heads,
+ necks=None,
+ head2neck=None,
+ pretrained=None):
+ super().__init__()
+
+ self.backbone = builder.build_backbone(backbone)
+
+ if head2neck is None:
+ assert necks is None
+ head2neck = {}
+
+ self.head2neck = {}
+ for i in range(len(heads)):
+ self.head2neck[i] = head2neck[i] if i in head2neck else -1
+
+ self.necks = nn.ModuleList([])
+ if necks is not None:
+ for neck in necks:
+ self.necks.append(builder.build_neck(neck))
+ self.necks.append(nn.Identity())
+
+ self.heads = nn.ModuleList([])
+ assert heads is not None
+ for head in heads:
+ assert head is not None
+ self.heads.append(builder.build_head(head))
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_necks(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'necks')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_necks:
+ for neck in self.necks:
+ if hasattr(neck, 'init_weights'):
+ neck.init_weights()
+
+ for head in self.heads:
+ if hasattr(head, 'init_weights'):
+ head.init_weights()
+
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW]): Input images.
+ target (list[torch.Tensor]): Targets.
+ target_weight (List[torch.Tensor]): Weights.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ Returns:
+ dict|tuple: if `return loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(img, img_metas, **kwargs)
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ features = self.backbone(img)
+ outputs = []
+
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ outputs.append(head(self.necks[neck_id](features)))
+
+ # if return loss
+ losses = dict()
+
+ for head, output, gt, gt_weight in zip(self.heads, outputs, target,
+ target_weight):
+ loss = head.get_loss(output, gt, gt_weight)
+ assert len(set(losses.keys()).intersection(set(loss.keys()))) == 0
+ losses.update(loss)
+
+ if hasattr(head, 'get_accuracy'):
+ acc = head.get_accuracy(output, gt, gt_weight)
+ assert len(set(losses.keys()).intersection(set(
+ acc.keys()))) == 0
+ losses.update(acc)
+
+ return losses
+
+ def forward_test(self, img, img_metas, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ results = {}
+
+ features = self.backbone(img)
+ outputs = []
+
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ if hasattr(head, 'inference_model'):
+ head_output = head.inference_model(
+ self.necks[neck_id](features), flip_pairs=None)
+ else:
+ head_output = head(
+ self.necks[neck_id](features)).detach().cpu().numpy()
+ outputs.append(head_output)
+
+ for head, output in zip(self.heads, outputs):
+ result = head.decode(
+ img_metas, output, img_size=[img_width, img_height])
+ results.update(result)
+ return results
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ list[Tensor]: Outputs.
+ """
+ features = self.backbone(img)
+ outputs = []
+ for head_id, head in enumerate(self.heads):
+ neck_id = self.head2neck[head_id]
+ outputs.append(head(self.necks[neck_id](features)))
+ return outputs
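The head-to-neck routing in `MultiTask` maps unlisted heads to index `-1`, which resolves to the trailing `nn.Identity()` appended after the real necks. A toy sketch of that routing with plain `torch.nn` layers standing in for the built backbone, necks and heads (none of these modules come from the file above):

```python
import torch
import torch.nn as nn

backbone = nn.Conv2d(3, 8, 3, padding=1)
necks = nn.ModuleList([nn.Conv2d(8, 8, 1), nn.Identity()])   # real neck + trailing identity
heads = nn.ModuleList([nn.Conv2d(8, 17, 1), nn.Conv2d(8, 1, 1)])
head2neck = {0: 0}  # head 0 uses neck 0; head 1 falls back to the identity (-1)

features = backbone(torch.randn(2, 3, 64, 64))
outputs = [head(necks[head2neck.get(i, -1)](features))
           for i, head in enumerate(heads)]
print([tuple(o.shape) for o in outputs])   # [(2, 17, 64, 64), (2, 1, 64, 64)]
```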
diff --git a/vendor/ViTPose/mmpose/models/detectors/multiview_pose.py b/vendor/ViTPose/mmpose/models/detectors/multiview_pose.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3d2221eee4198d0cbaad7c8e7031f85dc35cf33
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/multiview_pose.py
@@ -0,0 +1,889 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.runner import load_checkpoint
+
+from mmpose.core.camera import SimpleCameraTorch
+from mmpose.core.post_processing.post_transforms import (
+ affine_transform_torch, get_affine_transform)
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+
+class ProjectLayer(nn.Module):
+
+ def __init__(self, image_size, heatmap_size):
+ """Project layer to get voxel feature. Adapted from
+ https://github.com/microsoft/voxelpose-
+ pytorch/blob/main/lib/models/project_layer.py.
+
+ Args:
+ image_size (int or list): input size of the 2D model
+ heatmap_size (int or list): output size of the 2D model
+ """
+ super(ProjectLayer, self).__init__()
+ self.image_size = image_size
+ self.heatmap_size = heatmap_size
+ if isinstance(self.image_size, int):
+ self.image_size = [self.image_size, self.image_size]
+ if isinstance(self.heatmap_size, int):
+ self.heatmap_size = [self.heatmap_size, self.heatmap_size]
+
+ def compute_grid(self, box_size, box_center, num_bins, device=None):
+ if isinstance(box_size, int) or isinstance(box_size, float):
+ box_size = [box_size, box_size, box_size]
+ if isinstance(num_bins, int):
+ num_bins = [num_bins, num_bins, num_bins]
+
+ grid_1D_x = torch.linspace(
+ -box_size[0] / 2, box_size[0] / 2, num_bins[0], device=device)
+ grid_1D_y = torch.linspace(
+ -box_size[1] / 2, box_size[1] / 2, num_bins[1], device=device)
+ grid_1D_z = torch.linspace(
+ -box_size[2] / 2, box_size[2] / 2, num_bins[2], device=device)
+ grid_x, grid_y, grid_z = torch.meshgrid(
+ grid_1D_x + box_center[0],
+ grid_1D_y + box_center[1],
+ grid_1D_z + box_center[2],
+ )
+ grid_x = grid_x.contiguous().view(-1, 1)
+ grid_y = grid_y.contiguous().view(-1, 1)
+ grid_z = grid_z.contiguous().view(-1, 1)
+ grid = torch.cat([grid_x, grid_y, grid_z], dim=1)
+
+ return grid
+
+ def get_voxel(self, feature_maps, meta, grid_size, grid_center, cube_size):
+ device = feature_maps[0].device
+ batch_size = feature_maps[0].shape[0]
+ num_channels = feature_maps[0].shape[1]
+ num_bins = cube_size[0] * cube_size[1] * cube_size[2]
+ n = len(feature_maps)
+ cubes = torch.zeros(
+ batch_size, num_channels, 1, num_bins, n, device=device)
+ w, h = self.heatmap_size
+ grids = torch.zeros(batch_size, num_bins, 3, device=device)
+ bounding = torch.zeros(batch_size, 1, 1, num_bins, n, device=device)
+ for i in range(batch_size):
+ if len(grid_center[0]) == 3 or grid_center[i][3] >= 0:
+ if len(grid_center) == 1:
+ grid = self.compute_grid(
+ grid_size, grid_center[0], cube_size, device=device)
+ else:
+ grid = self.compute_grid(
+ grid_size, grid_center[i], cube_size, device=device)
+ grids[i:i + 1] = grid
+ for c in range(n):
+ center = meta[i]['center'][c]
+ scale = meta[i]['scale'][c]
+
+ width, height = center * 2
+ trans = torch.as_tensor(
+ get_affine_transform(center, scale / 200.0, 0,
+ self.image_size),
+ dtype=torch.float,
+ device=device)
+
+ cam_param = meta[i]['camera'][c].copy()
+
+ single_view_camera = SimpleCameraTorch(
+ param=cam_param, device=device)
+ xy = single_view_camera.world_to_pixel(grid)
+
+ bounding[i, 0, 0, :, c] = (xy[:, 0] >= 0) & (
+ xy[:, 1] >= 0) & (xy[:, 0] < width) & (
+ xy[:, 1] < height)
+ xy = torch.clamp(xy, -1.0, max(width, height))
+ xy = affine_transform_torch(xy, trans)
+ xy = xy * torch.tensor(
+ [w, h], dtype=torch.float,
+ device=device) / torch.tensor(
+ self.image_size, dtype=torch.float, device=device)
+ sample_grid = xy / torch.tensor([w - 1, h - 1],
+ dtype=torch.float,
+ device=device) * 2.0 - 1.0
+ sample_grid = torch.clamp(
+ sample_grid.view(1, 1, num_bins, 2), -1.1, 1.1)
+
+ cubes[i:i + 1, :, :, :, c] += F.grid_sample(
+ feature_maps[c][i:i + 1, :, :, :],
+ sample_grid,
+ align_corners=True)
+
+ cubes = torch.sum(
+ torch.mul(cubes, bounding), dim=-1) / (
+ torch.sum(bounding, dim=-1) + 1e-6)
+ cubes[cubes != cubes] = 0.0
+ cubes = cubes.clamp(0.0, 1.0)
+
+ cubes = cubes.view(batch_size, num_channels, cube_size[0],
+ cube_size[1], cube_size[2])
+ return cubes, grids
+
+ def forward(self, feature_maps, meta, grid_size, grid_center, cube_size):
+ cubes, grids = self.get_voxel(feature_maps, meta, grid_size,
+ grid_center, cube_size)
+ return cubes, grids
+
+
+@POSENETS.register_module()
+class DetectAndRegress(BasePose):
+ """DetectAndRegress approach for multiview human pose detection.
+
+ Args:
+ backbone (ConfigDict): Dictionary to construct the 2D pose detector
+ human_detector (ConfigDict): dictionary to construct human detector
+ pose_regressor (ConfigDict): dictionary to construct pose regressor
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained 2D model. Default: None.
+ freeze_2d (bool): Whether to freeze the 2D model in training.
+ Default: True.
+ """
+
+ def __init__(self,
+ backbone,
+ human_detector,
+ pose_regressor,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ freeze_2d=True):
+ super(DetectAndRegress, self).__init__()
+ if backbone is not None:
+ self.backbone = builder.build_posenet(backbone)
+ if self.training and pretrained is not None:
+ load_checkpoint(self.backbone, pretrained)
+ else:
+ self.backbone = None
+
+ self.freeze_2d = freeze_2d
+ self.human_detector = builder.MODELS.build(human_detector)
+ self.pose_regressor = builder.MODELS.build(pose_regressor)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ @staticmethod
+ def _freeze(model):
+ """Freeze parameters."""
+ model.eval()
+ for param in model.parameters():
+ param.requires_grad = False
+
+ def train(self, mode=True):
+ """Sets the module in training mode.
+ Args:
+ mode (bool): whether to set training mode (``True``)
+ or evaluation mode (``False``). Default: ``True``.
+
+ Returns:
+ Module: self
+ """
+ super().train(mode)
+ if mode and self.freeze_2d and self.backbone is not None:
+ self._freeze(self.backbone)
+
+ return self
+
+ def forward(self,
+ img=None,
+ img_metas=None,
+ return_loss=True,
+ targets=None,
+ masks=None,
+ targets_3d=None,
+ input_heatmaps=None,
+ **kwargs):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ targets (list(torch.Tensor[NxKxHxW])):
+ Multi-camera target feature_maps of the 2D model.
+ masks (list(torch.Tensor[NxHxW])):
+ Multi-camera masks of the input to the 2D model.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+ **kwargs:
+
+ Returns:
+ dict: if 'return_loss' is true, then return losses.
+ Otherwise, return predicted poses, human centers and sample_id
+
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, targets, masks,
+ targets_3d, input_heatmaps)
+ else:
+ return self.forward_test(img, img_metas, input_heatmaps)
+
+ def train_step(self, data_batch, optimizer, **kwargs):
+ """The iteration step during training.
+
+ This method defines an iteration step during training, except for the
+ back propagation and optimizer updating, which are done in an optimizer
+ hook. Note that in some complicated cases or models, the whole process
+ including back propagation and optimizer updating is also defined in
+ this method, such as GAN.
+
+ Args:
+ data_batch (dict): The output of dataloader.
+ optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+ runner is passed to ``train_step()``. This argument is unused
+ and reserved.
+
+ Returns:
+ dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+ ``num_samples``.
+ ``loss`` is a tensor for back propagation, which can be a
+ weighted sum of multiple losses.
+ ``log_vars`` contains all the variables to be sent to the
+ logger.
+ ``num_samples`` indicates the batch size (when the model is
+ DDP, it means the batch size on each GPU), which is used for
+ averaging the logs.
+ """
+ losses = self.forward(**data_batch)
+
+ loss, log_vars = self._parse_losses(losses)
+ if 'img' in data_batch:
+ batch_size = data_batch['img'][0].shape[0]
+ else:
+ assert 'input_heatmaps' in data_batch
+ batch_size = data_batch['input_heatmaps'][0][0].shape[0]
+
+ outputs = dict(loss=loss, log_vars=log_vars, num_samples=batch_size)
+
+ return outputs
+
+ def forward_train(self,
+ img,
+ img_metas,
+ targets=None,
+ masks=None,
+ targets_3d=None,
+ input_heatmaps=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ targets (list(torch.Tensor[NxKxHxW])):
+ Multi-camera target feature_maps of the 2D model.
+ masks (list(torch.Tensor[NxHxW])):
+ Multi-camera masks of the input to the 2D model.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+
+ Returns:
+ dict: losses.
+
+ """
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ losses = dict()
+ human_candidates, human_loss = self.human_detector.forward_train(
+ None, img_metas, feature_maps, targets_3d, return_preds=True)
+ losses.update(human_loss)
+
+ pose_loss = self.pose_regressor(
+ None,
+ img_metas,
+ return_loss=True,
+ feature_maps=feature_maps,
+ human_candidates=human_candidates)
+ losses.update(pose_loss)
+
+ if not self.freeze_2d:
+ losses_2d = {}
+ heatmaps_tensor = torch.cat(feature_maps, dim=0)
+ targets_tensor = torch.cat(targets, dim=0)
+ masks_tensor = torch.cat(masks, dim=0)
+ losses_2d_ = self.backbone.get_loss(heatmaps_tensor,
+ targets_tensor, masks_tensor)
+ for k, v in losses_2d_.items():
+ losses_2d[k + '_2d'] = v
+ losses.update(losses_2d)
+
+ return losses
+
+ def forward_test(
+ self,
+ img,
+ img_metas,
+ input_heatmaps=None,
+ ):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ input_heatmaps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps when the 2D model is not available.
+ Default: None.
+
+ Returns:
+ dict: predicted poses, human centers and sample_id
+
+ """
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ human_candidates = self.human_detector.forward_test(
+ None, img_metas, feature_maps)
+
+ human_poses = self.pose_regressor(
+ None,
+ img_metas,
+ return_loss=False,
+ feature_maps=feature_maps,
+ human_candidates=human_candidates)
+
+ result = {}
+ result['pose_3d'] = human_poses.cpu().numpy()
+ result['human_detection_3d'] = human_candidates.cpu().numpy()
+ result['sample_id'] = [img_meta['sample_id'] for img_meta in img_metas]
+
+ return result
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, img, input_heatmaps=None, num_candidates=5):
+ """Used for computing network FLOPs."""
+ if self.backbone is None:
+ assert input_heatmaps is not None
+ feature_maps = []
+ for input_heatmap in input_heatmaps:
+ feature_maps.append(input_heatmap[0])
+ else:
+ feature_maps = []
+ assert isinstance(img, list)
+ for img_ in img:
+ feature_maps.append(self.backbone.forward_dummy(img_)[0])
+
+ _ = self.human_detector.forward_dummy(feature_maps)
+
+ _ = self.pose_regressor.forward_dummy(feature_maps, num_candidates)
+
+
+@POSENETS.register_module()
+class VoxelSinglePose(BasePose):
+ """VoxelPose Please refer to the `paper `
+ for details.
+
+ Args:
+ image_size (list): input size of the 2D model.
+ heatmap_size (list): output size of the 2D model.
+ sub_space_size (list): Size of the cuboid human proposal.
+ sub_cube_size (list): Size of the input volume to the pose net.
+ pose_net (ConfigDict): Dictionary to construct the pose net.
+ pose_head (ConfigDict): Dictionary to construct the pose head.
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ """
+
+ def __init__(
+ self,
+ image_size,
+ heatmap_size,
+ sub_space_size,
+ sub_cube_size,
+ num_joints,
+ pose_net,
+ pose_head,
+ train_cfg=None,
+ test_cfg=None,
+ ):
+ super(VoxelSinglePose, self).__init__()
+ self.project_layer = ProjectLayer(image_size, heatmap_size)
+ self.pose_net = builder.build_backbone(pose_net)
+ self.pose_head = builder.build_head(pose_head)
+
+ self.sub_space_size = sub_space_size
+ self.sub_cube_size = sub_cube_size
+
+ self.num_joints = num_joints
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ def forward(self,
+ img,
+ img_metas,
+ return_loss=True,
+ feature_maps=None,
+ human_candidates=None,
+ **kwargs):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, feature_maps,
+ human_candidates)
+ else:
+ return self.forward_test(img, img_metas, feature_maps,
+ human_candidates)
+
+ def forward_train(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ human_candidates=None,
+ return_preds=False,
+ **kwargs):
+ """Defines the computation performed at training.
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+ return_preds (bool): Whether to return prediction results
+
+ Returns:
+ dict: losses.
+
+ """
+ batch_size, num_candidates, _ = human_candidates.shape
+ pred = human_candidates.new_zeros(batch_size, num_candidates,
+ self.num_joints, 5)
+ pred[:, :, :, 3:] = human_candidates[:, :, None, 3:]
+
+ device = feature_maps[0].device
+ gt_3d = torch.stack([
+ torch.tensor(img_meta['joints_3d'], device=device)
+ for img_meta in img_metas
+ ])
+ gt_3d_vis = torch.stack([
+ torch.tensor(img_meta['joints_3d_visible'], device=device)
+ for img_meta in img_metas
+ ])
+ valid_preds = []
+ valid_targets = []
+ valid_weights = []
+
+ for n in range(num_candidates):
+ index = pred[:, n, 0, 3] >= 0
+ num_valid = index.sum()
+ if num_valid > 0:
+ pose_input_cube, coordinates \
+ = self.project_layer(feature_maps,
+ img_metas,
+ self.sub_space_size,
+ human_candidates[:, n, :3],
+ self.sub_cube_size)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d[index],
+ coordinates[index])
+
+ pred[index, n, :, 0:3] = pose_3d.detach()
+ valid_targets.append(gt_3d[index, pred[index, n, 0, 3].long()])
+ valid_weights.append(gt_3d_vis[index, pred[index, n, 0,
+ 3].long(), :,
+ 0:1].float())
+ valid_preds.append(pose_3d)
+
+ losses = dict()
+ if len(valid_preds) > 0:
+ valid_targets = torch.cat(valid_targets, dim=0)
+ valid_weights = torch.cat(valid_weights, dim=0)
+ valid_preds = torch.cat(valid_preds, dim=0)
+ losses.update(
+ self.pose_head.get_loss(valid_preds, valid_targets,
+ valid_weights))
+ else:
+ pose_input_cube = feature_maps[0].new_zeros(
+ batch_size, self.num_joints, *self.sub_cube_size)
+ coordinates = feature_maps[0].new_zeros(batch_size,
+ *self.sub_cube_size,
+ 3).view(batch_size, -1, 3)
+ pseudo_targets = feature_maps[0].new_zeros(batch_size,
+ self.num_joints, 3)
+ pseudo_weights = feature_maps[0].new_zeros(batch_size,
+ self.num_joints, 1)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d, coordinates)
+ losses.update(
+ self.pose_head.get_loss(pose_3d, pseudo_targets,
+ pseudo_weights))
+ if return_preds:
+ return pred, losses
+ else:
+ return losses
+
+ def forward_test(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ human_candidates=None,
+ **kwargs):
+ """Defines the computation performed at training.
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ feature_maps width: W
+ feature_maps height: H
+ volume_length: cubeL
+ volume_width: cubeW
+ volume_height: cubeH
+
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ feature_maps (list(torch.Tensor[NxCxHxW])):
+ Multi-camera input feature_maps.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ human_candidates (torch.Tensor[NxPx5]):
+ Human candidates.
+
+ Returns:
+ dict: predicted poses, human centers and sample_id
+
+ """
+ batch_size, num_candidates, _ = human_candidates.shape
+ pred = human_candidates.new_zeros(batch_size, num_candidates,
+ self.num_joints, 5)
+ pred[:, :, :, 3:] = human_candidates[:, :, None, 3:]
+
+ for n in range(num_candidates):
+ index = pred[:, n, 0, 3] >= 0
+ num_valid = index.sum()
+ if num_valid > 0:
+ pose_input_cube, coordinates \
+ = self.project_layer(feature_maps,
+ img_metas,
+ self.sub_space_size,
+ human_candidates[:, n, :3],
+ self.sub_cube_size)
+ pose_heatmaps_3d = self.pose_net(pose_input_cube)
+ pose_3d = self.pose_head(pose_heatmaps_3d[index],
+ coordinates[index])
+
+ pred[index, n, :, 0:3] = pose_3d.detach()
+
+ return pred
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, feature_maps, num_candidates=5):
+ """Used for computing network FLOPs."""
+ batch_size, num_channels, _, _ = feature_maps[0].shape
+ pose_input_cube = feature_maps[0].new_zeros(batch_size, num_channels,
+ *self.sub_cube_size)
+ for n in range(num_candidates):
+ _ = self.pose_net(pose_input_cube)
+
+
+@POSENETS.register_module()
+class VoxelCenterDetector(BasePose):
+ """Detect human center by 3D CNN on voxels.
+
+ Please refer to the VoxelPose paper for details.
+ Args:
+ image_size (list): input size of the 2D model.
+ heatmap_size (list): output size of the 2D model.
+ space_size (list): Size of the 3D space.
+ cube_size (list): Size of the input volume to the 3D CNN.
+ space_center (list): Coordinate of the center of the 3D space.
+ center_net (ConfigDict): Dictionary to construct the center net.
+ center_head (ConfigDict): Dictionary to construct the center head.
+ train_cfg (ConfigDict): Config for training. Default: None.
+ test_cfg (ConfigDict): Config for testing. Default: None.
+ """
+
+ def __init__(
+ self,
+ image_size,
+ heatmap_size,
+ space_size,
+ cube_size,
+ space_center,
+ center_net,
+ center_head,
+ train_cfg=None,
+ test_cfg=None,
+ ):
+ super(VoxelCenterDetector, self).__init__()
+ self.project_layer = ProjectLayer(image_size, heatmap_size)
+ self.center_net = builder.build_backbone(center_net)
+ self.center_head = builder.build_head(center_head)
+
+ self.space_size = space_size
+ self.cube_size = cube_size
+ self.space_center = space_center
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ def assign2gt(self, center_candidates, gt_centers, gt_num_persons):
+ """"Assign gt id to each valid human center candidate."""
+ det_centers = center_candidates[..., :3]
+ batch_size = center_candidates.shape[0]
+ cand_num = center_candidates.shape[1]
+ cand2gt = torch.zeros(batch_size, cand_num)
+
+ for i in range(batch_size):
+ cand = det_centers[i].view(cand_num, 1, -1)
+ gt = gt_centers[None, i, :gt_num_persons[i]]
+
+ dist = torch.sqrt(torch.sum((cand - gt)**2, dim=-1))
+ min_dist, min_gt = torch.min(dist, dim=-1)
+
+ cand2gt[i] = min_gt
+ cand2gt[i][min_dist > self.train_cfg['dist_threshold']] = -1.0
+
+ center_candidates[:, :, 3] = cand2gt
+
+ return center_candidates
+
+ def forward(self,
+ img,
+ img_metas,
+ return_loss=True,
+ feature_maps=None,
+ targets_3d=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ return_loss: Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ Returns:
+ dict: if 'return_loss' is true, then return losses.
+ Otherwise, return predicted poses
+ """
+ if return_loss:
+ return self.forward_train(img, img_metas, feature_maps, targets_3d)
+ else:
+ return self.forward_test(img, img_metas, feature_maps)
+
+ def forward_train(self,
+ img,
+ img_metas,
+ feature_maps=None,
+ targets_3d=None,
+ return_preds=False):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ targets_3d (torch.Tensor[NxcubeLxcubeWxcubeH]):
+ Ground-truth 3D heatmap of human centers.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ return_preds (bool): Whether to return prediction results
+ Returns:
+ dict: if 'return_pred' is true, then return losses
+ and human centers. Otherwise, return losses only
+ """
+ initial_cubes, _ = self.project_layer(feature_maps, img_metas,
+ self.space_size,
+ [self.space_center],
+ self.cube_size)
+ center_heatmaps_3d = self.center_net(initial_cubes)
+ center_heatmaps_3d = center_heatmaps_3d.squeeze(1)
+ center_candidates = self.center_head(center_heatmaps_3d)
+
+ device = center_candidates.device
+
+ gt_centers = torch.stack([
+ torch.tensor(img_meta['roots_3d'], device=device)
+ for img_meta in img_metas
+ ])
+ gt_num_persons = torch.stack([
+ torch.tensor(img_meta['num_persons'], device=device)
+ for img_meta in img_metas
+ ])
+ center_candidates = self.assign2gt(center_candidates, gt_centers,
+ gt_num_persons)
+
+ losses = dict()
+ losses.update(
+ self.center_head.get_loss(center_heatmaps_3d, targets_3d))
+
+ if return_preds:
+ return center_candidates, losses
+ else:
+ return losses
+
+ def forward_test(self, img, img_metas, feature_maps=None):
+ """
+ Note:
+ batch_size: N
+ num_keypoints: K
+ num_img_channel: C
+ img_width: imgW
+ img_height: imgH
+ heatmaps width: W
+ heatmaps height: H
+ Args:
+ img (list(torch.Tensor[NxCximgHximgW])):
+ Multi-camera input images to the 2D model.
+ img_metas (list(dict)):
+ Information about image, 3D groundtruth and camera parameters.
+ feature_maps (list(torch.Tensor[NxKxHxW])):
+ Multi-camera feature_maps.
+ Returns:
+ human centers
+ """
+ initial_cubes, _ = self.project_layer(feature_maps, img_metas,
+ self.space_size,
+ [self.space_center],
+ self.cube_size)
+ center_heatmaps_3d = self.center_net(initial_cubes)
+ center_heatmaps_3d = center_heatmaps_3d.squeeze(1)
+ center_candidates = self.center_head(center_heatmaps_3d)
+ center_candidates[..., 3] = \
+ (center_candidates[..., 4] >
+ self.test_cfg['center_threshold']).float() - 1.0
+
+ return center_candidates
+
+ def show_result(self, **kwargs):
+ """Visualize the results."""
+ raise NotImplementedError
+
+ def forward_dummy(self, feature_maps):
+ """Used for computing network FLOPs."""
+ batch_size, num_channels, _, _ = feature_maps[0].shape
+ initial_cubes = feature_maps[0].new_zeros(batch_size, num_channels,
+ *self.cube_size)
+ _ = self.center_net(initial_cubes)
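The candidate matching in `VoxelCenterDetector.assign2gt` keeps, for each detected 3D center, the index of its nearest ground-truth root and marks candidates farther than `dist_threshold` with `-1`. A self-contained sketch of that matching for a single sample (the threshold and coordinates below are hypothetical, in millimetres):

```python
import torch

def assign_candidates_to_gt(candidates, gt_centers, dist_threshold=500.0):
    """Per candidate, return the index of the closest gt center,
    or -1.0 if that center is farther than dist_threshold."""
    dist = torch.cdist(candidates, gt_centers)   # (num_candidates, num_gt)
    min_dist, min_gt = dist.min(dim=-1)
    assignment = min_gt.float()
    assignment[min_dist > dist_threshold] = -1.0
    return assignment

cand = torch.tensor([[0.0, 0.0, 900.0], [4000.0, 0.0, 900.0]])   # detected centers
gt = torch.tensor([[100.0, -50.0, 950.0]])                        # ground-truth roots
print(assign_candidates_to_gt(cand, gt))   # tensor([ 0., -1.])
```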
diff --git a/vendor/ViTPose/mmpose/models/detectors/pose_lifter.py b/vendor/ViTPose/mmpose/models/detectors/pose_lifter.py
new file mode 100644
index 0000000000000000000000000000000000000000..ace6b9f3e8b0363666da5d96858b3864213aeabe
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/pose_lifter.py
@@ -0,0 +1,392 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+from mmcv.utils.misc import deprecated_api_warning
+
+from mmpose.core import imshow_bboxes, imshow_keypoints, imshow_keypoints_3d
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class PoseLifter(BasePose):
+ """Pose lifter that lifts 2D pose to 3D pose.
+
+ The basic model is a pose model that predicts root-relative pose. If
+ traj_head is not None, a trajectory model that predicts absolute root joint
+ position is also built.
+
+ Args:
+ backbone (dict): Config for the backbone of pose model.
+ neck (dict|None): Config for the neck of pose model.
+ keypoint_head (dict|None): Config for the head of pose model.
+ traj_backbone (dict|None): Config for the backbone of trajectory model.
+ If traj_backbone is None and traj_head is not None, trajectory
+ model will share backbone with pose model.
+ traj_neck (dict|None): Config for the neck of trajectory model.
+ traj_head (dict|None): Config for the head of trajectory model.
+ loss_semi (dict|None): Config for semi-supervision loss.
+ train_cfg (dict|None): Config for keypoint head during training.
+ test_cfg (dict|None): Config for keypoint head during testing.
+ pretrained (str|None): Path to pretrained weights.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ traj_backbone=None,
+ traj_neck=None,
+ traj_head=None,
+ loss_semi=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ # pose model
+ self.backbone = builder.build_backbone(backbone)
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ # trajectory model
+ if traj_head is not None:
+ self.traj_head = builder.build_head(traj_head)
+
+ if traj_backbone is not None:
+ self.traj_backbone = builder.build_backbone(traj_backbone)
+ else:
+ self.traj_backbone = self.backbone
+
+ if traj_neck is not None:
+ self.traj_neck = builder.build_neck(traj_neck)
+
+ # semi-supervised learning
+ self.semi = loss_semi is not None
+ if self.semi:
+ assert keypoint_head is not None and traj_head is not None
+ self.loss_semi = builder.build_loss(loss_semi)
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has keypoint_neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ @property
+ def with_traj_backbone(self):
+ """Check if has trajectory_backbone."""
+ return hasattr(self, 'traj_backbone')
+
+ @property
+ def with_traj_neck(self):
+ """Check if has trajectory_neck."""
+ return hasattr(self, 'traj_neck')
+
+ @property
+ def with_traj(self):
+ """Check if has trajectory_head."""
+ return hasattr(self, 'traj_head')
+
+ @property
+ def causal(self):
+ if hasattr(self.backbone, 'causal'):
+ return self.backbone.causal
+ else:
+ raise AttributeError('A PoseLifter\'s backbone should have '
+ 'the bool attribute "causal" to indicate if '
+ 'it performs causal inference.')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+ if self.with_traj_backbone:
+ self.traj_backbone.init_weights(pretrained)
+ if self.with_traj_neck:
+ self.traj_neck.init_weights()
+ if self.with_traj:
+ self.traj_head.init_weights()
+
+ @auto_fp16(apply_to=('input', ))
+ def forward(self,
+ input,
+ target=None,
+ target_weight=None,
+ metas=None,
+ return_loss=True,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True.
+
+ Note:
+ - batch_size: N
+ - num_input_keypoints: Ki
+ - input_keypoint_dim: Ci
+ - input_sequence_len: Ti
+ - num_output_keypoints: Ko
+ - output_keypoint_dim: Co
+ - output_sequence_len: To
+
+ Args:
+ input (torch.Tensor[NxKixCixTi]): Input keypoint coordinates.
+ target (torch.Tensor[NxKoxCoxTo]): Output keypoint coordinates.
+ Defaults to None.
+ target_weight (torch.Tensor[NxKox1]): Weights across different
+ joint types. Defaults to None.
+ metas (list(dict)): Information about data augmentation
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+
+ Returns:
+ dict|Tensor: If `return_loss` is true, return losses. \
+ Otherwise return predicted poses.
+ """
+ if return_loss:
+ return self.forward_train(input, target, target_weight, metas,
+ **kwargs)
+ else:
+ return self.forward_test(input, metas, **kwargs)
+
+ def forward_train(self, input, target, target_weight, metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ assert input.size(0) == len(metas)
+
+ # supervised learning
+ # pose model
+ features = self.backbone(input)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head(features)
+
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight, metas)
+ losses.update(keypoint_losses)
+ losses.update(keypoint_accuracy)
+
+ # trajectory model
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_traj_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head(traj_features)
+
+ traj_losses = self.traj_head.get_loss(traj_output,
+ kwargs['traj_target'], None)
+ losses.update(traj_losses)
+
+ # semi-supervised learning
+ if self.semi:
+ ul_input = kwargs['unlabeled_input']
+ ul_features = self.backbone(ul_input)
+ if self.with_neck:
+ ul_features = self.neck(ul_features)
+ ul_output = self.keypoint_head(ul_features)
+
+ ul_traj_features = self.traj_backbone(ul_input)
+ if self.with_traj_neck:
+ ul_traj_features = self.traj_neck(ul_traj_features)
+ ul_traj_output = self.traj_head(ul_traj_features)
+
+ output_semi = dict(
+ labeled_pose=output,
+ unlabeled_pose=ul_output,
+ unlabeled_traj=ul_traj_output)
+ target_semi = dict(
+ unlabeled_target_2d=kwargs['unlabeled_target_2d'],
+ intrinsics=kwargs['intrinsics'])
+
+ semi_losses = self.loss_semi(output_semi, target_semi)
+ losses.update(semi_losses)
+
+ return losses
+
+ def forward_test(self, input, metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ assert input.size(0) == len(metas)
+
+ results = {}
+
+ features = self.backbone(input)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output = self.keypoint_head.inference_model(features)
+ keypoint_result = self.keypoint_head.decode(metas, output)
+ results.update(keypoint_result)
+
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_traj_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head.inference_model(traj_features)
+ results['traj_preds'] = traj_output
+
+ return results
+
+ def forward_dummy(self, input):
+ """Used for computing network FLOPs. See ``tools/get_flops.py``.
+
+ Args:
+ input (torch.Tensor): Input pose
+
+ Returns:
+ Tensor: Model output
+ """
+ output = self.backbone(input)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ if self.with_traj:
+ traj_features = self.traj_backbone(input)
+ if self.with_traj_neck:
+ traj_features = self.traj_neck(traj_features)
+ traj_output = self.traj_head(traj_features)
+ output = output + traj_output
+
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='PoseLifter')
+ def show_result(self,
+ result,
+ img=None,
+ skeleton=None,
+ pose_kpt_color=None,
+ pose_link_color=None,
+ radius=8,
+ thickness=2,
+ vis_height=400,
+ num_instances=-1,
+ win_name='',
+ show=False,
+ wait_time=0,
+ out_file=None):
+ """Visualize 3D pose estimation results.
+
+ Args:
+ result (list[dict]): The pose estimation results containing:
+
+ - "keypoints_3d" ([K,4]): 3D keypoints
+ - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
+ 2D inputs. If a sequence is given, only the last frame
+ will be used for visualization
+ - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
+ - "title" (str): title for the subplot
+ img (str or Tensor): Optional. The image to visualize 2D inputs on.
+ skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
+ links, each is a pair of joint indices.
+ pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ vis_height (int): The image height of the visualization. The width
+ will be N*vis_height depending on the number of visualized
+ items.
+ win_name (str): The window name.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ if num_instances < 0:
+ assert len(result) > 0
+ result = sorted(result, key=lambda x: x.get('track_id', 1e4))
+
+ # draw image and input 2d poses
+ if img is not None:
+ img = mmcv.imread(img)
+
+ bbox_result = []
+ pose_input_2d = []
+ for res in result:
+ if 'bbox' in res:
+ bbox = np.array(res['bbox'])
+ if bbox.ndim != 1:
+ assert bbox.ndim == 2
+ bbox = bbox[-1] # Get bbox from the last frame
+ bbox_result.append(bbox)
+ if 'keypoints' in res:
+ kpts = np.array(res['keypoints'])
+ if kpts.ndim != 2:
+ assert kpts.ndim == 3
+ kpts = kpts[-1] # Get 2D keypoints from the last frame
+ pose_input_2d.append(kpts)
+
+ if len(bbox_result) > 0:
+ bboxes = np.vstack(bbox_result)
+ imshow_bboxes(
+ img,
+ bboxes,
+ colors='green',
+ thickness=thickness,
+ show=False)
+ if len(pose_input_2d) > 0:
+ imshow_keypoints(
+ img,
+ pose_input_2d,
+ skeleton,
+ kpt_score_thr=0.3,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ radius=radius,
+ thickness=thickness)
+ img = mmcv.imrescale(img, scale=vis_height / img.shape[0])
+
+ img_vis = imshow_keypoints_3d(
+ result,
+ img,
+ skeleton,
+ pose_kpt_color,
+ pose_link_color,
+ vis_height,
+ num_instances=num_instances)
+
+ if show:
+ mmcv.visualization.imshow(img_vis, win_name, wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(img_vis, out_file)
+
+ return img_vis
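`PoseLifter.forward` consumes 2D keypoint sequences shaped `[N, Ki, Ci, Ti]` and the keypoint head outputs root-relative 3D poses shaped `[N, Ko, Co, To]`. A shape-level sketch with a toy linear layer standing in for the temporal backbone built from the config (the layer and dimensions are illustrative, not the real model):

```python
import torch
import torch.nn as nn

N, Ki, Ci, Ti = 4, 17, 2, 27   # batch, input joints, (x, y), temporal window
Ko, Co = 17, 3                 # output joints, (x, y, z); one output frame

toy_lifter = nn.Linear(Ki * Ci * Ti, Ko * Co)   # stand-in for backbone + keypoint_head

keypoints_2d = torch.randn(N, Ki, Ci, Ti)
pred_3d = toy_lifter(keypoints_2d.flatten(1)).view(N, Ko, Co, 1)
print(pred_3d.shape)           # torch.Size([4, 17, 3, 1])
```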
diff --git a/vendor/ViTPose/mmpose/models/detectors/posewarper.py b/vendor/ViTPose/mmpose/models/detectors/posewarper.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa1d05f2a4f73728400ebe5205703bf96110c31a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/posewarper.py
@@ -0,0 +1,244 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+import torch
+
+from ..builder import POSENETS
+from .top_down import TopDown
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class PoseWarper(TopDown):
+ """Top-down pose detectors for multi-frame settings for video inputs.
+
+ `"Learning temporal pose estimation from sparsely-labeled videos"
+ `_.
+
+ A child class of TopDown detector. The main difference between PoseWarper
+ and TopDown lies in that the former takes a list of tensors as input image
+ while the latter takes a single tensor as input image in forward method.
+
+ Args:
+ backbone (dict): Backbone modules to extract features.
+ neck (dict): intermediate modules to transform features.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated argument. Please use
+ `loss_keypoint` for heads instead.
+ concat_tensors (bool): Whether to concat the tensors on the batch dim,
+ which can speed up computation. Default: True
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None,
+ concat_tensors=True):
+ super().__init__(
+ backbone=backbone,
+ neck=neck,
+ keypoint_head=keypoint_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained,
+ loss_pose=loss_pose)
+ self.concat_tensors = concat_tensors
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - number of frames: F
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ img (list[F, torch.Tensor[N,C,imgH,imgW]]): multiple input frames.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps for one frame.
+ target_weight (torch.Tensor[N,K,1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: paths to multiple video frames
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to `return loss`. `return loss=True`
+ for training, `return loss=False` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if `return loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, imgs, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ # imgs (list[Fxtorch.Tensor[NxCximgHximgW]]): multiple input frames
+ assert imgs[0].size(0) == len(img_metas)
+ num_frames = len(imgs)
+ frame_weight = img_metas[0]['frame_weight']
+
+ assert num_frames == len(frame_weight), f'The number of frames ' \
+ f'({num_frames}) and the length of weights for each frame ' \
+ f'({len(frame_weight)}) must match'
+
+ if self.concat_tensors:
+ features = [self.backbone(torch.cat(imgs, 0))]
+ else:
+ features = [self.backbone(img) for img in imgs]
+
+ if self.with_neck:
+ features = self.neck(features, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output = self.keypoint_head(features)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ losses.update(keypoint_losses)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight)
+ losses.update(keypoint_accuracy)
+
+ return losses
+
+ def forward_test(self, imgs, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ # imgs (list[Fxtorch.Tensor[NxCximgHximgW]]): multiple input frames
+ assert imgs[0].size(0) == len(img_metas)
+ num_frames = len(imgs)
+ frame_weight = img_metas[0]['frame_weight']
+
+ assert num_frames == len(frame_weight), f'The number of frames ' \
+ f'({num_frames}) and the length of weights for each frame ' \
+ f'({len(frame_weight)}) must match'
+
+ batch_size, _, img_height, img_width = imgs[0].shape
+
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+
+ if self.concat_tensors:
+ features = [self.backbone(torch.cat(imgs, 0))]
+ else:
+ features = [self.backbone(img) for img in imgs]
+
+ if self.with_neck:
+ features = self.neck(features, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ imgs_flipped = [img.flip(3) for img in imgs]
+
+ if self.concat_tensors:
+ features_flipped = [self.backbone(torch.cat(imgs_flipped, 0))]
+ else:
+ features_flipped = [
+ self.backbone(img_flipped) for img_flipped in imgs_flipped
+ ]
+
+ if self.with_neck:
+ features_flipped = self.neck(
+ features_flipped, frame_weight=frame_weight)
+
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor[N,C,imgH,imgW], or list|tuple of tensors):
+ multiple input frames, N >= 2.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ # concat tensors if they are in a list
+ if isinstance(img, (list, tuple)):
+ img = torch.cat(img, 0)
+
+ batch_size = img.size(0)
+ assert batch_size > 1, 'Input batch size to PoseWarper ' \
+ 'should be larger than 1.'
+ if batch_size == 2:
+ warnings.warn('Current batch size: 2, used for both pytorch2onnx '
+ 'export and FLOPs computation.')
+ else:
+ warnings.warn(
+ f'Current batch size: {batch_size}, for getting flops only.')
+
+ frame_weight = np.random.uniform(0, 1, batch_size)
+ output = [self.backbone(img)]
+
+ if self.with_neck:
+ output = self.neck(output, frame_weight=frame_weight)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
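The `concat_tensors` flag in `PoseWarper` only changes how the F input frames are pushed through the backbone: as one concatenated batch or frame by frame. A toy illustration with a single conv layer standing in for the backbone (frame count and shapes are hypothetical):

```python
import torch
import torch.nn as nn

backbone = nn.Conv2d(3, 16, 3, padding=1)                   # toy stand-in
frames = [torch.randn(2, 3, 256, 192) for _ in range(5)]    # F=5 frames, N=2

features_concat = [backbone(torch.cat(frames, 0))]          # concat_tensors=True: one call
features_per_frame = [backbone(f) for f in frames]          # concat_tensors=False: F calls

print(features_concat[0].shape)        # torch.Size([10, 16, 256, 192])
print(len(features_per_frame), features_per_frame[0].shape)
```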
diff --git a/vendor/ViTPose/mmpose/models/detectors/top_down.py b/vendor/ViTPose/mmpose/models/detectors/top_down.py
new file mode 100644
index 0000000000000000000000000000000000000000..af0ab51c5b230f4bd39d2fdd082e0fb2daf4594f
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/top_down.py
@@ -0,0 +1,307 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+import numpy as np
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core import imshow_bboxes, imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class TopDown(BasePose):
+ """Top-down pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ keypoint_head (dict): Keypoint head to process feature.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated argument. Please use
+ `loss_keypoint` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for TopDown is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to return losses. `return_loss=True`
+ for training, `return_loss=False` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if `return_loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
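+
+ # Illustrative sketch (not from the upstream docs): how the return_loss switch
+ # dispatches the call. `model`, `img`, `target`, `target_weight` and
+ # `img_metas` are assumed to come from a standard mmpose data pipeline.
+ #   >>> losses = model(img, target, target_weight, img_metas, return_loss=True)
+ #   >>> results = model(img, img_metas=img_metas, return_loss=False,
+ #   ...                 return_heatmap=True)
+ # The second call returns a dict of decoded predictions (e.g. 'preds', 'boxes',
+ # 'image_paths') plus 'output_heatmap'.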
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+
+ # if return loss
+ losses = dict()
+ if self.with_keypoint:
+ keypoint_losses = self.keypoint_head.get_loss(
+ output, target, target_weight)
+ losses.update(keypoint_losses)
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output, target, target_weight)
+ losses.update(keypoint_accuracy)
+
+ return losses
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+
+ features = self.backbone(img)
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='TopDown')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ pose_kpt_color=None,
+ pose_link_color=None,
+ text_color='white',
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ bbox_thickness=1,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ skeleton (list[list]): The connection of keypoints.
+ skeleton is 0-based indexing.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+
+ bbox_result = []
+ bbox_labels = []
+ pose_result = []
+ for res in result:
+ if 'bbox' in res:
+ bbox_result.append(res['bbox'])
+ bbox_labels.append(res.get('label', None))
+ pose_result.append(res['keypoints'])
+
+ if bbox_result:
+ bboxes = np.vstack(bbox_result)
+ # draw bounding boxes
+ imshow_bboxes(
+ img,
+ bboxes,
+ labels=bbox_labels,
+ colors=bbox_color,
+ text_color=text_color,
+ thickness=bbox_thickness,
+ font_scale=font_scale,
+ show=False)
+
+ if pose_result:
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius,
+ thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
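+
+ # Illustrative sketch (not part of this file): building a TopDown detector from
+ # a config dict through the POSENETS registry. The backbone/head settings below
+ # are assumptions mirroring typical mmpose configs, not values shipped here.
+ #   >>> from mmpose.models import build_posenet
+ #   >>> cfg = dict(
+ #   ...     type='TopDown',
+ #   ...     backbone=dict(type='ResNet', depth=50),
+ #   ...     keypoint_head=dict(
+ #   ...         type='TopdownHeatmapSimpleHead',
+ #   ...         in_channels=2048,
+ #   ...         out_channels=17,
+ #   ...         loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ #   ...     train_cfg=dict(),
+ #   ...     test_cfg=dict(flip_test=True, shift_heatmap=True))
+ #   >>> model = build_posenet(cfg)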
diff --git a/vendor/ViTPose/mmpose/models/detectors/top_down_moe.py b/vendor/ViTPose/mmpose/models/detectors/top_down_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d499b7ff2723b96104815b3f15fcfcb79489d7d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/detectors/top_down_moe.py
@@ -0,0 +1,351 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import torch
+import torch.nn as nn
+
+import mmcv
+import numpy as np
+from mmcv.image import imwrite
+from mmcv.utils.misc import deprecated_api_warning
+from mmcv.visualization.image import imshow
+
+from mmpose.core import imshow_bboxes, imshow_keypoints
+from .. import builder
+from ..builder import POSENETS
+from .base import BasePose
+
+try:
+ from mmcv.runner import auto_fp16
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import auto_fp16
+
+
+@POSENETS.register_module()
+class TopDownMoE(BasePose):
+ """Top-down pose detectors.
+
+ Args:
+ backbone (dict): Backbone modules to extract feature.
+ keypoint_head (dict): Keypoint head to process feature.
+ associate_keypoint_head (dict|list[dict]): Auxiliary keypoint head(s)
+ for the non-primary datasets. Default: None.
+ train_cfg (dict): Config for training. Default: None.
+ test_cfg (dict): Config for testing. Default: None.
+ pretrained (str): Path to the pretrained models.
+ loss_pose (None): Deprecated arguments. Please use
+ `loss_keypoint` for heads instead.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ keypoint_head=None,
+ associate_keypoint_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None,
+ loss_pose=None):
+ super().__init__()
+ self.fp16_enabled = False
+
+ self.backbone = builder.build_backbone(backbone)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if keypoint_head is not None:
+ keypoint_head['train_cfg'] = train_cfg
+ keypoint_head['test_cfg'] = test_cfg
+
+ if 'loss_keypoint' not in keypoint_head and loss_pose is not None:
+ warnings.warn(
+ '`loss_pose` for TopDown is deprecated, '
+ 'use `loss_keypoint` for heads instead. See '
+ 'https://github.com/open-mmlab/mmpose/pull/382'
+ ' for more information.', DeprecationWarning)
+ keypoint_head['loss_keypoint'] = loss_pose
+
+ self.keypoint_head = builder.build_head(keypoint_head)
+
+
+ associate_keypoint_heads = []
+ keypoint_heads_cnt = 1
+
+ if associate_keypoint_head is not None:
+ if not isinstance(associate_keypoint_head, list):
+ associate_keypoint_head = [associate_keypoint_head]
+ for single_keypoint_head in associate_keypoint_head:
+ single_keypoint_head['train_cfg'] = train_cfg
+ single_keypoint_head['test_cfg'] = test_cfg
+ associate_keypoint_heads.append(builder.build_head(single_keypoint_head))
+ keypoint_heads_cnt += 1
+
+ self.associate_keypoint_heads = nn.ModuleList(associate_keypoint_heads)
+
+ self.keypoint_heads_cnt = keypoint_heads_cnt
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_neck(self):
+ """Check if has neck."""
+ return hasattr(self, 'neck')
+
+ @property
+ def with_keypoint(self):
+ """Check if has keypoint_head."""
+ return hasattr(self, 'keypoint_head')
+
+ def init_weights(self, pretrained=None):
+ """Weight initialization for model."""
+ self.backbone.init_weights(pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ if self.with_keypoint:
+ self.keypoint_head.init_weights()
+ for item in self.associate_keypoint_heads:
+ item.init_weights()
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self,
+ img,
+ target=None,
+ target_weight=None,
+ img_metas=None,
+ return_loss=True,
+ return_heatmap=False,
+ **kwargs):
+ """Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_img_channel: C (Default: 3)
+ - img height: imgH
+ - img width: imgW
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ img (torch.Tensor[NxCximgHximgW]): Input images.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]): Weights across
+ different joint types.
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ return_loss (bool): Option to return losses. `return_loss=True`
+ for training, `return_loss=False` for validation & test.
+ return_heatmap (bool) : Option to return heatmap.
+
+ Returns:
+ dict|tuple: if `return_loss` is true, then return losses. \
+ Otherwise, return predicted poses, boxes, image paths \
+ and heatmaps.
+ """
+ if return_loss:
+ return self.forward_train(img, target, target_weight, img_metas,
+ **kwargs)
+ return self.forward_test(
+ img, img_metas, return_heatmap=return_heatmap, **kwargs)
+
+ def forward_train(self, img, target, target_weight, img_metas, **kwargs):
+ """Defines the computation performed at every call when training."""
+
+ img_sources = torch.from_numpy(np.array([ele['dataset_idx'] for ele in img_metas])).to(img.device)
+
+ output = self.backbone(img, img_sources)
+ if self.with_neck:
+ output = self.neck(output)
+ # if return loss
+ losses = dict()
+
+ main_stream_select = (img_sources == 0)
+ # if torch.sum(main_stream_select) > 0:
+ output_select = self.keypoint_head(output)
+
+ target_select = target * main_stream_select.view(-1, 1, 1, 1)
+ target_weight_select = target_weight * main_stream_select.view(-1, 1, 1)
+
+ keypoint_losses = self.keypoint_head.get_loss(
+ output_select, target_select, target_weight_select)
+ losses['main_stream_loss'] = keypoint_losses['heatmap_loss']
+ keypoint_accuracy = self.keypoint_head.get_accuracy(
+ output_select, target_select, target_weight_select)
+ losses['main_stream_acc'] = keypoint_accuracy['acc_pose']
+
+ for idx in range(1, self.keypoint_heads_cnt):
+ idx_select = (img_sources == idx)
+ target_select = target * idx_select.view(-1, 1, 1, 1)
+ target_weight_select = target_weight * idx_select.view(-1, 1, 1)
+ output_select = self.associate_keypoint_heads[idx - 1](output)
+ keypoint_losses = self.associate_keypoint_heads[idx - 1].get_loss(
+ output_select, target_select, target_weight_select)
+ losses[f'{idx}_loss'] = keypoint_losses['heatmap_loss']
+ keypoint_accuracy = self.associate_keypoint_heads[idx - 1].get_accuracy(
+ output_select, target_select, target_weight_select)
+ losses[f'{idx}_acc'] = keypoint_accuracy['acc_pose']
+
+ return losses
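+
+ # Routing sketch (assumed example values, not upstream documentation): with
+ # keypoint_heads_cnt == 3 and img_sources == tensor([0, 2, 0, 1]), the main
+ # head is supervised on samples 0 and 2, associate head 0 on sample 3
+ # (dataset_idx 1) and associate head 1 on sample 1 (dataset_idx 2); the
+ # view(-1, 1, 1, 1) broadcast zeroes targets/weights of all other samples.
+ #   >>> img_sources = torch.tensor([0, 2, 0, 1])
+ #   >>> (img_sources == 0).view(-1, 1, 1, 1).shape  # mask broadcast over targets
+ #   torch.Size([4, 1, 1, 1])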
+
+ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
+ """Defines the computation performed at every call when testing."""
+ assert img.size(0) == len(img_metas)
+ batch_size, _, img_height, img_width = img.shape
+ if batch_size > 1:
+ assert 'bbox_id' in img_metas[0]
+
+ result = {}
+ img_sources = torch.from_numpy(np.array([ele['dataset_idx'] for ele in img_metas])).to(img.device)
+
+ features = self.backbone(img, img_sources)
+
+ if self.with_neck:
+ features = self.neck(features)
+ if self.with_keypoint:
+ output_heatmap = self.keypoint_head.inference_model(
+ features, flip_pairs=None)
+
+ if self.test_cfg.get('flip_test', True):
+ img_flipped = img.flip(3)
+ features_flipped = self.backbone(img_flipped, img_sources)
+ if self.with_neck:
+ features_flipped = self.neck(features_flipped)
+ if self.with_keypoint:
+ output_flipped_heatmap = self.keypoint_head.inference_model(
+ features_flipped, img_metas[0]['flip_pairs'])
+ output_heatmap = (output_heatmap +
+ output_flipped_heatmap) * 0.5
+
+ if self.with_keypoint:
+ keypoint_result = self.keypoint_head.decode(
+ img_metas, output_heatmap, img_size=[img_width, img_height])
+ result.update(keypoint_result)
+
+ if not return_heatmap:
+ output_heatmap = None
+
+ result['output_heatmap'] = output_heatmap
+
+ return result
+
+ def forward_dummy(self, img):
+ """Used for computing network FLOPs.
+
+ See ``tools/get_flops.py``.
+
+ Args:
+ img (torch.Tensor): Input image.
+
+ Returns:
+ Tensor: Output heatmaps.
+ """
+ output = self.backbone(img)
+ if self.with_neck:
+ output = self.neck(output)
+ if self.with_keypoint:
+ output = self.keypoint_head(output)
+ return output
+
+ @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
+ cls_name='TopDownMoE')
+ def show_result(self,
+ img,
+ result,
+ skeleton=None,
+ kpt_score_thr=0.3,
+ bbox_color='green',
+ pose_kpt_color=None,
+ pose_link_color=None,
+ text_color='white',
+ radius=4,
+ thickness=1,
+ font_scale=0.5,
+ bbox_thickness=1,
+ win_name='',
+ show=False,
+ show_keypoint_weight=False,
+ wait_time=0,
+ out_file=None):
+ """Draw `result` over `img`.
+
+ Args:
+ img (str or Tensor): The image to be displayed.
+ result (list[dict]): The results to draw over `img`
+ (bbox_result, pose_result).
+ skeleton (list[list]): The connection of keypoints.
+ skeleton is 0-based indexing.
+ kpt_score_thr (float, optional): Minimum score of keypoints
+ to be shown. Default: 0.3.
+ bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
+ pose_kpt_color (np.array[Nx3]): Color of N keypoints.
+ If None, do not draw keypoints.
+ pose_link_color (np.array[Mx3]): Color of M links.
+ If None, do not draw links.
+ text_color (str or tuple or :obj:`Color`): Color of texts.
+ radius (int): Radius of circles.
+ thickness (int): Thickness of lines.
+ font_scale (float): Font scales of texts.
+ win_name (str): The window name.
+ show (bool): Whether to show the image. Default: False.
+ show_keypoint_weight (bool): Whether to change the transparency
+ using the predicted confidence scores of keypoints.
+ wait_time (int): Value of waitKey param.
+ Default: 0.
+ out_file (str or None): The filename to write the image.
+ Default: None.
+
+ Returns:
+ Tensor: Visualized img, only if not `show` or `out_file`.
+ """
+ img = mmcv.imread(img)
+ img = img.copy()
+
+ bbox_result = []
+ bbox_labels = []
+ pose_result = []
+ for res in result:
+ if 'bbox' in res:
+ bbox_result.append(res['bbox'])
+ bbox_labels.append(res.get('label', None))
+ pose_result.append(res['keypoints'])
+
+ if bbox_result:
+ bboxes = np.vstack(bbox_result)
+ # draw bounding boxes
+ imshow_bboxes(
+ img,
+ bboxes,
+ labels=bbox_labels,
+ colors=bbox_color,
+ text_color=text_color,
+ thickness=bbox_thickness,
+ font_scale=font_scale,
+ show=False)
+
+ if pose_result:
+ imshow_keypoints(img, pose_result, skeleton, kpt_score_thr,
+ pose_kpt_color, pose_link_color, radius,
+ thickness)
+
+ if show:
+ imshow(img, win_name, wait_time)
+
+ if out_file is not None:
+ imwrite(img, out_file)
+
+ return img
diff --git a/vendor/ViTPose/mmpose/models/heads/__init__.py b/vendor/ViTPose/mmpose/models/heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a98e91140e7af574816787e9ace4ede24214c189
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/__init__.py
@@ -0,0 +1,24 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .ae_higher_resolution_head import AEHigherResolutionHead
+from .ae_multi_stage_head import AEMultiStageHead
+from .ae_simple_head import AESimpleHead
+from .deconv_head import DeconvHead
+from .deeppose_regression_head import DeepposeRegressionHead
+from .hmr_head import HMRMeshHead
+from .interhand_3d_head import Interhand3DHead
+from .temporal_regression_head import TemporalRegressionHead
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+from .topdown_heatmap_multi_stage_head import (TopdownHeatmapMSMUHead,
+ TopdownHeatmapMultiStageHead)
+from .topdown_heatmap_simple_head import TopdownHeatmapSimpleHead
+from .vipnas_heatmap_simple_head import ViPNASHeatmapSimpleHead
+from .voxelpose_head import CuboidCenterHead, CuboidPoseHead
+
+__all__ = [
+ 'TopdownHeatmapSimpleHead', 'TopdownHeatmapMultiStageHead',
+ 'TopdownHeatmapMSMUHead', 'TopdownHeatmapBaseHead',
+ 'AEHigherResolutionHead', 'AESimpleHead', 'AEMultiStageHead',
+ 'DeepposeRegressionHead', 'TemporalRegressionHead', 'Interhand3DHead',
+ 'HMRMeshHead', 'DeconvHead', 'ViPNASHeatmapSimpleHead', 'CuboidCenterHead',
+ 'CuboidPoseHead'
+]
diff --git a/vendor/ViTPose/mmpose/models/heads/ae_higher_resolution_head.py b/vendor/ViTPose/mmpose/models/heads/ae_higher_resolution_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bf3399cb6facb232931ab9a763fadaf717b138b
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/ae_higher_resolution_head.py
@@ -0,0 +1,249 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init,
+ normal_init)
+
+from mmpose.models.builder import build_loss
+from ..backbones.resnet import BasicBlock
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class AEHigherResolutionHead(nn.Module):
+ """Associative embedding with higher resolution head. paper ref: Bowen
+ Cheng et al. "HigherHRNet: Scale-Aware Representation Learning for Bottom-
+ Up Human Pose Estimation".
+
+ Args:
+ in_channels (int): Number of input channels.
+ num_joints (int): Number of joints
+ tag_per_joint (bool): If tag_per_joint is True,
+ the dimension of tags equals to num_joints,
+ else the dimension of tags is 1. Default: True
+ extra (dict): Configs for extra conv layers. Default: None
+ num_deconv_layers (int): Number of deconv layers.
+ num_deconv_layers should >= 0. Note that 0 means
+ no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ If num_deconv_layers > 0, its length should equal num_deconv_layers.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ cat_output (list[bool]): Option to concat outputs.
+ with_ae_loss (list[bool]): Option to use ae loss.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ tag_per_joint=True,
+ extra=None,
+ num_deconv_layers=1,
+ num_deconv_filters=(32, ),
+ num_deconv_kernels=(4, ),
+ num_basic_blocks=4,
+ cat_output=None,
+ with_ae_loss=None,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.loss = build_loss(loss_keypoint)
+ dim_tag = num_joints if tag_per_joint else 1
+
+ self.num_deconvs = num_deconv_layers
+ self.cat_output = cat_output
+
+ final_layer_output_channels = []
+
+ if with_ae_loss[0]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+
+ final_layer_output_channels.append(out_channels)
+ for i in range(num_deconv_layers):
+ if with_ae_loss[i + 1]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+ final_layer_output_channels.append(out_channels)
+
+ deconv_layer_output_channels = []
+ for i in range(num_deconv_layers):
+ if with_ae_loss[i]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+ deconv_layer_output_channels.append(out_channels)
+
+ self.final_layers = self._make_final_layers(
+ in_channels, final_layer_output_channels, extra, num_deconv_layers,
+ num_deconv_filters)
+ self.deconv_layers = self._make_deconv_layers(
+ in_channels, deconv_layer_output_channels, num_deconv_layers,
+ num_deconv_filters, num_deconv_kernels, num_basic_blocks,
+ cat_output)
+
+ @staticmethod
+ def _make_final_layers(in_channels, final_layer_output_channels, extra,
+ num_deconv_layers, num_deconv_filters):
+ """Make final layers."""
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ else:
+ padding = 0
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ final_layers = []
+ final_layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=final_layer_output_channels[0],
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ for i in range(num_deconv_layers):
+ in_channels = num_deconv_filters[i]
+ final_layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=final_layer_output_channels[i + 1],
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ return nn.ModuleList(final_layers)
+
+ def _make_deconv_layers(self, in_channels, deconv_layer_output_channels,
+ num_deconv_layers, num_deconv_filters,
+ num_deconv_kernels, num_basic_blocks, cat_output):
+ """Make deconv layers."""
+ deconv_layers = []
+ for i in range(num_deconv_layers):
+ if cat_output[i]:
+ in_channels += deconv_layer_output_channels[i]
+
+ planes = num_deconv_filters[i]
+ deconv_kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_deconv_kernels[i])
+
+ layers = []
+ layers.append(
+ nn.Sequential(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=in_channels,
+ out_channels=planes,
+ kernel_size=deconv_kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False), nn.BatchNorm2d(planes, momentum=0.1),
+ nn.ReLU(inplace=True)))
+ for _ in range(num_basic_blocks):
+ layers.append(nn.Sequential(BasicBlock(planes, planes), ))
+ deconv_layers.append(nn.Sequential(*layers))
+ in_channels = planes
+
+ return nn.ModuleList(deconv_layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
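+
+ # Why these pairings give exact 2x upsampling (standard ConvTranspose2d output
+ # size with stride=2, dilation=1):
+ #   H_out = (H_in - 1) * 2 - 2 * padding + kernel + output_padding
+ #   kernel=4, padding=1, output_padding=0: (H - 1) * 2 - 2 + 4 + 0 = 2H
+ #   kernel=3, padding=1, output_padding=1: (H - 1) * 2 - 2 + 3 + 1 = 2H
+ #   kernel=2, padding=0, output_padding=0: (H - 1) * 2 - 0 + 2 + 0 = 2H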
+
+ def get_loss(self, outputs, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ outputs (list(torch.Tensor[N,K,H,W])): Multi-scale output heatmaps.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints (List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ """
+
+ losses = dict()
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ outputs, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
+
+ def forward(self, x):
+ """Forward function."""
+ if isinstance(x, list):
+ x = x[0]
+
+ final_outputs = []
+ y = self.final_layers[0](x)
+ final_outputs.append(y)
+
+ for i in range(self.num_deconvs):
+ if self.cat_output[i]:
+ x = torch.cat((x, y), 1)
+
+ x = self.deconv_layers[i](x)
+ y = self.final_layers[i + 1](x)
+ final_outputs.append(y)
+
+ return final_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for _, m in self.final_layers.named_modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
diff --git a/vendor/ViTPose/mmpose/models/heads/ae_multi_stage_head.py b/vendor/ViTPose/mmpose/models/heads/ae_multi_stage_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..195666b27ed50402a073c9eff7c5579c710a36f6
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/ae_multi_stage_head.py
@@ -0,0 +1,222 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init,
+ normal_init)
+
+from mmpose.models.builder import build_loss
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class AEMultiStageHead(nn.Module):
+ """Associative embedding multi-stage head.
+ paper ref: Alejandro Newell et al. "Associative
+ Embedding: End-to-end Learning for Joint Detection
+ and Grouping"
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_deconv_layers (int): Number of deconv layers.
+ num_deconv_layers should >= 0. Note that 0 means
+ no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ If num_deconv_layers > 0, its length should equal num_deconv_layers.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.in_channels = in_channels
+ self.num_stages = num_stages
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ # build multi-stage deconv layers
+ self.multi_deconv_layers = nn.ModuleList([])
+ for _ in range(self.num_stages):
+ if num_deconv_layers > 0:
+ deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+ self.multi_deconv_layers.append(deconv_layers)
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ # build multi-stage final layers
+ self.multi_final_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if identity_final_layer:
+ final_layer = nn.Identity()
+ else:
+ final_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=num_deconv_filters[-1]
+ if num_deconv_layers > 0 else in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+ self.multi_final_layers.append(final_layer)
+
+ def get_loss(self, output, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ output (List(torch.Tensor[NxKxHxW])): Output heatmaps.
+ targets(List(List(torch.Tensor[NxKxHxW]))):
+ Multi-stage and multi-scale target heatmaps.
+ masks(List(List(torch.Tensor[NxHxW]))):
+ Masks of multi-stage and multi-scale target heatmaps
+ joints(List(List(torch.Tensor[NxMxKx2]))):
+ Joints of multi-stage multi-scale target heatmaps for ae loss
+ """
+
+ losses = dict()
+
+ # Flatten list:
+ # [stage_1_scale_1, stage_1_scale_2, ... , stage_1_scale_m,
+ # ...
+ # stage_n_scale_1, stage_n_scale_2, ... , stage_n_scale_m]
+ targets = [target for _targets in targets for target in _targets]
+ masks = [mask for _masks in masks for mask in _masks]
+ joints = [joint for _joints in joints for joint in _joints]
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ output, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages.
+ """
+ out = []
+ assert isinstance(x, list)
+ for i in range(self.num_stages):
+ y = self.multi_deconv_layers[i](x[i])
+ y = self.multi_final_layers[i](y)
+ out.append(y)
+ return out
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.multi_deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.multi_final_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
diff --git a/vendor/ViTPose/mmpose/models/heads/ae_simple_head.py b/vendor/ViTPose/mmpose/models/heads/ae_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..9297f71fd319ab26700f90d797fdd7fea508cb7a
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/ae_simple_head.py
@@ -0,0 +1,99 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ..builder import HEADS
+from .deconv_head import DeconvHead
+
+
+@HEADS.register_module()
+class AESimpleHead(DeconvHead):
+ """Associative embedding simple head.
+ paper ref: Alejandro Newell et al. "Associative
+ Embedding: End-to-end Learning for Joint Detection
+ and Grouping"
+
+ Args:
+ in_channels (int): Number of input channels.
+ num_joints (int): Number of joints.
+ num_deconv_layers (int): Number of deconv layers.
+ num_deconv_layers should >= 0. Note that 0 means
+ no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ If num_deconv_layers > 0, its length should equal num_deconv_layers.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ tag_per_joint (bool): If tag_per_joint is True,
+ the dimension of tags equals to num_joints,
+ else the dimension of tags is 1. Default: True
+ with_ae_loss (list[bool]): Option to use ae loss or not.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ tag_per_joint=True,
+ with_ae_loss=None,
+ extra=None,
+ loss_keypoint=None):
+
+ dim_tag = num_joints if tag_per_joint else 1
+ if with_ae_loss[0]:
+ out_channels = num_joints + dim_tag
+ else:
+ out_channels = num_joints
+
+ super().__init__(
+ in_channels,
+ out_channels,
+ num_deconv_layers=num_deconv_layers,
+ num_deconv_filters=num_deconv_filters,
+ num_deconv_kernels=num_deconv_kernels,
+ extra=extra,
+ loss_keypoint=loss_keypoint)
+
+ def get_loss(self, outputs, targets, masks, joints):
+ """Calculate bottom-up keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ outputs (list(torch.Tensor[N,K,H,W])): Multi-scale output heatmaps.
+ targets (List(torch.Tensor[N,K,H,W])): Multi-scale target heatmaps.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale target
+ heatmaps
+ joints(List(torch.Tensor[N,M,K,2])): Joints of multi-scale target
+ heatmaps for ae loss
+ """
+
+ losses = dict()
+
+ heatmaps_losses, push_losses, pull_losses = self.loss(
+ outputs, targets, masks, joints)
+
+ for idx in range(len(targets)):
+ if heatmaps_losses[idx] is not None:
+ heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = heatmaps_loss
+ else:
+ losses['heatmap_loss'] += heatmaps_loss
+ if push_losses[idx] is not None:
+ push_loss = push_losses[idx].mean(dim=0)
+ if 'push_loss' not in losses:
+ losses['push_loss'] = push_loss
+ else:
+ losses['push_loss'] += push_loss
+ if pull_losses[idx] is not None:
+ pull_loss = pull_losses[idx].mean(dim=0)
+ if 'pull_loss' not in losses:
+ losses['pull_loss'] = pull_loss
+ else:
+ losses['pull_loss'] += pull_loss
+
+ return losses
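+
+ # Channel arithmetic sketch (assumed values, for illustration only): with
+ # num_joints=17, tag_per_joint=True and with_ae_loss=[True], the head predicts
+ # 17 heatmap channels plus 17 tag channels; with with_ae_loss=[False] it
+ # predicts the 17 heatmap channels only.
+ #   >>> num_joints, tag_per_joint = 17, True
+ #   >>> dim_tag = num_joints if tag_per_joint else 1
+ #   >>> num_joints + dim_tag  # out_channels when with_ae_loss[0] is True
+ #   34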
diff --git a/vendor/ViTPose/mmpose/models/heads/deconv_head.py b/vendor/ViTPose/mmpose/models/heads/deconv_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..90846d27af46d65091f4ad7e0e6687377ebd86e1
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/deconv_head.py
@@ -0,0 +1,295 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.models.builder import HEADS, build_loss
+from mmpose.models.utils.ops import resize
+
+
+@HEADS.register_module()
+class DeconvHead(nn.Module):
+ """Simple deconv head.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_deconv_layers (int): Number of deconv layers.
+ num_deconv_layers should >= 0. Note that 0 means
+ no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ If num_deconv_layers > 0, its length should equal num_deconv_layers.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+ - 'resize_concat': Multiple feature maps will be resized to the
+ same size as the first one and then concatenated together.
+ Usually used in FCN head of HRNet.
+ - 'multiple_select': Multiple feature maps will be bundled into
+ a list and passed into the decode head.
+ - None: Only one select feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels=3,
+ out_channels=17,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+ - 'resize_concat': Multiple feature maps will be resized to the
+ same size as the first one and then concatenated together.
+ Usually used in FCN head of HRNet.
+ - 'multiple_select': Multiple feature maps will be bundled into
+ a list and passed into the decode head.
+ - None: Only one select feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
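+
+ # Shape sketch (assumed feature sizes, for illustration): given
+ # inputs = [Tensor(N, 32, 64, 48), Tensor(N, 64, 32, 24)] and in_index=[0, 1],
+ #   - 'resize_concat' resizes the second map to 64x48 and concatenates along
+ #     channels -> Tensor(N, 96, 64, 48); self.in_channels was set to
+ #     sum([32, 64]) = 96 in _init_inputs accordingly;
+ #   - 'multiple_select' simply returns the two selected maps as a list;
+ #   - input_transform=None with in_index=0 returns the first map unchanged.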
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def get_loss(self, outputs, targets, masks):
+ """Calculate bottom-up masked mse loss.
+
+ Note:
+ - batch_size: N
+ - num_channels: C
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ outputs (List(torch.Tensor[N,C,H,W])): Multi-scale outputs.
+ targets (List(torch.Tensor[N,C,H,W])): Multi-scale targets.
+ masks (List(torch.Tensor[N,H,W])): Masks of multi-scale targets.
+ """
+
+ losses = dict()
+
+ for idx in range(len(targets)):
+ if 'loss' not in losses:
+ losses['loss'] = self.loss(outputs[idx], targets[idx],
+ masks[idx])
+ else:
+ losses['loss'] += self.loss(outputs[idx], targets[idx],
+ masks[idx])
+
+ return losses
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ final_outputs = []
+ x = self.deconv_layers(x)
+ y = self.final_layer(x)
+ final_outputs.append(y)
+ return final_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
diff --git a/vendor/ViTPose/mmpose/models/heads/deeppose_regression_head.py b/vendor/ViTPose/mmpose/models/heads/deeppose_regression_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..f326e26fa624bd99e9603ad28ff71dccb29b5638
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/deeppose_regression_head.py
@@ -0,0 +1,176 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmpose.core.evaluation import (keypoint_pck_accuracy,
+ keypoints_from_regression)
+from mmpose.core.post_processing import fliplr_regression
+from mmpose.models.builder import HEADS, build_loss
+
+
+@HEADS.register_module()
+class DeepposeRegressionHead(nn.Module):
+ """Deeppose regression head with fully connected layers.
+
+ "DeepPose: Human Pose Estimation via Deep Neural Networks".
+
+ Args:
+ in_channels (int): Number of input channels
+ num_joints (int): Number of joints
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_joints = num_joints
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+
+ self.fc = nn.Linear(self.in_channels, self.num_joints * 2)
+
+ def forward(self, x):
+ """Forward function."""
+ output = self.fc(x)
+ N, C = output.shape
+ return output.reshape([N, C // 2, 2])
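+
+ # Shape sketch (illustrative): with in_channels=2048 and num_joints=17, the fc
+ # layer maps (N, 2048) -> (N, 34), which is reshaped to (N, 17, 2), i.e. one
+ # (x, y) regression per joint.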
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output keypoints.
+ target (torch.Tensor[N, K, 2]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 3 and target_weight.dim() == 3
+ losses['reg_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output keypoints.
+ target (torch.Tensor[N, K, 2]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ N = output.shape[0]
+
+ _, avg_acc, cnt = keypoint_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight[:, :, 0].detach().cpu().numpy() > 0,
+ thr=0.05,
+ normalize=np.ones((N, 2), dtype=np.float32))
+ accuracy['acc_pose'] = avg_acc
+
+ return accuracy
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_regression (np.ndarray): Output regression.
+
+ Args:
+ x (torch.Tensor[N, in_channels]): Input features.
+ flip_pairs (None | list[tuple()]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_regression = fliplr_regression(
+ output.detach().cpu().numpy(), flip_pairs)
+ else:
+ output_regression = output.detach().cpu().numpy()
+ return output_regression
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode the keypoints from output regression.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ output (np.ndarray[N, K, 2]): predicted regression vector.
+ kwargs: dict contains 'img_size'.
+ img_size (tuple(img_width, img_height)): input image size.
+ """
+ batch_size = len(img_metas)
+
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ c = np.zeros((batch_size, 2), dtype=np.float32)
+ s = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size)
+ for i in range(batch_size):
+ c[i, :] = img_metas[i]['center']
+ s[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ preds, maxvals = keypoints_from_regression(output, c, s,
+ kwargs['img_size'])
+
+ all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_preds[:, :, 0:2] = preds[:, :, 0:2]
+ all_preds[:, :, 2:3] = maxvals
+ all_boxes[:, 0:2] = c[:, 0:2]
+ all_boxes[:, 2:4] = s[:, 0:2]
+ all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
+ all_boxes[:, 5] = score
+
+ result = {}
+
+ result['preds'] = all_preds
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ return result
+
+ def init_weights(self):
+ """Initialize model weights."""
+ normal_init(self.fc, mean=0, std=0.01, bias=0)
diff --git a/vendor/ViTPose/mmpose/models/heads/hmr_head.py b/vendor/ViTPose/mmpose/models/heads/hmr_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..015a3076bcba53d1590de226fab39444708cb3f9
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/hmr_head.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import xavier_init
+
+from ..builder import HEADS
+from ..utils.geometry import rot6d_to_rotmat
+
+
+@HEADS.register_module()
+class HMRMeshHead(nn.Module):
+ """SMPL parameters regressor head of simple baseline. "End-to-end Recovery
+ of Human Shape and Pose", CVPR'2018.
+
+ Args:
+ in_channels (int): Number of input channels
+ smpl_mean_params (str): The file name of the mean SMPL parameters
+ n_iter (int): The iterations of estimating delta parameters
+ """
+
+ def __init__(self, in_channels, smpl_mean_params=None, n_iter=3):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.n_iter = n_iter
+
+ npose = 24 * 6
+ nbeta = 10
+ ncam = 3
+ hidden_dim = 1024
+
+ self.fc1 = nn.Linear(in_channels + npose + nbeta + ncam, hidden_dim)
+ self.drop1 = nn.Dropout()
+ self.fc2 = nn.Linear(hidden_dim, hidden_dim)
+ self.drop2 = nn.Dropout()
+ self.decpose = nn.Linear(hidden_dim, npose)
+ self.decshape = nn.Linear(hidden_dim, nbeta)
+ self.deccam = nn.Linear(hidden_dim, ncam)
+
+ # Load mean SMPL parameters
+ if smpl_mean_params is None:
+ init_pose = torch.zeros([1, npose])
+ init_shape = torch.zeros([1, nbeta])
+ init_cam = torch.FloatTensor([[1, 0, 0]])
+ else:
+ mean_params = np.load(smpl_mean_params)
+ init_pose = torch.from_numpy(
+ mean_params['pose'][:]).unsqueeze(0).float()
+ init_shape = torch.from_numpy(
+ mean_params['shape'][:]).unsqueeze(0).float()
+ init_cam = torch.from_numpy(
+ mean_params['cam']).unsqueeze(0).float()
+ self.register_buffer('init_pose', init_pose)
+ self.register_buffer('init_shape', init_shape)
+ self.register_buffer('init_cam', init_cam)
+
+ def forward(self, x):
+ """Forward function.
+
+ x is the image feature map and is expected to be in shape (batch size x
+ channel number x height x width)
+ """
+ batch_size = x.shape[0]
+ # extract the global feature vector by average along
+ # spatial dimension.
+ x = x.mean(dim=-1).mean(dim=-1)
+
+ init_pose = self.init_pose.expand(batch_size, -1)
+ init_shape = self.init_shape.expand(batch_size, -1)
+ init_cam = self.init_cam.expand(batch_size, -1)
+
+ pred_pose = init_pose
+ pred_shape = init_shape
+ pred_cam = init_cam
+ for _ in range(self.n_iter):
+ xc = torch.cat([x, pred_pose, pred_shape, pred_cam], 1)
+ xc = self.fc1(xc)
+ xc = self.drop1(xc)
+ xc = self.fc2(xc)
+ xc = self.drop2(xc)
+ pred_pose = self.decpose(xc) + pred_pose
+ pred_shape = self.decshape(xc) + pred_shape
+ pred_cam = self.deccam(xc) + pred_cam
+
+ pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3)
+ out = (pred_rotmat, pred_shape, pred_cam)
+ return out
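+
+ # Shape sketch (illustrative): an (N, C, H, W) feature map is pooled to (N, C),
+ # then (pose, shape, cam) are refined for n_iter steps starting from the
+ # registered mean parameters. Pose is predicted in 6D rotation form
+ # (24 * 6 = 144 values) and converted to (N, 24, 3, 3) rotation matrices;
+ # shape holds 10 betas and cam 3 camera parameters (initialized to [1, 0, 0]).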
+
+ def init_weights(self):
+ """Initialize model weights."""
+ xavier_init(self.decpose, gain=0.01)
+ xavier_init(self.decshape, gain=0.01)
+ xavier_init(self.deccam, gain=0.01)
diff --git a/vendor/ViTPose/mmpose/models/heads/interhand_3d_head.py b/vendor/ViTPose/mmpose/models/heads/interhand_3d_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..aebe4a5f61e5fd1dcd5ecfb64962f88da94d5664
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/interhand_3d_head.py
@@ -0,0 +1,521 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation.top_down_eval import (
+ keypoints_from_heatmaps3d, multilabel_classification_accuracy)
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.necks import GlobalAveragePooling
+from ..builder import HEADS
+
+
+class Heatmap3DHead(nn.Module):
+ """Heatmap3DHead is a sub-module of Interhand3DHead, and outputs 3D
+ heatmaps. Heatmap3DHead is composed of (>=0) number of deconv layers and a
+ simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+ depth_size (int): Number of depth discretization size
+ num_deconv_layers (int): Number of deconv layers.
+ num_deconv_layers should >= 0. Note that 0 means no deconv layers.
+ num_deconv_filters (list|tuple): Number of filters.
+ num_deconv_kernels (list|tuple): Kernel sizes.
+ extra (dict): Configs for extra conv layers. Default: None
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ depth_size=64,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None):
+
+ super().__init__()
+
+ assert out_channels % depth_size == 0
+ self.depth_size = depth_size
+ self.in_channels = in_channels
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def forward(self, x):
+ """Forward function."""
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ N, C, H, W = x.shape
+ # reshape the 2D heatmap to 3D heatmap
+ x = x.reshape(N, C // self.depth_size, self.depth_size, H, W)
+ return x
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+
+
+class Heatmap1DHead(nn.Module):
+ """Heatmap1DHead is a sub-module of Interhand3DHead, and outputs 1D
+ heatmaps.
+
+ Args:
+ in_channels (int): Number of input channels
+ heatmap_size (int): Heatmap size
+ hidden_dims (list|tuple): Number of feature dimension of FC layers.
+ """
+
+ def __init__(self, in_channels=2048, heatmap_size=64, hidden_dims=(512, )):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.heatmap_size = heatmap_size
+
+ feature_dims = [in_channels, *hidden_dims, heatmap_size]
+ self.fc = self._make_linear_layers(feature_dims, relu_final=False)
+
+ def soft_argmax_1d(self, heatmap1d):
+ heatmap1d = F.softmax(heatmap1d, 1)
+ accu = heatmap1d * torch.arange(
+ self.heatmap_size, dtype=heatmap1d.dtype,
+ device=heatmap1d.device)[None, :]
+ coord = accu.sum(dim=1)
+ return coord
+
+ def _make_linear_layers(self, feat_dims, relu_final=False):
+ """Make linear layers."""
+ layers = []
+ for i in range(len(feat_dims) - 1):
+ layers.append(nn.Linear(feat_dims[i], feat_dims[i + 1]))
+ if i < len(feat_dims) - 2 or \
+ (i == len(feat_dims) - 2 and relu_final):
+ layers.append(nn.ReLU(inplace=True))
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Forward function."""
+ heatmap1d = self.fc(x)
+ value = self.soft_argmax_1d(heatmap1d).view(-1, 1)
+ return value
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.fc.modules():
+ if isinstance(m, nn.Linear):
+ normal_init(m, mean=0, std=0.01, bias=0)
+
+
+class MultilabelClassificationHead(nn.Module):
+ """MultilabelClassificationHead is a sub-module of Interhand3DHead, and
+ outputs hand type classification.
+
+ Args:
+ in_channels (int): Number of input channels
+ num_labels (int): Number of labels
+ hidden_dims (list|tuple): Number of hidden dimension of FC layers.
+ """
+
+ def __init__(self, in_channels=2048, num_labels=2, hidden_dims=(512, )):
+ super().__init__()
+
+ self.in_channels = in_channels
+        self.num_labels = num_labels
+
+ feature_dims = [in_channels, *hidden_dims, num_labels]
+ self.fc = self._make_linear_layers(feature_dims, relu_final=False)
+
+ def _make_linear_layers(self, feat_dims, relu_final=False):
+ """Make linear layers."""
+ layers = []
+ for i in range(len(feat_dims) - 1):
+ layers.append(nn.Linear(feat_dims[i], feat_dims[i + 1]))
+ if i < len(feat_dims) - 2 or \
+ (i == len(feat_dims) - 2 and relu_final):
+ layers.append(nn.ReLU(inplace=True))
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ """Forward function."""
+ labels = torch.sigmoid(self.fc(x))
+ return labels
+
+ def init_weights(self):
+ for m in self.fc.modules():
+ if isinstance(m, nn.Linear):
+ normal_init(m, mean=0, std=0.01, bias=0)
+
+
+@HEADS.register_module()
+class Interhand3DHead(nn.Module):
+ """Interhand 3D head of paper ref: Gyeongsik Moon. "InterHand2.6M: A
+ Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single
+ RGB Image".
+
+ Args:
+ keypoint_head_cfg (dict): Configs of Heatmap3DHead for hand
+ keypoint estimation.
+ root_head_cfg (dict): Configs of Heatmap1DHead for relative
+ hand root depth estimation.
+ hand_type_head_cfg (dict): Configs of MultilabelClassificationHead
+ for hand type classification.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ loss_root_depth (dict): Config for relative root depth loss.
+ Default: None.
+ loss_hand_type (dict): Config for hand type classification
+ loss. Default: None.
+ """
+
+ def __init__(self,
+ keypoint_head_cfg,
+ root_head_cfg,
+ hand_type_head_cfg,
+ loss_keypoint=None,
+ loss_root_depth=None,
+ loss_hand_type=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ # build sub-module heads
+ self.right_hand_head = Heatmap3DHead(**keypoint_head_cfg)
+ self.left_hand_head = Heatmap3DHead(**keypoint_head_cfg)
+ self.root_head = Heatmap1DHead(**root_head_cfg)
+ self.hand_type_head = MultilabelClassificationHead(
+ **hand_type_head_cfg)
+ self.neck = GlobalAveragePooling()
+
+ # build losses
+ self.keypoint_loss = build_loss(loss_keypoint)
+ self.root_depth_loss = build_loss(loss_root_depth)
+ self.hand_type_loss = build_loss(loss_hand_type)
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ def init_weights(self):
+ self.left_hand_head.init_weights()
+ self.right_hand_head.init_weights()
+ self.root_head.init_weights()
+ self.hand_type_head.init_weights()
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate loss for hand keypoint heatmaps, relative root depth and
+ hand type.
+
+ Args:
+ output (list[Tensor]): a list of outputs from multiple heads.
+ target (list[Tensor]): a list of targets for multiple heads.
+ target_weight (list[Tensor]): a list of targets weight for
+ multiple heads.
+ """
+ losses = dict()
+
+ # hand keypoint loss
+ assert not isinstance(self.keypoint_loss, nn.Sequential)
+ out, tar, tar_weight = output[0], target[0], target_weight[0]
+ assert tar.dim() == 5 and tar_weight.dim() == 3
+ losses['hand_loss'] = self.keypoint_loss(out, tar, tar_weight)
+
+ # relative root depth loss
+ assert not isinstance(self.root_depth_loss, nn.Sequential)
+ out, tar, tar_weight = output[1], target[1], target_weight[1]
+ assert tar.dim() == 2 and tar_weight.dim() == 2
+ losses['rel_root_loss'] = self.root_depth_loss(out, tar, tar_weight)
+
+ # hand type loss
+ assert not isinstance(self.hand_type_loss, nn.Sequential)
+ out, tar, tar_weight = output[2], target[2], target_weight[2]
+ assert tar.dim() == 2 and tar_weight.dim() in [1, 2]
+ losses['hand_type_loss'] = self.hand_type_loss(out, tar, tar_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for hand type.
+
+ Args:
+ output (list[Tensor]): a list of outputs from multiple heads.
+ target (list[Tensor]): a list of targets for multiple heads.
+ target_weight (list[Tensor]): a list of targets weight for
+ multiple heads.
+ """
+ accuracy = dict()
+ avg_acc = multilabel_classification_accuracy(
+ output[2].detach().cpu().numpy(),
+ target[2].detach().cpu().numpy(),
+ target_weight[2].detach().cpu().numpy(),
+ )
+ accuracy['acc_classification'] = float(avg_acc)
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ outputs = []
+ outputs.append(
+ torch.cat([self.right_hand_head(x),
+ self.left_hand_head(x)], dim=1))
+ x = self.neck(x)
+ outputs.append(self.root_head(x))
+ outputs.append(self.hand_type_head(x))
+ return outputs
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output (list[np.ndarray]): list of output hand keypoint
+ heatmaps, relative root depth and hand type.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ # flip 3D heatmap
+ heatmap_3d = output[0]
+ N, K, D, H, W = heatmap_3d.shape
+ # reshape 3D heatmap to 2D heatmap
+ heatmap_3d = heatmap_3d.reshape(N, K * D, H, W)
+ # 2D heatmap flip
+ heatmap_3d_flipped_back = flip_back(
+ heatmap_3d.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # reshape back to 3D heatmap
+ heatmap_3d_flipped_back = heatmap_3d_flipped_back.reshape(
+ N, K, D, H, W)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ heatmap_3d_flipped_back[...,
+ 1:] = heatmap_3d_flipped_back[..., :-1]
+ output[0] = heatmap_3d_flipped_back
+
+ # flip relative hand root depth
+ output[1] = -output[1].detach().cpu().numpy()
+
+ # flip hand type
+ hand_type = output[2].detach().cpu().numpy()
+ hand_type_flipped_back = hand_type.copy()
+ hand_type_flipped_back[:, 0] = hand_type[:, 1]
+ hand_type_flipped_back[:, 1] = hand_type[:, 0]
+ output[2] = hand_type_flipped_back
+ else:
+ output = [out.detach().cpu().numpy() for out in output]
+
+ return output
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode hand keypoint, relative root depth and hand type.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ - "heatmap3d_depth_bound": depth bound of hand keypoint
+ 3D heatmap
+ - "root_depth_bound": depth bound of relative root depth
+ 1D heatmap
+ output (list[np.ndarray]): model predicted 3D heatmaps, relative
+ root depth and hand type.
+ """
+
+ batch_size = len(img_metas)
+ result = {}
+
+ heatmap3d_depth_bound = np.ones(batch_size, dtype=np.float32)
+ root_depth_bound = np.ones(batch_size, dtype=np.float32)
+ center = np.zeros((batch_size, 2), dtype=np.float32)
+ scale = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size, dtype=np.float32)
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ for i in range(batch_size):
+ heatmap3d_depth_bound[i] = img_metas[i]['heatmap3d_depth_bound']
+ root_depth_bound[i] = img_metas[i]['root_depth_bound']
+ center[i, :] = img_metas[i]['center']
+ scale[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_boxes[:, 0:2] = center[:, 0:2]
+ all_boxes[:, 2:4] = scale[:, 0:2]
+ # scale is defined as: bbox_size / 200.0, so we
+        # need to multiply by 200.0 to get the bbox size
+ all_boxes[:, 4] = np.prod(scale * 200.0, axis=1)
+ all_boxes[:, 5] = score
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ # decode 3D heatmaps of hand keypoints
+ heatmap3d = output[0]
+ preds, maxvals = keypoints_from_heatmaps3d(heatmap3d, center, scale)
+ keypoints_3d = np.zeros((batch_size, preds.shape[1], 4),
+ dtype=np.float32)
+ keypoints_3d[:, :, 0:3] = preds[:, :, 0:3]
+ keypoints_3d[:, :, 3:4] = maxvals
+ # transform keypoint depth to camera space
+ keypoints_3d[:, :, 2] = \
+ (keypoints_3d[:, :, 2] / self.right_hand_head.depth_size - 0.5) \
+ * heatmap3d_depth_bound[:, np.newaxis]
+
+ result['preds'] = keypoints_3d
+
+ # decode relative hand root depth
+ # transform relative root depth to camera space
+ result['rel_root_depth'] = (output[1] / self.root_head.heatmap_size -
+ 0.5) * root_depth_bound
+
+ # decode hand type
+ result['hand_type'] = output[2] > 0.5
+ return result
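+
+
+# --- Illustrative sketch (not part of the original OpenMMLab file) ---
+# A minimal, hedged example of two numerical building blocks used above:
+# the differentiable soft-argmax over a 1D heatmap (Heatmap1DHead.soft_argmax_1d)
+# and the reshape from a stacked (N, K*D, H, W) 2D heatmap to a (N, K, D, H, W)
+# 3D heatmap (Heatmap3DHead.forward). It relies only on the imports already at
+# the top of this module.
+if __name__ == '__main__':
+    heatmap_size = 64
+    logits = torch.randn(2, heatmap_size)
+    probs = F.softmax(logits, dim=1)
+    # soft-argmax: expected index = sum_i p_i * i, a differentiable argmax surrogate
+    coord = (probs * torch.arange(heatmap_size, dtype=probs.dtype)[None, :]).sum(dim=1)
+    assert coord.shape == (2,)
+
+    # stacked 2D heatmaps are reinterpreted as K per-keypoint depth volumes
+    N, K, D, H, W = 2, 21, 64, 8, 8
+    heatmap_2d = torch.randn(N, K * D, H, W)
+    heatmap_3d = heatmap_2d.reshape(N, K, D, H, W)
+    assert heatmap_3d.shape == (N, K, D, H, W)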
diff --git a/vendor/ViTPose/mmpose/models/heads/temporal_regression_head.py b/vendor/ViTPose/mmpose/models/heads/temporal_regression_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..97a07f9cf2c9ef0497380ca5c602142b206f3b52
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/temporal_regression_head.py
@@ -0,0 +1,319 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import build_conv_layer, constant_init, kaiming_init
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.core import (WeightNormClipHook, compute_similarity_transform,
+ fliplr_regression)
+from mmpose.models.builder import HEADS, build_loss
+
+
+@HEADS.register_module()
+class TemporalRegressionHead(nn.Module):
+ """Regression head of VideoPose3D.
+
+ "3D human pose estimation in video with temporal convolutions and
+ semi-supervised training", CVPR'2019.
+
+ Args:
+ in_channels (int): Number of input channels
+ num_joints (int): Number of joints
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ max_norm (float|None): if not None, the weight of convolution layers
+ will be clipped to have a maximum norm of max_norm.
+ is_trajectory (bool): If the model only predicts root joint
+ position, then this arg should be set to True. In this case,
+ traj_loss will be calculated. Otherwise, it should be set to
+ False. Default: False.
+ """
+
+ def __init__(self,
+ in_channels,
+ num_joints,
+ max_norm=None,
+ loss_keypoint=None,
+ is_trajectory=False,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_joints = num_joints
+ self.max_norm = max_norm
+ self.loss = build_loss(loss_keypoint)
+ self.is_trajectory = is_trajectory
+ if self.is_trajectory:
+ assert self.num_joints == 1
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+
+ self.conv = build_conv_layer(
+ dict(type='Conv1d'), in_channels, num_joints * 3, 1)
+
+ if self.max_norm is not None:
+ # Apply weight norm clip to conv layers
+ weight_clip = WeightNormClipHook(self.max_norm)
+ for module in self.modules():
+ if isinstance(module, nn.modules.conv._ConvNd):
+ weight_clip.register(module)
+
+ @staticmethod
+ def _transform_inputs(x):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (tuple or list of Tensor | Tensor): multi-level features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(x, (list, tuple)):
+ return x
+
+ assert len(x) > 0
+
+ # return the top-level feature of the 1D feature pyramid
+ return x[-1]
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+
+ assert x.ndim == 3 and x.shape[2] == 1, f'Invalid shape {x.shape}'
+ output = self.conv(x)
+ N = output.shape[0]
+ return output.reshape(N, self.num_joints, 3)
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 3]): Output keypoints.
+ target (torch.Tensor[N, K, 3]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 3]):
+ Weights across different joint types.
+ If self.is_trajectory is True and target_weight is None,
+ target_weight will be set inversely proportional to joint
+ depth.
+ """
+ losses = dict()
+ assert not isinstance(self.loss, nn.Sequential)
+
+ # trajectory model
+ if self.is_trajectory:
+ if target.dim() == 2:
+ target.unsqueeze_(1)
+
+ if target_weight is None:
+ target_weight = (1 / target[:, :, 2:]).expand(target.shape)
+ assert target.dim() == 3 and target_weight.dim() == 3
+
+ losses['traj_loss'] = self.loss(output, target, target_weight)
+
+ # pose model
+ else:
+ if target_weight is None:
+ target_weight = target.new_ones(target.shape)
+ assert target.dim() == 3 and target_weight.dim() == 3
+ losses['reg_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight, metas):
+ """Calculate accuracy for keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 3]): Output keypoints.
+ target (torch.Tensor[N, K, 3]): Target keypoints.
+ target_weight (torch.Tensor[N, K, 3]):
+ Weights across different joint types.
+ metas (list(dict)): Information about data augmentation including:
+
+ - target_image_path (str): Optional, path to the image file
+ - target_mean (float): Optional, normalization parameter of
+ the target pose.
+ - target_std (float): Optional, normalization parameter of the
+ target pose.
+ - root_position (np.ndarray[3,1]): Optional, global
+ position of the root joint.
+ - root_index (torch.ndarray[1,]): Optional, original index of
+ the root joint before root-centering.
+ """
+
+ accuracy = dict()
+
+ N = output.shape[0]
+ output_ = output.detach().cpu().numpy()
+ target_ = target.detach().cpu().numpy()
+ # Denormalize the predicted pose
+ if 'target_mean' in metas[0] and 'target_std' in metas[0]:
+ target_mean = np.stack([m['target_mean'] for m in metas])
+ target_std = np.stack([m['target_std'] for m in metas])
+ output_ = self._denormalize_joints(output_, target_mean,
+ target_std)
+ target_ = self._denormalize_joints(target_, target_mean,
+ target_std)
+
+ # Restore global position
+ if self.test_cfg.get('restore_global_position', False):
+ root_pos = np.stack([m['root_position'] for m in metas])
+ root_idx = metas[0].get('root_position_index', None)
+ output_ = self._restore_global_position(output_, root_pos,
+ root_idx)
+ target_ = self._restore_global_position(target_, root_pos,
+ root_idx)
+ # Get target weight
+ if target_weight is None:
+ target_weight_ = np.ones_like(target_)
+ else:
+ target_weight_ = target_weight.detach().cpu().numpy()
+ if self.test_cfg.get('restore_global_position', False):
+ root_idx = metas[0].get('root_position_index', None)
+ root_weight = metas[0].get('root_joint_weight', 1.0)
+ target_weight_ = self._restore_root_target_weight(
+ target_weight_, root_weight, root_idx)
+
+ mpjpe = np.mean(
+ np.linalg.norm((output_ - target_) * target_weight_, axis=-1))
+
+ transformed_output = np.zeros_like(output_)
+ for i in range(N):
+ transformed_output[i, :, :] = compute_similarity_transform(
+ output_[i, :, :], target_[i, :, :])
+ p_mpjpe = np.mean(
+ np.linalg.norm(
+ (transformed_output - target_) * target_weight_, axis=-1))
+
+ accuracy['mpjpe'] = output.new_tensor(mpjpe)
+ accuracy['p_mpjpe'] = output.new_tensor(p_mpjpe)
+
+ return accuracy
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_regression (np.ndarray): Output regression.
+
+ Args:
+ x (torch.Tensor[N, K, 2]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_regression = fliplr_regression(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ center_mode='static',
+ center_x=0)
+ else:
+ output_regression = output.detach().cpu().numpy()
+ return output_regression
+
+ def decode(self, metas, output):
+ """Decode the keypoints from output regression.
+
+ Args:
+            output (np.ndarray[N, K, 3]): predicted regression vector.
+ metas (list(dict)): Information about data augmentation including:
+
+ - target_image_path (str): Optional, path to the image file
+ - target_mean (float): Optional, normalization parameter of
+ the target pose.
+ - target_std (float): Optional, normalization parameter of the
+ target pose.
+ - root_position (np.ndarray[3,1]): Optional, global
+ position of the root joint.
+ - root_index (torch.ndarray[1,]): Optional, original index of
+ the root joint before root-centering.
+ """
+
+ # Denormalize the predicted pose
+ if 'target_mean' in metas[0] and 'target_std' in metas[0]:
+ target_mean = np.stack([m['target_mean'] for m in metas])
+ target_std = np.stack([m['target_std'] for m in metas])
+ output = self._denormalize_joints(output, target_mean, target_std)
+
+ # Restore global position
+ if self.test_cfg.get('restore_global_position', False):
+ root_pos = np.stack([m['root_position'] for m in metas])
+ root_idx = metas[0].get('root_position_index', None)
+ output = self._restore_global_position(output, root_pos, root_idx)
+
+ target_image_paths = [m.get('target_image_path', None) for m in metas]
+ result = {'preds': output, 'target_image_paths': target_image_paths}
+
+ return result
+
+ @staticmethod
+ def _denormalize_joints(x, mean, std):
+ """Denormalize joint coordinates with given statistics mean and std.
+
+ Args:
+ x (np.ndarray[N, K, 3]): Normalized joint coordinates.
+ mean (np.ndarray[K, 3]): Mean value.
+ std (np.ndarray[K, 3]): Std value.
+ """
+ assert x.ndim == 3
+ assert x.shape == mean.shape == std.shape
+
+ return x * std + mean
+
+ @staticmethod
+ def _restore_global_position(x, root_pos, root_idx=None):
+ """Restore global position of the root-centered joints.
+
+ Args:
+ x (np.ndarray[N, K, 3]): root-centered joint coordinates
+ root_pos (np.ndarray[N,1,3]): The global position of the
+ root joint.
+ root_idx (int|None): If not none, the root joint will be inserted
+ back to the pose at the given index.
+ """
+ x = x + root_pos
+ if root_idx is not None:
+ x = np.insert(x, root_idx, root_pos.squeeze(1), axis=1)
+ return x
+
+ @staticmethod
+ def _restore_root_target_weight(target_weight, root_weight, root_idx=None):
+ """Restore the target weight of the root joint after the restoration of
+ the global position.
+
+ Args:
+ target_weight (np.ndarray[N, K, 1]): Target weight of relativized
+ joints.
+ root_weight (float): The target weight value of the root joint.
+ root_idx (int|None): If not none, the root joint weight will be
+ inserted back to the target weight at the given index.
+ """
+ if root_idx is not None:
+ root_weight = np.full(
+ target_weight.shape[0], root_weight, dtype=target_weight.dtype)
+ target_weight = np.insert(
+ target_weight, root_idx, root_weight[:, None], axis=1)
+ return target_weight
+
+ def init_weights(self):
+ """Initialize the weights."""
+ for m in self.modules():
+ if isinstance(m, nn.modules.conv._ConvNd):
+ kaiming_init(m, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, _BatchNorm):
+ constant_init(m, 1)
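+
+
+# --- Illustrative sketch (not part of the original OpenMMLab file) ---
+# A hedged, numpy-only example of the two static decode helpers above:
+# denormalising a root-centred pose and restoring its global position, with the
+# root joint re-inserted at index 0. The shapes follow the docstrings; the
+# concrete numbers are random and purely illustrative.
+if __name__ == '__main__':
+    N, K = 2, 16
+    pose = np.random.randn(N, K, 3).astype(np.float32)      # normalised, root-centred
+    mean = np.zeros((N, K, 3), dtype=np.float32)
+    std = np.ones((N, K, 3), dtype=np.float32)
+    root_pos = np.random.randn(N, 1, 3).astype(np.float32)  # global root position
+
+    denorm = TemporalRegressionHead._denormalize_joints(pose, mean, std)
+    restored = TemporalRegressionHead._restore_global_position(
+        denorm, root_pos, root_idx=0)
+    # the root joint is inserted back, so the keypoint axis grows by one
+    assert restored.shape == (N, K + 1, 3)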
diff --git a/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_base_head.py b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_base_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..09646ead353fb054f066b9fc6816748a43287e2c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_base_head.py
@@ -0,0 +1,120 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import torch.nn as nn
+
+from mmpose.core.evaluation.top_down_eval import keypoints_from_heatmaps
+
+
+class TopdownHeatmapBaseHead(nn.Module):
+ """Base class for top-down heatmap heads.
+
+ All top-down heatmap heads should subclass it.
+    All subclasses should overwrite:
+
+    Methods:`get_loss`, to calculate the loss.
+    Methods:`get_accuracy`, to calculate accuracy.
+    Methods:`forward`, to run the forward pass.
+    Methods:`inference_model`, to run inference.
+ """
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def get_loss(self, **kwargs):
+ """Gets the loss."""
+
+ @abstractmethod
+ def get_accuracy(self, **kwargs):
+ """Gets the accuracy."""
+
+ @abstractmethod
+ def forward(self, **kwargs):
+ """Forward function."""
+
+ @abstractmethod
+ def inference_model(self, **kwargs):
+ """Inference function."""
+
+ def decode(self, img_metas, output, **kwargs):
+ """Decode keypoints from heatmaps.
+
+ Args:
+ img_metas (list(dict)): Information about data augmentation
+ By default this includes:
+
+ - "image_file: path to the image file
+ - "center": center of the bbox
+ - "scale": scale of the bbox
+ - "rotation": rotation of the bbox
+ - "bbox_score": score of bbox
+ output (np.ndarray[N, K, H, W]): model predicted heatmaps.
+ """
+ batch_size = len(img_metas)
+
+ if 'bbox_id' in img_metas[0]:
+ bbox_ids = []
+ else:
+ bbox_ids = None
+
+ c = np.zeros((batch_size, 2), dtype=np.float32)
+ s = np.zeros((batch_size, 2), dtype=np.float32)
+ image_paths = []
+ score = np.ones(batch_size)
+ for i in range(batch_size):
+ c[i, :] = img_metas[i]['center']
+ s[i, :] = img_metas[i]['scale']
+ image_paths.append(img_metas[i]['image_file'])
+
+ if 'bbox_score' in img_metas[i]:
+ score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
+ if bbox_ids is not None:
+ bbox_ids.append(img_metas[i]['bbox_id'])
+
+ preds, maxvals = keypoints_from_heatmaps(
+ output,
+ c,
+ s,
+ unbiased=self.test_cfg.get('unbiased_decoding', False),
+ post_process=self.test_cfg.get('post_process', 'default'),
+ kernel=self.test_cfg.get('modulate_kernel', 11),
+ valid_radius_factor=self.test_cfg.get('valid_radius_factor',
+ 0.0546875),
+ use_udp=self.test_cfg.get('use_udp', False),
+ target_type=self.test_cfg.get('target_type', 'GaussianHeatmap'))
+
+ all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
+ all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
+ all_preds[:, :, 0:2] = preds[:, :, 0:2]
+ all_preds[:, :, 2:3] = maxvals
+ all_boxes[:, 0:2] = c[:, 0:2]
+ all_boxes[:, 2:4] = s[:, 0:2]
+ all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
+ all_boxes[:, 5] = score
+
+ result = {}
+
+ result['preds'] = all_preds
+ result['boxes'] = all_boxes
+ result['image_paths'] = image_paths
+ result['bbox_ids'] = bbox_ids
+
+ return result
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
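+
+
+# --- Illustrative sketch (not part of the original OpenMMLab file) ---
+# A hedged check that every kernel size handled by _get_deconv_cfg gives an
+# exact 2x spatial upsampling with stride 2, following
+# out = 2*(in - 1) - 2*padding + kernel + output_padding.
+if __name__ == '__main__':
+    import torch  # local import; only numpy and torch.nn are imported above
+
+    for k in (4, 3, 2):
+        kernel, padding, output_padding = \
+            TopdownHeatmapBaseHead._get_deconv_cfg(k)
+        deconv = nn.ConvTranspose2d(
+            8, 8, kernel, stride=2, padding=padding,
+            output_padding=output_padding, bias=False)
+        out = deconv(torch.randn(1, 8, 16, 16))
+        assert out.shape[-2:] == (32, 32), (k, tuple(out.shape))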
diff --git a/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_multi_stage_head.py b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_multi_stage_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..c439f5b6332d72a66db75bf599035411c4e1e0d1
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_multi_stage_head.py
@@ -0,0 +1,572 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy as cp
+
+import torch.nn as nn
+from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, Linear,
+ build_activation_layer, build_conv_layer,
+ build_norm_layer, build_upsample_layer, constant_init,
+ kaiming_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from ..builder import HEADS
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class TopdownHeatmapMultiStageHead(TopdownHeatmapBaseHead):
+ """Top-down heatmap multi-stage head.
+
+    TopdownHeatmapMultiStageHead consists of multiple branches, each of which
+    has num_deconv_layers (>=0) deconv layers and a simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ num_stages (int): Number of stages.
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels=512,
+ out_channels=17,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.num_stages = num_stages
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ # build multi-stage deconv layers
+ self.multi_deconv_layers = nn.ModuleList([])
+ for _ in range(self.num_stages):
+ if num_deconv_layers > 0:
+ deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+ self.multi_deconv_layers.append(deconv_layers)
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ # build multi-stage final layers
+ self.multi_final_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ if identity_final_layer:
+ final_layer = nn.Identity()
+ else:
+ final_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=num_deconv_filters[-1]
+ if num_deconv_layers > 0 else in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+ self.multi_final_layers.append(final_layer)
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]):
+ Output heatmaps.
+ target (torch.Tensor[N,K,H,W]):
+ Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert isinstance(output, list)
+ assert target.dim() == 4 and target_weight.dim() == 3
+
+ if isinstance(self.loss, nn.Sequential):
+ assert len(self.loss) == len(output)
+ for i in range(len(output)):
+ target_i = target
+ target_weight_i = target_weight
+ if isinstance(self.loss, nn.Sequential):
+ loss_func = self.loss[i]
+ else:
+ loss_func = self.loss
+ loss_i = loss_func(output[i], target_i, target_weight_i)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = loss_i
+ else:
+ losses['heatmap_loss'] += loss_i
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ _, avg_acc, _ = pose_pck_accuracy(
+ output[-1].detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages.
+ """
+ out = []
+ assert isinstance(x, list)
+ for i in range(self.num_stages):
+ y = self.multi_deconv_layers[i](x[i])
+ y = self.multi_final_layers[i](y)
+ out.append(y)
+ return out
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (List[torch.Tensor[NxKxHxW]]): Input features.
+            flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+ assert isinstance(output, list)
+ output = output[-1]
+
+ if flip_pairs is not None:
+ # perform flip
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+
+ return output_heatmap
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.multi_deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.multi_final_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+
+
+class PredictHeatmap(nn.Module):
+ """Predict the heat map for an input feature.
+
+ Args:
+ unit_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ out_shape (tuple): Shape of the output heatmap.
+ use_prm (bool): Whether to use pose refine machine. Default: False.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self,
+ unit_channels,
+ out_channels,
+ out_shape,
+ use_prm=False,
+ norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.unit_channels = unit_channels
+ self.out_channels = out_channels
+ self.out_shape = out_shape
+ self.use_prm = use_prm
+ if use_prm:
+ self.prm = PRM(out_channels, norm_cfg=norm_cfg)
+ self.conv_layers = nn.Sequential(
+ ConvModule(
+ unit_channels,
+ unit_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=norm_cfg,
+ inplace=False),
+ ConvModule(
+ unit_channels,
+ out_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ act_cfg=None,
+ inplace=False))
+
+ def forward(self, feature):
+ feature = self.conv_layers(feature)
+ output = nn.functional.interpolate(
+ feature, size=self.out_shape, mode='bilinear', align_corners=True)
+ if self.use_prm:
+ output = self.prm(output)
+ return output
+
+
+class PRM(nn.Module):
+ """Pose Refine Machine.
+
+ Please refer to "Learning Delicate Local Representations
+ for Multi-Person Pose Estimation" (ECCV 2020).
+
+ Args:
+ out_channels (int): Channel number of the output. Equals to
+ the number of key points.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ """
+
+ def __init__(self, out_channels, norm_cfg=dict(type='BN')):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+ self.out_channels = out_channels
+ self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
+ self.middle_path = nn.Sequential(
+ Linear(self.out_channels, self.out_channels),
+ build_norm_layer(dict(type='BN1d'), out_channels)[1],
+ build_activation_layer(dict(type='ReLU')),
+ Linear(self.out_channels, self.out_channels),
+ build_norm_layer(dict(type='BN1d'), out_channels)[1],
+ build_activation_layer(dict(type='ReLU')),
+ build_activation_layer(dict(type='Sigmoid')))
+
+ self.bottom_path = nn.Sequential(
+ ConvModule(
+ self.out_channels,
+ self.out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ norm_cfg=norm_cfg,
+ inplace=False),
+ DepthwiseSeparableConvModule(
+ self.out_channels,
+ 1,
+ kernel_size=9,
+ stride=1,
+ padding=4,
+ norm_cfg=norm_cfg,
+ inplace=False), build_activation_layer(dict(type='Sigmoid')))
+ self.conv_bn_relu_prm_1 = ConvModule(
+ self.out_channels,
+ self.out_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ inplace=False)
+
+ def forward(self, x):
+ out = self.conv_bn_relu_prm_1(x)
+ out_1 = out
+
+ out_2 = self.global_pooling(out_1)
+ out_2 = out_2.view(out_2.size(0), -1)
+ out_2 = self.middle_path(out_2)
+ out_2 = out_2.unsqueeze(2)
+ out_2 = out_2.unsqueeze(3)
+
+ out_3 = self.bottom_path(out_1)
+ out = out_1 * (1 + out_2 * out_3)
+
+ return out
+
+
+@HEADS.register_module()
+class TopdownHeatmapMSMUHead(TopdownHeatmapBaseHead):
+ """Heads for multi-stage multi-unit heads used in Multi-Stage Pose
+ estimation Network (MSPN), and Residual Steps Networks (RSN).
+
+ Args:
+ unit_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ out_shape (tuple): Shape of the output heatmap.
+ num_stages (int): Number of stages.
+ num_units (int): Number of units in each stage.
+ use_prm (bool): Whether to use pose refine machine (PRM).
+ Default: False.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ Default: dict(type='BN')
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ out_shape,
+ unit_channels=256,
+ out_channels=17,
+ num_stages=4,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ # Protect mutable default arguments
+ norm_cfg = cp.deepcopy(norm_cfg)
+ super().__init__()
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self.out_shape = out_shape
+ self.unit_channels = unit_channels
+ self.out_channels = out_channels
+ self.num_stages = num_stages
+ self.num_units = num_units
+
+ self.loss = build_loss(loss_keypoint)
+
+ self.predict_layers = nn.ModuleList([])
+ for i in range(self.num_stages):
+ for j in range(self.num_units):
+ self.predict_layers.append(
+ PredictHeatmap(
+ unit_channels,
+ out_channels,
+ out_shape,
+ use_prm,
+ norm_cfg=norm_cfg))
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - num_outputs: O
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,O,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,O,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,O,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert isinstance(output, list)
+ assert target.dim() == 5 and target_weight.dim() == 4
+ assert target.size(1) == len(output)
+
+ if isinstance(self.loss, nn.Sequential):
+ assert len(self.loss) == len(output)
+ for i in range(len(output)):
+ target_i = target[:, i, :, :, :]
+ target_weight_i = target_weight[:, i, :, :]
+
+ if isinstance(self.loss, nn.Sequential):
+ loss_func = self.loss[i]
+ else:
+ loss_func = self.loss
+
+ loss_i = loss_func(output[i], target_i, target_weight_i)
+ if 'heatmap_loss' not in losses:
+ losses['heatmap_loss'] = loss_i
+ else:
+ losses['heatmap_loss'] += loss_i
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ assert isinstance(output, list)
+ assert target.dim() == 5 and target_weight.dim() == 4
+ _, avg_acc, _ = pose_pck_accuracy(
+ output[-1].detach().cpu().numpy(),
+ target[:, -1, ...].detach().cpu().numpy(),
+ target_weight[:, -1,
+ ...].detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function.
+
+ Returns:
+ out (list[Tensor]): a list of heatmaps from multiple stages
+ and units.
+ """
+ out = []
+ assert isinstance(x, list)
+ assert len(x) == self.num_stages
+ assert isinstance(x[0], list)
+ assert len(x[0]) == self.num_units
+ assert x[0][0].shape[1] == self.unit_channels
+ for i in range(self.num_stages):
+ for j in range(self.num_units):
+ y = self.predict_layers[i * self.num_units + j](x[i][j])
+ out.append(y)
+
+ return out
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (list[torch.Tensor[N,K,H,W]]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+ assert isinstance(output, list)
+ output = output[-1]
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.predict_layers.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
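+
+
+# --- Illustrative sketch (not part of the original OpenMMLab file) ---
+# A hedged broadcast example of the PRM gating out = out_1 * (1 + out_2 * out_3):
+# a per-channel attention vector (N, C, 1, 1) and a per-pixel attention map
+# (N, 1, H, W) modulate the refined feature map. The ConvModule branches that
+# produce out_2 and out_3 in PRM.forward are replaced by random tensors here.
+if __name__ == '__main__':
+    import torch  # local import; only torch.nn is imported above
+
+    N, C, H, W = 2, 17, 64, 48
+    out_1 = torch.randn(N, C, H, W)   # conv_bn_relu_prm_1 output
+    out_2 = torch.rand(N, C, 1, 1)    # middle (channel attention) path
+    out_3 = torch.rand(N, 1, H, W)    # bottom (spatial attention) path
+    out = out_1 * (1 + out_2 * out_3)
+    assert out.shape == (N, C, H, W)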
diff --git a/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_simple_head.py b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f3348b2ba06d43e6489e0235c4a883d567e5cd
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/topdown_heatmap_simple_head.py
@@ -0,0 +1,350 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.utils.ops import resize
+from ..builder import HEADS
+import torch.nn.functional as F
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class TopdownHeatmapSimpleHead(TopdownHeatmapBaseHead):
+ """Top-down heatmap simple head. paper ref: Bin Xiao et al. ``Simple
+ Baselines for Human Pose Estimation and Tracking``.
+
+ TopdownHeatmapSimpleHead is consisted of (>=0) number of deconv layers
+ and a simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None,
+ upsample=0,):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+ self.upsample = upsample
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels,
+ )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 4 and target_weight.dim() == 3
+ losses['heatmap_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+            - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type == 'GaussianHeatmap':
+ _, avg_acc, _ = pose_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ return x
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+                - 'resize_concat': Multiple feature maps will be resized to the
+                    same size as the first one and then concatenated together.
+                    Usually used in the FCN head of HRNet.
+                - 'multiple_select': Multiple feature maps will be bundled into
+                    a list and passed into the decode head.
+                - None: Only one selected feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+        if not isinstance(inputs, list):
+            if self.upsample > 0:
+                inputs = resize(
+                    input=F.relu(inputs),
+                    scale_factor=self.upsample,
+                    mode='bilinear',
+                    align_corners=self.align_corners)
+            return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
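+
+
+# --- Illustrative sketch (not part of the original OpenMMLab file) ---
+# A hedged example of the 'resize_concat' input transform: multi-level features
+# are upsampled to the size of the first map and concatenated along channels.
+# Plain F.interpolate stands in for mmpose.models.utils.ops.resize, which is
+# assumed to behave equivalently for this purpose.
+if __name__ == '__main__':
+    feats = [torch.randn(1, 16, 32, 32), torch.randn(1, 32, 16, 16)]
+    upsampled = [
+        F.interpolate(x, size=feats[0].shape[2:], mode='bilinear',
+                      align_corners=False)
+        for x in feats
+    ]
+    fused = torch.cat(upsampled, dim=1)
+    # channel counts add: 16 + 32 = 48; spatial size follows the first map
+    assert fused.shape == (1, 48, 32, 32)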
diff --git a/vendor/ViTPose/mmpose/models/heads/vipnas_heatmap_simple_head.py b/vendor/ViTPose/mmpose/models/heads/vipnas_heatmap_simple_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..41703128c45909733159a0869e091f61e9805756
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/vipnas_heatmap_simple_head.py
@@ -0,0 +1,349 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
+ constant_init, normal_init)
+
+from mmpose.core.evaluation import pose_pck_accuracy
+from mmpose.core.post_processing import flip_back
+from mmpose.models.builder import build_loss
+from mmpose.models.utils.ops import resize
+from ..builder import HEADS
+from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
+
+
+@HEADS.register_module()
+class ViPNASHeatmapSimpleHead(TopdownHeatmapBaseHead):
+ """ViPNAS heatmap simple head.
+
+ ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search.
+    More details can be found in the ViPNAS paper.
+
+    ViPNASHeatmapSimpleHead consists of zero or more deconv layers followed
+    by a simple conv2d layer.
+
+ Args:
+ in_channels (int): Number of input channels
+ out_channels (int): Number of output channels
+ num_deconv_layers (int): Number of deconv layers.
+            num_deconv_layers should be >= 0. Note that 0 means
+            no deconv layers.
+        num_deconv_filters (list|tuple): Number of filters. If
+            num_deconv_layers > 0, its length should equal num_deconv_layers.
+        num_deconv_kernels (list|tuple): Kernel sizes.
+ num_deconv_groups (list|tuple): Group number.
+ in_index (int|Sequence[int]): Input feature index. Default: -1
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+            - 'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in the FCN head of HRNet.
+            - 'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into the decode head.
+            - None: Only one selected feature map is allowed.
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ loss_keypoint (dict): Config for keypoint loss. Default: None.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_deconv_layers=3,
+ num_deconv_filters=(144, 144, 144),
+ num_deconv_kernels=(4, 4, 4),
+ num_deconv_groups=(16, 16, 16),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ loss_keypoint=None,
+ train_cfg=None,
+ test_cfg=None):
+ super().__init__()
+
+ self.in_channels = in_channels
+ self.loss = build_loss(loss_keypoint)
+
+ self.train_cfg = {} if train_cfg is None else train_cfg
+ self.test_cfg = {} if test_cfg is None else test_cfg
+ self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers, num_deconv_filters, num_deconv_kernels,
+ num_deconv_groups)
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ build_conv_layer(
+ dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(
+ build_norm_layer(dict(type='BN'), conv_channels)[1])
+ layers.append(nn.ReLU(inplace=True))
+
+ layers.append(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ def get_loss(self, output, target, target_weight):
+ """Calculate top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ losses = dict()
+
+ assert not isinstance(self.loss, nn.Sequential)
+ assert target.dim() == 4 and target_weight.dim() == 3
+ losses['heatmap_loss'] = self.loss(output, target, target_weight)
+
+ return losses
+
+ def get_accuracy(self, output, target, target_weight):
+ """Calculate accuracy for top-down keypoint loss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmaps height: H
+ - heatmaps width: W
+
+ Args:
+ output (torch.Tensor[N,K,H,W]): Output heatmaps.
+ target (torch.Tensor[N,K,H,W]): Target heatmaps.
+ target_weight (torch.Tensor[N,K,1]):
+ Weights across different joint types.
+ """
+
+ accuracy = dict()
+
+ if self.target_type.lower() == 'GaussianHeatmap'.lower():
+ _, avg_acc, _ = pose_pck_accuracy(
+ output.detach().cpu().numpy(),
+ target.detach().cpu().numpy(),
+ target_weight.detach().cpu().numpy().squeeze(-1) > 0)
+ accuracy['acc_pose'] = float(avg_acc)
+
+ return accuracy
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+ return x
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (torch.Tensor[N,K,H,W]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output.detach().cpu().numpy(),
+ flip_pairs,
+ target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.test_cfg.get('shift_heatmap', False):
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output.detach().cpu().numpy()
+ return output_heatmap
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+
+ The in_channels, in_index and input_transform must match.
+ Specifically, when input_transform is None, only single feature map
+ will be selected. So in_channels and in_index must be of type int.
+ When input_transform is not None, in_channels and in_index must be
+ list or tuple, with the same length.
+
+ Args:
+ in_channels (int|Sequence[int]): Input channels.
+ in_index (int|Sequence[int]): Input feature index.
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+
+ - 'resize_concat': Multiple feature maps will be resized to the
+ same size as the first one and then concatenated together.
+ Usually used in the FCN head of HRNet.
+ - 'multiple_select': Multiple feature maps will be bundled into
+ a list and passed into the decode head.
+ - None: Only one select feature map is allowed.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels,
+ num_groups):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_groups):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_groups({len(num_groups)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ groups = num_groups[i]
+ layers.append(
+ build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ groups=groups,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for _, m in self.deconv_layers.named_modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ for m in self.final_layer.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
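
For orientation, the following is a minimal torch-only sketch of the decoder that ViPNASHeatmapSimpleHead assembles with its default arguments (three grouped deconvolutions, each followed by BatchNorm and ReLU, then a 1x1 heatmap convolution). The backbone width, keypoint count and 16x12 feature size are illustrative assumptions; the real module is built through mmcv's build_upsample_layer/build_conv_layer as shown above.

import torch
import torch.nn as nn

in_channels, num_joints = 384, 17                 # assumed backbone width / keypoint count
filters, groups = (144, 144, 144), (16, 16, 16)   # defaults from __init__ above

layers, c = [], in_channels
for f, g in zip(filters, groups):
    # kernel 4 / stride 2 / padding 1 doubles the spatial resolution at every step
    layers += [nn.ConvTranspose2d(c, f, 4, stride=2, padding=1, groups=g, bias=False),
               nn.BatchNorm2d(f),
               nn.ReLU(inplace=True)]
    c = f
layers.append(nn.Conv2d(c, num_joints, kernel_size=1))  # final 1x1 conv to K heatmaps
head = nn.Sequential(*layers)

feats = torch.randn(2, in_channels, 16, 12)             # hypothetical feature map
print(head(feats).shape)                                # torch.Size([2, 17, 128, 96])
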
diff --git a/vendor/ViTPose/mmpose/models/heads/voxelpose_head.py b/vendor/ViTPose/mmpose/models/heads/voxelpose_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..8799bdc2c0a888973f6cf98f3da00c60a891e699
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/heads/voxelpose_head.py
@@ -0,0 +1,167 @@
+# ------------------------------------------------------------------------------
+# Copyright and License Information
+# https://github.com/microsoft/voxelpose-pytorch/blob/main/lib/models
+# Original Licence: MIT License
+# ------------------------------------------------------------------------------
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import HEADS
+
+
+@HEADS.register_module()
+class CuboidCenterHead(nn.Module):
+ """Get results from the 3D human center heatmap. In this module, human 3D
+ centers are local maximums obtained from the 3D heatmap via NMS (max-
+ pooling).
+
+ Args:
+ space_size (list[3]): The size of the 3D space.
+ cube_size (list[3]): The size of the heatmap volume.
+ space_center (list[3]): The coordinate of space center.
+ max_num (int): Maximum of human center detections.
+ max_pool_kernel (int): Kernel size of the max-pool kernel in nms.
+ """
+
+ def __init__(self,
+ space_size,
+ space_center,
+ cube_size,
+ max_num=10,
+ max_pool_kernel=3):
+ super(CuboidCenterHead, self).__init__()
+ # use register_buffer
+ self.register_buffer('grid_size', torch.tensor(space_size))
+ self.register_buffer('cube_size', torch.tensor(cube_size))
+ self.register_buffer('grid_center', torch.tensor(space_center))
+
+ self.num_candidates = max_num
+ self.max_pool_kernel = max_pool_kernel
+ self.loss = nn.MSELoss()
+
+ def _get_real_locations(self, indices):
+ """
+ Args:
+ indices (torch.Tensor(NXP)): Indices of points in the 3D tensor
+
+ Returns:
+ real_locations (torch.Tensor(NXPx3)): Locations of points
+ in the world coordinate system
+ """
+ real_locations = indices.float() / (
+ self.cube_size - 1) * self.grid_size + \
+ self.grid_center - self.grid_size / 2.0
+ return real_locations
+
+ def _nms_by_max_pool(self, heatmap_volumes):
+ max_num = self.num_candidates
+ batch_size = heatmap_volumes.shape[0]
+ root_cubes_nms = self._max_pool(heatmap_volumes)
+ root_cubes_nms_reshape = root_cubes_nms.reshape(batch_size, -1)
+ topk_values, topk_index = root_cubes_nms_reshape.topk(max_num)
+ topk_unravel_index = self._get_3d_indices(topk_index,
+ heatmap_volumes[0].shape)
+
+ return topk_values, topk_unravel_index
+
+ def _max_pool(self, inputs):
+ kernel = self.max_pool_kernel
+ padding = (kernel - 1) // 2
+ pooled = F.max_pool3d(
+ inputs, kernel_size=kernel, stride=1, padding=padding)
+ keep = (inputs == pooled).float()
+ return keep * inputs
+
+ @staticmethod
+ def _get_3d_indices(indices, shape):
+ """Get indices in the 3-D tensor.
+
+ Args:
+ indices (torch.Tensor(NXp)): Indices of points in the 1D tensor
+ shape (torch.Size(3)): The shape of the original 3D tensor
+
+ Returns:
+ indices: Indices of points in the original 3D tensor
+ """
+ batch_size = indices.shape[0]
+ num_people = indices.shape[1]
+ indices_x = (indices //
+ (shape[1] * shape[2])).reshape(batch_size, num_people, -1)
+ indices_y = ((indices % (shape[1] * shape[2])) //
+ shape[2]).reshape(batch_size, num_people, -1)
+ indices_z = (indices % shape[2]).reshape(batch_size, num_people, -1)
+ indices = torch.cat([indices_x, indices_y, indices_z], dim=2)
+ return indices
+
+ def forward(self, heatmap_volumes):
+ """
+
+ Args:
+ heatmap_volumes (torch.Tensor(NXLXWXH)):
+ 3D human center heatmaps predicted by the network.
+ Returns:
+ human_centers (torch.Tensor(NXPX5)):
+ Coordinates of human centers.
+ """
+ batch_size = heatmap_volumes.shape[0]
+
+ topk_values, topk_unravel_index = self._nms_by_max_pool(
+ heatmap_volumes.detach())
+
+ topk_unravel_index = self._get_real_locations(topk_unravel_index)
+
+ human_centers = torch.zeros(
+ batch_size, self.num_candidates, 5, device=heatmap_volumes.device)
+ human_centers[:, :, 0:3] = topk_unravel_index
+ human_centers[:, :, 4] = topk_values
+
+ return human_centers
+
+ def get_loss(self, pred_cubes, gt):
+
+ return dict(loss_center=self.loss(pred_cubes, gt))
+
+
+@HEADS.register_module()
+class CuboidPoseHead(nn.Module):
+
+ def __init__(self, beta):
+ """Get results from the 3D human pose heatmap. Instead of obtaining
+ maximums on the heatmap, this module regresses the coordinates of
+ keypoints via integral pose regression. Refer to the paper
+ for more details.
+
+ Args:
+ beta: Constant to adjust the magnification of soft-maxed heatmap.
+ """
+ super(CuboidPoseHead, self).__init__()
+ self.beta = beta
+ self.loss = nn.L1Loss()
+
+ def forward(self, heatmap_volumes, grid_coordinates):
+ """
+
+ Args:
+ heatmap_volumes (torch.Tensor(NxKxLxWxH)):
+ 3D human pose heatmaps predicted by the network.
+ grid_coordinates (torch.Tensor(Nx(LxWxH)x3)):
+ Coordinates of the grids in the heatmap volumes.
+ Returns:
+ human_poses (torch.Tensor(NxKx3)): Coordinates of human poses.
+ """
+ batch_size = heatmap_volumes.size(0)
+ channel = heatmap_volumes.size(1)
+ x = heatmap_volumes.reshape(batch_size, channel, -1, 1)
+ x = F.softmax(self.beta * x, dim=2)
+ grid_coordinates = grid_coordinates.unsqueeze(1)
+ x = torch.mul(x, grid_coordinates)
+ human_poses = torch.sum(x, dim=2)
+
+ return human_poses
+
+ def get_loss(self, preds, targets, weights):
+
+ return dict(loss_pose=self.loss(preds * weights, targets * weights))
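
As a quick illustration of the integral (soft-argmax) readout in CuboidPoseHead.forward: the heatmap volume is flattened, sharpened with a softmax, and the keypoint location is the resulting expectation over the voxel grid. A minimal sketch, with the sizes and beta chosen arbitrarily:

import torch
import torch.nn.functional as F

N, K, L, W, H, beta = 1, 2, 4, 4, 4, 100.0
heatmaps = torch.randn(N, K, L, W, H)          # hypothetical per-keypoint volumes
grid = torch.rand(N, L * W * H, 3)             # world coordinates of every voxel

x = heatmaps.reshape(N, K, -1, 1)
x = F.softmax(beta * x, dim=2)                 # (N, K, LWH, 1), sums to 1 over voxels
poses = (x * grid.unsqueeze(1)).sum(dim=2)     # (N, K, 3): expected keypoint locations
print(poses.shape)                             # torch.Size([1, 2, 3])
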
diff --git a/vendor/ViTPose/mmpose/models/losses/__init__.py b/vendor/ViTPose/mmpose/models/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d67973fc5cb53e85faa918719944d8c02f2190cd
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .classfication_loss import BCELoss
+from .heatmap_loss import AdaptiveWingLoss
+from .mesh_loss import GANLoss, MeshLoss
+from .mse_loss import JointsMSELoss, JointsOHKMMSELoss
+from .multi_loss_factory import AELoss, HeatmapLoss, MultiLossFactory
+from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, MSELoss,
+ SemiSupervisionLoss, SmoothL1Loss, SoftWingLoss,
+ WingLoss)
+
+__all__ = [
+ 'JointsMSELoss', 'JointsOHKMMSELoss', 'HeatmapLoss', 'AELoss',
+ 'MultiLossFactory', 'MeshLoss', 'GANLoss', 'SmoothL1Loss', 'WingLoss',
+ 'MPJPELoss', 'MSELoss', 'L1Loss', 'BCELoss', 'BoneLoss',
+ 'SemiSupervisionLoss', 'SoftWingLoss', 'AdaptiveWingLoss'
+]
diff --git a/vendor/ViTPose/mmpose/models/losses/classfication_loss.py b/vendor/ViTPose/mmpose/models/losses/classfication_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79b69d035611f75f10e8722aaea4362659509e2
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/classfication_loss.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class BCELoss(nn.Module):
+ """Binary Cross Entropy loss."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.binary_cross_entropy
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_labels: K
+
+ Args:
+ output (torch.Tensor[N, K]): Output classification.
+ target (torch.Tensor[N, K]): Target classification.
+ target_weight (torch.Tensor[N, K] or torch.Tensor[N]):
+ Weights across different labels.
+ """
+
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output, target, reduction='none')
+ if target_weight.dim() == 1:
+ target_weight = target_weight[:, None]
+ loss = (loss * target_weight).mean()
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
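
A small sketch of the weighted branch of BCELoss above, written directly against torch.nn.functional so it runs without the mmpose registry; the shapes and per-label weights are assumptions (a zero weight simply masks that label out of the average):

import torch
import torch.nn.functional as F

output = torch.sigmoid(torch.randn(4, 3))        # N=4 samples, K=3 labels
target = torch.randint(0, 2, (4, 3)).float()
weight = torch.tensor([1.0, 0.5, 0.0]).expand(4, 3)

loss = (F.binary_cross_entropy(output, target, reduction='none') * weight).mean()
print(loss)
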
diff --git a/vendor/ViTPose/mmpose/models/losses/heatmap_loss.py b/vendor/ViTPose/mmpose/models/losses/heatmap_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..9471457ca0da2d43441da1d394bc45b3e8ca3ee7
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/heatmap_loss.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class AdaptiveWingLoss(nn.Module):
+ """Adaptive wing loss. paper ref: 'Adaptive Wing Loss for Robust Face
+ Alignment via Heatmap Regression' Wang et al. ICCV'2019.
+
+ Args:
+ alpha (float), omega (float), epsilon (float), theta (float)
+ are hyper-parameters.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ alpha=2.1,
+ omega=14,
+ epsilon=1,
+ theta=0.5,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.alpha = float(alpha)
+ self.omega = float(omega)
+ self.epsilon = float(epsilon)
+ self.theta = float(theta)
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+
+ Args:
+ pred (torch.Tensor[NxKxHxW]): Predicted heatmaps.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ """
+ H, W = pred.shape[2:4]
+ delta = (target - pred).abs()
+
+ A = self.omega * (
+ 1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - target))
+ ) * (self.alpha - target) * (torch.pow(
+ self.theta / self.epsilon,
+ self.alpha - target - 1)) * (1 / self.epsilon)
+ C = self.theta * A - self.omega * torch.log(
+ 1 + torch.pow(self.theta / self.epsilon, self.alpha - target))
+
+ losses = torch.where(
+ delta < self.theta,
+ self.omega *
+ torch.log(1 +
+ torch.pow(delta / self.epsilon, self.alpha - target)),
+ A * delta - C)
+
+ return torch.mean(losses)
+
+ def forward(self, output, target, target_weight):
+ """Forward function.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+
+ Args:
+ output (torch.Tensor[NxKxHxW]): Output heatmaps.
+ target (torch.Tensor[NxKxHxW]): Target heatmaps.
+ target_weight (torch.Tensor[NxKx1]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ loss = self.criterion(output * target_weight.unsqueeze(-1),
+ target * target_weight.unsqueeze(-1))
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
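
To make the two branches of AdaptiveWingLoss concrete, the sketch below evaluates the criterion for a single heatmap pixel with the default hyper-parameters; A and C are computed exactly as in criterion() above, and the pred/target values are arbitrary:

import torch

alpha, omega, epsilon, theta = 2.1, 14.0, 1.0, 0.5
pred, target = torch.tensor(0.3), torch.tensor(0.9)
delta = (target - pred).abs()                      # 0.6 > theta, so the linear branch applies

A = omega * (1 / (1 + (theta / epsilon) ** (alpha - target))) \
    * (alpha - target) * (theta / epsilon) ** (alpha - target - 1) / epsilon
C = theta * A - omega * torch.log(1 + (theta / epsilon) ** (alpha - target))
loss = torch.where(delta < theta,
                   omega * torch.log(1 + (delta / epsilon) ** (alpha - target)),
                   A * delta - C)
print(loss)
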
diff --git a/vendor/ViTPose/mmpose/models/losses/mesh_loss.py b/vendor/ViTPose/mmpose/models/losses/mesh_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9d18bd7296a189ec2f24c422cc05a19035d3224
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/mesh_loss.py
@@ -0,0 +1,340 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from ..utils.geometry import batch_rodrigues
+
+
+def perspective_projection(points, rotation, translation, focal_length,
+ camera_center):
+ """This function computes the perspective projection of a set of 3D points.
+
+ Note:
+ - batch size: B
+ - point number: N
+
+ Args:
+ points (Tensor([B, N, 3])): A set of 3D points
+ rotation (Tensor([B, 3, 3])): Camera rotation matrix
+ translation (Tensor([B, 3])): Camera translation
+ focal_length (Tensor([B,])): Focal length
+ camera_center (Tensor([B, 2])): Camera center
+
+ Returns:
+ projected_points (Tensor([B, N, 2])): Projected 2D
+ points in image space.
+ """
+
+ batch_size = points.shape[0]
+ K = torch.zeros([batch_size, 3, 3], device=points.device)
+ K[:, 0, 0] = focal_length
+ K[:, 1, 1] = focal_length
+ K[:, 2, 2] = 1.
+ K[:, :-1, -1] = camera_center
+
+ # Transform points
+ points = torch.einsum('bij,bkj->bki', rotation, points)
+ points = points + translation.unsqueeze(1)
+
+ # Apply perspective distortion
+ projected_points = points / points[:, :, -1].unsqueeze(-1)
+
+ # Apply camera intrinsics
+ projected_points = torch.einsum('bij,bkj->bki', K, projected_points)
+ projected_points = projected_points[:, :, :-1]
+ return projected_points
+
+
+@LOSSES.register_module()
+class MeshLoss(nn.Module):
+ """Mix loss for 3D human mesh. It is composed of loss on 2D joints, 3D
+ joints, mesh vertices and smpl parameters (if any).
+
+ Args:
+ joints_2d_loss_weight (float): Weight for loss on 2D joints.
+ joints_3d_loss_weight (float): Weight for loss on 3D joints.
+ vertex_loss_weight (float): Weight for loss on 3D vertices.
+ smpl_pose_loss_weight (float): Weight for loss on SMPL
+ pose parameters.
+ smpl_beta_loss_weight (float): Weight for loss on SMPL
+ shape parameters.
+ img_res (int): Input image resolution.
+ focal_length (float): Focal length of camera model. Default=5000.
+ """
+
+ def __init__(self,
+ joints_2d_loss_weight,
+ joints_3d_loss_weight,
+ vertex_loss_weight,
+ smpl_pose_loss_weight,
+ smpl_beta_loss_weight,
+ img_res,
+ focal_length=5000):
+
+ super().__init__()
+ # Per-vertex loss on the mesh
+ self.criterion_vertex = nn.L1Loss(reduction='none')
+
+ # Joints (2D and 3D) loss
+ self.criterion_joints_2d = nn.SmoothL1Loss(reduction='none')
+ self.criterion_joints_3d = nn.SmoothL1Loss(reduction='none')
+
+ # Loss for SMPL parameter regression
+ self.criterion_regr = nn.MSELoss(reduction='none')
+
+ self.joints_2d_loss_weight = joints_2d_loss_weight
+ self.joints_3d_loss_weight = joints_3d_loss_weight
+ self.vertex_loss_weight = vertex_loss_weight
+ self.smpl_pose_loss_weight = smpl_pose_loss_weight
+ self.smpl_beta_loss_weight = smpl_beta_loss_weight
+ self.focal_length = focal_length
+ self.img_res = img_res
+
+ def joints_2d_loss(self, pred_joints_2d, gt_joints_2d, joints_2d_visible):
+ """Compute 2D reprojection loss on the joints.
+
+ The loss is weighted by joints_2d_visible.
+ """
+ conf = joints_2d_visible.float()
+ loss = (conf *
+ self.criterion_joints_2d(pred_joints_2d, gt_joints_2d)).mean()
+ return loss
+
+ def joints_3d_loss(self, pred_joints_3d, gt_joints_3d, joints_3d_visible):
+ """Compute 3D joints loss for the examples that 3D joint annotations
+ are available.
+
+ The loss is weighted by joints_3d_visible.
+ """
+ conf = joints_3d_visible.float()
+ if len(gt_joints_3d) > 0:
+ gt_pelvis = (gt_joints_3d[:, 2, :] + gt_joints_3d[:, 3, :]) / 2
+ gt_joints_3d = gt_joints_3d - gt_pelvis[:, None, :]
+ pred_pelvis = (pred_joints_3d[:, 2, :] +
+ pred_joints_3d[:, 3, :]) / 2
+ pred_joints_3d = pred_joints_3d - pred_pelvis[:, None, :]
+ return (
+ conf *
+ self.criterion_joints_3d(pred_joints_3d, gt_joints_3d)).mean()
+ return pred_joints_3d.sum() * 0
+
+ def vertex_loss(self, pred_vertices, gt_vertices, has_smpl):
+ """Compute 3D vertex loss for the examples that 3D human mesh
+ annotations are available.
+
+ The loss is weighted by the has_smpl.
+ """
+ conf = has_smpl.float()
+ loss_vertex = self.criterion_vertex(pred_vertices, gt_vertices)
+ loss_vertex = (conf[:, None, None] * loss_vertex).mean()
+ return loss_vertex
+
+ def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas,
+ has_smpl):
+ """Compute SMPL parameters loss for the examples that SMPL parameter
+ annotations are available.
+
+ The loss is weighted by has_smpl.
+ """
+ conf = has_smpl.float()
+ gt_rotmat = batch_rodrigues(gt_pose.view(-1, 3)).view(-1, 24, 3, 3)
+ loss_regr_pose = self.criterion_regr(pred_rotmat, gt_rotmat)
+ loss_regr_betas = self.criterion_regr(pred_betas, gt_betas)
+ loss_regr_pose = (conf[:, None, None, None] * loss_regr_pose).mean()
+ loss_regr_betas = (conf[:, None] * loss_regr_betas).mean()
+ return loss_regr_pose, loss_regr_betas
+
+ def project_points(self, points_3d, camera):
+ """Perform orthographic projection of 3D points using the camera
+ parameters, return projected 2D points in image plane.
+
+ Note:
+ - batch size: B
+ - point number: N
+
+ Args:
+ points_3d (Tensor([B, N, 3])): 3D points.
+ camera (Tensor([B, 3])): camera parameters with the
+ 3 channel as (scale, translation_x, translation_y)
+
+ Returns:
+ Tensor([B, N, 2]): projected 2D points \
+ in image space.
+ """
+ batch_size = points_3d.shape[0]
+ device = points_3d.device
+ cam_t = torch.stack([
+ camera[:, 1], camera[:, 2], 2 * self.focal_length /
+ (self.img_res * camera[:, 0] + 1e-9)
+ ],
+ dim=-1)
+ camera_center = camera.new_zeros([batch_size, 2])
+ rot_t = torch.eye(
+ 3, device=device,
+ dtype=points_3d.dtype).unsqueeze(0).expand(batch_size, -1, -1)
+ joints_2d = perspective_projection(
+ points_3d,
+ rotation=rot_t,
+ translation=cam_t,
+ focal_length=self.focal_length,
+ camera_center=camera_center)
+ return joints_2d
+
+ def forward(self, output, target):
+ """Forward function.
+
+ Args:
+ output (dict): dict of network predicted results.
+ Keys: 'vertices', 'joints_3d', 'camera',
+ 'pose'(optional), 'beta'(optional)
+ target (dict): dict of ground-truth labels.
+ Keys: 'vertices', 'joints_3d', 'joints_3d_visible',
+ 'joints_2d', 'joints_2d_visible', 'pose', 'beta',
+ 'has_smpl'
+
+ Returns:
+ dict: dict of losses.
+ """
+ losses = {}
+
+ # Per-vertex loss for the shape
+ pred_vertices = output['vertices']
+
+ gt_vertices = target['vertices']
+ has_smpl = target['has_smpl']
+ loss_vertex = self.vertex_loss(pred_vertices, gt_vertices, has_smpl)
+ losses['vertex_loss'] = loss_vertex * self.vertex_loss_weight
+
+ # Compute loss on SMPL parameters, if available
+ if 'pose' in output.keys() and 'beta' in output.keys():
+ pred_rotmat = output['pose']
+ pred_betas = output['beta']
+ gt_pose = target['pose']
+ gt_betas = target['beta']
+ loss_regr_pose, loss_regr_betas = self.smpl_losses(
+ pred_rotmat, pred_betas, gt_pose, gt_betas, has_smpl)
+ losses['smpl_pose_loss'] = \
+ loss_regr_pose * self.smpl_pose_loss_weight
+ losses['smpl_beta_loss'] = \
+ loss_regr_betas * self.smpl_beta_loss_weight
+
+ # Compute 3D joints loss
+ pred_joints_3d = output['joints_3d']
+ gt_joints_3d = target['joints_3d']
+ joints_3d_visible = target['joints_3d_visible']
+ loss_joints_3d = self.joints_3d_loss(pred_joints_3d, gt_joints_3d,
+ joints_3d_visible)
+ losses['joints_3d_loss'] = loss_joints_3d * self.joints_3d_loss_weight
+
+ # Compute 2D reprojection loss for the 2D joints
+ pred_camera = output['camera']
+ gt_joints_2d = target['joints_2d']
+ joints_2d_visible = target['joints_2d_visible']
+ pred_joints_2d = self.project_points(pred_joints_3d, pred_camera)
+
+ # Normalize keypoints to [-1,1]
+ # The coordinate origin of pred_joints_2d is
+ # the center of the input image.
+ pred_joints_2d = 2 * pred_joints_2d / (self.img_res - 1)
+ # The coordinate origin of gt_joints_2d is
+ # the top left corner of the input image.
+ gt_joints_2d = 2 * gt_joints_2d / (self.img_res - 1) - 1
+ loss_joints_2d = self.joints_2d_loss(pred_joints_2d, gt_joints_2d,
+ joints_2d_visible)
+ losses['joints_2d_loss'] = loss_joints_2d * self.joints_2d_loss_weight
+
+ return losses
+
+
+@LOSSES.register_module()
+class GANLoss(nn.Module):
+ """Define GAN loss.
+
+ Args:
+ gan_type (str): Support 'vanilla', 'lsgan', 'wgan', 'hinge'.
+ real_label_val (float): The value for real label. Default: 1.0.
+ fake_label_val (float): The value for fake label. Default: 0.0.
+ loss_weight (float): Loss weight. Default: 1.0.
+ Note that loss_weight is only for generators; it is always 1.0
+ for discriminators.
+ """
+
+ def __init__(self,
+ gan_type,
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=1.0):
+ super().__init__()
+ self.gan_type = gan_type
+ self.loss_weight = loss_weight
+ self.real_label_val = real_label_val
+ self.fake_label_val = fake_label_val
+
+ if self.gan_type == 'vanilla':
+ self.loss = nn.BCEWithLogitsLoss()
+ elif self.gan_type == 'lsgan':
+ self.loss = nn.MSELoss()
+ elif self.gan_type == 'wgan':
+ self.loss = self._wgan_loss
+ elif self.gan_type == 'hinge':
+ self.loss = nn.ReLU()
+ else:
+ raise NotImplementedError(
+ f'GAN type {self.gan_type} is not implemented.')
+
+ @staticmethod
+ def _wgan_loss(input, target):
+ """wgan loss.
+
+ Args:
+ input (Tensor): Input tensor.
+ target (bool): Target label.
+
+ Returns:
+ Tensor: wgan loss.
+ """
+ return -input.mean() if target else input.mean()
+
+ def get_target_label(self, input, target_is_real):
+ """Get target label.
+
+ Args:
+ input (Tensor): Input tensor.
+ target_is_real (bool): Whether the target is real or fake.
+
+ Returns:
+ (bool | Tensor): Target tensor. Return bool for wgan, \
+ otherwise, return Tensor.
+ """
+
+ if self.gan_type == 'wgan':
+ return target_is_real
+ target_val = (
+ self.real_label_val if target_is_real else self.fake_label_val)
+ return input.new_ones(input.size()) * target_val
+
+ def forward(self, input, target_is_real, is_disc=False):
+ """
+ Args:
+ input (Tensor): The input for the loss module, i.e., the network
+ prediction.
+ target_is_real (bool): Whether the target is real or fake.
+ is_disc (bool): Whether the loss for discriminators or not.
+ Default: False.
+
+ Returns:
+ Tensor: GAN loss value.
+ """
+ target_label = self.get_target_label(input, target_is_real)
+ if self.gan_type == 'hinge':
+ if is_disc: # for discriminators in hinge-gan
+ input = -input if target_is_real else input
+ loss = self.loss(1 + input).mean()
+ else: # for generators in hinge-gan
+ loss = -input.mean()
+ else: # other gan types
+ loss = self.loss(input, target_label)
+
+ # loss_weight is always 1.0 for discriminators
+ return loss if is_disc else loss * self.loss_weight
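
A small sanity sketch for perspective_projection defined at the top of this file: with an identity rotation and zero translation, a point on the optical axis should land exactly on the camera center. The focal length and the 224x224 crop (center at 112, 112) are assumptions; the arithmetic mirrors the function body.

import torch

points = torch.tensor([[[0.0, 0.0, 2.0]]])       # B=1, N=1 point, 2 m in front of the camera
rotation = torch.eye(3).unsqueeze(0)             # identity camera rotation
translation = torch.zeros(1, 3)
focal = torch.tensor([5000.0])
center = torch.tensor([[112.0, 112.0]])

K = torch.zeros(1, 3, 3)
K[:, 0, 0] = focal
K[:, 1, 1] = focal
K[:, 2, 2] = 1.0
K[:, :2, 2] = center

p = torch.einsum('bij,bkj->bki', rotation, points) + translation.unsqueeze(1)
p = p / p[:, :, -1].unsqueeze(-1)                # perspective division by depth
uv = torch.einsum('bij,bkj->bki', K, p)[:, :, :2]
print(uv)                                        # tensor([[[112., 112.]]])
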
diff --git a/vendor/ViTPose/mmpose/models/losses/mse_loss.py b/vendor/ViTPose/mmpose/models/losses/mse_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f972efadfdfe0093c9ae1b308c6f82a9ccd72f73
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/mse_loss.py
@@ -0,0 +1,153 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class JointsMSELoss(nn.Module):
+ """MSE loss for heatmaps.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = nn.MSELoss()
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight):
+ """Forward function."""
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ loss = 0.
+
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ loss += self.criterion(heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx])
+ else:
+ loss += self.criterion(heatmap_pred, heatmap_gt)
+
+ return loss / num_joints * self.loss_weight
+
+
+@LOSSES.register_module()
+class CombinedTargetMSELoss(nn.Module):
+ """MSE loss for combined target.
+ CombinedTarget: The combination of classification target
+ (response map) and regression target (offset map).
+ Paper ref: Huang et al. The Devil is in the Details: Delving into
+ Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight, loss_weight=1.):
+ super().__init__()
+ self.criterion = nn.MSELoss(reduction='mean')
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight):
+ batch_size = output.size(0)
+ num_channels = output.size(1)
+ heatmaps_pred = output.reshape(
+ (batch_size, num_channels, -1)).split(1, 1)
+ heatmaps_gt = target.reshape(
+ (batch_size, num_channels, -1)).split(1, 1)
+ loss = 0.
+ num_joints = num_channels // 3
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx * 3].squeeze()
+ heatmap_gt = heatmaps_gt[idx * 3].squeeze()
+ offset_x_pred = heatmaps_pred[idx * 3 + 1].squeeze()
+ offset_x_gt = heatmaps_gt[idx * 3 + 1].squeeze()
+ offset_y_pred = heatmaps_pred[idx * 3 + 2].squeeze()
+ offset_y_gt = heatmaps_gt[idx * 3 + 2].squeeze()
+ if self.use_target_weight:
+ heatmap_pred = heatmap_pred * target_weight[:, idx]
+ heatmap_gt = heatmap_gt * target_weight[:, idx]
+ # classification loss
+ loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
+ # regression loss
+ loss += 0.5 * self.criterion(heatmap_gt * offset_x_pred,
+ heatmap_gt * offset_x_gt)
+ loss += 0.5 * self.criterion(heatmap_gt * offset_y_pred,
+ heatmap_gt * offset_y_gt)
+ return loss / num_joints * self.loss_weight
+
+
+@LOSSES.register_module()
+class JointsOHKMMSELoss(nn.Module):
+ """MSE loss with online hard keypoint mining.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ topk (int): Only top k joint losses are kept.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, topk=8, loss_weight=1.):
+ super().__init__()
+ assert topk > 0
+ self.criterion = nn.MSELoss(reduction='none')
+ self.use_target_weight = use_target_weight
+ self.topk = topk
+ self.loss_weight = loss_weight
+
+ def _ohkm(self, loss):
+ """Online hard keypoint mining."""
+ ohkm_loss = 0.
+ N = len(loss)
+ for i in range(N):
+ sub_loss = loss[i]
+ _, topk_idx = torch.topk(
+ sub_loss, k=self.topk, dim=0, sorted=False)
+ tmp_loss = torch.gather(sub_loss, 0, topk_idx)
+ ohkm_loss += torch.sum(tmp_loss) / self.topk
+ ohkm_loss /= N
+ return ohkm_loss
+
+ def forward(self, output, target, target_weight):
+ """Forward function."""
+ batch_size = output.size(0)
+ num_joints = output.size(1)
+ if num_joints < self.topk:
+ raise ValueError(f'topk ({self.topk}) should not be '
+ f'larger than num_joints ({num_joints}).')
+ heatmaps_pred = output.reshape(
+ (batch_size, num_joints, -1)).split(1, 1)
+ heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+
+ losses = []
+ for idx in range(num_joints):
+ heatmap_pred = heatmaps_pred[idx].squeeze(1)
+ heatmap_gt = heatmaps_gt[idx].squeeze(1)
+ if self.use_target_weight:
+ losses.append(
+ self.criterion(heatmap_pred * target_weight[:, idx],
+ heatmap_gt * target_weight[:, idx]))
+ else:
+ losses.append(self.criterion(heatmap_pred, heatmap_gt))
+
+ losses = [loss.mean(dim=1).unsqueeze(dim=1) for loss in losses]
+ losses = torch.cat(losses, dim=1)
+
+ return self._ohkm(losses) * self.loss_weight
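
A short usage sketch for JointsMSELoss, assuming vendor/ViTPose (and its mmcv dependency) is importable from the current environment; the 17-keypoint, 64x48 heatmap shapes follow the N, K, H, W convention used in the docstrings and are otherwise arbitrary:

import torch
from mmpose.models.losses import JointsMSELoss

criterion = JointsMSELoss(use_target_weight=True)
pred = torch.rand(2, 17, 64, 48)      # predicted heatmaps
gt = torch.rand(2, 17, 64, 48)        # target heatmaps
weight = torch.ones(2, 17, 1)         # per-joint weights
print(criterion(pred, gt, weight))    # scalar loss tensor
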
diff --git a/vendor/ViTPose/mmpose/models/losses/multi_loss_factory.py b/vendor/ViTPose/mmpose/models/losses/multi_loss_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..65f90a761d0e5f94309023288f0d3ec848ec82dd
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/multi_loss_factory.py
@@ -0,0 +1,281 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+
+
+def _make_input(t, requires_grad=False, device=torch.device('cpu')):
+ """Make zero inputs for AE loss.
+
+ Args:
+ t (torch.Tensor): input
+ requires_grad (bool): Option to use requires_grad.
+ device: torch device
+
+ Returns:
+ torch.Tensor: zero input.
+ """
+ inp = torch.autograd.Variable(t, requires_grad=requires_grad)
+ inp = inp.sum()
+ inp = inp.to(device)
+ return inp
+
+
+@LOSSES.register_module()
+class HeatmapLoss(nn.Module):
+ """Accumulate the heatmap loss for each image in the batch.
+
+ Args:
+ supervise_empty (bool): Whether to supervise empty channels.
+ """
+
+ def __init__(self, supervise_empty=True):
+ super().__init__()
+ self.supervise_empty = supervise_empty
+
+ def forward(self, pred, gt, mask):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ pred (torch.Tensor[N,K,H,W]): heatmap of output.
+ gt (torch.Tensor[N,K,H,W]): target heatmap.
+ mask (torch.Tensor[N,H,W]): mask of target.
+ """
+ assert pred.size() == gt.size(
+ ), f'pred.size() is {pred.size()}, gt.size() is {gt.size()}'
+
+ if not self.supervise_empty:
+ empty_mask = (gt.sum(dim=[2, 3], keepdim=True) > 0).float()
+ loss = ((pred - gt)**2) * empty_mask.expand_as(
+ pred) * mask[:, None, :, :].expand_as(pred)
+ else:
+ loss = ((pred - gt)**2) * mask[:, None, :, :].expand_as(pred)
+ loss = loss.mean(dim=3).mean(dim=2).mean(dim=1)
+ return loss
+
+
+@LOSSES.register_module()
+class AELoss(nn.Module):
+ """Associative Embedding loss.
+
+ Associative Embedding: End-to-End Learning for Joint Detection and
+ Grouping.
+ """
+
+ def __init__(self, loss_type):
+ super().__init__()
+ self.loss_type = loss_type
+
+ def singleTagLoss(self, pred_tag, joints):
+ """Associative embedding loss for one image.
+
+ Note:
+ - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ pred_tag (torch.Tensor[KxHxW,1]): tag of output for one image.
+ joints (torch.Tensor[M,K,2]): joints information for one image.
+ """
+ tags = []
+ pull = 0
+ for joints_per_person in joints:
+ tmp = []
+ for joint in joints_per_person:
+ if joint[1] > 0:
+ tmp.append(pred_tag[joint[0]])
+ if len(tmp) == 0:
+ continue
+ tmp = torch.stack(tmp)
+ tags.append(torch.mean(tmp, dim=0))
+ pull = pull + torch.mean((tmp - tags[-1].expand_as(tmp))**2)
+
+ num_tags = len(tags)
+ if num_tags == 0:
+ return (
+ _make_input(torch.zeros(1).float(), device=pred_tag.device),
+ _make_input(torch.zeros(1).float(), device=pred_tag.device))
+ elif num_tags == 1:
+ return (_make_input(
+ torch.zeros(1).float(), device=pred_tag.device), pull)
+
+ tags = torch.stack(tags)
+
+ size = (num_tags, num_tags)
+ A = tags.expand(*size)
+ B = A.permute(1, 0)
+
+ diff = A - B
+
+ if self.loss_type == 'exp':
+ diff = torch.pow(diff, 2)
+ push = torch.exp(-diff)
+ push = torch.sum(push) - num_tags
+ elif self.loss_type == 'max':
+ diff = 1 - torch.abs(diff)
+ push = torch.clamp(diff, min=0).sum() - num_tags
+ else:
+ raise ValueError('Unknown ae loss type')
+
+ push_loss = push / ((num_tags - 1) * num_tags) * 0.5
+ pull_loss = pull / (num_tags)
+
+ return push_loss, pull_loss
+
+ def forward(self, tags, joints):
+ """Accumulate the tag loss for each image in the batch.
+
+ Note:
+ - batch_size: N
+ - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+
+ Args:
+ tags (torch.Tensor[N,KxHxW,1]): tag channels of output.
+ joints (torch.Tensor[N,M,K,2]): joints information.
+ """
+ pushes, pulls = [], []
+ joints = joints.cpu().data.numpy()
+ batch_size = tags.size(0)
+ for i in range(batch_size):
+ push, pull = self.singleTagLoss(tags[i], joints[i])
+ pushes.append(push)
+ pulls.append(pull)
+ return torch.stack(pushes), torch.stack(pulls)
+
+
+@LOSSES.register_module()
+class MultiLossFactory(nn.Module):
+ """Loss for bottom-up models.
+
+ Args:
+ num_joints (int): Number of keypoints.
+ num_stages (int): Number of stages.
+ ae_loss_type (str): Type of ae loss.
+ with_ae_loss (list[bool]): Use ae loss or not in multi-heatmap.
+ push_loss_factor (list[float]):
+ Parameter of push loss in multi-heatmap.
+ pull_loss_factor (list[float]):
+ Parameter of pull loss in multi-heatmap.
+ with_heatmap_loss (list[bool]):
+ Use heatmap loss or not in multi-heatmap.
+ heatmaps_loss_factor (list[float]):
+ Parameter of heatmap loss in multi-heatmap.
+ supervise_empty (bool): Whether to supervise empty channels.
+ """
+
+ def __init__(self,
+ num_joints,
+ num_stages,
+ ae_loss_type,
+ with_ae_loss,
+ push_loss_factor,
+ pull_loss_factor,
+ with_heatmaps_loss,
+ heatmaps_loss_factor,
+ supervise_empty=True):
+ super().__init__()
+
+ assert isinstance(with_heatmaps_loss, (list, tuple)), \
+ 'with_heatmaps_loss should be a list or tuple'
+ assert isinstance(heatmaps_loss_factor, (list, tuple)), \
+ 'heatmaps_loss_factor should be a list or tuple'
+ assert isinstance(with_ae_loss, (list, tuple)), \
+ 'with_ae_loss should be a list or tuple'
+ assert isinstance(push_loss_factor, (list, tuple)), \
+ 'push_loss_factor should be a list or tuple'
+ assert isinstance(pull_loss_factor, (list, tuple)), \
+ 'pull_loss_factor should be a list or tuple'
+
+ self.num_joints = num_joints
+ self.num_stages = num_stages
+ self.ae_loss_type = ae_loss_type
+ self.with_ae_loss = with_ae_loss
+ self.push_loss_factor = push_loss_factor
+ self.pull_loss_factor = pull_loss_factor
+ self.with_heatmaps_loss = with_heatmaps_loss
+ self.heatmaps_loss_factor = heatmaps_loss_factor
+
+ self.heatmaps_loss = \
+ nn.ModuleList(
+ [
+ HeatmapLoss(supervise_empty)
+ if with_heatmaps_loss else None
+ for with_heatmaps_loss in self.with_heatmaps_loss
+ ]
+ )
+
+ self.ae_loss = \
+ nn.ModuleList(
+ [
+ AELoss(self.ae_loss_type) if with_ae_loss else None
+ for with_ae_loss in self.with_ae_loss
+ ]
+ )
+
+ def forward(self, outputs, heatmaps, masks, joints):
+ """Forward function to calculate losses.
+
+ Note:
+ - batch_size: N
+ - heatmaps width: W
+ - heatmaps height: H
+ - max_num_people: M
+ - num_keypoints: K
+ - output_channel: C (C=2K if the AE loss is used, else C=K)
+
+ Args:
+ outputs (list(torch.Tensor[N,C,H,W])): outputs of stages.
+ heatmaps (list(torch.Tensor[N,K,H,W])): target of heatmaps.
+ masks (list(torch.Tensor[N,H,W])): masks of heatmaps.
+ joints (list(torch.Tensor[N,M,K,2])): joints of ae loss.
+ """
+ heatmaps_losses = []
+ push_losses = []
+ pull_losses = []
+ for idx in range(len(outputs)):
+ offset_feat = 0
+ if self.heatmaps_loss[idx]:
+ heatmaps_pred = outputs[idx][:, :self.num_joints]
+ offset_feat = self.num_joints
+ heatmaps_loss = self.heatmaps_loss[idx](heatmaps_pred,
+ heatmaps[idx],
+ masks[idx])
+ heatmaps_loss = heatmaps_loss * self.heatmaps_loss_factor[idx]
+ heatmaps_losses.append(heatmaps_loss)
+ else:
+ heatmaps_losses.append(None)
+
+ if self.ae_loss[idx]:
+ tags_pred = outputs[idx][:, offset_feat:]
+ batch_size = tags_pred.size()[0]
+ tags_pred = tags_pred.contiguous().view(batch_size, -1, 1)
+
+ push_loss, pull_loss = self.ae_loss[idx](tags_pred,
+ joints[idx])
+ push_loss = push_loss * self.push_loss_factor[idx]
+ pull_loss = pull_loss * self.pull_loss_factor[idx]
+
+ push_losses.append(push_loss)
+ pull_losses.append(pull_loss)
+ else:
+ push_losses.append(None)
+ pull_losses.append(None)
+
+ return heatmaps_losses, push_losses, pull_losses
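
To make the push/pull terms of AELoss tangible, the torch-only sketch below reproduces the 'exp' variant for two hypothetical people: the tags predicted for one person's joints are pulled toward their mean, while the per-person mean tags are pushed apart. The tag values are made up; the normalisation matches singleTagLoss above.

import torch

person_a = torch.tensor([0.9, 1.1, 1.0])   # predicted tags for person A's visible joints
person_b = torch.tensor([3.0, 3.2])        # predicted tags for person B's visible joints

means = torch.stack([person_a.mean(), person_b.mean()])
pull = sum(((p - p.mean()) ** 2).mean() for p in (person_a, person_b)) / len(means)
diff = means.unsqueeze(0) - means.unsqueeze(1)
push = (torch.exp(-diff ** 2).sum() - len(means)) / (len(means) * (len(means) - 1)) * 0.5
print(float(pull), float(push))            # small pull, near-zero push for well-separated tags
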
diff --git a/vendor/ViTPose/mmpose/models/losses/regression_loss.py b/vendor/ViTPose/mmpose/models/losses/regression_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..db4178355ed4d16978d487ed92120a4cf427bf83
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/losses/regression_loss.py
@@ -0,0 +1,448 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+
+
+@LOSSES.register_module()
+class SmoothL1Loss(nn.Module):
+ """SmoothL1Loss loss.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.smooth_l1_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K, D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class WingLoss(nn.Module):
+ """Wing Loss. paper ref: 'Wing Loss for Robust Facial Landmark Localisation
+ with Convolutional Neural Networks' Feng et al. CVPR'2018.
+
+ Args:
+ omega (float): Also referred to as width.
+ epsilon (float): Also referred to as curvature.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ omega=10.0,
+ epsilon=2.0,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.omega = omega
+ self.epsilon = epsilon
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ # constant that smoothly links the piecewise-defined linear
+ # and nonlinear parts
+ self.C = self.omega * (1.0 - math.log(1.0 + self.omega / self.epsilon))
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ pred (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ """
+ delta = (target - pred).abs()
+ losses = torch.where(
+ delta < self.omega,
+ self.omega * torch.log(1.0 + delta / self.epsilon), delta - self.C)
+ return torch.mean(torch.sum(losses, dim=[1, 2]), dim=0)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N,K,D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class SoftWingLoss(nn.Module):
+ """Soft Wing Loss 'Structure-Coherent Deep Feature Learning for Robust Face
+ Alignment' Lin et al. TIP'2021.
+
+ loss =
+ 1. |x| , if |x| < omega1
+ 2. omega2*ln(1+|x|/epsilon) + B, if |x| >= omega1
+
+ Args:
+ omega1 (float): The first threshold.
+ omega2 (float): The second threshold.
+ epsilon (float): Also referred to as curvature.
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self,
+ omega1=2.0,
+ omega2=20.0,
+ epsilon=0.5,
+ use_target_weight=False,
+ loss_weight=1.):
+ super().__init__()
+ self.omega1 = omega1
+ self.omega2 = omega2
+ self.epsilon = epsilon
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ # constant that smoothly links the piecewise-defined linear
+ # and nonlinear parts
+ self.B = self.omega1 - self.omega2 * math.log(1.0 + self.omega1 /
+ self.epsilon)
+
+ def criterion(self, pred, target):
+ """Criterion of wingloss.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ pred (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ """
+ delta = (target - pred).abs()
+ losses = torch.where(
+ delta < self.omega1, delta,
+ self.omega2 * torch.log(1.0 + delta / self.epsilon) + self.B)
+ return torch.mean(torch.sum(losses, dim=[1, 2]), dim=0)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K, D]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class MPJPELoss(nn.Module):
+ """MPJPE (Mean Per Joint Position Error) loss.
+
+ Args:
+ use_target_weight (bool): Option to use weighted MSE loss.
+ Different joint types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N,K,D]):
+ Weights across different joint types.
+ """
+
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = torch.mean(
+ torch.norm((output - target) * target_weight, dim=-1))
+ else:
+ loss = torch.mean(torch.norm(output - target, dim=-1))
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class L1Loss(nn.Module):
+ """L1Loss loss ."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.l1_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output regression.
+ target (torch.Tensor[N, K, 2]): Target regression.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class MSELoss(nn.Module):
+ """MSE loss for coordinate regression."""
+
+ def __init__(self, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.criterion = F.mse_loss
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ output (torch.Tensor[N, K, 2]): Output regression.
+ target (torch.Tensor[N, K, 2]): Target regression.
+ target_weight (torch.Tensor[N, K, 2]):
+ Weights across different joint types.
+ """
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = self.criterion(output * target_weight,
+ target * target_weight)
+ else:
+ loss = self.criterion(output, target)
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class BoneLoss(nn.Module):
+ """Bone length loss.
+
+ Args:
+ joint_parents (list): Indices of each joint's parent joint.
+ use_target_weight (bool): Option to use weighted bone loss.
+ Different bone types may have different target weights.
+ loss_weight (float): Weight of the loss. Default: 1.0.
+ """
+
+ def __init__(self, joint_parents, use_target_weight=False, loss_weight=1.):
+ super().__init__()
+ self.joint_parents = joint_parents
+ self.use_target_weight = use_target_weight
+ self.loss_weight = loss_weight
+
+ self.non_root_indices = []
+ for i in range(len(self.joint_parents)):
+ if i != self.joint_parents[i]:
+ self.non_root_indices.append(i)
+
+ def forward(self, output, target, target_weight=None):
+ """Forward function.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - dimension of keypoints: D (D=2 or D=3)
+
+ Args:
+ output (torch.Tensor[N, K, D]): Output regression.
+ target (torch.Tensor[N, K, D]): Target regression.
+ target_weight (torch.Tensor[N, K-1]):
+ Weights across different bone types.
+ """
+ output_bone = torch.norm(
+ output - output[:, self.joint_parents, :],
+ dim=-1)[:, self.non_root_indices]
+ target_bone = torch.norm(
+ target - target[:, self.joint_parents, :],
+ dim=-1)[:, self.non_root_indices]
+ if self.use_target_weight:
+ assert target_weight is not None
+ loss = torch.mean(
+ torch.abs((output_bone * target_weight).mean(dim=0) -
+ (target_bone * target_weight).mean(dim=0)))
+ else:
+ loss = torch.mean(
+ torch.abs(output_bone.mean(dim=0) - target_bone.mean(dim=0)))
+
+ return loss * self.loss_weight
+
+
+@LOSSES.register_module()
+class SemiSupervisionLoss(nn.Module):
+ """Semi-supervision loss for unlabeled data. It is composed of projection
+ loss and bone loss.
+
+ Paper ref: `3D human pose estimation in video with temporal convolutions
+ and semi-supervised training` Dario Pavllo et al. CVPR'2019.
+
+ Args:
+ joint_parents (list): Indices of each joint's parent joint.
+ projection_loss_weight (float): Weight for projection loss.
+ bone_loss_weight (float): Weight for bone loss.
+ warmup_iterations (int): Number of warmup iterations. In the first
+ `warmup_iterations` iterations, the model is trained only on
+ labeled data, and semi-supervision loss will be 0.
+ This is a workaround since currently we cannot access
+ epoch number in loss functions. Note that the iteration number in
+ an epoch can be changed due to different GPU numbers in multi-GPU
+ settings. So please set this parameter carefully.
+ warmup_iterations = dataset_size // samples_per_gpu // gpu_num
+ * warmup_epochs
+ """
+
+ def __init__(self,
+ joint_parents,
+ projection_loss_weight=1.,
+ bone_loss_weight=1.,
+ warmup_iterations=0):
+ super().__init__()
+ self.criterion_projection = MPJPELoss(
+ loss_weight=projection_loss_weight)
+ self.criterion_bone = BoneLoss(
+ joint_parents, loss_weight=bone_loss_weight)
+ self.warmup_iterations = warmup_iterations
+ self.num_iterations = 0
+
+ @staticmethod
+ def project_joints(x, intrinsics):
+ """Project 3D joint coordinates to 2D image plane using camera
+ intrinsic parameters.
+
+ Args:
+ x (torch.Tensor[N, K, 3]): 3D joint coordinates.
+ intrinsics (torch.Tensor[N, 4] | torch.Tensor[N, 9]): Camera
+ intrinsics: f (2), c (2), k (3), p (2).
+ """
+ while intrinsics.dim() < x.dim():
+ intrinsics.unsqueeze_(1)
+ f = intrinsics[..., :2]
+ c = intrinsics[..., 2:4]
+ _x = torch.clamp(x[:, :, :2] / x[:, :, 2:], -1, 1)
+ if intrinsics.shape[-1] == 9:
+ k = intrinsics[..., 4:7]
+ p = intrinsics[..., 7:9]
+
+ r2 = torch.sum(_x[:, :, :2]**2, dim=-1, keepdim=True)
+ radial = 1 + torch.sum(
+ k * torch.cat((r2, r2**2, r2**3), dim=-1),
+ dim=-1,
+ keepdim=True)
+ tan = torch.sum(p * _x, dim=-1, keepdim=True)
+ _x = _x * (radial + tan) + p * r2
+ _x = f * _x + c
+ return _x
+
+ def forward(self, output, target):
+ losses = dict()
+
+ self.num_iterations += 1
+ if self.num_iterations <= self.warmup_iterations:
+ return losses
+
+ labeled_pose = output['labeled_pose']
+ unlabeled_pose = output['unlabeled_pose']
+ unlabeled_traj = output['unlabeled_traj']
+ unlabeled_target_2d = target['unlabeled_target_2d']
+ intrinsics = target['intrinsics']
+
+ # projection loss
+ unlabeled_output = unlabeled_pose + unlabeled_traj
+ unlabeled_output_2d = self.project_joints(unlabeled_output, intrinsics)
+ loss_proj = self.criterion_projection(unlabeled_output_2d,
+ unlabeled_target_2d, None)
+ losses['proj_loss'] = loss_proj
+
+ # bone loss
+ loss_bone = self.criterion_bone(unlabeled_pose, labeled_pose, None)
+ losses['bone_loss'] = loss_bone
+
+ return losses
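
A quick numeric check of the constant C used by WingLoss above: C is chosen so that the logarithmic piece (used for small errors) and the linear piece (used for large errors) meet continuously at |x| = omega. The values are the defaults from __init__.

import math

omega, epsilon = 10.0, 2.0
C = omega * (1.0 - math.log(1.0 + omega / epsilon))
log_branch_at_omega = omega * math.log(1.0 + omega / epsilon)
linear_branch_at_omega = omega - C
print(log_branch_at_omega, linear_branch_at_omega)   # both ~17.918
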
diff --git a/vendor/ViTPose/mmpose/models/misc/__init__.py b/vendor/ViTPose/mmpose/models/misc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef101fec61e72abc0eb90266d453b5b22331378d
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/misc/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/vendor/ViTPose/mmpose/models/misc/discriminator.py b/vendor/ViTPose/mmpose/models/misc/discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..712f0a8b566e3dcbc0cd13206610d3c750b942ab
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/misc/discriminator.py
@@ -0,0 +1,307 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/akanazawa/hmr
+# Original licence: Copyright (c) 2018 akanazawa, under the MIT License.
+# ------------------------------------------------------------------------------
+
+from abc import abstractmethod
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init, xavier_init
+
+from mmpose.models.utils.geometry import batch_rodrigues
+
+
+class BaseDiscriminator(nn.Module):
+ """Base linear module for SMPL parameter discriminator.
+
+ Args:
+ fc_layers (Tuple): Tuple of neuron counts,
+ such as (9, 32, 32, 1)
+ use_dropout (Tuple): Tuple of bools defining whether to use dropout
+ for each layer, such as (True, True, False)
+ drop_prob (Tuple): Tuple of floats defining the dropout probability,
+ such as (0.5, 0.5, 0)
+ use_activation (Tuple): Tuple of bools defining whether to use an
+ activation function, such as (True, True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ super().__init__()
+ self.fc_layers = fc_layers
+ self.use_dropout = use_dropout
+ self.drop_prob = drop_prob
+ self.use_activation = use_activation
+ self._check()
+ self.create_layers()
+
+ def _check(self):
+ """Check input to avoid ValueError."""
+ if not isinstance(self.fc_layers, tuple):
+ raise TypeError(f'fc_layers require tuple, '
+ f'get {type(self.fc_layers)}')
+
+ if not isinstance(self.use_dropout, tuple):
+ raise TypeError(f'use_dropout require tuple, '
+ f'get {type(self.use_dropout)}')
+
+ if not isinstance(self.drop_prob, tuple):
+ raise TypeError(f'drop_prob require tuple, '
+ f'get {type(self.drop_prob)}')
+
+ if not isinstance(self.use_activation, tuple):
+ raise TypeError(f'use_activation require tuple, '
+ f'get {type(self.use_activation)}')
+
+ l_fc_layer = len(self.fc_layers)
+ l_use_drop = len(self.use_dropout)
+ l_drop_prob = len(self.drop_prob)
+ l_use_activation = len(self.use_activation)
+
+ pass_check = (
+ l_fc_layer >= 2 and l_use_drop < l_fc_layer
+ and l_drop_prob < l_fc_layer and l_use_activation < l_fc_layer
+ and l_drop_prob == l_use_drop)
+
+ if not pass_check:
+ msg = 'Wrong BaseDiscriminator parameters!'
+ raise ValueError(msg)
+
+ def create_layers(self):
+ """Create layers."""
+ l_fc_layer = len(self.fc_layers)
+ l_use_drop = len(self.use_dropout)
+ l_use_activation = len(self.use_activation)
+
+ self.fc_blocks = nn.Sequential()
+
+ for i in range(l_fc_layer - 1):
+ self.fc_blocks.add_module(
+ name=f'regressor_fc_{i}',
+ module=nn.Linear(
+ in_features=self.fc_layers[i],
+ out_features=self.fc_layers[i + 1]))
+
+ if i < l_use_activation and self.use_activation[i]:
+ self.fc_blocks.add_module(
+ name=f'regressor_af_{i}', module=nn.ReLU())
+
+ if i < l_use_drop and self.use_dropout[i]:
+ self.fc_blocks.add_module(
+ name=f'regressor_fc_dropout_{i}',
+ module=nn.Dropout(p=self.drop_prob[i]))
+
+ @abstractmethod
+ def forward(self, inputs):
+ """Forward function."""
+ msg = 'the base class [BaseDiscriminator] is not callable!'
+ raise NotImplementedError(msg)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ # xavier-initialize every linear layer in the fc blocks
+ for m in self.fc_blocks.modules():
+ if isinstance(m, nn.Linear):
+ xavier_init(m, gain=0.01)
+
+
+class ShapeDiscriminator(BaseDiscriminator):
+ """Discriminator for SMPL shape parameters, the inputs is (batch_size x 10)
+
+ Args:
+ fc_layers (Tuple): Tuple of neuron count, such as (10, 5, 1)
+ use_dropout (Tuple): Tuple of bool define use dropout or
+ not for each layer, such as (True, True, False)
+ drop_prob (Tuple): Tuple of float defined the drop prob,
+ such as (0.5, 0)
+ use_activation(Tuple): Tuple of bool define use active
+ function or not, such as (True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ if fc_layers[-1] != 1:
+ msg = f'the neuron count of the last layer ' \
+ f'must be 1, but got {fc_layers[-1]}'
+ raise ValueError(msg)
+
+ super().__init__(fc_layers, use_dropout, drop_prob, use_activation)
+
+ def forward(self, inputs):
+ """Forward function."""
+ return self.fc_blocks(inputs)
+
+
+class PoseDiscriminator(nn.Module):
+ """Discriminator for SMPL pose parameters of each joint. It is composed of
+ discriminators for each joints. The inputs is (batch_size x joint_count x
+ 9)
+
+ Args:
+ channels (Tuple): Tuple of channel number,
+ such as (9, 32, 32, 1)
+ joint_count (int): Joint number, such as 23
+ """
+
+ def __init__(self, channels, joint_count):
+ super().__init__()
+ if channels[-1] != 1:
+ msg = f'the neuron count of the last layer ' \
+ f'must be 1, but got {channels[-1]}'
+ raise ValueError(msg)
+ self.joint_count = joint_count
+
+ self.conv_blocks = nn.Sequential()
+ len_channels = len(channels)
+ for idx in range(len_channels - 2):
+ self.conv_blocks.add_module(
+ name=f'conv_{idx}',
+ module=nn.Conv2d(
+ in_channels=channels[idx],
+ out_channels=channels[idx + 1],
+ kernel_size=1,
+ stride=1))
+
+ self.fc_layer = nn.ModuleList()
+ for idx in range(joint_count):
+ self.fc_layer.append(
+ nn.Linear(
+ in_features=channels[len_channels - 2], out_features=1))
+
+ def forward(self, inputs):
+ """Forward function.
+
+ The input is (batch_size x joint_count x 9).
+ """
+ # shape: batch_size x 9 x 1 x joint_count
+ inputs = inputs.transpose(1, 2).unsqueeze(2).contiguous()
+ # shape: batch_size x c x 1 x joint_count
+ internal_outputs = self.conv_blocks(inputs)
+ outputs = []
+ for idx in range(self.joint_count):
+ outputs.append(self.fc_layer[idx](internal_outputs[:, :, 0, idx]))
+
+ return torch.cat(outputs, 1), internal_outputs
+
+ def init_weights(self):
+ """Initialize model weights."""
+ for m in self.conv_blocks:
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001, bias=0)
+ for m in self.fc_layer.modules():
+ if isinstance(m, nn.Linear):
+ xavier_init(m, gain=0.01)
+
+
+class FullPoseDiscriminator(BaseDiscriminator):
+ """Discriminator for SMPL pose parameters of all joints.
+
+ Args:
+ fc_layers (Tuple): Tuple of neuron counts,
+ such as (736, 1024, 1024, 1)
+ use_dropout (Tuple): Tuple of bools defining whether to use
+ dropout for each layer, such as (True, True, False)
+ drop_prob (Tuple): Tuple of floats defining the dropout
+ probability of each layer, such as (0.5, 0.5, 0)
+ use_activation (Tuple): Tuple of bools defining whether to use
+ an activation function for each layer, such as (True, True, False)
+ """
+
+ def __init__(self, fc_layers, use_dropout, drop_prob, use_activation):
+ if fc_layers[-1] != 1:
+ msg = f'the neuron count of the last layer must be 1,' \
+ f' but got {fc_layers[-1]}'
+ raise ValueError(msg)
+
+ super().__init__(fc_layers, use_dropout, drop_prob, use_activation)
+
+ def forward(self, inputs):
+ """Forward function."""
+ return self.fc_blocks(inputs)
+
+
+class SMPLDiscriminator(nn.Module):
+ """Discriminator for SMPL pose and shape parameters. It is composed of a
+ discriminator for SMPL shape parameters, a discriminator for SMPL pose
+ parameters of all joints and a discriminator for SMPL pose parameters of
+ each joint.
+
+ Args:
+ beta_channel (tuple of int): Tuple of neuron count of the
+ discriminator of shape parameters. Defaults to (10, 5, 1)
+ per_joint_channel (tuple of int): Tuple of neuron count of the
+ discriminator of each joint. Defaults to (9, 32, 32, 1)
+ full_pose_channel (tuple of int): Tuple of neuron count of the
+ discriminator of full pose. Defaults to (23*32, 1024, 1024, 1)
+ """
+
+ def __init__(self,
+ beta_channel=(10, 5, 1),
+ per_joint_channel=(9, 32, 32, 1),
+ full_pose_channel=(23 * 32, 1024, 1024, 1)):
+ super().__init__()
+ self.joint_count = 23
+ # The count of SMPL shape parameter is 10.
+ assert beta_channel[0] == 10
+ # Use 3 x 3 rotation matrix as the pose parameters
+ # of each joint, so the input channel is 9.
+ assert per_joint_channel[0] == 9
+ assert self.joint_count * per_joint_channel[-2] \
+ == full_pose_channel[0]
+
+ self.beta_channel = beta_channel
+ self.per_joint_channel = per_joint_channel
+ self.full_pose_channel = full_pose_channel
+ self._create_sub_modules()
+
+ def _create_sub_modules(self):
+ """Create sub discriminators."""
+
+ # create theta discriminator for each joint
+ self.pose_discriminator = PoseDiscriminator(self.per_joint_channel,
+ self.joint_count)
+
+ # create full pose discriminator for total joints
+ fc_layers = self.full_pose_channel
+ use_dropout = tuple([False] * (len(fc_layers) - 1))
+ drop_prob = tuple([0.5] * (len(fc_layers) - 1))
+ use_activation = tuple([True] * (len(fc_layers) - 2) + [False])
+
+ self.full_pose_discriminator = FullPoseDiscriminator(
+ fc_layers, use_dropout, drop_prob, use_activation)
+
+ # create shape discriminator for betas
+ fc_layers = self.beta_channel
+ use_dropout = tuple([False] * (len(fc_layers) - 1))
+ drop_prob = tuple([0.5] * (len(fc_layers) - 1))
+ use_activation = tuple([True] * (len(fc_layers) - 2) + [False])
+ self.shape_discriminator = ShapeDiscriminator(fc_layers, use_dropout,
+ drop_prob,
+ use_activation)
+
+ def forward(self, thetas):
+ """Forward function."""
+ _, poses, shapes = thetas
+
+ batch_size = poses.shape[0]
+ shape_disc_value = self.shape_discriminator(shapes)
+
+ # The first rotation matrix is global rotation
+ # and is NOT used in discriminator.
+ if poses.dim() == 2:
+ rotate_matrixs = \
+ batch_rodrigues(poses.contiguous().view(-1, 3)
+ ).view(batch_size, 24, 9)[:, 1:, :]
+ else:
+ rotate_matrixs = poses.contiguous().view(batch_size, 24,
+ 9)[:, 1:, :].contiguous()
+ pose_disc_value, pose_inter_disc_value \
+ = self.pose_discriminator(rotate_matrixs)
+ full_pose_disc_value = self.full_pose_discriminator(
+ pose_inter_disc_value.contiguous().view(batch_size, -1))
+ return torch.cat(
+ (pose_disc_value, full_pose_disc_value, shape_disc_value), 1)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ self.full_pose_discriminator.init_weights()
+ self.pose_discriminator.init_weights()
+ self.shape_discriminator.init_weights()
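+
+# A minimal usage sketch of the adversarial prior above (shapes are assumed,
+# not taken from any particular config; `cams` is unused by the forward pass):
+# >>> import torch
+# >>> disc = SMPLDiscriminator()
+# >>> cams = torch.zeros(2, 3)        # camera parameters, ignored here
+# >>> poses = torch.zeros(2, 24 * 3)  # axis-angle pose, incl. global rotation
+# >>> shapes = torch.zeros(2, 10)     # SMPL betas
+# >>> disc((cams, poses, shapes)).shape
+# torch.Size([2, 25])                 # 23 per-joint + 1 full-pose + 1 shape scores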
diff --git a/vendor/ViTPose/mmpose/models/necks/__init__.py b/vendor/ViTPose/mmpose/models/necks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d3a5cc01a93604f3d9da9242ea2eac0fe60638c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/necks/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .gap_neck import GlobalAveragePooling
+from .posewarper_neck import PoseWarperNeck
+
+__all__ = ['GlobalAveragePooling', 'PoseWarperNeck']
diff --git a/vendor/ViTPose/mmpose/models/necks/gap_neck.py b/vendor/ViTPose/mmpose/models/necks/gap_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e6ad68ec11110daaad3a66e09d67efb355c4b93
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/necks/gap_neck.py
@@ -0,0 +1,37 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from ..builder import NECKS
+
+
+@NECKS.register_module()
+class GlobalAveragePooling(nn.Module):
+ """Global Average Pooling neck.
+
+ Note that we use `view` to remove extra channel after pooling. We do not
+ use `squeeze` as it will also remove the batch dimension when the tensor
+ has a batch dimension of size 1, which can lead to unexpected errors.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.gap = nn.AdaptiveAvgPool2d((1, 1))
+
+ def init_weights(self):
+ pass
+
+ def forward(self, inputs):
+ if isinstance(inputs, tuple):
+ outs = tuple([self.gap(x) for x in inputs])
+ outs = tuple(
+ [out.view(x.size(0), -1) for out, x in zip(outs, inputs)])
+ elif isinstance(inputs, list):
+ outs = [self.gap(x) for x in inputs]
+ outs = [out.view(x.size(0), -1) for out, x in zip(outs, inputs)]
+ elif isinstance(inputs, torch.Tensor):
+ outs = self.gap(inputs)
+ outs = outs.view(inputs.size(0), -1)
+ else:
+ raise TypeError('neck inputs should be tuple or torch.tensor')
+ return outs
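+
+# A short usage sketch (feature shapes below are arbitrary examples):
+# >>> import torch
+# >>> neck = GlobalAveragePooling()
+# >>> feats = (torch.randn(2, 256, 8, 6), torch.randn(2, 512, 4, 3))
+# >>> [out.shape for out in neck(feats)]
+# [torch.Size([2, 256]), torch.Size([2, 512])]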
diff --git a/vendor/ViTPose/mmpose/models/necks/posewarper_neck.py b/vendor/ViTPose/mmpose/models/necks/posewarper_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd4ddfbf8984857a6110f19b0a7d703b53f1c433
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/necks/posewarper_neck.py
@@ -0,0 +1,329 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
+ normal_init)
+from mmcv.utils import digit_version
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.utils.ops import resize
+from ..backbones.resnet import BasicBlock, Bottleneck
+from ..builder import NECKS
+
+try:
+ from mmcv.ops import DeformConv2d
+ has_mmcv_full = True
+except (ImportError, ModuleNotFoundError):
+ has_mmcv_full = False
+
+
+@NECKS.register_module()
+class PoseWarperNeck(nn.Module):
+ """PoseWarper neck.
+
+ `"Learning temporal pose estimation from sparsely-labeled videos"
+ <https://arxiv.org/abs/1906.04016>`_.
+
+ Args:
+ in_channels (int): Number of input channels from backbone
+ out_channels (int): Number of output channels
+ inner_channels (int): Number of intermediate channels of the res block
+ deform_groups (int): Number of groups in the deformable conv
+ dilations (list|tuple): different dilations of the offset conv layers
+ trans_conv_kernel (int): the kernel of the trans conv layer, which is
+ used to get heatmap from the output of backbone. Default: 1
+ res_blocks_cfg (dict|None): config of residual blocks. If None,
+ use the default values. If not None, it should contain the
+ following keys:
+
+ - block (str): the type of residual block, Default: 'BASIC'.
+ - num_blocks (int): the number of blocks, Default: 20.
+
+ offsets_kernel (int): the kernel size of the offset conv layer.
+ deform_conv_kernel (int): the kernel size of the deformable conv layer.
+ in_index (int|Sequence[int]): Input feature index. Default: 0
+ input_transform (str|None): Transformation type of input features.
+ Options: 'resize_concat', 'multiple_select', None.
+ Default: None.
+
+ - 'resize_concat': Multiple feature maps will be resized to \
+ the same size as the first one and then concatenated together. \
+ Usually used in the FCN head of HRNet.
+ - 'multiple_select': Multiple feature maps will be bundled into \
+ a list and passed into the decode head.
+ - None: Only one select feature map is allowed.
+
+ freeze_trans_layer (bool): Whether to freeze the transition layer
+ (stop grad and set eval mode). Default: True.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only. Default: False.
+ im2col_step (int): the argument `im2col_step` in deformable conv,
+ Default: 80.
+ """
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+ minimum_mmcv_version = '1.3.17'
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ inner_channels,
+ deform_groups=17,
+ dilations=(3, 6, 12, 18, 24),
+ trans_conv_kernel=1,
+ res_blocks_cfg=None,
+ offsets_kernel=3,
+ deform_conv_kernel=3,
+ in_index=0,
+ input_transform=None,
+ freeze_trans_layer=True,
+ norm_eval=False,
+ im2col_step=80):
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.inner_channels = inner_channels
+ self.deform_groups = deform_groups
+ self.dilations = dilations
+ self.trans_conv_kernel = trans_conv_kernel
+ self.res_blocks_cfg = res_blocks_cfg
+ self.offsets_kernel = offsets_kernel
+ self.deform_conv_kernel = deform_conv_kernel
+ self.in_index = in_index
+ self.input_transform = input_transform
+ self.freeze_trans_layer = freeze_trans_layer
+ self.norm_eval = norm_eval
+ self.im2col_step = im2col_step
+
+ identity_trans_layer = False
+
+ assert trans_conv_kernel in [0, 1, 3]
+ kernel_size = trans_conv_kernel
+ if kernel_size == 3:
+ padding = 1
+ elif kernel_size == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_trans_layer = True
+
+ if identity_trans_layer:
+ self.trans_layer = nn.Identity()
+ else:
+ self.trans_layer = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=padding)
+
+ # build chain of residual blocks
+ if res_blocks_cfg is not None and not isinstance(res_blocks_cfg, dict):
+ raise TypeError('res_blocks_cfg should be dict or None.')
+
+ if res_blocks_cfg is None:
+ block_type = 'BASIC'
+ num_blocks = 20
+ else:
+ block_type = res_blocks_cfg.get('block', 'BASIC')
+ num_blocks = res_blocks_cfg.get('num_blocks', 20)
+
+ block = self.blocks_dict[block_type]
+
+ res_layers = []
+ downsample = nn.Sequential(
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=out_channels,
+ out_channels=inner_channels,
+ kernel_size=1,
+ stride=1,
+ bias=False),
+ build_norm_layer(dict(type='BN'), inner_channels)[1])
+ res_layers.append(
+ block(
+ in_channels=out_channels,
+ out_channels=inner_channels,
+ downsample=downsample))
+
+ for _ in range(1, num_blocks):
+ res_layers.append(block(inner_channels, inner_channels))
+ self.offset_feats = nn.Sequential(*res_layers)
+
+ # build offset layers
+ self.num_offset_layers = len(dilations)
+ assert self.num_offset_layers > 0, 'Number of offset layers ' \
+ 'should be larger than 0.'
+
+ target_offset_channels = 2 * offsets_kernel**2 * deform_groups
+
+ offset_layers = [
+ build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=inner_channels,
+ out_channels=target_offset_channels,
+ kernel_size=offsets_kernel,
+ stride=1,
+ dilation=dilations[i],
+ padding=dilations[i],
+ bias=False,
+ ) for i in range(self.num_offset_layers)
+ ]
+ self.offset_layers = nn.ModuleList(offset_layers)
+
+ # build deformable conv layers
+ assert digit_version(mmcv.__version__) >= \
+ digit_version(self.minimum_mmcv_version), \
+ f'Current MMCV version: {mmcv.__version__}, ' \
+ f'but MMCV >= {self.minimum_mmcv_version} is required, see ' \
+ f'https://github.com/open-mmlab/mmcv/issues/1440, ' \
+ f'Please install the latest MMCV.'
+
+ if has_mmcv_full:
+ deform_conv_layers = [
+ DeformConv2d(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=deform_conv_kernel,
+ stride=1,
+ padding=int(deform_conv_kernel / 2) * dilations[i],
+ dilation=dilations[i],
+ deform_groups=deform_groups,
+ im2col_step=self.im2col_step,
+ ) for i in range(self.num_offset_layers)
+ ]
+ else:
+ raise ImportError('Please install the full version of mmcv '
+ 'to use `DeformConv2d`.')
+
+ self.deform_conv_layers = nn.ModuleList(deform_conv_layers)
+
+ self.freeze_layers()
+
+ def freeze_layers(self):
+ if self.freeze_trans_layer:
+ self.trans_layer.eval()
+
+ for param in self.trans_layer.parameters():
+ param.requires_grad = False
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.001)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+ elif isinstance(m, DeformConv2d):
+ filler = torch.zeros([
+ m.weight.size(0),
+ m.weight.size(1),
+ m.weight.size(2),
+ m.weight.size(3)
+ ],
+ dtype=torch.float32,
+ device=m.weight.device)
+ for k in range(m.weight.size(0)):
+ filler[k, k,
+ int(m.weight.size(2) / 2),
+ int(m.weight.size(3) / 2)] = 1.0
+ m.weight = torch.nn.Parameter(filler)
+ m.weight.requires_grad = True
+
+ # posewarper offset layer weight initialization
+ for m in self.offset_layers.modules():
+ constant_init(m, 0)
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+
+ Args:
+ inputs (list[Tensor] | Tensor): multi-level img features.
+
+ Returns:
+ Tensor: The transformed inputs
+ """
+ if not isinstance(inputs, list):
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = torch.cat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def forward(self, inputs, frame_weight):
+ assert isinstance(inputs, (list, tuple)), 'PoseWarperNeck inputs ' \
+ 'should be list or tuple, even though the length is 1, ' \
+ 'for unified processing.'
+
+ output_heatmap = 0
+ if len(inputs) > 1:
+ inputs = [self._transform_inputs(input) for input in inputs]
+ inputs = [self.trans_layer(input) for input in inputs]
+
+ # calculate difference features
+ diff_features = [
+ self.offset_feats(inputs[0] - input) for input in inputs
+ ]
+
+ for i in range(len(inputs)):
+ if frame_weight[i] == 0:
+ continue
+ warped_heatmap = 0
+ for j in range(self.num_offset_layers):
+ offset = (self.offset_layers[j](diff_features[i]))
+ warped_heatmap_tmp = self.deform_conv_layers[j](inputs[i],
+ offset)
+ warped_heatmap += warped_heatmap_tmp / \
+ self.num_offset_layers
+
+ output_heatmap += warped_heatmap * frame_weight[i]
+
+ else:
+ inputs = inputs[0]
+ inputs = self._transform_inputs(inputs)
+ inputs = self.trans_layer(inputs)
+
+ num_frames = len(frame_weight)
+ batch_size = inputs.size(0) // num_frames
+ ref_x = inputs[:batch_size]
+ ref_x_tiled = ref_x.repeat(num_frames, 1, 1, 1)
+
+ offset_features = self.offset_feats(ref_x_tiled - inputs)
+
+ warped_heatmap = 0
+ for j in range(self.num_offset_layers):
+ offset = self.offset_layers[j](offset_features)
+
+ warped_heatmap_tmp = self.deform_conv_layers[j](inputs, offset)
+ warped_heatmap += warped_heatmap_tmp / self.num_offset_layers
+
+ for i in range(num_frames):
+ if frame_weight[i] == 0:
+ continue
+ output_heatmap += warped_heatmap[i * batch_size:(i + 1) *
+ batch_size] * frame_weight[i]
+
+ return output_heatmap
+
+ def train(self, mode=True):
+ """Convert the model into training mode."""
+ super().train(mode)
+ self.freeze_layers()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, _BatchNorm):
+ m.eval()
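+
+# A construction sketch (illustrative only; the channel sizes assume HRNet-W48
+# features and 17 keypoints, and running it requires mmcv-full built with the
+# DeformConv2d op):
+# >>> import torch
+# >>> neck = PoseWarperNeck(in_channels=48, out_channels=17, inner_channels=128)
+# >>> feats = [torch.randn(1, 48, 64, 48) for _ in range(3)]  # one entry per frame
+# >>> heatmaps = neck(feats, frame_weight=[0.5, 0.3, 0.2])    # -> [1, 17, 64, 48]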
diff --git a/vendor/ViTPose/mmpose/models/registry.py b/vendor/ViTPose/mmpose/models/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..f354ae9e137262e2f375a64aef74c3af20baae63
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/registry.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+from .builder import BACKBONES, HEADS, LOSSES, NECKS, POSENETS
+
+__all__ = ['BACKBONES', 'HEADS', 'LOSSES', 'NECKS', 'POSENETS']
+
+warnings.simplefilter('once', DeprecationWarning)
+warnings.warn(
+ 'Registries (BACKBONES, NECKS, HEADS, LOSSES, POSENETS) have '
+ 'been moved to mmpose.models.builder. Importing from '
+ 'mmpose.models.registry will be deprecated in the future.',
+ DeprecationWarning)
diff --git a/vendor/ViTPose/mmpose/models/utils/__init__.py b/vendor/ViTPose/mmpose/models/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6871c66e50708f928ead8714aa83cb4ef6447e09
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/utils/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .smpl import SMPL
+
+__all__ = ['SMPL']
diff --git a/vendor/ViTPose/mmpose/models/utils/geometry.py b/vendor/ViTPose/mmpose/models/utils/geometry.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ceadaec30cd2c9bb3fbada132e1ea674f2e8754
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/utils/geometry.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.nn import functional as F
+
+
+def rot6d_to_rotmat(x):
+ """Convert 6D rotation representation to 3x3 rotation matrix.
+
+ Based on Zhou et al., "On the Continuity of Rotation
+ Representations in Neural Networks", CVPR 2019
+ Input:
+ (B,6) Batch of 6-D rotation representations
+ Output:
+ (B,3,3) Batch of corresponding rotation matrices
+ """
+ x = x.view(-1, 3, 2)
+ a1 = x[:, :, 0]
+ a2 = x[:, :, 1]
+ b1 = F.normalize(a1)
+ b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1)
+ b3 = torch.cross(b1, b2)
+ return torch.stack((b1, b2, b3), dim=-1)
+
+
+def batch_rodrigues(theta):
+ """Convert axis-angle representation to rotation matrix.
+ Args:
+ theta: size = [B, 3]
+ Returns:
+ Rotation matrix corresponding to the axis-angle input
+ -- size = [B, 3, 3]
+ """
+ l2norm = torch.norm(theta + 1e-8, p=2, dim=1)
+ angle = torch.unsqueeze(l2norm, -1)
+ normalized = torch.div(theta, angle)
+ angle = angle * 0.5
+ v_cos = torch.cos(angle)
+ v_sin = torch.sin(angle)
+ quat = torch.cat([v_cos, v_sin * normalized], dim=1)
+ return quat_to_rotmat(quat)
+
+
+def quat_to_rotmat(quat):
+ """Convert quaternion coefficients to rotation matrix.
+ Args:
+ quat: size = [B, 4] 4 <===>(w, x, y, z)
+ Returns:
+ Rotation matrix corresponding to the quaternion
+ -- size = [B, 3, 3]
+ """
+ norm_quat = quat
+ norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True)
+ w, x, y, z = norm_quat[:, 0], norm_quat[:, 1],\
+ norm_quat[:, 2], norm_quat[:, 3]
+
+ B = quat.size(0)
+
+ w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
+ wx, wy, wz = w * x, w * y, w * z
+ xy, xz, yz = x * y, x * z, y * z
+
+ rotMat = torch.stack([
+ w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy,
+ w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz,
+ w2 - x2 - y2 + z2
+ ],
+ dim=1).view(B, 3, 3)
+ return rotMat
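+
+# Doctest-style sanity checks for the conversions above (the inputs are simple
+# hand-picked rotations, chosen only for illustration):
+# >>> import math, torch
+# >>> x6d = torch.tensor([[1., 0., 0., 0., 1., 0.]])  # first two columns of eye(3)
+# >>> torch.allclose(rot6d_to_rotmat(x6d), torch.eye(3).unsqueeze(0))
+# True
+# >>> aa = torch.tensor([[0., 0., math.pi / 2]])      # 90 deg about the z axis
+# >>> expected = torch.tensor([[[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]]])
+# >>> torch.allclose(batch_rodrigues(aa), expected, atol=1e-6)
+# True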
diff --git a/vendor/ViTPose/mmpose/models/utils/ops.py b/vendor/ViTPose/mmpose/models/utils/ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..858d0a92148a591d235e58bfce8990207632fb39
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/utils/ops.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import torch
+import torch.nn.functional as F
+
+
+def resize(input,
+ size=None,
+ scale_factor=None,
+ mode='nearest',
+ align_corners=None,
+ warning=True):
+ if warning:
+ if size is not None and align_corners:
+ input_h, input_w = tuple(int(x) for x in input.shape[2:])
+ output_h, output_w = tuple(int(x) for x in size)
+ if output_h > input_h or output_w > input_w:
+ if ((output_h > 1 and output_w > 1 and input_h > 1
+ and input_w > 1) and (output_h - 1) % (input_h - 1)
+ and (output_w - 1) % (input_w - 1)):
+ warnings.warn(
+ f'When align_corners={align_corners}, '
+ 'the output would more aligned if '
+ f'input size {(input_h, input_w)} is `x+1` and '
+ f'out size {(output_h, output_w)} is `nx+1`')
+ if isinstance(size, torch.Size):
+ size = tuple(int(x) for x in size)
+ return F.interpolate(input, size, scale_factor, mode, align_corners)
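+
+# A one-line usage sketch (shapes are arbitrary examples):
+# >>> import torch
+# >>> x = torch.randn(1, 3, 32, 32)
+# >>> resize(x, size=(64, 64), mode='bilinear', align_corners=False).shape
+# torch.Size([1, 3, 64, 64])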
diff --git a/vendor/ViTPose/mmpose/models/utils/smpl.py b/vendor/ViTPose/mmpose/models/utils/smpl.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe723d483aadb7ce7e0e9f50ef8da7b10e7529e5
--- /dev/null
+++ b/vendor/ViTPose/mmpose/models/utils/smpl.py
@@ -0,0 +1,184 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+
+from ..builder import MESH_MODELS
+
+try:
+ from smplx import SMPL as SMPL_
+ has_smpl = True
+except (ImportError, ModuleNotFoundError):
+ has_smpl = False
+
+
+@MESH_MODELS.register_module()
+class SMPL(nn.Module):
+ """SMPL 3d human mesh model of paper ref: Matthew Loper. ``SMPL: A skinned
+ multi-person linear model''. This module is based on the smplx project
+ (https://github.com/vchoutas/smplx).
+
+ Args:
+ smpl_path (str): The path to the folder where the model weights are
+ stored.
+ joints_regressor (str): The path to the file where the joints
+ regressor weights are stored.
+ """
+
+ def __init__(self, smpl_path, joints_regressor):
+ super().__init__()
+
+ assert has_smpl, 'Please install smplx to use SMPL.'
+
+ self.smpl_neutral = SMPL_(
+ model_path=smpl_path,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='neutral')
+
+ self.smpl_male = SMPL_(
+ model_path=smpl_path,
+ create_betas=False,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='male')
+
+ self.smpl_female = SMPL_(
+ model_path=smpl_path,
+ create_betas=False,
+ create_global_orient=False,
+ create_body_pose=False,
+ create_transl=False,
+ gender='female')
+
+ joints_regressor = torch.tensor(
+ np.load(joints_regressor), dtype=torch.float)[None, ...]
+ self.register_buffer('joints_regressor', joints_regressor)
+
+ self.num_verts = self.smpl_neutral.get_num_verts()
+ self.num_joints = self.joints_regressor.shape[1]
+
+ def smpl_forward(self, model, **kwargs):
+ """Apply a specific SMPL model with given model parameters.
+
+ Note:
+ B: batch size
+ V: number of vertices
+ K: number of joints
+
+ Returns:
+ outputs (dict): Dict with mesh vertices and joints.
+ - vertices: Tensor([B, V, 3]), mesh vertices
+ - joints: Tensor([B, K, 3]), 3d joints regressed
+ from mesh vertices.
+ """
+
+ betas = kwargs['betas']
+ batch_size = betas.shape[0]
+ device = betas.device
+ output = {}
+ if batch_size == 0:
+ output['vertices'] = betas.new_zeros([0, self.num_verts, 3])
+ output['joints'] = betas.new_zeros([0, self.num_joints, 3])
+ else:
+ smpl_out = model(**kwargs)
+ output['vertices'] = smpl_out.vertices
+ output['joints'] = torch.matmul(
+ self.joints_regressor.to(device), output['vertices'])
+ return output
+
+ def get_faces(self):
+ """Return mesh faces.
+
+ Note:
+ F: number of faces
+
+ Returns:
+ faces: np.ndarray([F, 3]), mesh faces
+ """
+ return self.smpl_neutral.faces
+
+ def forward(self,
+ betas,
+ body_pose,
+ global_orient,
+ transl=None,
+ gender=None):
+ """Forward function.
+
+ Note:
+ B: batch size
+ J: number of controllable joints of model, for smpl model J=23
+ K: number of joints
+
+ Args:
+ betas: Tensor([B, 10]), human body shape parameters of SMPL model.
+ body_pose: Tensor([B, J*3] or [B, J, 3, 3]), human body pose
+ parameters of SMPL model. It should be an axis-angle vector
+ ([B, J*3]) or rotation matrices ([B, J, 3, 3]).
+ global_orient: Tensor([B, 3] or [B, 1, 3, 3]), global orientation
+ of human body. It should be an axis-angle vector ([B, 3]) or
+ a rotation matrix ([B, 1, 3, 3]).
+ transl: Tensor([B, 3]), global translation of human body.
+ gender: Tensor([B]), gender of each human body. -1 for
+ neutral, 0 for male, 1 for female.
+
+ Returns:
+ outputs (dict): Dict with mesh vertices and joints.
+ - vertices: Tensor([B, V, 3]), mesh vertices
+ - joints: Tensor([B, K, 3]), 3d joints regressed from
+ mesh vertices.
+ """
+
+ batch_size = betas.shape[0]
+ pose2rot = body_pose.dim() == 2
+ if batch_size > 0 and gender is not None:
+ output = {
+ 'vertices': betas.new_zeros([batch_size, self.num_verts, 3]),
+ 'joints': betas.new_zeros([batch_size, self.num_joints, 3])
+ }
+
+ mask = gender < 0
+ _out = self.smpl_forward(
+ self.smpl_neutral,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+
+ mask = gender == 0
+ _out = self.smpl_forward(
+ self.smpl_male,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+
+ mask = gender == 1
+ _out = self.smpl_forward(
+ self.smpl_female,
+ betas=betas[mask],
+ body_pose=body_pose[mask],
+ global_orient=global_orient[mask],
+ transl=transl[mask] if transl is not None else None,
+ pose2rot=pose2rot)
+ output['vertices'][mask] = _out['vertices']
+ output['joints'][mask] = _out['joints']
+ else:
+ return self.smpl_forward(
+ self.smpl_neutral,
+ betas=betas,
+ body_pose=body_pose,
+ global_orient=global_orient,
+ transl=transl,
+ pose2rot=pose2rot)
+
+ return output
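+
+# A construction sketch (paths are placeholders, not real files in this repo;
+# requires the smplx package, the official SMPL model files and a joints
+# regressor .npy, so it is illustrative rather than directly runnable here):
+# >>> import torch
+# >>> smpl = SMPL(smpl_path='path/to/smpl', joints_regressor='path/to/J_regressor.npy')
+# >>> out = smpl(betas=torch.zeros(1, 10),
+# ...            body_pose=torch.zeros(1, 23 * 3),
+# ...            global_orient=torch.zeros(1, 3))
+# >>> out['vertices'].shape   # torch.Size([1, 6890, 3]); joints follow the regressor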
diff --git a/vendor/ViTPose/mmpose/utils/__init__.py b/vendor/ViTPose/mmpose/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1293ca05aab2632e0d6df29734438bc38ed79c6c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .collect_env import collect_env
+from .logger import get_root_logger
+from .setup_env import setup_multi_processes
+from .timer import StopWatch
+
+__all__ = [
+ 'get_root_logger', 'collect_env', 'StopWatch', 'setup_multi_processes'
+]
diff --git a/vendor/ViTPose/mmpose/utils/collect_env.py b/vendor/ViTPose/mmpose/utils/collect_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..f75c5ea73383ccef367632cf497227498ac50078
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/collect_env.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.utils import collect_env as collect_basic_env
+from mmcv.utils import get_git_hash
+
+import mmpose
+
+
+def collect_env():
+ env_info = collect_basic_env()
+ env_info['MMPose'] = (mmpose.__version__ + '+' + get_git_hash(digits=7))
+ return env_info
+
+
+if __name__ == '__main__':
+ for name, val in collect_env().items():
+ print(f'{name}: {val}')
diff --git a/vendor/ViTPose/mmpose/utils/hooks.py b/vendor/ViTPose/mmpose/utils/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..b68940f2b7a8a618916ea5aab331e3ce45ba98e7
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/hooks.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools
+
+
+class OutputHook:
+ """Register forward hooks on the named submodules of a model and cache
+ their outputs, either as tensors or as numpy arrays.
+
+ Args:
+ module (nn.Module): The model to attach hooks to.
+ outputs (list[str] | tuple[str] | None): Dotted names of the
+ submodules whose outputs should be cached.
+ as_tensor (bool): If True, keep outputs as tensors; otherwise
+ convert them to numpy arrays. Default: False.
+ """
+
+ def __init__(self, module, outputs=None, as_tensor=False):
+ self.outputs = outputs
+ self.as_tensor = as_tensor
+ self.layer_outputs = {}
+ self.register(module)
+
+ def register(self, module):
+
+ def hook_wrapper(name):
+
+ def hook(model, input, output):
+ if self.as_tensor:
+ self.layer_outputs[name] = output
+ else:
+ if isinstance(output, list):
+ self.layer_outputs[name] = [
+ out.detach().cpu().numpy() for out in output
+ ]
+ else:
+ self.layer_outputs[name] = output.detach().cpu().numpy()
+
+ return hook
+
+ self.handles = []
+ if isinstance(self.outputs, (list, tuple)):
+ for name in self.outputs:
+ try:
+ layer = rgetattr(module, name)
+ h = layer.register_forward_hook(hook_wrapper(name))
+ except ModuleNotFoundError as module_not_found:
+ raise ModuleNotFoundError(
+ f'Module {name} not found') from module_not_found
+ self.handles.append(h)
+
+ def remove(self):
+ for h in self.handles:
+ h.remove()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.remove()
+
+
+# using wonder's beautiful simplification:
+# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects
+def rgetattr(obj, attr, *args):
+
+ def _getattr(obj, attr):
+ return getattr(obj, attr, *args)
+
+ return functools.reduce(_getattr, [obj] + attr.split('.'))
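+
+# A small usage sketch of OutputHook as a context manager (the model below is
+# an arbitrary example; '0' is the name of its first submodule):
+# >>> import torch
+# >>> import torch.nn as nn
+# >>> model = nn.Sequential(nn.Linear(4, 8), nn.ReLU())
+# >>> with OutputHook(model, outputs=['0'], as_tensor=True) as hook:
+# ...     _ = model(torch.randn(2, 4))
+# >>> hook.layer_outputs['0'].shape
+# torch.Size([2, 8])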
diff --git a/vendor/ViTPose/mmpose/utils/logger.py b/vendor/ViTPose/mmpose/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..294837fa6aec1e1896de8c8accf470f366f81296
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/logger.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+
+from mmcv.utils import get_logger
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO):
+ """Use `get_logger` method in mmcv to get the root logger.
+
+ The logger will be initialized if it has not been initialized. By default a
+ StreamHandler will be added. If `log_file` is specified, a FileHandler will
+ also be added. The name of the root logger is the top-level package name,
+ e.g., "mmpose".
+
+ Args:
+ log_file (str | None): The log filename. If specified, a FileHandler
+ will be added to the root logger.
+ log_level (int): The root logger level. Note that only the process of
+ rank 0 is affected, while other processes will set the level to
+ "Error" and be silent most of the time.
+
+ Returns:
+ logging.Logger: The root logger.
+ """
+ return get_logger(__name__.split('.')[0], log_file, log_level)
diff --git a/vendor/ViTPose/mmpose/utils/setup_env.py b/vendor/ViTPose/mmpose/utils/setup_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..21def2f0809153a5f755af2431f7e702db625e5c
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/setup_env.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import platform
+import warnings
+
+import cv2
+import torch.multiprocessing as mp
+
+
+def setup_multi_processes(cfg):
+ """Setup multi-processing environment variables."""
+ # set multi-process start method as `fork` to speed up the training
+ if platform.system() != 'Windows':
+ mp_start_method = cfg.get('mp_start_method', 'fork')
+ current_method = mp.get_start_method(allow_none=True)
+ if current_method is not None and current_method != mp_start_method:
+ warnings.warn(
+ f'Multi-processing start method `{mp_start_method}` is '
+ f'different from the previous setting `{current_method}`. '
+ f'It will be forced to `{mp_start_method}`. You can change '
+ f'this behavior by changing `mp_start_method` in your config.')
+ mp.set_start_method(mp_start_method, force=True)
+
+ # disable opencv multithreading to avoid system being overloaded
+ opencv_num_threads = cfg.get('opencv_num_threads', 0)
+ cv2.setNumThreads(opencv_num_threads)
+
+ # setup OMP threads
+ # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa
+ if 'OMP_NUM_THREADS' not in os.environ and cfg.data.workers_per_gpu > 1:
+ omp_num_threads = 1
+ warnings.warn(
+ f'Setting OMP_NUM_THREADS environment variable for each process '
+ f'to be {omp_num_threads} by default, to avoid your system being '
+ f'overloaded. Please further tune the variable for optimal '
+ f'performance in your application as needed.')
+ os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)
+
+ # setup MKL threads
+ if 'MKL_NUM_THREADS' not in os.environ and cfg.data.workers_per_gpu > 1:
+ mkl_num_threads = 1
+ warnings.warn(
+ f'Setting MKL_NUM_THREADS environment variable for each process '
+ f'to be {mkl_num_threads} by default, to avoid your system being '
+ f'overloaded. Please further tune the variable for optimal '
+ f'performance in your application as needed.')
+ os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)
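+
+# A minimal usage sketch (the config keys shown are the ones read above; note
+# that calling this mutates process-wide state such as the multiprocessing
+# start method and the OMP/MKL thread counts):
+# >>> from mmcv import Config
+# >>> cfg = Config(dict(data=dict(workers_per_gpu=2)))
+# >>> setup_multi_processes(cfg)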
diff --git a/vendor/ViTPose/mmpose/utils/timer.py b/vendor/ViTPose/mmpose/utils/timer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a3185c5e89ce73bd33591c22ce74fc73ef8e770
--- /dev/null
+++ b/vendor/ViTPose/mmpose/utils/timer.py
@@ -0,0 +1,117 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import defaultdict
+from contextlib import contextmanager
+from functools import partial
+
+import numpy as np
+from mmcv import Timer
+
+
+class RunningAverage():
+ r"""A helper class to calculate running average in a sliding window.
+
+ Args:
+ window (int): The size of the sliding window.
+ """
+
+ def __init__(self, window: int = 1):
+ self.window = window
+ self._data = []
+
+ def update(self, value):
+ """Update a new data sample."""
+ self._data.append(value)
+ self._data = self._data[-self.window:]
+
+ def average(self):
+ """Get the average value of current window."""
+ return np.mean(self._data)
+
+
+class StopWatch:
+ r"""A helper class to measure FPS and detailed time consuming of each phase
+ in a video processing loop or similar scenarios.
+
+ Args:
+ window (int): The sliding window size to calculate the running average
+ of the time consuming.
+
+ Example:
+ >>> from mmpose.utils import StopWatch
+ >>> import time
+ >>> stop_watch = StopWatch(window=10)
+ >>> with stop_watch.timeit('total'):
+ >>> time.sleep(0.1)
+ >>> # 'timeit' support nested use
+ >>> with stop_watch.timeit('phase1'):
+ >>> time.sleep(0.1)
+ >>> with stop_watch.timeit('phase2'):
+ >>> time.sleep(0.2)
+ >>> time.sleep(0.2)
+ >>> report = stop_watch.report()
+ """
+
+ def __init__(self, window=1):
+ self.window = window
+ self._record = defaultdict(partial(RunningAverage, window=self.window))
+ self._timer_stack = []
+
+ @contextmanager
+ def timeit(self, timer_name='_FPS_'):
+ """Timing a code snippet with an assigned name.
+
+ Args:
+ timer_name (str): The unique name of the timed code snippet, used to
+ handle multiple timers and generate reports. Note that '_FPS_'
+ is a special key whose measurement is reported in `fps` instead
+ of `milliseconds`. Also see `report` and `report_strings`.
+ Default: '_FPS_'.
+ Note:
+ This function should always be used in a `with` statement, as shown
+ in the example.
+ """
+ self._timer_stack.append((timer_name, Timer()))
+ try:
+ yield
+ finally:
+ timer_name, timer = self._timer_stack.pop()
+ self._record[timer_name].update(timer.since_start())
+
+ def report(self, key=None):
+ """Report timing information.
+
+ Returns:
+ dict: The key is the timer name and the value is the \
+ corresponding average time consumption.
+ """
+ result = {
+ name: r.average() * 1000.
+ for name, r in self._record.items()
+ }
+
+ if '_FPS_' in result:
+ result['_FPS_'] = 1000. / result.pop('_FPS_')
+
+ if key is None:
+ return result
+ return result[key]
+
+ def report_strings(self):
+ """Report timing information in texture strings.
+
+ Returns:
+ list(str): Each element is the information string of a timed \
+ event, in format of '{timer_name}: {time_in_ms}'. \
+ Specially, if timer_name is '_FPS_', the result will \
+ be converted to fps.
+ """
+ result = self.report()
+ strings = []
+ if '_FPS_' in result:
+ strings.append(f'FPS: {result["_FPS_"]:>5.1f}')
+ strings += [f'{name}: {val:>3.0f}' for name, val in result.items()]
+ return strings
+
+ def reset(self):
+ self._record = defaultdict(partial(RunningAverage, window=self.window))
+ self._timer_stack = []
diff --git a/vendor/ViTPose/mmpose/version.py b/vendor/ViTPose/mmpose/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a10826ab75786cbc8aaaf2a6a87e0465be35801
--- /dev/null
+++ b/vendor/ViTPose/mmpose/version.py
@@ -0,0 +1,19 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+
+__version__ = '0.24.0'
+short_version = __version__
+
+
+def parse_version_info(version_str):
+ version_info = []
+ for x in version_str.split('.'):
+ if x.isdigit():
+ version_info.append(int(x))
+ elif x.find('rc') != -1:
+ patch_version = x.split('rc')
+ version_info.append(int(patch_version[0]))
+ version_info.append(f'rc{patch_version[1]}')
+ return tuple(version_info)
+
+
+version_info = parse_version_info(__version__)
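+
+# Doctest-style examples of the parser above:
+# >>> parse_version_info('0.24.0')
+# (0, 24, 0)
+# >>> parse_version_info('1.0.0rc1')
+# (1, 0, 0, 'rc1')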
diff --git a/vendor/ViTPose/model-index.yml b/vendor/ViTPose/model-index.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c5522f6fc18c959f604864464998a1b9ed53f9ef
--- /dev/null
+++ b/vendor/ViTPose/model-index.yml
@@ -0,0 +1,139 @@
+Import:
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_macaque.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/resnet_macaque.yml
+- configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/zebra/resnet_zebra.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hourglass_ae_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_mhp.yml
+- configs/body/2d_kpt_sview_rgb_img/deeppose/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/deeppose/mpii/resnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/hrnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/resnet_aic.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/alexnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hourglass_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrformer_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_augmentation_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_dark_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_fp16_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_udp_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/litehrnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mobilenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/mspn_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnest_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_dark_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnet_fp16_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnetv1d_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/resnext_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/rsn_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/scnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/seresnet_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv1_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/shufflenetv2_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vgg_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/resnet_crowdpose.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/h36m/hrnet_h36m.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/resnet_jhmdb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mhp/resnet_mhp.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/cpm_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hourglass_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_dark_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/hrnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/litehrnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/mobilenetv2_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnetv1d_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/resnext_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/scnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/seresnet_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv1_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/shufflenetv2_mpii.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii_trb/resnet_mpii_trb.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/hrnet_ochuman.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
+- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml
+- configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
+- configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml
+- configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml
+- configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml
+- configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml
+- configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/mpi_inf_3dhp/videopose3d_mpi-inf-3dhp.yml
+- configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_softwingloss_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/deeppose/wflw/resnet_wingloss_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/300w/hrnetv2_300w.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_aflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/cofw/hrnetv2_cofw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
+- configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml
+- configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml
+- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/freihand2d/resnet_freihand2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/interhand2d/resnet_interhand2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/onehand10k/resnet_onehand10k.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_dark_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/hrnetv2_udp_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/mobilenetv2_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/panoptic2d/resnet_panoptic2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
+- configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.yml
+- configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
+- configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/halpe/hrnet_dark_halpe.yml
diff --git a/vendor/ViTPose/pytest.ini b/vendor/ViTPose/pytest.ini
new file mode 100644
index 0000000000000000000000000000000000000000..9796e871e70c7c67345b1d6bcf708c0c82377a98
--- /dev/null
+++ b/vendor/ViTPose/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+addopts = --xdoctest --xdoctest-style=auto
+norecursedirs = .git ignore build __pycache__ data docker docs .eggs
+
+filterwarnings= default
+ ignore:.*No cfgstr given in Cacher constructor or call.*:Warning
+ ignore:.*Define the __nice__ method for.*:Warning
diff --git a/vendor/ViTPose/requirements.txt b/vendor/ViTPose/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b5b5d97a6ea7837890ff0247bac8c5f24f6eabab
--- /dev/null
+++ b/vendor/ViTPose/requirements.txt
@@ -0,0 +1,4 @@
+-r requirements/build.txt
+-r requirements/runtime.txt
+-r requirements/tests.txt
+-r requirements/optional.txt
diff --git a/vendor/ViTPose/requirements/build.txt b/vendor/ViTPose/requirements/build.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a9566943cef029e5c8dab0b52ba564a7f9c7ad30
--- /dev/null
+++ b/vendor/ViTPose/requirements/build.txt
@@ -0,0 +1,3 @@
+# These must be installed before building mmpose
+numpy
+torch>=1.3
diff --git a/vendor/ViTPose/requirements/docs.txt b/vendor/ViTPose/requirements/docs.txt
new file mode 100644
index 0000000000000000000000000000000000000000..20170845c44eefcb139ee2baa1a3d375b71c34ec
--- /dev/null
+++ b/vendor/ViTPose/requirements/docs.txt
@@ -0,0 +1,6 @@
+docutils==0.16.0
+myst-parser
+-e git+https://github.com/gaotongxiao/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
+sphinx==4.0.2
+sphinx_copybutton
+sphinx_markdown_tables
diff --git a/vendor/ViTPose/requirements/mminstall.txt b/vendor/ViTPose/requirements/mminstall.txt
new file mode 100644
index 0000000000000000000000000000000000000000..89199e36061dcd5361d029606fa25cb791af110a
--- /dev/null
+++ b/vendor/ViTPose/requirements/mminstall.txt
@@ -0,0 +1,3 @@
+mmcv-full>=1.3.8
+mmdet>=2.14.0
+mmtrack>=0.6.0
diff --git a/vendor/ViTPose/requirements/optional.txt b/vendor/ViTPose/requirements/optional.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bfb1e75f86aba2fd074b0b1723e9b07a2037e9c3
--- /dev/null
+++ b/vendor/ViTPose/requirements/optional.txt
@@ -0,0 +1,8 @@
+albumentations>=0.3.2 --no-binary qudida,albumentations
+onnx
+onnxruntime
+poseval@git+https://github.com/svenkreiss/poseval.git
+pyrender
+requests
+smplx>=0.1.28
+trimesh
diff --git a/vendor/ViTPose/requirements/readthedocs.txt b/vendor/ViTPose/requirements/readthedocs.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b8b69d3ca2f051dcb6d6a96a25e7cb9054483c76
--- /dev/null
+++ b/vendor/ViTPose/requirements/readthedocs.txt
@@ -0,0 +1,9 @@
+mmcv-full
+munkres
+poseval@git+https://github.com/svenkreiss/poseval.git
+regex
+scipy
+titlecase
+torch
+torchvision
+xtcocotools>=1.8
diff --git a/vendor/ViTPose/requirements/runtime.txt b/vendor/ViTPose/requirements/runtime.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e83d9d232061098a768184076b451fa6b402230c
--- /dev/null
+++ b/vendor/ViTPose/requirements/runtime.txt
@@ -0,0 +1,11 @@
+chumpy
+dataclasses; python_version == '3.6'
+json_tricks
+matplotlib
+munkres
+numpy
+opencv-python
+pillow
+scipy
+torchvision
+xtcocotools>=1.8
diff --git a/vendor/ViTPose/requirements/tests.txt b/vendor/ViTPose/requirements/tests.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aa23e69da611f7dec62cf84541b7b508f4437a26
--- /dev/null
+++ b/vendor/ViTPose/requirements/tests.txt
@@ -0,0 +1,9 @@
+coverage
+flake8
+interrogate
+isort==4.3.21
+pytest
+pytest-runner
+smplx>=0.1.28
+xdoctest>=0.10.0
+yapf
diff --git a/vendor/ViTPose/resources/mmpose-logo.png b/vendor/ViTPose/resources/mmpose-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..128e1714f0933d0dfe0ab82d6f8780c48e0edc21
Binary files /dev/null and b/vendor/ViTPose/resources/mmpose-logo.png differ
diff --git a/vendor/ViTPose/setup.cfg b/vendor/ViTPose/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..c4d8643bc91a06cc48f0d88b23288e892121249c
--- /dev/null
+++ b/vendor/ViTPose/setup.cfg
@@ -0,0 +1,24 @@
+[bdist_wheel]
+universal=1
+
+[aliases]
+test=pytest
+
+[tool:pytest]
+addopts=tests/
+
+[yapf]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = true
+split_before_expression_after_opening_paren = true
+split_penalty_import_names=0
+SPLIT_PENALTY_AFTER_OPENING_BRACKET=800
+
+[isort]
+line_length = 79
+multi_line_output = 0
+extra_standard_library = pkg_resources,setuptools
+known_first_party = mmpose
+known_third_party = PIL,cv2,h5py,json_tricks,matplotlib,mmcv,munkres,numpy,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,spacepy,titlecase,torch,torchvision,webcam_apis,xmltodict,xtcocotools
+no_lines_before = STDLIB,LOCALFOLDER
+default_section = THIRDPARTY
diff --git a/vendor/ViTPose/setup.py b/vendor/ViTPose/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..c72e8cee00eb360310ab9676ea3465a49993fd33
--- /dev/null
+++ b/vendor/ViTPose/setup.py
@@ -0,0 +1,193 @@
+import os
+import os.path as osp
+import platform
+import shutil
+import sys
+import warnings
+from setuptools import find_packages, setup
+
+
+def readme():
+ with open('README.md', encoding='utf-8') as f:
+ content = f.read()
+ return content
+
+
+version_file = 'mmpose/version.py'
+
+
+def get_version():
+ with open(version_file, 'r') as f:
+ exec(compile(f.read(), version_file, 'exec'))
+ import sys
+
+ # return short version for sdist
+ if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
+ return locals()['short_version']
+ else:
+ return locals()['__version__']
+
+
+def parse_requirements(fname='requirements.txt', with_version=True):
+ """Parse the package dependencies listed in a requirements file but strips
+ specific versioning information.
+
+ Args:
+ fname (str): path to requirements file
+ with_version (bool, default=True): if True include version specs
+
+ Returns:
+ List[str]: list of requirements items
+
+ CommandLine:
+ python -c "import setup; print(setup.parse_requirements())"
+ """
+ import re
+ import sys
+ from os.path import exists
+ require_fpath = fname
+
+ def parse_line(line):
+ """Parse information from a line in a requirements text file."""
+ if line.startswith('-r '):
+ # Allow specifying requirements in other files
+ target = line.split(' ')[1]
+ for info in parse_require_file(target):
+ yield info
+ else:
+ info = {'line': line}
+ if line.startswith('-e '):
+ info['package'] = line.split('#egg=')[1]
+ elif '@git+' in line:
+ info['package'] = line
+ else:
+ # Remove versioning from the package
+ pat = '(' + '|'.join(['>=', '==', '>']) + ')'
+ parts = re.split(pat, line, maxsplit=1)
+ parts = [p.strip() for p in parts]
+
+ info['package'] = parts[0]
+ if len(parts) > 1:
+ op, rest = parts[1:]
+ if ';' in rest:
+ # Handle platform specific dependencies
+ # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
+ version, platform_deps = map(str.strip,
+ rest.split(';'))
+ info['platform_deps'] = platform_deps
+ else:
+ version = rest # NOQA
+ info['version'] = (op, version)
+ yield info
+
+ def parse_require_file(fpath):
+ with open(fpath, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ if line and not line.startswith('#'):
+ for info in parse_line(line):
+ yield info
+
+ def gen_packages_items():
+ if exists(require_fpath):
+ for info in parse_require_file(require_fpath):
+ parts = [info['package']]
+ if with_version and 'version' in info:
+ parts.extend(info['version'])
+ if not sys.version.startswith('3.4'):
+ # apparently platform_deps are broken in 3.4
+ platform_deps = info.get('platform_deps')
+ if platform_deps is not None:
+ parts.append(';' + platform_deps)
+ item = ''.join(parts)
+ yield item
+
+ packages = list(gen_packages_items())
+ return packages
+
+
+def add_mim_extension():
+ """Add extra files that are required to support MIM into the package.
+
+ These files will be added by creating a symlink to the originals if the
+ package is installed in `editable` mode (e.g. pip install -e .), or by
+ copying from the originals otherwise.
+ """
+
+ # parse installment mode
+ if 'develop' in sys.argv:
+ # installed by `pip install -e .`
+ if platform.system() == 'Windows':
+ mode = 'copy'
+ else:
+ mode = 'symlink'
+ elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
+ # installed by `pip install .`
+ # or create source distribution by `python setup.py sdist`
+ mode = 'copy'
+ else:
+ return
+
+ filenames = ['tools', 'configs', 'demo', 'model-index.yml']
+ repo_path = osp.dirname(__file__)
+ mim_path = osp.join(repo_path, 'mmpose', '.mim')
+ os.makedirs(mim_path, exist_ok=True)
+
+ for filename in filenames:
+ if osp.exists(filename):
+ src_path = osp.join(repo_path, filename)
+ tar_path = osp.join(mim_path, filename)
+
+ if osp.isfile(tar_path) or osp.islink(tar_path):
+ os.remove(tar_path)
+ elif osp.isdir(tar_path):
+ shutil.rmtree(tar_path)
+
+ if mode == 'symlink':
+ src_relpath = osp.relpath(src_path, osp.dirname(tar_path))
+ os.symlink(src_relpath, tar_path)
+ elif mode == 'copy':
+ if osp.isfile(src_path):
+ shutil.copyfile(src_path, tar_path)
+ elif osp.isdir(src_path):
+ shutil.copytree(src_path, tar_path)
+ else:
+ warnings.warn(f'Cannot copy file {src_path}.')
+ else:
+ raise ValueError(f'Invalid mode {mode}')
+
+
+if __name__ == '__main__':
+ add_mim_extension()
+ setup(
+ name='mmpose',
+ version=get_version(),
+ description='OpenMMLab Pose Estimation Toolbox and Benchmark.',
+ author='MMPose Contributors',
+ author_email='openmmlab@gmail.com',
+ keywords='computer vision, pose estimation',
+ long_description=readme(),
+ long_description_content_type='text/markdown',
+ packages=find_packages(exclude=('configs', 'tools', 'demo')),
+ include_package_data=True,
+ package_data={'mmpose.ops': ['*/*.so']},
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
+ ],
+ url='https://github.com/open-mmlab/mmpose',
+ license='Apache License 2.0',
+ install_requires=parse_requirements('requirements/runtime.txt'),
+ extras_require={
+ 'tests': parse_requirements('requirements/tests.txt'),
+ 'build': parse_requirements('requirements/build.txt'),
+ 'runtime': parse_requirements('requirements/runtime.txt')
+ },
+ zip_safe=False)
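For orientation, the sketch below (not part of the vendored diff) illustrates the plain-package branch of parse_requirements() defined in the setup.py above: a spec such as 'mmcv-full>=1.3.8' is split on the first '>=', '==' or '>' into a package name and a version pin, and any ';' platform marker is dropped from the pin. split_requirement and the sample specs are illustrative assumptions, not code from setup.py.

```python
import re


def split_requirement(line):
    """Mirror the plain-package branch of parse_line() in the setup.py above."""
    pat = '(' + '|'.join(['>=', '==', '>']) + ')'
    parts = [p.strip() for p in re.split(pat, line, maxsplit=1)]
    if len(parts) == 1:
        return parts[0], None                 # no version spec, e.g. 'numpy'
    package, op, rest = parts
    version = rest.split(';')[0].strip()      # drop any platform marker
    return package, (op, version)


if __name__ == '__main__':
    print(split_requirement('mmcv-full>=1.3.8'))  # ('mmcv-full', ('>=', '1.3.8'))
    print(split_requirement('numpy'))             # ('numpy', None)
```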
diff --git a/vendor/ViTPose/tests/__init__.py b/vendor/ViTPose/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef101fec61e72abc0eb90266d453b5b22331378d
--- /dev/null
+++ b/vendor/ViTPose/tests/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/vendor/ViTPose/tests/data/300w/indoor_020.png b/vendor/ViTPose/tests/data/300w/indoor_020.png
new file mode 100644
index 0000000000000000000000000000000000000000..0512b8c35361769299204680ab2dc5ea6cc2001c
Binary files /dev/null and b/vendor/ViTPose/tests/data/300w/indoor_020.png differ
diff --git a/vendor/ViTPose/tests/data/300w/indoor_029.png b/vendor/ViTPose/tests/data/300w/indoor_029.png
new file mode 100644
index 0000000000000000000000000000000000000000..2d6e7b6835d4d64bb35e584a5a7d8cffcb3c30a5
Binary files /dev/null and b/vendor/ViTPose/tests/data/300w/indoor_029.png differ
diff --git a/vendor/ViTPose/tests/data/300w/test_300w.json b/vendor/ViTPose/tests/data/300w/test_300w.json
new file mode 100644
index 0000000000000000000000000000000000000000..e825300a57af8bceaa1f6d79416b547d410de4ab
--- /dev/null
+++ b/vendor/ViTPose/tests/data/300w/test_300w.json
@@ -0,0 +1,477 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "face",
+ "keypoints": [],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 197,
+ "file_name": "indoor_029.png",
+ "height": 845,
+ "width": 960
+ },
+ {
+ "id": 565,
+ "file_name": "indoor_020.png",
+ "height": 768,
+ "width": 726
+ }
+ ],
+ "annotations": [
+ {
+ "image_id": 197,
+ "id": 197,
+ "keypoints": [
+ 268.0,
+ 398.882,
+ 1,
+ 285.21,
+ 470.547,
+ 1,
+ 303.994,
+ 540.61,
+ 1,
+ 332.8,
+ 611.274,
+ 1,
+ 376.829,
+ 659.993,
+ 1,
+ 428.904,
+ 701.529,
+ 1,
+ 493.765,
+ 726.48,
+ 1,
+ 566.941,
+ 741.209,
+ 1,
+ 615.5,
+ 733.248,
+ 1,
+ 660.628,
+ 711.888,
+ 1,
+ 693.575,
+ 666.8610000000001,
+ 1,
+ 707.9989999999998,
+ 602.151,
+ 1,
+ 710.0010000000002,
+ 540.7090000000002,
+ 1,
+ 710.702,
+ 482.586,
+ 1,
+ 705.705,
+ 430.128,
+ 1,
+ 698.574,
+ 376.051,
+ 1,
+ 687.17,
+ 325.797,
+ 1,
+ 335.426,
+ 370.217,
+ 1,
+ 352.01,
+ 339.706,
+ 1,
+ 400.98800000000006,
+ 317.285,
+ 1,
+ 449.164,
+ 310.243,
+ 1,
+ 493.34,
+ 314.9120000000001,
+ 1,
+ 548.874,
+ 304.259,
+ 1,
+ 572.625,
+ 284.111,
+ 1,
+ 609.946,
+ 265.0,
+ 1,
+ 650.465,
+ 269.886,
+ 1,
+ 672.5269999999998,
+ 287.694,
+ 1,
+ 531.823,
+ 349.5019999999999,
+ 1,
+ 543.992,
+ 387.47,
+ 1,
+ 557.0459999999998,
+ 425.639,
+ 1,
+ 570.283,
+ 465.089,
+ 1,
+ 521.077,
+ 509.142,
+ 1,
+ 543.5830000000002,
+ 511.647,
+ 1,
+ 569.154,
+ 510.935,
+ 1,
+ 589.758,
+ 504.75,
+ 1,
+ 607.544,
+ 494.626,
+ 1,
+ 372.146,
+ 389.57,
+ 1,
+ 399.878,
+ 370.642,
+ 1,
+ 431.883,
+ 359.838,
+ 1,
+ 465.725,
+ 371.503,
+ 1,
+ 437.99,
+ 384.279,
+ 1,
+ 406.296,
+ 393.511,
+ 1,
+ 571.331,
+ 349.968,
+ 1,
+ 599.158,
+ 324.208,
+ 1,
+ 630.259,
+ 318.067,
+ 1,
+ 656.076,
+ 327.782,
+ 1,
+ 635.32,
+ 340.57199999999995,
+ 1,
+ 607.295,
+ 346.391,
+ 1,
+ 479.066,
+ 604.947,
+ 1,
+ 519.818,
+ 577.8,
+ 1,
+ 547.948,
+ 566.137,
+ 1,
+ 572.52,
+ 568.232,
+ 1,
+ 594.948,
+ 556.586,
+ 1,
+ 621.335,
+ 562.737,
+ 1,
+ 653.6,
+ 571.3580000000002,
+ 1,
+ 623.72,
+ 596.32,
+ 1,
+ 606.549,
+ 604.577,
+ 1,
+ 578.673,
+ 606.798,
+ 1,
+ 554.4830000000002,
+ 609.318,
+ 1,
+ 525.276,
+ 609.497,
+ 1,
+ 494.741,
+ 601.097,
+ 1,
+ 549.953,
+ 585.0319999999998,
+ 1,
+ 573.969,
+ 584.442,
+ 1,
+ 599.372,
+ 575.65,
+ 1,
+ 640.35,
+ 573.788,
+ 1,
+ 599.372,
+ 575.65,
+ 1,
+ 573.969,
+ 584.442,
+ 1,
+ 549.953,
+ 585.0319999999998,
+ 1
+ ],
+ "num_keypoints": 68,
+ "bbox": [
+ 223.7298,
+ 217.3791,
+ 531.2424000000001,
+ 571.4508
+ ],
+ "iscrowd": 0,
+ "area": 303578.89447392005,
+ "category_id": 1,
+ "center": [
+ 489.5,
+ 503.5
+ ],
+ "scale": 2.385
+ },
+ {
+ "image_id": 565,
+ "id": 565,
+ "keypoints": [
+ 70.0,
+ 292.332,
+ 1,
+ 85.978,
+ 359.108,
+ 1,
+ 106.67,
+ 442.2480000000001,
+ 1,
+ 132.174,
+ 524.227,
+ 1,
+ 170.87900000000005,
+ 587.591,
+ 1,
+ 220.419,
+ 640.665,
+ 1,
+ 275.329,
+ 686.7510000000002,
+ 1,
+ 345.149,
+ 712.11,
+ 1,
+ 415.072,
+ 700.013,
+ 1,
+ 455.739,
+ 681.039,
+ 1,
+ 491.441,
+ 646.908,
+ 1,
+ 522.22,
+ 601.67,
+ 1,
+ 545.278,
+ 556.815,
+ 1,
+ 570.101,
+ 495.899,
+ 1,
+ 588.304,
+ 413.976,
+ 1,
+ 595.136,
+ 343.6280000000001,
+ 1,
+ 590.716,
+ 280.211,
+ 1,
+ 118.878,
+ 305.308,
+ 1,
+ 158.248,
+ 281.872,
+ 1,
+ 202.699,
+ 284.469,
+ 1,
+ 246.669,
+ 294.941,
+ 1,
+ 294.485,
+ 316.657,
+ 1,
+ 387.621,
+ 306.5490000000001,
+ 1,
+ 437.315,
+ 274.369,
+ 1,
+ 483.305,
+ 246.679,
+ 1,
+ 531.807,
+ 219.0,
+ 1,
+ 574.753,
+ 226.314,
+ 1,
+ 350.492,
+ 372.72,
+ 1,
+ 354.8180000000001,
+ 422.627,
+ 1,
+ 358.916,
+ 467.076,
+ 1,
+ 364.204,
+ 508.283,
+ 1,
+ 303.536,
+ 510.181,
+ 1,
+ 332.565,
+ 524.2280000000002,
+ 1,
+ 361.282,
+ 537.337,
+ 1,
+ 385.853,
+ 530.722,
+ 1,
+ 410.586,
+ 512.7090000000002,
+ 1,
+ 171.577,
+ 361.551,
+ 1,
+ 203.614,
+ 344.588,
+ 1,
+ 246.448,
+ 345.9380000000001,
+ 1,
+ 288.441,
+ 368.74300000000005,
+ 1,
+ 246.677,
+ 376.513,
+ 1,
+ 202.377,
+ 382.091,
+ 1,
+ 411.996,
+ 361.712,
+ 1,
+ 445.408,
+ 332.093,
+ 1,
+ 485.232,
+ 319.01,
+ 1,
+ 518.47,
+ 328.7990000000001,
+ 1,
+ 492.908,
+ 360.212,
+ 1,
+ 447.886,
+ 364.719,
+ 1,
+ 256.704,
+ 564.955,
+ 1,
+ 306.255,
+ 569.807,
+ 1,
+ 333.68,
+ 566.9019999999998,
+ 1,
+ 360.689,
+ 571.737,
+ 1,
+ 391.088,
+ 565.381,
+ 1,
+ 426.92,
+ 559.18,
+ 1,
+ 476.772,
+ 545.14,
+ 1,
+ 429.904,
+ 606.391,
+ 1,
+ 398.257,
+ 628.13,
+ 1,
+ 361.86,
+ 635.736,
+ 1,
+ 332.214,
+ 631.4259999999998,
+ 1,
+ 300.871,
+ 615.508,
+ 1,
+ 271.028,
+ 570.685,
+ 1,
+ 333.764,
+ 594.513,
+ 1,
+ 360.897,
+ 597.525,
+ 1,
+ 391.282,
+ 594.612,
+ 1,
+ 467.095,
+ 554.192,
+ 1,
+ 391.282,
+ 594.612,
+ 1,
+ 360.897,
+ 597.525,
+ 1,
+ 333.764,
+ 594.513,
+ 1
+ ],
+ "num_keypoints": 68,
+ "bbox": [
+ 17.486400000000003,
+ 169.689,
+ 630.1632,
+ 591.7320000000001
+ ],
+ "iscrowd": 0,
+ "area": 372887.7306624,
+ "category_id": 1,
+ "center": [
+ 333.0,
+ 466.0
+ ],
+ "scale": 2.63
+ }
+ ]
+}
\ No newline at end of file
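The 300W fixture above follows the COCO keypoint layout: each annotation stores its 68 face landmarks as a flat [x, y, visibility] triplet list alongside an [x, y, w, h] bbox. A minimal loading sketch, assuming the diff has been applied so the file exists at the path shown in the hunk header:

```python
import json

# Path assumes the diff has been applied and the script runs from the repo root.
with open('vendor/ViTPose/tests/data/300w/test_300w.json') as f:
    ann_file = json.load(f)

for ann in ann_file['annotations']:
    kpts = ann['keypoints']
    # COCO keypoint layout: flat [x, y, visibility] triplets -> 68 landmarks.
    landmarks = [kpts[i:i + 3] for i in range(0, len(kpts), 3)]
    x, y, w, h = ann['bbox']
    print(ann['image_id'], len(landmarks), 'landmarks, bbox', (x, y, w, h))
```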
diff --git a/vendor/ViTPose/tests/data/aflw/image04476.jpg b/vendor/ViTPose/tests/data/aflw/image04476.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a6cf8ee3ec55c98cb63941eed7e833b08d96b732
Binary files /dev/null and b/vendor/ViTPose/tests/data/aflw/image04476.jpg differ
diff --git a/vendor/ViTPose/tests/data/aflw/image22568.jpg b/vendor/ViTPose/tests/data/aflw/image22568.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c1b3765dcf9f441eff296f2b552aa416599acf37
Binary files /dev/null and b/vendor/ViTPose/tests/data/aflw/image22568.jpg differ
diff --git a/vendor/ViTPose/tests/data/aflw/test_aflw.json b/vendor/ViTPose/tests/data/aflw/test_aflw.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc2e903e265a8c11cac52349f766e93f51fbdf5a
--- /dev/null
+++ b/vendor/ViTPose/tests/data/aflw/test_aflw.json
@@ -0,0 +1,185 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "face",
+ "keypoints": [],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 3,
+ "file_name": "image22568.jpg",
+ "height": 1280,
+ "width": 853
+ },
+ {
+ "id": 68,
+ "file_name": "image04476.jpg",
+ "height": 500,
+ "width": 439
+ }
+ ],
+ "annotations": [
+ {
+ "image_id": 3,
+ "id": 3,
+ "keypoints": [
+ 337.28341384863,
+ 205.78904991948002,
+ 1,
+ 370.46215780998,
+ 203.18679549113997,
+ 1,
+ 400.38808373591,
+ 210.99355877617,
+ 1,
+ 481.70853462158,
+ 217.49919484702,
+ 1,
+ 518.7906602254401,
+ 210.99355877617,
+ 1,
+ 549.3671497584501,
+ 223.35426731079,
+ 1,
+ 359.40257648952996,
+ 253.28019323671,
+ 1,
+ 391.28019323671,
+ 254.58132045089002,
+ 1,
+ 419.25442834138,
+ 261.08695652174,
+ 1,
+ 479.10628019324,
+ 259.78582930757005,
+ 1,
+ 507.08051529790976,
+ 255.88244766506,
+ 1,
+ 533.10305958132,
+ 258.4847020934,
+ 1,
+ 404.29146537842,
+ 323.54106280192997,
+ 1,
+ 443.32528180354007,
+ 338.5040257649,
+ 1,
+ 483.66022544283,
+ 327.44444444444,
+ 1,
+ 381.52173913043,
+ 374.28502415459,
+ 1,
+ 443.32528180354007,
+ 376.23671497584996,
+ 1,
+ 507.73107890499,
+ 372.98389694042,
+ 1,
+ 451.0,
+ 478.0,
+ 1
+ ],
+ "num_keypoints": 19,
+ "bbox": [
+ 316.07504025764797,
+ 175.70547504025396,
+ 254.50048309178408,
+ 329.7758454106321
+ ],
+ "iscrowd": 0,
+ "area": 83928.11196900737,
+ "category_id": 1,
+ "center": [
+ 449.0,
+ 321.0
+ ],
+ "scale": 1.81,
+ "box_size": 362.0
+ },
+ {
+ "image_id": 68,
+ "id": 68,
+ "keypoints": [
+ 126.64745330811,
+ 157.27305603027,
+ 1,
+ 134.30273752013,
+ 153.39452495973998,
+ 1,
+ 145.34465026855,
+ 153.01428222656,
+ 1,
+ 165.48123168945,
+ 146.28958129883,
+ 1,
+ 181.7833404541,
+ 140.24139404297,
+ 1,
+ 198.6918182373,
+ 143.05288696289,
+ 1,
+ 133.90043640137,
+ 167.45462036133,
+ 1,
+ 141.77455716586002,
+ 165.24637681158995,
+ 1,
+ 148.98872785829,
+ 163.70048309178998,
+ 1,
+ 174.96592712402,
+ 157.80386352539,
+ 1,
+ 185.42395019531003,
+ 155.1201171875,
+ 1,
+ 194.88919067383,
+ 154.83345031738,
+ 1,
+ 145.87278582931,
+ 188.89049919485,
+ 1,
+ 152.59581320451,
+ 177.61352657005,
+ 1,
+ 174.75362318841,
+ 185.34299516908,
+ 1,
+ 145.63929146538,
+ 213.68438003221002,
+ 1,
+ 161.87117552335,
+ 211.3655394525,
+ 1,
+ 187.12077294686,
+ 207.24315619968002,
+ 1,
+ 166.0,
+ 244.0,
+ 1
+ ],
+ "num_keypoints": 19,
+ "bbox": [
+ 119.443016815191,
+ 129.865533447267,
+ 86.453237915028,
+ 124.51032714843598
+ ],
+ "iscrowd": 0,
+ "area": 10764.320935841704,
+ "category_id": 1,
+ "center": [
+ 166.0,
+ 185.0
+ ],
+ "scale": 0.64,
+ "box_size": 128.0
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg b/vendor/ViTPose/tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f988b6ee6ee303351043c0cee3e5d068dd200346
Binary files /dev/null and b/vendor/ViTPose/tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg differ
diff --git a/vendor/ViTPose/tests/data/aic/fa436c914fe4a8ec1ec5474af4d3820b84d17561.jpg b/vendor/ViTPose/tests/data/aic/fa436c914fe4a8ec1ec5474af4d3820b84d17561.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..31dba7242e5517ff4fe478818744ea3bcfe1511c
Binary files /dev/null and b/vendor/ViTPose/tests/data/aic/fa436c914fe4a8ec1ec5474af4d3820b84d17561.jpg differ
diff --git a/vendor/ViTPose/tests/data/aic/ff945ae2e729f24eea992814639d59b3bdec8bd8.jpg b/vendor/ViTPose/tests/data/aic/ff945ae2e729f24eea992814639d59b3bdec8bd8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..00c05504db3383e8ee905cfe6d88af258d79c1db
Binary files /dev/null and b/vendor/ViTPose/tests/data/aic/ff945ae2e729f24eea992814639d59b3bdec8bd8.jpg differ
diff --git a/vendor/ViTPose/tests/data/aic/test_aic.json b/vendor/ViTPose/tests/data/aic/test_aic.json
new file mode 100644
index 0000000000000000000000000000000000000000..28b006a5ff8070129e8d800bd2e49b7e552e0f24
--- /dev/null
+++ b/vendor/ViTPose/tests/data/aic/test_aic.json
@@ -0,0 +1,625 @@
+{
+ "info": {
+ "description": "MMPose example aic dataset",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/08/25"
+ },
+ "licenses": [
+ {
+ "url": "",
+ "id": 1,
+ "name": ""
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "Right Shoulder",
+ "Right Elbow",
+ "Right Wrist",
+ "Left Shoulder",
+ "Left Elbow",
+ "Left Wrist",
+ "Right Hip",
+ "Right Knee",
+ "Right Ankle",
+ "Left Hip",
+ "Left Knee",
+ "Left Ankle",
+ "Head top",
+ "Neck"
+ ],
+ "skeleton": [
+ [
+ 3,
+ 2
+ ],
+ [
+ 2,
+ 1
+ ],
+ [
+ 1,
+ 14
+ ],
+ [
+ 14,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 5,
+ 6
+ ],
+ [
+ 9,
+ 8
+ ],
+ [
+ 8,
+ 7
+ ],
+ [
+ 7,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 1,
+ 7
+ ],
+ [
+ 4,
+ 10
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "url": "http://www.sinaimg.cn/dy/slidenews/4_img/2013_47/704_1154733_789201.jpg",
+ "file_name": "054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg",
+ "height": 600,
+ "width": 900,
+ "id": 1
+ },
+ {
+ "url": "http://www.sinaimg.cn/dy/slidenews/2_img/2015_26/820_1533617_599302.jpg",
+ "file_name": "fa436c914fe4a8ec1ec5474af4d3820b84d17561.jpg",
+ "height": 596,
+ "width": 900,
+ "id": 2
+ },
+ {
+ "url": "http://www.sinaimg.cn/dy/slidenews/2_img/2016_39/730_1947359_260964.jpg",
+ "file_name": "ff945ae2e729f24eea992814639d59b3bdec8bd8.jpg",
+ "height": 641,
+ "width": 950,
+ "id": 3
+ }
+ ],
+ "annotations": [
+ {
+ "bbox": [
+ 279,
+ 55,
+ 213,
+ 544
+ ],
+ "keypoints": [
+ 313,
+ 201,
+ 2,
+ 312,
+ 313,
+ 1,
+ 320,
+ 424,
+ 2,
+ 406,
+ 197,
+ 1,
+ 431,
+ 286,
+ 1,
+ 459,
+ 269,
+ 2,
+ 375,
+ 447,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 416,
+ 441,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 395,
+ 74,
+ 2,
+ 372,
+ 170,
+ 2
+ ],
+ "num_keypoints": 10,
+ "image_id": 1,
+ "category_id": 1,
+ "id": 4
+ },
+ {
+ "bbox": [
+ 541,
+ 131,
+ 329,
+ 468
+ ],
+ "keypoints": [
+ 637,
+ 374,
+ 1,
+ 626,
+ 509,
+ 2,
+ 0,
+ 0,
+ 0,
+ 755,
+ 347,
+ 2,
+ 728,
+ 538,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 604,
+ 169,
+ 2,
+ 674,
+ 290,
+ 2
+ ],
+ "num_keypoints": 6,
+ "image_id": 1,
+ "category_id": 1,
+ "id": 5
+ },
+ {
+ "bbox": [
+ 88,
+ 7,
+ 252,
+ 592
+ ],
+ "keypoints": [
+ 144,
+ 180,
+ 2,
+ 171,
+ 325,
+ 1,
+ 256,
+ 428,
+ 1,
+ 265,
+ 196,
+ 2,
+ 297,
+ 311,
+ 2,
+ 300,
+ 412,
+ 2,
+ 178,
+ 476,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 253,
+ 474,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 220,
+ 23,
+ 2,
+ 205,
+ 133,
+ 2
+ ],
+ "num_keypoints": 10,
+ "image_id": 1,
+ "category_id": 1,
+ "id": 6
+ },
+ {
+ "bbox": [
+ 497,
+ 179,
+ 401,
+ 416
+ ],
+ "keypoints": [
+ 692,
+ 332,
+ 1,
+ 587,
+ 430,
+ 2,
+ 612,
+ 552,
+ 1,
+ 657,
+ 422,
+ 2,
+ 533,
+ 571,
+ 2,
+ 621,
+ 450,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 564,
+ 212,
+ 2,
+ 656,
+ 362,
+ 2
+ ],
+ "num_keypoints": 8,
+ "image_id": 2,
+ "category_id": 1,
+ "id": 7
+ },
+ {
+ "bbox": [
+ 336,
+ 26,
+ 177,
+ 254
+ ],
+ "keypoints": [
+ 368,
+ 142,
+ 2,
+ 365,
+ 237,
+ 1,
+ 415,
+ 271,
+ 1,
+ 487,
+ 147,
+ 2,
+ 493,
+ 240,
+ 2,
+ 431,
+ 265,
+ 2,
+ 393,
+ 296,
+ 1,
+ 326,
+ 306,
+ 1,
+ 339,
+ 390,
+ 1,
+ 449,
+ 297,
+ 1,
+ 373,
+ 315,
+ 1,
+ 376,
+ 389,
+ 1,
+ 435,
+ 43,
+ 2,
+ 430,
+ 131,
+ 2
+ ],
+ "num_keypoints": 14,
+ "image_id": 2,
+ "category_id": 1,
+ "id": 8
+ },
+ {
+ "bbox": [
+ 0,
+ 109,
+ 473,
+ 486
+ ],
+ "keypoints": [
+ 68,
+ 333,
+ 2,
+ 215,
+ 408,
+ 2,
+ 376,
+ 427,
+ 2,
+ 169,
+ 280,
+ 1,
+ 166,
+ 386,
+ 1,
+ 146,
+ 462,
+ 2,
+ 39,
+ 545,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 136,
+ 515,
+ 1,
+ 292,
+ 531,
+ 1,
+ 0,
+ 0,
+ 0,
+ 181,
+ 147,
+ 2,
+ 127,
+ 269,
+ 2
+ ],
+ "num_keypoints": 11,
+ "image_id": 2,
+ "category_id": 1,
+ "id": 9
+ },
+ {
+ "bbox": [
+ 681,
+ 3,
+ 267,
+ 607
+ ],
+ "keypoints": [
+ 846,
+ 98,
+ 1,
+ 862,
+ 223,
+ 1,
+ 794,
+ 282,
+ 2,
+ 824,
+ 134,
+ 2,
+ 875,
+ 241,
+ 2,
+ 842,
+ 329,
+ 2,
+ 903,
+ 296,
+ 1,
+ 766,
+ 397,
+ 1,
+ 777,
+ 562,
+ 2,
+ 886,
+ 299,
+ 2,
+ 757,
+ 399,
+ 2,
+ 871,
+ 514,
+ 2,
+ 761,
+ 29,
+ 2,
+ 813,
+ 87,
+ 2
+ ],
+ "num_keypoints": 14,
+ "image_id": 3,
+ "category_id": 1,
+ "id": 10
+ },
+ {
+ "bbox": [
+ 484,
+ 7,
+ 162,
+ 481
+ ],
+ "keypoints": [
+ 544,
+ 96,
+ 2,
+ 506,
+ 161,
+ 2,
+ 542,
+ 208,
+ 2,
+ 606,
+ 93,
+ 2,
+ 615,
+ 151,
+ 1,
+ 622,
+ 187,
+ 2,
+ 571,
+ 251,
+ 2,
+ 553,
+ 361,
+ 2,
+ 556,
+ 458,
+ 2,
+ 591,
+ 251,
+ 1,
+ 581,
+ 363,
+ 2,
+ 587,
+ 456,
+ 2,
+ 587,
+ 21,
+ 2,
+ 578,
+ 80,
+ 2
+ ],
+ "num_keypoints": 14,
+ "image_id": 3,
+ "category_id": 1,
+ "id": 11
+ },
+ {
+ "bbox": [
+ 33,
+ 73,
+ 493,
+ 566
+ ],
+ "keypoints": [
+ 254,
+ 203,
+ 2,
+ 169,
+ 203,
+ 2,
+ 111,
+ 187,
+ 2,
+ 391,
+ 204,
+ 2,
+ 425,
+ 276,
+ 2,
+ 475,
+ 346,
+ 2,
+ 272,
+ 376,
+ 2,
+ 185,
+ 485,
+ 2,
+ 126,
+ 607,
+ 1,
+ 357,
+ 383,
+ 2,
+ 359,
+ 459,
+ 2,
+ 350,
+ 561,
+ 2,
+ 338,
+ 111,
+ 2,
+ 325,
+ 180,
+ 1
+ ],
+ "num_keypoints": 14,
+ "image_id": 3,
+ "category_id": 1,
+ "id": 12
+ }
+ ]
+}
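The AIC fixture above names its 14 keypoints and encodes the skeleton as 1-based index pairs into that list. A short sketch (file path assumed from the hunk header) that resolves those pairs into readable limb names:

```python
import json

# Path assumed from the hunk header above; adjust if the file lives elsewhere.
with open('vendor/ViTPose/tests/data/aic/test_aic.json') as f:
    data = json.load(f)

cat = data['categories'][0]
names = cat['keypoints']          # 14 AIC joint names
for a, b in cat['skeleton']:      # skeleton edges are 1-based indices
    print(f'{names[a - 1]} -> {names[b - 1]}')
```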
diff --git a/vendor/ViTPose/tests/data/animalpose/ca110.jpeg b/vendor/ViTPose/tests/data/animalpose/ca110.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..70289a95da8bdf32570dc8b264b08337af0c6fd7
Binary files /dev/null and b/vendor/ViTPose/tests/data/animalpose/ca110.jpeg differ
diff --git a/vendor/ViTPose/tests/data/animalpose/ho105.jpeg b/vendor/ViTPose/tests/data/animalpose/ho105.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..38f41de368282b57891de5cc7ae64bebf79eaa70
Binary files /dev/null and b/vendor/ViTPose/tests/data/animalpose/ho105.jpeg differ
diff --git a/vendor/ViTPose/tests/data/animalpose/test_animalpose.json b/vendor/ViTPose/tests/data/animalpose/test_animalpose.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b11465327ae74d30e14a134cfd88e0e78aaf754
--- /dev/null
+++ b/vendor/ViTPose/tests/data/animalpose/test_animalpose.json
@@ -0,0 +1,281 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "cat",
+ "keypoints": [
+ "L_Eye",
+ "R_Eye",
+ "L_EarBase",
+ "R_EarBase",
+ "Nose",
+ "Throat",
+ "TailBase",
+ "Withers",
+ "L_F_Elbow",
+ "R_F_Elbow",
+ "L_B_Elbow",
+ "R_B_Elbow",
+ "L_F_Knee",
+ "R_F_Knee",
+ "L_B_Knee",
+ "R_B_Knee",
+ "L_F_Paw",
+ "R_F_Paw",
+ "L_B_Paw",
+ "R_B_Paw"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 1,
+ 5
+ ],
+ [
+ 2,
+ 5
+ ],
+ [
+ 5,
+ 6
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 6,
+ 9
+ ],
+ [
+ 9,
+ 13
+ ],
+ [
+ 13,
+ 17
+ ],
+ [
+ 6,
+ 10
+ ],
+ [
+ 10,
+ 14
+ ],
+ [
+ 14,
+ 18
+ ],
+ [
+ 7,
+ 11
+ ],
+ [
+ 11,
+ 15
+ ],
+ [
+ 15,
+ 19
+ ],
+ [
+ 7,
+ 12
+ ],
+ [
+ 12,
+ 16
+ ],
+ [
+ 16,
+ 20
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "id": 110,
+ "file_name": "ca110.jpeg",
+ "height": 240,
+ "width": 300
+ },
+ {
+ "id": 3105,
+ "file_name": "ho105.jpeg",
+ "height": 255,
+ "width": 300
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 117.0,
+ 95.0,
+ 2.0,
+ 85.0,
+ 102.0,
+ 2.0,
+ 115.0,
+ 56.0,
+ 2.0,
+ 62.0,
+ 78.0,
+ 2.0,
+ 102.0,
+ 109.0,
+ 2.0,
+ 104.0,
+ 130.0,
+ 2.0,
+ 235.0,
+ 163.0,
+ 2.0,
+ 144.0,
+ 53.0,
+ 2.0,
+ 123.0,
+ 142.0,
+ 2.0,
+ 40.0,
+ 161.0,
+ 2.0,
+ 182.0,
+ 160.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 115.0,
+ 186.0,
+ 2.0,
+ 64.0,
+ 192.0,
+ 2.0,
+ 189.0,
+ 195.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 84.0,
+ 214.0,
+ 2.0,
+ 53.0,
+ 209.0,
+ 2.0,
+ 146.0,
+ 206.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "image_id": 110,
+ "id": 129,
+ "num_keypoints": 17,
+ "bbox": [
+ 13.0,
+ 36.0,
+ 284.0,
+ 192.0
+ ],
+ "iscrowd": 0,
+ "area": 54528.0,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 54.0,
+ 36.0,
+ 2.0,
+ 42.0,
+ 33.0,
+ 2.0,
+ 65.0,
+ 21.0,
+ 2.0,
+ 51.0,
+ 18.0,
+ 2.0,
+ 30.0,
+ 59.0,
+ 2.0,
+ 57.0,
+ 62.0,
+ 2.0,
+ 203.0,
+ 109.0,
+ 2.0,
+ 104.0,
+ 82.0,
+ 2.0,
+ 73.0,
+ 141.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 195.0,
+ 157.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 73.0,
+ 185.0,
+ 2.0,
+ 81.0,
+ 183.0,
+ 2.0,
+ 225.0,
+ 204.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 62.0,
+ 221.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 249.0,
+ 242.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "image_id": 3105,
+ "id": 583,
+ "num_keypoints": 15,
+ "bbox": [
+ 23.0,
+ 9.0,
+ 256.0,
+ 240.0
+ ],
+ "iscrowd": 0,
+ "area": 61440.0,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/ap10k/000000000004.jpg b/vendor/ViTPose/tests/data/ap10k/000000000004.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..100734dfcb56d4d8baa44c89edd4e167026309df
Binary files /dev/null and b/vendor/ViTPose/tests/data/ap10k/000000000004.jpg differ
diff --git a/vendor/ViTPose/tests/data/ap10k/000000037516.jpg b/vendor/ViTPose/tests/data/ap10k/000000037516.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5d6e1b5199d546e54ce8ac6c58969a969bf524c3
Binary files /dev/null and b/vendor/ViTPose/tests/data/ap10k/000000037516.jpg differ
diff --git a/vendor/ViTPose/tests/data/ap10k/test_ap10k.json b/vendor/ViTPose/tests/data/ap10k/test_ap10k.json
new file mode 100644
index 0000000000000000000000000000000000000000..851dc1ad7569d1e14ef76926388b4a61b082b936
--- /dev/null
+++ b/vendor/ViTPose/tests/data/ap10k/test_ap10k.json
@@ -0,0 +1,5249 @@
+{
+ "info":{
+ "description":"AP-10k",
+ "url":"https://github.com/AlexTheBad/AP-10K",
+ "version":"1.0",
+ "year":2021,
+ "contributor":"AP-10k Team",
+ "date_created":"2021/07/01"
+ },
+ "licenses":[
+ {
+ "id":1,
+ "name":"The MIT License",
+ "url":"https://www.mit.edu/~amini/LICENSE.md"
+ }
+ ],
+ "images":[
+ {
+ "license":1,
+ "id":37516,
+ "file_name":"000000037516.jpg",
+ "width":1200,
+ "height":867,
+ "background":5
+ },
+ {
+ "license":1,
+ "id":4,
+ "file_name":"000000000004.jpg",
+ "width":1024,
+ "height":683,
+ "background":1
+ }
+ ],
+ "annotations":[
+ {
+ "id":9284,
+ "image_id":37516,
+ "category_id":26,
+ "bbox":[
+ 66,
+ 192,
+ 1092,
+ 512
+ ],
+ "area":559104,
+ "iscrowd":0,
+ "num_keypoints":16,
+ "keypoints":[
+ 134,
+ 415,
+ 2,
+ 0,
+ 0,
+ 0,
+ 94,
+ 475,
+ 2,
+ 302,
+ 330,
+ 2,
+ 890,
+ 287,
+ 2,
+ 414,
+ 470,
+ 2,
+ 414,
+ 554,
+ 2,
+ 396,
+ 624,
+ 2,
+ 302,
+ 466,
+ 2,
+ 230,
+ 515,
+ 2,
+ 214,
+ 623,
+ 2,
+ 838,
+ 422,
+ 2,
+ 946,
+ 511,
+ 2,
+ 936,
+ 628,
+ 2,
+ 708,
+ 442,
+ 2,
+ 698,
+ 555,
+ 2,
+ 636,
+ 602,
+ 2
+ ]
+ },
+ {
+ "id":6,
+ "image_id":4,
+ "category_id":1,
+ "bbox":[
+ 408,
+ 197,
+ 429,
+ 341
+ ],
+ "area":146289,
+ "iscrowd":0,
+ "num_keypoints":16,
+ "keypoints":[
+ 488,
+ 443,
+ 2,
+ 0,
+ 0,
+ 0,
+ 466,
+ 499,
+ 2,
+ 600,
+ 307,
+ 2,
+ 787,
+ 255,
+ 2,
+ 643,
+ 369,
+ 2,
+ 660,
+ 438,
+ 2,
+ 684,
+ 514,
+ 2,
+ 592,
+ 380,
+ 2,
+ 594,
+ 443,
+ 2,
+ 591,
+ 520,
+ 2,
+ 757,
+ 350,
+ 2,
+ 778,
+ 408,
+ 2,
+ 772,
+ 513,
+ 2,
+ 729,
+ 352,
+ 2,
+ 778,
+ 400,
+ 2,
+ 765,
+ 497,
+ 2
+ ]
+ }
+ ],
+ "categories":[
+ {
+ "id":1,
+ "name":"antelope",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":2,
+ "name":"argali sheep",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":3,
+ "name":"bison",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":4,
+ "name":"buffalo",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":5,
+ "name":"cow",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":6,
+ "name":"sheep",
+ "supercategory":"Bovidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":7,
+ "name":"arctic fox",
+ "supercategory":"Canidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":8,
+ "name":"dog",
+ "supercategory":"Canidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":9,
+ "name":"fox",
+ "supercategory":"Canidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":10,
+ "name":"wolf",
+ "supercategory":"Canidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":11,
+ "name":"beaver",
+ "supercategory":"Castoridae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":12,
+ "name":"alouatta",
+ "supercategory":"Cercopithecidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":13,
+ "name":"monkey",
+ "supercategory":"Cercopithecidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":14,
+ "name":"noisy night monkey",
+ "supercategory":"Cercopithecidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":15,
+ "name":"spider monkey",
+ "supercategory":"Cercopithecidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":16,
+ "name":"uakari",
+ "supercategory":"Cercopithecidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":17,
+ "name":"deer",
+ "supercategory":"Cervidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":18,
+ "name":"moose",
+ "supercategory":"Cervidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":19,
+ "name":"hamster",
+ "supercategory":"Cricetidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":20,
+ "name":"elephant",
+ "supercategory":"Elephantidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":21,
+ "name":"horse",
+ "supercategory":"Equidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":22,
+ "name":"zebra",
+ "supercategory":"Equidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":23,
+ "name":"bobcat",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":24,
+ "name":"cat",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":25,
+ "name":"cheetah",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":26,
+ "name":"jaguar",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":27,
+ "name":"king cheetah",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":28,
+ "name":"leopard",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":29,
+ "name":"lion",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":30,
+ "name":"panther",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":31,
+ "name":"snow leopard",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":32,
+ "name":"tiger",
+ "supercategory":"Felidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":33,
+ "name":"giraffe",
+ "supercategory":"Giraffidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":34,
+ "name":"hippo",
+ "supercategory":"Hippopotamidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":35,
+ "name":"chimpanzee",
+ "supercategory":"Hominidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":36,
+ "name":"gorilla",
+ "supercategory":"Hominidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":37,
+ "name":"orangutan",
+ "supercategory":"Hominidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":38,
+ "name":"rabbit",
+ "supercategory":"Leporidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":39,
+ "name":"skunk",
+ "supercategory":"Mephitidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":40,
+ "name":"mouse",
+ "supercategory":"Muridae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":41,
+ "name":"rat",
+ "supercategory":"Muridae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":42,
+ "name":"otter",
+ "supercategory":"Mustelidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":43,
+ "name":"weasel",
+ "supercategory":"Mustelidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":44,
+ "name":"raccoon",
+ "supercategory":"Procyonidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":45,
+ "name":"rhino",
+ "supercategory":"Rhinocerotidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":46,
+ "name":"marmot",
+ "supercategory":"Sciuridae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":47,
+ "name":"squirrel",
+ "supercategory":"Sciuridae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":48,
+ "name":"pig",
+ "supercategory":"Suidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":49,
+ "name":"mole",
+ "supercategory":"Talpidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":50,
+ "name":"black bear",
+ "supercategory":"Ursidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":51,
+ "name":"brown bear",
+ "supercategory":"Ursidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":52,
+ "name":"panda",
+ "supercategory":"Ursidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":53,
+ "name":"polar bear",
+ "supercategory":"Ursidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ },
+ {
+ "id":54,
+ "name":"bat",
+ "supercategory":"Vespertilionidae",
+ "keypoints":[
+ "left_eye",
+ "right_eye",
+ "nose",
+ "neck",
+ "root_of_tail",
+ "left_shoulder",
+ "left_elbow",
+ "left_front_paw",
+ "right_shoulder",
+ "right_elbow",
+ "right_front_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw"
+ ],
+ "skeleton":[
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 4,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 5,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 5,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ]
+ ]
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/atrw/000061.jpg b/vendor/ViTPose/tests/data/atrw/000061.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fb9bfdf781f10720d024b9d7806a0ea0fca97e93
Binary files /dev/null and b/vendor/ViTPose/tests/data/atrw/000061.jpg differ
diff --git a/vendor/ViTPose/tests/data/atrw/003464.jpg b/vendor/ViTPose/tests/data/atrw/003464.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..53ee3ae16938748fc2d1fa0a53fce51a4198d49d
Binary files /dev/null and b/vendor/ViTPose/tests/data/atrw/003464.jpg differ
diff --git a/vendor/ViTPose/tests/data/atrw/test_atrw.json b/vendor/ViTPose/tests/data/atrw/test_atrw.json
new file mode 100644
index 0000000000000000000000000000000000000000..513faab788f046486a88bcacfc01c6ff1420353a
--- /dev/null
+++ b/vendor/ViTPose/tests/data/atrw/test_atrw.json
@@ -0,0 +1,221 @@
+{
+ "categories": [
+ {
+ "keypoints": [
+ "left_ear",
+ "right_ear",
+ "nose",
+ "right_shoulder",
+ "right_front_paw",
+ "left_shoulder",
+ "left_front_paw",
+ "right_hip",
+ "right_knee",
+ "right_back_paw",
+ "left_hip",
+ "left_knee",
+ "left_back_paw",
+ "tail",
+ "center"
+ ],
+ "name": "tiger",
+ "skeleton": [
+ [
+ 0,
+ 2
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 14
+ ],
+ [
+ 5,
+ 6
+ ],
+ [
+ 5,
+ 14
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 3,
+ 14
+ ],
+ [
+ 13,
+ 14
+ ],
+ [
+ 9,
+ 8
+ ],
+ [
+ 8,
+ 7
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 12,
+ 11
+ ],
+ [
+ 11,
+ 10
+ ],
+ [
+ 10,
+ 13
+ ]
+ ],
+ "id": 1
+ }
+ ],
+ "images": [
+ {
+ "width": 1239,
+ "height": 731,
+ "file_name": "003464.jpg",
+ "id": 3464
+ },
+ {
+ "width": 925,
+ "height": 1080,
+ "file_name": "000061.jpg",
+ "id": 61
+ }
+ ],
+ "annotations": [
+ {
+ "bbox": [
+ 0,
+ 0,
+ 1239,
+ 731
+ ],
+ "category_id": 1,
+ "keypoints": [
+ 225,
+ 215,
+ 2,
+ 285,
+ 194,
+ 2,
+ 191,
+ 368,
+ 2,
+ 417,
+ 428,
+ 2,
+ 308,
+ 594,
+ 2,
+ 536,
+ 401,
+ 2,
+ 642,
+ 638,
+ 2,
+ 893,
+ 419,
+ 2,
+ 974,
+ 494,
+ 2,
+ 885,
+ 584,
+ 2,
+ 925,
+ 328,
+ 2,
+ 1065,
+ 419,
+ 2,
+ 1050,
+ 583,
+ 2,
+ 994,
+ 186,
+ 2,
+ 592,
+ 277,
+ 2
+ ],
+ "num_keypoints": 15,
+ "image_id": 3464,
+ "id": 3464,
+ "area": 905709,
+ "iscrowd": 0
+ },
+ {
+ "bbox": [
+ 0,
+ 0,
+ 925,
+ 1080
+ ],
+ "category_id": 1,
+ "keypoints": [
+ 324,
+ 571,
+ 2,
+ 158,
+ 568,
+ 2,
+ 246,
+ 806,
+ 2,
+ 217,
+ 806,
+ 2,
+ 359,
+ 805,
+ 2,
+ 447,
+ 657,
+ 2,
+ 362,
+ 911,
+ 2,
+ 539,
+ 546,
+ 2,
+ 369,
+ 552,
+ 2,
+ 546,
+ 612,
+ 2,
+ 638,
+ 358,
+ 2,
+ 722,
+ 332,
+ 2,
+ 712,
+ 507,
+ 2,
+ 593,
+ 121,
+ 2,
+ 419,
+ 463,
+ 2
+ ],
+ "num_keypoints": 15,
+ "image_id": 61,
+ "id": 61,
+ "area": 999000,
+ "iscrowd": 0
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/coco/000000000785.jpg b/vendor/ViTPose/tests/data/coco/000000000785.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..78718f5f5c031d1fed853b55878083058f289755
Binary files /dev/null and b/vendor/ViTPose/tests/data/coco/000000000785.jpg differ
diff --git a/vendor/ViTPose/tests/data/coco/000000040083.jpg b/vendor/ViTPose/tests/data/coco/000000040083.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9da5da73d141b4da39cc9c8f556cb723a4eaad35
Binary files /dev/null and b/vendor/ViTPose/tests/data/coco/000000040083.jpg differ
diff --git a/vendor/ViTPose/tests/data/coco/000000196141.jpg b/vendor/ViTPose/tests/data/coco/000000196141.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c23a98bb6ddde294db1486a8ab055d850a8fdb97
Binary files /dev/null and b/vendor/ViTPose/tests/data/coco/000000196141.jpg differ
diff --git a/vendor/ViTPose/tests/data/coco/000000197388.jpg b/vendor/ViTPose/tests/data/coco/000000197388.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d19ecd0cac9b14e7d68c1c851c60f207e2d5840
Binary files /dev/null and b/vendor/ViTPose/tests/data/coco/000000197388.jpg differ
diff --git a/vendor/ViTPose/tests/data/coco/test_coco.json b/vendor/ViTPose/tests/data/coco/test_coco.json
new file mode 100644
index 0000000000000000000000000000000000000000..75448df5cdf7f75935677bc7626b699bcd0505d0
--- /dev/null
+++ b/vendor/ViTPose/tests/data/coco/test_coco.json
@@ -0,0 +1,2465 @@
+{
+ "info": {
+ "description": "For testing COCO dataset only.",
+ "year": 2020,
+ "date_created": "2020/06/20"
+ },
+ "licenses": [
+ {
+ "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
+ "id": 1,
+ "name": "Attribution-NonCommercial-ShareAlike License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nc/2.0/",
+ "id": 2,
+ "name": "Attribution-NonCommercial License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nc-nd/2.0/",
+ "id": 3,
+ "name": "Attribution-NonCommercial-NoDerivs License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by/2.0/",
+ "id": 4,
+ "name": "Attribution License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-sa/2.0/",
+ "id": 5,
+ "name": "Attribution-ShareAlike License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nd/2.0/",
+ "id": 6,
+ "name": "Attribution-NoDerivs License"
+ },
+ {
+ "url": "http://flickr.com/commons/usage/",
+ "id": 7,
+ "name": "No known copyright restrictions"
+ },
+ {
+ "url": "http://www.usa.gov/copyright.shtml",
+ "id": 8,
+ "name": "United States Government Work"
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "left_eye",
+ "right_eye",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "license": 4,
+ "file_name": "000000000785.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000000785.jpg",
+ "height": 425,
+ "width": 640,
+ "date_captured": "2013-11-19 21:22:42",
+ "flickr_url": "http://farm8.staticflickr.com/7015/6795644157_f019453ae7_z.jpg",
+ "id": 785
+ },
+ {
+ "license": 3,
+ "file_name": "000000040083.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000040083.jpg",
+ "height": 333,
+ "width": 500,
+ "date_captured": "2013-11-18 03:30:24",
+ "flickr_url": "http://farm1.staticflickr.com/116/254881838_e21c6d17b8_z.jpg",
+ "id": 40083
+ },
+ {
+ "license": 1,
+ "file_name": "000000196141.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000196141.jpg",
+ "height": 429,
+ "width": 640,
+ "date_captured": "2013-11-22 22:37:15",
+ "flickr_url": "http://farm4.staticflickr.com/3310/3611902235_57d4ae496d_z.jpg",
+ "id": 196141
+ },
+ {
+ "license": 3,
+ "file_name": "000000197388.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000197388.jpg",
+ "height": 392,
+ "width": 640,
+ "date_captured": "2013-11-19 20:10:37",
+ "flickr_url": "http://farm9.staticflickr.com/8375/8507321836_5b8b13188f_z.jpg",
+ "id": 197388
+ }
+ ],
+ "annotations": [
+ {
+ "segmentation": [
+ [
+ 353.37,
+ 67.65,
+ 358.15,
+ 52.37,
+ 362.92,
+ 47.59,
+ 374.38,
+ 44.73,
+ 389.66,
+ 52.37,
+ 389.66,
+ 67.65,
+ 389.66,
+ 76.25,
+ 393.48,
+ 83.89,
+ 396.35,
+ 88.66,
+ 397.3,
+ 91.53,
+ 406.85,
+ 99.17,
+ 413.54,
+ 104.9,
+ 451.74,
+ 148.83,
+ 458.43,
+ 153.6,
+ 462.25,
+ 166.02,
+ 467.02,
+ 173.66,
+ 463.2,
+ 181.3,
+ 449.83,
+ 183.21,
+ 448.88,
+ 191.81,
+ 455.56,
+ 226.19,
+ 448.88,
+ 254.84,
+ 453.65,
+ 286.36,
+ 475.62,
+ 323.6,
+ 491.85,
+ 361.81,
+ 494.72,
+ 382.82,
+ 494.72,
+ 382.82,
+ 499.49,
+ 391.41,
+ 416.4,
+ 391.41,
+ 424.04,
+ 383.77,
+ 439.33,
+ 374.22,
+ 445.06,
+ 360.85,
+ 436.46,
+ 334.11,
+ 421.18,
+ 303.55,
+ 416.4,
+ 289.22,
+ 409.72,
+ 268.21,
+ 396.35,
+ 280.63,
+ 405.9,
+ 298.77,
+ 417.36,
+ 324.56,
+ 425,
+ 349.39,
+ 425,
+ 357.99,
+ 419.27,
+ 360.85,
+ 394.44,
+ 367.54,
+ 362.92,
+ 370.4,
+ 346.69,
+ 367.54,
+ 360.06,
+ 362.76,
+ 369.61,
+ 360.85,
+ 382.98,
+ 340.8,
+ 355.28,
+ 271.08,
+ 360.06,
+ 266.3,
+ 386.8,
+ 219.5,
+ 368.65,
+ 162.2,
+ 348.6,
+ 175.57,
+ 309.44,
+ 187.03,
+ 301.8,
+ 192.76,
+ 288.43,
+ 193.72,
+ 282.7,
+ 193.72,
+ 280.79,
+ 187.03,
+ 280.79,
+ 174.62,
+ 287.47,
+ 171.75,
+ 291.29,
+ 171.75,
+ 295.11,
+ 171.75,
+ 306.57,
+ 166.98,
+ 312.3,
+ 165.07,
+ 345.73,
+ 142.14,
+ 350.51,
+ 117.31,
+ 350.51,
+ 102.03,
+ 350.51,
+ 90.57,
+ 353.37,
+ 65.74
+ ]
+ ],
+ "num_keypoints": 17,
+ "area": 27789.11055,
+ "iscrowd": 0,
+ "keypoints": [
+ 367,
+ 81,
+ 2,
+ 374,
+ 73,
+ 2,
+ 360,
+ 75,
+ 2,
+ 386,
+ 78,
+ 2,
+ 356,
+ 81,
+ 2,
+ 399,
+ 108,
+ 2,
+ 358,
+ 129,
+ 2,
+ 433,
+ 142,
+ 2,
+ 341,
+ 159,
+ 2,
+ 449,
+ 165,
+ 2,
+ 309,
+ 178,
+ 2,
+ 424,
+ 203,
+ 2,
+ 393,
+ 214,
+ 2,
+ 429,
+ 294,
+ 2,
+ 367,
+ 273,
+ 2,
+ 466,
+ 362,
+ 2,
+ 396,
+ 341,
+ 2
+ ],
+ "image_id": 785,
+ "bbox": [
+ 280.79,
+ 44.73,
+ 218.7,
+ 346.68
+ ],
+ "category_id": 1,
+ "id": 442619
+ },
+ {
+ "segmentation": [
+ [
+ 98.56,
+ 273.72,
+ 132.9,
+ 267,
+ 140.37,
+ 281.93,
+ 165.75,
+ 285.66,
+ 156.79,
+ 264.01,
+ 170.23,
+ 261.02,
+ 177.7,
+ 272.97,
+ 182.18,
+ 279.69,
+ 200.85,
+ 268.49,
+ 212.79,
+ 255.05,
+ 188.9,
+ 256.54,
+ 164.26,
+ 240.12,
+ 139.62,
+ 212.49,
+ 109.01,
+ 221.45,
+ 103.04,
+ 220.71,
+ 122.45,
+ 202.04,
+ 113.49,
+ 196.07,
+ 96.32,
+ 168.44,
+ 97.06,
+ 162.47,
+ 110.5,
+ 136.34,
+ 112,
+ 124.39,
+ 91.09,
+ 110.95,
+ 80.64,
+ 114.68,
+ 71.68,
+ 131.86,
+ 62.72,
+ 147.54,
+ 57.49,
+ 156.5,
+ 48.53,
+ 168.44,
+ 41.07,
+ 180.39,
+ 38.08,
+ 193.08,
+ 40.32,
+ 205.03,
+ 47.04,
+ 213.24,
+ 54.5,
+ 216.23,
+ 82.13,
+ 252.06,
+ 91.09,
+ 271.48
+ ]
+ ],
+ "num_keypoints": 14,
+ "area": 11025.219,
+ "iscrowd": 0,
+ "keypoints": [
+ 99,
+ 144,
+ 2,
+ 104,
+ 141,
+ 2,
+ 96,
+ 137,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 133,
+ 2,
+ 56,
+ 161,
+ 2,
+ 81,
+ 162,
+ 2,
+ 0,
+ 0,
+ 0,
+ 103,
+ 208,
+ 2,
+ 116,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 57,
+ 246,
+ 1,
+ 82,
+ 259,
+ 1,
+ 137,
+ 219,
+ 2,
+ 138,
+ 247,
+ 2,
+ 177,
+ 256,
+ 2,
+ 158,
+ 296,
+ 1
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 38.08,
+ 110.95,
+ 174.71,
+ 174.71
+ ],
+ "category_id": 1,
+ "id": 198196
+ },
+ {
+ "segmentation": [
+ [
+ 257.76,
+ 288.05,
+ 273.4,
+ 258.26,
+ 325.55,
+ 253.79,
+ 335.23,
+ 232.93,
+ 326.3,
+ 186.74,
+ 333.74,
+ 177.05,
+ 327.79,
+ 153.21,
+ 333.74,
+ 142.04,
+ 344.17,
+ 139.06,
+ 353.11,
+ 139.06,
+ 359.07,
+ 145.02,
+ 360.56,
+ 148.74,
+ 362.05,
+ 168.86,
+ 388.87,
+ 197.17,
+ 397.81,
+ 276.88,
+ 372.48,
+ 293.27
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 10171.9544,
+ "iscrowd": 0,
+ "keypoints": [
+ 343,
+ 164,
+ 2,
+ 348,
+ 160,
+ 2,
+ 340,
+ 160,
+ 2,
+ 359,
+ 163,
+ 2,
+ 332,
+ 164,
+ 2,
+ 370,
+ 189,
+ 2,
+ 334,
+ 190,
+ 2,
+ 358,
+ 236,
+ 2,
+ 348,
+ 234,
+ 2,
+ 339,
+ 270,
+ 2,
+ 330,
+ 262,
+ 2,
+ 378,
+ 262,
+ 2,
+ 343,
+ 254,
+ 2,
+ 338,
+ 280,
+ 2,
+ 283,
+ 272,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 257.76,
+ 139.06,
+ 140.05,
+ 154.21
+ ],
+ "category_id": 1,
+ "id": 230195
+ },
+ {
+ "segmentation": [
+ [
+ 285.37,
+ 126.5,
+ 281.97,
+ 127.72,
+ 280.76,
+ 132.33,
+ 280.76,
+ 136.46,
+ 275.17,
+ 143.26,
+ 275.9,
+ 158.08,
+ 277.6,
+ 164.4,
+ 278.33,
+ 173.87,
+ 278.33,
+ 183.83,
+ 279.79,
+ 191.11,
+ 281.97,
+ 194.76,
+ 284.89,
+ 192.09,
+ 284.89,
+ 186.99,
+ 284.89,
+ 181.16,
+ 284.64,
+ 177.51,
+ 285.86,
+ 173.87
+ ]
+ ],
+ "num_keypoints": 0,
+ "area": 491.2669,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 275.17,
+ 126.5,
+ 10.69,
+ 68.26
+ ],
+ "category_id": 1,
+ "id": 1202706
+ },
+ {
+ "segmentation": [
+ [
+ 339.34,
+ 107.97,
+ 338.38,
+ 102.19,
+ 339.34,
+ 91.58,
+ 335.49,
+ 84.84,
+ 326.81,
+ 74.23,
+ 312.35,
+ 74.23,
+ 301.75,
+ 74.23,
+ 295,
+ 86.76,
+ 295,
+ 93.51,
+ 292.11,
+ 99.3,
+ 287.29,
+ 102.19,
+ 291.14,
+ 107.01,
+ 295,
+ 107.01,
+ 295.96,
+ 112.79,
+ 301.75,
+ 115.69,
+ 305.6,
+ 119.54,
+ 307.53,
+ 123.4,
+ 317.17,
+ 123.4,
+ 311.39,
+ 129.18,
+ 286.32,
+ 139.79,
+ 274.75,
+ 139.79,
+ 264.15,
+ 138.82,
+ 262.22,
+ 144.61,
+ 261.26,
+ 147.5,
+ 253.54,
+ 147.5,
+ 247.76,
+ 150.39,
+ 249.69,
+ 159.07,
+ 256.44,
+ 161,
+ 262.22,
+ 161,
+ 268,
+ 161,
+ 276.68,
+ 161.96,
+ 284.39,
+ 168.71,
+ 293.07,
+ 174.49,
+ 301.75,
+ 174.49,
+ 308.49,
+ 169.67,
+ 308.49,
+ 188.95,
+ 311.39,
+ 194.74,
+ 312.35,
+ 208.23,
+ 307.53,
+ 221.73,
+ 297.89,
+ 229.44,
+ 281.5,
+ 250.65,
+ 269.93,
+ 262.22,
+ 278.61,
+ 320.06,
+ 281.5,
+ 331.63,
+ 276.68,
+ 338.38,
+ 270.9,
+ 349.95,
+ 262.22,
+ 356.7,
+ 253.54,
+ 359.59,
+ 253.54,
+ 365.37,
+ 274.75,
+ 365.37,
+ 291.14,
+ 365.37,
+ 306.57,
+ 359.59,
+ 303.67,
+ 352.84,
+ 297.89,
+ 340.31,
+ 293.07,
+ 318.13,
+ 295,
+ 294.03,
+ 293.07,
+ 278.61,
+ 294.03,
+ 270.9,
+ 305.6,
+ 259.33,
+ 313.31,
+ 299.82,
+ 319.1,
+ 309.46,
+ 341.27,
+ 317.17,
+ 384.65,
+ 330.67,
+ 387.55,
+ 335.49,
+ 383.69,
+ 341.27,
+ 397.19,
+ 350.91,
+ 398.15,
+ 363.44,
+ 398.15,
+ 375.01,
+ 405.86,
+ 374.05,
+ 409.72,
+ 357.66,
+ 411.65,
+ 342.24,
+ 416.47,
+ 328.74,
+ 417.43,
+ 321.03,
+ 410.68,
+ 319.1,
+ 401.04,
+ 318.13,
+ 392.37,
+ 318.13,
+ 382.73,
+ 314.28,
+ 348.98,
+ 300.78,
+ 339.34,
+ 293.07,
+ 334.52,
+ 285.36,
+ 340.31,
+ 259.33,
+ 340.31,
+ 246.8,
+ 340.31,
+ 242.94,
+ 350.91,
+ 228.48,
+ 358.62,
+ 214.98,
+ 355.22,
+ 204.32,
+ 357.05,
+ 196.11,
+ 361.61,
+ 188.82,
+ 361.61,
+ 181.97,
+ 365.26,
+ 165.63,
+ 367.54,
+ 139.18,
+ 366.17,
+ 123.68,
+ 361.15,
+ 112.73,
+ 353.86,
+ 107.72,
+ 351.58,
+ 105.89,
+ 344.74,
+ 105.89,
+ 340.18,
+ 109.08
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 17123.92955,
+ "iscrowd": 0,
+ "keypoints": [
+ 297,
+ 111,
+ 2,
+ 299,
+ 106,
+ 2,
+ 0,
+ 0,
+ 0,
+ 314,
+ 108,
+ 2,
+ 0,
+ 0,
+ 0,
+ 329,
+ 141,
+ 2,
+ 346,
+ 125,
+ 2,
+ 295,
+ 164,
+ 2,
+ 323,
+ 130,
+ 2,
+ 266,
+ 155,
+ 2,
+ 279,
+ 143,
+ 2,
+ 329,
+ 225,
+ 2,
+ 331,
+ 221,
+ 2,
+ 327,
+ 298,
+ 2,
+ 283,
+ 269,
+ 2,
+ 398,
+ 327,
+ 2,
+ 288,
+ 349,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 247.76,
+ 74.23,
+ 169.67,
+ 300.78
+ ],
+ "category_id": 1,
+ "id": 460541
+ },
+ {
+ "segmentation": [
+ [
+ 578.76,
+ 112.4,
+ 589.39,
+ 100.81,
+ 589.39,
+ 99.84,
+ 596.16,
+ 116.27,
+ 603.89,
+ 122.07,
+ 603.89,
+ 138.49,
+ 598.09,
+ 159.75,
+ 597.12,
+ 181,
+ 594.22,
+ 191.63,
+ 589.39,
+ 212.89,
+ 583.59,
+ 208.06,
+ 583.59,
+ 206.13,
+ 582.63,
+ 200.33,
+ 582.63,
+ 193.57,
+ 582.63,
+ 182.94,
+ 575.86,
+ 181,
+ 567.17,
+ 197.43,
+ 571.03,
+ 203.23,
+ 567.17,
+ 207.09,
+ 555.57,
+ 208.06,
+ 562.34,
+ 200.33,
+ 565.24,
+ 190.67,
+ 565.24,
+ 173.27,
+ 566.2,
+ 163.61,
+ 568.14,
+ 156.85,
+ 570.07,
+ 148.15,
+ 566.2,
+ 143.32,
+ 565.24,
+ 133.66,
+ 575.86,
+ 118.2
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 2789.0208,
+ "iscrowd": 0,
+ "keypoints": [
+ 589,
+ 113,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 595,
+ 112,
+ 1,
+ 584,
+ 110,
+ 2,
+ 598,
+ 123,
+ 2,
+ 579,
+ 119,
+ 2,
+ 594,
+ 141,
+ 2,
+ 570,
+ 137,
+ 2,
+ 576,
+ 135,
+ 2,
+ 585,
+ 139,
+ 2,
+ 590,
+ 157,
+ 2,
+ 574,
+ 156,
+ 2,
+ 589,
+ 192,
+ 2,
+ 565,
+ 189,
+ 1,
+ 587,
+ 222,
+ 1,
+ 557,
+ 219,
+ 1
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 555.57,
+ 99.84,
+ 48.32,
+ 113.05
+ ],
+ "category_id": 1,
+ "id": 488308
+ },
+ {
+ "segmentation": [
+ [
+ 446.96,
+ 73.13,
+ 445.81,
+ 77.71,
+ 443.33,
+ 78.29,
+ 441.61,
+ 81.72,
+ 441.23,
+ 84.58,
+ 440.85,
+ 90.5,
+ 442.19,
+ 94.32,
+ 443.52,
+ 97.18,
+ 443.52,
+ 102.33,
+ 442.57,
+ 105.58,
+ 446.58,
+ 105.19,
+ 447.15,
+ 99.85,
+ 447.53,
+ 94.89,
+ 446,
+ 93.55,
+ 446.38,
+ 92.03,
+ 453.64,
+ 92.41,
+ 454.02,
+ 94.51,
+ 457.64,
+ 94.51,
+ 455.74,
+ 88.4,
+ 455.35,
+ 82.29,
+ 453.64,
+ 78.48,
+ 451.92,
+ 77.71,
+ 452.87,
+ 74.47,
+ 450.58,
+ 73.13
+ ]
+ ],
+ "num_keypoints": 0,
+ "area": 285.7906,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 440.85,
+ 73.13,
+ 16.79,
+ 32.45
+ ],
+ "category_id": 1,
+ "id": 508900
+ },
+ {
+ "segmentation": [
+ [
+ 497.15,
+ 413.95,
+ 531.55,
+ 417.68,
+ 548.74,
+ 411.7,
+ 551.74,
+ 403.48,
+ 546.5,
+ 394.5,
+ 543.51,
+ 386.28,
+ 571.93,
+ 390.76,
+ 574.92,
+ 391.51,
+ 579.4,
+ 409.46,
+ 605.58,
+ 409.46,
+ 615.3,
+ 408.71,
+ 607.07,
+ 389.27,
+ 598.1,
+ 381.79,
+ 607.82,
+ 366.83,
+ 607.82,
+ 352.63,
+ 610.06,
+ 338.42,
+ 619.04,
+ 345.15,
+ 631,
+ 344.4,
+ 630.25,
+ 336.92,
+ 626.51,
+ 318.98,
+ 616.05,
+ 286.07,
+ 598.85,
+ 263.64,
+ 585.39,
+ 257.66,
+ 593.61,
+ 244.2,
+ 601.09,
+ 235.97,
+ 596.6,
+ 219.52,
+ 587.63,
+ 211.29,
+ 577.91,
+ 208.3,
+ 563.7,
+ 206.81,
+ 556.22,
+ 214.29,
+ 548,
+ 217.28,
+ 539.77,
+ 229.99,
+ 539.77,
+ 241.95,
+ 539.02,
+ 247.19,
+ 523.32,
+ 247.19,
+ 503.88,
+ 254.67,
+ 485.93,
+ 254.67,
+ 479.95,
+ 248.68,
+ 473.22,
+ 241.21,
+ 485.93,
+ 227,
+ 477.7,
+ 215.78,
+ 457.51,
+ 215.78,
+ 453.77,
+ 235.22,
+ 463.5,
+ 246.44,
+ 465.74,
+ 261.4,
+ 490.42,
+ 274.11,
+ 501.63,
+ 275.6,
+ 504.62,
+ 286.07,
+ 519.58,
+ 286.07,
+ 522.57,
+ 292.06,
+ 512.85,
+ 310,
+ 515.09,
+ 330.94,
+ 530.05,
+ 343.65,
+ 505.37,
+ 341.41,
+ 479.95,
+ 339.91,
+ 465.74,
+ 346.64,
+ 463.5,
+ 358.61,
+ 473.97,
+ 381.04,
+ 485.18,
+ 390.02,
+ 501.63,
+ 398.99,
+ 504.62,
+ 404.22,
+ 491.16,
+ 412.45,
+ 495.65,
+ 417.68
+ ]
+ ],
+ "num_keypoints": 12,
+ "area": 21608.94075,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 552,
+ 234,
+ 2,
+ 0,
+ 0,
+ 0,
+ 531,
+ 262,
+ 2,
+ 600,
+ 283,
+ 2,
+ 480,
+ 260,
+ 2,
+ 622,
+ 336,
+ 2,
+ 466,
+ 242,
+ 2,
+ 0,
+ 0,
+ 0,
+ 546,
+ 365,
+ 2,
+ 592,
+ 371,
+ 2,
+ 470,
+ 351,
+ 2,
+ 551,
+ 330,
+ 2,
+ 519,
+ 394,
+ 2,
+ 589,
+ 391,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 453.77,
+ 206.81,
+ 177.23,
+ 210.87
+ ],
+ "category_id": 1,
+ "id": 1717641
+ },
+ {
+ "segmentation": [
+ [
+ 58.93,
+ 163.67,
+ 47.18,
+ 161.59,
+ 36.12,
+ 93.86,
+ 41.65,
+ 82.8,
+ 40.27,
+ 69.66,
+ 50.64,
+ 67.59,
+ 55.48,
+ 73.81,
+ 63.08,
+ 92.47,
+ 66.53,
+ 99.38,
+ 65.15,
+ 109.06,
+ 61,
+ 127.03,
+ 59.62,
+ 162.97
+ ]
+ ],
+ "num_keypoints": 17,
+ "area": 1870.14015,
+ "iscrowd": 0,
+ "keypoints": [
+ 48,
+ 79,
+ 2,
+ 50,
+ 77,
+ 2,
+ 46,
+ 77,
+ 2,
+ 54,
+ 78,
+ 2,
+ 45,
+ 78,
+ 2,
+ 57,
+ 90,
+ 2,
+ 42,
+ 90,
+ 2,
+ 63,
+ 103,
+ 2,
+ 42,
+ 105,
+ 2,
+ 56,
+ 113,
+ 2,
+ 49,
+ 112,
+ 2,
+ 55,
+ 117,
+ 2,
+ 44,
+ 117,
+ 2,
+ 55,
+ 140,
+ 2,
+ 47,
+ 140,
+ 2,
+ 56,
+ 160,
+ 2,
+ 49,
+ 159,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 36.12,
+ 67.59,
+ 30.41,
+ 96.08
+ ],
+ "category_id": 1,
+ "id": 1724673
+ },
+ {
+ "segmentation": [
+ [
+ 139.41,
+ 321.58,
+ 144.78,
+ 326.56,
+ 196.92,
+ 314.68,
+ 196.16,
+ 309.31,
+ 207.28,
+ 292.05,
+ 213.03,
+ 284,
+ 228.75,
+ 270.2,
+ 233.35,
+ 261.38,
+ 244.47,
+ 252.56,
+ 254.44,
+ 237.61,
+ 267.86,
+ 215.37,
+ 272.08,
+ 212.68,
+ 285.5,
+ 232.62,
+ 294.7,
+ 250.64,
+ 295.08,
+ 264.06,
+ 290.87,
+ 277.87,
+ 290.87,
+ 286.3,
+ 289.71,
+ 298.19,
+ 281.66,
+ 318.89,
+ 282.05,
+ 334.23,
+ 295.08,
+ 340.37,
+ 315.02,
+ 343.82,
+ 314.25,
+ 336.53,
+ 310.42,
+ 330.4,
+ 301.98,
+ 322.34,
+ 304.29,
+ 310.84,
+ 304.67,
+ 302.79,
+ 306.2,
+ 292.05,
+ 311.19,
+ 275.56,
+ 313.87,
+ 251.79,
+ 311.19,
+ 234.54,
+ 312.72,
+ 224.57,
+ 310.42,
+ 212.3,
+ 307.74,
+ 201.56,
+ 306.2,
+ 193.51,
+ 306.59,
+ 183.16,
+ 310.04,
+ 177.41,
+ 314.64,
+ 173.19,
+ 316.94,
+ 171.65,
+ 328.06,
+ 163.99,
+ 337.64,
+ 157.85,
+ 343.4,
+ 159.77,
+ 346.46,
+ 166.67,
+ 346.85,
+ 170.5,
+ 346.46,
+ 179.71,
+ 346.85,
+ 188.53,
+ 346.85,
+ 191.98,
+ 344.55,
+ 198.11,
+ 342.25,
+ 203.48,
+ 338.41,
+ 208.46,
+ 335.34,
+ 212.68,
+ 335.34,
+ 217.67,
+ 343.01,
+ 222.65,
+ 354.9,
+ 210.76,
+ 359.12,
+ 196.19,
+ 361.8,
+ 173.19,
+ 361.42,
+ 161.69,
+ 356.43,
+ 150.18,
+ 344.93,
+ 135.61,
+ 343.01,
+ 132.93,
+ 345.31,
+ 126.41,
+ 345.7,
+ 124.88,
+ 343.4,
+ 115.29,
+ 340.33,
+ 104.17,
+ 337.26,
+ 102.25,
+ 330.36,
+ 103.4,
+ 326.14,
+ 106.09,
+ 320.01,
+ 111.07,
+ 314.64,
+ 119.89,
+ 310.42,
+ 121.04,
+ 292.02,
+ 121.81,
+ 279.75,
+ 127.94,
+ 244.09,
+ 138.68,
+ 240.25,
+ 142.51,
+ 238.72,
+ 154.4,
+ 239.1,
+ 163.6,
+ 239.87,
+ 173.96,
+ 241.79,
+ 181.24,
+ 248.3,
+ 192.36,
+ 240.25,
+ 206.55,
+ 236.42,
+ 219.2,
+ 229.9,
+ 236.45,
+ 225.3,
+ 247.57,
+ 218.4,
+ 254.48,
+ 208.81,
+ 265.6,
+ 202.29,
+ 278.25,
+ 195.39,
+ 285.92,
+ 188.49,
+ 292.05,
+ 183.5,
+ 295.89,
+ 176.6,
+ 302.41,
+ 172,
+ 308.54,
+ 167.78,
+ 313.14,
+ 146.31,
+ 318.89
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 14250.29385,
+ "iscrowd": 0,
+ "keypoints": [
+ 334,
+ 135,
+ 2,
+ 340,
+ 129,
+ 2,
+ 331,
+ 129,
+ 2,
+ 0,
+ 0,
+ 0,
+ 319,
+ 123,
+ 2,
+ 340,
+ 146,
+ 2,
+ 292,
+ 133,
+ 2,
+ 353,
+ 164,
+ 2,
+ 246,
+ 144,
+ 2,
+ 354,
+ 197,
+ 2,
+ 250,
+ 185,
+ 2,
+ 293,
+ 197,
+ 2,
+ 265,
+ 187,
+ 2,
+ 305,
+ 252,
+ 2,
+ 231,
+ 254,
+ 2,
+ 293,
+ 321,
+ 2,
+ 193,
+ 297,
+ 2
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 139.41,
+ 102.25,
+ 222.39,
+ 241.57
+ ],
+ "category_id": 1,
+ "id": 437295
+ },
+ {
+ "segmentation": [
+ [
+ 287.17,
+ 121.42,
+ 294.22,
+ 106.44,
+ 302.15,
+ 116.13,
+ 303.03,
+ 121.42
+ ],
+ [
+ 297.74,
+ 99.39,
+ 310.08,
+ 76.49,
+ 326.81,
+ 76.49,
+ 329.46,
+ 67.68,
+ 337.38,
+ 61.52,
+ 346.19,
+ 62.4,
+ 353.24,
+ 65.92,
+ 353.24,
+ 76.49,
+ 355.88,
+ 84.42,
+ 359.41,
+ 87.94,
+ 362.05,
+ 96.75,
+ 354.12,
+ 139.04,
+ 349.72,
+ 142.56,
+ 345.31,
+ 139.92,
+ 349.72,
+ 117.89,
+ 348.84,
+ 108.2,
+ 345.31,
+ 113.49,
+ 336.5,
+ 101.16,
+ 325.93,
+ 110.85,
+ 311.84,
+ 123.18
+ ],
+ [
+ 324.17,
+ 176.91,
+ 332.1,
+ 191.89,
+ 328.58,
+ 198.94,
+ 327.69,
+ 205.98,
+ 333.86,
+ 213.03,
+ 337.38,
+ 227.13,
+ 332.98,
+ 227.13,
+ 319.77,
+ 219.2,
+ 313.6,
+ 211.27
+ ],
+ [
+ 332.98,
+ 165.46,
+ 341.79,
+ 161.06,
+ 336.5,
+ 174.27,
+ 333.86,
+ 186.6,
+ 326.81,
+ 176.03
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 3404.869,
+ "iscrowd": 0,
+ "keypoints": [
+ 345,
+ 92,
+ 2,
+ 350,
+ 87,
+ 2,
+ 341,
+ 87,
+ 2,
+ 0,
+ 0,
+ 0,
+ 330,
+ 83,
+ 2,
+ 357,
+ 94,
+ 2,
+ 316,
+ 92,
+ 2,
+ 357,
+ 104,
+ 2,
+ 291,
+ 123,
+ 1,
+ 351,
+ 133,
+ 2,
+ 281,
+ 136,
+ 1,
+ 326,
+ 131,
+ 1,
+ 305,
+ 128,
+ 1,
+ 336,
+ 152,
+ 1,
+ 303,
+ 171,
+ 1,
+ 318,
+ 206,
+ 2,
+ 294,
+ 211,
+ 1
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 287.17,
+ 61.52,
+ 74.88,
+ 165.61
+ ],
+ "category_id": 1,
+ "id": 467657
+ },
+ {
+ "segmentation": [
+ [
+ 547.95,
+ 201.57,
+ 546.73,
+ 190.62,
+ 547.95,
+ 181.49,
+ 547.95,
+ 169.31,
+ 547.95,
+ 156.53,
+ 546.73,
+ 144.36,
+ 544.3,
+ 139.49,
+ 540.04,
+ 132.19,
+ 540.04,
+ 121.84,
+ 542.47,
+ 107.24,
+ 544.3,
+ 99.33,
+ 548.56,
+ 88.98,
+ 561.95,
+ 78.03,
+ 572.29,
+ 71.33,
+ 572.29,
+ 71.33,
+ 572.29,
+ 65.25,
+ 574.12,
+ 51.86,
+ 583.86,
+ 48.81,
+ 592.99,
+ 48.81,
+ 597.86,
+ 57.33,
+ 599.07,
+ 64.64,
+ 608.2,
+ 76.81,
+ 614.9,
+ 82.89,
+ 620.98,
+ 89.59,
+ 628.89,
+ 93.24,
+ 636.81,
+ 101.76,
+ 640,
+ 109.67,
+ 640,
+ 115.76,
+ 640,
+ 127.93,
+ 620.37,
+ 111.5,
+ 619.16,
+ 111.5,
+ 618.55,
+ 112.11,
+ 608.2,
+ 105.41,
+ 600.9,
+ 119.41,
+ 592.99,
+ 131.58,
+ 596.03,
+ 148.01,
+ 605.16,
+ 162.01,
+ 612.46,
+ 190.01,
+ 614.9,
+ 204.61,
+ 606.98,
+ 216.78,
+ 603.94,
+ 226.52,
+ 606.38,
+ 239.91,
+ 605.16,
+ 256.95,
+ 604.55,
+ 264.26,
+ 602.12,
+ 271.56,
+ 586.29,
+ 272.17,
+ 584.47,
+ 255.13,
+ 588.73,
+ 237.48,
+ 592.99,
+ 221.65,
+ 596.64,
+ 207.05,
+ 596.64,
+ 197.31,
+ 594.2,
+ 186.96,
+ 584.47,
+ 172.36,
+ 577.77,
+ 166.27,
+ 570.47,
+ 170.53,
+ 558.91,
+ 179.66,
+ 555.86,
+ 192.44,
+ 548.56,
+ 198.53,
+ 547.95,
+ 198.53
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 8913.98475,
+ "iscrowd": 0,
+ "keypoints": [
+ 591,
+ 78,
+ 2,
+ 594,
+ 74,
+ 2,
+ 586,
+ 74,
+ 2,
+ 0,
+ 0,
+ 0,
+ 573,
+ 70,
+ 2,
+ 598,
+ 86,
+ 2,
+ 566,
+ 93,
+ 2,
+ 626,
+ 105,
+ 2,
+ 546,
+ 126,
+ 2,
+ 0,
+ 0,
+ 0,
+ 561,
+ 150,
+ 2,
+ 582,
+ 150,
+ 2,
+ 557,
+ 154,
+ 2,
+ 606,
+ 194,
+ 2,
+ 558,
+ 209,
+ 1,
+ 591,
+ 252,
+ 2,
+ 539,
+ 262,
+ 1
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 540.04,
+ 48.81,
+ 99.96,
+ 223.36
+ ],
+ "category_id": 1,
+ "id": 531914
+ },
+ {
+ "segmentation": [
+ [
+ 561.51,
+ 385.38,
+ 572.11,
+ 352.71,
+ 570.34,
+ 317.4,
+ 559.75,
+ 282.08,
+ 552.68,
+ 267.07,
+ 565.93,
+ 236.17,
+ 583.59,
+ 236.17,
+ 602.13,
+ 260.01,
+ 614.49,
+ 286.5,
+ 628.61,
+ 302.39,
+ 639.21,
+ 281.2,
+ 614.49,
+ 251.18,
+ 588,
+ 218.51,
+ 595.95,
+ 202.62,
+ 594.18,
+ 185.85,
+ 580.05,
+ 170.84,
+ 562.4,
+ 179.67,
+ 557.98,
+ 198.21,
+ 554.45,
+ 202.62,
+ 532.38,
+ 199.97,
+ 525.32,
+ 202.62,
+ 511.19,
+ 229.11,
+ 493.53,
+ 256.48,
+ 484.7,
+ 276.78,
+ 451.15,
+ 323.58,
+ 423.78,
+ 338.59,
+ 388.47,
+ 373.9,
+ 372.58,
+ 387.14,
+ 396.41,
+ 388.03,
+ 418.49,
+ 367.72,
+ 450.27,
+ 345.65,
+ 501.48,
+ 306.8,
+ 520.02,
+ 301.5,
+ 552.68,
+ 340.35,
+ 543.86,
+ 369.49
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 14267.20475,
+ "iscrowd": 0,
+ "keypoints": [
+ 580,
+ 211,
+ 2,
+ 586,
+ 206,
+ 2,
+ 574,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 562,
+ 198,
+ 2,
+ 584,
+ 220,
+ 2,
+ 529,
+ 215,
+ 2,
+ 599,
+ 242,
+ 2,
+ 512,
+ 260,
+ 2,
+ 619,
+ 274,
+ 2,
+ 538,
+ 285,
+ 2,
+ 537,
+ 288,
+ 2,
+ 506,
+ 277,
+ 2,
+ 562,
+ 332,
+ 2,
+ 452,
+ 332,
+ 2,
+ 550,
+ 387,
+ 1,
+ 402,
+ 371,
+ 2
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 372.58,
+ 170.84,
+ 266.63,
+ 217.19
+ ],
+ "category_id": 1,
+ "id": 533949
+ },
+ {
+ "segmentation": [
+ [
+ 2.03,
+ 75.18,
+ 10.85,
+ 70.58,
+ 16.99,
+ 65.59,
+ 17.75,
+ 55.24,
+ 20.05,
+ 50.25,
+ 29.64,
+ 43.74,
+ 37.31,
+ 47.57,
+ 41.52,
+ 53.7,
+ 43.83,
+ 64.82,
+ 53.03,
+ 70.19,
+ 61.85,
+ 77.09,
+ 72.58,
+ 87.06,
+ 74.88,
+ 79.01,
+ 78.72,
+ 73.64,
+ 86.39,
+ 77.86,
+ 90.6,
+ 90.13,
+ 86,
+ 93.2,
+ 82.17,
+ 102.4,
+ 75.27,
+ 106.24,
+ 68.75,
+ 104.7,
+ 50.34,
+ 90.9,
+ 43.06,
+ 112.37,
+ 40.76,
+ 123.11,
+ 42.29,
+ 130.78,
+ 48.04,
+ 161.83,
+ 52.26,
+ 190.59,
+ 50.73,
+ 210.15,
+ 44.21,
+ 245.04,
+ 50.34,
+ 256.16,
+ 53.03,
+ 261.53,
+ 47.28,
+ 263.83,
+ 40.37,
+ 263.83,
+ 31.56,
+ 260.76,
+ 28.1,
+ 256.16,
+ 26.95,
+ 244.65,
+ 29.25,
+ 233.54,
+ 32.71,
+ 223.95,
+ 33.09,
+ 213.98,
+ 32.32,
+ 206.31,
+ 32.71,
+ 194.81,
+ 33.09,
+ 185.61,
+ 24.65,
+ 177.17,
+ 16.99,
+ 161.45,
+ 13.53,
+ 176.02,
+ 10.85,
+ 206.31,
+ 1.65,
+ 231.62,
+ 1.65,
+ 235.84,
+ 0.5,
+ 146.88,
+ 0.88,
+ 122.34,
+ 1.65,
+ 75.56
+ ]
+ ],
+ "num_keypoints": 13,
+ "area": 8260.75085,
+ "iscrowd": 0,
+ "keypoints": [
+ 36,
+ 79,
+ 2,
+ 40,
+ 74,
+ 2,
+ 31,
+ 75,
+ 2,
+ 0,
+ 0,
+ 0,
+ 19,
+ 69,
+ 2,
+ 45,
+ 77,
+ 2,
+ 2,
+ 89,
+ 2,
+ 74,
+ 99,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 92,
+ 2,
+ 0,
+ 0,
+ 0,
+ 33,
+ 149,
+ 2,
+ 7,
+ 153,
+ 2,
+ 44,
+ 196,
+ 2,
+ 2,
+ 205,
+ 2,
+ 35,
+ 245,
+ 2,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 0.5,
+ 43.74,
+ 90.1,
+ 220.09
+ ],
+ "category_id": 1,
+ "id": 543117
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/coco/test_coco_det_AP_H_56.json b/vendor/ViTPose/tests/data/coco/test_coco_det_AP_H_56.json
new file mode 100644
index 0000000000000000000000000000000000000000..e166de0c6426a185b27ceb49355c563fa852e6c3
--- /dev/null
+++ b/vendor/ViTPose/tests/data/coco/test_coco_det_AP_H_56.json
@@ -0,0 +1,1300 @@
+[
+ {
+ "bbox": [
+ 277.1183158543966,
+ 45.699667786163765,
+ 225.09126579259754,
+ 333.5602652943344
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.9999731779098511
+ },
+ {
+ "bbox": [
+ 281.950178384611,
+ 44.56940615106412,
+ 212.94084624881856,
+ 344.98328732330305
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.30122078732076535
+ },
+ {
+ "bbox": [
+ 268.01163251716935,
+ 43.98534000198524,
+ 238.46561540311666,
+ 341.79494090239166
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.09537058952055945
+ },
+ {
+ "bbox": [
+ 286.24685022227766,
+ 41.757854101745124,
+ 223.83092714841916,
+ 338.2323329803221
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.02974060317622316
+ },
+ {
+ "bbox": [
+ 262.7942371596824,
+ 63.5024099030928,
+ 3.164080328447767,
+ 4.2931809049024
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.01697496324777603
+ },
+ {
+ "bbox": [
+ 460.79934160584526,
+ 54.24632570186816,
+ 3.1264258976875112,
+ 5.30507188737684
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.011266417550507977
+ },
+ {
+ "bbox": [
+ 457.74867915702885,
+ 54.642754761043186,
+ 3.1463156275978577,
+ 5.30487109975607
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.009877337450527405
+ },
+ {
+ "bbox": [
+ 283.6326909128262,
+ 48.41948428440242,
+ 208.11973684568892,
+ 329.94523003138954
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.009197559746208601
+ },
+ {
+ "bbox": [
+ 207.3711401479468,
+ 63.36160650309581,
+ 2.93447433643874,
+ 3.468569626452343
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.008295997977256775
+ },
+ {
+ "bbox": [
+ 458.51562228937183,
+ 59.46703918462182,
+ 3.272054625157523,
+ 4.619048555254508
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.008173274752520696
+ },
+ {
+ "bbox": [
+ 461.08150984219986,
+ 58.545150021384245,
+ 3.249185872840485,
+ 5.844152786677249
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.007174033771332924
+ },
+ {
+ "bbox": [
+ 259.83498140597413,
+ 62.3517572900752,
+ 2.9195241669668235,
+ 4.559862560086913
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.006377489306032658
+ },
+ {
+ "bbox": [
+ 206.80460173580252,
+ 62.5220090883142,
+ 3.1584765729102457,
+ 3.520867237953432
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.005891890564944476
+ },
+ {
+ "bbox": [
+ 459.5511247244534,
+ 54.89593493663015,
+ 3.230180209185619,
+ 5.595806307593442
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.005863019167811413
+ },
+ {
+ "bbox": [
+ 457.2902794671802,
+ 58.740074277713674,
+ 3.316325358758718,
+ 5.415639229745793
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.005827399869551478
+ },
+ {
+ "bbox": [
+ 262.6182415084011,
+ 62.83701378140133,
+ 3.0697625867510396,
+ 4.148177980683975
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.005008531179775657
+ },
+ {
+ "bbox": [
+ 209.95621769919438,
+ 63.58898404912936,
+ 3.097942773760309,
+ 3.9870186328652224
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.004536413883644729
+ },
+ {
+ "bbox": [
+ 459.25342388420654,
+ 59.04022778823142,
+ 3.6918324658356596,
+ 6.2054702421954175
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.00384555541357817
+ },
+ {
+ "bbox": [
+ 208.42983867925258,
+ 62.66620641784881,
+ 2.939843970544956,
+ 3.5128275773914908
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.003631657359987463
+ },
+ {
+ "bbox": [
+ 213.41976294267863,
+ 62.71431286477077,
+ 2.528260915549936,
+ 3.4008991982205927
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.0033746918197721243
+ },
+ {
+ "bbox": [
+ 161.97753405615518,
+ 61.730313756833205,
+ 2.8917805026908923,
+ 4.075206275914702
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.003240120830014348
+ },
+ {
+ "bbox": [
+ 457.5244691894709,
+ 54.70691525725411,
+ 6.2095088496953394,
+ 8.39989354390223
+ ],
+ "category_id": 1,
+ "image_id": 785,
+ "score": 0.0028898494491729535
+ },
+ {
+ "bbox": [
+ 376.9178826443722,
+ 172.73052709081233,
+ 261.25961331942824,
+ 215.58502374291808
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.9999579191207886
+ },
+ {
+ "bbox": [
+ 163.9687616410633,
+ 80.41943032016765,
+ 200.19976794356094,
+ 259.2492676442412
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.9999035596847534
+ },
+ {
+ "bbox": [
+ 1.218278714743892,
+ 47.45300387559155,
+ 90.54113395922819,
+ 220.98988830655202
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.9998950958251953
+ },
+ {
+ "bbox": [
+ 542.055600304138,
+ 50.78951110214531,
+ 97.65374183236963,
+ 187.04227881069528
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.9867184565824798
+ },
+ {
+ "bbox": [
+ 281.8670596900398,
+ 58.53450299402189,
+ 82.11294655596839,
+ 86.20744367046282
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.9736837699533164
+ },
+ {
+ "bbox": [
+ 279.94252362290945,
+ 59.89339467038772,
+ 81.61478084086349,
+ 147.45283612214442
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.5819535544584765
+ },
+ {
+ "bbox": [
+ 535.4019505240893,
+ 48.1844256878009,
+ 105.27804947591062,
+ 239.31002317693435
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.4461107432274131
+ },
+ {
+ "bbox": [
+ 168.57347257788564,
+ 103.56636286623898,
+ 188.67170536354314,
+ 230.37891238088162
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.3492993107937081
+ },
+ {
+ "bbox": [
+ 372.0082417618134,
+ 163.99891619439003,
+ 236.90653900133447,
+ 224.81380141719242
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.32743142104478484
+ },
+ {
+ "bbox": [
+ 1.388905257619702,
+ 35.86500152126901,
+ 87.67960208998994,
+ 220.4727970838673
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.31936580857404523
+ },
+ {
+ "bbox": [
+ 283.65021434011885,
+ 57.518455359834334,
+ 81.08575097216988,
+ 85.11418577738398
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.1897958763078807
+ },
+ {
+ "bbox": [
+ 543.1779979060689,
+ 37.87532382036906,
+ 94.66280745251572,
+ 191.29243939893223
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.17261266781373394
+ },
+ {
+ "bbox": [
+ 258.5633408567725,
+ 60.27068241963883,
+ 102.3686462123,
+ 151.42071713691902
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.13677866226510016
+ },
+ {
+ "bbox": [
+ 380.00719017305823,
+ 181.1782438214781,
+ 257.505490623621,
+ 199.13011090655024
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.12246560363252844
+ },
+ {
+ "bbox": [
+ 177.40899563109633,
+ 78.35446740631232,
+ 189.53651142957023,
+ 263.45315194093274
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.1013108540546625
+ },
+ {
+ "bbox": [
+ 0.7289829477709847,
+ 43.73276160140667,
+ 85.41587076323728,
+ 221.3344387113314
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.09960434746646744
+ },
+ {
+ "bbox": [
+ 461.84120081448543,
+ 144.75681027711394,
+ 7.162490813687327,
+ 8.531497919325176
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.08173750340938568
+ },
+ {
+ "bbox": [
+ 296.17189402683806,
+ 85.73360082440907,
+ 62.47594584815931,
+ 130.1418854933646
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.0717465542448663
+ },
+ {
+ "bbox": [
+ 539.1454728501081,
+ 43.14242476252679,
+ 100.3810332864756,
+ 247.18086755992118
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.06011599272181979
+ },
+ {
+ "bbox": [
+ 277.97115514687323,
+ 62.833796387748365,
+ 85.73469418408934,
+ 109.64015622069529
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.0423359872651069
+ },
+ {
+ "bbox": [
+ 462.1613388043361,
+ 146.12331612284657,
+ 4.619414527763752,
+ 5.653142729845399
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.03960325857728385
+ },
+ {
+ "bbox": [
+ 365.7412020686737,
+ 174.63881714430087,
+ 251.65152786857914,
+ 216.71453560361638
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.03937998874316995
+ },
+ {
+ "bbox": [
+ 3.4297732174796693,
+ 45.43705430480154,
+ 92.63472057783511,
+ 222.82923167372067
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.033127322744961746
+ },
+ {
+ "bbox": [
+ 169.87771310995316,
+ 89.66612191248007,
+ 182.26201179942262,
+ 244.24356591209786
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.03232751908601077
+ },
+ {
+ "bbox": [
+ 236.36941077406334,
+ 63.89780825602214,
+ 126.04036089393139,
+ 167.83640884370914
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.026460597694444848
+ },
+ {
+ "bbox": [
+ 306.015998970117,
+ 102.95796459236254,
+ 50.95681252313989,
+ 115.84925059311661
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.02226386399182351
+ },
+ {
+ "bbox": [
+ 537.318841521999,
+ 51.127194758764055,
+ 100.70779100270272,
+ 184.38821643554354
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.021828400794543387
+ },
+ {
+ "bbox": [
+ 462.4003780259345,
+ 145.2270003005055,
+ 5.570865375100425,
+ 6.968161205149954
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.017564592704083917
+ },
+ {
+ "bbox": [
+ 284.4247396061427,
+ 58.40109305610073,
+ 77.51981649355616,
+ 85.87582588813615
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.015670991050973693
+ },
+ {
+ "bbox": [
+ 381.11136505330313,
+ 182.22526492755827,
+ 252.6961926281694,
+ 195.18863447956443
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.012290037721773745
+ },
+ {
+ "bbox": [
+ 159.00697010469204,
+ 66.94814529991709,
+ 208.17784842532066,
+ 275.3418926190766
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.010543055168754003
+ },
+ {
+ "bbox": [
+ 0.0,
+ 41.78049849392192,
+ 88.22526407776418,
+ 228.8951048951705
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.009550385293192926
+ },
+ {
+ "bbox": [
+ 577.9447869595953,
+ 225.0889245399691,
+ 34.613561069282355,
+ 45.224848999211105
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.009009368302155088
+ },
+ {
+ "bbox": [
+ 461.84120081448543,
+ 144.75681027711394,
+ 7.162490813687327,
+ 8.531497919325176
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.008478489359995936
+ },
+ {
+ "bbox": [
+ 536.7620147243282,
+ 50.12388034294447,
+ 103.91798527567175,
+ 227.99503472686746
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.0070238283037164315
+ },
+ {
+ "bbox": [
+ 324.4889601722706,
+ 132.0053388533619,
+ 33.860410488241655,
+ 86.62326758044719
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.006766568381450841
+ },
+ {
+ "bbox": [
+ 246.15395215941302,
+ 55.57516986353281,
+ 114.57893265029415,
+ 151.51097731653135
+ ],
+ "category_id": 1,
+ "image_id": 197388,
+ "score": 0.00619416668365814
+ },
+ {
+ "bbox": [
+ 38.32789823729127,
+ 112.41407584232527,
+ 174.68030024685248,
+ 169.5690071995081
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.9999903440475464
+ },
+ {
+ "bbox": [
+ 273.75504650493133,
+ 127.03007800217645,
+ 13.119059034012025,
+ 66.89919582171933
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.9987139701843262
+ },
+ {
+ "bbox": [
+ 281.037309318129,
+ 138.89800552022552,
+ 115.77299430404673,
+ 161.8925392525125
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.9967334429627354
+ },
+ {
+ "bbox": [
+ 122.98736914581909,
+ 149.19548926043387,
+ 13.238023418245518,
+ 13.251921410601938
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.7115740632536128
+ },
+ {
+ "bbox": [
+ 134.73643174966296,
+ 136.1444006258907,
+ 11.484101688887165,
+ 24.515063595289917
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.6175192526221182
+ },
+ {
+ "bbox": [
+ 244.00963353440733,
+ 141.97232651644495,
+ 149.05240181123492,
+ 151.9715830001215
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.4946145965118973
+ },
+ {
+ "bbox": [
+ 275.164993708296,
+ 126.95531864312014,
+ 13.321305363409294,
+ 66.11390534184258
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.4050845742423741
+ },
+ {
+ "bbox": [
+ 42.96185669219733,
+ 122.34524983009223,
+ 160.1285645732864,
+ 161.9463250366397
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.353162111215626
+ },
+ {
+ "bbox": [
+ 119.6385577246031,
+ 155.7402521228216,
+ 13.35265116435049,
+ 26.52128467487711
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.28122130800324224
+ },
+ {
+ "bbox": [
+ 134.01278713702155,
+ 135.5395238881317,
+ 11.64567949798922,
+ 24.682523935864452
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.19370334661431887
+ },
+ {
+ "bbox": [
+ 124.09760300731958,
+ 148.1338264630807,
+ 11.235262772767982,
+ 13.52837293393398
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.176868630098971
+ },
+ {
+ "bbox": [
+ 218.7332213212989,
+ 140.0443329358783,
+ 180.4683469351732,
+ 156.8554518569021
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.16822000522327524
+ },
+ {
+ "bbox": [
+ 270.92053528959764,
+ 133.3265646431611,
+ 13.58464710826729,
+ 56.339971422777694
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.1562438273124175
+ },
+ {
+ "bbox": [
+ 37.809250550065954,
+ 105.79757078726388,
+ 182.54979468741817,
+ 184.99414098124603
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.14206553007930756
+ },
+ {
+ "bbox": [
+ 131.5670033941938,
+ 158.319905396887,
+ 9.554075877756475,
+ 21.518604078379468
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.1142622835492838
+ },
+ {
+ "bbox": [
+ 127.07848171294685,
+ 138.86839277431187,
+ 17.235128293754656,
+ 44.84156945207431
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.09938282001938761
+ },
+ {
+ "bbox": [
+ 275.15638186104223,
+ 133.5832174441871,
+ 10.20764095132887,
+ 60.2529082432996
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.08779323860838567
+ },
+ {
+ "bbox": [
+ 118.09746041875155,
+ 153.9768088492941,
+ 17.64612772931838,
+ 33.0168198306535
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.08400380428176607
+ },
+ {
+ "bbox": [
+ 129.65247011589898,
+ 146.21014275291188,
+ 9.816644995735373,
+ 16.98788352109895
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.07980794934855787
+ },
+ {
+ "bbox": [
+ 271.7621155363754,
+ 144.86674821981342,
+ 124.64715453387907,
+ 156.9482558015152
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.07801336023989208
+ },
+ {
+ "bbox": [
+ 122.31437055574987,
+ 149.80085696138593,
+ 14.266245774025762,
+ 12.463835012516398
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.06346535355569785
+ },
+ {
+ "bbox": [
+ 34.56564215631444,
+ 135.92815585957712,
+ 177.51220438385354,
+ 164.41951766953704
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.0485074333765967
+ },
+ {
+ "bbox": [
+ 136.7368415229119,
+ 137.89135149894196,
+ 9.122227037700043,
+ 22.213023488378155
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.04772781404400169
+ },
+ {
+ "bbox": [
+ 123.3235499944418,
+ 150.25321417348,
+ 15.765761854272228,
+ 36.16957895970921
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.04220727754152085
+ },
+ {
+ "bbox": [
+ 271.90779626938615,
+ 128.14539407135078,
+ 15.405080085072711,
+ 64.71005682344074
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.04092462762153748
+ },
+ {
+ "bbox": [
+ 114.0193235709124,
+ 155.5618252886575,
+ 9.112663847332854,
+ 14.913955482463706
+ ],
+ "category_id": 1,
+ "image_id": 40083,
+ "score": 0.040561411233867466
+ },
+ {
+ "bbox": [
+ 246.79480278830977,
+ 74.45452361185933,
+ 168.83467296399175,
+ 294.5553838783887
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.9998471736907959
+ },
+ {
+ "bbox": [
+ 449.91721482790945,
+ 204.96684769367067,
+ 185.0938399278399,
+ 209.68341364145596
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.9993680119514465
+ },
+ {
+ "bbox": [
+ 551.8933527530817,
+ 98.62668626165973,
+ 53.015730818431166,
+ 114.70768739332982
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.9989681245939074
+ },
+ {
+ "bbox": [
+ 36.629787184254866,
+ 68.37446568096026,
+ 33.14949933628988,
+ 95.8618173172063
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.9987284541130066
+ },
+ {
+ "bbox": [
+ 440.89995321368673,
+ 70.30641025016695,
+ 19.43814726089363,
+ 37.077964642141026
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.9947758913040161
+ },
+ {
+ "bbox": [
+ 601.8062068801571,
+ 88.95295148681318,
+ 16.128385553229577,
+ 24.398472250098138
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.7787292817106939
+ },
+ {
+ "bbox": [
+ 443.0809847626748,
+ 71.63759967713678,
+ 13.50749833723944,
+ 32.66811758890536
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.4904795373325092
+ },
+ {
+ "bbox": [
+ 396.569778686132,
+ 70.2787260371438,
+ 13.479104730026052,
+ 31.759617864735645
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.4112498931182214
+ },
+ {
+ "bbox": [
+ 38.70719296509935,
+ 70.61443452888409,
+ 28.17963315510066,
+ 92.31016180688292
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.3796398182128506
+ },
+ {
+ "bbox": [
+ 609.3142175988798,
+ 93.72376246104807,
+ 19.058191027280486,
+ 20.77005778794522
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.370328633830097
+ },
+ {
+ "bbox": [
+ 548.7095132625554,
+ 98.39472701114634,
+ 53.25156101474022,
+ 116.43788199987897
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.33102923130101364
+ },
+ {
+ "bbox": [
+ 455.5297663676009,
+ 206.88078209027378,
+ 175.70291860814734,
+ 199.34403654904446
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.3069290034626759
+ },
+ {
+ "bbox": [
+ 250.74661573104714,
+ 87.13280710904513,
+ 167.45142937734437,
+ 278.3106151544837
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.30579873324356427
+ },
+ {
+ "bbox": [
+ 440.7002672189753,
+ 69.17369758813695,
+ 14.444703091985616,
+ 37.00946842030504
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.25331338842056605
+ },
+ {
+ "bbox": [
+ 614.9353977385917,
+ 95.74403799582933,
+ 11.596245346674664,
+ 17.631981747095708
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.22204102380904406
+ },
+ {
+ "bbox": [
+ 400.60963922399134,
+ 70.43862641691737,
+ 8.331775245023891,
+ 35.000620170929324
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.20590268390631786
+ },
+ {
+ "bbox": [
+ 602.6848618804396,
+ 88.3983294514046,
+ 15.524266109773862,
+ 24.329680417924536
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.1935096033322262
+ },
+ {
+ "bbox": [
+ 453.62495235047044,
+ 80.93588476309868,
+ 8.634490931609093,
+ 24.416622635007826
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.13682630359796108
+ },
+ {
+ "bbox": [
+ 438.1383792082668,
+ 71.62832244418284,
+ 13.671594135308055,
+ 34.59094773941301
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.12521365808926627
+ },
+ {
+ "bbox": [
+ 37.07150693742372,
+ 71.09337416480857,
+ 29.051661261168164,
+ 90.74910484197981
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.11572668958758377
+ },
+ {
+ "bbox": [
+ 612.4694532238449,
+ 94.33977605307147,
+ 11.44235234183725,
+ 18.834863504196264
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.1118136151149066
+ },
+ {
+ "bbox": [
+ 601.3005939432458,
+ 93.44761682206529,
+ 12.158258551431686,
+ 21.16533746684057
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.10474070969851616
+ },
+ {
+ "bbox": [
+ 552.5681619230662,
+ 93.99774029686462,
+ 52.01820025716597,
+ 118.51885706193504
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.10326196808658804
+ },
+ {
+ "bbox": [
+ 398.5848517781443,
+ 73.06106969434823,
+ 9.784228227546066,
+ 31.1350301063286
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.09513584625155845
+ },
+ {
+ "bbox": [
+ 447.4145013754455,
+ 199.11669450357687,
+ 182.9378852593169,
+ 211.20266858232594
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.09457972184460144
+ },
+ {
+ "bbox": [
+ 242.46158239970538,
+ 71.50036639162563,
+ 171.43617162489392,
+ 297.42260463621386
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.09176039055855717
+ },
+ {
+ "bbox": [
+ 597.2197814264931,
+ 82.37761224901661,
+ 11.327105500584025,
+ 31.481263735129318
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.08028100931968704
+ },
+ {
+ "bbox": [
+ 599.0760153957814,
+ 81.53235136929479,
+ 7.865899180085421,
+ 9.27911853791521
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.06306317158251058
+ },
+ {
+ "bbox": [
+ 458.0528386594554,
+ 76.79036559159022,
+ 7.6005536116708186,
+ 25.915126727881812
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.06281862376239655
+ },
+ {
+ "bbox": [
+ 446.7096696323964,
+ 70.72615937722122,
+ 12.841618701895356,
+ 34.64495922754935
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.061957712774678333
+ },
+ {
+ "bbox": [
+ 435.5707540307205,
+ 72.6766990179972,
+ 9.948115403515544,
+ 29.835360002866068
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.05090554307604889
+ },
+ {
+ "bbox": [
+ 395.9134672120448,
+ 68.37234648135498,
+ 13.313090353344592,
+ 35.21000811416911
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.048676813090792935
+ },
+ {
+ "bbox": [
+ 441.55283109201787,
+ 70.93636919677598,
+ 12.61247065074889,
+ 34.04032271350583
+ ],
+ "category_id": 1,
+ "image_id": 196141,
+ "score": 0.041175731433019114
+ }
+]
diff --git a/vendor/ViTPose/tests/data/coco/test_coco_wholebody.json b/vendor/ViTPose/tests/data/coco/test_coco_wholebody.json
new file mode 100644
index 0000000000000000000000000000000000000000..94d49664d4c7d8e0649bf906323fc6641cfbedc5
--- /dev/null
+++ b/vendor/ViTPose/tests/data/coco/test_coco_wholebody.json
@@ -0,0 +1,7759 @@
+{
+ "info": {
+ "description": "COCO-WholeBody sample",
+ "url": "https://github.com/jin-s13/COCO-WholeBody",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/09/18"
+ },
+ "licenses": [
+ {
+ "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
+ "id": 1,
+ "name": "Attribution-NonCommercial-ShareAlike License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nc/2.0/",
+ "id": 2,
+ "name": "Attribution-NonCommercial License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nc-nd/2.0/",
+ "id": 3,
+ "name": "Attribution-NonCommercial-NoDerivs License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by/2.0/",
+ "id": 4,
+ "name": "Attribution License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-sa/2.0/",
+ "id": 5,
+ "name": "Attribution-ShareAlike License"
+ },
+ {
+ "url": "http://creativecommons.org/licenses/by-nd/2.0/",
+ "id": 6,
+ "name": "Attribution-NoDerivs License"
+ },
+ {
+ "url": "http://flickr.com/commons/usage/",
+ "id": 7,
+ "name": "No known copyright restrictions"
+ },
+ {
+ "url": "http://www.usa.gov/copyright.shtml",
+ "id": 8,
+ "name": "United States Government Work"
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "left_eye",
+ "right_eye",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "license": 4,
+ "file_name": "000000000785.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000000785.jpg",
+ "height": 425,
+ "width": 640,
+ "date_captured": "2013-11-19 21:22:42",
+ "flickr_url": "http://farm8.staticflickr.com/7015/6795644157_f019453ae7_z.jpg",
+ "id": 785
+ },
+ {
+ "license": 3,
+ "file_name": "000000040083.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000040083.jpg",
+ "height": 333,
+ "width": 500,
+ "date_captured": "2013-11-18 03:30:24",
+ "flickr_url": "http://farm1.staticflickr.com/116/254881838_e21c6d17b8_z.jpg",
+ "id": 40083
+ },
+ {
+ "license": 1,
+ "file_name": "000000196141.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000196141.jpg",
+ "height": 429,
+ "width": 640,
+ "date_captured": "2013-11-22 22:37:15",
+ "flickr_url": "http://farm4.staticflickr.com/3310/3611902235_57d4ae496d_z.jpg",
+ "id": 196141
+ },
+ {
+ "license": 3,
+ "file_name": "000000197388.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000197388.jpg",
+ "height": 392,
+ "width": 640,
+ "date_captured": "2013-11-19 20:10:37",
+ "flickr_url": "http://farm9.staticflickr.com/8375/8507321836_5b8b13188f_z.jpg",
+ "id": 197388
+ }
+ ],
+ "annotations": [
+ {
+ "segmentation": [
+ [
+ 353.37,
+ 67.65,
+ 358.15,
+ 52.37,
+ 362.92,
+ 47.59,
+ 374.38,
+ 44.73,
+ 389.66,
+ 52.37,
+ 389.66,
+ 67.65,
+ 389.66,
+ 76.25,
+ 393.48,
+ 83.89,
+ 396.35,
+ 88.66,
+ 397.3,
+ 91.53,
+ 406.85,
+ 99.17,
+ 413.54,
+ 104.9,
+ 451.74,
+ 148.83,
+ 458.43,
+ 153.6,
+ 462.25,
+ 166.02,
+ 467.02,
+ 173.66,
+ 463.2,
+ 181.3,
+ 449.83,
+ 183.21,
+ 448.88,
+ 191.81,
+ 455.56,
+ 226.19,
+ 448.88,
+ 254.84,
+ 453.65,
+ 286.36,
+ 475.62,
+ 323.6,
+ 491.85,
+ 361.81,
+ 494.72,
+ 382.82,
+ 494.72,
+ 382.82,
+ 499.49,
+ 391.41,
+ 416.4,
+ 391.41,
+ 424.04,
+ 383.77,
+ 439.33,
+ 374.22,
+ 445.06,
+ 360.85,
+ 436.46,
+ 334.11,
+ 421.18,
+ 303.55,
+ 416.4,
+ 289.22,
+ 409.72,
+ 268.21,
+ 396.35,
+ 280.63,
+ 405.9,
+ 298.77,
+ 417.36,
+ 324.56,
+ 425,
+ 349.39,
+ 425,
+ 357.99,
+ 419.27,
+ 360.85,
+ 394.44,
+ 367.54,
+ 362.92,
+ 370.4,
+ 346.69,
+ 367.54,
+ 360.06,
+ 362.76,
+ 369.61,
+ 360.85,
+ 382.98,
+ 340.8,
+ 355.28,
+ 271.08,
+ 360.06,
+ 266.3,
+ 386.8,
+ 219.5,
+ 368.65,
+ 162.2,
+ 348.6,
+ 175.57,
+ 309.44,
+ 187.03,
+ 301.8,
+ 192.76,
+ 288.43,
+ 193.72,
+ 282.7,
+ 193.72,
+ 280.79,
+ 187.03,
+ 280.79,
+ 174.62,
+ 287.47,
+ 171.75,
+ 291.29,
+ 171.75,
+ 295.11,
+ 171.75,
+ 306.57,
+ 166.98,
+ 312.3,
+ 165.07,
+ 345.73,
+ 142.14,
+ 350.51,
+ 117.31,
+ 350.51,
+ 102.03,
+ 350.51,
+ 90.57,
+ 353.37,
+ 65.74
+ ]
+ ],
+ "num_keypoints": 17,
+ "area": 27789.11055,
+ "iscrowd": 0,
+ "keypoints": [
+ 367,
+ 81,
+ 2,
+ 374,
+ 73,
+ 2,
+ 360,
+ 75,
+ 2,
+ 386,
+ 78,
+ 2,
+ 356,
+ 81,
+ 2,
+ 399,
+ 108,
+ 2,
+ 358,
+ 129,
+ 2,
+ 433,
+ 142,
+ 2,
+ 341,
+ 159,
+ 2,
+ 449,
+ 165,
+ 2,
+ 309,
+ 178,
+ 2,
+ 424,
+ 203,
+ 2,
+ 393,
+ 214,
+ 2,
+ 429,
+ 294,
+ 2,
+ 367,
+ 273,
+ 2,
+ 466,
+ 362,
+ 2,
+ 396,
+ 341,
+ 2
+ ],
+ "image_id": 785,
+ "bbox": [
+ 280.79,
+ 44.73,
+ 218.7,
+ 346.68
+ ],
+ "category_id": 1,
+ "id": 442619,
+ "face_box": [
+ 358.2,
+ 69.86,
+ 26.360000000000014,
+ 25.849999999999994
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 280.43,
+ 173.12,
+ 27.860000000000014,
+ 24.849999999999994
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 304.10366,
+ 181.75134,
+ 1,
+ 300.70183,
+ 182.77567,
+ 1,
+ 297.3,
+ 183.8,
+ 1,
+ 294.7,
+ 186.5,
+ 1,
+ 290.1,
+ 187.8,
+ 1,
+ 290.9,
+ 176.6,
+ 1,
+ 287.5,
+ 176.0,
+ 1,
+ 285.5,
+ 178.4,
+ 1,
+ 286.4,
+ 182.4,
+ 1,
+ 288.8,
+ 179.4,
+ 1,
+ 285.0,
+ 181.0,
+ 1,
+ 287.3,
+ 186.1,
+ 1,
+ 291.8,
+ 189.5,
+ 1,
+ 287.7,
+ 182.7,
+ 1,
+ 283.8,
+ 184.1,
+ 1,
+ 286.5,
+ 189.1,
+ 1,
+ 290.0,
+ 192.0,
+ 1,
+ 286.7,
+ 185.3,
+ 1,
+ 282.8,
+ 187.4,
+ 1,
+ 284.8,
+ 191.6,
+ 1,
+ 288.4,
+ 194.5,
+ 1
+ ],
+ "face_kpts": [
+ 355.823,
+ 75.36,
+ 1.0,
+ 356.354,
+ 79.0837,
+ 1.0,
+ 357.244,
+ 82.7374,
+ 1.0,
+ 358.518,
+ 86.2722,
+ 1.0,
+ 360.146,
+ 89.6578,
+ 1.0,
+ 362.266,
+ 92.7538,
+ 1.0,
+ 365.004,
+ 95.3223,
+ 1.0,
+ 368.487,
+ 96.6454,
+ 1.0,
+ 372.191,
+ 96.1419,
+ 1.0,
+ 375.644,
+ 94.6832,
+ 1.0,
+ 378.601,
+ 92.3665,
+ 1.0,
+ 381.101,
+ 89.5662,
+ 1.0,
+ 382.903,
+ 86.2741,
+ 1.0,
+ 383.896,
+ 82.6509,
+ 1.0,
+ 384.075,
+ 78.9011,
+ 1.0,
+ 384.1,
+ 75.1408,
+ 1.0,
+ 383.903,
+ 71.3861,
+ 1.0,
+ 357.084,
+ 72.9743,
+ 1.0,
+ 358.602,
+ 71.7848,
+ 1.0,
+ 360.42,
+ 71.3443,
+ 1.0,
+ 362.377,
+ 71.1566,
+ 1.0,
+ 364.36,
+ 71.1889,
+ 1.0,
+ 368.971,
+ 70.4992,
+ 1.0,
+ 370.945,
+ 69.8179,
+ 1.0,
+ 373.001,
+ 69.3543,
+ 1.0,
+ 375.14,
+ 69.2666,
+ 1.0,
+ 377.358,
+ 69.8865,
+ 1.0,
+ 366.57,
+ 73.9588,
+ 1.0,
+ 366.734,
+ 76.1499,
+ 1.0,
+ 366.88,
+ 78.3018,
+ 1.0,
+ 366.99,
+ 80.4957,
+ 1.0,
+ 365.104,
+ 82.5589,
+ 1.0,
+ 366.308,
+ 82.8331,
+ 1.0,
+ 367.645,
+ 82.8037,
+ 1.0,
+ 369.172,
+ 82.2061,
+ 1.0,
+ 370.693,
+ 81.6521,
+ 1.0,
+ 358.705,
+ 75.4542,
+ 1.0,
+ 360.294,
+ 74.0903,
+ 1.0,
+ 362.376,
+ 73.8423,
+ 1.0,
+ 364.302,
+ 74.6834,
+ 1.0,
+ 362.543,
+ 75.568,
+ 1.0,
+ 360.612,
+ 75.8883,
+ 1.0,
+ 369.771,
+ 73.7734,
+ 1.0,
+ 371.409,
+ 72.2638,
+ 1.0,
+ 373.615,
+ 71.9502,
+ 1.0,
+ 375.722,
+ 72.7144,
+ 1.0,
+ 373.888,
+ 73.699,
+ 1.0,
+ 371.835,
+ 74.0238,
+ 1.0,
+ 363.184,
+ 86.9317,
+ 1.0,
+ 364.788,
+ 85.4484,
+ 1.0,
+ 367.021,
+ 84.7474,
+ 1.0,
+ 368.048,
+ 84.5364,
+ 1.0,
+ 369.083,
+ 84.3709,
+ 1.0,
+ 372.183,
+ 84.0529,
+ 1.0,
+ 375.083,
+ 84.8901,
+ 1.0,
+ 373.687,
+ 87.0735,
+ 1.0,
+ 371.644,
+ 88.8121,
+ 1.0,
+ 369.024,
+ 89.6982,
+ 1.0,
+ 366.67,
+ 89.6039,
+ 1.0,
+ 364.721,
+ 88.606,
+ 1.0,
+ 363.588,
+ 86.903,
+ 1.0,
+ 365.723,
+ 85.8496,
+ 1.0,
+ 368.184,
+ 85.2863,
+ 1.0,
+ 371.444,
+ 84.8294,
+ 1.0,
+ 374.647,
+ 85.0454,
+ 1.0,
+ 372.166,
+ 87.2914,
+ 1.0,
+ 368.81,
+ 88.3791,
+ 1.0,
+ 365.965,
+ 88.3238,
+ 1.0
+ ],
+ "face_valid": true,
+ "lefthand_valid": false,
+ "righthand_valid": true,
+ "foot_valid": true,
+ "foot_kpts": [
+ 439,
+ 378,
+ 2,
+ 446,
+ 380,
+ 2,
+ 479,
+ 370,
+ 2,
+ 377,
+ 359,
+ 2,
+ 376,
+ 358,
+ 2,
+ 413,
+ 353,
+ 2
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 98.56,
+ 273.72,
+ 132.9,
+ 267,
+ 140.37,
+ 281.93,
+ 165.75,
+ 285.66,
+ 156.79,
+ 264.01,
+ 170.23,
+ 261.02,
+ 177.7,
+ 272.97,
+ 182.18,
+ 279.69,
+ 200.85,
+ 268.49,
+ 212.79,
+ 255.05,
+ 188.9,
+ 256.54,
+ 164.26,
+ 240.12,
+ 139.62,
+ 212.49,
+ 109.01,
+ 221.45,
+ 103.04,
+ 220.71,
+ 122.45,
+ 202.04,
+ 113.49,
+ 196.07,
+ 96.32,
+ 168.44,
+ 97.06,
+ 162.47,
+ 110.5,
+ 136.34,
+ 112,
+ 124.39,
+ 91.09,
+ 110.95,
+ 80.64,
+ 114.68,
+ 71.68,
+ 131.86,
+ 62.72,
+ 147.54,
+ 57.49,
+ 156.5,
+ 48.53,
+ 168.44,
+ 41.07,
+ 180.39,
+ 38.08,
+ 193.08,
+ 40.32,
+ 205.03,
+ 47.04,
+ 213.24,
+ 54.5,
+ 216.23,
+ 82.13,
+ 252.06,
+ 91.09,
+ 271.48
+ ]
+ ],
+ "num_keypoints": 14,
+ "area": 11025.219,
+ "iscrowd": 0,
+ "keypoints": [
+ 99,
+ 144,
+ 2,
+ 104,
+ 141,
+ 2,
+ 96,
+ 137,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 133,
+ 2,
+ 56,
+ 161,
+ 2,
+ 81,
+ 162,
+ 2,
+ 0,
+ 0,
+ 0,
+ 103,
+ 208,
+ 2,
+ 116,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 57,
+ 246,
+ 1,
+ 82,
+ 259,
+ 1,
+ 137,
+ 219,
+ 2,
+ 138,
+ 247,
+ 2,
+ 177,
+ 256,
+ 2,
+ 158,
+ 296,
+ 1
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 38.08,
+ 110.95,
+ 174.71,
+ 174.71
+ ],
+ "category_id": 1,
+ "id": 198196,
+ "face_box": [
+ 79.19,
+ 131.64,
+ 29.290000000000006,
+ 28.480000000000018
+ ],
+ "lefthand_box": [
+ 104.83,
+ 196.48,
+ 16.400000000000006,
+ 15.810000000000002
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 109.88978,
+ 204.46047,
+ 1,
+ 113.101195,
+ 201.939065,
+ 1,
+ 116.31261,
+ 199.41766,
+ 1,
+ 113.19977,
+ 199.3139,
+ 1,
+ 109.8794,
+ 200.24775,
+ 1,
+ 117.86903,
+ 199.10638,
+ 2,
+ 113.9261,
+ 199.00262,
+ 2,
+ 109.56812,
+ 198.48381,
+ 2,
+ 106.6628,
+ 198.38004999999998,
+ 1,
+ 117.1427,
+ 202.32298,
+ 2,
+ 111.2283,
+ 201.80417,
+ 2,
+ 107.07784000000001,
+ 201.38913,
+ 2,
+ 103.65371999999999,
+ 201.18161,
+ 1,
+ 116.52013,
+ 205.95463,
+ 2,
+ 112.5772,
+ 205.53958,
+ 2,
+ 107.59665,
+ 204.39821,
+ 2,
+ 104.27629,
+ 203.77564,
+ 2,
+ 116.41637,
+ 209.69004,
+ 2,
+ 112.16215,
+ 209.48252,
+ 2,
+ 108.73803000000001,
+ 208.34114,
+ 2,
+ 105.72895,
+ 206.68096,
+ 2
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 82.9654,
+ 131.144,
+ 1.0,
+ 81.8046,
+ 134.328,
+ 1.0,
+ 80.7007,
+ 137.531,
+ 1.0,
+ 79.8836,
+ 140.818,
+ 1.0,
+ 79.734,
+ 144.196,
+ 1.0,
+ 80.4763,
+ 147.486,
+ 1.0,
+ 82.0188,
+ 150.498,
+ 1.0,
+ 84.2352,
+ 153.057,
+ 1.0,
+ 86.8081,
+ 155.258,
+ 1.0,
+ 89.652,
+ 157.095,
+ 1.0,
+ 92.9128,
+ 157.812,
+ 1.0,
+ 95.962,
+ 156.474,
+ 1.0,
+ 98.5377,
+ 154.281,
+ 1.0,
+ 100.557,
+ 151.568,
+ 1.0,
+ 102.508,
+ 148.799,
+ 1.0,
+ 103.987,
+ 145.756,
+ 1.0,
+ 105.345,
+ 142.655,
+ 1.0,
+ 93.6074,
+ 132.13,
+ 1.0,
+ 95.8108,
+ 132.112,
+ 1.0,
+ 97.7956,
+ 132.618,
+ 1.0,
+ 99.6897,
+ 133.398,
+ 1.0,
+ 101.364,
+ 134.432,
+ 1.0,
+ 105.0,
+ 136.896,
+ 1.0,
+ 105.708,
+ 137.334,
+ 1.0,
+ 106.267,
+ 137.852,
+ 1.0,
+ 106.759,
+ 138.404,
+ 1.0,
+ 107.013,
+ 139.401,
+ 1.0,
+ 100.904,
+ 139.994,
+ 1.0,
+ 100.551,
+ 142.0,
+ 1.0,
+ 100.202,
+ 143.956,
+ 1.0,
+ 99.8116,
+ 145.919,
+ 1.0,
+ 94.7941,
+ 146.187,
+ 1.0,
+ 95.9823,
+ 147.027,
+ 1.0,
+ 97.3054,
+ 147.849,
+ 1.0,
+ 98.2362,
+ 148.403,
+ 1.0,
+ 99.2812,
+ 148.491,
+ 1.0,
+ 93.151,
+ 135.98,
+ 1.0,
+ 94.9184,
+ 136.187,
+ 1.0,
+ 96.5441,
+ 136.903,
+ 1.0,
+ 97.6034,
+ 138.308,
+ 1.0,
+ 95.8998,
+ 138.017,
+ 1.0,
+ 94.3941,
+ 137.178,
+ 1.0,
+ 102.085,
+ 141.003,
+ 1.0,
+ 103.379,
+ 141.05,
+ 1.0,
+ 104.485,
+ 141.71,
+ 1.0,
+ 104.899,
+ 142.915,
+ 1.0,
+ 103.704,
+ 142.739,
+ 1.0,
+ 102.729,
+ 142.026,
+ 1.0,
+ 89.8433,
+ 148.685,
+ 1.0,
+ 92.6494,
+ 149.006,
+ 1.0,
+ 95.2801,
+ 149.78,
+ 1.0,
+ 96.1096,
+ 150.259,
+ 1.0,
+ 96.7411,
+ 150.719,
+ 1.0,
+ 97.3853,
+ 151.82,
+ 1.0,
+ 97.337,
+ 153.217,
+ 1.0,
+ 96.5124,
+ 153.108,
+ 1.0,
+ 95.6091,
+ 152.796,
+ 1.0,
+ 94.7518,
+ 152.399,
+ 1.0,
+ 93.0313,
+ 151.317,
+ 1.0,
+ 91.3461,
+ 150.149,
+ 1.0,
+ 90.24,
+ 148.802,
+ 1.0,
+ 92.9121,
+ 149.883,
+ 1.0,
+ 95.4213,
+ 151.204,
+ 1.0,
+ 96.3082,
+ 152.03,
+ 1.0,
+ 97.1377,
+ 152.997,
+ 1.0,
+ 96.3098,
+ 152.035,
+ 1.0,
+ 95.406,
+ 151.234,
+ 1.0,
+ 92.8725,
+ 149.984,
+ 1.0
+ ],
+ "face_valid": true,
+ "lefthand_valid": true,
+ "righthand_valid": false,
+ "foot_valid": true,
+ "foot_kpts": [
+ 208.16049,
+ 257.42419,
+ 2.0,
+ 205.8824,
+ 259.13276,
+ 2.0,
+ 183.38626,
+ 275.93367,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 257.76,
+ 288.05,
+ 273.4,
+ 258.26,
+ 325.55,
+ 253.79,
+ 335.23,
+ 232.93,
+ 326.3,
+ 186.74,
+ 333.74,
+ 177.05,
+ 327.79,
+ 153.21,
+ 333.74,
+ 142.04,
+ 344.17,
+ 139.06,
+ 353.11,
+ 139.06,
+ 359.07,
+ 145.02,
+ 360.56,
+ 148.74,
+ 362.05,
+ 168.86,
+ 388.87,
+ 197.17,
+ 397.81,
+ 276.88,
+ 372.48,
+ 293.27
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 10171.9544,
+ "iscrowd": 0,
+ "keypoints": [
+ 343,
+ 164,
+ 2,
+ 348,
+ 160,
+ 2,
+ 340,
+ 160,
+ 2,
+ 359,
+ 163,
+ 2,
+ 332,
+ 164,
+ 2,
+ 370,
+ 189,
+ 2,
+ 334,
+ 190,
+ 2,
+ 358,
+ 236,
+ 2,
+ 348,
+ 234,
+ 2,
+ 339,
+ 270,
+ 2,
+ 330,
+ 262,
+ 2,
+ 378,
+ 262,
+ 2,
+ 343,
+ 254,
+ 2,
+ 338,
+ 280,
+ 2,
+ 283,
+ 272,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 257.76,
+ 139.06,
+ 140.05,
+ 154.21
+ ],
+ "category_id": 1,
+ "id": 230195,
+ "face_box": [
+ 333.96,
+ 154.32,
+ 23.28000000000003,
+ 26.79000000000002
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 333.383,
+ 160.62,
+ 1.0,
+ 333.607,
+ 163.811,
+ 1.0,
+ 334.137,
+ 166.965,
+ 1.0,
+ 334.934,
+ 170.062,
+ 1.0,
+ 336.036,
+ 173.062,
+ 1.0,
+ 337.69,
+ 175.794,
+ 1.0,
+ 340.01,
+ 177.986,
+ 1.0,
+ 342.889,
+ 179.347,
+ 1.0,
+ 346.063,
+ 179.445,
+ 1.0,
+ 349.16,
+ 178.674,
+ 1.0,
+ 351.892,
+ 177.033,
+ 1.0,
+ 354.132,
+ 174.761,
+ 1.0,
+ 355.652,
+ 171.957,
+ 1.0,
+ 356.482,
+ 168.871,
+ 1.0,
+ 356.751,
+ 165.691,
+ 1.0,
+ 356.914,
+ 162.496,
+ 1.0,
+ 356.913,
+ 159.299,
+ 1.0,
+ 335.435,
+ 157.491,
+ 1.0,
+ 336.759,
+ 156.383,
+ 1.0,
+ 338.264,
+ 155.821,
+ 1.0,
+ 339.903,
+ 155.445,
+ 1.0,
+ 341.565,
+ 155.312,
+ 1.0,
+ 345.805,
+ 155.039,
+ 1.0,
+ 347.424,
+ 154.896,
+ 1.0,
+ 349.044,
+ 154.957,
+ 1.0,
+ 350.677,
+ 155.266,
+ 1.0,
+ 352.333,
+ 156.08,
+ 1.0,
+ 343.65,
+ 159.186,
+ 1.0,
+ 343.687,
+ 161.041,
+ 1.0,
+ 343.68,
+ 162.886,
+ 1.0,
+ 343.657,
+ 164.752,
+ 1.0,
+ 341.61,
+ 167.049,
+ 1.0,
+ 342.69,
+ 167.145,
+ 1.0,
+ 343.906,
+ 167.123,
+ 1.0,
+ 345.179,
+ 166.907,
+ 1.0,
+ 346.456,
+ 166.707,
+ 1.0,
+ 336.707,
+ 159.932,
+ 1.0,
+ 338.078,
+ 158.999,
+ 1.0,
+ 339.726,
+ 158.864,
+ 1.0,
+ 341.204,
+ 159.605,
+ 1.0,
+ 339.755,
+ 160.185,
+ 1.0,
+ 338.21,
+ 160.321,
+ 1.0,
+ 346.612,
+ 159.27,
+ 1.0,
+ 348.028,
+ 158.307,
+ 1.0,
+ 349.739,
+ 158.245,
+ 1.0,
+ 351.302,
+ 158.965,
+ 1.0,
+ 349.802,
+ 159.575,
+ 1.0,
+ 348.188,
+ 159.642,
+ 1.0,
+ 340.049,
+ 171.873,
+ 1.0,
+ 341.307,
+ 170.304,
+ 1.0,
+ 343.097,
+ 169.499,
+ 1.0,
+ 343.987,
+ 169.41,
+ 1.0,
+ 344.876,
+ 169.314,
+ 1.0,
+ 346.909,
+ 169.61,
+ 1.0,
+ 348.603,
+ 170.874,
+ 1.0,
+ 347.548,
+ 172.219,
+ 1.0,
+ 346.133,
+ 173.242,
+ 1.0,
+ 344.378,
+ 173.742,
+ 1.0,
+ 342.683,
+ 173.666,
+ 1.0,
+ 341.218,
+ 173.038,
+ 1.0,
+ 340.398,
+ 171.815,
+ 1.0,
+ 342.1,
+ 170.752,
+ 1.0,
+ 344.043,
+ 170.287,
+ 1.0,
+ 346.21,
+ 170.271,
+ 1.0,
+ 348.214,
+ 170.913,
+ 1.0,
+ 346.462,
+ 171.947,
+ 1.0,
+ 344.283,
+ 172.468,
+ 1.0,
+ 342.246,
+ 172.507,
+ 1.0
+ ],
+ "face_valid": true,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": false,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 285.37,
+ 126.5,
+ 281.97,
+ 127.72,
+ 280.76,
+ 132.33,
+ 280.76,
+ 136.46,
+ 275.17,
+ 143.26,
+ 275.9,
+ 158.08,
+ 277.6,
+ 164.4,
+ 278.33,
+ 173.87,
+ 278.33,
+ 183.83,
+ 279.79,
+ 191.11,
+ 281.97,
+ 194.76,
+ 284.89,
+ 192.09,
+ 284.89,
+ 186.99,
+ 284.89,
+ 181.16,
+ 284.64,
+ 177.51,
+ 285.86,
+ 173.87
+ ]
+ ],
+ "num_keypoints": 0,
+ "area": 491.2669,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 275.17,
+ 126.5,
+ 10.69,
+ 68.26
+ ],
+ "category_id": 1,
+ "id": 1202706,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": false,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 339.34,
+ 107.97,
+ 338.38,
+ 102.19,
+ 339.34,
+ 91.58,
+ 335.49,
+ 84.84,
+ 326.81,
+ 74.23,
+ 312.35,
+ 74.23,
+ 301.75,
+ 74.23,
+ 295,
+ 86.76,
+ 295,
+ 93.51,
+ 292.11,
+ 99.3,
+ 287.29,
+ 102.19,
+ 291.14,
+ 107.01,
+ 295,
+ 107.01,
+ 295.96,
+ 112.79,
+ 301.75,
+ 115.69,
+ 305.6,
+ 119.54,
+ 307.53,
+ 123.4,
+ 317.17,
+ 123.4,
+ 311.39,
+ 129.18,
+ 286.32,
+ 139.79,
+ 274.75,
+ 139.79,
+ 264.15,
+ 138.82,
+ 262.22,
+ 144.61,
+ 261.26,
+ 147.5,
+ 253.54,
+ 147.5,
+ 247.76,
+ 150.39,
+ 249.69,
+ 159.07,
+ 256.44,
+ 161,
+ 262.22,
+ 161,
+ 268,
+ 161,
+ 276.68,
+ 161.96,
+ 284.39,
+ 168.71,
+ 293.07,
+ 174.49,
+ 301.75,
+ 174.49,
+ 308.49,
+ 169.67,
+ 308.49,
+ 188.95,
+ 311.39,
+ 194.74,
+ 312.35,
+ 208.23,
+ 307.53,
+ 221.73,
+ 297.89,
+ 229.44,
+ 281.5,
+ 250.65,
+ 269.93,
+ 262.22,
+ 278.61,
+ 320.06,
+ 281.5,
+ 331.63,
+ 276.68,
+ 338.38,
+ 270.9,
+ 349.95,
+ 262.22,
+ 356.7,
+ 253.54,
+ 359.59,
+ 253.54,
+ 365.37,
+ 274.75,
+ 365.37,
+ 291.14,
+ 365.37,
+ 306.57,
+ 359.59,
+ 303.67,
+ 352.84,
+ 297.89,
+ 340.31,
+ 293.07,
+ 318.13,
+ 295,
+ 294.03,
+ 293.07,
+ 278.61,
+ 294.03,
+ 270.9,
+ 305.6,
+ 259.33,
+ 313.31,
+ 299.82,
+ 319.1,
+ 309.46,
+ 341.27,
+ 317.17,
+ 384.65,
+ 330.67,
+ 387.55,
+ 335.49,
+ 383.69,
+ 341.27,
+ 397.19,
+ 350.91,
+ 398.15,
+ 363.44,
+ 398.15,
+ 375.01,
+ 405.86,
+ 374.05,
+ 409.72,
+ 357.66,
+ 411.65,
+ 342.24,
+ 416.47,
+ 328.74,
+ 417.43,
+ 321.03,
+ 410.68,
+ 319.1,
+ 401.04,
+ 318.13,
+ 392.37,
+ 318.13,
+ 382.73,
+ 314.28,
+ 348.98,
+ 300.78,
+ 339.34,
+ 293.07,
+ 334.52,
+ 285.36,
+ 340.31,
+ 259.33,
+ 340.31,
+ 246.8,
+ 340.31,
+ 242.94,
+ 350.91,
+ 228.48,
+ 358.62,
+ 214.98,
+ 355.22,
+ 204.32,
+ 357.05,
+ 196.11,
+ 361.61,
+ 188.82,
+ 361.61,
+ 181.97,
+ 365.26,
+ 165.63,
+ 367.54,
+ 139.18,
+ 366.17,
+ 123.68,
+ 361.15,
+ 112.73,
+ 353.86,
+ 107.72,
+ 351.58,
+ 105.89,
+ 344.74,
+ 105.89,
+ 340.18,
+ 109.08
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 17123.92955,
+ "iscrowd": 0,
+ "keypoints": [
+ 297,
+ 111,
+ 2,
+ 299,
+ 106,
+ 2,
+ 0,
+ 0,
+ 0,
+ 314,
+ 108,
+ 2,
+ 0,
+ 0,
+ 0,
+ 329,
+ 141,
+ 2,
+ 346,
+ 125,
+ 2,
+ 295,
+ 164,
+ 2,
+ 323,
+ 130,
+ 2,
+ 266,
+ 155,
+ 2,
+ 279,
+ 143,
+ 2,
+ 329,
+ 225,
+ 2,
+ 331,
+ 221,
+ 2,
+ 327,
+ 298,
+ 2,
+ 283,
+ 269,
+ 2,
+ 398,
+ 327,
+ 2,
+ 288,
+ 349,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 247.76,
+ 74.23,
+ 169.67,
+ 300.78
+ ],
+ "category_id": 1,
+ "id": 460541,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 249.12,
+ 146.31,
+ 19.920000000000016,
+ 15.819999999999993
+ ],
+ "righthand_box": [
+ 262.82,
+ 139.96,
+ 18.930000000000007,
+ 14.679999999999978
+ ],
+ "lefthand_kpts": [
+ 265.1,
+ 155.9,
+ 1,
+ 260.05,
+ 152.25,
+ 1,
+ 255.0,
+ 148.6,
+ 1,
+ 250.6,
+ 148.6,
+ 1,
+ 249.1,
+ 151.0,
+ 1,
+ 253.4,
+ 158.9,
+ 1,
+ 251.9,
+ 155.1,
+ 1,
+ 252.0,
+ 151.9,
+ 1,
+ 252.9,
+ 150.0,
+ 1,
+ 257.4,
+ 157.9,
+ 1,
+ 256.7,
+ 154.2,
+ 1,
+ 256.3,
+ 151.6,
+ 1,
+ 256.9,
+ 149.3,
+ 1,
+ 260.2,
+ 156.5,
+ 1,
+ 260.1,
+ 153.0,
+ 1,
+ 259.9,
+ 150.7,
+ 1,
+ 260.2,
+ 148.7,
+ 1,
+ 262.8,
+ 154.8,
+ 1,
+ 262.7,
+ 152.5,
+ 1,
+ 262.7,
+ 150.9,
+ 1,
+ 262.6,
+ 148.8,
+ 1
+ ],
+ "righthand_kpts": [
+ 280.8,
+ 146.5,
+ 1,
+ 275.4,
+ 149.15,
+ 1,
+ 270.0,
+ 151.8,
+ 1,
+ 266.2,
+ 152.2,
+ 1,
+ 263.5,
+ 151.9,
+ 1,
+ 266.6,
+ 142.5,
+ 1,
+ 263.6,
+ 147.0,
+ 1,
+ 264.9,
+ 151.0,
+ 1,
+ 268.5,
+ 152.9,
+ 1,
+ 270.6,
+ 142.0,
+ 1,
+ 267.9,
+ 146.0,
+ 1,
+ 269.4,
+ 149.6,
+ 1,
+ 272.5,
+ 151.5,
+ 1,
+ 273.8,
+ 142.1,
+ 1,
+ 272.2,
+ 146.0,
+ 1,
+ 274.2,
+ 149.1,
+ 1,
+ 276.5,
+ 149.6,
+ 1,
+ 277.4,
+ 142.3,
+ 1,
+ 276.6,
+ 145.2,
+ 1,
+ 277.6,
+ 148.3,
+ 1,
+ 279.4,
+ 148.6,
+ 1
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": true,
+ "righthand_valid": true,
+ "foot_valid": true,
+ "foot_kpts": [
+ 401.79499,
+ 364.28207,
+ 2.0,
+ 407.21854,
+ 361.57029,
+ 2.0,
+ 407.21854,
+ 325.86523,
+ 2.0,
+ 257.16687,
+ 361.57029,
+ 2.0,
+ 258.52276,
+ 361.11833,
+ 2.0,
+ 297.84353,
+ 355.69477,
+ 2.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 578.76,
+ 112.4,
+ 589.39,
+ 100.81,
+ 589.39,
+ 99.84,
+ 596.16,
+ 116.27,
+ 603.89,
+ 122.07,
+ 603.89,
+ 138.49,
+ 598.09,
+ 159.75,
+ 597.12,
+ 181,
+ 594.22,
+ 191.63,
+ 589.39,
+ 212.89,
+ 583.59,
+ 208.06,
+ 583.59,
+ 206.13,
+ 582.63,
+ 200.33,
+ 582.63,
+ 193.57,
+ 582.63,
+ 182.94,
+ 575.86,
+ 181,
+ 567.17,
+ 197.43,
+ 571.03,
+ 203.23,
+ 567.17,
+ 207.09,
+ 555.57,
+ 208.06,
+ 562.34,
+ 200.33,
+ 565.24,
+ 190.67,
+ 565.24,
+ 173.27,
+ 566.2,
+ 163.61,
+ 568.14,
+ 156.85,
+ 570.07,
+ 148.15,
+ 566.2,
+ 143.32,
+ 565.24,
+ 133.66,
+ 575.86,
+ 118.2
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 2789.0208,
+ "iscrowd": 0,
+ "keypoints": [
+ 589,
+ 113,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 595,
+ 112,
+ 1,
+ 584,
+ 110,
+ 2,
+ 598,
+ 123,
+ 2,
+ 579,
+ 119,
+ 2,
+ 594,
+ 141,
+ 2,
+ 570,
+ 137,
+ 2,
+ 576,
+ 135,
+ 2,
+ 585,
+ 139,
+ 2,
+ 590,
+ 157,
+ 2,
+ 574,
+ 156,
+ 2,
+ 589,
+ 192,
+ 2,
+ 565,
+ 189,
+ 1,
+ 587,
+ 222,
+ 1,
+ 557,
+ 219,
+ 1
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 555.57,
+ 99.84,
+ 48.32,
+ 113.05
+ ],
+ "category_id": 1,
+ "id": 488308,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 568.2,
+ 130.89,
+ 10.75,
+ 11.130000000000024
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 578.8,
+ 135.7,
+ 2,
+ 577.55,
+ 134.35,
+ 2,
+ 576.3,
+ 133.0,
+ 1,
+ 574.6,
+ 134.1,
+ 1,
+ 574.0,
+ 135.5,
+ 1,
+ 574.3,
+ 132.9,
+ 2,
+ 572.0,
+ 132.4,
+ 2,
+ 570.3,
+ 131.8,
+ 2,
+ 568.9,
+ 130.7,
+ 2,
+ 573.3,
+ 134.4,
+ 2,
+ 570.9,
+ 134.0,
+ 2,
+ 569.5,
+ 133.9,
+ 2,
+ 568.2,
+ 133.8,
+ 2,
+ 572.8,
+ 135.7,
+ 2,
+ 572.6,
+ 138.3,
+ 2,
+ 574.1,
+ 139.4,
+ 2,
+ 576.2,
+ 139.4,
+ 1,
+ 574.4,
+ 138.0,
+ 2,
+ 575.4,
+ 139.5,
+ 2,
+ 576.3,
+ 140.2,
+ 2,
+ 577.6,
+ 140.8,
+ 2
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": true,
+ "righthand_valid": false,
+ "foot_valid": false,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 446.96,
+ 73.13,
+ 445.81,
+ 77.71,
+ 443.33,
+ 78.29,
+ 441.61,
+ 81.72,
+ 441.23,
+ 84.58,
+ 440.85,
+ 90.5,
+ 442.19,
+ 94.32,
+ 443.52,
+ 97.18,
+ 443.52,
+ 102.33,
+ 442.57,
+ 105.58,
+ 446.58,
+ 105.19,
+ 447.15,
+ 99.85,
+ 447.53,
+ 94.89,
+ 446,
+ 93.55,
+ 446.38,
+ 92.03,
+ 453.64,
+ 92.41,
+ 454.02,
+ 94.51,
+ 457.64,
+ 94.51,
+ 455.74,
+ 88.4,
+ 455.35,
+ 82.29,
+ 453.64,
+ 78.48,
+ 451.92,
+ 77.71,
+ 452.87,
+ 74.47,
+ 450.58,
+ 73.13
+ ]
+ ],
+ "num_keypoints": 0,
+ "area": 285.7906,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 440.85,
+ 73.13,
+ 16.79,
+ 32.45
+ ],
+ "category_id": 1,
+ "id": 508900,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": false,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 497.15,
+ 413.95,
+ 531.55,
+ 417.68,
+ 548.74,
+ 411.7,
+ 551.74,
+ 403.48,
+ 546.5,
+ 394.5,
+ 543.51,
+ 386.28,
+ 571.93,
+ 390.76,
+ 574.92,
+ 391.51,
+ 579.4,
+ 409.46,
+ 605.58,
+ 409.46,
+ 615.3,
+ 408.71,
+ 607.07,
+ 389.27,
+ 598.1,
+ 381.79,
+ 607.82,
+ 366.83,
+ 607.82,
+ 352.63,
+ 610.06,
+ 338.42,
+ 619.04,
+ 345.15,
+ 631,
+ 344.4,
+ 630.25,
+ 336.92,
+ 626.51,
+ 318.98,
+ 616.05,
+ 286.07,
+ 598.85,
+ 263.64,
+ 585.39,
+ 257.66,
+ 593.61,
+ 244.2,
+ 601.09,
+ 235.97,
+ 596.6,
+ 219.52,
+ 587.63,
+ 211.29,
+ 577.91,
+ 208.3,
+ 563.7,
+ 206.81,
+ 556.22,
+ 214.29,
+ 548,
+ 217.28,
+ 539.77,
+ 229.99,
+ 539.77,
+ 241.95,
+ 539.02,
+ 247.19,
+ 523.32,
+ 247.19,
+ 503.88,
+ 254.67,
+ 485.93,
+ 254.67,
+ 479.95,
+ 248.68,
+ 473.22,
+ 241.21,
+ 485.93,
+ 227,
+ 477.7,
+ 215.78,
+ 457.51,
+ 215.78,
+ 453.77,
+ 235.22,
+ 463.5,
+ 246.44,
+ 465.74,
+ 261.4,
+ 490.42,
+ 274.11,
+ 501.63,
+ 275.6,
+ 504.62,
+ 286.07,
+ 519.58,
+ 286.07,
+ 522.57,
+ 292.06,
+ 512.85,
+ 310,
+ 515.09,
+ 330.94,
+ 530.05,
+ 343.65,
+ 505.37,
+ 341.41,
+ 479.95,
+ 339.91,
+ 465.74,
+ 346.64,
+ 463.5,
+ 358.61,
+ 473.97,
+ 381.04,
+ 485.18,
+ 390.02,
+ 501.63,
+ 398.99,
+ 504.62,
+ 404.22,
+ 491.16,
+ 412.45,
+ 495.65,
+ 417.68
+ ]
+ ],
+ "num_keypoints": 12,
+ "area": 21608.94075,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 552,
+ 234,
+ 2,
+ 0,
+ 0,
+ 0,
+ 531,
+ 262,
+ 2,
+ 600,
+ 283,
+ 2,
+ 480,
+ 260,
+ 2,
+ 622,
+ 336,
+ 2,
+ 466,
+ 242,
+ 2,
+ 0,
+ 0,
+ 0,
+ 546,
+ 365,
+ 2,
+ 592,
+ 371,
+ 2,
+ 470,
+ 351,
+ 2,
+ 551,
+ 330,
+ 2,
+ 519,
+ 394,
+ 2,
+ 589,
+ 391,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 453.77,
+ 206.81,
+ 177.23,
+ 210.87
+ ],
+ "category_id": 1,
+ "id": 1717641,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": true,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 498.08009,
+ 412.23863,
+ 2.0,
+ 541.66626,
+ 400.39384,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 602.22109,
+ 403.58794,
+ 2.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 58.93,
+ 163.67,
+ 47.18,
+ 161.59,
+ 36.12,
+ 93.86,
+ 41.65,
+ 82.8,
+ 40.27,
+ 69.66,
+ 50.64,
+ 67.59,
+ 55.48,
+ 73.81,
+ 63.08,
+ 92.47,
+ 66.53,
+ 99.38,
+ 65.15,
+ 109.06,
+ 61,
+ 127.03,
+ 59.62,
+ 162.97
+ ]
+ ],
+ "num_keypoints": 17,
+ "area": 1870.14015,
+ "iscrowd": 0,
+ "keypoints": [
+ 48,
+ 79,
+ 2,
+ 50,
+ 77,
+ 2,
+ 46,
+ 77,
+ 2,
+ 54,
+ 78,
+ 2,
+ 45,
+ 78,
+ 2,
+ 57,
+ 90,
+ 2,
+ 42,
+ 90,
+ 2,
+ 63,
+ 103,
+ 2,
+ 42,
+ 105,
+ 2,
+ 56,
+ 113,
+ 2,
+ 49,
+ 112,
+ 2,
+ 55,
+ 117,
+ 2,
+ 44,
+ 117,
+ 2,
+ 55,
+ 140,
+ 2,
+ 47,
+ 140,
+ 2,
+ 56,
+ 160,
+ 2,
+ 49,
+ 159,
+ 2
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 36.12,
+ 67.59,
+ 30.41,
+ 96.08
+ ],
+ "category_id": 1,
+ "id": 1724673,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": true,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 44.4,
+ 162.6,
+ 2.0,
+ 43.4,
+ 161.5,
+ 2.0,
+ 51.7,
+ 160.7,
+ 2.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 139.41,
+ 321.58,
+ 144.78,
+ 326.56,
+ 196.92,
+ 314.68,
+ 196.16,
+ 309.31,
+ 207.28,
+ 292.05,
+ 213.03,
+ 284,
+ 228.75,
+ 270.2,
+ 233.35,
+ 261.38,
+ 244.47,
+ 252.56,
+ 254.44,
+ 237.61,
+ 267.86,
+ 215.37,
+ 272.08,
+ 212.68,
+ 285.5,
+ 232.62,
+ 294.7,
+ 250.64,
+ 295.08,
+ 264.06,
+ 290.87,
+ 277.87,
+ 290.87,
+ 286.3,
+ 289.71,
+ 298.19,
+ 281.66,
+ 318.89,
+ 282.05,
+ 334.23,
+ 295.08,
+ 340.37,
+ 315.02,
+ 343.82,
+ 314.25,
+ 336.53,
+ 310.42,
+ 330.4,
+ 301.98,
+ 322.34,
+ 304.29,
+ 310.84,
+ 304.67,
+ 302.79,
+ 306.2,
+ 292.05,
+ 311.19,
+ 275.56,
+ 313.87,
+ 251.79,
+ 311.19,
+ 234.54,
+ 312.72,
+ 224.57,
+ 310.42,
+ 212.3,
+ 307.74,
+ 201.56,
+ 306.2,
+ 193.51,
+ 306.59,
+ 183.16,
+ 310.04,
+ 177.41,
+ 314.64,
+ 173.19,
+ 316.94,
+ 171.65,
+ 328.06,
+ 163.99,
+ 337.64,
+ 157.85,
+ 343.4,
+ 159.77,
+ 346.46,
+ 166.67,
+ 346.85,
+ 170.5,
+ 346.46,
+ 179.71,
+ 346.85,
+ 188.53,
+ 346.85,
+ 191.98,
+ 344.55,
+ 198.11,
+ 342.25,
+ 203.48,
+ 338.41,
+ 208.46,
+ 335.34,
+ 212.68,
+ 335.34,
+ 217.67,
+ 343.01,
+ 222.65,
+ 354.9,
+ 210.76,
+ 359.12,
+ 196.19,
+ 361.8,
+ 173.19,
+ 361.42,
+ 161.69,
+ 356.43,
+ 150.18,
+ 344.93,
+ 135.61,
+ 343.01,
+ 132.93,
+ 345.31,
+ 126.41,
+ 345.7,
+ 124.88,
+ 343.4,
+ 115.29,
+ 340.33,
+ 104.17,
+ 337.26,
+ 102.25,
+ 330.36,
+ 103.4,
+ 326.14,
+ 106.09,
+ 320.01,
+ 111.07,
+ 314.64,
+ 119.89,
+ 310.42,
+ 121.04,
+ 292.02,
+ 121.81,
+ 279.75,
+ 127.94,
+ 244.09,
+ 138.68,
+ 240.25,
+ 142.51,
+ 238.72,
+ 154.4,
+ 239.1,
+ 163.6,
+ 239.87,
+ 173.96,
+ 241.79,
+ 181.24,
+ 248.3,
+ 192.36,
+ 240.25,
+ 206.55,
+ 236.42,
+ 219.2,
+ 229.9,
+ 236.45,
+ 225.3,
+ 247.57,
+ 218.4,
+ 254.48,
+ 208.81,
+ 265.6,
+ 202.29,
+ 278.25,
+ 195.39,
+ 285.92,
+ 188.49,
+ 292.05,
+ 183.5,
+ 295.89,
+ 176.6,
+ 302.41,
+ 172,
+ 308.54,
+ 167.78,
+ 313.14,
+ 146.31,
+ 318.89
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 14250.29385,
+ "iscrowd": 0,
+ "keypoints": [
+ 334,
+ 135,
+ 2,
+ 340,
+ 129,
+ 2,
+ 331,
+ 129,
+ 2,
+ 0,
+ 0,
+ 0,
+ 319,
+ 123,
+ 2,
+ 340,
+ 146,
+ 2,
+ 292,
+ 133,
+ 2,
+ 353,
+ 164,
+ 2,
+ 246,
+ 144,
+ 2,
+ 354,
+ 197,
+ 2,
+ 250,
+ 185,
+ 2,
+ 293,
+ 197,
+ 2,
+ 265,
+ 187,
+ 2,
+ 305,
+ 252,
+ 2,
+ 231,
+ 254,
+ 2,
+ 293,
+ 321,
+ 2,
+ 193,
+ 297,
+ 2
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 139.41,
+ 102.25,
+ 222.39,
+ 241.57
+ ],
+ "category_id": 1,
+ "id": 437295,
+ "face_box": [
+ 320.23,
+ 123.84,
+ 21.049999999999955,
+ 23.5
+ ],
+ "lefthand_box": [
+ 333.65,
+ 198.45,
+ 23.150000000000034,
+ 23.57000000000002
+ ],
+ "righthand_box": [
+ 247.5,
+ 184.92,
+ 23.30000000000001,
+ 22.360000000000014
+ ],
+ "lefthand_kpts": [
+ 353.87482,
+ 196.49984999999998,
+ 1,
+ 349.01957500000003,
+ 201.76511,
+ 1,
+ 344.16433,
+ 207.03037,
+ 1,
+ 340.81534,
+ 210.64729,
+ 1,
+ 337.46165,
+ 216.59183000000002,
+ 1,
+ 346.65868,
+ 216.02586,
+ 1,
+ 342.27241,
+ 219.28019999999998,
+ 1,
+ 337.88613,
+ 219.70467,
+ 1,
+ 334.4903,
+ 218.57273,
+ 1,
+ 345.5,
+ 215.0,
+ 1,
+ 342.27241,
+ 217.72377,
+ 1,
+ 338.73509,
+ 218.00675999999999,
+ 1,
+ 334.77329,
+ 216.30885,
+ 1,
+ 343.7,
+ 213.8,
+ 1,
+ 341.42345,
+ 215.74288,
+ 1,
+ 338.73509,
+ 215.60138,
+ 1,
+ 335.62225,
+ 213.76198,
+ 1,
+ 342.4139,
+ 212.63003,
+ 1,
+ 340.85748,
+ 213.76198,
+ 1,
+ 338.87658,
+ 214.04496,
+ 1,
+ 337.17867,
+ 213.76198,
+ 1
+ ],
+ "righthand_kpts": [
+ 249.4,
+ 180.4,
+ 1,
+ 254.3,
+ 184.9,
+ 1,
+ 259.2,
+ 189.4,
+ 1,
+ 259.3,
+ 192.1,
+ 1,
+ 258.2,
+ 194.9,
+ 1,
+ 254.9,
+ 193.2,
+ 1,
+ 255.9,
+ 192.3,
+ 1,
+ 255.9,
+ 190.5,
+ 1,
+ 255.4,
+ 188.5,
+ 1,
+ 252.2,
+ 194.0,
+ 1,
+ 253.2,
+ 193.6,
+ 1,
+ 253.2,
+ 191.1,
+ 1,
+ 252.9,
+ 188.8,
+ 1,
+ 249.4,
+ 193.6,
+ 1,
+ 250.4,
+ 193.6,
+ 1,
+ 250.4,
+ 191.3,
+ 1,
+ 249.9,
+ 188.7,
+ 1,
+ 247.1,
+ 192.2,
+ 1,
+ 248.0,
+ 192.2,
+ 1,
+ 247.9,
+ 190.3,
+ 1,
+ 247.5,
+ 188.3,
+ 1
+ ],
+ "face_kpts": [
+ 319.681,
+ 126.613,
+ 1.0,
+ 319.155,
+ 129.261,
+ 1.0,
+ 318.92,
+ 131.954,
+ 1.0,
+ 319.187,
+ 134.631,
+ 1.0,
+ 319.707,
+ 137.271,
+ 1.0,
+ 320.991,
+ 139.649,
+ 1.0,
+ 322.846,
+ 141.606,
+ 1.0,
+ 325.009,
+ 143.216,
+ 1.0,
+ 327.359,
+ 144.544,
+ 1.0,
+ 329.907,
+ 145.384,
+ 1.0,
+ 332.347,
+ 144.347,
+ 1.0,
+ 334.268,
+ 142.449,
+ 1.0,
+ 335.767,
+ 140.222,
+ 1.0,
+ 336.675,
+ 137.69,
+ 1.0,
+ 337.019,
+ 135.009,
+ 1.0,
+ 336.982,
+ 132.311,
+ 1.0,
+ 337.13,
+ 129.618,
+ 1.0,
+ 328.503,
+ 125.823,
+ 1.0,
+ 329.531,
+ 125.489,
+ 1.0,
+ 330.619,
+ 125.626,
+ 1.0,
+ 331.573,
+ 125.909,
+ 1.0,
+ 332.529,
+ 126.431,
+ 1.0,
+ 334.479,
+ 127.459,
+ 1.0,
+ 334.815,
+ 127.43,
+ 1.0,
+ 335.157,
+ 127.316,
+ 1.0,
+ 335.52,
+ 127.327,
+ 1.0,
+ 335.949,
+ 127.701,
+ 1.0,
+ 332.762,
+ 129.334,
+ 1.0,
+ 333.168,
+ 130.389,
+ 1.0,
+ 333.603,
+ 131.342,
+ 1.0,
+ 333.928,
+ 132.331,
+ 1.0,
+ 331.671,
+ 134.291,
+ 1.0,
+ 332.232,
+ 134.389,
+ 1.0,
+ 332.931,
+ 134.487,
+ 1.0,
+ 333.332,
+ 134.463,
+ 1.0,
+ 333.645,
+ 134.212,
+ 1.0,
+ 329.271,
+ 128.208,
+ 1.0,
+ 329.963,
+ 128.464,
+ 1.0,
+ 330.676,
+ 128.659,
+ 1.0,
+ 331.392,
+ 128.839,
+ 1.0,
+ 330.672,
+ 128.659,
+ 1.0,
+ 330.003,
+ 128.334,
+ 1.0,
+ 333.792,
+ 129.611,
+ 1.0,
+ 334.158,
+ 129.741,
+ 1.0,
+ 334.546,
+ 129.765,
+ 1.0,
+ 334.878,
+ 129.954,
+ 1.0,
+ 334.523,
+ 129.822,
+ 1.0,
+ 334.161,
+ 129.704,
+ 1.0,
+ 327.38,
+ 138.818,
+ 1.0,
+ 329.757,
+ 138.136,
+ 1.0,
+ 332.086,
+ 137.874,
+ 1.0,
+ 332.75,
+ 138.208,
+ 1.0,
+ 333.221,
+ 138.515,
+ 1.0,
+ 334.495,
+ 139.634,
+ 1.0,
+ 335.213,
+ 141.054,
+ 1.0,
+ 334.12,
+ 140.754,
+ 1.0,
+ 333.208,
+ 140.234,
+ 1.0,
+ 332.2,
+ 139.888,
+ 1.0,
+ 330.765,
+ 139.414,
+ 1.0,
+ 329.069,
+ 139.351,
+ 1.0,
+ 327.561,
+ 138.814,
+ 1.0,
+ 329.88,
+ 138.346,
+ 1.0,
+ 332.517,
+ 138.668,
+ 1.0,
+ 334.031,
+ 139.589,
+ 1.0,
+ 335.123,
+ 140.862,
+ 1.0,
+ 333.726,
+ 140.572,
+ 1.0,
+ 332.203,
+ 140.032,
+ 1.0,
+ 329.731,
+ 139.403,
+ 1.0
+ ],
+ "face_valid": true,
+ "lefthand_valid": true,
+ "righthand_valid": true,
+ "foot_valid": true,
+ "foot_kpts": [
+ 300.24175,
+ 336.83838,
+ 2.0,
+ 306.59015,
+ 335.34464,
+ 2.0,
+ 290.07408,
+ 326.47826,
+ 2.0,
+ 182.60972,
+ 314.05885,
+ 2.0,
+ 175.88789,
+ 305.84328,
+ 2.0,
+ 189.70499,
+ 302.48236,
+ 2.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 287.17,
+ 121.42,
+ 294.22,
+ 106.44,
+ 302.15,
+ 116.13,
+ 303.03,
+ 121.42
+ ],
+ [
+ 297.74,
+ 99.39,
+ 310.08,
+ 76.49,
+ 326.81,
+ 76.49,
+ 329.46,
+ 67.68,
+ 337.38,
+ 61.52,
+ 346.19,
+ 62.4,
+ 353.24,
+ 65.92,
+ 353.24,
+ 76.49,
+ 355.88,
+ 84.42,
+ 359.41,
+ 87.94,
+ 362.05,
+ 96.75,
+ 354.12,
+ 139.04,
+ 349.72,
+ 142.56,
+ 345.31,
+ 139.92,
+ 349.72,
+ 117.89,
+ 348.84,
+ 108.2,
+ 345.31,
+ 113.49,
+ 336.5,
+ 101.16,
+ 325.93,
+ 110.85,
+ 311.84,
+ 123.18
+ ],
+ [
+ 324.17,
+ 176.91,
+ 332.1,
+ 191.89,
+ 328.58,
+ 198.94,
+ 327.69,
+ 205.98,
+ 333.86,
+ 213.03,
+ 337.38,
+ 227.13,
+ 332.98,
+ 227.13,
+ 319.77,
+ 219.2,
+ 313.6,
+ 211.27
+ ],
+ [
+ 332.98,
+ 165.46,
+ 341.79,
+ 161.06,
+ 336.5,
+ 174.27,
+ 333.86,
+ 186.6,
+ 326.81,
+ 176.03
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 3404.869,
+ "iscrowd": 0,
+ "keypoints": [
+ 345,
+ 92,
+ 2,
+ 350,
+ 87,
+ 2,
+ 341,
+ 87,
+ 2,
+ 0,
+ 0,
+ 0,
+ 330,
+ 83,
+ 2,
+ 357,
+ 94,
+ 2,
+ 316,
+ 92,
+ 2,
+ 357,
+ 104,
+ 2,
+ 291,
+ 123,
+ 1,
+ 351,
+ 133,
+ 2,
+ 281,
+ 136,
+ 1,
+ 326,
+ 131,
+ 1,
+ 305,
+ 128,
+ 1,
+ 336,
+ 152,
+ 1,
+ 303,
+ 171,
+ 1,
+ 318,
+ 206,
+ 2,
+ 294,
+ 211,
+ 1
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 287.17,
+ 61.52,
+ 74.88,
+ 165.61
+ ],
+ "category_id": 1,
+ "id": 467657,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": true,
+ "foot_kpts": [
+ 322.595,
+ 216.245,
+ 2.0,
+ 327.23077,
+ 215.42692,
+ 2.0,
+ 316.81553,
+ 207.67155,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 547.95,
+ 201.57,
+ 546.73,
+ 190.62,
+ 547.95,
+ 181.49,
+ 547.95,
+ 169.31,
+ 547.95,
+ 156.53,
+ 546.73,
+ 144.36,
+ 544.3,
+ 139.49,
+ 540.04,
+ 132.19,
+ 540.04,
+ 121.84,
+ 542.47,
+ 107.24,
+ 544.3,
+ 99.33,
+ 548.56,
+ 88.98,
+ 561.95,
+ 78.03,
+ 572.29,
+ 71.33,
+ 572.29,
+ 71.33,
+ 572.29,
+ 65.25,
+ 574.12,
+ 51.86,
+ 583.86,
+ 48.81,
+ 592.99,
+ 48.81,
+ 597.86,
+ 57.33,
+ 599.07,
+ 64.64,
+ 608.2,
+ 76.81,
+ 614.9,
+ 82.89,
+ 620.98,
+ 89.59,
+ 628.89,
+ 93.24,
+ 636.81,
+ 101.76,
+ 640,
+ 109.67,
+ 640,
+ 115.76,
+ 640,
+ 127.93,
+ 620.37,
+ 111.5,
+ 619.16,
+ 111.5,
+ 618.55,
+ 112.11,
+ 608.2,
+ 105.41,
+ 600.9,
+ 119.41,
+ 592.99,
+ 131.58,
+ 596.03,
+ 148.01,
+ 605.16,
+ 162.01,
+ 612.46,
+ 190.01,
+ 614.9,
+ 204.61,
+ 606.98,
+ 216.78,
+ 603.94,
+ 226.52,
+ 606.38,
+ 239.91,
+ 605.16,
+ 256.95,
+ 604.55,
+ 264.26,
+ 602.12,
+ 271.56,
+ 586.29,
+ 272.17,
+ 584.47,
+ 255.13,
+ 588.73,
+ 237.48,
+ 592.99,
+ 221.65,
+ 596.64,
+ 207.05,
+ 596.64,
+ 197.31,
+ 594.2,
+ 186.96,
+ 584.47,
+ 172.36,
+ 577.77,
+ 166.27,
+ 570.47,
+ 170.53,
+ 558.91,
+ 179.66,
+ 555.86,
+ 192.44,
+ 548.56,
+ 198.53,
+ 547.95,
+ 198.53
+ ]
+ ],
+ "num_keypoints": 15,
+ "area": 8913.98475,
+ "iscrowd": 0,
+ "keypoints": [
+ 591,
+ 78,
+ 2,
+ 594,
+ 74,
+ 2,
+ 586,
+ 74,
+ 2,
+ 0,
+ 0,
+ 0,
+ 573,
+ 70,
+ 2,
+ 598,
+ 86,
+ 2,
+ 566,
+ 93,
+ 2,
+ 626,
+ 105,
+ 2,
+ 546,
+ 126,
+ 2,
+ 0,
+ 0,
+ 0,
+ 561,
+ 150,
+ 2,
+ 582,
+ 150,
+ 2,
+ 557,
+ 154,
+ 2,
+ 606,
+ 194,
+ 2,
+ 558,
+ 209,
+ 1,
+ 591,
+ 252,
+ 2,
+ 539,
+ 262,
+ 1
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 540.04,
+ 48.81,
+ 99.96,
+ 223.36
+ ],
+ "category_id": 1,
+ "id": 531914,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 557.05,
+ 149.73,
+ 19.879999999999995,
+ 21.76000000000002
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 565.0,
+ 153.0,
+ 0.08773341029882431,
+ 568.0,
+ 156.0,
+ 0.04602484405040741,
+ 571.0,
+ 159.0,
+ 0.04602484405040741,
+ 573.0,
+ 161.0,
+ 0.06972061097621918,
+ 575.0,
+ 164.0,
+ 0.06297813355922699,
+ 569.0,
+ 158.0,
+ 0.294232040643692,
+ 570.0,
+ 162.0,
+ 0.26472434401512146,
+ 570.0,
+ 166.0,
+ 0.2826344072818756,
+ 571.0,
+ 171.0,
+ 0.374575674533844,
+ 565.0,
+ 159.0,
+ 0.2154899388551712,
+ 566.0,
+ 162.0,
+ 0.21613340079784393,
+ 566.0,
+ 164.0,
+ 0.2544613480567932,
+ 567.0,
+ 168.0,
+ 0.31771761178970337,
+ 562.0,
+ 160.0,
+ 0.23286579549312592,
+ 563.0,
+ 166.0,
+ 0.1579097956418991,
+ 564.0,
+ 166.0,
+ 0.17961391806602478,
+ 564.0,
+ 166.0,
+ 0.17504136264324188,
+ 559.0,
+ 160.0,
+ 0.3428754508495331,
+ 559.0,
+ 162.0,
+ 0.2897874116897583,
+ 561.0,
+ 165.0,
+ 0.24125981330871582,
+ 562.0,
+ 166.0,
+ 0.20118576288223267
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": true,
+ "foot_valid": true,
+ "foot_kpts": [
+ 599.72032,
+ 264.75714,
+ 2.0,
+ 603.91172,
+ 265.80499,
+ 2.0,
+ 585.74897,
+ 265.10642,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 561.51,
+ 385.38,
+ 572.11,
+ 352.71,
+ 570.34,
+ 317.4,
+ 559.75,
+ 282.08,
+ 552.68,
+ 267.07,
+ 565.93,
+ 236.17,
+ 583.59,
+ 236.17,
+ 602.13,
+ 260.01,
+ 614.49,
+ 286.5,
+ 628.61,
+ 302.39,
+ 639.21,
+ 281.2,
+ 614.49,
+ 251.18,
+ 588,
+ 218.51,
+ 595.95,
+ 202.62,
+ 594.18,
+ 185.85,
+ 580.05,
+ 170.84,
+ 562.4,
+ 179.67,
+ 557.98,
+ 198.21,
+ 554.45,
+ 202.62,
+ 532.38,
+ 199.97,
+ 525.32,
+ 202.62,
+ 511.19,
+ 229.11,
+ 493.53,
+ 256.48,
+ 484.7,
+ 276.78,
+ 451.15,
+ 323.58,
+ 423.78,
+ 338.59,
+ 388.47,
+ 373.9,
+ 372.58,
+ 387.14,
+ 396.41,
+ 388.03,
+ 418.49,
+ 367.72,
+ 450.27,
+ 345.65,
+ 501.48,
+ 306.8,
+ 520.02,
+ 301.5,
+ 552.68,
+ 340.35,
+ 543.86,
+ 369.49
+ ]
+ ],
+ "num_keypoints": 16,
+ "area": 14267.20475,
+ "iscrowd": 0,
+ "keypoints": [
+ 580,
+ 211,
+ 2,
+ 586,
+ 206,
+ 2,
+ 574,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 562,
+ 198,
+ 2,
+ 584,
+ 220,
+ 2,
+ 529,
+ 215,
+ 2,
+ 599,
+ 242,
+ 2,
+ 512,
+ 260,
+ 2,
+ 619,
+ 274,
+ 2,
+ 538,
+ 285,
+ 2,
+ 537,
+ 288,
+ 2,
+ 506,
+ 277,
+ 2,
+ 562,
+ 332,
+ 2,
+ 452,
+ 332,
+ 2,
+ 550,
+ 387,
+ 1,
+ 402,
+ 371,
+ 2
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 372.58,
+ 170.84,
+ 266.63,
+ 217.19
+ ],
+ "category_id": 1,
+ "id": 533949,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 615.22,
+ 271.56,
+ 22.139999999999986,
+ 28.839999999999975
+ ],
+ "righthand_box": [
+ 538.83,
+ 283.74,
+ 25.639999999999986,
+ 30.659999999999968
+ ],
+ "lefthand_kpts": [
+ 620.284,
+ 274.54006,
+ 1,
+ 621.65135,
+ 282.30908999999997,
+ 1,
+ 623.0187,
+ 290.07812,
+ 1,
+ 625.38048,
+ 294.55308,
+ 1,
+ 628.86101,
+ 298.90373999999997,
+ 1,
+ 630.22836,
+ 289.20799,
+ 1,
+ 634.57901,
+ 292.43991,
+ 1,
+ 633.08736,
+ 295.54752,
+ 1,
+ 628.6124,
+ 295.42321,
+ 1,
+ 632.46584,
+ 286.5976,
+ 1,
+ 631.3,
+ 291.9,
+ 1,
+ 627.7,
+ 291.6,
+ 1,
+ 625.6,
+ 288.9,
+ 1,
+ 633.7,
+ 284.2,
+ 1,
+ 632.3,
+ 288.0,
+ 1,
+ 629.1,
+ 288.0,
+ 1,
+ 627.0,
+ 285.9,
+ 1,
+ 633.2,
+ 280.4,
+ 1,
+ 632.8,
+ 283.6,
+ 1,
+ 630.8,
+ 284.4,
+ 1,
+ 629.1,
+ 283.2,
+ 1
+ ],
+ "righthand_kpts": [
+ 544.0,
+ 291.0,
+ 0.09089653939008713,
+ 551.0,
+ 291.0,
+ 0.041192591190338135,
+ 558.0,
+ 291.0,
+ 0.041192591190338135,
+ 559.0,
+ 294.0,
+ 0.056781601160764694,
+ 563.0,
+ 298.0,
+ 0.2960541546344757,
+ 559.0,
+ 296.0,
+ 0.18105527758598328,
+ 562.0,
+ 301.0,
+ 0.12244582921266556,
+ 559.0,
+ 308.0,
+ 0.05529222637414932,
+ 564.0,
+ 306.0,
+ 0.05997529253363609,
+ 555.0,
+ 299.0,
+ 0.18805834650993347,
+ 556.0,
+ 302.0,
+ 0.1534559577703476,
+ 555.0,
+ 306.0,
+ 0.20564205944538116,
+ 556.0,
+ 309.0,
+ 0.06228385493159294,
+ 550.0,
+ 300.0,
+ 0.1409723311662674,
+ 550.0,
+ 301.0,
+ 0.2223101258277893,
+ 551.0,
+ 305.0,
+ 0.2001882642507553,
+ 553.0,
+ 308.0,
+ 0.1712668538093567,
+ 545.0,
+ 302.0,
+ 0.1908813714981079,
+ 546.0,
+ 304.0,
+ 0.13619276881217957,
+ 547.0,
+ 306.0,
+ 0.19773860275745392,
+ 549.0,
+ 308.0,
+ 0.1341865360736847
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": true,
+ "righthand_valid": true,
+ "foot_valid": true,
+ "foot_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 375.00826,
+ 386.35839,
+ 2.0,
+ 399.52454,
+ 375.91627,
+ 2.0
+ ]
+ },
+ {
+ "segmentation": [
+ [
+ 2.03,
+ 75.18,
+ 10.85,
+ 70.58,
+ 16.99,
+ 65.59,
+ 17.75,
+ 55.24,
+ 20.05,
+ 50.25,
+ 29.64,
+ 43.74,
+ 37.31,
+ 47.57,
+ 41.52,
+ 53.7,
+ 43.83,
+ 64.82,
+ 53.03,
+ 70.19,
+ 61.85,
+ 77.09,
+ 72.58,
+ 87.06,
+ 74.88,
+ 79.01,
+ 78.72,
+ 73.64,
+ 86.39,
+ 77.86,
+ 90.6,
+ 90.13,
+ 86,
+ 93.2,
+ 82.17,
+ 102.4,
+ 75.27,
+ 106.24,
+ 68.75,
+ 104.7,
+ 50.34,
+ 90.9,
+ 43.06,
+ 112.37,
+ 40.76,
+ 123.11,
+ 42.29,
+ 130.78,
+ 48.04,
+ 161.83,
+ 52.26,
+ 190.59,
+ 50.73,
+ 210.15,
+ 44.21,
+ 245.04,
+ 50.34,
+ 256.16,
+ 53.03,
+ 261.53,
+ 47.28,
+ 263.83,
+ 40.37,
+ 263.83,
+ 31.56,
+ 260.76,
+ 28.1,
+ 256.16,
+ 26.95,
+ 244.65,
+ 29.25,
+ 233.54,
+ 32.71,
+ 223.95,
+ 33.09,
+ 213.98,
+ 32.32,
+ 206.31,
+ 32.71,
+ 194.81,
+ 33.09,
+ 185.61,
+ 24.65,
+ 177.17,
+ 16.99,
+ 161.45,
+ 13.53,
+ 176.02,
+ 10.85,
+ 206.31,
+ 1.65,
+ 231.62,
+ 1.65,
+ 235.84,
+ 0.5,
+ 146.88,
+ 0.88,
+ 122.34,
+ 1.65,
+ 75.56
+ ]
+ ],
+ "num_keypoints": 13,
+ "area": 8260.75085,
+ "iscrowd": 0,
+ "keypoints": [
+ 36,
+ 79,
+ 2,
+ 40,
+ 74,
+ 2,
+ 31,
+ 75,
+ 2,
+ 0,
+ 0,
+ 0,
+ 19,
+ 69,
+ 2,
+ 45,
+ 77,
+ 2,
+ 2,
+ 89,
+ 2,
+ 74,
+ 99,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 92,
+ 2,
+ 0,
+ 0,
+ 0,
+ 33,
+ 149,
+ 2,
+ 7,
+ 153,
+ 2,
+ 44,
+ 196,
+ 2,
+ 2,
+ 205,
+ 2,
+ 35,
+ 245,
+ 2,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 0.5,
+ 43.74,
+ 90.1,
+ 220.09
+ ],
+ "category_id": 1,
+ "id": 543117,
+ "face_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_box": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "lefthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "righthand_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_kpts": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "face_valid": false,
+ "lefthand_valid": false,
+ "righthand_valid": false,
+ "foot_valid": true,
+ "foot_kpts": [
+ 43.80826,
+ 259.40011,
+ 2.0,
+ 48.63752,
+ 257.67537,
+ 2.0,
+ 32.08007,
+ 256.29558,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/cofw/001766.jpg b/vendor/ViTPose/tests/data/cofw/001766.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..233cb61677c395db45b122120086e1569ed4218a
Binary files /dev/null and b/vendor/ViTPose/tests/data/cofw/001766.jpg differ
diff --git a/vendor/ViTPose/tests/data/cofw/001805.jpg b/vendor/ViTPose/tests/data/cofw/001805.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e652565b60d5a6420668f36f1e0713759b9d96c
Binary files /dev/null and b/vendor/ViTPose/tests/data/cofw/001805.jpg differ
diff --git a/vendor/ViTPose/tests/data/cofw/test_cofw.json b/vendor/ViTPose/tests/data/cofw/test_cofw.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8e6ac5d0bf0147c5f56ee3bed5d814aa171e493
--- /dev/null
+++ b/vendor/ViTPose/tests/data/cofw/test_cofw.json
@@ -0,0 +1,239 @@
+{
+ "info": {
+ "description": "MMPose example COFW dataset",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/12/31"
+ },
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "face",
+ "keypoints": [],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 1766,
+ "file_name": "001766.jpg",
+ "height": 322,
+ "width": 235
+ },
+ {
+ "id": 1805,
+ "file_name": "001805.jpg",
+ "height": 253,
+ "width": 352
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 46.562534549474826,
+ 120.07575548185008,
+ 2.0,
+ 131.52522111663905,
+ 115.97127786990977,
+ 2.0,
+ 65.8535793255943,
+ 118.02351667587989,
+ 2.0,
+ 105.66701216141516,
+ 114.73993458632765,
+ 2.0,
+ 56.41328081813161,
+ 110.63545697438737,
+ 2.0,
+ 56.41328081813161,
+ 118.43396443707394,
+ 2.0,
+ 119.62223604201218,
+ 111.45635249677541,
+ 2.0,
+ 119.62223604201218,
+ 117.61306891468585,
+ 2.0,
+ 51.89835544499728,
+ 144.29217339229783,
+ 2.0,
+ 129.06253454947486,
+ 143.88172563110376,
+ 2.0,
+ 72.83119126589278,
+ 145.9339644370739,
+ 2.0,
+ 108.12969872857936,
+ 143.47127786990976,
+ 2.0,
+ 62.980444997236056,
+ 136.0832181684172,
+ 2.0,
+ 60.928206191265936,
+ 148.3966510042381,
+ 2.0,
+ 120.03268380320623,
+ 136.90411369080527,
+ 2.0,
+ 120.44313156440026,
+ 147.16530772065602,
+ 2.0,
+ 65.44313156440026,
+ 143.06083010871572,
+ 2.0,
+ 120.44313156440026,
+ 143.06083010871572,
+ 2.0,
+ 65.44313156440026,
+ 184.10560622811872,
+ 2.0,
+ 103.61477335544504,
+ 183.69515846692468,
+ 2.0,
+ 78.5774599226092,
+ 180.0011286161784,
+ 2.0,
+ 79.80880320619127,
+ 190.26232264602916,
+ 2.0,
+ 60.928206191265936,
+ 212.8369495117008,
+ 1.0,
+ 116.74910171365397,
+ 214.4787405564769,
+ 1.0,
+ 83.50283305693756,
+ 207.5011286161784,
+ 1.0,
+ 83.91328081813161,
+ 211.19515846692468,
+ 1.0,
+ 88.83865395245994,
+ 232.94888981020827,
+ 1.0,
+ 88.83865395245994,
+ 239.92650175050676,
+ 1.0,
+ 94.99537037037038,
+ 271.120531601253,
+ 2.0
+ ],
+ "image_id": 1766,
+ "id": 1766,
+ "num_keypoints": 29,
+ "bbox": [
+ 38.0,
+ 102.0,
+ 105.0,
+ 180.0
+ ],
+ "iscrowd": 0,
+ "area": 18900.0,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 111.02581748226716,
+ 99.60260061678404,
+ 1.0,
+ 203.48326006615514,
+ 99.60260061678402,
+ 2.0,
+ 138.9556699294833,
+ 104.89964159815261,
+ 1.0,
+ 158.69918631458435,
+ 101.04724815715728,
+ 2.0,
+ 124.02764534562637,
+ 98.6395022565352,
+ 1.0,
+ 124.50919452575083,
+ 102.49189569753052,
+ 1.0,
+ 178.44270269968544,
+ 92.37936291491776,
+ 2.0,
+ 178.92425187980987,
+ 97.19485471616193,
+ 2.0,
+ 116.80440764376011,
+ 115.9752727410142,
+ 1.0,
+ 192.40762892329354,
+ 115.9752727410142,
+ 2.0,
+ 137.02947320898565,
+ 120.30921536213394,
+ 2.0,
+ 165.44087483632623,
+ 119.82766618200952,
+ 2.0,
+ 125.47229288599961,
+ 114.04907602051652,
+ 1.0,
+ 126.43539124624846,
+ 120.30921536213394,
+ 1.0,
+ 180.3688994201831,
+ 107.78893667889912,
+ 2.0,
+ 180.3688994201831,
+ 118.8645678217607,
+ 2.0,
+ 125.95384206612407,
+ 118.38301864163628,
+ 1.0,
+ 180.85044860030752,
+ 115.01217438076534,
+ 2.0,
+ 132.69553058786587,
+ 149.68371534972337,
+ 2.0,
+ 162.0700305754553,
+ 152.09146125034545,
+ 2.0,
+ 143.77116173072744,
+ 147.7575186292257,
+ 2.0,
+ 144.2527109108519,
+ 157.388502231714,
+ 2.0,
+ 132.21398140774147,
+ 168.4641333745756,
+ 1.0,
+ 184.22129286117845,
+ 167.9825841944512,
+ 2.0,
+ 149.06820271209602,
+ 165.09328911370469,
+ 2.0,
+ 150.51285025246932,
+ 170.3903300950733,
+ 2.0,
+ 151.95749779284256,
+ 182.42905959818367,
+ 2.0,
+ 153.4021453332158,
+ 188.6891989398011,
+ 2.0,
+ 155.32834205371347,
+ 218.54524810751494,
+ 2.0
+ ],
+ "image_id": 1805,
+ "id": 1805,
+ "num_keypoints": 29,
+ "bbox": [
+ 102.0,
+ 83.0,
+ 112.0,
+ 146.0
+ ],
+ "iscrowd": 0,
+ "area": 16352.0,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/crowdpose/103319.jpg b/vendor/ViTPose/tests/data/crowdpose/103319.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fd435b474a3164339f85fd4d439980d6fe275b10
Binary files /dev/null and b/vendor/ViTPose/tests/data/crowdpose/103319.jpg differ
diff --git a/vendor/ViTPose/tests/data/crowdpose/106848.jpg b/vendor/ViTPose/tests/data/crowdpose/106848.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5add3e255534c27e10a68481b5572f5da1311e61
Binary files /dev/null and b/vendor/ViTPose/tests/data/crowdpose/106848.jpg differ
diff --git a/vendor/ViTPose/tests/data/crowdpose/test_crowdpose.json b/vendor/ViTPose/tests/data/crowdpose/test_crowdpose.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e9d9b7a8a6955a8901ed3580cc40b327abf2613
--- /dev/null
+++ b/vendor/ViTPose/tests/data/crowdpose/test_crowdpose.json
@@ -0,0 +1,378 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle",
+ "head",
+ "neck"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "file_name": "106848.jpg",
+ "id": 106848,
+ "height": 425,
+ "width": 640,
+ "crowdIndex": 0.33
+ },
+ {
+ "file_name": "103319.jpg",
+ "id": 103319,
+ "height": 480,
+ "width": 640,
+ "crowdIndex": 0.39
+ }
+ ],
+ "annotations": [
+ {
+ "num_keypoints": 5,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 208,
+ 108,
+ 2,
+ 0,
+ 0,
+ 0,
+ 278,
+ 158,
+ 2,
+ 262,
+ 206,
+ 2,
+ 348,
+ 98,
+ 2,
+ 0,
+ 0,
+ 0,
+ 173,
+ 299,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 256,
+ 27,
+ 1,
+ 220,
+ 89,
+ 1
+ ],
+ "image_id": 106848,
+ "bbox": [
+ 106.01,
+ 13.43,
+ 273.15,
+ 352.42
+ ],
+ "category_id": 1,
+ "id": 123803
+ },
+ {
+ "num_keypoints": 0,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 106848,
+ "bbox": [
+ 108.5,
+ 96.78,
+ 35.46,
+ 30.23
+ ],
+ "category_id": 1,
+ "id": 131039
+ },
+ {
+ "num_keypoints": 10,
+ "iscrowd": 0,
+ "keypoints": [
+ 482,
+ 129,
+ 2,
+ 364,
+ 126,
+ 2,
+ 513,
+ 213,
+ 2,
+ 339,
+ 163,
+ 2,
+ 431,
+ 210,
+ 2,
+ 276,
+ 163,
+ 1,
+ 440,
+ 308,
+ 2,
+ 371,
+ 304,
+ 1,
+ 432,
+ 419,
+ 1,
+ 366,
+ 419,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 407,
+ 29,
+ 1,
+ 420,
+ 110,
+ 1
+ ],
+ "image_id": 106848,
+ "bbox": [
+ 281.51,
+ 21.92,
+ 244.5,
+ 349.72
+ ],
+ "category_id": 1,
+ "id": 147481
+ },
+ {
+ "num_keypoints": 12,
+ "iscrowd": 0,
+ "keypoints": [
+ 388,
+ 205,
+ 2,
+ 344,
+ 211,
+ 2,
+ 407,
+ 249,
+ 2,
+ 337,
+ 256,
+ 2,
+ 393,
+ 278,
+ 2,
+ 336,
+ 290,
+ 2,
+ 390,
+ 293,
+ 2,
+ 354,
+ 294,
+ 2,
+ 387,
+ 354,
+ 2,
+ 351,
+ 357,
+ 2,
+ 380,
+ 390,
+ 2,
+ 359,
+ 408,
+ 1,
+ 351,
+ 163,
+ 1,
+ 364,
+ 198,
+ 1
+ ],
+ "image_id": 103319,
+ "bbox": [
+ 316.76,
+ 157.3,
+ 100.54,
+ 247.56
+ ],
+ "category_id": 1,
+ "id": 127068
+ },
+ {
+ "num_keypoints": 12,
+ "iscrowd": 0,
+ "keypoints": [
+ 350,
+ 145,
+ 2,
+ 300,
+ 145,
+ 2,
+ 352,
+ 190,
+ 1,
+ 299,
+ 180,
+ 2,
+ 322,
+ 163,
+ 2,
+ 291,
+ 217,
+ 2,
+ 346,
+ 232,
+ 1,
+ 314,
+ 232,
+ 2,
+ 346,
+ 283,
+ 1,
+ 310,
+ 284,
+ 2,
+ 345,
+ 346,
+ 1,
+ 305,
+ 344,
+ 2,
+ 312,
+ 106,
+ 1,
+ 323,
+ 137,
+ 1
+ ],
+ "image_id": 103319,
+ "bbox": [
+ 279.68,
+ 102.17,
+ 81.13,
+ 255.49
+ ],
+ "category_id": 1,
+ "id": 129014
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/crowdpose/test_crowdpose_det_AP_40.json b/vendor/ViTPose/tests/data/crowdpose/test_crowdpose_det_AP_40.json
new file mode 100644
index 0000000000000000000000000000000000000000..36d0572bb4582009f39c0fc1c43ddf70b88ac76b
--- /dev/null
+++ b/vendor/ViTPose/tests/data/crowdpose/test_crowdpose_det_AP_40.json
@@ -0,0 +1,68 @@
+[
+ {
+ "bbox": [
+ 120.36583709716797,
+ 30.521512985229492,
+ 244.14288330078125,
+ 328.944580078125
+ ],
+ "category_id": 1,
+ "image_id": 106848,
+ "score": 0.9999284744262695
+ },
+ {
+ "bbox": [
+ 326.6805725097656,
+ 30.76219940185547,
+ 209.03128051757812,
+ 327.80035400390625
+ ],
+ "category_id": 1,
+ "image_id": 106848,
+ "score": 0.9993789196014404
+ },
+ {
+ "bbox": [
+ 109.94915008544922,
+ 95.794677734375,
+ 32.249656677246094,
+ 26.97345733642578
+ ],
+ "category_id": 1,
+ "image_id": 106848,
+ "score": 0.9997813105583191
+ },
+ {
+ "bbox": [
+ 315.21368408203125,
+ 149.79432678222656,
+ 100.7252197265625,
+ 259.96405029296875
+ ],
+ "category_id": 1,
+ "image_id": 103319,
+ "score": 0.9998345375061035
+ },
+ {
+ "bbox": [
+ 282.7766418457031,
+ 100.75929260253906,
+ 76.44869995117188,
+ 261.6209716796875
+ ],
+ "category_id": 1,
+ "image_id": 103319,
+ "score": 0.9998021721839905
+ },
+ {
+ "bbox": [
+ -0.10778862237930298,
+ 238.81455993652344,
+ 38.23238754272461,
+ 99.21165466308594
+ ],
+ "category_id": 1,
+ "image_id": 103319,
+ "score": 0.9673888683319092
+ }
+]
diff --git a/vendor/ViTPose/tests/data/fld/img_00000128.jpg b/vendor/ViTPose/tests/data/fld/img_00000128.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1f49980bd730de0c48ee78a566154b5047fd5f78
Binary files /dev/null and b/vendor/ViTPose/tests/data/fld/img_00000128.jpg differ
diff --git a/vendor/ViTPose/tests/data/fld/img_00000132.jpg b/vendor/ViTPose/tests/data/fld/img_00000132.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fa224e4e29b3b19276885f4c868c3f0f6055a939
Binary files /dev/null and b/vendor/ViTPose/tests/data/fld/img_00000132.jpg differ
diff --git a/vendor/ViTPose/tests/data/fld/test_fld.json b/vendor/ViTPose/tests/data/fld/test_fld.json
new file mode 100644
index 0000000000000000000000000000000000000000..9996ac3dc2d9542f215b84d4d301bc76cdfe8580
--- /dev/null
+++ b/vendor/ViTPose/tests/data/fld/test_fld.json
@@ -0,0 +1,123 @@
+{
+ "info": {
+ "description": "Fashion Landmark Detection (FLD) test set for full-body clothes generated by MMPose Team.",
+ "url": "http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/LandmarkDetection.html",
+ "version": "1.0",
+ "year": "2021",
+ "date_created": "2021/01/02"
+ },
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "fashion",
+ "keypoints": [
+ "left collar",
+ "right collar",
+ "left sleeve",
+ "right sleeve",
+ "left waistline",
+ "right waistline",
+ "left hem",
+ "right hem"
+ ],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 128,
+ "file_name": "img_00000128.jpg",
+ "height": 250,
+ "width": 200
+ },
+ {
+ "id": 132,
+ "file_name": "img_00000132.jpg",
+ "height": 250,
+ "width": 200
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 108.0,
+ 33.0,
+ 1.0,
+ 121.0,
+ 36.0,
+ 1.0,
+ 95.0,
+ 65.0,
+ 2.0,
+ 128.0,
+ 68.0,
+ 1.0,
+ 118.0,
+ 79.0,
+ 2.0,
+ 114.0,
+ 75.0,
+ 1.0,
+ 115.0,
+ 150.0,
+ 2.0,
+ 112.0,
+ 143.0,
+ 1.0
+ ],
+ "num_keypoints": 8,
+ "bbox": [
+ 88,
+ 21,
+ 48,
+ 142
+ ],
+ "iscrowd": 0,
+ "area": 6816,
+ "category_id": 1,
+ "id": 128,
+ "image_id": 128
+ },
+ {
+ "keypoints": [
+ 71.0,
+ 19.0,
+ 2.0,
+ 127.0,
+ 15.0,
+ 2.0,
+ 37.0,
+ 80.0,
+ 2.0,
+ 162.0,
+ 71.0,
+ 2.0,
+ 60.0,
+ 112.0,
+ 2.0,
+ 145.0,
+ 109.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 172.0,
+ 235.0,
+ 2.0
+ ],
+ "num_keypoints": 7,
+ "bbox": [
+ 1,
+ 1,
+ 199,
+ 249
+ ],
+ "iscrowd": 0,
+ "area": 49551,
+ "category_id": 1,
+ "id": 132,
+ "image_id": 132
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/fly/1400.jpg b/vendor/ViTPose/tests/data/fly/1400.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0da95bea75ed3293277ad576ac272b1e1b8ae220
Binary files /dev/null and b/vendor/ViTPose/tests/data/fly/1400.jpg differ
diff --git a/vendor/ViTPose/tests/data/fly/1450.jpg b/vendor/ViTPose/tests/data/fly/1450.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dd1312c4bb7aef808870545f978709ef960b3a89
Binary files /dev/null and b/vendor/ViTPose/tests/data/fly/1450.jpg differ
diff --git a/vendor/ViTPose/tests/data/fly/test_fly.json b/vendor/ViTPose/tests/data/fly/test_fly.json
new file mode 100644
index 0000000000000000000000000000000000000000..7cb11a0d178233fc751bbcbbaeed81e94df8b42f
--- /dev/null
+++ b/vendor/ViTPose/tests/data/fly/test_fly.json
@@ -0,0 +1,385 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "fly",
+ "keypoints": [
+ "head",
+ "eyeL",
+ "eyeR",
+ "neck",
+ "thorax",
+ "abdomen",
+ "forelegR1",
+ "forelegR2",
+ "forelegR3",
+ "forelegR4",
+ "midlegR1",
+ "midlegR2",
+ "midlegR3",
+ "midlegR4",
+ "hindlegR1",
+ "hindlegR2",
+ "hindlegR3",
+ "hindlegR4",
+ "forelegL1",
+ "forelegL2",
+ "forelegL3",
+ "forelegL4",
+ "midlegL1",
+ "midlegL2",
+ "midlegL3",
+ "midlegL4",
+ "hindlegL1",
+ "hindlegL2",
+ "hindlegL3",
+ "hindlegL4",
+ "wingL",
+ "wingR"
+ ],
+ "skeleton": [
+ [
+ 2,
+ 1
+ ],
+ [
+ 3,
+ 1
+ ],
+ [
+ 4,
+ 1
+ ],
+ [
+ 5,
+ 4
+ ],
+ [
+ 6,
+ 5
+ ],
+ [
+ 8,
+ 7
+ ],
+ [
+ 9,
+ 8
+ ],
+ [
+ 10,
+ 9
+ ],
+ [
+ 12,
+ 11
+ ],
+ [
+ 13,
+ 12
+ ],
+ [
+ 14,
+ 13
+ ],
+ [
+ 16,
+ 15
+ ],
+ [
+ 17,
+ 16
+ ],
+ [
+ 18,
+ 17
+ ],
+ [
+ 20,
+ 19
+ ],
+ [
+ 21,
+ 20
+ ],
+ [
+ 22,
+ 21
+ ],
+ [
+ 24,
+ 23
+ ],
+ [
+ 25,
+ 24
+ ],
+ [
+ 26,
+ 25
+ ],
+ [
+ 28,
+ 27
+ ],
+ [
+ 29,
+ 28
+ ],
+ [
+ 30,
+ 29
+ ],
+ [
+ 31,
+ 4
+ ],
+ [
+ 32,
+ 4
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "id": 1400,
+ "file_name": "1400.jpg",
+ "height": 192,
+ "width": 192
+ },
+ {
+ "id": 1450,
+ "file_name": "1450.jpg",
+ "height": 192,
+ "width": 192
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 146.0,
+ 95.0,
+ 2.0,
+ 134.0,
+ 80.0,
+ 2.0,
+ 136.0,
+ 112.0,
+ 2.0,
+ 129.0,
+ 97.0,
+ 2.0,
+ 99.0,
+ 97.0,
+ 2.0,
+ 52.0,
+ 95.0,
+ 2.0,
+ 123.0,
+ 107.0,
+ 2.0,
+ 140.0,
+ 114.0,
+ 2.0,
+ 158.0,
+ 109.0,
+ 2.0,
+ 173.0,
+ 109.0,
+ 2.0,
+ 110.0,
+ 106.0,
+ 2.0,
+ 115.0,
+ 127.0,
+ 2.0,
+ 133.0,
+ 127.0,
+ 2.0,
+ 146.0,
+ 138.0,
+ 2.0,
+ 96.0,
+ 106.0,
+ 2.0,
+ 64.18991088867188,
+ 120.96142578125,
+ 2.0,
+ 46.0,
+ 126.0,
+ 2.0,
+ 34.0,
+ 137.0,
+ 2.0,
+ 121.0,
+ 86.0,
+ 2.0,
+ 147.0,
+ 78.0,
+ 2.0,
+ 169.0,
+ 79.0,
+ 2.0,
+ 184.0,
+ 75.0,
+ 2.0,
+ 108.0,
+ 86.0,
+ 2.0,
+ 103.0,
+ 70.0,
+ 2.0,
+ 109.0,
+ 40.0,
+ 2.0,
+ 114.0,
+ 18.0,
+ 2.0,
+ 93.0,
+ 87.0,
+ 2.0,
+ 82.0,
+ 64.0,
+ 2.0,
+ 74.0,
+ 46.0,
+ 2.0,
+ 67.0,
+ 22.0,
+ 2.0,
+ 19.0,
+ 86.0,
+ 2.0,
+ 23.0,
+ 137.0,
+ 2.0
+ ],
+ "image_id": 1400,
+ "id": 1400,
+ "num_keypoints": 32,
+ "bbox": [
+ 19.0,
+ 18.0,
+ 166.0,
+ 121.0
+ ],
+ "iscrowd": 0,
+ "area": 20086.0,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 147.43026733398438,
+ 96.94955444335938,
+ 2.0,
+ 137.32937622070312,
+ 79.7210693359375,
+ 2.0,
+ 138.43026733398438,
+ 110.86053466796875,
+ 2.0,
+ 128.0,
+ 96.0,
+ 2.0,
+ 98.0,
+ 96.0,
+ 2.0,
+ 49.329376220703125,
+ 94.13946533203125,
+ 2.0,
+ 122.37982177734375,
+ 108.81008911132812,
+ 2.0,
+ 129.43026733398438,
+ 114.18991088867188,
+ 2.0,
+ 138.65875244140625,
+ 114.62017822265625,
+ 2.0,
+ 144.480712890625,
+ 118.7596435546875,
+ 2.0,
+ 112.18991088867188,
+ 109.18991088867188,
+ 2.0,
+ 105.557861328125,
+ 118.43026733398438,
+ 2.0,
+ 95.67062377929688,
+ 121.91098022460938,
+ 2.0,
+ 91.13946533203125,
+ 136.10089111328125,
+ 2.0,
+ 91.46884155273438,
+ 104.2017822265625,
+ 2.0,
+ 73.2403564453125,
+ 117.43026733398438,
+ 2.0,
+ 57.37833786010742,
+ 107.22997283935547,
+ 2.0,
+ 44.87240219116211,
+ 112.96142578125,
+ 2.0,
+ 119.65875244140625,
+ 84.81008911132812,
+ 2.0,
+ 123.78634643554688,
+ 77.94955444335938,
+ 2.0,
+ 132.36795043945312,
+ 76.58160400390625,
+ 2.0,
+ 140.0,
+ 64.0,
+ 2.0,
+ 113.2789306640625,
+ 83.29080200195312,
+ 2.0,
+ 100.08901977539062,
+ 74.98812866210938,
+ 2.0,
+ 98.519287109375,
+ 67.13946533203125,
+ 2.0,
+ 93.62017822265625,
+ 55.810089111328125,
+ 2.0,
+ 94.22848510742188,
+ 85.08901977539062,
+ 2.0,
+ 78.36795043945312,
+ 69.2403564453125,
+ 2.0,
+ 71.60830688476562,
+ 74.58160400390625,
+ 2.0,
+ 60.848663330078125,
+ 68.67062377929688,
+ 2.0,
+ 16.0,
+ 66.0,
+ 2.0,
+ 16.0,
+ 126.0,
+ 2.0
+ ],
+ "image_id": 1450,
+ "id": 1450,
+ "num_keypoints": 32,
+ "bbox": [
+ 16.0,
+ 55.810089111328125,
+ 132.43026733398438,
+ 81.29080200195312
+ ],
+ "iscrowd": 0,
+ "area": 10765.362640912645,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/freihand/00000355.jpg b/vendor/ViTPose/tests/data/freihand/00000355.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4687ec7f4bc945cdf731d425faad3f76aa848a79
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00000355.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00017620.jpg b/vendor/ViTPose/tests/data/freihand/00017620.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ebb2f0f998a0e6a81566e84e5664797086a5757a
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00017620.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00032915.jpg b/vendor/ViTPose/tests/data/freihand/00032915.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bc7ddb0d4461caf6fab8e084d152a2dd22a6b378
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00032915.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00050180.jpg b/vendor/ViTPose/tests/data/freihand/00050180.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..38e7dd1d0968a1b2e5e27338e2156057b2c11983
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00050180.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00065475.jpg b/vendor/ViTPose/tests/data/freihand/00065475.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3858bf7d7f11da98a83019baf4cb2efe4cedd5ed
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00065475.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00082740.jpg b/vendor/ViTPose/tests/data/freihand/00082740.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9ff5e5dd064d2b6b320429f93e177112401503c
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00082740.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00098035.jpg b/vendor/ViTPose/tests/data/freihand/00098035.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ec3b82d53d98c837f1633b3d59a191224eda69d8
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00098035.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/00115300.jpg b/vendor/ViTPose/tests/data/freihand/00115300.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cae2c2c391b2056b012d37df3dc72cceaf5f14b8
Binary files /dev/null and b/vendor/ViTPose/tests/data/freihand/00115300.jpg differ
diff --git a/vendor/ViTPose/tests/data/freihand/test_freihand.json b/vendor/ViTPose/tests/data/freihand/test_freihand.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c6ebd0dc576681e54afdfc46f830f27868d2565
--- /dev/null
+++ b/vendor/ViTPose/tests/data/freihand/test_freihand.json
@@ -0,0 +1,957 @@
+{
+ "info": {
+ "description": "FreiHand",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/09/08"
+ },
+ "licenses": "",
+ "images": [
+ {
+ "file_name": "00017620.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 17620
+ },
+ {
+ "file_name": "00050180.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 50180
+ },
+ {
+ "file_name": "00082740.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 82740
+ },
+ {
+ "file_name": "00115300.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 115300
+ },
+ {
+ "file_name": "00000355.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 355
+ },
+ {
+ "file_name": "00032915.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 32915
+ },
+ {
+ "file_name": "00065475.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 65475
+ },
+ {
+ "file_name": "00098035.jpg",
+ "height": 224,
+ "width": 224,
+ "id": 98035
+ }
+ ],
+ "annotations": [
+ {
+ "bbox": [
+ 62,
+ 82,
+ 104,
+ 63
+ ],
+ "keypoints": [
+ 75.09007144965095,
+ 114.79035385093314,
+ 1,
+ 88.01978404720953,
+ 109.72359615889864,
+ 1,
+ 98.79950536639522,
+ 109.05442666062974,
+ 1,
+ 110.16327936938085,
+ 114.72375114390456,
+ 1,
+ 121.75826373686846,
+ 122.01572654269421,
+ 1,
+ 126.92528942089982,
+ 93.65489136216958,
+ 1,
+ 144.49316505581498,
+ 94.71206260545628,
+ 1,
+ 152.3510241000562,
+ 102.03474955900822,
+ 1,
+ 159.94413202793353,
+ 111.6105502403288,
+ 1,
+ 136.5822887073417,
+ 102.58162787991249,
+ 1,
+ 153.71181890922904,
+ 105.7627322321249,
+ 1,
+ 158.23785994857087,
+ 113.05793071695886,
+ 1,
+ 159.1827624858022,
+ 122.12860754004963,
+ 1,
+ 131.78312266215684,
+ 118.12603871987666,
+ 1,
+ 144.37435502719956,
+ 122.97613121869307,
+ 1,
+ 144.12850082414747,
+ 130.24233623490562,
+ 1,
+ 138.0058328373116,
+ 135.03475933083362,
+ 1,
+ 123.6128526185571,
+ 130.55957078894423,
+ 1,
+ 126.52617237783046,
+ 135.2764317635352,
+ 1,
+ 123.26857656908544,
+ 138.5518599403549,
+ 1,
+ 118.92147700299864,
+ 140.34319120176468,
+ 1
+ ],
+ "category_id": 1,
+ "id": 17620,
+ "image_id": 17620,
+ "segmentation": [
+ [
+ 62,
+ 82,
+ 62,
+ 113.0,
+ 62,
+ 144,
+ 113.5,
+ 144,
+ 165,
+ 144,
+ 165,
+ 113.0,
+ 165,
+ 82,
+ 113.5,
+ 82
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 6552
+ },
+ {
+ "bbox": [
+ 62,
+ 82,
+ 104,
+ 63
+ ],
+ "keypoints": [
+ 75.09007144965095,
+ 114.79035385093314,
+ 1,
+ 88.01978404720953,
+ 109.72359615889864,
+ 1,
+ 98.79950536639522,
+ 109.05442666062974,
+ 1,
+ 110.16327936938085,
+ 114.72375114390456,
+ 1,
+ 121.75826373686846,
+ 122.01572654269421,
+ 1,
+ 126.92528942089982,
+ 93.65489136216958,
+ 1,
+ 144.49316505581498,
+ 94.71206260545628,
+ 1,
+ 152.3510241000562,
+ 102.03474955900822,
+ 1,
+ 159.94413202793353,
+ 111.6105502403288,
+ 1,
+ 136.5822887073417,
+ 102.58162787991249,
+ 1,
+ 153.71181890922904,
+ 105.7627322321249,
+ 1,
+ 158.23785994857087,
+ 113.05793071695886,
+ 1,
+ 159.1827624858022,
+ 122.12860754004963,
+ 1,
+ 131.78312266215684,
+ 118.12603871987666,
+ 1,
+ 144.37435502719956,
+ 122.97613121869307,
+ 1,
+ 144.12850082414747,
+ 130.24233623490562,
+ 1,
+ 138.0058328373116,
+ 135.03475933083362,
+ 1,
+ 123.6128526185571,
+ 130.55957078894423,
+ 1,
+ 126.52617237783046,
+ 135.2764317635352,
+ 1,
+ 123.26857656908544,
+ 138.5518599403549,
+ 1,
+ 118.92147700299864,
+ 140.34319120176468,
+ 1
+ ],
+ "category_id": 1,
+ "id": 50180,
+ "image_id": 50180,
+ "segmentation": [
+ [
+ 62,
+ 82,
+ 62,
+ 113.0,
+ 62,
+ 144,
+ 113.5,
+ 144,
+ 165,
+ 144,
+ 165,
+ 113.0,
+ 165,
+ 82,
+ 113.5,
+ 82
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 6552
+ },
+ {
+ "bbox": [
+ 62,
+ 82,
+ 104,
+ 63
+ ],
+ "keypoints": [
+ 75.09007144965095,
+ 114.79035385093314,
+ 1,
+ 88.01978404720953,
+ 109.72359615889864,
+ 1,
+ 98.79950536639522,
+ 109.05442666062974,
+ 1,
+ 110.16327936938085,
+ 114.72375114390456,
+ 1,
+ 121.75826373686846,
+ 122.01572654269421,
+ 1,
+ 126.92528942089982,
+ 93.65489136216958,
+ 1,
+ 144.49316505581498,
+ 94.71206260545628,
+ 1,
+ 152.3510241000562,
+ 102.03474955900822,
+ 1,
+ 159.94413202793353,
+ 111.6105502403288,
+ 1,
+ 136.5822887073417,
+ 102.58162787991249,
+ 1,
+ 153.71181890922904,
+ 105.7627322321249,
+ 1,
+ 158.23785994857087,
+ 113.05793071695886,
+ 1,
+ 159.1827624858022,
+ 122.12860754004963,
+ 1,
+ 131.78312266215684,
+ 118.12603871987666,
+ 1,
+ 144.37435502719956,
+ 122.97613121869307,
+ 1,
+ 144.12850082414747,
+ 130.24233623490562,
+ 1,
+ 138.0058328373116,
+ 135.03475933083362,
+ 1,
+ 123.6128526185571,
+ 130.55957078894423,
+ 1,
+ 126.52617237783046,
+ 135.2764317635352,
+ 1,
+ 123.26857656908544,
+ 138.5518599403549,
+ 1,
+ 118.92147700299864,
+ 140.34319120176468,
+ 1
+ ],
+ "category_id": 1,
+ "id": 82740,
+ "image_id": 82740,
+ "segmentation": [
+ [
+ 62,
+ 82,
+ 62,
+ 113.0,
+ 62,
+ 144,
+ 113.5,
+ 144,
+ 165,
+ 144,
+ 165,
+ 113.0,
+ 165,
+ 82,
+ 113.5,
+ 82
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 6552
+ },
+ {
+ "bbox": [
+ 62,
+ 82,
+ 104,
+ 63
+ ],
+ "keypoints": [
+ 75.09007144965095,
+ 114.79035385093314,
+ 1,
+ 88.01978404720953,
+ 109.72359615889864,
+ 1,
+ 98.79950536639522,
+ 109.05442666062974,
+ 1,
+ 110.16327936938085,
+ 114.72375114390456,
+ 1,
+ 121.75826373686846,
+ 122.01572654269421,
+ 1,
+ 126.92528942089982,
+ 93.65489136216958,
+ 1,
+ 144.49316505581498,
+ 94.71206260545628,
+ 1,
+ 152.3510241000562,
+ 102.03474955900822,
+ 1,
+ 159.94413202793353,
+ 111.6105502403288,
+ 1,
+ 136.5822887073417,
+ 102.58162787991249,
+ 1,
+ 153.71181890922904,
+ 105.7627322321249,
+ 1,
+ 158.23785994857087,
+ 113.05793071695886,
+ 1,
+ 159.1827624858022,
+ 122.12860754004963,
+ 1,
+ 131.78312266215684,
+ 118.12603871987666,
+ 1,
+ 144.37435502719956,
+ 122.97613121869307,
+ 1,
+ 144.12850082414747,
+ 130.24233623490562,
+ 1,
+ 138.0058328373116,
+ 135.03475933083362,
+ 1,
+ 123.6128526185571,
+ 130.55957078894423,
+ 1,
+ 126.52617237783046,
+ 135.2764317635352,
+ 1,
+ 123.26857656908544,
+ 138.5518599403549,
+ 1,
+ 118.92147700299864,
+ 140.34319120176468,
+ 1
+ ],
+ "category_id": 1,
+ "id": 115300,
+ "image_id": 115300,
+ "segmentation": [
+ [
+ 62,
+ 82,
+ 62,
+ 113.0,
+ 62,
+ 144,
+ 113.5,
+ 144,
+ 165,
+ 144,
+ 165,
+ 113.0,
+ 165,
+ 82,
+ 113.5,
+ 82
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 6552
+ },
+ {
+ "bbox": [
+ 48,
+ 81,
+ 111,
+ 73
+ ],
+ "keypoints": [
+ 72.3863777322552,
+ 118.66396006693559,
+ 1,
+ 94.24833834345874,
+ 103.27814170253427,
+ 1,
+ 110.88311700561579,
+ 95.90395591649063,
+ 1,
+ 126.4579609506009,
+ 94.84947407598384,
+ 1,
+ 150.22575721471514,
+ 90.20807463489129,
+ 1,
+ 101.58391664034835,
+ 95.2364549099302,
+ 1,
+ 123.22957111339275,
+ 99.32947575213643,
+ 1,
+ 139.48821317513102,
+ 106.07413659069489,
+ 1,
+ 157.4869130814403,
+ 114.05678966958038,
+ 1,
+ 102.72641676686953,
+ 113.8112215401411,
+ 1,
+ 124.77010074005784,
+ 117.9386487787441,
+ 1,
+ 138.88096072705787,
+ 120.6828207743196,
+ 1,
+ 153.55692830019055,
+ 122.08891417018086,
+ 1,
+ 101.79667808841384,
+ 132.8686913780324,
+ 1,
+ 122.47431735923229,
+ 131.3244981984239,
+ 1,
+ 136.86479076428296,
+ 129.51781183394235,
+ 1,
+ 147.14149503293044,
+ 124.23211514642553,
+ 1,
+ 103.99186381010902,
+ 143.91615273519855,
+ 1,
+ 119.95852588057097,
+ 140.94459694337758,
+ 1,
+ 130.47757563177504,
+ 137.0559475661833,
+ 1,
+ 140.32638831475907,
+ 128.94416862968552,
+ 1
+ ],
+ "category_id": 1,
+ "id": 355,
+ "image_id": 355,
+ "segmentation": [
+ [
+ 48,
+ 81,
+ 48,
+ 117.0,
+ 48,
+ 153,
+ 103.0,
+ 153,
+ 158,
+ 153,
+ 158,
+ 117.0,
+ 158,
+ 81,
+ 103.0,
+ 81
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 8103
+ },
+ {
+ "bbox": [
+ 48,
+ 81,
+ 111,
+ 73
+ ],
+ "keypoints": [
+ 72.3863777322552,
+ 118.66396006693559,
+ 1,
+ 94.24833834345874,
+ 103.27814170253427,
+ 1,
+ 110.88311700561579,
+ 95.90395591649063,
+ 1,
+ 126.4579609506009,
+ 94.84947407598384,
+ 1,
+ 150.22575721471514,
+ 90.20807463489129,
+ 1,
+ 101.58391664034835,
+ 95.2364549099302,
+ 1,
+ 123.22957111339275,
+ 99.32947575213643,
+ 1,
+ 139.48821317513102,
+ 106.07413659069489,
+ 1,
+ 157.4869130814403,
+ 114.05678966958038,
+ 1,
+ 102.72641676686953,
+ 113.8112215401411,
+ 1,
+ 124.77010074005784,
+ 117.9386487787441,
+ 1,
+ 138.88096072705787,
+ 120.6828207743196,
+ 1,
+ 153.55692830019055,
+ 122.08891417018086,
+ 1,
+ 101.79667808841384,
+ 132.8686913780324,
+ 1,
+ 122.47431735923229,
+ 131.3244981984239,
+ 1,
+ 136.86479076428296,
+ 129.51781183394235,
+ 1,
+ 147.14149503293044,
+ 124.23211514642553,
+ 1,
+ 103.99186381010902,
+ 143.91615273519855,
+ 1,
+ 119.95852588057097,
+ 140.94459694337758,
+ 1,
+ 130.47757563177504,
+ 137.0559475661833,
+ 1,
+ 140.32638831475907,
+ 128.94416862968552,
+ 1
+ ],
+ "category_id": 1,
+ "id": 32915,
+ "image_id": 32915,
+ "segmentation": [
+ [
+ 48,
+ 81,
+ 48,
+ 117.0,
+ 48,
+ 153,
+ 103.0,
+ 153,
+ 158,
+ 153,
+ 158,
+ 117.0,
+ 158,
+ 81,
+ 103.0,
+ 81
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 8103
+ },
+ {
+ "bbox": [
+ 48,
+ 81,
+ 111,
+ 73
+ ],
+ "keypoints": [
+ 72.3863777322552,
+ 118.66396006693559,
+ 1,
+ 94.24833834345874,
+ 103.27814170253427,
+ 1,
+ 110.88311700561579,
+ 95.90395591649063,
+ 1,
+ 126.4579609506009,
+ 94.84947407598384,
+ 1,
+ 150.22575721471514,
+ 90.20807463489129,
+ 1,
+ 101.58391664034835,
+ 95.2364549099302,
+ 1,
+ 123.22957111339275,
+ 99.32947575213643,
+ 1,
+ 139.48821317513102,
+ 106.07413659069489,
+ 1,
+ 157.4869130814403,
+ 114.05678966958038,
+ 1,
+ 102.72641676686953,
+ 113.8112215401411,
+ 1,
+ 124.77010074005784,
+ 117.9386487787441,
+ 1,
+ 138.88096072705787,
+ 120.6828207743196,
+ 1,
+ 153.55692830019055,
+ 122.08891417018086,
+ 1,
+ 101.79667808841384,
+ 132.8686913780324,
+ 1,
+ 122.47431735923229,
+ 131.3244981984239,
+ 1,
+ 136.86479076428296,
+ 129.51781183394235,
+ 1,
+ 147.14149503293044,
+ 124.23211514642553,
+ 1,
+ 103.99186381010902,
+ 143.91615273519855,
+ 1,
+ 119.95852588057097,
+ 140.94459694337758,
+ 1,
+ 130.47757563177504,
+ 137.0559475661833,
+ 1,
+ 140.32638831475907,
+ 128.94416862968552,
+ 1
+ ],
+ "category_id": 1,
+ "id": 65475,
+ "image_id": 65475,
+ "segmentation": [
+ [
+ 48,
+ 81,
+ 48,
+ 117.0,
+ 48,
+ 153,
+ 103.0,
+ 153,
+ 158,
+ 153,
+ 158,
+ 117.0,
+ 158,
+ 81,
+ 103.0,
+ 81
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 8103
+ },
+ {
+ "bbox": [
+ 48,
+ 81,
+ 111,
+ 73
+ ],
+ "keypoints": [
+ 72.3863777322552,
+ 118.66396006693559,
+ 1,
+ 94.24833834345874,
+ 103.27814170253427,
+ 1,
+ 110.88311700561579,
+ 95.90395591649063,
+ 1,
+ 126.4579609506009,
+ 94.84947407598384,
+ 1,
+ 150.22575721471514,
+ 90.20807463489129,
+ 1,
+ 101.58391664034835,
+ 95.2364549099302,
+ 1,
+ 123.22957111339275,
+ 99.32947575213643,
+ 1,
+ 139.48821317513102,
+ 106.07413659069489,
+ 1,
+ 157.4869130814403,
+ 114.05678966958038,
+ 1,
+ 102.72641676686953,
+ 113.8112215401411,
+ 1,
+ 124.77010074005784,
+ 117.9386487787441,
+ 1,
+ 138.88096072705787,
+ 120.6828207743196,
+ 1,
+ 153.55692830019055,
+ 122.08891417018086,
+ 1,
+ 101.79667808841384,
+ 132.8686913780324,
+ 1,
+ 122.47431735923229,
+ 131.3244981984239,
+ 1,
+ 136.86479076428296,
+ 129.51781183394235,
+ 1,
+ 147.14149503293044,
+ 124.23211514642553,
+ 1,
+ 103.99186381010902,
+ 143.91615273519855,
+ 1,
+ 119.95852588057097,
+ 140.94459694337758,
+ 1,
+ 130.47757563177504,
+ 137.0559475661833,
+ 1,
+ 140.32638831475907,
+ 128.94416862968552,
+ 1
+ ],
+ "category_id": 1,
+ "id": 98035,
+ "image_id": 98035,
+ "segmentation": [
+ [
+ 48,
+ 81,
+ 48,
+ 117.0,
+ 48,
+ 153,
+ 103.0,
+ 153,
+ 158,
+ 153,
+ 158,
+ 117.0,
+ 158,
+ 81,
+ 103.0,
+ 81
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 8103
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "hand",
+ "id": 1,
+ "name": "hand",
+ "keypoints": [
+ "wrist",
+ "thumb1",
+ "thumb2",
+ "thumb3",
+ "thumb4",
+ "forefinger1",
+ "forefinger2",
+ "forefinger3",
+ "forefinger4",
+ "middle_finger1",
+ "middle_finger2",
+ "middle_finger3",
+ "middle_finger4",
+ "ring_finger1",
+ "ring_finger2",
+ "ring_finger3",
+ "ring_finger4",
+ "pinky_finger1",
+ "pinky_finger2",
+ "pinky_finger3",
+ "pinky_finger4"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 1,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 9
+ ],
+ [
+ 1,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 1,
+ 14
+ ],
+ [
+ 14,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ],
+ [
+ 1,
+ 18
+ ],
+ [
+ 18,
+ 19
+ ],
+ [
+ 19,
+ 20
+ ],
+ [
+ 20,
+ 21
+ ]
+ ]
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S1_Directions_1.54138969_000001_467_466.png b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S1_Directions_1.54138969_000001_467_466.png
new file mode 100644
index 0000000000000000000000000000000000000000..19fde48fc8626a1c28a40923dccc29488f53abff
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S1_Directions_1.54138969_000001_467_466.png differ
diff --git a/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S5_SittingDown.54138969_002061_478_619.png b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S5_SittingDown.54138969_002061_478_619.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4e8498667abf9b606281855019a14f830ed9224
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S5_SittingDown.54138969_002061_478_619.png differ
diff --git a/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S7_Greeting.55011271_000396_365_433.png b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S7_Greeting.55011271_000396_365_433.png
new file mode 100644
index 0000000000000000000000000000000000000000..f4ae2ced654a2b998a4605292036ee0c21c59c42
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S7_Greeting.55011271_000396_365_433.png differ
diff --git a/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S8_WalkDog_1.55011271_000026_592_382.png b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S8_WalkDog_1.55011271_000026_592_382.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d1c80dadc0b9c8f149dd974d5841a8960925f2c
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/BF_IUV_gt/S8_WalkDog_1.55011271_000026_592_382.png differ
diff --git a/vendor/ViTPose/tests/data/h36m/S1_Directions_1.54138969_000001.jpg b/vendor/ViTPose/tests/data/h36m/S1_Directions_1.54138969_000001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cea7d5784872ba32cd821f31c1216c4a999b95c3
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/S1_Directions_1.54138969_000001.jpg differ
diff --git a/vendor/ViTPose/tests/data/h36m/S5_SittingDown.54138969_002061.jpg b/vendor/ViTPose/tests/data/h36m/S5_SittingDown.54138969_002061.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..699c2dbc9433bbea32fdd3ab293a0d2be5dc2653
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/S5_SittingDown.54138969_002061.jpg differ
diff --git a/vendor/ViTPose/tests/data/h36m/S7_Greeting.55011271_000396.jpg b/vendor/ViTPose/tests/data/h36m/S7_Greeting.55011271_000396.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb5a3f923682fd14a3e02d41b97af1abeea8e004
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/S7_Greeting.55011271_000396.jpg differ
diff --git a/vendor/ViTPose/tests/data/h36m/S8_WalkDog_1.55011271_000026.jpg b/vendor/ViTPose/tests/data/h36m/S8_WalkDog_1.55011271_000026.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..70da26ab3b185f22f36c51a570691ad8b7429d89
Binary files /dev/null and b/vendor/ViTPose/tests/data/h36m/S8_WalkDog_1.55011271_000026.jpg differ
diff --git a/vendor/ViTPose/tests/data/h36m/h36m_coco.json b/vendor/ViTPose/tests/data/h36m/h36m_coco.json
new file mode 100644
index 0000000000000000000000000000000000000000..714d74d01d2f31954424781693c49b8f7830ad55
--- /dev/null
+++ b/vendor/ViTPose/tests/data/h36m/h36m_coco.json
@@ -0,0 +1 @@
+{"categories": [{"supercategory": "person", "id": 1, "name": "person", "keypoints": ["root (pelvis)", "left_hip", "left_knee", "left_foot", "right_hip", "right_knee", "right_foot", "spine", "thorax", "neck_base", "head", "left_shoulder", "left_elbow", "left_wrist", "right_shoulder", "right_elbow", "right_wrist"], "skeleton": [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]]}], "images": [{"file_name": "S1_Directions_1.54138969_000001.jpg", "height": 1002, "width": 1000, "id": 1}, {"file_name": "S5_SittingDown.54138969_002061.jpg", "height": 1002, "width": 1000, "id": 2}, {"file_name": "S7_Greeting.55011271_000396.jpg", "height": 1002, "width": 1000, "id": 3}, {"file_name": "S8_WalkDog_1.55011271_000026.jpg", "height": 1000, "width": 1000, "id": 4}], "annotations": [{"id": 1, "category_id": 1, "image_id": 1, "iscrowd": 0, "bbox": [264.9376922607422, 263.9518524169922, 403.4372680664062, 403.4372680664062], "area": 162761.6292648853, "num_keypoints": 17, "keypoints": [473.6835632324219, 444.9424133300781, 1.0, 500.99609375, 448.0298767089844, 1.0, 479.8392639160156, 530.78564453125, 1.0, 506.2183837890625, 622.56884765625, 1.0, 445.90008544921875, 441.81585693359375, 1.0, 456.1890563964844, 537.1580810546875, 1.0, 467.3092346191406, 633.7693481445312, 1.0, 488.1867370605469, 397.4340515136719, 1.0, 481.0284729003906, 340.3969421386719, 1.0, 478.5175476074219, 318.8080139160156, 1.0, 485.7689514160156, 297.5716247558594, 1.0, 454.0160827636719, 359.7595520019531, 1.0, 430.05877685546875, 415.7348937988281, 1.0, 412.9972229003906, 452.88665771484375, 1.0, 508.1343688964844, 356.49151611328125, 1.0, 520.3154296875, 413.3182678222656, 1.0, 515.4714965820312, 456.4298400878906, 1.0], "keypoints_3d": [-0.09167896467684544, 0.15440384287163678, 0.9072610776685424, 1.0, -0.22323561595581554, 0.16380534684958903, 0.8905342751070698, 1.0, -0.18847025361683212, 0.014076936796798556, 0.47516886851117857, 1.0, -0.2618404912559873, 0.1865526993157367, 0.06143897298248735, 1.0, 0.039877919019916996, 0.1450023222842871, 0.923987909780832, 1.0, -0.011675957824905936, 0.16089903522172655, 0.48439154540870843, 1.0, -0.051550255421339175, 0.2201460834524429, 0.035834453020362034, 1.0, -0.1323477693191084, 0.2157300269714426, 1.128839650641341, 1.0, -0.09716736817976912, 0.20234419005974758, 1.3831467408152163, 1.0, -0.11297070886154925, 0.1279692883955672, 1.4774457457591577, 1.0, -0.12003286832485882, 0.19096459762702, 1.5734000571086066, 1.0, 0.025895481465340486, 0.1923593164851738, 1.2961572344669199, 1.0, 0.10710585363549718, 0.11605013463811531, 1.040506322546096, 1.0, 0.12983817437702339, -0.04802507206683693, 0.8509481470388911, 1.0, -0.23036952033496494, 0.20317906334818225, 1.31196399244637, 1.0, -0.3154053270492714, 0.1645526934522641, 1.0491747593377458, 1.0, -0.35077130389737743, 0.04344200135666121, 0.8313473515463179, 1.0]}, {"id": 2, "category_id": 1, "image_id": 2, "iscrowd": 0, "bbox": [332.0779327392578, 473.9801391601562, 290.84520263671874, 290.84520263671874], "area": 84590.93189679399, "num_keypoints": 17, "keypoints": [504.7028503417969, 669.4107666015625, 1.0, 522.782958984375, 662.7621459960938, 1.0, 480.4589538574219, 573.927490234375, 1.0, 423.3778991699219, 652.5033569335938, 1.0, 485.9207763671875, 676.2982788085938, 1.0, 421.36602783203125, 601.1521606445312, 1.0, 356.3150329589844, 537.103759765625, 1.0, 550.3255004882812, 662.92822265625, 1.0, 589.5880126953125, 619.5794067382812, 1.0, 
571.8924560546875, 595.6388549804688, 1.0, 598.68603515625, 587.589599609375, 1.0, 568.692138671875, 634.8923950195312, 1.0, 566.2520751953125, 701.7017211914062, 1.0, 511.3590087890625, 693.0339965820312, 1.0, 590.8366088867188, 616.5315551757812, 1.0, 591.3470458984375, 677.7720947265625, 1.0, 537.99951171875, 670.0408325195312, 1.0], "keypoints_3d": [0.18313216685808942, 0.8231641950309152, 0.07380225812969665, 1.0, 0.07554131210634596, 0.7715923776310918, 0.07325729992245722, 1.0, 0.16299457213112878, 0.5166802880563885, 0.4061248320474351, 1.0, 0.37620554042837284, 0.3923511353809239, 0.03891887911670544, 1.0, 0.2907222583652278, 0.8747356465826233, 0.07434721247102738, 1.0, 0.4538780403958691, 0.569630972616368, 0.3267755094977387, 1.0, 0.6251452690485007, 0.22271823762053078, 0.5414286139004776, 1.0, 0.06532934302457871, 1.001159255205049, 0.14274932444804866, 1.0, -0.04454656018317582, 1.1125556363460358, 0.342905193075141, 1.0, 0.009223497357126575, 1.0593620630585874, 0.43232439773437603, 1.0, -0.06893729069462973, 1.1318276163177168, 0.4755019124242963, 1.0, 0.08050700263825616, 1.1749652405144606, 0.31219388344281884, 1.0, 0.09351924672160816, 1.1979948900004627, 0.04893463071448689, 1.0, 0.23106479380471234, 0.9914363940751381, 0.03387224672098732, 1.0, -0.11785994971756253, 0.9928514533779302, 0.3151021783048409, 1.0, -0.13147583042309652, 0.9930890169754685, 0.05086920141700024, 1.0, 0.01277369696599484, 0.7909373534094728, 0.039085449376500936, 1.0]}, {"id": 3, "category_id": 1, "image_id": 3, "iscrowd": 0, "bbox": [170.23768615722656, 237.9368133544922, 390.06866455078125, 390.06866455078125], "area": 152153.5630644299, "num_keypoints": 17, "keypoints": [368.17724609375, 411.8692626953125, 1.0, 343.07647705078125, 416.750244140625, 1.0, 353.4913635253906, 517.2151489257812, 1.0, 331.39996337890625, 595.499755859375, 1.0, 392.517578125, 407.1534729003906, 1.0, 398.1598815917969, 503.8897399902344, 1.0, 385.0705261230469, 588.869140625, 1.0, 362.5104675292969, 362.4866027832031, 1.0, 366.911376953125, 306.0451354980469, 1.0, 367.71380615234375, 290.8963623046875, 1.0, 363.420654296875, 270.4425354003906, 1.0, 386.78955078125, 316.49371337890625, 1.0, 399.1440734863281, 365.17083740234375, 1.0, 353.6079406738281, 381.5865478515625, 1.0, 342.8076477050781, 318.3569641113281, 1.0, 342.71282958984375, 364.5867614746094, 1.0, 366.3352355957031, 365.45220947265625, 1.0], "keypoints_3d": [-0.6301309252725336, -0.6344397044573773, 0.9206111026673937, 1.0, -0.6882790651010198, -0.7567714273847876, 0.9098024882822011, 1.0, -0.605197492011686, -0.8176252814087839, 0.47316837750582774, 1.0, -0.766804548823675, -0.755787085309862, 0.07078524813387999, 1.0, -0.5719828361729054, -0.5121080882530507, 0.9314197076230729, 1.0, -0.5345570064294822, -0.5306570068297987, 0.4847542734658332, 1.0, -0.6210461702486163, -0.5067827322307146, 0.056033899109727114, 1.0, -0.6429426515493781, -0.6598479640784474, 1.1450644187666705, 1.0, -0.6072065974986334, -0.6803959533246831, 1.3971234240234174, 1.0, -0.5632315191812003, -0.7496893544569412, 1.466008916363522, 1.0, -0.6158132161954746, -0.6947954939152909, 1.5523081521170095, 1.0, -0.5777025823723583, -0.5535770126047312, 1.3464529557428198, 1.0, -0.5941298810296396, -0.4062447056281533, 1.1141629855017834, 1.0, -0.7189789681120353, -0.6100210358087663, 1.0505663930038809, 1.0, -0.6584496220675611, -0.8004586993700178, 1.3473324561682447, 1.0, -0.5530156532845365, -0.9799554689731194, 1.1667654450548033, 1.0, -0.3551462593488601, -1.1271667875753684, 
1.1850106181886038, 1.0]}, {"id": 4, "category_id": 1, "image_id": 4, "iscrowd": 0, "bbox": [426.30922546386716, 216.63374938964844, 331.06431884765624, 331.06431884765624], "area": 109603.5832140626, "num_keypoints": 17, "keypoints": [580.787841796875, 370.0909729003906, 1.0, 556.613037109375, 367.4950866699219, 1.0, 555.3511962890625, 443.8667907714844, 1.0, 555.9659423828125, 512.378173828125, 1.0, 605.2581176757812, 372.7404479980469, 1.0, 602.246826171875, 450.21441650390625, 1.0, 602.3458251953125, 520.109375, 1.0, 582.8226318359375, 324.77984619140625, 1.0, 583.9212036132812, 280.9236755371094, 1.0, 580.145751953125, 264.3388671875, 1.0, 578.8571166992188, 244.22244262695312, 1.0, 613.561279296875, 281.4887390136719, 1.0, 663.5371704101562, 290.7607727050781, 1.0, 703.6394653320312, 289.19964599609375, 1.0, 559.4656372070312, 283.4862365722656, 1.0, 518.9984741210938, 293.1536560058594, 1.0, 480.0433044433594, 294.3433837890625, 1.0], "keypoints_3d": [0.19220202254147178, 1.189309849497537, 0.9821400313764364, 1.0, 0.04600865836610568, 1.183930600453361, 0.9906196026753151, 1.0, 0.04461316569282081, 1.1926418282628157, 0.538558154313876, 1.0, 0.031201472570259625, 1.2700369233139401, 0.10701519045307029, 1.0, 0.33839414669826073, 1.1946890529146854, 0.9736605320016554, 1.0, 0.333404073845164, 1.193994506607667, 0.5215423147214628, 1.0, 0.3263552025015575, 1.2632366584322137, 0.08846607677389562, 1.0, 0.19539291023644667, 1.19850396285414, 1.243174997496277, 1.0, 0.1993584536744768, 1.1888196540778582, 1.493981021453426, 1.0, 0.19903504684644213, 1.1133379968098671, 1.5878569145306556, 1.0, 0.18327226834497345, 1.1310217923883403, 1.700390853655377, 1.0, 0.3645112266620312, 1.2261360502087753, 1.4942545468581971, 1.0, 0.6478297588674613, 1.2677031012472364, 1.4489805530636382, 1.0, 0.8872620514452785, 1.2228143555784783, 1.4657017622368516, 1.0, 0.03701256277219178, 1.2330351409283296, 1.475094968744001, 1.0, -0.23806504890556468, 1.2985863358997447, 1.4112075036634106, 1.0, -0.4812982456964936, 1.2808732588306402, 1.399103701708569, 1.0]}]}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/halpe/test_halpe.json b/vendor/ViTPose/tests/data/halpe/test_halpe.json
new file mode 100644
index 0000000000000000000000000000000000000000..85b9e9d6077c9da8767bc281d85ba2847a1057c4
--- /dev/null
+++ b/vendor/ViTPose/tests/data/halpe/test_halpe.json
@@ -0,0 +1,5991 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "license": 4,
+ "file_name": "000000000785.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000000785.jpg",
+ "height": 425,
+ "width": 640,
+ "date_captured": "2013-11-19 21:22:42",
+ "flickr_url": "http://farm8.staticflickr.com/7015/6795644157_f019453ae7_z.jpg",
+ "id": 785
+ },
+ {
+ "license": 3,
+ "file_name": "000000040083.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000040083.jpg",
+ "height": 333,
+ "width": 500,
+ "date_captured": "2013-11-18 03:30:24",
+ "flickr_url": "http://farm1.staticflickr.com/116/254881838_e21c6d17b8_z.jpg",
+ "id": 40083
+ },
+ {
+ "license": 1,
+ "file_name": "000000196141.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000196141.jpg",
+ "height": 429,
+ "width": 640,
+ "date_captured": "2013-11-22 22:37:15",
+ "flickr_url": "http://farm4.staticflickr.com/3310/3611902235_57d4ae496d_z.jpg",
+ "id": 196141
+ },
+ {
+ "license": 3,
+ "file_name": "000000197388.jpg",
+ "coco_url": "http://images.cocodataset.org/val2017/000000197388.jpg",
+ "height": 392,
+ "width": 640,
+ "date_captured": "2013-11-19 20:10:37",
+ "flickr_url": "http://farm9.staticflickr.com/8375/8507321836_5b8b13188f_z.jpg",
+ "id": 197388
+ }
+ ],
+ "annotations": [
+ {
+ "num_keypoints": 17,
+ "area": 27789.11055,
+ "iscrowd": 0,
+ "keypoints": [
+ 367,
+ 81,
+ 2,
+ 374,
+ 73,
+ 2,
+ 360,
+ 75,
+ 2,
+ 386,
+ 78,
+ 2,
+ 356,
+ 81,
+ 2,
+ 399,
+ 108,
+ 2,
+ 358,
+ 129,
+ 2,
+ 433,
+ 142,
+ 2,
+ 341,
+ 159,
+ 2,
+ 449,
+ 165,
+ 2,
+ 309,
+ 178,
+ 2,
+ 424,
+ 203,
+ 2,
+ 393,
+ 214,
+ 2,
+ 429,
+ 294,
+ 2,
+ 367,
+ 273,
+ 2,
+ 466,
+ 362,
+ 2,
+ 396,
+ 341,
+ 2,
+ 370,
+ 52,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 431,
+ 378,
+ 2,
+ 364,
+ 366,
+ 2,
+ 437,
+ 383,
+ 2,
+ 358,
+ 361,
+ 2,
+ 488,
+ 372,
+ 2,
+ 414,
+ 353,
+ 2,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 382.0,
+ 88.0,
+ 0.0,
+ 384.0,
+ 85.0,
+ 0.0,
+ 386.0,
+ 80.0,
+ 0.0,
+ 386.0,
+ 76.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 367.0,
+ 76.0,
+ 0.0,
+ 367.0,
+ 79.0,
+ 0.0,
+ 367.0,
+ 81.0,
+ 0.0,
+ 364.0,
+ 83.0,
+ 0.0,
+ 366.0,
+ 83.0,
+ 0.0,
+ 367.0,
+ 84.0,
+ 0.0,
+ 369.0,
+ 83.0,
+ 0.0,
+ 371.0,
+ 83.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 363.0,
+ 88.0,
+ 0.0,
+ 364.0,
+ 86.0,
+ 0.0,
+ 366.0,
+ 86.0,
+ 0.0,
+ 368.0,
+ 86.0,
+ 0.0,
+ 369.0,
+ 86.0,
+ 0.0,
+ 372.0,
+ 86.0,
+ 0.0,
+ 376.0,
+ 86.0,
+ 0.0,
+ 373.0,
+ 89.0,
+ 0.0,
+ 371.0,
+ 90.0,
+ 0.0,
+ 368.0,
+ 90.0,
+ 0.0,
+ 366.0,
+ 90.0,
+ 0.0,
+ 364.0,
+ 89.0,
+ 0.0,
+ 364.0,
+ 88.0,
+ 0.0,
+ 366.0,
+ 87.0,
+ 0.0,
+ 368.0,
+ 87.0,
+ 0.0,
+ 370.0,
+ 87.0,
+ 0.0,
+ 375.0,
+ 87.0,
+ 0.0,
+ 370.0,
+ 89.0,
+ 0.0,
+ 368.0,
+ 89.0,
+ 0.0,
+ 367.0,
+ 89.0,
+ 0.0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 435,
+ 172,
+ 2,
+ 441,
+ 178,
+ 2,
+ 442,
+ 160,
+ 2,
+ 444,
+ 170,
+ 2,
+ 444,
+ 173,
+ 2,
+ 444,
+ 178,
+ 2,
+ 447,
+ 161,
+ 2,
+ 448,
+ 170,
+ 2,
+ 448,
+ 174,
+ 2,
+ 448,
+ 180,
+ 2,
+ 453,
+ 161,
+ 2,
+ 453,
+ 170,
+ 2,
+ 452,
+ 176,
+ 2,
+ 453,
+ 181,
+ 2,
+ 459,
+ 163,
+ 2,
+ 459,
+ 171,
+ 2,
+ 458,
+ 176,
+ 2,
+ 456,
+ 182,
+ 2,
+ 306,
+ 188,
+ 2,
+ 302,
+ 181,
+ 2,
+ 298,
+ 179,
+ 2,
+ 293,
+ 183,
+ 2,
+ 290,
+ 188,
+ 2,
+ 291,
+ 174,
+ 2,
+ 285,
+ 180,
+ 2,
+ 285,
+ 185,
+ 2,
+ 290,
+ 189,
+ 2,
+ 283,
+ 177,
+ 2,
+ 282,
+ 185,
+ 2,
+ 285,
+ 190,
+ 2,
+ 290,
+ 194,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 785,
+ "bbox": [
+ 280.79,
+ 44.73,
+ 218.7,
+ 346.68
+ ],
+ "category_id": 1,
+ "id": 442619
+ },
+ {
+ "num_keypoints": 14,
+ "area": 11025.219,
+ "iscrowd": 0,
+ "keypoints": [
+ 99,
+ 144,
+ 2,
+ 104,
+ 141,
+ 2,
+ 96,
+ 137,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 133,
+ 2,
+ 56,
+ 161,
+ 2,
+ 81,
+ 162,
+ 2,
+ 0,
+ 0,
+ 0,
+ 103,
+ 208,
+ 2,
+ 116,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 57,
+ 246,
+ 1,
+ 82,
+ 259,
+ 1,
+ 137,
+ 219,
+ 2,
+ 138,
+ 247,
+ 2,
+ 177,
+ 256,
+ 2,
+ 158,
+ 296,
+ 1,
+ 106,
+ 120,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 207,
+ 256,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 184,
+ 272,
+ 2,
+ 0,
+ 0,
+ 0,
+ 82,
+ 130,
+ 2,
+ 80,
+ 134,
+ 2,
+ 80,
+ 139,
+ 2,
+ 80,
+ 143,
+ 2,
+ 81,
+ 147,
+ 2,
+ 82,
+ 151,
+ 2,
+ 85,
+ 154,
+ 2,
+ 88,
+ 156,
+ 2,
+ 92,
+ 158,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 94,
+ 131,
+ 2,
+ 97,
+ 131,
+ 2,
+ 98,
+ 131,
+ 2,
+ 100,
+ 133,
+ 2,
+ 101,
+ 134,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 101,
+ 139,
+ 2,
+ 100,
+ 142,
+ 2,
+ 99,
+ 144,
+ 2,
+ 98,
+ 146,
+ 2,
+ 96,
+ 147,
+ 2,
+ 97,
+ 147,
+ 2,
+ 98,
+ 148,
+ 2,
+ 99,
+ 148,
+ 2,
+ 99,
+ 148,
+ 2,
+ 93,
+ 137,
+ 2,
+ 95,
+ 136,
+ 2,
+ 97,
+ 136,
+ 2,
+ 97,
+ 138,
+ 2,
+ 96,
+ 138,
+ 2,
+ 95,
+ 138,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 89,
+ 149,
+ 2,
+ 92,
+ 149,
+ 2,
+ 95,
+ 150,
+ 2,
+ 96,
+ 150,
+ 2,
+ 97,
+ 151,
+ 2,
+ 97,
+ 152,
+ 2,
+ 97,
+ 153,
+ 2,
+ 96,
+ 153,
+ 2,
+ 96,
+ 153,
+ 2,
+ 94,
+ 153,
+ 2,
+ 93,
+ 153,
+ 2,
+ 91,
+ 151,
+ 2,
+ 90,
+ 149,
+ 2,
+ 95,
+ 150,
+ 2,
+ 96,
+ 151,
+ 2,
+ 97,
+ 151,
+ 2,
+ 97,
+ 153,
+ 2,
+ 96,
+ 153,
+ 2,
+ 94,
+ 153,
+ 2,
+ 94,
+ 152,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 118,
+ 198,
+ 2,
+ 113,
+ 197,
+ 2,
+ 109,
+ 197,
+ 2,
+ 0,
+ 0,
+ 0,
+ 118,
+ 202,
+ 2,
+ 111,
+ 201,
+ 2,
+ 106,
+ 201,
+ 2,
+ 0,
+ 0,
+ 0,
+ 117,
+ 206,
+ 2,
+ 111,
+ 205,
+ 2,
+ 108,
+ 205,
+ 2,
+ 104,
+ 203,
+ 2,
+ 116,
+ 209,
+ 2,
+ 110,
+ 209,
+ 2,
+ 107,
+ 208,
+ 2,
+ 104,
+ 206,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 38.08,
+ 110.95,
+ 174.71,
+ 174.71
+ ],
+ "category_id": 1,
+ "id": 198196
+ },
+ {
+ "num_keypoints": 15,
+ "area": 10171.9544,
+ "iscrowd": 0,
+ "keypoints": [
+ 343,
+ 164,
+ 2,
+ 348,
+ 160,
+ 2,
+ 340,
+ 160,
+ 2,
+ 359,
+ 163,
+ 2,
+ 332,
+ 164,
+ 2,
+ 370,
+ 189,
+ 2,
+ 334,
+ 190,
+ 2,
+ 358,
+ 236,
+ 2,
+ 348,
+ 234,
+ 2,
+ 339,
+ 270,
+ 2,
+ 330,
+ 262,
+ 2,
+ 378,
+ 262,
+ 2,
+ 343,
+ 254,
+ 2,
+ 338,
+ 280,
+ 2,
+ 283,
+ 272,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 343,
+ 143,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 332.3577977797564,
+ 162.3496914134306,
+ 2.0,
+ 332.8988608117197,
+ 165.6187214570887,
+ 2.0,
+ 333.63467933804384,
+ 168.74800139782477,
+ 2.0,
+ 334.44826124602673,
+ 171.62306650199142,
+ 2.0,
+ 335.45694729674096,
+ 174.28548183067173,
+ 2.0,
+ 336.96602223714194,
+ 176.06009946336934,
+ 2.0,
+ 339.1693873087565,
+ 177.1661381740196,
+ 2.0,
+ 342.3967300714231,
+ 178.20855305989585,
+ 2.0,
+ 346.46408769196154,
+ 178.59725353764554,
+ 2.0,
+ 349.8700014600567,
+ 177.54131727031634,
+ 2.0,
+ 352.5932256960401,
+ 176.49227677887563,
+ 2.0,
+ 354.83135782877605,
+ 175.44453310499006,
+ 2.0,
+ 356.3679296755323,
+ 173.84137070599724,
+ 2.0,
+ 357.0065454221239,
+ 171.40940037147672,
+ 2.0,
+ 357.534409347235,
+ 168.54578019684436,
+ 2.0,
+ 357.7505070106656,
+ 165.60219732546338,
+ 2.0,
+ 357.9972831576478,
+ 162.53520322313494,
+ 2.0,
+ 334.98978292427813,
+ 157.50515154670268,
+ 2.0,
+ 336.268189015108,
+ 155.9984682569317,
+ 2.0,
+ 338.20047804888554,
+ 155.20954518037684,
+ 2.0,
+ 339.8509974460976,
+ 155.23421301748238,
+ 2.0,
+ 341.352836967917,
+ 155.51378012264476,
+ 2.0,
+ 347.8451109044692,
+ 155.22197044222963,
+ 2.0,
+ 349.3337133669386,
+ 154.8293061798694,
+ 2.0,
+ 351.12965129777496,
+ 154.6547285491345,
+ 2.0,
+ 353.1635732613358,
+ 155.35309825224036,
+ 2.0,
+ 354.5697377522786,
+ 156.92000379375384,
+ 2.0,
+ 344.713427734375,
+ 159.3260030409869,
+ 2.0,
+ 344.74998306573605,
+ 161.3128111596201,
+ 2.0,
+ 344.9170358096852,
+ 163.04858473235487,
+ 2.0,
+ 344.9786475088082,
+ 164.92118542241116,
+ 2.0,
+ 342.8344047097599,
+ 167.29107576258042,
+ 2.0,
+ 343.73243414186965,
+ 167.34131457758886,
+ 2.0,
+ 345.013671875,
+ 167.60332833084405,
+ 2.0,
+ 345.8795548981311,
+ 167.26825794893153,
+ 2.0,
+ 346.9039867326325,
+ 167.04604671702666,
+ 2.0,
+ 337.4534390917011,
+ 160.08626361921722,
+ 2.0,
+ 338.55446807262945,
+ 159.17182970233992,
+ 2.0,
+ 340.002108854406,
+ 159.25801017611636,
+ 2.0,
+ 341.49895665785846,
+ 160.03499301087624,
+ 2.0,
+ 340.23350459080115,
+ 160.5913200228822,
+ 2.0,
+ 338.5602124083276,
+ 160.56629581825405,
+ 2.0,
+ 347.86048488242955,
+ 159.88770386938955,
+ 2.0,
+ 349.3879867254519,
+ 159.04122164857154,
+ 2.0,
+ 350.88049507889093,
+ 158.927533976237,
+ 2.0,
+ 352.11961969113815,
+ 159.93540822945388,
+ 2.0,
+ 350.849705954159,
+ 160.3235902374866,
+ 2.0,
+ 349.1870314654182,
+ 160.32544540704464,
+ 2.0,
+ 340.80742998310166,
+ 172.02484322342218,
+ 2.0,
+ 342.28591649672563,
+ 170.90962129480698,
+ 2.0,
+ 344.0934833302217,
+ 170.10430531221277,
+ 2.0,
+ 345.1530334472656,
+ 170.32844890519684,
+ 2.0,
+ 345.8770950616575,
+ 170.0848247453278,
+ 2.0,
+ 347.8689553653493,
+ 170.66106716978783,
+ 2.0,
+ 349.58350770239736,
+ 171.62832581763175,
+ 2.0,
+ 348.09330994849114,
+ 172.68533762015548,
+ 2.0,
+ 346.88256608551626,
+ 173.17178057502298,
+ 2.0,
+ 345.6372661515778,
+ 173.27078642003676,
+ 2.0,
+ 343.9210619159773,
+ 173.28780972349878,
+ 2.0,
+ 342.63790340049593,
+ 172.8480547736673,
+ 2.0,
+ 341.26428671444165,
+ 171.89147685929842,
+ 2.0,
+ 343.8292683320887,
+ 171.36270207423792,
+ 2.0,
+ 345.2252255308862,
+ 171.2339672013825,
+ 2.0,
+ 346.42121037501914,
+ 171.26879086961932,
+ 2.0,
+ 349.3406477385876,
+ 171.65391995299098,
+ 2.0,
+ 346.50171341241577,
+ 171.7467015883502,
+ 2.0,
+ 345.33072832892924,
+ 171.8389222986558,
+ 2.0,
+ 343.8844602697036,
+ 171.8535089231005,
+ 2.0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 257.76,
+ 139.06,
+ 140.05,
+ 154.21
+ ],
+ "category_id": 1,
+ "id": 230195
+ },
+ {
+ "num_keypoints": 0,
+ "area": 491.2669,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 40083,
+ "bbox": [
+ 275.17,
+ 126.5,
+ 10.69,
+ 68.26
+ ],
+ "category_id": 1,
+ "id": 1202706
+ },
+ {
+ "num_keypoints": 15,
+ "area": 17123.92955,
+ "iscrowd": 0,
+ "keypoints": [
+ 297,
+ 111,
+ 2,
+ 299,
+ 106,
+ 2,
+ 0,
+ 0,
+ 0,
+ 314,
+ 108,
+ 2,
+ 0,
+ 0,
+ 0,
+ 329,
+ 141,
+ 2,
+ 346,
+ 125,
+ 2,
+ 295,
+ 164,
+ 2,
+ 323,
+ 130,
+ 2,
+ 266,
+ 155,
+ 2,
+ 279,
+ 143,
+ 2,
+ 329,
+ 225,
+ 2,
+ 331,
+ 221,
+ 2,
+ 327,
+ 298,
+ 2,
+ 283,
+ 269,
+ 2,
+ 398,
+ 327,
+ 2,
+ 288,
+ 349,
+ 2,
+ 309,
+ 78,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 399,
+ 363,
+ 2,
+ 261,
+ 361,
+ 2,
+ 402,
+ 360,
+ 2,
+ 254,
+ 359,
+ 2,
+ 408,
+ 327,
+ 2,
+ 296,
+ 358,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 308,
+ 121,
+ 2,
+ 310,
+ 119,
+ 2,
+ 311,
+ 117,
+ 2,
+ 312,
+ 115,
+ 2,
+ 313,
+ 112,
+ 2,
+ 313,
+ 110,
+ 2,
+ 314,
+ 108,
+ 2,
+ 313,
+ 105,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 298,
+ 101,
+ 2,
+ 300,
+ 101,
+ 2,
+ 302,
+ 101,
+ 2,
+ 303,
+ 101,
+ 2,
+ 305,
+ 103,
+ 2,
+ 297,
+ 104,
+ 2,
+ 297,
+ 106,
+ 2,
+ 296,
+ 109,
+ 2,
+ 296,
+ 111,
+ 2,
+ 299,
+ 111,
+ 2,
+ 300,
+ 111,
+ 2,
+ 298,
+ 112,
+ 2,
+ 299,
+ 112,
+ 2,
+ 300,
+ 112,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 298,
+ 104,
+ 2,
+ 300,
+ 103,
+ 2,
+ 302,
+ 104,
+ 2,
+ 304,
+ 106,
+ 2,
+ 302,
+ 106,
+ 2,
+ 300,
+ 106,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 264,
+ 156,
+ 2,
+ 263,
+ 147,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 253,
+ 149,
+ 2,
+ 248,
+ 147,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 253,
+ 154,
+ 2,
+ 245,
+ 155,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 251,
+ 157,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 252,
+ 160,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 276,
+ 146,
+ 2,
+ 270,
+ 150,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 268,
+ 139,
+ 2,
+ 262,
+ 145,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 247.76,
+ 74.23,
+ 169.67,
+ 300.78
+ ],
+ "category_id": 1,
+ "id": 460541
+ },
+ {
+ "num_keypoints": 15,
+ "area": 2789.0208,
+ "iscrowd": 0,
+ "keypoints": [
+ 589,
+ 113,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 595,
+ 112,
+ 1,
+ 584,
+ 110,
+ 2,
+ 598,
+ 123,
+ 2,
+ 579,
+ 119,
+ 2,
+ 594,
+ 141,
+ 2,
+ 570,
+ 137,
+ 2,
+ 576,
+ 135,
+ 2,
+ 585,
+ 139,
+ 2,
+ 590,
+ 157,
+ 2,
+ 574,
+ 156,
+ 2,
+ 589,
+ 192,
+ 2,
+ 565,
+ 189,
+ 1,
+ 587,
+ 222,
+ 1,
+ 557,
+ 219,
+ 1,
+ 589,
+ 102,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 584,
+ 111,
+ 2,
+ 584,
+ 112,
+ 2,
+ 584,
+ 113,
+ 2,
+ 584,
+ 115,
+ 2,
+ 585,
+ 116,
+ 2,
+ 586,
+ 117,
+ 2,
+ 587,
+ 118,
+ 2,
+ 588,
+ 119,
+ 2,
+ 589,
+ 119,
+ 2,
+ 591,
+ 119,
+ 2,
+ 592,
+ 118,
+ 2,
+ 593,
+ 117,
+ 2,
+ 593,
+ 116,
+ 2,
+ 594,
+ 115,
+ 2,
+ 594,
+ 113,
+ 2,
+ 594,
+ 112,
+ 2,
+ 594,
+ 110,
+ 2,
+ 584,
+ 108,
+ 2,
+ 585,
+ 108,
+ 2,
+ 587,
+ 108,
+ 2,
+ 588,
+ 108,
+ 2,
+ 589,
+ 108,
+ 2,
+ 591,
+ 109,
+ 2,
+ 592,
+ 108,
+ 2,
+ 593,
+ 108,
+ 2,
+ 593,
+ 109,
+ 2,
+ 594,
+ 109,
+ 2,
+ 589,
+ 110,
+ 2,
+ 589,
+ 111,
+ 2,
+ 589,
+ 112,
+ 2,
+ 589,
+ 112,
+ 2,
+ 588,
+ 113,
+ 2,
+ 589,
+ 113,
+ 2,
+ 589,
+ 113,
+ 2,
+ 590,
+ 113,
+ 2,
+ 590,
+ 113,
+ 2,
+ 585,
+ 110,
+ 2,
+ 586,
+ 109,
+ 2,
+ 587,
+ 109,
+ 2,
+ 588,
+ 110,
+ 2,
+ 587,
+ 110,
+ 2,
+ 586,
+ 110,
+ 2,
+ 590,
+ 110,
+ 2,
+ 591,
+ 109,
+ 2,
+ 592,
+ 109,
+ 2,
+ 594,
+ 110,
+ 2,
+ 592,
+ 110,
+ 2,
+ 591,
+ 110,
+ 2,
+ 587,
+ 115,
+ 2,
+ 588,
+ 115,
+ 2,
+ 589,
+ 114,
+ 2,
+ 589,
+ 114,
+ 2,
+ 590,
+ 114,
+ 2,
+ 591,
+ 115,
+ 2,
+ 591,
+ 115,
+ 2,
+ 591,
+ 115,
+ 2,
+ 590,
+ 116,
+ 2,
+ 589,
+ 116,
+ 2,
+ 589,
+ 116,
+ 2,
+ 588,
+ 116,
+ 2,
+ 587,
+ 115,
+ 2,
+ 589,
+ 115,
+ 2,
+ 589,
+ 115,
+ 2,
+ 590,
+ 115,
+ 2,
+ 591,
+ 115,
+ 2,
+ 590,
+ 116,
+ 2,
+ 589,
+ 116,
+ 2,
+ 589,
+ 116,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 555.57,
+ 99.84,
+ 48.32,
+ 113.05
+ ],
+ "category_id": 1,
+ "id": 488308
+ },
+ {
+ "num_keypoints": 0,
+ "area": 285.7906,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 440.85,
+ 73.13,
+ 16.79,
+ 32.45
+ ],
+ "category_id": 1,
+ "id": 508900
+ },
+ {
+ "num_keypoints": 12,
+ "area": 21608.94075,
+ "iscrowd": 0,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 552,
+ 234,
+ 2,
+ 0,
+ 0,
+ 0,
+ 531,
+ 262,
+ 2,
+ 600,
+ 283,
+ 2,
+ 480,
+ 260,
+ 2,
+ 622,
+ 336,
+ 2,
+ 466,
+ 242,
+ 2,
+ 0,
+ 0,
+ 0,
+ 546,
+ 365,
+ 2,
+ 592,
+ 371,
+ 2,
+ 470,
+ 351,
+ 2,
+ 551,
+ 330,
+ 2,
+ 519,
+ 394,
+ 2,
+ 589,
+ 391,
+ 2,
+ 575,
+ 211,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 498,
+ 408,
+ 2,
+ 0,
+ 0,
+ 0,
+ 534,
+ 395,
+ 2,
+ 587,
+ 401,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 453.77,
+ 206.81,
+ 177.23,
+ 210.87
+ ],
+ "category_id": 1,
+ "id": 1717641
+ },
+ {
+ "num_keypoints": 17,
+ "area": 1870.14015,
+ "iscrowd": 0,
+ "keypoints": [
+ 48,
+ 79,
+ 2,
+ 50,
+ 77,
+ 2,
+ 46,
+ 77,
+ 2,
+ 54,
+ 78,
+ 2,
+ 45,
+ 78,
+ 2,
+ 57,
+ 90,
+ 2,
+ 42,
+ 90,
+ 2,
+ 63,
+ 103,
+ 2,
+ 42,
+ 105,
+ 2,
+ 56,
+ 113,
+ 2,
+ 49,
+ 112,
+ 2,
+ 55,
+ 117,
+ 2,
+ 44,
+ 117,
+ 2,
+ 55,
+ 140,
+ 2,
+ 47,
+ 140,
+ 2,
+ 56,
+ 160,
+ 2,
+ 49,
+ 159,
+ 2,
+ 47,
+ 71,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 196141,
+ "bbox": [
+ 36.12,
+ 67.59,
+ 30.41,
+ 96.08
+ ],
+ "category_id": 1,
+ "id": 1724673
+ },
+ {
+ "num_keypoints": 16,
+ "area": 14250.29385,
+ "iscrowd": 0,
+ "keypoints": [
+ 334,
+ 135,
+ 2,
+ 340,
+ 129,
+ 2,
+ 331,
+ 129,
+ 2,
+ 0,
+ 0,
+ 0,
+ 319,
+ 123,
+ 2,
+ 340,
+ 146,
+ 2,
+ 292,
+ 133,
+ 2,
+ 353,
+ 164,
+ 2,
+ 246,
+ 144,
+ 2,
+ 354,
+ 197,
+ 2,
+ 250,
+ 185,
+ 2,
+ 293,
+ 197,
+ 2,
+ 265,
+ 187,
+ 2,
+ 305,
+ 252,
+ 2,
+ 231,
+ 254,
+ 2,
+ 293,
+ 321,
+ 2,
+ 193,
+ 297,
+ 2,
+ 333,
+ 109,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 299,
+ 332,
+ 2,
+ 185,
+ 309,
+ 2,
+ 310,
+ 333,
+ 2,
+ 176,
+ 303,
+ 2,
+ 287,
+ 328,
+ 2,
+ 198,
+ 303,
+ 2,
+ 321,
+ 127,
+ 2,
+ 321,
+ 130,
+ 2,
+ 321,
+ 133,
+ 2,
+ 321,
+ 136,
+ 2,
+ 322,
+ 138,
+ 2,
+ 324,
+ 140,
+ 2,
+ 326,
+ 142,
+ 2,
+ 329,
+ 143,
+ 2,
+ 332,
+ 143,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 328,
+ 125,
+ 2,
+ 330,
+ 125,
+ 2,
+ 331,
+ 126,
+ 2,
+ 333,
+ 126,
+ 2,
+ 335,
+ 127,
+ 2,
+ 339,
+ 128,
+ 2,
+ 340,
+ 127,
+ 2,
+ 342,
+ 126,
+ 2,
+ 343,
+ 126,
+ 2,
+ 345,
+ 125,
+ 2,
+ 336,
+ 130,
+ 2,
+ 336,
+ 132,
+ 2,
+ 337,
+ 134,
+ 2,
+ 338,
+ 136,
+ 2,
+ 334,
+ 138,
+ 2,
+ 335,
+ 138,
+ 2,
+ 337,
+ 138,
+ 2,
+ 338,
+ 137,
+ 2,
+ 339,
+ 138,
+ 2,
+ 329,
+ 127,
+ 2,
+ 331,
+ 127,
+ 2,
+ 333,
+ 128,
+ 2,
+ 334,
+ 129,
+ 2,
+ 332,
+ 130,
+ 2,
+ 331,
+ 129,
+ 2,
+ 339,
+ 129,
+ 2,
+ 341,
+ 127,
+ 2,
+ 342,
+ 127,
+ 2,
+ 344,
+ 127,
+ 2,
+ 342,
+ 129,
+ 2,
+ 341,
+ 129,
+ 2,
+ 329,
+ 139,
+ 2,
+ 331,
+ 139,
+ 2,
+ 333,
+ 139,
+ 2,
+ 334,
+ 139,
+ 2,
+ 334,
+ 139,
+ 2,
+ 335,
+ 139,
+ 2,
+ 336,
+ 139,
+ 2,
+ 335,
+ 140,
+ 2,
+ 334,
+ 141,
+ 2,
+ 333,
+ 141,
+ 2,
+ 333,
+ 141,
+ 2,
+ 331,
+ 141,
+ 2,
+ 330,
+ 139,
+ 2,
+ 333,
+ 139,
+ 2,
+ 334,
+ 140,
+ 2,
+ 334,
+ 139,
+ 2,
+ 336,
+ 140,
+ 2,
+ 334,
+ 140,
+ 2,
+ 334,
+ 141,
+ 2,
+ 331,
+ 141,
+ 2,
+ 0,
+ 0,
+ 0,
+ 349,
+ 202,
+ 2,
+ 345,
+ 203,
+ 2,
+ 342,
+ 207,
+ 2,
+ 338,
+ 212,
+ 2,
+ 349,
+ 214,
+ 2,
+ 341,
+ 219,
+ 2,
+ 336,
+ 219,
+ 2,
+ 333,
+ 218,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 250,
+ 187,
+ 2,
+ 255,
+ 188,
+ 2,
+ 260,
+ 189,
+ 2,
+ 264,
+ 194,
+ 2,
+ 268,
+ 201,
+ 2,
+ 254,
+ 193,
+ 2,
+ 256,
+ 201,
+ 2,
+ 260,
+ 205,
+ 2,
+ 0,
+ 0,
+ 0,
+ 252,
+ 193,
+ 2,
+ 252,
+ 201,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 247,
+ 193,
+ 2,
+ 248,
+ 200,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 139.41,
+ 102.25,
+ 222.39,
+ 241.57
+ ],
+ "category_id": 1,
+ "id": 437295
+ },
+ {
+ "num_keypoints": 16,
+ "area": 3404.869,
+ "iscrowd": 0,
+ "keypoints": [
+ 345,
+ 92,
+ 2,
+ 350,
+ 87,
+ 2,
+ 341,
+ 87,
+ 2,
+ 0,
+ 0,
+ 0,
+ 330,
+ 83,
+ 2,
+ 357,
+ 94,
+ 2,
+ 316,
+ 92,
+ 2,
+ 357,
+ 104,
+ 2,
+ 291,
+ 123,
+ 1,
+ 351,
+ 133,
+ 2,
+ 281,
+ 136,
+ 1,
+ 326,
+ 131,
+ 1,
+ 305,
+ 128,
+ 1,
+ 336,
+ 152,
+ 1,
+ 303,
+ 171,
+ 1,
+ 318,
+ 206,
+ 2,
+ 294,
+ 211,
+ 1,
+ 344,
+ 70,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 320,
+ 214,
+ 2,
+ 0,
+ 0,
+ 0,
+ 328,
+ 213,
+ 2,
+ 0,
+ 0,
+ 0,
+ 313,
+ 210,
+ 2,
+ 0,
+ 0,
+ 0,
+ 333,
+ 85,
+ 2,
+ 333,
+ 87,
+ 2,
+ 333,
+ 89,
+ 2,
+ 334,
+ 92,
+ 2,
+ 335,
+ 95,
+ 2,
+ 337,
+ 97,
+ 2,
+ 338,
+ 98,
+ 2,
+ 341,
+ 99,
+ 2,
+ 343,
+ 100,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 337,
+ 86,
+ 2,
+ 339,
+ 85,
+ 2,
+ 341,
+ 85,
+ 2,
+ 342,
+ 86,
+ 2,
+ 344,
+ 87,
+ 2,
+ 348,
+ 87,
+ 2,
+ 349,
+ 86,
+ 2,
+ 350,
+ 85,
+ 2,
+ 351,
+ 85,
+ 2,
+ 353,
+ 84,
+ 2,
+ 345,
+ 88,
+ 2,
+ 345,
+ 90,
+ 2,
+ 345,
+ 92,
+ 2,
+ 345,
+ 94,
+ 2,
+ 342,
+ 94,
+ 2,
+ 343,
+ 94,
+ 2,
+ 345,
+ 95,
+ 2,
+ 346,
+ 94,
+ 2,
+ 347,
+ 94,
+ 2,
+ 337,
+ 87,
+ 2,
+ 339,
+ 86,
+ 2,
+ 341,
+ 86,
+ 2,
+ 343,
+ 88,
+ 2,
+ 341,
+ 88,
+ 2,
+ 340,
+ 88,
+ 2,
+ 348,
+ 88,
+ 2,
+ 349,
+ 86,
+ 2,
+ 351,
+ 86,
+ 2,
+ 353,
+ 86,
+ 2,
+ 351,
+ 87,
+ 2,
+ 350,
+ 88,
+ 2,
+ 340,
+ 97,
+ 2,
+ 341,
+ 96,
+ 2,
+ 343,
+ 96,
+ 2,
+ 344,
+ 96,
+ 2,
+ 345,
+ 96,
+ 2,
+ 346,
+ 96,
+ 2,
+ 346,
+ 97,
+ 2,
+ 346,
+ 98,
+ 2,
+ 345,
+ 98,
+ 2,
+ 344,
+ 98,
+ 2,
+ 343,
+ 98,
+ 2,
+ 341,
+ 98,
+ 2,
+ 341,
+ 97,
+ 2,
+ 343,
+ 96,
+ 2,
+ 344,
+ 96,
+ 2,
+ 345,
+ 96,
+ 2,
+ 346,
+ 97,
+ 2,
+ 345,
+ 98,
+ 2,
+ 344,
+ 98,
+ 2,
+ 343,
+ 98,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 287.17,
+ 61.52,
+ 74.88,
+ 165.61
+ ],
+ "category_id": 1,
+ "id": 467657
+ },
+ {
+ "num_keypoints": 15,
+ "area": 8913.98475,
+ "iscrowd": 0,
+ "keypoints": [
+ 591,
+ 78,
+ 2,
+ 594,
+ 74,
+ 2,
+ 586,
+ 74,
+ 2,
+ 0,
+ 0,
+ 0,
+ 573,
+ 70,
+ 2,
+ 598,
+ 86,
+ 2,
+ 566,
+ 93,
+ 2,
+ 626,
+ 105,
+ 2,
+ 546,
+ 126,
+ 2,
+ 0,
+ 0,
+ 0,
+ 561,
+ 150,
+ 2,
+ 582,
+ 150,
+ 2,
+ 557,
+ 154,
+ 2,
+ 606,
+ 194,
+ 2,
+ 558,
+ 209,
+ 1,
+ 591,
+ 252,
+ 2,
+ 539,
+ 262,
+ 1,
+ 587,
+ 57,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 600,
+ 262,
+ 2,
+ 0,
+ 0,
+ 0,
+ 604,
+ 261,
+ 2,
+ 0,
+ 0,
+ 0,
+ 586,
+ 262,
+ 2,
+ 0,
+ 0,
+ 0,
+ 576.0,
+ 73.0,
+ 2.0,
+ 577.0,
+ 76.0,
+ 2.0,
+ 577.0,
+ 78.0,
+ 2.0,
+ 577.0,
+ 81.0,
+ 2.0,
+ 579.0,
+ 83.0,
+ 2.0,
+ 580.0,
+ 85.0,
+ 2.0,
+ 583.0,
+ 86.0,
+ 2.0,
+ 585.0,
+ 87.0,
+ 2.0,
+ 588.0,
+ 88.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 590.0,
+ 76.0,
+ 0.0,
+ 590.0,
+ 77.0,
+ 0.0,
+ 591.0,
+ 79.0,
+ 0.0,
+ 591.0,
+ 80.0,
+ 0.0,
+ 587.0,
+ 81.0,
+ 0.0,
+ 589.0,
+ 81.0,
+ 0.0,
+ 591.0,
+ 81.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 586.8761575736252,
+ 83.61172634947533,
+ 2.0,
+ 588.9412473790786,
+ 83.41106519512101,
+ 2.0,
+ 590.7724136651731,
+ 82.86258592792586,
+ 2.0,
+ 591.6996507831648,
+ 82.73443932626762,
+ 2.0,
+ 592.2456105550131,
+ 82.31442081227021,
+ 2.0,
+ 593.6493129356235,
+ 81.90362788181679,
+ 2.0,
+ 594.2114473230698,
+ 81.26071885052849,
+ 2.0,
+ 594.1276526357614,
+ 83.53407437193627,
+ 2.0,
+ 593.6044897939645,
+ 84.44948682598039,
+ 2.0,
+ 592.6541667265051,
+ 84.92630393832337,
+ 2.0,
+ 590.9756801829618,
+ 85.08662594065947,
+ 2.0,
+ 589.348352170458,
+ 84.76877788468903,
+ 2.0,
+ 587.2321394378064,
+ 83.56702886843215,
+ 2.0,
+ 590.3445832495596,
+ 83.57368678672641,
+ 2.0,
+ 591.8126301484949,
+ 83.20736933689491,
+ 2.0,
+ 592.7565172980813,
+ 82.68511125153186,
+ 2.0,
+ 594.1612270579618,
+ 81.3825154024012,
+ 2.0,
+ 593.0988272872626,
+ 83.2510259291705,
+ 2.0,
+ 592.1117610557407,
+ 83.63720194498697,
+ 2.0,
+ 590.626023236443,
+ 84.00301465801164,
+ 2.0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 559,
+ 151,
+ 2,
+ 565,
+ 151,
+ 2,
+ 569,
+ 153,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 568,
+ 156,
+ 2,
+ 570,
+ 162,
+ 2,
+ 571,
+ 166,
+ 2,
+ 571,
+ 169,
+ 2,
+ 565,
+ 157,
+ 2,
+ 565,
+ 162,
+ 2,
+ 566,
+ 164,
+ 2,
+ 566,
+ 166,
+ 2,
+ 561,
+ 158,
+ 2,
+ 562,
+ 161,
+ 2,
+ 563,
+ 163,
+ 2,
+ 563,
+ 165,
+ 2,
+ 558,
+ 159,
+ 2,
+ 559,
+ 162,
+ 2,
+ 560,
+ 163,
+ 2,
+ 560,
+ 164,
+ 2
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 540.04,
+ 48.81,
+ 99.96,
+ 223.36
+ ],
+ "category_id": 1,
+ "id": 531914
+ },
+ {
+ "num_keypoints": 16,
+ "area": 14267.20475,
+ "iscrowd": 0,
+ "keypoints": [
+ 580,
+ 211,
+ 2,
+ 586,
+ 206,
+ 2,
+ 574,
+ 204,
+ 2,
+ 0,
+ 0,
+ 0,
+ 562,
+ 198,
+ 2,
+ 584,
+ 220,
+ 2,
+ 529,
+ 215,
+ 2,
+ 599,
+ 242,
+ 2,
+ 512,
+ 260,
+ 2,
+ 619,
+ 274,
+ 2,
+ 538,
+ 285,
+ 2,
+ 537,
+ 288,
+ 2,
+ 506,
+ 277,
+ 2,
+ 562,
+ 332,
+ 2,
+ 452,
+ 332,
+ 2,
+ 550,
+ 387,
+ 1,
+ 402,
+ 371,
+ 2,
+ 582,
+ 184,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 387,
+ 389,
+ 2,
+ 0,
+ 0,
+ 0,
+ 374,
+ 383,
+ 2,
+ 0,
+ 0,
+ 0,
+ 390,
+ 365,
+ 2,
+ 559,
+ 197,
+ 2,
+ 559,
+ 202,
+ 2,
+ 559,
+ 205,
+ 2,
+ 560,
+ 209,
+ 2,
+ 561,
+ 213,
+ 2,
+ 564,
+ 217,
+ 2,
+ 567,
+ 220,
+ 2,
+ 570,
+ 223,
+ 2,
+ 573,
+ 225,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 573,
+ 201,
+ 2,
+ 575,
+ 202,
+ 2,
+ 577,
+ 203,
+ 2,
+ 579,
+ 204,
+ 2,
+ 580,
+ 206,
+ 2,
+ 584,
+ 207,
+ 2,
+ 585,
+ 206,
+ 2,
+ 587,
+ 205,
+ 2,
+ 589,
+ 205,
+ 2,
+ 590,
+ 205,
+ 2,
+ 582,
+ 207,
+ 2,
+ 582,
+ 209,
+ 2,
+ 581,
+ 212,
+ 2,
+ 581,
+ 215,
+ 2,
+ 577,
+ 214,
+ 2,
+ 578,
+ 214,
+ 2,
+ 580,
+ 216,
+ 2,
+ 581,
+ 216,
+ 2,
+ 582,
+ 216,
+ 2,
+ 573,
+ 204,
+ 2,
+ 576,
+ 204,
+ 2,
+ 578,
+ 205,
+ 2,
+ 580,
+ 207,
+ 2,
+ 578,
+ 207,
+ 2,
+ 575,
+ 206,
+ 2,
+ 584,
+ 208,
+ 2,
+ 586,
+ 207,
+ 2,
+ 588,
+ 206,
+ 2,
+ 590,
+ 207,
+ 2,
+ 588,
+ 208,
+ 2,
+ 586,
+ 209,
+ 2,
+ 571,
+ 217,
+ 2,
+ 574,
+ 217,
+ 2,
+ 576,
+ 217,
+ 2,
+ 577,
+ 217,
+ 2,
+ 577,
+ 217,
+ 2,
+ 578,
+ 217,
+ 2,
+ 579,
+ 218,
+ 2,
+ 578,
+ 219,
+ 2,
+ 577,
+ 219,
+ 2,
+ 576,
+ 220,
+ 2,
+ 575,
+ 219,
+ 2,
+ 573,
+ 218,
+ 2,
+ 572,
+ 217,
+ 2,
+ 576,
+ 217,
+ 2,
+ 576,
+ 218,
+ 2,
+ 577,
+ 218,
+ 2,
+ 579,
+ 218,
+ 2,
+ 577,
+ 219,
+ 2,
+ 576,
+ 219,
+ 2,
+ 575,
+ 219,
+ 2,
+ 622,
+ 274,
+ 2,
+ 620,
+ 281,
+ 2,
+ 620,
+ 287,
+ 2,
+ 623,
+ 292,
+ 2,
+ 627,
+ 297,
+ 2,
+ 628,
+ 284,
+ 2,
+ 635,
+ 290,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 628,
+ 281,
+ 2,
+ 631,
+ 285,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 628,
+ 278,
+ 2,
+ 632,
+ 283,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 628,
+ 277,
+ 2,
+ 631,
+ 279,
+ 2,
+ 633,
+ 282,
+ 2,
+ 0,
+ 0,
+ 0,
+ 542,
+ 286,
+ 2,
+ 551,
+ 285,
+ 2,
+ 557,
+ 289,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 557,
+ 293,
+ 2,
+ 559,
+ 301,
+ 2,
+ 559,
+ 306,
+ 2,
+ 558,
+ 312,
+ 2,
+ 551,
+ 293,
+ 2,
+ 552,
+ 302,
+ 2,
+ 552,
+ 307,
+ 2,
+ 0,
+ 0,
+ 0,
+ 546,
+ 296,
+ 2,
+ 548,
+ 302,
+ 2,
+ 549,
+ 307,
+ 2,
+ 0,
+ 0,
+ 0,
+ 543,
+ 298,
+ 2,
+ 544,
+ 303,
+ 2,
+ 545,
+ 307,
+ 2,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 372.58,
+ 170.84,
+ 266.63,
+ 217.19
+ ],
+ "category_id": 1,
+ "id": 533949
+ },
+ {
+ "num_keypoints": 13,
+ "area": 8260.75085,
+ "iscrowd": 0,
+ "keypoints": [
+ 36,
+ 79,
+ 2,
+ 40,
+ 74,
+ 2,
+ 31,
+ 75,
+ 2,
+ 0,
+ 0,
+ 0,
+ 19,
+ 69,
+ 2,
+ 45,
+ 77,
+ 2,
+ 2,
+ 89,
+ 2,
+ 74,
+ 99,
+ 2,
+ 0,
+ 0,
+ 0,
+ 78,
+ 92,
+ 2,
+ 0,
+ 0,
+ 0,
+ 33,
+ 149,
+ 2,
+ 7,
+ 153,
+ 2,
+ 44,
+ 196,
+ 2,
+ 2,
+ 205,
+ 2,
+ 35,
+ 245,
+ 2,
+ 0,
+ 0,
+ 0,
+ 33,
+ 54,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 41,
+ 255,
+ 2,
+ 0,
+ 0,
+ 0,
+ 48,
+ 255,
+ 2,
+ 0,
+ 0,
+ 0,
+ 29,
+ 253,
+ 2,
+ 0,
+ 0,
+ 0,
+ 22,
+ 70,
+ 2,
+ 22,
+ 73,
+ 2,
+ 23,
+ 76,
+ 2,
+ 24,
+ 78,
+ 2,
+ 25,
+ 80,
+ 2,
+ 27,
+ 82,
+ 2,
+ 29,
+ 84,
+ 2,
+ 31,
+ 85,
+ 2,
+ 34,
+ 85,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 27,
+ 72,
+ 2,
+ 29,
+ 72,
+ 2,
+ 31,
+ 72,
+ 2,
+ 33,
+ 72,
+ 2,
+ 34,
+ 73,
+ 2,
+ 38,
+ 73,
+ 2,
+ 40,
+ 72,
+ 2,
+ 41,
+ 71,
+ 2,
+ 42,
+ 71,
+ 2,
+ 43,
+ 70,
+ 2,
+ 37,
+ 75,
+ 2,
+ 37,
+ 77,
+ 2,
+ 37,
+ 78,
+ 2,
+ 37,
+ 80,
+ 2,
+ 35,
+ 80,
+ 2,
+ 37,
+ 80,
+ 2,
+ 37,
+ 80,
+ 2,
+ 38,
+ 80,
+ 2,
+ 39,
+ 80,
+ 2,
+ 28,
+ 74,
+ 2,
+ 31,
+ 73,
+ 2,
+ 33,
+ 74,
+ 2,
+ 34,
+ 75,
+ 2,
+ 32,
+ 76,
+ 2,
+ 31,
+ 75,
+ 2,
+ 39,
+ 75,
+ 2,
+ 40,
+ 73,
+ 2,
+ 41,
+ 73,
+ 2,
+ 43,
+ 72,
+ 2,
+ 42,
+ 74,
+ 2,
+ 40,
+ 75,
+ 2,
+ 30,
+ 83,
+ 2,
+ 33,
+ 82,
+ 2,
+ 35,
+ 82,
+ 2,
+ 36,
+ 82,
+ 2,
+ 37,
+ 82,
+ 2,
+ 37,
+ 82,
+ 2,
+ 38,
+ 82,
+ 2,
+ 37,
+ 83,
+ 2,
+ 37,
+ 84,
+ 2,
+ 36,
+ 84,
+ 2,
+ 35,
+ 84,
+ 2,
+ 33,
+ 84,
+ 2,
+ 31,
+ 83,
+ 2,
+ 35,
+ 83,
+ 2,
+ 36,
+ 83,
+ 2,
+ 37,
+ 83,
+ 2,
+ 38,
+ 82,
+ 2,
+ 37,
+ 83,
+ 2,
+ 36,
+ 84,
+ 2,
+ 35,
+ 84,
+ 2,
+ 0,
+ 0,
+ 0,
+ 76,
+ 89,
+ 2,
+ 74,
+ 86,
+ 2,
+ 75,
+ 81,
+ 2,
+ 77,
+ 76,
+ 2,
+ 86,
+ 80,
+ 2,
+ 82,
+ 84,
+ 2,
+ 78,
+ 85,
+ 2,
+ 76,
+ 86,
+ 2,
+ 86,
+ 83,
+ 2,
+ 82,
+ 86,
+ 2,
+ 79,
+ 87,
+ 2,
+ 76,
+ 87,
+ 2,
+ 87,
+ 84,
+ 2,
+ 84,
+ 88,
+ 2,
+ 80,
+ 88,
+ 2,
+ 78,
+ 89,
+ 2,
+ 88,
+ 87,
+ 2,
+ 85,
+ 89,
+ 2,
+ 82,
+ 90,
+ 2,
+ 79,
+ 91,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 197388,
+ "bbox": [
+ 0.5,
+ 43.74,
+ 90.1,
+ 220.09
+ ],
+ "category_id": 1,
+ "id": 543117
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/horse10/0244.png b/vendor/ViTPose/tests/data/horse10/0244.png
new file mode 100644
index 0000000000000000000000000000000000000000..d4f23c3a8303a01b4a507629cbbed0011a228c92
Binary files /dev/null and b/vendor/ViTPose/tests/data/horse10/0244.png differ
diff --git a/vendor/ViTPose/tests/data/horse10/0292.png b/vendor/ViTPose/tests/data/horse10/0292.png
new file mode 100644
index 0000000000000000000000000000000000000000..6fbb079f28100528fba2899ae3178184f2d0dae6
Binary files /dev/null and b/vendor/ViTPose/tests/data/horse10/0292.png differ
diff --git a/vendor/ViTPose/tests/data/horse10/0465.png b/vendor/ViTPose/tests/data/horse10/0465.png
new file mode 100644
index 0000000000000000000000000000000000000000..762ff9406a548841c03db3acf479f32c4a95b79f
Binary files /dev/null and b/vendor/ViTPose/tests/data/horse10/0465.png differ
diff --git a/vendor/ViTPose/tests/data/horse10/test_horse10.json b/vendor/ViTPose/tests/data/horse10/test_horse10.json
new file mode 100644
index 0000000000000000000000000000000000000000..c85f53f9b88aefdf4c3478d2b561086759eda426
--- /dev/null
+++ b/vendor/ViTPose/tests/data/horse10/test_horse10.json
@@ -0,0 +1,302 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "horse",
+ "keypoints": [
+ "Nose",
+ "Eye",
+ "Nearknee",
+ "Nearfrontfetlock",
+ "Nearfrontfoot",
+ "Offknee",
+ "Offfrontfetlock",
+ "Offfrontfoot",
+ "Shoulder",
+ "Midshoulder",
+ "Elbow",
+ "Girth",
+ "Wither",
+ "Nearhindhock",
+ "Nearhindfetlock",
+ "Nearhindfoot",
+ "Hip",
+ "Stifle",
+ "Offhindhock",
+ "Offhindfetlock",
+ "Offhindfoot",
+ "Ischium"
+ ],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 100,
+ "file_name": "0244.png",
+ "height": 162,
+ "width": 288
+ },
+ {
+ "id": 500,
+ "file_name": "0292.png",
+ "height": 162,
+ "width": 288
+ },
+ {
+ "id": 900,
+ "file_name": "0465.png",
+ "height": 162,
+ "width": 288
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 126.0,
+ 71.1,
+ 2.0,
+ 117.3,
+ 56.4,
+ 2.0,
+ 90.0,
+ 98.7,
+ 2.0,
+ 92.1,
+ 112.8,
+ 2.0,
+ 98.7,
+ 117.3,
+ 2.0,
+ 71.39999999999999,
+ 102.89999999999999,
+ 2.0,
+ 63.599999999999994,
+ 114.0,
+ 2.0,
+ 56.699999999999996,
+ 120.0,
+ 2.0,
+ 80.1,
+ 73.5,
+ 2.0,
+ 78.3,
+ 63.0,
+ 2.0,
+ 67.5,
+ 82.2,
+ 2.0,
+ 65.39999999999999,
+ 82.8,
+ 2.0,
+ 72.0,
+ 52.199999999999996,
+ 2.0,
+ 29.4,
+ 97.5,
+ 2.0,
+ 27.0,
+ 113.39999999999999,
+ 2.0,
+ 31.5,
+ 120.6,
+ 2.0,
+ 36.3,
+ 56.1,
+ 2.0,
+ 37.5,
+ 75.6,
+ 2.0,
+ 38.4,
+ 97.8,
+ 2.0,
+ 46.8,
+ 112.8,
+ 2.0,
+ 51.0,
+ 120.3,
+ 2.0,
+ 23.099999999999998,
+ 63.599999999999994,
+ 2.0
+ ],
+ "image_id": 100,
+ "id": 100,
+ "num_keypoints": 22,
+ "bbox": [
+ 2,
+ 38,
+ 145,
+ 97
+ ],
+ "iscrowd": 0,
+ "area": 14065,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 267.9,
+ 67.8,
+ 2.0,
+ 265.5,
+ 51.6,
+ 2.0,
+ 200.7,
+ 94.8,
+ 2.0,
+ 190.79999999999998,
+ 106.2,
+ 2.0,
+ 190.2,
+ 114.6,
+ 2.0,
+ 229.5,
+ 97.8,
+ 2.0,
+ 234.0,
+ 111.6,
+ 2.0,
+ 240.6,
+ 118.19999999999999,
+ 2.0,
+ 233.7,
+ 69.0,
+ 2.0,
+ 226.5,
+ 57.599999999999994,
+ 2.0,
+ 219.6,
+ 79.5,
+ 2.0,
+ 213.0,
+ 81.6,
+ 2.0,
+ 216.29999999999998,
+ 48.3,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 182.1,
+ 47.699999999999996,
+ 2.0,
+ 176.1,
+ 72.0,
+ 2.0,
+ 162.0,
+ 92.1,
+ 2.0,
+ 162.9,
+ 111.89999999999999,
+ 2.0,
+ 167.4,
+ 117.6,
+ 2.0,
+ 161.4,
+ 54.9,
+ 2.0
+ ],
+ "image_id": 500,
+ "id": 500,
+ "num_keypoints": 19,
+ "bbox": [
+ 140,
+ 33,
+ 148,
+ 100
+ ],
+ "iscrowd": 0,
+ "area": 14800,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 286.2,
+ 118.8,
+ 2.0,
+ 282.0,
+ 123.89999999999999,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 252.0,
+ 102.89999999999999,
+ 2.0,
+ 261.9,
+ 119.39999999999999,
+ 2.0,
+ 268.8,
+ 128.1,
+ 2.0,
+ 263.09999999999997,
+ 60.3,
+ 2.0,
+ 253.5,
+ 84.0,
+ 2.0,
+ 236.7,
+ 103.2,
+ 2.0,
+ 230.7,
+ 121.19999999999999,
+ 2.0,
+ 234.6,
+ 129.0,
+ 2.0,
+ 240.29999999999998,
+ 67.5,
+ 2.0
+ ],
+ "image_id": 900,
+ "id": 900,
+ "num_keypoints": 11,
+ "bbox": [
+ 219,
+ 46,
+ 69,
+ 97
+ ],
+ "iscrowd": 0,
+ "area": 6693,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/image2017.jpg b/vendor/ViTPose/tests/data/interhand2.6m/image2017.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8d6a540270833a39ec368f654620876ad058bfc8
Binary files /dev/null and b/vendor/ViTPose/tests/data/interhand2.6m/image2017.jpg differ
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/image29590.jpg b/vendor/ViTPose/tests/data/interhand2.6m/image29590.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a312b5e9ae131ab90c6f817b539533f739ae597c
Binary files /dev/null and b/vendor/ViTPose/tests/data/interhand2.6m/image29590.jpg differ
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/image44669.jpg b/vendor/ViTPose/tests/data/interhand2.6m/image44669.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0bd170b8b653ea3c957ee1117132902c43d76ea9
Binary files /dev/null and b/vendor/ViTPose/tests/data/interhand2.6m/image44669.jpg differ
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/image69148.jpg b/vendor/ViTPose/tests/data/interhand2.6m/image69148.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5d63d3fddc9eab173f52fc728c73f29df19b8989
Binary files /dev/null and b/vendor/ViTPose/tests/data/interhand2.6m/image69148.jpg differ
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_camera.json b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_camera.json
new file mode 100644
index 0000000000000000000000000000000000000000..fabfe886a1e76f07cfdf480484b44d4490cc5b33
--- /dev/null
+++ b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_camera.json
@@ -0,0 +1,162 @@
+{
+ "3": {
+ "campos": {
+ "400026": [
+ -415.1940002441406,
+ 132.24954223632812,
+ 59.5650749206543
+ ]
+ },
+ "camrot": {
+ "400026": [
+ [
+ 0.9201921224594116,
+ -0.012140202336013317,
+ -0.39127883315086365
+ ],
+ [
+ 0.06150508299469948,
+ 0.9915890097618103,
+ 0.11387889832258224
+ ],
+ [
+ 0.38660526275634766,
+ -0.1288560926914215,
+ 0.9131990671157837
+ ]
+ ]
+ },
+ "focal": {
+ "400026": [
+ 1261.5291748046875,
+ 1261.6845703125
+ ]
+ },
+ "princpt": {
+ "400026": [
+ 155.8163604736328,
+ 258.8305969238281
+ ]
+ }
+ },
+ "2": {
+ "campos": {
+ "400012": [
+ 606.0524291992188,
+ -174.7548828125,
+ 163.86656188964844
+ ]
+ },
+ "camrot": {
+ "400012": [
+ [
+ 0.82091224193573,
+ 0.05194839835166931,
+ 0.5686866044998169
+ ],
+ [
+ 0.05824033170938492,
+ 0.9830448031425476,
+ -0.17387045919895172
+ ],
+ [
+ -0.5680767297744751,
+ 0.17585287988185883,
+ 0.8039680123329163
+ ]
+ ]
+ },
+ "focal": {
+ "400012": [
+ 1270.7069091796875,
+ 1270.5194091796875
+ ]
+ },
+ "princpt": {
+ "400012": [
+ 196.347412109375,
+ 240.42515563964844
+ ]
+ }
+ },
+ "7": {
+ "campos": {
+ "410053": [
+ 973.9876098632812,
+ -151.85047912597656,
+ 576.7235107421875
+ ]
+ },
+ "camrot": {
+ "410053": [
+ [
+ 0.42785099148750305,
+ 0.07326933741569519,
+ 0.900874674320221
+ ],
+ [
+ 0.10334496945142746,
+ 0.9862067103385925,
+ -0.12929096817970276
+ ],
+ [
+ -0.8979216814041138,
+ 0.148418128490448,
+ 0.41437748074531555
+ ]
+ ]
+ },
+ "focal": {
+ "410053": [
+ 1272.947021484375,
+ 1272.957275390625
+ ]
+ },
+ "princpt": {
+ "410053": [
+ 187.24343872070312,
+ 243.6494903564453
+ ]
+ }
+ },
+ "4": {
+ "campos": {
+ "410028": [
+ 224.87350463867188,
+ 144.3102569580078,
+ -8.186153411865234
+ ]
+ },
+ "camrot": {
+ "410028": [
+ [
+ 0.9784372448921204,
+ 0.024140462279319763,
+ 0.2051287442445755
+ ],
+ [
+ -0.048440802842378616,
+ 0.9922666549682617,
+ 0.11428194493055344
+ ],
+ [
+ -0.2007835954427719,
+ -0.12175431102514267,
+ 0.972040057182312
+ ]
+ ]
+ },
+ "focal": {
+ "410028": [
+ 1274.1224365234375,
+ 1274.2861328125
+ ]
+ },
+ "princpt": {
+ "410028": [
+ 270.805419921875,
+ 175.498046875
+ ]
+ }
+ }
+}
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_data.json b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..723af749ecc90a7fff2940108178171bf24c4ead
--- /dev/null
+++ b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_data.json
@@ -0,0 +1,610 @@
+{
+ "images": [
+ {
+ "id": 326750,
+ "file_name": "image69148.jpg",
+ "width": 334,
+ "height": 512,
+ "capture": 3,
+ "subject": 3,
+ "seq_name": "0390_dh_touchROM",
+ "camera": "400026",
+ "frame_idx": 69148
+ },
+ {
+ "id": 286291,
+ "file_name": "image44669.jpg",
+ "width": 334,
+ "height": 512,
+ "capture": 2,
+ "subject": 2,
+ "seq_name": "0266_dh_pray",
+ "camera": "400012",
+ "frame_idx": 44669
+ },
+ {
+ "id": 680801,
+ "file_name": "image29590.jpg",
+ "width": 334,
+ "height": 512,
+ "capture": 7,
+ "subject": 6,
+ "seq_name": "0115_rocker_backside",
+ "camera": "410053",
+ "frame_idx": 29590
+ },
+ {
+ "id": 471953,
+ "file_name": "image2017.jpg",
+ "width": 512,
+ "height": 334,
+ "capture": 4,
+ "subject": 0,
+ "seq_name": "0007_thumbup_normal",
+ "camera": "410028",
+ "frame_idx": 2017
+ }
+ ],
+ "annotations": [
+ {
+ "id": 326750,
+ "image_id": 326750,
+ "bbox": [
+ 33.56839370727539,
+ 164.92373657226562,
+ 185.057861328125,
+ 142.7256622314453
+ ],
+ "joint_valid": [
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ]
+ ],
+ "hand_type": "interacting",
+ "hand_type_valid": 1
+ },
+ {
+ "id": 286291,
+ "image_id": 286291,
+ "bbox": [
+ 116.43374633789062,
+ 79.66770935058594,
+ 163.1707763671875,
+ 175.00582885742188
+ ],
+ "joint_valid": [
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ]
+ ],
+ "hand_type": "interacting",
+ "hand_type_valid": 1
+ },
+ {
+ "id": 680801,
+ "image_id": 680801,
+ "bbox": [
+ 32.624629974365234,
+ 116.9090805053711,
+ 182.95919799804688,
+ 117.79376983642578
+ ],
+ "joint_valid": [
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ]
+ ],
+ "hand_type": "left",
+ "hand_type_valid": 1
+ },
+ {
+ "id": 471953,
+ "image_id": 471953,
+ "bbox": [
+ 154.45904541015625,
+ 27.944841384887695,
+ 90.6390380859375,
+ 184.53550720214844
+ ],
+ "joint_valid": [
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 1
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ],
+ [
+ 0
+ ]
+ ],
+ "hand_type": "right",
+ "hand_type_valid": 1
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json
new file mode 100644
index 0000000000000000000000000000000000000000..c7df48ec6dcc22ebcf7e9c2646f251665d6a98a7
--- /dev/null
+++ b/vendor/ViTPose/tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json
@@ -0,0 +1,1386 @@
+{
+ "3": {
+ "69148": {
+ "world_coord": [
+ [
+ 43.03519821166992,
+ -82.2948989868164,
+ 1090.739990234375
+ ],
+ [
+ 15.719200134277344,
+ -80.0010986328125,
+ 1093.1600341796875
+ ],
+ [
+ -14.644499778747559,
+ -69.28589630126953,
+ 1099.43994140625
+ ],
+ [
+ -49.500701904296875,
+ -48.752601623535156,
+ 1105.739990234375
+ ],
+ [
+ 54.10329818725586,
+ -52.207000732421875,
+ 1071.3699951171875
+ ],
+ [
+ 44.71070098876953,
+ -60.90570068359375,
+ 1053.8299560546875
+ ],
+ [
+ 23.355300903320312,
+ -66.95800018310547,
+ 1042.2099609375
+ ],
+ [
+ -16.109899520874023,
+ -59.50210189819336,
+ 1054.7900390625
+ ],
+ [
+ 46.1421012878418,
+ -27.448999404907227,
+ 1044.06005859375
+ ],
+ [
+ 37.125099182128906,
+ -28.523000717163086,
+ 1025.0699462890625
+ ],
+ [
+ 11.805299758911133,
+ -33.19449996948242,
+ 1015.8599853515625
+ ],
+ [
+ -16.28969955444336,
+ -35.808101654052734,
+ 1042.489990234375
+ ],
+ [
+ 38.874000549316406,
+ -5.6127800941467285,
+ 1047.969970703125
+ ],
+ [
+ 29.25860023498535,
+ 1.5931299924850464,
+ 1033.010009765625
+ ],
+ [
+ 5.099699974060059,
+ 0.5625370144844055,
+ 1025.989990234375
+ ],
+ [
+ -19.3031005859375,
+ -10.963600158691406,
+ 1047.75
+ ],
+ [
+ 32.95539855957031,
+ 15.77239990234375,
+ 1067.75
+ ],
+ [
+ 20.76289939880371,
+ 22.153799057006836,
+ 1056.8699951171875
+ ],
+ [
+ 1.3557000160217285,
+ 20.561199188232422,
+ 1053.72998046875
+ ],
+ [
+ -22.658199310302734,
+ 9.142279624938965,
+ 1060.719970703125
+ ],
+ [
+ -73.1697006225586,
+ -24.75469970703125,
+ 1109.239990234375
+ ],
+ [
+ -25.849300384521484,
+ -76.86360168457031,
+ 1051.6700439453125
+ ],
+ [
+ -3.0323801040649414,
+ -76.0531997680664,
+ 1065.52001953125
+ ],
+ [
+ 23.313899993896484,
+ -64.78929901123047,
+ 1083.3800048828125
+ ],
+ [
+ 52.25170135498047,
+ -44.1338005065918,
+ 1104.050048828125
+ ],
+ [
+ -33.858699798583984,
+ -56.21229934692383,
+ 1052.5
+ ],
+ [
+ -22.00670051574707,
+ -54.78179931640625,
+ 1034.52001953125
+ ],
+ [
+ -1.2521899938583374,
+ -52.484100341796875,
+ 1021.3099975585938
+ ],
+ [
+ 34.637001037597656,
+ -44.75859832763672,
+ 1045.5
+ ],
+ [
+ -38.37810134887695,
+ -28.615999221801758,
+ 1048.77001953125
+ ],
+ [
+ -27.590499877929688,
+ -21.247900009155273,
+ 1031.81005859375
+ ],
+ [
+ -2.5142500400543213,
+ -15.51039981842041,
+ 1018.5399780273438
+ ],
+ [
+ 27.976900100708008,
+ -17.880300521850586,
+ 1042.260009765625
+ ],
+ [
+ -42.81999969482422,
+ -7.296229839324951,
+ 1054.06005859375
+ ],
+ [
+ -29.135400772094727,
+ 4.149099826812744,
+ 1043.780029296875
+ ],
+ [
+ -3.725130081176758,
+ 12.038700103759766,
+ 1038.300048828125
+ ],
+ [
+ 22.907699584960938,
+ 3.807229995727539,
+ 1054.469970703125
+ ],
+ [
+ -37.29899978637695,
+ 14.395500183105469,
+ 1082.6500244140625
+ ],
+ [
+ -24.36440086364746,
+ 21.874000549316406,
+ 1073.199951171875
+ ],
+ [
+ -4.3188300132751465,
+ 23.260000228881836,
+ 1067.699951171875
+ ],
+ [
+ 19.15329933166504,
+ 15.103500366210938,
+ 1070.9300537109375
+ ],
+ [
+ 68.70819854736328,
+ -17.395599365234375,
+ 1113.5
+ ]
+ ],
+ "joint_valid": [
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ]
+ ],
+ "hand_type": "interacting",
+ "hand_type_valid": true
+ }
+ },
+ "2": {
+ "44669": {
+ "world_coord": [
+ [
+ 5.577770233154297,
+ -108.26300048828125,
+ 1036.1800537109375
+ ],
+ [
+ -1.7330399751663208,
+ -83.05719757080078,
+ 1046.1300048828125
+ ],
+ [
+ -9.004420280456543,
+ -57.55229949951172,
+ 1056.969970703125
+ ],
+ [
+ -15.412199974060059,
+ -23.791000366210938,
+ 1063.6199951171875
+ ],
+ [
+ 14.21619987487793,
+ -101.65699768066406,
+ 939.3079833984375
+ ],
+ [
+ 8.966190338134766,
+ -89.8812026977539,
+ 958.7620239257812
+ ],
+ [
+ 2.066649913787842,
+ -76.95020294189453,
+ 978.1170043945312
+ ],
+ [
+ -10.0802001953125,
+ -54.769500732421875,
+ 1008.3200073242188
+ ],
+ [
+ 15.2121000289917,
+ -91.22869873046875,
+ 921.1090087890625
+ ],
+ [
+ 9.925020217895508,
+ -78.84239959716797,
+ 940.4929809570312
+ ],
+ [
+ 0.7208520174026489,
+ -62.57080078125,
+ 963.5479736328125
+ ],
+ [
+ -12.486300468444824,
+ -35.79169845581055,
+ 996.291015625
+ ],
+ [
+ 15.342300415039062,
+ -71.01309967041016,
+ 920.666015625
+ ],
+ [
+ 10.613200187683105,
+ -56.69279861450195,
+ 938.3099975585938
+ ],
+ [
+ 3.1483700275421143,
+ -40.60240173339844,
+ 958.9559936523438
+ ],
+ [
+ -7.7616801261901855,
+ -16.659000396728516,
+ 990.3289794921875
+ ],
+ [
+ 9.923910140991211,
+ -27.469100952148438,
+ 926.0250244140625
+ ],
+ [
+ 7.101960182189941,
+ -17.535900115966797,
+ 944.4459838867188
+ ],
+ [
+ 3.706239938735962,
+ -9.478739738464355,
+ 961.2869873046875
+ ],
+ [
+ -3.7822699546813965,
+ 4.785309791564941,
+ 988.9990234375
+ ],
+ [
+ -38.23350143432617,
+ 10.85420036315918,
+ 1060.1099853515625
+ ],
+ [
+ 16.591100692749023,
+ -104.06300354003906,
+ 1032.6300048828125
+ ],
+ [
+ 17.85449981689453,
+ -79.44409942626953,
+ 1044.3399658203125
+ ],
+ [
+ 20.3125,
+ -54.15850067138672,
+ 1059.300048828125
+ ],
+ [
+ 23.35300064086914,
+ -20.347400665283203,
+ 1065.97998046875
+ ],
+ [
+ 28.29199981689453,
+ -103.08699798583984,
+ 941.9169921875
+ ],
+ [
+ 25.710399627685547,
+ -89.40409851074219,
+ 960.614013671875
+ ],
+ [
+ 24.782400131225586,
+ -75.04440307617188,
+ 980.3049926757812
+ ],
+ [
+ 28.035999298095703,
+ -51.11090087890625,
+ 1011.47998046875
+ ],
+ [
+ 28.736099243164062,
+ -94.62069702148438,
+ 921.4580078125
+ ],
+ [
+ 26.57539939880371,
+ -79.62640380859375,
+ 940.0040283203125
+ ],
+ [
+ 27.174400329589844,
+ -61.3489990234375,
+ 963.3049926757812
+ ],
+ [
+ 30.206899642944336,
+ -34.29090118408203,
+ 998.177001953125
+ ],
+ [
+ 27.319000244140625,
+ -72.35669708251953,
+ 919.8040161132812
+ ],
+ [
+ 24.843399047851562,
+ -56.612098693847656,
+ 937.927001953125
+ ],
+ [
+ 24.483699798583984,
+ -40.11029815673828,
+ 958.5869750976562
+ ],
+ [
+ 26.43560028076172,
+ -15.020400047302246,
+ 993.2479858398438
+ ],
+ [
+ 21.380199432373047,
+ -28.9552001953125,
+ 928.9580078125
+ ],
+ [
+ 19.721099853515625,
+ -17.84980010986328,
+ 945.948974609375
+ ],
+ [
+ 19.108400344848633,
+ -9.263039588928223,
+ 961.9609985351562
+ ],
+ [
+ 21.694400787353516,
+ 6.166550159454346,
+ 992.6019897460938
+ ],
+ [
+ 41.08219909667969,
+ 14.29419994354248,
+ 1066.219970703125
+ ]
+ ],
+ "joint_valid": [
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ]
+ ],
+ "hand_type": "interacting",
+ "hand_type_valid": true
+ }
+ },
+ "7": {
+ "29590": {
+ "world_coord": [
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ -40.71049880981445,
+ -95.89289855957031,
+ 957.885986328125
+ ],
+ [
+ -26.640199661254883,
+ -90.76909637451172,
+ 980.64697265625
+ ],
+ [
+ -13.102499961853027,
+ -84.02850341796875,
+ 1006.030029296875
+ ],
+ [
+ -6.498330116271973,
+ -56.183799743652344,
+ 1019.77001953125
+ ],
+ [
+ -41.832698822021484,
+ -92.818603515625,
+ 924.9089965820312
+ ],
+ [
+ -22.394100189208984,
+ -89.95939636230469,
+ 928.322998046875
+ ],
+ [
+ -1.334820032119751,
+ -85.85769653320312,
+ 937.4730224609375
+ ],
+ [
+ 16.722900390625,
+ -76.81639862060547,
+ 969.6079711914062
+ ],
+ [
+ -50.71979904174805,
+ -79.05770111083984,
+ 930.426025390625
+ ],
+ [
+ -30.792600631713867,
+ -72.3136978149414,
+ 923.927978515625
+ ],
+ [
+ -3.884079933166504,
+ -64.59410095214844,
+ 926.760986328125
+ ],
+ [
+ 17.58009910583496,
+ -55.07429885864258,
+ 961.2780151367188
+ ],
+ [
+ -49.63779830932617,
+ -60.04690170288086,
+ 929.5150146484375
+ ],
+ [
+ -30.802799224853516,
+ -51.40850067138672,
+ 926.1209716796875
+ ],
+ [
+ -7.165579795837402,
+ -42.4640998840332,
+ 930.6539916992188
+ ],
+ [
+ 11.347599983215332,
+ -35.320701599121094,
+ 960.9920043945312
+ ],
+ [
+ -39.11000061035156,
+ -22.250900268554688,
+ 922.677978515625
+ ],
+ [
+ -25.091400146484375,
+ -18.327800750732422,
+ 932.7310180664062
+ ],
+ [
+ -11.675299644470215,
+ -16.21780014038086,
+ 943.9849853515625
+ ],
+ [
+ -1.8433400392532349,
+ -18.33609962463379,
+ 966.3060302734375
+ ],
+ [
+ 12.858099937438965,
+ -27.72319984436035,
+ 1035.3299560546875
+ ]
+ ],
+ "joint_valid": [
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ]
+ ],
+ "hand_type": "left",
+ "hand_type_valid": true
+ }
+ },
+ "4": {
+ "2017": {
+ "world_coord": [
+ [
+ -43.12799835205078,
+ -103.62300109863281,
+ 1034.6500244140625
+ ],
+ [
+ -46.652801513671875,
+ -77.29830169677734,
+ 1047.8599853515625
+ ],
+ [
+ -61.026798248291016,
+ -60.12670135498047,
+ 1071.8699951171875
+ ],
+ [
+ -65.07230377197266,
+ -23.870800018310547,
+ 1084.949951171875
+ ],
+ [
+ -21.526500701904297,
+ -36.08110046386719,
+ 1048.530029296875
+ ],
+ [
+ -23.342899322509766,
+ -42.05630111694336,
+ 1027.739990234375
+ ],
+ [
+ -40.31650161743164,
+ -48.307098388671875,
+ 1008.1400146484375
+ ],
+ [
+ -80.72380065917969,
+ -43.20439910888672,
+ 1025.510009765625
+ ],
+ [
+ -28.87649917602539,
+ -21.68560028076172,
+ 1051.6800537109375
+ ],
+ [
+ -20.812700271606445,
+ -22.777000427246094,
+ 1031.489990234375
+ ],
+ [
+ -34.154598236083984,
+ -25.006399154663086,
+ 1004.1900024414062
+ ],
+ [
+ -80.04650115966797,
+ -17.935100555419922,
+ 1016.8699951171875
+ ],
+ [
+ -29.98819923400879,
+ -4.726659774780273,
+ 1053.030029296875
+ ],
+ [
+ -20.322599411010742,
+ -2.968640089035034,
+ 1034.4000244140625
+ ],
+ [
+ -30.557600021362305,
+ -0.6155570149421692,
+ 1006.489990234375
+ ],
+ [
+ -75.96330261230469,
+ 6.1682000160217285,
+ 1017.4600219726562
+ ],
+ [
+ -36.91109848022461,
+ 10.100500106811523,
+ 1045.4200439453125
+ ],
+ [
+ -28.660600662231445,
+ 15.840399742126465,
+ 1029.7099609375
+ ],
+ [
+ -37.89339828491211,
+ 24.52589988708496,
+ 1012.3099975585938
+ ],
+ [
+ -72.37090301513672,
+ 29.537099838256836,
+ 1021.8099975585938
+ ],
+ [
+ -93.10230255126953,
+ 5.9222798347473145,
+ 1101.989990234375
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ -60.13209915161133,
+ -4.4926300048828125,
+ 1036.8599853515625
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ [
+ 1.0,
+ 1.0,
+ 1.0
+ ]
+ ],
+ "joint_valid": [
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ true
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ],
+ [
+ false
+ ]
+ ],
+ "hand_type": "right",
+ "hand_type_valid": true
+ }
+ }
+}
diff --git a/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_0/00001.png b/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_0/00001.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b786e20c68421c39422789fe4eb46752664a3d0
Binary files /dev/null and b/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_0/00001.png differ
diff --git a/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_1/00001.png b/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_1/00001.png
new file mode 100644
index 0000000000000000000000000000000000000000..4c505f7ddb8147a0eaf106770930261ab6edf1a3
Binary files /dev/null and b/vendor/ViTPose/tests/data/jhmdb/Frisbee_catch_f_cm_np1_ri_med_1/00001.png differ
diff --git a/vendor/ViTPose/tests/data/jhmdb/Goalkeeper_Training_Day_@_7_catch_f_cm_np1_ri_med_0/00001.png b/vendor/ViTPose/tests/data/jhmdb/Goalkeeper_Training_Day_@_7_catch_f_cm_np1_ri_med_0/00001.png
new file mode 100644
index 0000000000000000000000000000000000000000..ee9ad4a75c75758f4001c79c48baccad7f2b497b
Binary files /dev/null and b/vendor/ViTPose/tests/data/jhmdb/Goalkeeper_Training_Day_@_7_catch_f_cm_np1_ri_med_0/00001.png differ
diff --git a/vendor/ViTPose/tests/data/jhmdb/test_jhmdb_sub1.json b/vendor/ViTPose/tests/data/jhmdb/test_jhmdb_sub1.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c9d8daa61d80e6bae4d89438e17687d35f0f345
--- /dev/null
+++ b/vendor/ViTPose/tests/data/jhmdb/test_jhmdb_sub1.json
@@ -0,0 +1,298 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "neck",
+ "belly",
+ "head",
+ "right_shoulder",
+ "left_shoulder",
+ "right_hip",
+ "left_hip",
+ "right_elbow",
+ "left_elbow",
+ "right_knee",
+ "left_knee",
+ "right_wrist",
+ "left_wrist",
+ "right_ankle",
+ "left_ankle"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 3
+ ],
+ [
+ 1,
+ 4
+ ],
+ [
+ 1,
+ 5
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 4,
+ 8
+ ],
+ [
+ 8,
+ 12
+ ],
+ [
+ 5,
+ 9
+ ],
+ [
+ 9,
+ 13
+ ],
+ [
+ 2,
+ 6
+ ],
+ [
+ 2,
+ 7
+ ],
+ [
+ 6,
+ 10
+ ],
+ [
+ 10,
+ 14
+ ],
+ [
+ 7,
+ 11
+ ],
+ [
+ 11,
+ 15
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "is_labeled": true,
+ "file_name": "Frisbee_catch_f_cm_np1_ri_med_0/00001.png",
+ "nframes": 37,
+ "frame_id": 2280001,
+ "vid_id": "00228",
+ "id": 2280001,
+ "width": 320,
+ "height": 240
+ },
+ {
+ "is_labeled": true,
+ "file_name": "Frisbee_catch_f_cm_np1_ri_med_1/00001.png",
+ "nframes": 40,
+ "frame_id": 2290001,
+ "vid_id": "00229",
+ "id": 2290001,
+ "width": 320,
+ "height": 240
+ },
+ {
+ "is_labeled": true,
+ "file_name": "Goalkeeper_Training_Day_@_7_catch_f_cm_np1_ri_med_0/00001.png",
+ "nframes": 30,
+ "frame_id": 2300001,
+ "vid_id": "00230",
+ "id": 2300001,
+ "width": 320,
+ "height": 240
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 98.851746,
+ 92.59851,
+ 2.0,
+ 101.382222,
+ 133.488694,
+ 2.0,
+ 100.914365,
+ 79.770933,
+ 2.0,
+ 86.888258,
+ 101.976452,
+ 2.0,
+ 107.314272,
+ 103.37138,
+ 2.0,
+ 96.914279,
+ 145.028519,
+ 2.0,
+ 106.514281,
+ 141.828552,
+ 2.0,
+ 91.779302,
+ 90.131713,
+ 2.0,
+ 111.71446,
+ 119.029127,
+ 2.0,
+ 101.371546,
+ 177.429379,
+ 2.0,
+ 113.428535,
+ 169.257124,
+ 2.0,
+ 90.261035,
+ 71.796419,
+ 2.0,
+ 125.372119,
+ 117.142762,
+ 2.0,
+ 96.68488,
+ 206.18226,
+ 2.0,
+ 87.838304,
+ 191.933582,
+ 2.0
+ ],
+ "track_id": 0,
+ "image_id": 2280001,
+ "bbox": [
+ 79.0,
+ 62.0,
+ 54.0,
+ 159.0
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1000002280001
+ },
+ {
+ "keypoints": [
+ 126.293586,
+ 86.516958,
+ 2.0,
+ 125.127052,
+ 119.880592,
+ 2.0,
+ 128.800121,
+ 77.713852,
+ 2.0,
+ 123.142858,
+ 93.771388,
+ 2.0,
+ 127.599998,
+ 93.314242,
+ 2.0,
+ 125.314285,
+ 126.685723,
+ 2.0,
+ 125.257142,
+ 128.685637,
+ 2.0,
+ 122.857184,
+ 111.302686,
+ 2.0,
+ 128.40003,
+ 107.885918,
+ 2.0,
+ 122.228575,
+ 148.91426,
+ 2.0,
+ 125.600109,
+ 150.403006,
+ 2.0,
+ 141.391708,
+ 106.511998,
+ 2.0,
+ 141.254766,
+ 105.486158,
+ 2.0,
+ 119.657303,
+ 169.255877,
+ 2.0,
+ 127.656398,
+ 173.757251,
+ 2.0
+ ],
+ "track_id": 0,
+ "image_id": 2290001,
+ "bbox": [
+ 114.0,
+ 68.0,
+ 38.0,
+ 115.0
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1000002290001
+ },
+ {
+ "keypoints": [
+ 104.590181,
+ 138.44876,
+ 2.0,
+ 105.733877,
+ 165.843418,
+ 2.0,
+ 104.400092,
+ 130.809558,
+ 2.0,
+ 113.714288,
+ 142.914297,
+ 2.0,
+ 97.999996,
+ 146.228585,
+ 2.0,
+ 110.914299,
+ 173.028594,
+ 2.0,
+ 102.628562,
+ 171.028599,
+ 2.0,
+ 116.687889,
+ 156.478492,
+ 2.0,
+ 94.455572,
+ 157.210571,
+ 2.0,
+ 121.257055,
+ 190.342707,
+ 2.0,
+ 95.200265,
+ 191.484992,
+ 2.0,
+ 120.571144,
+ 170.45612,
+ 2.0,
+ 93.885656,
+ 169.029784,
+ 2.0,
+ 128.177332,
+ 206.720448,
+ 2.0,
+ 90.000104,
+ 209.256786,
+ 2.0
+ ],
+ "track_id": 0,
+ "image_id": 2300001,
+ "bbox": [
+ 84.0,
+ 123.0,
+ 51.0,
+ 94.0
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1000002300001
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/locust/630.jpg b/vendor/ViTPose/tests/data/locust/630.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9a4c644b21aae0fe5e07e8dc7e0c06c6344d1295
Binary files /dev/null and b/vendor/ViTPose/tests/data/locust/630.jpg differ
diff --git a/vendor/ViTPose/tests/data/locust/650.jpg b/vendor/ViTPose/tests/data/locust/650.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..373b0023d2588a6a97e557c3e824e2387681a5ec
Binary files /dev/null and b/vendor/ViTPose/tests/data/locust/650.jpg differ
diff --git a/vendor/ViTPose/tests/data/locust/test_locust.json b/vendor/ViTPose/tests/data/locust/test_locust.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc8bbcd5dbfc88b22512e00ffda28f86d36a2390
--- /dev/null
+++ b/vendor/ViTPose/tests/data/locust/test_locust.json
@@ -0,0 +1,410 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "locust",
+ "keypoints": [
+ "head",
+ "neck",
+ "thorax",
+ "abdomen1",
+ "abdomen2",
+ "anttipL",
+ "antbaseL",
+ "eyeL",
+ "forelegL1",
+ "forelegL2",
+ "forelegL3",
+ "forelegL4",
+ "midlegL1",
+ "midlegL2",
+ "midlegL3",
+ "midlegL4",
+ "hindlegL1",
+ "hindlegL2",
+ "hindlegL3",
+ "hindlegL4",
+ "anttipR",
+ "antbaseR",
+ "eyeR",
+ "forelegR1",
+ "forelegR2",
+ "forelegR3",
+ "forelegR4",
+ "midlegR1",
+ "midlegR2",
+ "midlegR3",
+ "midlegR4",
+ "hindlegR1",
+ "hindlegR2",
+ "hindlegR3",
+ "hindlegR4"
+ ],
+ "skeleton": [
+ [
+ 2,
+ 1
+ ],
+ [
+ 3,
+ 2
+ ],
+ [
+ 4,
+ 3
+ ],
+ [
+ 5,
+ 4
+ ],
+ [
+ 7,
+ 6
+ ],
+ [
+ 8,
+ 7
+ ],
+ [
+ 10,
+ 9
+ ],
+ [
+ 11,
+ 10
+ ],
+ [
+ 12,
+ 11
+ ],
+ [
+ 14,
+ 13
+ ],
+ [
+ 15,
+ 14
+ ],
+ [
+ 16,
+ 15
+ ],
+ [
+ 18,
+ 17
+ ],
+ [
+ 19,
+ 18
+ ],
+ [
+ 20,
+ 19
+ ],
+ [
+ 22,
+ 21
+ ],
+ [
+ 23,
+ 22
+ ],
+ [
+ 25,
+ 24
+ ],
+ [
+ 26,
+ 25
+ ],
+ [
+ 27,
+ 26
+ ],
+ [
+ 29,
+ 28
+ ],
+ [
+ 30,
+ 29
+ ],
+ [
+ 31,
+ 30
+ ],
+ [
+ 33,
+ 32
+ ],
+ [
+ 34,
+ 33
+ ],
+ [
+ 35,
+ 34
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "id": 630,
+ "file_name": "630.jpg",
+ "height": 160,
+ "width": 160
+ },
+ {
+ "id": 650,
+ "file_name": "650.jpg",
+ "height": 160,
+ "width": 160
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 96.50167788139936,
+ 79.08306303388312,
+ 2.0,
+ 88.16894217433088,
+ 80.0,
+ 2.0,
+ 71.83105782566912,
+ 80.0,
+ 2.0,
+ 43.076199694670166,
+ 81.43588116352915,
+ 2.0,
+ 25.32887764003749,
+ 82.27820200265606,
+ 2.0,
+ 110.83265850033396,
+ 64.38260807811851,
+ 2.0,
+ 96.89436603268481,
+ 77.79724180953298,
+ 2.0,
+ 92.64247009206748,
+ 75.90977635533528,
+ 2.0,
+ 83.39926607647823,
+ 72.82433076402732,
+ 2.0,
+ 82.67339213429909,
+ 64.27184461240981,
+ 2.0,
+ 77.6884112016259,
+ 61.04563086937941,
+ 2.0,
+ 77.45675634815713,
+ 53.70793132675738,
+ 2.0,
+ 76.53903805777047,
+ 72.5751936338004,
+ 2.0,
+ 71.96661261225319,
+ 65.52855444465679,
+ 2.0,
+ 71.75442535243388,
+ 57.456652943107045,
+ 2.0,
+ 71.32325166700342,
+ 50.50892818053555,
+ 2.0,
+ 68.30277076791707,
+ 73.75801488839979,
+ 2.0,
+ 53.231016278533986,
+ 76.08684171200879,
+ 2.0,
+ 43.82802063202446,
+ 71.2340227958044,
+ 2.0,
+ 35.106594786098235,
+ 71.66012724670512,
+ 2.0,
+ 106.38084243468204,
+ 93.57855909773465,
+ 2.0,
+ 96.92326999269929,
+ 80.82566265131587,
+ 2.0,
+ 94.00509910253301,
+ 82.81711130561807,
+ 2.0,
+ 86.23508453811776,
+ 87.44135484984199,
+ 2.0,
+ 89.53039251130028,
+ 95.03156856963247,
+ 2.0,
+ 93.56705070950602,
+ 96.78650579864731,
+ 2.0,
+ 95.92358648030009,
+ 102.7013970756846,
+ 2.0,
+ 76.38469744035021,
+ 88.48766220561612,
+ 2.0,
+ 68.9346295215593,
+ 95.07191551878313,
+ 2.0,
+ 61.51609313834261,
+ 101.49429058760627,
+ 2.0,
+ 58.801694058956855,
+ 107.68266252152361,
+ 2.0,
+ 68.60028938490109,
+ 86.4375531155976,
+ 2.0,
+ 49.508565619095066,
+ 85.14994772406058,
+ 2.0,
+ 46.69889605871468,
+ 93.99222310236672,
+ 2.0,
+ 38.16941690562348,
+ 96.27433127807184,
+ 2.0
+ ],
+ "image_id": 630,
+ "id": 630,
+ "num_keypoints": 35,
+ "bbox": [
+ 25.32887764003749,
+ 50.50892818053555,
+ 86.50378086029647,
+ 58.17373434098806
+ ],
+ "iscrowd": 0,
+ "area": 5032.247967257935,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 97.23191700267623,
+ 80.39325063190708,
+ 2.0,
+ 88.51415643927471,
+ 80.0,
+ 2.0,
+ 71.48584356072527,
+ 80.0,
+ 2.0,
+ 36.905138572570486,
+ 78.04476695194448,
+ 2.0,
+ 16.961673753971056,
+ 75.93092988166644,
+ 2.0,
+ 113.49247835569392,
+ 67.25231199016146,
+ 2.0,
+ 97.64673560186061,
+ 78.62374942355183,
+ 2.0,
+ 94.59207701254518,
+ 76.42905623590133,
+ 2.0,
+ 86.61299882845682,
+ 72.98025939672249,
+ 2.0,
+ 92.79065379033919,
+ 63.557810609540184,
+ 2.0,
+ 98.53306658179334,
+ 60.560826412407806,
+ 2.0,
+ 103.15691560103025,
+ 54.704957013528016,
+ 2.0,
+ 78.15050140841085,
+ 72.0525607684763,
+ 2.0,
+ 67.19679320947252,
+ 63.129491930981956,
+ 2.0,
+ 66.81613570544552,
+ 56.68704758248447,
+ 2.0,
+ 65.81511750771388,
+ 50.30081842401707,
+ 2.0,
+ 68.60029149309025,
+ 71.73022380161136,
+ 2.0,
+ 46.45069339825895,
+ 75.19901789908113,
+ 2.0,
+ 52.58790600614371,
+ 64.54029671009006,
+ 2.0,
+ 43.39186120464909,
+ 61.90008440661086,
+ 2.0,
+ 114.31225140311544,
+ 94.14582220648037,
+ 2.0,
+ 97.0916788683189,
+ 82.39643083701381,
+ 2.0,
+ 93.88962787007102,
+ 84.03290507899544,
+ 2.0,
+ 85.2589207759562,
+ 87.7242665022609,
+ 2.0,
+ 86.20699274387225,
+ 96.23021381618412,
+ 2.0,
+ 85.92496886773941,
+ 99.18054227199636,
+ 2.0,
+ 87.80771669496954,
+ 103.97613146233982,
+ 2.0,
+ 77.42016997828726,
+ 87.49638798189035,
+ 2.0,
+ 70.98251459751503,
+ 98.88127929151817,
+ 2.0,
+ 77.88427189277336,
+ 101.23547565641657,
+ 2.0,
+ 78.23906551163462,
+ 108.63777750516068,
+ 2.0,
+ 68.33776490317005,
+ 85.89688698861642,
+ 2.0,
+ 42.71215070869465,
+ 90.66846983209739,
+ 2.0,
+ 33.419979116798764,
+ 90.66772059057342,
+ 2.0,
+ 23.04868990312741,
+ 92.48441448580822,
+ 2.0
+ ],
+ "image_id": 650,
+ "id": 650,
+ "num_keypoints": 35,
+ "bbox": [
+ 16.961673753971056,
+ 50.30081842401707,
+ 98.35057764914438,
+ 59.33695908114361
+ ],
+ "iscrowd": 0,
+ "area": 5835.824201574118,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/macaque/PRI_1473.jpg b/vendor/ViTPose/tests/data/macaque/PRI_1473.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0c0c3d5788bb965dc6d6e930f3fbeca0a3884b41
Binary files /dev/null and b/vendor/ViTPose/tests/data/macaque/PRI_1473.jpg differ
diff --git a/vendor/ViTPose/tests/data/macaque/d47f1b1ee9d3217e.jpg b/vendor/ViTPose/tests/data/macaque/d47f1b1ee9d3217e.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1f5aa1243e9df54b02e64687e332ec6450eb5cad
Binary files /dev/null and b/vendor/ViTPose/tests/data/macaque/d47f1b1ee9d3217e.jpg differ
diff --git a/vendor/ViTPose/tests/data/macaque/test_macaque.json b/vendor/ViTPose/tests/data/macaque/test_macaque.json
new file mode 100644
index 0000000000000000000000000000000000000000..f0cbb5dad03da962c6ddc6c9136480021169c934
--- /dev/null
+++ b/vendor/ViTPose/tests/data/macaque/test_macaque.json
@@ -0,0 +1,426 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "macaque",
+ "keypoints": [
+ "nose",
+ "left_eye",
+ "right_eye",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "id": 12900,
+ "file_name": "d47f1b1ee9d3217e.jpg",
+ "height": 710,
+ "width": 1024
+ },
+ {
+ "id": 12950,
+ "file_name": "PRI_1473.jpg",
+ "height": 1424,
+ "width": 1728
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 641.06,
+ 308.89,
+ 2.0,
+ 644.23,
+ 269.75,
+ 2.0,
+ 612.5,
+ 275.04,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 545.85,
+ 216.86,
+ 2.0,
+ 580.0,
+ 225.0,
+ 2.0,
+ 464.0,
+ 258.0,
+ 2.0,
+ 582.0,
+ 366.0,
+ 2.0,
+ 377.65,
+ 360.73,
+ 2.0,
+ 697.12,
+ 358.61,
+ 2.0,
+ 590.28,
+ 394.58,
+ 2.0,
+ 508.0,
+ 424.0,
+ 2.0,
+ 385.0,
+ 475.0,
+ 2.0,
+ 618.0,
+ 369.0,
+ 2.0,
+ 445.0,
+ 386.0,
+ 2.0,
+ 695.0,
+ 468.0,
+ 2.0,
+ 522.58,
+ 550.08,
+ 2.0
+ ],
+ "image_id": 12900,
+ "id": 16169,
+ "num_keypoints": 16,
+ "bbox": [
+ 143.87,
+ 147.04,
+ 623.01,
+ 535.22
+ ],
+ "iscrowd": 0,
+ "area": 138067.96479999926,
+ "category_id": 1,
+ "segmentation": [
+ [
+ 408.33,
+ 534.21,
+ 465.45,
+ 547.97,
+ 502.48,
+ 587.11,
+ 573.36,
+ 593.45,
+ 652.69,
+ 608.26,
+ 687.6,
+ 578.64,
+ 656.93,
+ 544.79,
+ 616.73,
+ 537.39,
+ 588.17,
+ 501.42,
+ 532.1,
+ 483.44,
+ 518.35,
+ 455.93,
+ 536.33,
+ 426.31,
+ 567.01,
+ 417.85,
+ 629.42,
+ 430.55,
+ 696.07,
+ 437.95,
+ 670.68,
+ 403.04,
+ 705.59,
+ 395.64,
+ 737.32,
+ 413.62,
+ 738.38,
+ 431.6,
+ 765.88,
+ 431.6,
+ 759.54,
+ 379.77,
+ 722.51,
+ 341.69,
+ 654.81,
+ 322.64,
+ 652.69,
+ 271.87,
+ 675.97,
+ 230.61,
+ 651.64,
+ 168.2,
+ 567.01,
+ 147.04,
+ 492.96,
+ 157.62,
+ 423.14,
+ 186.18,
+ 372.36,
+ 243.31,
+ 317.36,
+ 325.82,
+ 307.83,
+ 404.1,
+ 334.28,
+ 469.69,
+ 353.32,
+ 488.73,
+ 290.91,
+ 521.52,
+ 231.67,
+ 563.83,
+ 174.55,
+ 628.36,
+ 143.87,
+ 675.97,
+ 200.99,
+ 681.26,
+ 245.42,
+ 622.02,
+ 307.83,
+ 574.41,
+ 407.27,
+ 536.33
+ ]
+ ]
+ },
+ {
+ "keypoints": [
+ 783.0,
+ 890.0,
+ 2.0,
+ 775.14,
+ 848.5,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 834.0,
+ 796.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 987.0,
+ 815.0,
+ 2.0,
+ 833.0,
+ 819.0,
+ 2.0,
+ 1132.15,
+ 789.82,
+ 2.0,
+ 887.0,
+ 919.0,
+ 2.0,
+ 1191.0,
+ 852.0,
+ 2.0,
+ 869.0,
+ 1040.0,
+ 2.0,
+ 1177.0,
+ 527.0,
+ 2.0,
+ 1082.0,
+ 513.0,
+ 2.0,
+ 1173.72,
+ 721.35,
+ 2.0,
+ 1086.0,
+ 737.0,
+ 2.0,
+ 1307.0,
+ 678.0,
+ 2.0,
+ 1218.0,
+ 783.0,
+ 2.0
+ ],
+ "image_id": 12950,
+ "id": 16227,
+ "num_keypoints": 15,
+ "bbox": [
+ 722.61,
+ 393.97,
+ 642.3100000000001,
+ 754.1700000000001
+ ],
+ "iscrowd": 0,
+ "area": 242621.70749999955,
+ "category_id": 1,
+ "segmentation": [
+ [
+ 1248.37,
+ 529.25,
+ 1212.77,
+ 474.67,
+ 1179.55,
+ 420.08,
+ 1134.45,
+ 398.72,
+ 1084.61,
+ 393.97,
+ 1053.76,
+ 429.57,
+ 1043.26,
+ 467.44,
+ 996.8,
+ 531.63,
+ 930.35,
+ 607.57,
+ 842.53,
+ 659.79,
+ 806.93,
+ 704.88,
+ 768.96,
+ 728.61,
+ 722.61,
+ 767.77,
+ 727.12,
+ 831.0,
+ 740.67,
+ 878.42,
+ 783.58,
+ 937.13,
+ 812.93,
+ 957.45,
+ 812.93,
+ 1027.46,
+ 799.38,
+ 1072.62,
+ 765.51,
+ 1113.27,
+ 758.74,
+ 1142.62,
+ 803.9,
+ 1147.14,
+ 862.61,
+ 1138.11,
+ 923.58,
+ 1068.1,
+ 950.68,
+ 1000.36,
+ 948.42,
+ 905.52,
+ 1007.13,
+ 891.97,
+ 1083.91,
+ 873.9,
+ 1151.65,
+ 919.07,
+ 1187.78,
+ 957.45,
+ 1223.91,
+ 982.29,
+ 1262.3,
+ 950.68,
+ 1272.11,
+ 901.87,
+ 1264.56,
+ 844.55,
+ 1302.95,
+ 821.96,
+ 1341.34,
+ 779.06,
+ 1363.92,
+ 704.54,
+ 1352.8,
+ 633.68,
+ 1274.48,
+ 598.08
+ ]
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/mhp/10084.jpg b/vendor/ViTPose/tests/data/mhp/10084.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..107a047031453a2885b069295a53e8b52b8c4920
Binary files /dev/null and b/vendor/ViTPose/tests/data/mhp/10084.jpg differ
diff --git a/vendor/ViTPose/tests/data/mhp/10112.jpg b/vendor/ViTPose/tests/data/mhp/10112.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b538b5eed7d59d7bf0bbbf19b56eb8900b941977
Binary files /dev/null and b/vendor/ViTPose/tests/data/mhp/10112.jpg differ
diff --git a/vendor/ViTPose/tests/data/mhp/test_mhp.json b/vendor/ViTPose/tests/data/mhp/test_mhp.json
new file mode 100644
index 0000000000000000000000000000000000000000..3740f720e478fbdaca7d3961095723d3f1502e45
--- /dev/null
+++ b/vendor/ViTPose/tests/data/mhp/test_mhp.json
@@ -0,0 +1,391 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "Right-ankle",
+ "Right-knee",
+ "Right-hip",
+ "Left-hip",
+ "Left-knee",
+ "Left-ankle",
+ "Pelvis",
+ "Thorax",
+ "Upper-neck",
+ "Head-top",
+ "Right-wrist",
+ "Right-elbow",
+ "Right-shoulder",
+ "Left-shoulder",
+ "Left-elbow",
+ "Left-wrist"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 7
+ ],
+ [
+ 7,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 5,
+ 6
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 9
+ ],
+ [
+ 9,
+ 10
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 13,
+ 9
+ ],
+ [
+ 9,
+ 14
+ ],
+ [
+ 14,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "license": 0,
+ "file_name": "10084.jpg",
+ "height": 299,
+ "width": 298,
+ "id": 2889
+ },
+ {
+ "license": 0,
+ "file_name": "10112.jpg",
+ "height": 180,
+ "width": 215,
+ "id": 3928
+ }
+ ],
+ "annotations": [
+ {
+ "segmentation": [],
+ "num_keypoints": 13,
+ "iscrowd": 0,
+ "keypoints": [
+ 151.74249267578125,
+ 251.90750122070312,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 156.2274932861328,
+ 136.0449981689453,
+ 2.0,
+ 94.18499755859375,
+ 203.32000732421875,
+ 2.0,
+ 128.57000732421875,
+ 246.6750030517578,
+ 2.0,
+ 119.5999984741211,
+ 143.52000427246094,
+ 2.0,
+ 116.61000061035156,
+ 85.9625015258789,
+ 2.0,
+ 109.13500213623047,
+ 41.86000061035156,
+ 2.0,
+ 108.38749694824219,
+ 7.474999904632568,
+ 2.0,
+ 44.849998474121094,
+ 105.39749908447266,
+ 2.0,
+ 80.7300033569336,
+ 112.125,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 122.58999633789062,
+ 47.84000015258789,
+ 2.0,
+ 87.4574966430664,
+ 83.72000122070312,
+ 2.0,
+ 35.880001068115234,
+ 97.17500305175781,
+ 2.0
+ ],
+ "image_id": 2889,
+ "bbox": [
+ 3.737499952316284,
+ 5.232500076293945,
+ 169.68249821662903,
+ 282.5550060272217
+ ],
+ "category_id": 1,
+ "id": 7646,
+ "face_box": [
+ 96.42749786376953,
+ 12.707500457763672,
+ 35.13249969482422,
+ 29.15250015258789
+ ],
+ "area": 47944.63930631365
+ },
+ {
+ "segmentation": [],
+ "num_keypoints": 14,
+ "iscrowd": 0,
+ "keypoints": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 254.14999389648438,
+ 219.76499938964844,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 292.2724914550781,
+ 147.25750732421875,
+ 2.0,
+ 223.50250244140625,
+ 195.09750366210938,
+ 2.0,
+ 242.19000244140625,
+ 276.57501220703125,
+ 2.0,
+ 264.614990234375,
+ 150.9949951171875,
+ 2.0,
+ 233.22000122070312,
+ 81.47750091552734,
+ 2.0,
+ 236.95750427246094,
+ 59.0525016784668,
+ 2.0,
+ 230.9774932861328,
+ 16.44499969482422,
+ 2.0,
+ 142.02499389648438,
+ 66.52749633789062,
+ 2.0,
+ 180.89500427246094,
+ 65.77999877929688,
+ 2.0,
+ 221.25999450683594,
+ 63.537498474121094,
+ 2.0,
+ 260.87750244140625,
+ 59.79999923706055,
+ 2.0,
+ 296.010009765625,
+ 92.69000244140625,
+ 2.0,
+ 281.05999755859375,
+ 146.50999450683594,
+ 2.0
+ ],
+ "image_id": 2889,
+ "bbox": [
+ 117.35749816894531,
+ 11.212499618530273,
+ 181.6425018310547,
+ 285.5450077056885
+ ],
+ "category_id": 1,
+ "id": 7647,
+ "face_box": [
+ 210.04750061035156,
+ 19.434999465942383,
+ 31.395004272460938,
+ 38.12249946594238
+ ],
+ "area": 51867.109585029044
+ },
+ {
+ "segmentation": [],
+ "num_keypoints": 7,
+ "iscrowd": 0,
+ "keypoints": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 38.70000076293945,
+ 117.44999694824219,
+ 2.0,
+ 48.599998474121094,
+ 71.0999984741211,
+ 2.0,
+ 66.5999984741211,
+ 17.549999237060547,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 3.1500000953674316,
+ 158.39999389648438,
+ 2.0,
+ 3.5999999046325684,
+ 74.69999694824219,
+ 2.0,
+ 97.19999694824219,
+ 76.94999694824219,
+ 2.0,
+ 102.1500015258789,
+ 145.35000610351562,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "image_id": 3928,
+ "bbox": [
+ 2.25,
+ 10.350000381469727,
+ 114.30000305175781,
+ 169.2000026702881
+ ],
+ "category_id": 1,
+ "id": 10379,
+ "face_box": [
+ 30.600000381469727,
+ 26.100000381469727,
+ 47.24999809265137,
+ 45.89999961853027
+ ],
+ "area": 19339.56082157136
+ },
+ {
+ "segmentation": [],
+ "num_keypoints": 7,
+ "iscrowd": 0,
+ "keypoints": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 147.60000610351562,
+ 126.9000015258789,
+ 2.0,
+ 155.6999969482422,
+ 81.9000015258789,
+ 2.0,
+ 152.5500030517578,
+ 25.200000762939453,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 127.3499984741211,
+ 93.1500015258789,
+ 2.0,
+ 198.4499969482422,
+ 89.0999984741211,
+ 2.0,
+ 198.89999389648438,
+ 163.35000610351562,
+ 2.0,
+ 148.5,
+ 151.64999389648438,
+ 2.0
+ ],
+ "image_id": 3928,
+ "bbox": [
+ 112.05000305175781,
+ 18.450000762939453,
+ 96.30000305175781,
+ 161.10000228881836
+ ],
+ "category_id": 1,
+ "id": 10380,
+ "face_box": [
+ 132.3000030517578,
+ 39.150001525878906,
+ 44.55000305175781,
+ 40.04999542236328
+ ],
+ "area": 15513.930712051399
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/mpi_inf_3dhp/S4_Seq2_Cam0_001033.jpg b/vendor/ViTPose/tests/data/mpi_inf_3dhp/S4_Seq2_Cam0_001033.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1264e598d8890081333a5622d68e7c0ad0ad229a
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpi_inf_3dhp/S4_Seq2_Cam0_001033.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpi_inf_3dhp/S8_Seq1_Cam8_002165.jpg b/vendor/ViTPose/tests/data/mpi_inf_3dhp/S8_Seq1_Cam8_002165.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d5443510fd8316b0ea396f78d80267aec05fd4f6
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpi_inf_3dhp/S8_Seq1_Cam8_002165.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS1_002001.jpg b/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS1_002001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3b48c44565423965fa013a1f4a03923b90e67d59
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS1_002001.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS2_001850.jpg b/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS2_001850.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b2fe386abbc696ab8c2c78c21567fec9662f7f08
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpi_inf_3dhp/TS2_001850.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/004645041.jpg b/vendor/ViTPose/tests/data/mpii/004645041.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d487a1b8ccdd34a213084f39b291b641e48d7f01
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpii/004645041.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/005808361.jpg b/vendor/ViTPose/tests/data/mpii/005808361.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f4cdf225e0002cc15c0948ab40ba3cbf8adb189f
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpii/005808361.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/051423444.jpg b/vendor/ViTPose/tests/data/mpii/051423444.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..39216ce992a5697aa66c28a0401c5ca16d0e2b54
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpii/051423444.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/052475643.jpg b/vendor/ViTPose/tests/data/mpii/052475643.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a210662bc8cddaed8aab79aefa1ce3990ae47f7a
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpii/052475643.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/060754485.jpg b/vendor/ViTPose/tests/data/mpii/060754485.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3d6afe1fa836ca84e7934268c8263fed5f0ddbf9
Binary files /dev/null and b/vendor/ViTPose/tests/data/mpii/060754485.jpg differ
diff --git a/vendor/ViTPose/tests/data/mpii/test_mpii.json b/vendor/ViTPose/tests/data/mpii/test_mpii.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c13d6a8601d1992b58b225aa74dfda36aae633a
--- /dev/null
+++ b/vendor/ViTPose/tests/data/mpii/test_mpii.json
@@ -0,0 +1,462 @@
+[
+ {
+ "joints_vis": [
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+ ],
+ "joints": [
+ [
+ 804.0,
+ 711.0
+ ],
+ [
+ 816.0,
+ 510.0
+ ],
+ [
+ 908.0,
+ 438.0
+ ],
+ [
+ 1040.0,
+ 454.0
+ ],
+ [
+ 906.0,
+ 528.0
+ ],
+ [
+ 883.0,
+ 707.0
+ ],
+ [
+ 974.0,
+ 446.0
+ ],
+ [
+ 985.0,
+ 253.0
+ ],
+ [
+ 982.7591,
+ 235.9694
+ ],
+ [
+ 962.2409,
+ 80.0306
+ ],
+ [
+ 869.0,
+ 214.0
+ ],
+ [
+ 798.0,
+ 340.0
+ ],
+ [
+ 902.0,
+ 253.0
+ ],
+ [
+ 1067.0,
+ 253.0
+ ],
+ [
+ 1167.0,
+ 353.0
+ ],
+ [
+ 1142.0,
+ 478.0
+ ]
+ ],
+ "image": "005808361.jpg",
+ "scale": 4.718488,
+ "center": [
+ 966.0,
+ 340.0
+ ]
+ },
+ {
+ "joints_vis": [
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+ ],
+ "joints": [
+ [
+ 317.0,
+ 412.0
+ ],
+ [
+ 318.0,
+ 299.0
+ ],
+ [
+ 290.0,
+ 274.0
+ ],
+ [
+ 353.0,
+ 275.0
+ ],
+ [
+ 403.0,
+ 299.0
+ ],
+ [
+ 394.0,
+ 409.0
+ ],
+ [
+ 322.0,
+ 275.0
+ ],
+ [
+ 327.0,
+ 172.0
+ ],
+ [
+ 329.9945,
+ 162.1051
+ ],
+ [
+ 347.0055,
+ 105.8949
+ ],
+ [
+ 296.0,
+ 135.0
+ ],
+ [
+ 281.0,
+ 208.0
+ ],
+ [
+ 296.0,
+ 167.0
+ ],
+ [
+ 358.0,
+ 177.0
+ ],
+ [
+ 387.0,
+ 236.0
+ ],
+ [
+ 392.0,
+ 167.0
+ ]
+ ],
+ "image": "052475643.jpg",
+ "scale": 1.761835,
+ "center": [
+ 316.0,
+ 220.0
+ ]
+ },
+ {
+ "joints_vis": [
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+ ],
+ "joints": [
+ [
+ -1.0,
+ -1.0
+ ],
+ [
+ 1033.0,
+ 649.0
+ ],
+ [
+ 1072.0,
+ 474.0
+ ],
+ [
+ 973.0,
+ 496.0
+ ],
+ [
+ 961.0,
+ 650.0
+ ],
+ [
+ -1.0,
+ -1.0
+ ],
+ [
+ 1023.0,
+ 485.0
+ ],
+ [
+ 1031.0,
+ 295.0
+ ],
+ [
+ 1026.998,
+ 281.6248
+ ],
+ [
+ 997.002,
+ 181.3752
+ ],
+ [
+ 988.0,
+ 294.0
+ ],
+ [
+ 1018.0,
+ 317.0
+ ],
+ [
+ 1070.0,
+ 290.0
+ ],
+ [
+ 991.0,
+ 300.0
+ ],
+ [
+ 912.0,
+ 345.0
+ ],
+ [
+ 842.0,
+ 330.0
+ ]
+ ],
+ "image": "051423444.jpg",
+ "scale": 3.139233,
+ "center": [
+ 1030.0,
+ 396.0
+ ]
+ },
+ {
+ "joints_vis": [
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+ ],
+ "joints": [
+ [
+ -1.0,
+ -1.0
+ ],
+ [
+ 804.0,
+ 659.0
+ ],
+ [
+ 786.0,
+ 498.0
+ ],
+ [
+ 868.0,
+ 509.0
+ ],
+ [
+ 860.0,
+ 693.0
+ ],
+ [
+ -1.0,
+ -1.0
+ ],
+ [
+ 827.0,
+ 504.0
+ ],
+ [
+ 840.0,
+ 314.0
+ ],
+ [
+ 838.9079,
+ 308.9326
+ ],
+ [
+ 816.0921,
+ 203.0674
+ ],
+ [
+ 698.0,
+ 264.0
+ ],
+ [
+ 740.0,
+ 297.0
+ ],
+ [
+ 790.0,
+ 300.0
+ ],
+ [
+ 889.0,
+ 328.0
+ ],
+ [
+ 915.0,
+ 452.0
+ ],
+ [
+ 906.0,
+ 553.0
+ ]
+ ],
+ "image": "004645041.jpg",
+ "scale": 3.248877,
+ "center": [
+ 809.0,
+ 403.0
+ ]
+ },
+ {
+ "joints_vis": [
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+ ],
+ "joints": [
+ [
+ 694.0,
+ 684.0
+ ],
+ [
+ 685.0,
+ 579.0
+ ],
+ [
+ 670.0,
+ 437.0
+ ],
+ [
+ 747.0,
+ 421.0
+ ],
+ [
+ 751.0,
+ 574.0
+ ],
+ [
+ 768.0,
+ 717.0
+ ],
+ [
+ 709.0,
+ 429.0
+ ],
+ [
+ 649.0,
+ 230.0
+ ],
+ [
+ 642.6337,
+ 217.5659
+ ],
+ [
+ 591.3663,
+ 117.4341
+ ],
+ [
+ 488.0,
+ 351.0
+ ],
+ [
+ 551.0,
+ 307.0
+ ],
+ [
+ 600.0,
+ 242.0
+ ],
+ [
+ 698.0,
+ 217.0
+ ],
+ [
+ 767.0,
+ 310.0
+ ],
+ [
+ 790.0,
+ 405.0
+ ]
+ ],
+ "image": "060754485.jpg",
+ "scale": 3.374796,
+ "center": [
+ 698.0,
+ 404.0
+ ]
+ }
+]
diff --git a/vendor/ViTPose/tests/data/mpii/test_mpii_trb.json b/vendor/ViTPose/tests/data/mpii/test_mpii_trb.json
new file mode 100644
index 0000000000000000000000000000000000000000..8014c99c8fd428f3eee7c7ecf4bd8c326066aa7d
--- /dev/null
+++ b/vendor/ViTPose/tests/data/mpii/test_mpii_trb.json
@@ -0,0 +1,760 @@
+{
+ "info": {
+ "description": "For TRBMPI testing.",
+ "year": "2020",
+ "date_created": "2020/06/20"
+ },
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle",
+ "head",
+ "neck",
+ "right_neck",
+ "left_neck",
+ "medial_right_shoulder",
+ "lateral_right_shoulder",
+ "medial_right_bow",
+ "lateral_right_bow",
+ "medial_right_wrist",
+ "lateral_right_wrist",
+ "medial_left_shoulder",
+ "lateral_left_shoulder",
+ "medial_left_bow",
+ "lateral_left_bow",
+ "medial_left_wrist",
+ "lateral_left_wrist",
+ "medial_right_hip",
+ "lateral_right_hip",
+ "medial_right_knee",
+ "lateral_right_knee",
+ "medial_right_ankle",
+ "lateral_right_ankle",
+ "medial_left_hip",
+ "lateral_left_hip",
+ "medial_left_knee",
+ "lateral_left_knee",
+ "medial_left_ankle",
+ "lateral_left_ankle"
+ ]
+ }
+ ],
+ "images": [
+ {
+ "file_name": "004645041.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 4645041
+ },
+ {
+ "file_name": "005808361.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 5808361
+ },
+ {
+ "file_name": "051423444.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 51423444
+ },
+ {
+ "file_name": "052475643.jpg",
+ "height": 480,
+ "width": 854,
+ "id": 52475643
+ },
+ {
+ "file_name": "060754485.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 60754485
+ }
+ ],
+ "annotations": [
+ {
+ "num_joints": 38,
+ "keypoints": [
+ 1067.0,
+ 253.0,
+ 2.0,
+ 902.0,
+ 253.0,
+ 2.0,
+ 1167.0,
+ 353.0,
+ 2.0,
+ 798.0,
+ 340.0,
+ 2.0,
+ 1142.0,
+ 478.0,
+ 2.0,
+ 869.0,
+ 214.0,
+ 2.0,
+ 1040.0,
+ 454.0,
+ 2.0,
+ 908.0,
+ 438.0,
+ 2.0,
+ 906.0,
+ 528.0,
+ 2.0,
+ 816.0,
+ 510.0,
+ 2.0,
+ 883.0,
+ 707.0,
+ 2.0,
+ 804.0,
+ 711.0,
+ 2.0,
+ 962.2409,
+ 80.0306,
+ 2.0,
+ 982.7591,
+ 235.9694,
+ 2.0,
+ 895.418,
+ 241.258,
+ 2,
+ 1043.704,
+ 160.177,
+ 2,
+ 901.513,
+ 343.02,
+ 2,
+ 863.72,
+ 263.644,
+ 2,
+ 837.5939,
+ 349.993,
+ 2,
+ 862.766,
+ 257.015,
+ 2,
+ 801.5946,
+ 274.022,
+ 2,
+ 879.233,
+ 196.169,
+ 2,
+ 1110.547,
+ 339.254,
+ 2,
+ 1036.455,
+ 221.547,
+ 2,
+ 1133.252,
+ 424.742,
+ 2,
+ 1157.976,
+ 298.364,
+ 2,
+ 1128.938,
+ 496.521,
+ 2,
+ 1178.462,
+ 418.695,
+ 2,
+ 906.36,
+ 495.814,
+ 2,
+ 886.084,
+ 430.921,
+ 2,
+ 921.047,
+ 497.919,
+ 2,
+ 798.3963,
+ 620.615,
+ 2,
+ 883.956,
+ 622.444,
+ 2,
+ 0,
+ 0,
+ 0,
+ 906.36,
+ 495.814,
+ 2,
+ 1063.55,
+ 427.43,
+ 2,
+ 858.607,
+ 625.533,
+ 2,
+ 998.667,
+ 532.689,
+ 2,
+ 0,
+ 0,
+ 0,
+ 930.346,
+ 637.297,
+ 2
+ ],
+ "image_id": 5808361,
+ "center": [
+ 966.0,
+ 340.0
+ ],
+ "scale": 0.1756068552,
+ "category_id": 1,
+ "id": 2736,
+ "iscrowd": 0
+ },
+ {
+ "num_joints": 40,
+ "keypoints": [
+ 358.0,
+ 177.0,
+ 2.0,
+ 296.0,
+ 167.0,
+ 2.0,
+ 387.0,
+ 236.0,
+ 2.0,
+ 281.0,
+ 208.0,
+ 2.0,
+ 392.0,
+ 167.0,
+ 2.0,
+ 296.0,
+ 135.0,
+ 2.0,
+ 353.0,
+ 275.0,
+ 2.0,
+ 290.0,
+ 274.0,
+ 2.0,
+ 403.0,
+ 299.0,
+ 2.0,
+ 318.0,
+ 299.0,
+ 2.0,
+ 394.0,
+ 409.0,
+ 2.0,
+ 317.0,
+ 412.0,
+ 2.0,
+ 347.0055,
+ 105.8949,
+ 2.0,
+ 329.9945,
+ 162.1051,
+ 2.0,
+ 288.387,
+ 168.411,
+ 2,
+ 352.646,
+ 153.542,
+ 2,
+ 278.645,
+ 195.766,
+ 2,
+ 272.16,
+ 185.5,
+ 2,
+ 295.672,
+ 202.247,
+ 2,
+ 275.016,
+ 171.472,
+ 2,
+ 297.774,
+ 179.573,
+ 2,
+ 314.8,
+ 136.217,
+ 2,
+ 362.128,
+ 228.378,
+ 2,
+ 343.02,
+ 176.81,
+ 2,
+ 402.3,
+ 211.171,
+ 2,
+ 373.628,
+ 192.749,
+ 2,
+ 389.14,
+ 148.105,
+ 2,
+ 382.448,
+ 186.517,
+ 2,
+ 340.876,
+ 312.739,
+ 2,
+ 271.97,
+ 273.448,
+ 2,
+ 323.194,
+ 285.55,
+ 2,
+ 300.533,
+ 368.868,
+ 2,
+ 328.476,
+ 360.12,
+ 2,
+ 309.13,
+ 434.758,
+ 2,
+ 340.876,
+ 312.739,
+ 2,
+ 362.155,
+ 232.654,
+ 2,
+ 381.581,
+ 365.148,
+ 2,
+ 388.754,
+ 284.757,
+ 2,
+ 396.32,
+ 448.91,
+ 2,
+ 409.878,
+ 357.015,
+ 2
+ ],
+ "image_id": 52475643,
+ "center": [
+ 316.0,
+ 220.0
+ ],
+ "scale": 0.47030507400000005,
+ "category_id": 1,
+ "id": 28438,
+ "iscrowd": 0
+ },
+ {
+ "num_joints": 32,
+ "keypoints": [
+ 991.0,
+ 300.0,
+ 2.0,
+ 1070.0,
+ 290.0,
+ 2.0,
+ 912.0,
+ 345.0,
+ 2.0,
+ 1018.0,
+ 317.0,
+ 1.0,
+ 842.0,
+ 330.0,
+ 2.0,
+ 988.0,
+ 294.0,
+ 1.0,
+ 973.0,
+ 496.0,
+ 2.0,
+ 1072.0,
+ 474.0,
+ 2.0,
+ 961.0,
+ 650.0,
+ 2.0,
+ 1033.0,
+ 649.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 997.002,
+ 181.3752,
+ 2.0,
+ 1026.998,
+ 281.6248,
+ 2.0,
+ 1071.131,
+ 283.036,
+ 2,
+ 969.6,
+ 247.337,
+ 2,
+ 1087.017,
+ 347.51,
+ 2,
+ 1058.52,
+ 305.636,
+ 2,
+ 1026.458,
+ 332.152,
+ 2,
+ 1014.72,
+ 288.149,
+ 2,
+ 995.817,
+ 309.098,
+ 2,
+ 980.493,
+ 294.738,
+ 2,
+ 937.925,
+ 366.241,
+ 2,
+ 987.08,
+ 282.067,
+ 2,
+ 869.4918,
+ 356.925,
+ 2,
+ 931.259,
+ 311.619,
+ 2,
+ 844.2,
+ 326.671,
+ 2,
+ 873.5471,
+ 326.164,
+ 2,
+ 1004.56,
+ 610.365,
+ 2,
+ 1075.26,
+ 526.816,
+ 2,
+ 1005.788,
+ 610.747,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1004.56,
+ 610.365,
+ 2,
+ 935.105,
+ 446.09,
+ 2,
+ 0,
+ 0,
+ 0,
+ 937.158,
+ 604.939,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 51423444,
+ "center": [
+ 1030.0,
+ 396.0
+ ],
+ "scale": 0.2639497014,
+ "category_id": 1,
+ "id": 27407,
+ "iscrowd": 0
+ },
+ {
+ "num_joints": 32,
+ "keypoints": [
+ 889.0,
+ 328.0,
+ 2.0,
+ 790.0,
+ 300.0,
+ 2.0,
+ 915.0,
+ 452.0,
+ 2.0,
+ 740.0,
+ 297.0,
+ 2.0,
+ 906.0,
+ 553.0,
+ 2.0,
+ 698.0,
+ 264.0,
+ 2.0,
+ 868.0,
+ 509.0,
+ 2.0,
+ 786.0,
+ 498.0,
+ 2.0,
+ 860.0,
+ 693.0,
+ 2.0,
+ 804.0,
+ 659.0,
+ 2.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 816.0921,
+ 203.0674,
+ 2.0,
+ 838.9079,
+ 308.9326,
+ 2.0,
+ 790.983,
+ 286.144,
+ 2,
+ 864.959,
+ 243.71,
+ 2,
+ 769.273,
+ 388.686,
+ 2,
+ 780.19,
+ 289.158,
+ 2,
+ 742.1957,
+ 339.679,
+ 2,
+ 729.0975,
+ 277.63,
+ 2,
+ 710.4349,
+ 292.928,
+ 2,
+ 690.765,
+ 253.113,
+ 2,
+ 871.88,
+ 429.244,
+ 2,
+ 861.04,
+ 275.182,
+ 2,
+ 894.319,
+ 509.588,
+ 2,
+ 929.981,
+ 418.01,
+ 2,
+ 901.22,
+ 581.445,
+ 2,
+ 924.708,
+ 508.795,
+ 2,
+ 823.63,
+ 647.69,
+ 2,
+ 769.341,
+ 541.653,
+ 2,
+ 850.322,
+ 625.912,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 823.63,
+ 647.69,
+ 2,
+ 905.804,
+ 486.059,
+ 2,
+ 0,
+ 0,
+ 0,
+ 907.2,
+ 647.636,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "image_id": 4645041,
+ "center": [
+ 809.0,
+ 403.0
+ ],
+ "scale": 0.2550422514,
+ "category_id": 1,
+ "id": 26901,
+ "iscrowd": 0
+ },
+ {
+ "num_joints": 39,
+ "keypoints": [
+ 698.0,
+ 217.0,
+ 2.0,
+ 600.0,
+ 242.0,
+ 2.0,
+ 767.0,
+ 310.0,
+ 2.0,
+ 551.0,
+ 307.0,
+ 2.0,
+ 790.0,
+ 405.0,
+ 2.0,
+ 488.0,
+ 351.0,
+ 2.0,
+ 747.0,
+ 421.0,
+ 2.0,
+ 670.0,
+ 437.0,
+ 2.0,
+ 751.0,
+ 574.0,
+ 2.0,
+ 685.0,
+ 579.0,
+ 2.0,
+ 768.0,
+ 717.0,
+ 2.0,
+ 694.0,
+ 684.0,
+ 2.0,
+ 591.3663,
+ 117.4341,
+ 2.0,
+ 642.6337,
+ 217.5659,
+ 2.0,
+ 584.59,
+ 231.591,
+ 2,
+ 649.816,
+ 141.342,
+ 2,
+ 605.668,
+ 337.961,
+ 2,
+ 566.695,
+ 256.226,
+ 2,
+ 581.685,
+ 330.685,
+ 2,
+ 510.6881,
+ 317.872,
+ 2,
+ 530.2038,
+ 341.493,
+ 2,
+ 481.6367,
+ 358.297,
+ 2,
+ 725.537,
+ 311.805,
+ 2,
+ 651.465,
+ 169.726,
+ 2,
+ 766.905,
+ 363.613,
+ 2,
+ 774.747,
+ 267.874,
+ 2,
+ 784.675,
+ 432.399,
+ 2,
+ 796.495,
+ 356.847,
+ 2,
+ 726.118,
+ 528.068,
+ 2,
+ 649.638,
+ 446.552,
+ 2,
+ 737.496,
+ 516.31,
+ 2,
+ 667.32,
+ 620.422,
+ 2,
+ 736.118,
+ 628.657,
+ 2,
+ 663.697,
+ 699.859,
+ 2,
+ 726.118,
+ 528.068,
+ 2,
+ 799.279,
+ 341.113,
+ 2,
+ 727.888,
+ 644.205,
+ 2,
+ 798.633,
+ 526.499,
+ 2,
+ 0,
+ 0,
+ 0,
+ 799.314,
+ 644.016,
+ 2
+ ],
+ "image_id": 60754485,
+ "center": [
+ 698.0,
+ 404.0
+ ],
+ "scale": 0.24552578040000003,
+ "category_id": 1,
+ "id": 26834,
+ "iscrowd": 0
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/ochuman/000817.jpg b/vendor/ViTPose/tests/data/ochuman/000817.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3d367be2e17f35fdaa377d2279050a8e4533d6a6
Binary files /dev/null and b/vendor/ViTPose/tests/data/ochuman/000817.jpg differ
diff --git a/vendor/ViTPose/tests/data/ochuman/003799.jpg b/vendor/ViTPose/tests/data/ochuman/003799.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1c43b53fe578a825a835c75f883f614547c48e6a
Binary files /dev/null and b/vendor/ViTPose/tests/data/ochuman/003799.jpg differ
diff --git a/vendor/ViTPose/tests/data/ochuman/003896.jpg b/vendor/ViTPose/tests/data/ochuman/003896.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b099339fce9b5cfee5b9975b057d2d729a43bad
Binary files /dev/null and b/vendor/ViTPose/tests/data/ochuman/003896.jpg differ
diff --git a/vendor/ViTPose/tests/data/ochuman/test_ochuman.json b/vendor/ViTPose/tests/data/ochuman/test_ochuman.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdb1c67200354d9c02e32b11cf971b02e30cbb22
--- /dev/null
+++ b/vendor/ViTPose/tests/data/ochuman/test_ochuman.json
@@ -0,0 +1,504 @@
+{
+ "categories": [
+ {
+ "keypoints": [
+ "nose",
+ "left_eye",
+ "right_eye",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ],
+ "id": 1,
+ "supercategory": "person",
+ "name": "person"
+ }
+ ],
+ "images": [
+ {
+ "file_name": "003799.jpg",
+ "width": 900,
+ "height": 864,
+ "id": 1
+ },
+ {
+ "file_name": "000817.jpg",
+ "width": 665,
+ "height": 1000,
+ "id": 2
+ },
+ {
+ "file_name": "003896.jpg",
+ "width": 602,
+ "height": 900,
+ "id": 3
+ }
+ ],
+ "annotations": [
+ {
+ "area": 356725,
+ "image_id": 1,
+ "iscrowd": 0,
+ "category_id": 1,
+ "bbox": [
+ 250,
+ 112,
+ 475,
+ 751
+ ],
+ "segmentation": {
+ "counts": "Zei62gj08I7N2N2N2N1O1O001O1O1O001O1O4L3M3M2N2N1N3N1c_OdNU7]1jHhNQ7Y1nHlNm6V1PIPOk6Q1TITOh6l0fFcNnKf0W=h0kFjNeKc0[=d0oFQO]Ka0`=>SGQO\\Kg0[=9YGQOZKk0X=5^GPOYKS1g9mNoFWOnMk1R6oNYK]1\\9WO\\FnNkNa1T6mNWKd1W9EdEdNIX1U6lNUKe1V9f0`EjNU6kNUKf1S9f0cEjNT6kNUKh1Q9c0fEkNT6kNSKi1_4oMRNc2XLkNT6jNRKk1Z4bNfMo1jLkNT6iNRKm1S4SO]M]1ZMkNS6iNRKn1n3]OYMS1dMjNS6iNPKo1l33dL>]NhNS6nNjJk1l37dL;cNfNS6mNiJn1`3?nL2fNeNR6mNiJP2l2P1_MAiNcNS6lNhJT2[2Z1oMUOkNbNS6jNiJW2U2X1UNVOjNbNS6iNhJZ2Q2T1[NYOiNaNR6iNhJ\\2m1U1]NVOlN`NS6iNgJ^2h1X1[NUOSO]NS6hNfJa2d1Z1gMFLhMS6gNeJd2a1V6gM_HS6hNdJf2^1T6kM_Ho5`MRJW1f0h2Z1S6oM]HP6bMQJU1f0l2V1Q6SN\\HP6dMPJS1f0P3S1n5VNZHR6fMnIS1g0Q3o0m5ZNYHR6gMnIR1f0T3j0m5`NVHR6QOcJP3e0k5fNTHR6QOdJR3?k5kNRHR6QOdJa3N^5\\OPH^5EXKQ>ZOZBZ5I\\Km=ZOZBo44hKa=YO\\Bm44jK`=YO\\Bl45kK_=YO`Ah5Q1PK^=XO_Ai5U1nJn>R5TAmJl>R5VAnJj>Q5VAoJj>Q5WAnJj>R5VAnJi>R5XAmJi>S5WAlJi>U5WAkJi>T5WAlJi>U5WAgJm>X5TAdJX=F_Ag5ja0YJV^Og5ia0ZJU^Oi5ia0XJV^Oj5ia0VJW^Oj5ha0WJX^Oj5ea0RJa^On5l`0dJU_O]5\\`0oJf_OR5m?YKS@h4k?YKV@h4g?XK[@h4c?YK]@i4`?WKa@k4[?WKe@j4Y?WKf@l4X?TKh@n4U?RKm@o4P?RKQAo4l>RKUAP5g>QKZAP5b>RK_Ao4^>QKdAQ5W>PKkAQ5R>oJPBR5n=mJTBU5h=lJYBU5d=kJ^BV5_=jJcBX5Z=iJfBX5W=iJjBX5S=jJmBX5o_9o@fFU7HBg9RIeFY7O[Oa9UIcF^75UO^:h0eEUO]:j0fESO[:k0hERO[:k0iEROX:m0jEPOX:o0jEoNX:n0kEoNW:P1kEnNV:Q1lEmNV:P1lEnNV:Q1lEmNU:R1mElNU::eFD\\96kFHV94nFJT92QGLP93QGLQ91QGMQ91QGNP90RGNP91QGOo8OTGOn8MUG2l8LVG4j8JXG5i8IYG5i8JXG4k8JUG4n8JTG4n8KSG1Q9MQGOT9OmFOU9NnFOU9OmFOU9NmF1U9MjF1\\9CcFd0b9[O\\Ff0f9YOXFh0j9XOTFg0o9XOPFh0R:TOSEnKlMm4S=ROPEVLjMh4X=QOlD\\LeMf4a=mNhDaLdMc4e=kNeDfLbM`4k=iNbDkL_MY4T>^NiD\\MPMP4_>cN`D`MmLm3g>aNZDh1YMXK_>P3VDf1dm\\OaEd`0m:k^OgEd`0fN1O2N2N2N010O010O010O0100O0YM_Eh@a:S?iEh@V:T?QFi@P:R?WFl@k9n>[Fo@h9j>^FTAe9e>bFWAb9b>eF[A^9^>iF_AZ9Z>mFbAX9V>nFhAV9P>PGnAS9k=SGRBQ9g=UGWBm8c=YG[Bj8J\\DVU:V@nFc?aTFSAl9b>^F`Ab9o;ZFgB>\\1X9_;PGaB2R2n8o:fG[BFh2c8`:RJoChK0V:c;_J_D^KJS:[;iJoDVKAR:`;fJSEZKYOP:c;eJWE^KROl9h;dJZEbKjNj9l;bJ^EfKbNh9P0100O00100O00100O10O01O10O0`MZE_@f:Z?eEb@Z:X?PFe@o9U?\\Ff@f9R?dFk@^9k>lFSAW9e>PGXAS9a>TG\\Ao8]>XG_Al8Z>ZGcAj8V>]GfAg8S>`GiAd8P>cGlAa8m=eGPB_8j=gGSB]8f=iGVB\\8;mCQ]m0\\OhROb0Pn0O101N2N3M3K5Kd^R9",
+ "size": [
+ 1000,
+ 665
+ ]
+ },
+ "num_keypoints": 13,
+ "id": 3,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 217.01654468945043,
+ 302.1730492658786,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 285,
+ 328,
+ 2,
+ 187,
+ 339,
+ 2,
+ 293,
+ 412,
+ 2,
+ 176,
+ 414,
+ 2,
+ 308,
+ 490,
+ 2,
+ 174,
+ 495,
+ 2,
+ 270,
+ 491,
+ 2,
+ 224,
+ 493,
+ 2,
+ 271,
+ 614,
+ 1,
+ 227,
+ 614,
+ 2,
+ 225,
+ 704,
+ 2,
+ 215,
+ 716,
+ 2
+ ]
+ },
+ {
+ "area": 517461,
+ "image_id": 3,
+ "iscrowd": 0,
+ "category_id": 1,
+ "bbox": [
+ 0,
+ 39,
+ 601,
+ 861
+ ],
+ "segmentation": {
+ "counts": "Xj0Y1jj02N101N10001N10000O0100O010O10O0100O02O00001N101O001N2O001O0O2O1O0O2O0UAUN]3l1aLZNY3h1fLXNZ3h1eLYN[3h1dLXN[3j1cLWN]3i1cLWN[EL[=o1XMVNZEN]=m1YMUNWE1_=l1XMSNXE4^=j1XMRNZE6]=i1XMQNZE8]=h1XMPNZE:j<^O_CZ2[:nM\\E<_S1gGn3^JjJl=W1eGP4cJeJh=\\1dGo3U9PLjFQ4W9oKgFR4Z9nKeFR4\\9mKdFS4\\9nKbFS4_9mK`FS4a9lK_FT4b9lK]FT4d9kK[FV4e9kKZFT4h9kKXFU4i9kKVFU4k9jKUFV4l9iKTFW4l9jKTFU4m9jKTFU4m9jKSFV4n9jKRFU4n9kKSFU4m9kKRFU4o9jKRFU4o9jKRFU4n9lKQFT4o9lKRFS4o9lKQFT4o9mKQFR4o9nKRFQ4o9nKQFR4o9nKRFQ4n9PLQFP4P:oKQFo3P:QLQFn3o9RLQFn3P:RLPFm3P:SLQFl3o9TLRFk3o9SLSFl3m9SLUFk3l9ULUFj3l9VLTFi3l9XLRFi3n9XLoEj3R:VLlEk3U:ULhEm3X:SLgEn3Z:RLdEo3]:QL`EQ4`:PL^EQ4c:oK[ER4e:oKYER4h:nKWER4j:nKTES4l:nKRET4n:lKPEU4P;lKnDU4S;jKmDV4S;kKkDV4U;kKjDU4W;kKgDV4Y;jKfDW4[;iKdDW4\\;jKbDW4^;iKaDX4`;hK_DX4a;iK]DX4c;hK\\DY4e;gKYDZ4g;gKWD[4i;dKVD]4j;dKUD\\4l;dKRD^4m;bKSD^4n;bKPD_4QXJbAj5^>VJ`Ak5a>UJ]Am5c>SJ[An5f>QJYAQ6g>oIWAR6i>oIUAS6k>mIRAU6o>jIPAW6Q?iIm@X6T?hIj@Y6V?hIg@Z6Z?fIc@]6\\?dIa@^6`?cI\\@_6d?cIY@^6h?cI`_OZN2U8_`0bIZ_ObN0m7f`0cIV_OU7k`0Q2001O1O001O0O2O010O0010O01O10O01O100O1O1000000001O000000001O0000001O0oLUEiDk:W;WEgDi:Y;YEeDh:Z;YEeDg:[;ZEdDf:\\;[EcDf:[;\\EdDd:\\;]EcDc:\\;_EcDa:];_EdDa:Z;aEeD_:[;cEcD^:[;dEdD]:[;dEdD^:Y;dEfD]:Y;dEfD^:W;eEgD[:Y;fEgDZ:h:XFVEh9i:\\FTEe9j:_FSEa9l:dFPE\\9o:kFkDU9U;PGeDP9\\;TG`Dk8a;ZGYDg8g;]GUDb8l;bGoC^8RR3U:k7fEjD7[3Z9`L\\FX;^1YHT8cLZF\\:FkEo1W3o7jLTFm98mEh1\\3k7]7cFSEd1a3h7Y7XIhHf6W7\\IiHc6U7aIkH]6T7eImHY6R7jInHT6Q7nIPIQ6n6RJQIm5n6UJSIi5k6[JUIc5j6_JVI`5g6eJYIZ5a6lJ_IS5X6WKiIg4S6^KmIb4o5bKQJ]4l5gKUJW4h5mKYJQ4f5QLZJo3c5UL]Ji3b5YL^Jf3W1jFTOd5Ec3T1oFoNb5KS4a0]FAc5NS4=\\FAe50P4=^F_Oe54m3<_F]Oh55k3<^F\\Oj58h3;_F[Ok59d3>bFXOk5<\\3`0jFROl5c0R3=SGnNm5j0h29\\GlNn5o0`26cGjNn5T1Y23jGgNo5V1U24mGdNS6T1n1:oGaNZ6P1e1`0RH_N`6k0]1g0SH\\Ng6h0mHbLc7Z4jH[Nh6j0fHdLh7`0oGm2k0SOk6m0]HfLn7;PHP3j0ROm6\\18aLQHR3k0oNo6]14aLRHT3l0mNP7\\12aLTHV3k0lNQ7\\1O`LVHY3j0jNT7\\1LVL_He3a0iNV7[1ROMgHWM1^2Y8;nN1iHVM0]2[8:mN1iHWM1]2[8:kN1jHWM1]2]88iNfMnGf1S2=W75hNdMRHa1D]N^2X2V74gNbMTHe1DXN^2\\2V73WOYOiId0Q71VO[OjIc0S7OSO]OlIc0S7NQO_OlId0U7KmNBoIb0W7IjNEPJa0W7IhNGRJ`0W7GgNIRJ`0X7GdNJUJ>Y7FbNLVJ>X7EaNNWJ=Y7D_N0YJN10100000000001OO01000000O1O100O1N2N3M2N3M2N3M2K5OO1O20O0000100O1O1O1O1O2N1N3QOnTOh0Xk0N3M2H8M3N3M2NTW3",
+ "size": [
+ 900,
+ 602
+ ]
+ },
+ "num_keypoints": 12,
+ "id": 4,
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 446,
+ 292,
+ 2,
+ 263,
+ 225,
+ 2,
+ 540,
+ 346,
+ 2,
+ 161,
+ 269,
+ 2,
+ 560,
+ 327,
+ 2,
+ 103,
+ 312,
+ 2,
+ 308,
+ 495,
+ 2,
+ 236,
+ 491,
+ 2,
+ 352,
+ 499,
+ 2,
+ 124,
+ 652,
+ 2,
+ 442,
+ 677,
+ 2,
+ 67,
+ 827,
+ 2
+ ]
+ },
+ {
+ "area": 404028,
+ "image_id": 3,
+ "iscrowd": 0,
+ "category_id": 1,
+ "bbox": [
+ 84,
+ 45,
+ 516,
+ 783
+ ],
+ "segmentation": {
+ "counts": "ln`2?ck04M2M3N2M3N2M3N2M3N2M3M4L3M3N3L5K4L5K5L2M3M2O2O0O2N2O0O2N101N2N2N2O1N2N2O1N2N4M4K4L2O1N2O1O000O10000000O1001N101O001O001O001O001O0010O01O1O001O1O3M;E1O2O0O2N1O2N2N1O2N1O3N2M3M3M3N2R@PKc8o4\\GQKe8o4YGSKh8l4VGUKl8f4e@fJ]6f0o8b4g@fJY6i0P9a4SG`Ko8_4PGaKR9]4mFcKV9\\4iFdKY9[4fFdK\\9[4cFfK_9Y4dARKX3d0W;Y4aATKW3a0[;Z4]AXKU3=`;Z4\\AZKR3;e;Z4YA]KP36k;\\4TAbKm21R<[4RAgKXO1U2Jf=]4m@kKiN>`2TOP>c4g@VMX1XNS>`4e@YMU1ZNY>Z4b@\\MS1[N_>U4_@`Mo0]Ng>o3Y@dMo0^Nl>j3V@hMk0_NT?f3Q@kMi0_N[?b3m_OoMe0`Na?`3i_OQNc0`Ng?]3g_OSN`0`Nk?]3e_OSN=`NR`0[3a_OVN;_NW`0Z3^_OWN7bN\\`0V3^_OXN1eNd`0R3[_OYNMgNj`0o2Z_OZNHjNPa0k2X_O[NDkN[;QO[Ig3gK]N_OlNa;SOWId3jK\\NZOnNh;TOSI`3lK^NUOPOn;UOnH]3oK^NQOROSmMa5P1l4b2]E_Lg5o0l4o4UKQKk4o4TKRKl4n4TKRKl4m4UKSKl4l4TKTKl4k4TKVKl4j4TKWKk4h4VKXKk4g4UKYKk4g4TKZKm4d4TK]Kl4b4TK^Kl4a4UK_Kl4`4SKbKl4]4UKcKl4\\4TKeKk4[4UKeKl4Y4TKiKl4V4TKjKl4U4UKkKl4T4TKmKk4R4VKnKj4R4WKnKh4R4XKSIcI^2U;^4ZKQIeI_2Q;`4ZKPIiI^2l:a4]KoHjI]2j:d4\\KnHmI\\2g:f4]KkHPJ\\2d:h4ULTKl3l4VLRKi3n4ZLoJg3Q5[LlJe3T5_LXJR4h5PLkI[4U6gK`Ia4`6cKWIc4i6_KoHg4P7V810O01000O010UKS[O^2md0dMS[O[2md0fMS[OX2md0jMT[OT2ld0nMS[OQ2ld0QNT[Om1md0TNS[Ok1ld0WNT[Og1md0[NS[O\\Nn0S1nc0c0U[OjMc1[1Wc0l0g]OlNZb0T1m]OdNTb0]1S^OZNna0f1W^OTNla0j1X^OQNja0n1Y^OnMia0R2Z^OiMha0V2[^OfMga0Y2]^ObMea0]2_^O^Mca0b2P4O1O2N3N1N2N3M2O2M3_NoUOi0Uj0POoUOP1Sj0hNRVOY1\\j0010O010000O10000O1000]LhNY\\OW1ec0mN[\\OR1bc0RO]\\Om0ac0VO`\\Oi0]c0[Oc\\Od0Zc0_Og\\O`0Ub0nNX[Of0b2PJRAQ6P?oIn@Q6T?oIi@R6Y?nIe@R6]?mIc@S6^?mI`@S6b?mI]@R6e?nIZ@P6i?PJV@j5Q`0VJm_Oh5W`0WJi_Og5Z`0YJe_Od5_`0\\J__Oc5d`0]J\\_O`5g`0_JY_O_5j`0`JV_O]5o`0aJQ_OR5]a0mJc^Oo4ba0QK^^Ok4fa0TKZ^Oh4ka0WKU^Og4S1WKR=1kAf4V1\\KkP2GkIHX4`0n1HSJ\\OQ4l0m1Gf38[LHd38]LHc37^LHb38_LHa37`LH`38aLH_37bLI^36cLI]37dLI\\36eLI\\36eLJZ36fLKZ33hLLY33hLMW33jLLW33jLMU32lLOT30mLOS31nLOR30oLOQ31PMOP3ORM0n20RM1n2NSM2l2MVM2k2MVM3i2LYM3h2KYM6f2J[M6d2I^M6c2I^M7a2HaM7_2IaM8^2GdM9\\2FeM:Z2EhM;W2EjM;V2ClM=T2BmM>R2APN?P2@PNa0P2]ORNc0m1]OTNc0l1[OVNe0j1ZOWNf0h1YOZNg0f1XO[Nh0e1VO]Nj0b1UO`Nl0_1ROcNo0\\1oNeNR1[1lNgNW1V1gNlN]1_CaNY=OZOl1c0SN^Oo1`0oMBS2W=1O1N0`VOlMQi0S2dVOXN]i0W21O1O1O1O010O1O1N2L4L4M2F;L3M4L3M4M2N3L3N3M2N3O0O2O0O2O0O2O0O2O002M5L3M4L_d0",
+ "size": [
+ 900,
+ 602
+ ]
+ },
+ "num_keypoints": 15,
+ "id": 5,
+ "keypoints": [
+ 138.28123044948043,
+ 178.42673457794075,
+ 2,
+ 133.4071828533262,
+ 152.16976849543238,
+ 2,
+ 0,
+ 0,
+ 0,
+ 168.78333476089736,
+ 123.08271026031831,
+ 2,
+ 0,
+ 0,
+ 0,
+ 201,
+ 192,
+ 2,
+ 322,
+ 124,
+ 1,
+ 249,
+ 344,
+ 1,
+ 488,
+ 172,
+ 2,
+ 219,
+ 433,
+ 1,
+ 572,
+ 238,
+ 2,
+ 436,
+ 381,
+ 2,
+ 439,
+ 380,
+ 2,
+ 354,
+ 608,
+ 2,
+ 307,
+ 523,
+ 1,
+ 494,
+ 683,
+ 2,
+ 313,
+ 753,
+ 2
+ ]
+ }
+ ],
+ "info": {
+ "description": "MMPose example ochuman dataset",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/08/31"
+ }
+}
diff --git a/vendor/ViTPose/tests/data/onehand10k/1402.jpg b/vendor/ViTPose/tests/data/onehand10k/1402.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..962b2746170efe8d12faf07f820ab76bda147a91
Binary files /dev/null and b/vendor/ViTPose/tests/data/onehand10k/1402.jpg differ
diff --git a/vendor/ViTPose/tests/data/onehand10k/33.jpg b/vendor/ViTPose/tests/data/onehand10k/33.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..236a49964ad7d0cb87944b4e93ebfe101d703fbc
Binary files /dev/null and b/vendor/ViTPose/tests/data/onehand10k/33.jpg differ
diff --git a/vendor/ViTPose/tests/data/onehand10k/784.jpg b/vendor/ViTPose/tests/data/onehand10k/784.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..01c8c9204bb6edb41c638f5ee4afa4d21befbdfa
Binary files /dev/null and b/vendor/ViTPose/tests/data/onehand10k/784.jpg differ
diff --git a/vendor/ViTPose/tests/data/onehand10k/9.jpg b/vendor/ViTPose/tests/data/onehand10k/9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d7c78ecf32b9b1f69b486d65be1dfbc2a5f6cd7
Binary files /dev/null and b/vendor/ViTPose/tests/data/onehand10k/9.jpg differ
diff --git a/vendor/ViTPose/tests/data/onehand10k/test_onehand10k.json b/vendor/ViTPose/tests/data/onehand10k/test_onehand10k.json
new file mode 100644
index 0000000000000000000000000000000000000000..479f6aacda03bae98bb5a2b6c70f087330144ed8
--- /dev/null
+++ b/vendor/ViTPose/tests/data/onehand10k/test_onehand10k.json
@@ -0,0 +1,541 @@
+{
+ "info": {
+ "description": "OneHand10K",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/08/03"
+ },
+ "licenses": "",
+ "images": [
+ {
+ "file_name": "9.jpg",
+ "height": 358,
+ "width": 238,
+ "id": 9
+ },
+ {
+ "file_name": "33.jpg",
+ "height": 346,
+ "width": 226,
+ "id": 33
+ },
+ {
+ "file_name": "784.jpg",
+ "height": 960,
+ "width": 540,
+ "id": 784
+ },
+ {
+ "file_name": "1402.jpg",
+ "height": 339,
+ "width": 226,
+ "id": 1402
+ }
+ ],
+ "annotations": [
+ {
+ "bbox": [
+ 63,
+ 92,
+ 99,
+ 194
+ ],
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 81,
+ 251,
+ 1,
+ 71,
+ 229,
+ 1,
+ 72,
+ 192,
+ 1,
+ 76,
+ 169,
+ 1,
+ 95,
+ 196,
+ 1,
+ 91,
+ 144,
+ 1,
+ 93,
+ 122,
+ 1,
+ 91,
+ 98,
+ 1,
+ 116,
+ 199,
+ 1,
+ 111,
+ 148,
+ 1,
+ 108,
+ 120,
+ 1,
+ 107,
+ 101,
+ 1,
+ 139,
+ 203,
+ 1,
+ 130,
+ 153,
+ 1,
+ 128,
+ 124,
+ 1,
+ 122,
+ 107,
+ 1,
+ 154,
+ 205,
+ 1,
+ 150,
+ 177,
+ 1,
+ 147,
+ 159,
+ 1,
+ 142,
+ 132,
+ 1
+ ],
+ "category_id": 1,
+ "id": 9,
+ "image_id": 9,
+ "segmentation": [
+ [
+ 63,
+ 92,
+ 63,
+ 188.5,
+ 63,
+ 285,
+ 112.0,
+ 285,
+ 161,
+ 285,
+ 161,
+ 188.5,
+ 161,
+ 92,
+ 112.0,
+ 92
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 19206
+ },
+ {
+ "bbox": [
+ 61,
+ 154,
+ 34,
+ 68
+ ],
+ "keypoints": [
+ 86,
+ 221,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 92,
+ 176,
+ 1,
+ 90,
+ 168,
+ 1,
+ 90,
+ 160,
+ 1,
+ 92,
+ 189,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 87,
+ 191,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 79,
+ 194,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "category_id": 1,
+ "id": 33,
+ "image_id": 33,
+ "segmentation": [
+ [
+ 61,
+ 154,
+ 61,
+ 187.5,
+ 61,
+ 221,
+ 77.5,
+ 221,
+ 94,
+ 221,
+ 94,
+ 187.5,
+ 94,
+ 154,
+ 77.5,
+ 154
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 2312
+ },
+ {
+ "bbox": [
+ 51,
+ 312,
+ 376,
+ 372
+ ],
+ "keypoints": [
+ 153,
+ 652,
+ 1,
+ 198,
+ 486,
+ 1,
+ 258,
+ 438,
+ 1,
+ 333,
+ 384,
+ 1,
+ 393,
+ 352,
+ 1,
+ 160,
+ 369,
+ 1,
+ 274,
+ 334,
+ 1,
+ 325,
+ 337,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 412,
+ 418,
+ 1,
+ 0,
+ 0,
+ 0,
+ 334,
+ 454,
+ 1,
+ 303,
+ 492,
+ 1,
+ 270,
+ 540,
+ 1,
+ 0,
+ 0,
+ 0,
+ 325,
+ 508,
+ 1,
+ 295,
+ 544,
+ 1,
+ 258,
+ 562,
+ 1
+ ],
+ "category_id": 1,
+ "id": 784,
+ "image_id": 784,
+ "segmentation": [
+ [
+ 51,
+ 312,
+ 51,
+ 497.5,
+ 51,
+ 683,
+ 238.5,
+ 683,
+ 426,
+ 683,
+ 426,
+ 497.5,
+ 426,
+ 312,
+ 238.5,
+ 312
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 139872
+ },
+ {
+ "bbox": [
+ 32,
+ 68,
+ 150,
+ 210
+ ],
+ "keypoints": [
+ 92,
+ 264,
+ 1,
+ 150,
+ 213,
+ 1,
+ 167,
+ 202,
+ 1,
+ 172,
+ 187,
+ 1,
+ 174,
+ 172,
+ 1,
+ 126,
+ 164,
+ 1,
+ 142,
+ 147,
+ 1,
+ 157,
+ 151,
+ 1,
+ 163,
+ 168,
+ 1,
+ 105,
+ 151,
+ 1,
+ 108,
+ 120,
+ 1,
+ 112,
+ 98,
+ 1,
+ 109,
+ 70,
+ 1,
+ 85,
+ 157,
+ 1,
+ 77,
+ 132,
+ 1,
+ 78,
+ 108,
+ 1,
+ 72,
+ 89,
+ 1,
+ 74,
+ 174,
+ 1,
+ 63,
+ 157,
+ 1,
+ 47,
+ 137,
+ 1,
+ 37,
+ 119,
+ 1
+ ],
+ "category_id": 1,
+ "id": 1402,
+ "image_id": 1402,
+ "segmentation": [
+ [
+ 32,
+ 68,
+ 32,
+ 172.5,
+ 32,
+ 277,
+ 106.5,
+ 277,
+ 181,
+ 277,
+ 181,
+ 172.5,
+ 181,
+ 68,
+ 106.5,
+ 68
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 31500
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "hand",
+ "id": 1,
+ "name": "hand",
+ "keypoints": [
+ "wrist",
+ "thumb1",
+ "thumb2",
+ "thumb3",
+ "thumb4",
+ "forefinger1",
+ "forefinger2",
+ "forefinger3",
+ "forefinger4",
+ "middle_finger1",
+ "middle_finger2",
+ "middle_finger3",
+ "middle_finger4",
+ "ring_finger1",
+ "ring_finger2",
+ "ring_finger3",
+ "ring_finger4",
+ "pinky_finger1",
+ "pinky_finger2",
+ "pinky_finger3",
+ "pinky_finger4"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 1,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 9
+ ],
+ [
+ 1,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 1,
+ 14
+ ],
+ [
+ 14,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ],
+ [
+ 1,
+ 18
+ ],
+ [
+ 18,
+ 19
+ ],
+ [
+ 19,
+ 20
+ ],
+ [
+ 20,
+ 21
+ ]
+ ]
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/panoptic/005880453_01_l.jpg b/vendor/ViTPose/tests/data/panoptic/005880453_01_l.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9a8f688233d740ed4aea4d60d3a6a39cfa356be
Binary files /dev/null and b/vendor/ViTPose/tests/data/panoptic/005880453_01_l.jpg differ
diff --git a/vendor/ViTPose/tests/data/panoptic/005880453_01_r.jpg b/vendor/ViTPose/tests/data/panoptic/005880453_01_r.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9a8f688233d740ed4aea4d60d3a6a39cfa356be
Binary files /dev/null and b/vendor/ViTPose/tests/data/panoptic/005880453_01_r.jpg differ
diff --git a/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_l.jpg b/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_l.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2dd0342b4a0b44247226a02aa0662d7a6fc2166a
Binary files /dev/null and b/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_l.jpg differ
diff --git a/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_r.jpg b/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_r.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2dd0342b4a0b44247226a02aa0662d7a6fc2166a
Binary files /dev/null and b/vendor/ViTPose/tests/data/panoptic/ex2_2.flv_000040_r.jpg differ
diff --git a/vendor/ViTPose/tests/data/panoptic/test_panoptic.json b/vendor/ViTPose/tests/data/panoptic/test_panoptic.json
new file mode 100644
index 0000000000000000000000000000000000000000..6007d2db14e1ac1adb4e13249d51370fccd063b3
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic/test_panoptic.json
@@ -0,0 +1,565 @@
+{
+ "info": {
+ "description": "panoptic",
+ "version": "1.0",
+ "year": "2020",
+ "date_created": "2020/09/21"
+ },
+ "licenses": "",
+ "images": [
+ {
+ "file_name": "005880453_01_l.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 100586
+ },
+ {
+ "file_name": "005880453_01_r.jpg",
+ "height": 720,
+ "width": 1280,
+ "id": 100587
+ },
+ {
+ "file_name": "ex2_2.flv_000040_l.jpg",
+ "height": 300,
+ "width": 400,
+ "id": 100520
+ },
+ {
+ "file_name": "ex2_2.flv_000040_r.jpg",
+ "height": 300,
+ "width": 400,
+ "id": 100521
+ }
+ ],
+ "annotations": [
+ {
+ "bbox": [
+ 720.32470703125,
+ 188.09907531738278,
+ 55.445434570312614,
+ 36.900924682617216
+ ],
+ "head_size": 140.0,
+ "center": [
+ 747.5474243164062,
+ 206.0495376586914
+ ],
+ "keypoints": [
+ 721.0,
+ 224.0,
+ 1.0,
+ 720.32470703125,
+ 216.51248168945315,
+ 1.0,
+ 727.2195434570314,
+ 200.88510131835943,
+ 1.0,
+ 739.107177734375,
+ 194.2553100585938,
+ 1.0,
+ 752.6591186523438,
+ 191.17718505859378,
+ 1.0,
+ 740.0582275390625,
+ 188.09907531738278,
+ 1.0,
+ 757.6519165039061,
+ 189.75651550292972,
+ 1.0,
+ 760.98046875,
+ 196.62309265136716,
+ 1.0,
+ 760.98046875,
+ 202.77932739257812,
+ 1.0,
+ 742.9112548828123,
+ 196.14953613281247,
+ 1.0,
+ 753.3723754882812,
+ 189.75651550292972,
+ 1.0,
+ 765.0222778320311,
+ 199.93798828125,
+ 1.0,
+ 767.6375122070312,
+ 207.04133605957034,
+ 1.0,
+ 747.6663208007811,
+ 202.5425567626953,
+ 1.0,
+ 757.1763916015626,
+ 197.80697631835943,
+ 1.0,
+ 769.3018188476562,
+ 203.72644042968753,
+ 1.0,
+ 772.3925781249999,
+ 209.40911865234378,
+ 1.0,
+ 753.610107421875,
+ 209.17234802246097,
+ 1.0,
+ 761.6937255859375,
+ 205.85745239257815,
+ 1.0,
+ 769.7772827148438,
+ 208.93556213378903,
+ 1.0,
+ 774.7701416015626,
+ 213.43435668945315,
+ 1.0
+ ],
+ "category_id": 1,
+ "id": 100586,
+ "image_id": 100586,
+ "segmentation": [
+ [
+ 720.32470703125,
+ 188.09907531738278,
+ 720.32470703125,
+ 206.0495376586914,
+ 720.32470703125,
+ 224.0,
+ 747.5474243164062,
+ 224.0,
+ 774.7701416015626,
+ 224.0,
+ 774.7701416015626,
+ 206.0495376586914,
+ 774.7701416015626,
+ 188.09907531738278,
+ 747.5474243164062,
+ 188.09907531738278
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 2045.9878050740865,
+ "dataset": "mpii"
+ },
+ {
+ "bbox": [
+ 746.0122680664061,
+ 313.11645507812494,
+ 50.4322509765625,
+ 77.21240234375
+ ],
+ "head_size": 140.0,
+ "center": [
+ 770.7283935546874,
+ 351.22265624999994
+ ],
+ "keypoints": [
+ 746.0122680664061,
+ 363.82229614257807,
+ 1.0,
+ 750.3375854492186,
+ 347.22766113281256,
+ 1.0,
+ 766.0941162109375,
+ 330.6329956054688,
+ 1.0,
+ 778.1432495117188,
+ 324.1795349121093,
+ 1.0,
+ 785.2490844726564,
+ 313.11645507812494,
+ 1.0,
+ 787.720703125,
+ 349.3788146972656,
+ 1.0,
+ 795.4445190429686,
+ 349.3788146972656,
+ 1.0,
+ 790.5012817382812,
+ 348.7641906738282,
+ 1.0,
+ 785.5580444335938,
+ 347.5349426269532,
+ 1.0,
+ 787.720703125,
+ 362.90036010742193,
+ 1.0,
+ 793.8997802734376,
+ 360.74920654296875,
+ 1.0,
+ 785.5580444335938,
+ 359.51998901367193,
+ 1.0,
+ 780.9237670898438,
+ 359.2126770019531,
+ 1.0,
+ 783.7043457031251,
+ 374.2707824707031,
+ 1.0,
+ 791.4281616210938,
+ 374.57806396484375,
+ 1.0,
+ 784.3222656249998,
+ 371.5050048828125,
+ 1.0,
+ 778.7611083984374,
+ 370.2757568359375,
+ 1.0,
+ 777.8342895507812,
+ 384.71923828124994,
+ 1.0,
+ 782.1596069335938,
+ 389.32885742187494,
+ 1.0,
+ 774.435791015625,
+ 385.3338623046875,
+ 1.0,
+ 771.9641723632812,
+ 382.87539672851557,
+ 1.0
+ ],
+ "category_id": 1,
+ "id": 100587,
+ "image_id": 100587,
+ "segmentation": [
+ [
+ 746.0122680664061,
+ 313.11645507812494,
+ 746.0122680664061,
+ 351.22265624999994,
+ 746.0122680664061,
+ 389.32885742187494,
+ 770.7283935546874,
+ 389.32885742187494,
+ 795.4445190429686,
+ 389.32885742187494,
+ 795.4445190429686,
+ 351.22265624999994,
+ 795.4445190429686,
+ 313.11645507812494,
+ 770.7283935546874,
+ 313.11645507812494
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 3893.9952535033226,
+ "dataset": "mpii"
+ },
+ {
+ "bbox": [
+ 179.84646606445315,
+ 260.29730224609364,
+ 42.822525024414034,
+ 30.21246337890642
+ ],
+ "head_size": 72.5531,
+ "center": [
+ 200.75772857666016,
+ 274.9035339355469
+ ],
+ "keypoints": [
+ 221.6689910888672,
+ 260.29730224609364,
+ 1.0,
+ 211.34570312499997,
+ 260.29730224609364,
+ 1.0,
+ 192.55204772949222,
+ 265.0344543457031,
+ 1.0,
+ 188.31684875488278,
+ 271.0874938964844,
+ 1.0,
+ 180.90525817871094,
+ 275.82464599609375,
+ 1.0,
+ 192.02264404296878,
+ 265.2976379394531,
+ 1.0,
+ 179.84646606445315,
+ 278.7195739746093,
+ 1.0,
+ 186.4639587402344,
+ 279.7722778320313,
+ 1.0,
+ 190.43444824218753,
+ 275.82464599609375,
+ 1.0,
+ 194.4049377441406,
+ 271.6138305664063,
+ 1.0,
+ 187.52275085449222,
+ 284.7726135253906,
+ 1.0,
+ 192.8167419433594,
+ 282.14086914062506,
+ 1.0,
+ 195.72843933105474,
+ 278.4563903808594,
+ 1.0,
+ 201.28712463378906,
+ 277.93005371093744,
+ 1.0,
+ 194.14024353027344,
+ 288.1938781738282,
+ 1.0,
+ 199.6989288330078,
+ 285.0357971191406,
+ 1.0,
+ 202.34593200683594,
+ 282.14086914062506,
+ 1.0,
+ 207.1105194091797,
+ 281.35131835937494,
+ 1.0,
+ 201.28712463378906,
+ 289.50976562500006,
+ 1.0,
+ 204.72822570800778,
+ 286.3516540527344,
+ 1.0,
+ 207.1105194091797,
+ 284.7726135253906,
+ 1.0
+ ],
+ "category_id": 1,
+ "id": 100520,
+ "image_id": 100520,
+ "segmentation": [
+ [
+ 179.84646606445315,
+ 260.29730224609364,
+ 179.84646606445315,
+ 274.9035339355469,
+ 179.84646606445315,
+ 289.50976562500006,
+ 200.75772857666016,
+ 289.50976562500006,
+ 221.6689910888672,
+ 289.50976562500006,
+ 221.6689910888672,
+ 274.9035339355469,
+ 221.6689910888672,
+ 260.29730224609364,
+ 200.75772857666016,
+ 260.29730224609364
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 1293.7739690924127,
+ "dataset": "nzsl"
+ },
+ {
+ "bbox": [
+ 186.37617492675776,
+ 196.84266662597656,
+ 46.34579467773443,
+ 44.16563415527344
+ ],
+ "head_size": 72.5531,
+ "center": [
+ 209.04907226562497,
+ 218.42548370361328
+ ],
+ "keypoints": [
+ 186.37617492675776,
+ 232.66671752929688,
+ 1.0,
+ 190.1365051269531,
+ 223.60145568847656,
+ 1.0,
+ 200.65892028808597,
+ 212.39300537109378,
+ 1.0,
+ 212.89080810546878,
+ 203.87417602539062,
+ 1.0,
+ 219.56018066406247,
+ 196.84266662597656,
+ 1.0,
+ 205.30081176757812,
+ 208.3026885986328,
+ 1.0,
+ 217.7982025146484,
+ 207.76916503906244,
+ 1.0,
+ 227.2103424072266,
+ 213.4448394775391,
+ 1.0,
+ 231.7219696044922,
+ 220.28102111816406,
+ 1.0,
+ 198.5165100097656,
+ 212.5708312988281,
+ 1.0,
+ 219.04794311523443,
+ 217.55035400390625,
+ 1.0,
+ 223.4833374023438,
+ 223.01550292968747,
+ 1.0,
+ 226.81802368164062,
+ 230.0469970703125,
+ 1.0,
+ 196.21739196777344,
+ 218.71846008300778,
+ 1.0,
+ 215.47726440429688,
+ 225.73097229003903,
+ 1.0,
+ 219.04794311523443,
+ 232.31103515625,
+ 1.0,
+ 213.47929382324216,
+ 232.78147888183597,
+ 1.0,
+ 200.53286743164062,
+ 233.95339965820318,
+ 1.0,
+ 211.71386718749997,
+ 236.68786621093753,
+ 1.0,
+ 216.81396484374997,
+ 240.00830078125,
+ 1.0,
+ 210.92922973632812,
+ 239.6176605224609,
+ 1.0
+ ],
+ "category_id": 1,
+ "id": 100521,
+ "image_id": 100521,
+ "segmentation": [
+ [
+ 186.37617492675776,
+ 196.84266662597656,
+ 186.37617492675776,
+ 218.42548370361328,
+ 186.37617492675776,
+ 240.00830078125,
+ 209.04907226562497,
+ 240.00830078125,
+ 231.7219696044922,
+ 240.00830078125,
+ 231.7219696044922,
+ 218.42548370361328,
+ 231.7219696044922,
+ 196.84266662597656,
+ 209.04907226562497,
+ 196.84266662597656
+ ]
+ ],
+ "iscrowd": 0,
+ "area": 2046.8914123722377,
+ "dataset": "nzsl"
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "hand",
+ "id": 1,
+ "name": "hand",
+ "keypoints": [
+ "wrist",
+ "thumb1",
+ "thumb2",
+ "thumb3",
+ "thumb4",
+ "forefinger1",
+ "forefinger2",
+ "forefinger3",
+ "forefinger4",
+ "middle_finger1",
+ "middle_finger2",
+ "middle_finger3",
+ "middle_finger4",
+ "ring_finger1",
+ "ring_finger2",
+ "ring_finger3",
+ "ring_finger4",
+ "pinky_finger1",
+ "pinky_finger2",
+ "pinky_finger3",
+ "pinky_finger4"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 1,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 9
+ ],
+ [
+ 1,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 1,
+ 14
+ ],
+ [
+ 14,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ],
+ [
+ 1,
+ 18
+ ],
+ [
+ 18,
+ 19
+ ],
+ [
+ 19,
+ 20
+ ],
+ [
+ 20,
+ 21
+ ]
+ ]
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/calibration_160906_band1.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/calibration_160906_band1.json
new file mode 100644
index 0000000000000000000000000000000000000000..31c0429b03ee8fe8c6b4680e6205056c397af2b4
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/calibration_160906_band1.json
@@ -0,0 +1,11965 @@
+{
+ "calibDataSource": "160906_calib_norm",
+ "cameras": [
+ {
+ "name": "01_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 1,
+ "K": [
+ [745.698,0,375.512],
+ [0,745.89,226.023],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324009,0.0732398,-0.000601245,0.000808154,0.0311011],
+ "R": [
+ [0.9609979695,0.02878724306,-0.2750530807],
+ [-0.05024448072,0.9961896773,-0.07128547526],
+ [0.2719529274,0.08232509619,0.9587826572]
+ ],
+ "t": [
+ [-51.56945892],
+ [143.9587601],
+ [282.5664691]
+ ]
+ },
+ {
+ "name": "01_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 2,
+ "K": [
+ [745.462,0,369.225],
+ [0,745.627,226.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336594,0.141798,-0.000612176,0.000160485,-0.0646767],
+ "R": [
+ [0.9715220842,-0.01574832828,-0.2364251047],
+ [0.005323209906,0.998987679,-0.04466856407],
+ [0.2368892218,0.042137956,0.9706224236]
+ ],
+ "t": [
+ [-66.22242206],
+ [142.1317177],
+ [278.6626087]
+ ]
+ },
+ {
+ "name": "01_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 3,
+ "K": [
+ [746.261,0,378.952],
+ [0,746.496,239.595],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322069,0.0440329,-0.000951664,0.000892653,0.103376],
+ "R": [
+ [0.9665011873,0.05534363601,-0.2506242943],
+ [-0.07024277085,0.996230894,-0.05089164033],
+ [0.2468631364,0.06679137568,0.9667458322]
+ ],
+ "t": [
+ [-54.75524211],
+ [118.3584455],
+ [281.78809]
+ ]
+ },
+ {
+ "name": "01_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 4,
+ "K": [
+ [747.661,0,366.929],
+ [0,747.759,234.022],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32333,0.0462607,-0.000972333,-0.000898261,0.102804],
+ "R": [
+ [0.9662588837,0.08601234823,-0.2427872436],
+ [-0.1112831564,0.9894890375,-0.09234448444],
+ [0.23229255,0.1162468093,0.9656742984]
+ ],
+ "t": [
+ [-29.08626445],
+ [96.75744843],
+ [287.7183779]
+ ]
+ },
+ {
+ "name": "01_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 5,
+ "K": [
+ [742.413,0,353.224],
+ [0,742.622,209.478],
+ [0,0,1]
+ ],
+ "distCoef": [-0.297729,-0.0985766,-0.000505185,-0.000773418,0.328727],
+ "R": [
+ [0.9718071292,0.05098345905,-0.2301990238],
+ [-0.07271497659,0.9935575811,-0.0869244798],
+ [0.2242842746,0.1012127458,0.9692536016]
+ ],
+ "t": [
+ [-26.91018729],
+ [77.97642882],
+ [285.7140393]
+ ]
+ },
+ {
+ "name": "01_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 6,
+ "K": [
+ [743.487,0,372.277],
+ [0,743.725,241.821],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317534,0.0281748,0.00130284,-0.000186889,0.119129],
+ "R": [
+ [0.9681278444,0.07458666466,-0.2390926732],
+ [-0.09383510211,0.9931135585,-0.07014580141],
+ [0.2322142341,0.09034538891,0.968459736]
+ ],
+ "t": [
+ [-7.038020326],
+ [73.51221006],
+ [284.7303027]
+ ]
+ },
+ {
+ "name": "01_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 7,
+ "K": [
+ [748.393,0,380.919],
+ [0,748.388,229.353],
+ [0,0,1]
+ ],
+ "distCoef": [-0.344193,0.174813,-0.00034307,0.00107023,-0.0968505],
+ "R": [
+ [0.9670535143,-0.02995409712,-0.2528047715],
+ [0.01712365053,0.9984582116,-0.0528013286],
+ [0.2539966162,0.04673276982,0.9660754459]
+ ],
+ "t": [
+ [-4.52170598],
+ [98.55800179],
+ [280.6705064]
+ ]
+ },
+ {
+ "name": "01_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 8,
+ "K": [
+ [745.37,0,362.362],
+ [0,745.56,217.483],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326014,0.0789588,-0.000462463,-0.00138061,0.0222432],
+ "R": [
+ [0.9652282485,0.06485174985,-0.2532364089],
+ [-0.07898708824,0.9958116468,-0.0460456736],
+ [0.2491896228,0.06444699145,0.9663079826]
+ ],
+ "t": [
+ [26.28384049],
+ [86.2200762],
+ [282.8912643]
+ ]
+ },
+ {
+ "name": "01_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 9,
+ "K": [
+ [746.037,0,338.236],
+ [0,746.053,236.859],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314486,0.0395532,0.000625849,-0.000232478,0.0599275],
+ "R": [
+ [0.9656569777,0.07278005487,-0.2494186543],
+ [-0.09030273149,0.9941334749,-0.05953193019],
+ [0.2436226964,0.08001060955,0.9665641645]
+ ],
+ "t": [
+ [45.35508632],
+ [94.7965848],
+ [284.0947744]
+ ]
+ },
+ {
+ "name": "01_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 10,
+ "K": [
+ [747.938,0,379.271],
+ [0,748.269,227.432],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3484,0.205218,-0.00110069,0.000562921,-0.151344],
+ "R": [
+ [0.9662738854,-0.001312373382,-0.2575132151],
+ [-0.009587322107,0.9991104143,-0.04106657164],
+ [0.2573380297,0.04215041788,0.9654017199]
+ ],
+ "t": [
+ [30.05861189],
+ [130.0028668],
+ [279.9552314]
+ ]
+ },
+ {
+ "name": "01_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 11,
+ "K": [
+ [746.12,0,364.693],
+ [0,745.844,223.621],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335335,0.119703,0.000192218,0.00118296,-0.00812072],
+ "R": [
+ [0.9869891455,-0.01212212734,-0.1603292883],
+ [0.00355647539,0.9985558958,-0.05360479805],
+ [0.1607475603,0.05233714665,0.9856069424]
+ ],
+ "t": [
+ [71.07099717],
+ [142.6182462],
+ [275.3539702]
+ ]
+ },
+ {
+ "name": "01_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 12,
+ "K": [
+ [745.407,0,358.691],
+ [0,745.503,226.329],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325389,0.0923962,-0.00061832,-0.00189678,-0.0159561],
+ "R": [
+ [0.9589650047,0.08538224277,-0.2703627054],
+ [-0.09708669181,0.9948178626,-0.03019262438],
+ [0.2663837347,0.05520229083,0.9622849957]
+ ],
+ "t": [
+ [54.63033668],
+ [157.9150468],
+ [281.9236261]
+ ]
+ },
+ {
+ "name": "01_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 13,
+ "K": [
+ [744.389,0,339.442],
+ [0,744.512,216.258],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320138,0.0543285,-0.000196977,-0.00116274,0.0473598],
+ "R": [
+ [0.9724830194,-0.06319437739,-0.2242392645],
+ [0.03959405574,0.9933373951,-0.1082272161],
+ [0.2295845984,0.09637058799,0.9685058709]
+ ],
+ "t": [
+ [19.90234626],
+ [154.6647449],
+ [286.7518211]
+ ]
+ },
+ {
+ "name": "01_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 14,
+ "K": [
+ [746.213,0,363.165],
+ [0,746.641,235.418],
+ [0,0,1]
+ ],
+ "distCoef": [-0.33414,0.127633,-0.000792357,0.000136075,-0.0405619],
+ "R": [
+ [0.9643490552,0.006836134333,-0.2645452079],
+ [-0.02440508255,0.9977035557,-0.06318233054],
+ [0.2635057717,0.0673860684,0.9623013177]
+ ],
+ "t": [
+ [19.24633902],
+ [182.0747755],
+ [282.9928946]
+ ]
+ },
+ {
+ "name": "01_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 15,
+ "K": [
+ [745.225,0,366.568],
+ [0,745.569,216.05],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319743,0.046174,-0.00158438,-0.000953331,0.0743504],
+ "R": [
+ [0.9602661069,0.03565913048,-0.2767985376],
+ [-0.06162250151,0.9944158624,-0.08567239854],
+ [0.2721978533,0.09932531892,0.9571012536]
+ ],
+ "t": [
+ [0.9330302863],
+ [174.5612072],
+ [288.1067574]
+ ]
+ },
+ {
+ "name": "01_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 16,
+ "K": [
+ [747.633,0,371.752],
+ [0,747.88,230.613],
+ [0,0,1]
+ ],
+ "distCoef": [-0.347758,0.198029,0.00072103,0.00029865,-0.136932],
+ "R": [
+ [0.9682573711,0.05614690975,-0.2435676248],
+ [-0.07153002565,0.9959334273,-0.05477283913],
+ [0.2395018137,0.07045660367,0.968336072]
+ ],
+ "t": [
+ [-3.74774],
+ [172.5737662],
+ [282.7618788]
+ ]
+ },
+ {
+ "name": "01_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 17,
+ "K": [
+ [748.152,0,373.9],
+ [0,748.508,234.452],
+ [0,0,1]
+ ],
+ "distCoef": [-0.345127,0.177692,-0.00116897,0.00210199,-0.0818461],
+ "R": [
+ [0.9639501783,0.02458774974,-0.264944327],
+ [-0.04477053879,0.9965129817,-0.07040934697],
+ [0.2622892538,0.07973280283,0.9616896732]
+ ],
+ "t": [
+ [-36.08309916],
+ [173.4726636],
+ [283.4522322]
+ ]
+ },
+ {
+ "name": "01_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 18,
+ "K": [
+ [743.791,0,363.617],
+ [0,744.126,236.963],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312734,0.0122172,-0.00120247,-0.000963953,0.133944],
+ "R": [
+ [0.9523198878,0.06045552763,-0.2990517689],
+ [-0.07234112338,0.9969633514,-0.02882425707],
+ [0.2964010681,0.04908365416,0.9538014478]
+ ],
+ "t": [
+ [-57.80984395],
+ [175.8598769],
+ [275.2458542]
+ ]
+ },
+ {
+ "name": "01_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 19,
+ "K": [
+ [743.162,0,364.748],
+ [0,743.331,220.785],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311505,0.00290054,-0.000860754,-0.000437091,0.146397],
+ "R": [
+ [0.9677776267,0.05243241618,-0.246287042],
+ [-0.06515666231,0.9969134625,-0.04379677618],
+ [0.243230497,0.05843278173,0.968206866]
+ ],
+ "t": [
+ [-19.88792012],
+ [144.796335],
+ [280.8929426]
+ ]
+ },
+ {
+ "name": "01_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 20,
+ "K": [
+ [744.661,0,343.237],
+ [0,744.907,246.044],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326994,0.0904776,0.000984855,-0.00107766,-0.0214165],
+ "R": [
+ [0.9717064093,0.03462931454,-0.2336396043],
+ [-0.0436324388,0.998486683,-0.03347468014],
+ [0.2321268283,0.04272182698,0.9717468709]
+ ],
+ "t": [
+ [-15.15244103],
+ [127.7778149],
+ [279.5122056]
+ ]
+ },
+ {
+ "name": "01_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 21,
+ "K": [
+ [742.462,0,365.246],
+ [0,742.468,221.387],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311193,-0.0017069,-0.0010044,-5.33063e-05,0.168374],
+ "R": [
+ [0.9650420793,0.04068979072,-0.2589172188],
+ [-0.04945049005,0.9984003719,-0.02741069744],
+ [0.257387712,0.03925605981,0.965510501]
+ ],
+ "t": [
+ [-1.672862451],
+ [122.1992626],
+ [279.1232554]
+ ]
+ },
+ {
+ "name": "01_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 22,
+ "K": [
+ [744.021,0,363.587],
+ [0,744.301,226.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330855,0.115198,-0.00111581,-0.000578883,-0.0257811],
+ "R": [
+ [0.9624230562,-0.007741542698,-0.2714441553],
+ [-0.003557050749,0.9991484058,-0.04110730506],
+ [0.271531229,0.0405281588,0.9615759252]
+ ],
+ "t": [
+ [4.289641778],
+ [135.1743597],
+ [279.2863723]
+ ]
+ },
+ {
+ "name": "01_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 23,
+ "K": [
+ [745.029,0,358.645],
+ [0,745.162,224.101],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31925,0.0412999,-0.000788365,0.000625647,0.108146],
+ "R": [
+ [0.9553340738,0.01211961015,-0.2952793973],
+ [-0.03701510886,0.9961975848,-0.07886858543],
+ [0.293200766,0.08627564605,0.9521501057]
+ ],
+ "t": [
+ [-2.968489269],
+ [143.230855],
+ [285.3382881]
+ ]
+ },
+ {
+ "name": "01_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 24,
+ "K": [
+ [744.501,0,369.38],
+ [0,744.575,244.409],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317214,0.0306635,-5.65201e-05,-0.000305408,0.106933],
+ "R": [
+ [0.9627375442,0.05351140442,-0.2650904574],
+ [-0.07422624073,0.9948691584,-0.06874462026],
+ [0.2600516991,0.08585969499,0.9617698408]
+ ],
+ "t": [
+ [-7.333655278],
+ [148.0612654],
+ [284.8699573]
+ ]
+ },
+ {
+ "name": "02_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 1,
+ "K": [
+ [746.79,0,376.022],
+ [0,747.048,234.17],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317408,0.0301922,-0.000108969,-0.00027109,0.105931],
+ "R": [
+ [0.977473966,0.04697618088,0.2057617172],
+ [0.001487552662,0.9733575223,-0.2292878562],
+ [-0.211050783,0.2244289915,0.9513617581]
+ ],
+ "t": [
+ [-1.729507611],
+ [175.3460492],
+ [304.9109171]
+ ]
+ },
+ {
+ "name": "02_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 2,
+ "K": [
+ [747.689,0,367.065],
+ [0,747.811,212.158],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333664,0.117162,0.000577725,-0.000310896,-0.0327554],
+ "R": [
+ [0.9812751339,-0.05714257326,0.183939767],
+ [0.09271495859,0.9771941455,-0.1910380552],
+ [-0.1688284573,0.2045148611,0.9641942873]
+ ],
+ "t": [
+ [-50.62568249],
+ [190.9654762],
+ [299.6250374]
+ ]
+ },
+ {
+ "name": "02_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 3,
+ "K": [
+ [745.627,0,353.486],
+ [0,745.817,252.683],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321416,0.0392112,-0.00107045,-0.00134198,0.0908854],
+ "R": [
+ [0.9757098845,0.1270834984,0.1784376802],
+ [-0.07601456941,0.9603325594,-0.2682967771],
+ [-0.2054556071,0.248215954,0.946666168]
+ ],
+ "t": [
+ [-23.13649132],
+ [169.3490841],
+ [309.2380875]
+ ]
+ },
+ {
+ "name": "02_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 4,
+ "K": [
+ [746.11,0,381.584],
+ [0,746.321,224.917],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323963,0.0585021,-0.000871966,0.000552522,0.0715102],
+ "R": [
+ [0.979331342,0.07410153523,0.1881995881],
+ [-0.02608477747,0.9689731658,-0.2457856551],
+ [-0.2005734451,0.2357964511,0.950878713]
+ ],
+ "t": [
+ [-32.63906075],
+ [150.8763932],
+ [306.9317958]
+ ]
+ },
+ {
+ "name": "02_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 5,
+ "K": [
+ [744.11,0,378.377],
+ [0,744.035,244.823],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323078,0.0494134,-0.000238923,-0.000981516,0.0727453],
+ "R": [
+ [0.9857440106,0.05652749171,0.1584720428],
+ [-0.01525193411,0.9680163878,-0.250422945],
+ [-0.1675593154,0.244435913,0.95507851]
+ ],
+ "t": [
+ [-62.3494258],
+ [135.8190029],
+ [306.0165552]
+ ]
+ },
+ {
+ "name": "02_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 6,
+ "K": [
+ [743.928,0,352.844],
+ [0,744.181,228.627],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303908,-0.0528673,-0.000528541,8.08764e-05,0.267531],
+ "R": [
+ [0.9814194485,0.06212733968,0.1815380393],
+ [-0.0101664424,0.9616367605,-0.2741375282],
+ [-0.1916050874,0.2671983057,0.9444006332]
+ ],
+ "t": [
+ [-53.86742917],
+ [106.6702196],
+ [310.2214119]
+ ]
+ },
+ {
+ "name": "02_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 7,
+ "K": [
+ [746.501,0,376.178],
+ [0,746.591,217.394],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323449,0.0621904,-0.000592526,0.000355354,0.0689781],
+ "R": [
+ [0.9775323693,0.09704954661,0.1871145437],
+ [-0.05094527723,0.9701636443,-0.2370381445],
+ [-0.2045361721,0.2221798567,0.9533105819]
+ ],
+ "t": [
+ [-27.21830655],
+ [111.2122483],
+ [305.8578091]
+ ]
+ },
+ {
+ "name": "02_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 8,
+ "K": [
+ [747.056,0,346.722],
+ [0,747.425,231.954],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331626,0.0978711,0.000923123,-0.00170198,0.0128988],
+ "R": [
+ [0.9738310577,0.04398424166,0.222976361],
+ [0.006459505741,0.9753414162,-0.2206068824],
+ [-0.2271813062,0.2162741507,0.9495336465]
+ ],
+ "t": [
+ [-23.1615402],
+ [89.62617671],
+ [306.715437]
+ ]
+ },
+ {
+ "name": "02_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 9,
+ "K": [
+ [746.084,0,344.827],
+ [0,746.456,222.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31385,0.00765504,0.000335804,0.000338293,0.157318],
+ "R": [
+ [0.9708044988,0.02558390192,0.2385038556],
+ [0.01777728087,0.9838878899,-0.1779005014],
+ [-0.2392124442,0.1769465571,0.9547079776]
+ ],
+ "t": [
+ [-1.622489705],
+ [92.86686988],
+ [302.6276511]
+ ]
+ },
+ {
+ "name": "02_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 10,
+ "K": [
+ [743.875,0,345.16],
+ [0,744.131,231.932],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309364,-0.0158069,0.000435688,-0.000318284,0.167974],
+ "R": [
+ [0.9837217555,0.04774800386,0.1732386674],
+ [-0.008457215477,0.9752859506,-0.220784488],
+ [-0.179499257,0.2157253874,0.9598138226]
+ ],
+ "t": [
+ [0.6070589451],
+ [94.58504844],
+ [305.3954199]
+ ]
+ },
+ {
+ "name": "02_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 11,
+ "K": [
+ [748.642,0,372.727],
+ [0,749.029,221.349],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329743,0.0894243,0.000705225,0.000452301,0.0255748],
+ "R": [
+ [0.9762818677,-0.03993432779,0.2127885436],
+ [0.08495434643,0.9746762651,-0.20685487],
+ [-0.1991393328,0.2200259705,0.9549513592]
+ ],
+ "t": [
+ [18.17502224],
+ [86.30258496],
+ [305.899008]
+ ]
+ },
+ {
+ "name": "02_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 12,
+ "K": [
+ [746.297,0,386.393],
+ [0,746.341,223.432],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329805,0.088881,-0.000101498,-0.000342857,0.0238941],
+ "R": [
+ [0.9769251111,-0.05225372472,0.2070914666],
+ [0.09392861168,0.9759243238,-0.1968479875],
+ [-0.1918195589,0.211757556,0.9583130982]
+ ],
+ "t": [
+ [31.97904484],
+ [101.8192368],
+ [305.2554798]
+ ]
+ },
+ {
+ "name": "02_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 13,
+ "K": [
+ [746.887,0,386.903],
+ [0,746.77,241.912],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330222,0.0894843,0.000608161,-0.000202457,0.0188277],
+ "R": [
+ [0.9805035597,0.07291108666,0.1824739514],
+ [-0.03359954242,0.9771464723,-0.2098948364],
+ [-0.1936074385,0.199671593,0.9605453736]
+ ],
+ "t": [
+ [39.8755561],
+ [121.0360498],
+ [302.8306622]
+ ]
+ },
+ {
+ "name": "02_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 14,
+ "K": [
+ [745.399,0,359.381],
+ [0,745.103,221.453],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32351,0.0564367,0.000553752,0.000358328,0.0789504],
+ "R": [
+ [0.9639890244,-0.01369700088,0.2655890681],
+ [0.06651808592,0.9793475216,-0.1909287203],
+ [-0.2574888447,0.2017196672,0.9449913601]
+ ],
+ "t": [
+ [64.66924198],
+ [136.2834945],
+ [299.1868513]
+ ]
+ },
+ {
+ "name": "02_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 15,
+ "K": [
+ [746.343,0,376.035],
+ [0,746.136,233.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332319,0.10939,0.000552685,0.00121175,-0.00685584],
+ "R": [
+ [0.9739293667,-0.02993852249,0.2248672353],
+ [0.07982373372,0.9730868608,-0.2161715356],
+ [-0.2123434957,0.2284855491,0.9501076748]
+ ],
+ "t": [
+ [41.67937397],
+ [146.9667487],
+ [305.3208703]
+ ]
+ },
+ {
+ "name": "02_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 16,
+ "K": [
+ [747.983,0,369.069],
+ [0,747.865,212.357],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333814,0.119177,-0.00123283,0.000206724,-0.0313224],
+ "R": [
+ [0.9828420813,0.01261378295,0.1840172159],
+ [0.03080156014,0.9724259604,-0.2311688027],
+ [-0.181859031,0.2328704445,0.9553526307]
+ ],
+ "t": [
+ [22.33056427],
+ [154.6384713],
+ [307.0242051]
+ ]
+ },
+ {
+ "name": "02_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 17,
+ "K": [
+ [743.255,0,372.405],
+ [0,743.629,259.514],
+ [0,0,1]
+ ],
+ "distCoef": [-0.301911,-0.0577323,-0.000292445,-0.000537705,0.240913],
+ "R": [
+ [0.9702237144,0.05425789408,0.2360551311],
+ [-0.004184220731,0.978195713,-0.2076430576],
+ [-0.2421743923,0.2004725119,0.9492957051]
+ ],
+ "t": [
+ [39.95715372],
+ [182.9757461],
+ [299.4720725]
+ ]
+ },
+ {
+ "name": "02_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 18,
+ "K": [
+ [746.171,0,380.016],
+ [0,746.628,215.7],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310416,0.0111871,-0.00156578,-0.000885002,0.110566],
+ "R": [
+ [0.9751942313,0.01121985931,0.2210663386],
+ [0.02134458651,0.9892938663,-0.1443677759],
+ [-0.220319359,0.1455051918,0.9645141882]
+ ],
+ "t": [
+ [9.159436194],
+ [213.6293599],
+ [288.3403437]
+ ]
+ },
+ {
+ "name": "02_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 19,
+ "K": [
+ [745.09,0,380.114],
+ [0,745.176,232.983],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31746,0.043353,-0.000108725,0.000220738,0.0862213],
+ "R": [
+ [0.9809185988,0.05584586521,0.1862255137],
+ [-0.01423917048,0.975920974,-0.2176591338],
+ [-0.1938967473,0.2108541957,0.9580942331]
+ ],
+ "t": [
+ [-1.989355998],
+ [159.4183424],
+ [303.0216832]
+ ]
+ },
+ {
+ "name": "02_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 20,
+ "K": [
+ [746.359,0,393.165],
+ [0,746.438,228.007],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32236,0.0673245,-0.000115957,0.00130444,0.0588071],
+ "R": [
+ [0.9826018096,0.03015545669,0.1832602856],
+ [0.01576123022,0.9696317731,-0.2440610748],
+ [-0.1850547688,0.2427032613,0.9522866477]
+ ],
+ "t": [
+ [-25.36954265],
+ [136.7143691],
+ [307.7149997]
+ ]
+ },
+ {
+ "name": "02_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 21,
+ "K": [
+ [747.137,0,358.509],
+ [0,747.202,238.678],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327929,0.0852816,0.000460613,0.000357406,0.0365027],
+ "R": [
+ [0.9780966382,0.08951991601,0.1879179366],
+ [-0.04045439222,0.9673344336,-0.2502549415],
+ [-0.2041822921,0.2371714111,0.9497680314]
+ ],
+ "t": [
+ [-10.00427836],
+ [118.005594],
+ [307.3165834]
+ ]
+ },
+ {
+ "name": "02_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 22,
+ "K": [
+ [745.847,0,374.568],
+ [0,746.074,247.807],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32052,0.063252,0.000743322,-0.000945252,0.0534877],
+ "R": [
+ [0.9839840132,0.07804627455,0.160263036],
+ [-0.03749054936,0.9695570383,-0.2419785283],
+ [-0.1742696772,0.2320946541,0.9569546233]
+ ],
+ "t": [
+ [-1.458572059],
+ [110.2636917],
+ [306.6072245]
+ ]
+ },
+ {
+ "name": "02_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 23,
+ "K": [
+ [744.851,0,375.128],
+ [0,744.899,236.672],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328747,0.0731957,0.000409854,0.000115616,0.0573405],
+ "R": [
+ [0.9798731388,0.006836815724,0.1995041098],
+ [0.04188111895,0.9701291749,-0.2389463451],
+ [-0.1951783896,0.2424925605,0.9503171862]
+ ],
+ "t": [
+ [13.92766978],
+ [118.8861106],
+ [308.0337581]
+ ]
+ },
+ {
+ "name": "02_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 24,
+ "K": [
+ [748.108,0,365.63],
+ [0,748.409,236.546],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337502,0.145226,-9.99404e-05,-0.000712599,-0.0768278],
+ "R": [
+ [0.9858983234,-0.01937546959,0.166219996],
+ [0.057736328,0.9716683618,-0.2291879382],
+ [-0.1570700873,0.2355529362,0.9590848773]
+ ],
+ "t": [
+ [-5.69779309],
+ [141.0775615],
+ [307.1963385]
+ ]
+ },
+ {
+ "name": "03_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 1,
+ "K": [
+ [745.205,0,364.445],
+ [0,745.671,223.278],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321278,0.0550501,-0.000663141,0.000431329,0.0680735],
+ "R": [
+ [0.789168654,0.1464091436,-0.5964706181],
+ [-0.3274382264,0.921936374,-0.2069239719],
+ [0.5196123973,0.3586051937,0.7755032377]
+ ],
+ "t": [
+ [-15.48720347],
+ [106.8731646],
+ [321.197831]
+ ]
+ },
+ {
+ "name": "03_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 2,
+ "K": [
+ [746.402,0,367.989],
+ [0,746.656,218.884],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319108,0.0415571,-0.000289565,0.00121415,0.0978966],
+ "R": [
+ [0.7844411333,0.123213727,-0.6078408392],
+ [-0.3461950886,0.9001611021,-0.2643084389],
+ [0.5145882519,0.4177659246,0.7487793823]
+ ],
+ "t": [
+ [-25.69855827],
+ [65.19717944],
+ [326.035328]
+ ]
+ },
+ {
+ "name": "03_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 3,
+ "K": [
+ [747.999,0,350.415],
+ [0,748.222,213.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322361,0.0444301,-0.000132478,-4.14576e-05,0.110213],
+ "R": [
+ [0.8075592295,0.0617799019,-0.5865418439],
+ [-0.2672496857,0.9248714179,-0.2705373648],
+ [0.525762015,0.3752280693,0.763399109]
+ ],
+ "t": [
+ [-8.799326732],
+ [72.40249706],
+ [323.1224723]
+ ]
+ },
+ {
+ "name": "03_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 4,
+ "K": [
+ [744.819,0,376.394],
+ [0,744.912,212.894],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335892,0.121706,-0.00015411,0.0017688,-0.0013985],
+ "R": [
+ [0.8410364559,-0.03582960221,-0.5397906256],
+ [-0.192384631,0.9127679401,-0.3603371217],
+ [0.5056143132,0.4069040761,0.7607780486]
+ ],
+ "t": [
+ [3.728898504],
+ [75.32503712],
+ [325.8417248]
+ ]
+ },
+ {
+ "name": "03_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 5,
+ "K": [
+ [746.446,0,376.523],
+ [0,746.682,251.012],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330943,0.0996499,0.00144142,-0.000113946,0.0131394],
+ "R": [
+ [0.8610606531,-0.05437396314,-0.5055868113],
+ [-0.176556083,0.9004429458,-0.3975304402],
+ [0.4768673833,0.4315622475,0.7657359371]
+ ],
+ "t": [
+ [31.93527518],
+ [62.43528973],
+ [326.764058]
+ ]
+ },
+ {
+ "name": "03_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 6,
+ "K": [
+ [744.998,0,378.484],
+ [0,744.973,240.788],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31652,0.0338012,-0.0010118,-0.000122735,0.0959735],
+ "R": [
+ [0.8769583834,-0.06555368648,-0.4760742674],
+ [-0.1128149484,0.9348860407,-0.3365425358],
+ [0.4671367907,0.348842092,0.8124607151]
+ ],
+ "t": [
+ [52.69213606],
+ [109.2131316],
+ [317.2562433]
+ ]
+ },
+ {
+ "name": "03_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 7,
+ "K": [
+ [744.942,0,394.454],
+ [0,745.513,230.902],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322593,0.0669124,0.000685625,0.000650135,0.0435827],
+ "R": [
+ [0.8511772215,-0.03734239681,-0.5235483579],
+ [-0.1521244983,0.9371023984,-0.3141611561],
+ [0.5023499524,0.3470513512,0.7919595223]
+ ],
+ "t": [
+ [39.57000229],
+ [127.8421428],
+ [318.5564893]
+ ]
+ },
+ {
+ "name": "03_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 8,
+ "K": [
+ [744.592,0,375.596],
+ [0,744.695,234.586],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314208,0.0115966,-0.0002404,-0.00129875,0.131833],
+ "R": [
+ [0.863242284,-0.08735605341,-0.4971736911],
+ [-0.1241310572,0.9179337282,-0.3768144785],
+ [0.4892895255,0.386996887,0.7815556088]
+ ],
+ "t": [
+ [48.3076273],
+ [133.8669044],
+ [323.1008342]
+ ]
+ },
+ {
+ "name": "03_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 9,
+ "K": [
+ [746.083,0,388.49],
+ [0,746.196,219.485],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327776,0.0952708,0.000477894,0.00116098,0.0130168],
+ "R": [
+ [0.8627791791,-0.162720556,-0.478679547],
+ [-0.06768333431,0.9010943873,-0.4283081501],
+ [0.5010299935,0.401933982,0.766432006]
+ ],
+ "t": [
+ [23.91664651],
+ [150.3571005],
+ [326.7446808]
+ ]
+ },
+ {
+ "name": "03_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 10,
+ "K": [
+ [744.984,0,374.291],
+ [0,745.244,231.69],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317288,0.0201616,0.000340337,0.000302133,0.135473],
+ "R": [
+ [0.8433461687,-0.104156761,-0.5271798639],
+ [-0.1611508321,0.8868626272,-0.433018579],
+ [0.5126379318,0.4501400333,0.7311472501]
+ ],
+ "t": [
+ [5.809004706],
+ [133.1751931],
+ [335.4888131]
+ ]
+ },
+ {
+ "name": "03_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 11,
+ "K": [
+ [746.325,0,369.755],
+ [0,746.606,238.315],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330117,0.107892,0.000853042,-0.00148033,-0.0192727],
+ "R": [
+ [0.8487877999,-0.06352852013,-0.5249032272],
+ [-0.1660312052,0.9105147821,-0.3786772643],
+ [0.5019889537,0.4085669574,0.7622861219]
+ ],
+ "t": [
+ [10.90299391],
+ [168.9126588],
+ [328.8547345]
+ ]
+ },
+ {
+ "name": "03_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 12,
+ "K": [
+ [745.397,0,373.191],
+ [0,745.394,241.989],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315431,0.0239438,0.00152043,8.78247e-05,0.132462],
+ "R": [
+ [0.7899500519,0.01447673769,-0.613000277],
+ [-0.2772192125,0.9001468868,-0.3359837649],
+ [0.5469263421,0.4353458466,0.7150843098]
+ ],
+ "t": [
+ [-11.01289772],
+ [165.4412244],
+ [333.9391633]
+ ]
+ },
+ {
+ "name": "03_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 13,
+ "K": [
+ [746.289,0,356.696],
+ [0,746.559,221.83],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307674,-0.0320128,-0.000713248,-0.000212304,0.187939],
+ "R": [
+ [0.7812025858,0.003231301473,-0.6242692358],
+ [-0.256925784,0.9130359895,-0.316787663],
+ [0.5689566429,0.4078662043,0.7140962805]
+ ],
+ "t": [
+ [-30.04397497],
+ [158.6113997],
+ [327.0561852]
+ ]
+ },
+ {
+ "name": "03_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 14,
+ "K": [
+ [744.216,0,367.374],
+ [0,744.503,234.384],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313106,0.0107213,0.00051099,0.000391129,0.137335],
+ "R": [
+ [0.7647493291,0.08765142393,-0.6383382266],
+ [-0.3090501184,0.9192036391,-0.2440342068],
+ [0.5653728752,0.3839035005,0.7300490493]
+ ],
+ "t": [
+ [-30.23656889],
+ [178.7825502],
+ [321.7207122]
+ ]
+ },
+ {
+ "name": "03_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 15,
+ "K": [
+ [747.827,0,380.852],
+ [0,747.806,237.021],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329904,0.102056,0.000500868,0.000776535,0.0163276],
+ "R": [
+ [0.8420936086,0.09442452017,-0.5310012847],
+ [-0.2692856411,0.9266613257,-0.2622670985],
+ [0.4672939095,0.3638444688,0.8057627471]
+ ],
+ "t": [
+ [-9.683781844],
+ [164.2881649],
+ [322.7392687]
+ ]
+ },
+ {
+ "name": "03_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 16,
+ "K": [
+ [745.289,0,371.652],
+ [0,745.447,216.538],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317152,0.0301694,-0.000847782,0.000226416,0.100881],
+ "R": [
+ [0.7751085928,0.08020770062,-0.6267163586],
+ [-0.2817854267,0.9316829094,-0.2292682483],
+ [0.5655118413,0.3543073259,0.74475679]
+ ],
+ "t": [
+ [-42.18053512],
+ [150.9579844],
+ [316.9204289]
+ ]
+ },
+ {
+ "name": "03_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 17,
+ "K": [
+ [744.591,0,386.471],
+ [0,744.601,243.766],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308716,-0.020066,-0.000742984,7.36231e-05,0.18193],
+ "R": [
+ [0.8000888793,0.13985822,-0.5833502066],
+ [-0.3086873752,0.9298003917,-0.2004578159],
+ [0.5143635773,0.3404569133,0.7870954202]
+ ],
+ "t": [
+ [-29.24407076],
+ [139.76037],
+ [318.5389184]
+ ]
+ },
+ {
+ "name": "03_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 18,
+ "K": [
+ [747.091,0,388.41],
+ [0,747.213,245.147],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331947,0.109947,-0.00018029,-0.000335458,-0.0100282],
+ "R": [
+ [0.7812031275,0.143907843,-0.6074637489],
+ [-0.3493109676,0.9072427652,-0.2342912992],
+ [0.5174007358,0.3952228456,0.7590094735]
+ ],
+ "t": [
+ [-39.38157975],
+ [101.9329028],
+ [324.6812046]
+ ]
+ },
+ {
+ "name": "03_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 19,
+ "K": [
+ [743.815,0,380.782],
+ [0,743.921,233.579],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31618,0.0384848,0.000240219,0.000426998,0.0977231],
+ "R": [
+ [0.8097086682,0.09665101941,-0.578818152],
+ [-0.2718115959,0.9359285209,-0.2239559336],
+ [0.5200868476,0.3386685464,0.784100304]
+ ],
+ "t": [
+ [-3.817362892],
+ [126.1763792],
+ [318.2990602]
+ ]
+ },
+ {
+ "name": "03_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 20,
+ "K": [
+ [746.163,0,356.033],
+ [0,746.281,215.327],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323416,0.0556958,5.62358e-06,-0.000684023,0.0815018],
+ "R": [
+ [0.8690981447,0.003405692177,-0.4946279574],
+ [-0.1831744592,0.9310985933,-0.3154402114],
+ [0.4594731031,0.3647517111,0.8098398958]
+ ],
+ "t": [
+ [22.15812523],
+ [111.197586],
+ [320.9871724]
+ ]
+ },
+ {
+ "name": "03_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 21,
+ "K": [
+ [745.277,0,370.698],
+ [0,745.633,251.594],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309423,-0.0154759,-0.000871178,-0.000110471,0.185828],
+ "R": [
+ [0.8519925598,-0.01534543221,-0.5233289556],
+ [-0.157671027,0.9456449668,-0.2844212441],
+ [0.4992479597,0.3248385977,0.8032629458]
+ ],
+ "t": [
+ [23.66925749],
+ [140.0971121],
+ [315.3107012]
+ ]
+ },
+ {
+ "name": "03_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 22,
+ "K": [
+ [749.812,0,361.025],
+ [0,750.052,224.033],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333335,0.0892582,3.32371e-05,-0.00136116,0.0353235],
+ "R": [
+ [0.8242021998,-0.0118106517,-0.5661724493],
+ [-0.2609232338,0.8794144434,-0.3981824994],
+ [0.5026030242,0.4759104383,0.7217336453]
+ ],
+ "t": [
+ [6.739100305],
+ [105.8858326],
+ [336.9710973]
+ ]
+ },
+ {
+ "name": "03_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 23,
+ "K": [
+ [744.781,0,365.976],
+ [0,744.836,235.682],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319452,0.032528,0.000754874,-0.000913445,0.102166],
+ "R": [
+ [0.8233335342,0.02583843362,-0.5669693703],
+ [-0.2570181529,0.9076367155,-0.3318693443],
+ [0.506027233,0.4189605805,0.7539286912]
+ ],
+ "t": [
+ [-4.103462359],
+ [133.5127669],
+ [329.5726238]
+ ]
+ },
+ {
+ "name": "03_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 24,
+ "K": [
+ [746.135,0,373.553],
+ [0,746.515,225.298],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323756,0.0623909,2.70614e-05,0.000962707,0.0761173],
+ "R": [
+ [0.8557458945,0.0294251088,-0.5165589289],
+ [-0.2234217673,0.921515875,-0.3176337608],
+ [0.4666708454,0.3872242956,0.7951576366]
+ ],
+ "t": [
+ [-1.49693002],
+ [128.5290469],
+ [325.1203285]
+ ]
+ },
+ {
+ "name": "04_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 1,
+ "K": [
+ [745.756,0,368.953],
+ [0,745.945,245.188],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3245,0.0724334,-0.000312337,0.000678015,0.0415529],
+ "R": [
+ [0.04501388353,-0.06073969189,-0.9971381249],
+ [-0.08162898106,0.9945884367,-0.06426936354],
+ [0.9956457501,0.08428838276,0.03981216889]
+ ],
+ "t": [
+ [-59.71104012],
+ [137.3658878],
+ [280.4259077]
+ ]
+ },
+ {
+ "name": "04_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 2,
+ "K": [
+ [745.144,0,382.474],
+ [0,745.286,222.525],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322843,0.0690658,-0.000684608,-0.000275864,0.0370253],
+ "R": [
+ [0.1096717734,-0.01795980665,-0.9938055884],
+ [-0.007042199406,0.9997976117,-0.01884523745],
+ [0.9939429106,0.009065367736,0.1095231006]
+ ],
+ "t": [
+ [-53.83503278],
+ [149.6185443],
+ [272.7820927]
+ ]
+ },
+ {
+ "name": "04_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 3,
+ "K": [
+ [742.832,0,377.499],
+ [0,742.665,258.984],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312355,-0.00257413,0.000454129,0.00111055,0.151137],
+ "R": [
+ [0.07040546321,0.04162572676,-0.9966495721],
+ [-0.08610880414,0.9956530214,0.03550119457],
+ [0.9937949208,0.08332082476,0.07368375372]
+ ],
+ "t": [
+ [-50.21742462],
+ [111.4103034],
+ [280.5940976]
+ ]
+ },
+ {
+ "name": "04_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 4,
+ "K": [
+ [743.339,0,393.561],
+ [0,743.571,223.626],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307228,-0.0295629,-0.000661125,6.4492e-05,0.183577],
+ "R": [
+ [0.09450112049,0.05679880598,-0.993903131],
+ [-0.03670643306,0.9978910099,0.05353662459],
+ [0.9948478155,0.03142336774,0.09638670013]
+ ],
+ "t": [
+ [-21.9069],
+ [118.1273376],
+ [275.8163164]
+ ]
+ },
+ {
+ "name": "04_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 5,
+ "K": [
+ [746.019,0,364.58],
+ [0,746.273,258.887],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327759,0.0738839,0.000801649,0.000211169,0.0604088],
+ "R": [
+ [0.135847977,0.01131634816,-0.9906650632],
+ [-0.049797809,0.9987488181,0.004580011864],
+ [0.98947739,0.04871076425,0.1362415358]
+ ],
+ "t": [
+ [-12.12624478],
+ [90.71810202],
+ [278.5550143]
+ ]
+ },
+ {
+ "name": "04_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 6,
+ "K": [
+ [745.588,0,362.328],
+ [0,745.695,224.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317313,0.0342325,-0.00011624,0.00140051,0.0955503],
+ "R": [
+ [0.09768474559,0.09486669264,-0.9906856217],
+ [-0.08671696061,0.9924717325,0.0864871607],
+ [0.9914322262,0.07746076975,0.1051758999]
+ ],
+ "t": [
+ [6.120914551],
+ [75.66522558],
+ [280.1538331]
+ ]
+ },
+ {
+ "name": "04_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 7,
+ "K": [
+ [744.949,0,374.902],
+ [0,744.948,218.152],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307279,-0.0368619,-0.000928182,-0.000206153,0.214368],
+ "R": [
+ [0.08413477249,-0.05845821559,-0.994738145],
+ [-0.03729096802,0.9973936317,-0.06176833509],
+ [0.9957563576,0.04229161317,0.08173552284]
+ ],
+ "t": [
+ [3.352563309],
+ [99.7043349],
+ [277.3248716]
+ ]
+ },
+ {
+ "name": "04_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 8,
+ "K": [
+ [744.851,0,365.832],
+ [0,744.82,236.655],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313642,0.00106915,0.000461187,-0.00049658,0.163492],
+ "R": [
+ [0.1068294918,-0.02053293437,-0.9940653189],
+ [-0.04471775106,0.998675844,-0.02543386204],
+ [0.9932712532,0.04716945203,0.1057698462]
+ ],
+ "t": [
+ [34.88142403],
+ [92.93282517],
+ [277.1804593]
+ ]
+ },
+ {
+ "name": "04_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 9,
+ "K": [
+ [745.947,0,354.92],
+ [0,745.962,217.292],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332252,0.114802,-0.000779302,-0.000175195,-0.0220414],
+ "R": [
+ [0.0951039165,0.01286389124,-0.99538423],
+ [-0.04378002227,0.9990030715,0.008727700331],
+ [0.9945041753,0.04274790527,0.09557228614]
+ ],
+ "t": [
+ [51.3876018],
+ [107.4685168],
+ [276.8925649]
+ ]
+ },
+ {
+ "name": "04_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 10,
+ "K": [
+ [743.419,0,373.623],
+ [0,743.493,209.714],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312784,-0.00205334,-0.00151839,-4.48796e-05,0.146707],
+ "R": [
+ [0.07554192003,-0.02015366607,-0.996938939],
+ [-0.05402378201,0.9982445697,-0.02427365106],
+ [0.9956780852,0.05569209012,0.07432053419]
+ ],
+ "t": [
+ [36.95032578],
+ [126.4783785],
+ [278.9862968]
+ ]
+ },
+ {
+ "name": "04_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 11,
+ "K": [
+ [743.168,0,378.723],
+ [0,743.196,231.359],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312654,0.00616666,0.000125459,-0.000163635,0.137741],
+ "R": [
+ [0.104627794,-0.01026277171,-0.994458496],
+ [-0.05855646041,0.9981483637,-0.01646162423],
+ [0.9927860624,0.05995431298,0.1038331098]
+ ],
+ "t": [
+ [61.78762978],
+ [139.882294],
+ [278.0088471]
+ ]
+ },
+ {
+ "name": "04_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 12,
+ "K": [
+ [746.755,0,377.564],
+ [0,747.014,231.526],
+ [0,0,1]
+ ],
+ "distCoef": [-0.342661,0.169314,0.000669193,0.000564241,-0.092518],
+ "R": [
+ [0.09069981891,0.03748374052,-0.9951726041],
+ [-0.02832816732,0.9989841486,0.03504548138],
+ [0.9954752924,0.02501279723,0.09166952704]
+ ],
+ "t": [
+ [63.18640006],
+ [168.1511303],
+ [272.7093484]
+ ]
+ },
+ {
+ "name": "04_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 13,
+ "K": [
+ [745.766,0,371.377],
+ [0,745.897,229.211],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323265,0.06437,0.000357726,0.000480753,0.061899],
+ "R": [
+ [0.03414536791,0.03842962758,-0.9986777546],
+ [-0.02717943982,0.9989265658,0.03750992125],
+ [0.9990472321,0.02586271187,0.03515321085]
+ ],
+ "t": [
+ [27.04698548],
+ [171.5967975],
+ [274.5649723]
+ ]
+ },
+ {
+ "name": "04_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 14,
+ "K": [
+ [744.965,0,366.266],
+ [0,745.319,235.632],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317134,0.0349168,5.85303e-05,0.000379707,0.110605],
+ "R": [
+ [0.05221731101,0.04748668842,-0.9975060736],
+ [0.03426805086,0.9981953182,0.04931335942],
+ [0.9980476207,-0.03675759989,0.05049579913]
+ ],
+ "t": [
+ [31.93275734],
+ [208.7852536],
+ [260.7309393]
+ ]
+ },
+ {
+ "name": "04_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 15,
+ "K": [
+ [744.586,0,371.051],
+ [0,745.106,212.085],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332822,0.11382,-0.000911903,0.000640183,-0.00904196],
+ "R": [
+ [0.0693166226,0.04834029473,-0.9964228127],
+ [-0.01396942206,0.9987743784,0.04748258878],
+ [0.9974968978,0.01062811814,0.06990695264]
+ ],
+ "t": [
+ [16.12425569],
+ [198.357827],
+ [269.7404532]
+ ]
+ },
+ {
+ "name": "04_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 16,
+ "K": [
+ [742.58,0,362.432],
+ [0,742.717,222.722],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316061,0.0181932,0.000637155,-0.000119442,0.122715],
+ "R": [
+ [0.07545496093,-0.0349426896,-0.9965367817],
+ [-0.03652359913,0.9986183515,-0.03778114217],
+ [0.9964800929,0.03924788454,0.07407447592]
+ ],
+ "t": [
+ [-15.86676392],
+ [179.6369531],
+ [275.0674259]
+ ]
+ },
+ {
+ "name": "04_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 17,
+ "K": [
+ [745.044,0,350.241],
+ [0,745.211,214.104],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330556,0.0995367,-0.000406045,-3.83783e-05,-0.00374247],
+ "R": [
+ [0.0837025501,0.02221656332,-0.9962430965],
+ [-0.04478154079,0.9988252756,0.01851168242],
+ [0.9954840515,0.04306382584,0.08459911461]
+ ],
+ "t": [
+ [-23.0620205],
+ [182.4550181],
+ [276.0013748]
+ ]
+ },
+ {
+ "name": "04_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 18,
+ "K": [
+ [747.543,0,399.307],
+ [0,747.43,229.515],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337874,0.152604,0.000377489,0.002871,-0.0603327],
+ "R": [
+ [0.03967719066,0.06607189882,-0.9970256891],
+ [-0.02383145062,0.9975901546,0.06516091958],
+ [0.998928317,0.02117516625,0.04115616396]
+ ],
+ "t": [
+ [-45.47747339],
+ [181.8911988],
+ [269.8403328]
+ ]
+ },
+ {
+ "name": "04_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 19,
+ "K": [
+ [743.963,0,369.391],
+ [0,744.08,218.072],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320196,0.0539371,0.000417857,0.00192962,0.0700112],
+ "R": [
+ [0.0434323362,0.03783761887,-0.9983395949],
+ [-0.08481170801,0.9958149524,0.03405223652],
+ [0.9954499517,0.08319191804,0.04645964289]
+ ],
+ "t": [
+ [-24.42650241],
+ [136.5925943],
+ [281.0885176]
+ ]
+ },
+ {
+ "name": "04_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 20,
+ "K": [
+ [745.858,0,356.253],
+ [0,746.045,207.418],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328012,0.0801152,-7.74627e-05,-0.000454429,0.0269942],
+ "R": [
+ [0.0976780849,0.06705669278,-0.9929563896],
+ [-0.1171365339,0.9915671608,0.05544004021],
+ [0.9883005738,0.1108961929,0.1047091699]
+ ],
+ "t": [
+ [-1.775430866],
+ [107.2147587],
+ [285.054156]
+ ]
+ },
+ {
+ "name": "04_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 21,
+ "K": [
+ [746.156,0,369.678],
+ [0,746.129,226.325],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331296,0.10434,-0.000526263,0.0017798,0.0107539],
+ "R": [
+ [0.06864954522,0.009029787974,-0.9975999714],
+ [-0.09824772164,0.9951594531,0.00224680986],
+ [0.9927913301,0.09785768182,0.06920439997]
+ ],
+ "t": [
+ [2.330018678],
+ [104.6606406],
+ [283.2576255]
+ ]
+ },
+ {
+ "name": "04_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 22,
+ "K": [
+ [746.305,0,363.016],
+ [0,746.511,222.294],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313633,0.00103632,0.000318828,-0.000294887,0.154057],
+ "R": [
+ [0.08441946195,-0.0784287402,-0.9933389588],
+ [-0.07957536672,0.9931828981,-0.08517917513],
+ [0.9932477614,0.08623609206,0.07760297012]
+ ],
+ "t": [
+ [9.995164317],
+ [122.6888691],
+ [282.4272415]
+ ]
+ },
+ {
+ "name": "04_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 23,
+ "K": [
+ [745.178,0,358.539],
+ [0,745.299,233.674],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315081,0.0210219,-6.99317e-06,-0.000330658,0.115227],
+ "R": [
+ [0.1162513982,0.03935918122,-0.9924396542],
+ [-0.02556811677,0.999001962,0.03662446354],
+ [0.9928906706,0.02111716788,0.117141715]
+ ],
+ "t": [
+ [32.91845612],
+ [159.7823772],
+ [272.1694603]
+ ]
+ },
+ {
+ "name": "04_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 24,
+ "K": [
+ [746.014,0,365.199],
+ [0,746.411,216.584],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320661,0.0432533,-0.00136099,-0.000113861,0.0956118],
+ "R": [
+ [0.1001711426,-0.0639180002,-0.9929150172],
+ [-0.0054812292,0.9978838124,-0.06479084071],
+ [0.9949551238,0.01193256733,0.09960881242]
+ ],
+ "t": [
+ [-9.066812064],
+ [167.2144724],
+ [271.0944115]
+ ]
+ },
+ {
+ "name": "05_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 1,
+ "K": [
+ [744.506,0,379.212],
+ [0,745.093,221.816],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322425,0.0503962,-0.00139268,-0.000488272,0.0792831],
+ "R": [
+ [0.4832137358,-0.07031409603,-0.8726742883],
+ [-0.1214142278,0.9817563233,-0.14633218],
+ [0.8670427157,0.1766647942,0.465861009]
+ ],
+ "t": [
+ [-31.81590772],
+ [187.5269902],
+ [291.8752718]
+ ]
+ },
+ {
+ "name": "05_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 2,
+ "K": [
+ [746.146,0,379.909],
+ [0,746.274,243.237],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327102,0.0750235,0.00051439,0.000830868,0.0552106],
+ "R": [
+ [0.559561068,-0.04316954181,-0.8276640634],
+ [-0.1711397799,0.9711012062,-0.1663539088],
+ [0.8109269924,0.2347314165,0.5360024022]
+ ],
+ "t": [
+ [-21.47998338],
+ [182.028679],
+ [304.5116426]
+ ]
+ },
+ {
+ "name": "05_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 3,
+ "K": [
+ [746.598,0,366.137],
+ [0,746.916,245.497],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34673,0.191883,-0.000717065,0.000142378,-0.151818],
+ "R": [
+ [0.4493443217,0.06721032382,-0.8908268367],
+ [-0.2833621033,0.9563979118,-0.07077395533],
+ [0.8472281859,0.2842284411,0.4487968296]
+ ],
+ "t": [
+ [-42.79170468],
+ [156.78227],
+ [309.5144468]
+ ]
+ },
+ {
+ "name": "05_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 4,
+ "K": [
+ [744.97,0,361.533],
+ [0,745.268,216.194],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320215,0.0355127,-0.000935438,6.82351e-05,0.107335],
+ "R": [
+ [0.5139859054,0.07264601249,-0.8547169391],
+ [-0.2477501277,0.96651576,-0.06683681477],
+ [0.8212419639,0.2461094116,0.5147735369]
+ ],
+ "t": [
+ [-21.66847624],
+ [145.8563675],
+ [305.5618637]
+ ]
+ },
+ {
+ "name": "05_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 5,
+ "K": [
+ [743.904,0,367.466],
+ [0,744.108,216.808],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328736,0.086922,-0.000934339,0.000214876,0.0243362],
+ "R": [
+ [0.4889793362,0.07185582001,-0.8693307483],
+ [-0.2209595119,0.9743010874,-0.0437525441],
+ [0.8438460185,0.2134809878,0.4922903259]
+ ],
+ "t": [
+ [-47.80972546],
+ [144.3254019],
+ [299.7644507]
+ ]
+ },
+ {
+ "name": "05_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 6,
+ "K": [
+ [745.323,0,383.952],
+ [0,745.526,234.808],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334223,0.133657,-0.000107051,0.00148947,-0.0461754],
+ "R": [
+ [0.4969854565,0.0559027949,-0.8659563116],
+ [-0.2018212488,0.978003949,-0.05269211703],
+ [0.8439630558,0.2009556001,0.4973361109]
+ ],
+ "t": [
+ [-46.56558119],
+ [125.7186081],
+ [298.6423415]
+ ]
+ },
+ {
+ "name": "05_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 7,
+ "K": [
+ [746.158,0,356.674],
+ [0,746.317,240.893],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334568,0.11153,0.000321304,-0.000871385,-0.0157856],
+ "R": [
+ [0.5541201274,0.02610072644,-0.8320274253],
+ [-0.1769665492,0.9803549196,-0.08710380092],
+ [0.8134087072,0.1955069916,0.5478533484]
+ ],
+ "t": [
+ [-14.70019562],
+ [115.5481293],
+ [299.4445791]
+ ]
+ },
+ {
+ "name": "05_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 8,
+ "K": [
+ [744.96,0,386.044],
+ [0,745.46,258.776],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325919,0.068823,-0.000458274,0.000477805,0.0465958],
+ "R": [
+ [0.4763065258,-0.004539644313,-0.8792675845],
+ [-0.1710253429,0.980409884,-0.09770768372],
+ [0.8624861886,0.1969158475,0.4661992314]
+ ],
+ "t": [
+ [-40.46029545],
+ [93.91456762],
+ [297.4902987]
+ ]
+ },
+ {
+ "name": "05_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 9,
+ "K": [
+ [745.188,0,367.116],
+ [0,745.437,236.843],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328194,0.058828,0.000388874,-0.00143808,0.0829656],
+ "R": [
+ [0.5065601345,-0.04543027129,-0.8610069225],
+ [-0.1705921502,0.9735884993,-0.1517357977],
+ [0.845159836,0.2237443283,0.4854310735]
+ ],
+ "t": [
+ [-16.55300824],
+ [76.93410209],
+ [300.8962768]
+ ]
+ },
+ {
+ "name": "05_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 10,
+ "K": [
+ [747.452,0,374.886],
+ [0,747.648,257.28],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337728,0.123608,0.00138141,5.97732e-05,-0.0225942],
+ "R": [
+ [0.4549222289,-0.02855444123,-0.8900732608],
+ [-0.1699899924,0.9783230281,-0.1182685721],
+ [0.8741562607,0.2051065493,0.4402069233]
+ ],
+ "t": [
+ [-13.61854908],
+ [96.6157071],
+ [299.0141417]
+ ]
+ },
+ {
+ "name": "05_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 11,
+ "K": [
+ [746.39,0,405.604],
+ [0,746.458,241.87],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333064,0.100943,0.000870611,0.00103156,0.0180409],
+ "R": [
+ [0.5002384593,-0.05591048228,-0.8640807264],
+ [-0.1916757277,0.9660062257,-0.1734715752],
+ [0.8444062406,0.2524004556,0.4725167836]
+ ],
+ "t": [
+ [16.55277765],
+ [75.44647006],
+ [303.7304898]
+ ]
+ },
+ {
+ "name": "05_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 12,
+ "K": [
+ [745.943,0,392.757],
+ [0,746.143,272.1],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323245,0.0770562,0.00168738,0.000666505,0.0382015],
+ "R": [
+ [0.5344619138,-0.0483612619,-0.8438078283],
+ [-0.2099054746,0.9594877737,-0.1879438847],
+ [0.818712498,0.277568731,0.5026583782]
+ ],
+ "t": [
+ [45.5535171],
+ [81.37072912],
+ [304.8427161]
+ ]
+ },
+ {
+ "name": "05_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 13,
+ "K": [
+ [748.463,0,383.471],
+ [0,748.465,243.614],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34071,0.149034,0.000455623,0.000254671,-0.0668973],
+ "R": [
+ [0.550270912,-0.09726860505,-0.8293013577],
+ [-0.1127468592,0.975440235,-0.1892207537],
+ [0.82733915,0.1976238001,0.525789658]
+ ],
+ "t": [
+ [34.15956958],
+ [127.9842494],
+ [295.9545727]
+ ]
+ },
+ {
+ "name": "05_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 14,
+ "K": [
+ [744.467,0,372.192],
+ [0,744.287,242.67],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321164,0.0557106,-0.000170048,0.000249902,0.0584864],
+ "R": [
+ [0.5607110475,-0.1151130063,-0.8199708025],
+ [-0.101866971,0.9731761842,-0.2062795062],
+ [0.8217215109,0.1991911399,0.5339444244]
+ ],
+ "t": [
+ [50.41224037],
+ [142.3474205],
+ [294.74195]
+ ]
+ },
+ {
+ "name": "05_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 15,
+ "K": [
+ [746.542,0,352.38],
+ [0,746.666,240.759],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327959,0.100036,-0.000636984,-0.00122606,-0.0366604],
+ "R": [
+ [0.5029624145,-0.05772144518,-0.8623787128],
+ [-0.198700467,0.9633205664,-0.180365215],
+ [0.8411580909,0.262071977,0.4730447599]
+ ],
+ "t": [
+ [34.04469815],
+ [136.31759],
+ [307.4406203]
+ ]
+ },
+ {
+ "name": "05_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 16,
+ "K": [
+ [747.042,0,371.719],
+ [0,747.231,244.896],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323957,0.0675271,-0.000219383,0.00030566,0.0452733],
+ "R": [
+ [0.5145114331,-0.105655334,-0.8509494319],
+ [-0.1209004538,0.9735279663,-0.1939752023],
+ [0.8489175846,0.2026826318,0.4881174913]
+ ],
+ "t": [
+ [9.341169646],
+ [165.8735131],
+ [297.8569993]
+ ]
+ },
+ {
+ "name": "05_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 17,
+ "K": [
+ [745.814,0,386.675],
+ [0,746.085,252.153],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320652,0.0597547,0.000647483,5.56623e-05,0.0523558],
+ "R": [
+ [0.5123119379,-0.06682282728,-0.856195765],
+ [-0.1341513719,0.9785027468,-0.1566390244],
+ [0.8482569703,0.1951078787,0.4923342645]
+ ],
+ "t": [
+ [9.076647729],
+ [186.6487394],
+ [296.0424945]
+ ]
+ },
+ {
+ "name": "05_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 18,
+ "K": [
+ [744.362,0,367.747],
+ [0,744.705,261.961],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317525,0.0240072,0.000331,-0.000409781,0.122239],
+ "R": [
+ [0.5214772573,-0.05602259067,-0.8514240656],
+ [-0.1526209796,0.9756261952,-0.1576716965],
+ [0.8395047985,0.2121673788,0.5002166498]
+ ],
+ "t": [
+ [-2.829687906],
+ [192.8140289],
+ [298.6606918]
+ ]
+ },
+ {
+ "name": "05_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 19,
+ "K": [
+ [744.259,0,353.379],
+ [0,744.524,245.823],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320328,0.0298824,0.00026675,-0.00161079,0.123162],
+ "R": [
+ [0.5556726344,-0.05485450779,-0.8295896012],
+ [-0.2099711545,0.9562161648,-0.2038694692],
+ [0.8044501462,0.2874745713,0.519825291]
+ ],
+ "t": [
+ [-1.476630227],
+ [134.2745178],
+ [310.4571486]
+ ]
+ },
+ {
+ "name": "05_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 20,
+ "K": [
+ [743.679,0,405.845],
+ [0,743.856,234.88],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326644,0.0646831,0.000108119,5.73367e-05,0.058946],
+ "R": [
+ [0.447769915,-0.01338423954,-0.894048637],
+ [-0.18660487,0.9764723016,-0.1080762074],
+ [0.8744602482,0.2152271039,0.4347373552]
+ ],
+ "t": [
+ [-41.39083575],
+ [143.2049031],
+ [297.8732354]
+ ]
+ },
+ {
+ "name": "05_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 21,
+ "K": [
+ [746.956,0,354.763],
+ [0,747.081,232.068],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333648,0.0797639,-0.000768992,-0.00091097,0.0508097],
+ "R": [
+ [0.5053420531,-0.009379958189,-0.8628681393],
+ [-0.2526298673,0.9545207072,-0.1583299394],
+ [0.8251106347,0.2979970402,0.4799897963]
+ ],
+ "t": [
+ [-19.66925616],
+ [96.29580053],
+ [309.4868577]
+ ]
+ },
+ {
+ "name": "05_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 22,
+ "K": [
+ [748.369,0,375.575],
+ [0,748.642,247.648],
+ [0,0,1]
+ ],
+ "distCoef": [-0.339087,0.143465,-0.000470446,0.00132222,-0.0624301],
+ "R": [
+ [0.54260376,-0.05746408722,-0.8380209057],
+ [-0.1470082191,0.975763273,-0.1620944744],
+ [0.8270246327,0.2111490322,0.5210051277]
+ ],
+ "t": [
+ [3.173863757],
+ [116.0988382],
+ [299.4207466]
+ ]
+ },
+ {
+ "name": "05_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 23,
+ "K": [
+ [744.544,0,368.615],
+ [0,744.426,281.181],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322575,0.0664483,0.00114224,0.000391788,0.0483369],
+ "R": [
+ [0.5347472888,-0.05715349527,-0.8430769924],
+ [-0.1466458645,0.9762943366,-0.1591991164],
+ [0.832190079,0.2087650503,0.5136894259]
+ ],
+ "t": [
+ [16.7223507],
+ [130.5590862],
+ [298.5444367]
+ ]
+ },
+ {
+ "name": "05_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 24,
+ "K": [
+ [743.308,0,356.74],
+ [0,743.243,228.93],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321093,0.0447792,0.000127467,-8.40104e-05,0.095825],
+ "R": [
+ [0.5706235669,-0.133891243,-0.8102233519],
+ [-0.1678811389,0.9467635938,-0.2746900447],
+ [0.8038685639,0.2927658322,0.5177678046]
+ ],
+ "t": [
+ [6.742844805],
+ [124.9131408],
+ [309.8640068]
+ ]
+ },
+ {
+ "name": "06_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 1,
+ "K": [
+ [744.518,0,344.042],
+ [0,744.512,240.289],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313532,-0.0139368,0.00116047,-0.000125352,0.195046],
+ "R": [
+ [-0.3305715804,0.1011846603,-0.9383411399],
+ [-0.314462461,0.9256148845,0.2105954561],
+ [0.8898515555,0.3646899369,-0.2741631979]
+ ],
+ "t": [
+ [-23.56718534],
+ [104.1648487],
+ [320.754952]
+ ]
+ },
+ {
+ "name": "06_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 2,
+ "K": [
+ [748.956,0,345.566],
+ [0,748.875,227.82],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335662,0.0955564,-6.0167e-05,-0.0012999,0.0278092],
+ "R": [
+ [-0.2903396332,0.1603112194,-0.9433998147],
+ [-0.341086429,0.9037763758,0.2585504022],
+ [0.8940709957,0.3968483028,-0.2077221201]
+ ],
+ "t": [
+ [-2.499901432],
+ [69.14355517],
+ [325.2941984]
+ ]
+ },
+ {
+ "name": "06_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 3,
+ "K": [
+ [743.901,0,369.68],
+ [0,743.816,251.042],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320568,0.044977,0.000366128,-0.00033077,0.103335],
+ "R": [
+ [-0.3123459653,0.110763308,-0.943488997],
+ [-0.3278062139,0.9196080197,0.216481353],
+ [0.891618239,0.3768986331,-0.250926954]
+ ],
+ "t": [
+ [2.578346941],
+ [71.05917793],
+ [323.4074447]
+ ]
+ },
+ {
+ "name": "06_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 4,
+ "K": [
+ [745.814,0,378.476],
+ [0,745.908,222.393],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316287,0.0251632,0.000357033,0.00145486,0.13215],
+ "R": [
+ [-0.2756543214,0.09031338143,-0.9570048005],
+ [-0.3333214643,0.9248259371,0.1832860813],
+ [0.9016160472,0.3695138418,-0.2248288776]
+ ],
+ "t": [
+ [26.15902854],
+ [86.10496093],
+ [322.4382284]
+ ]
+ },
+ {
+ "name": "06_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 5,
+ "K": [
+ [750.419,0,363.736],
+ [0,750.614,222.964],
+ [0,0,1]
+ ],
+ "distCoef": [-0.344753,0.14329,-0.000836382,-0.000451111,-0.060951],
+ "R": [
+ [-0.2930259634,0.06094491301,-0.9541601031],
+ [-0.3875087878,0.9047544541,0.1767945619],
+ [0.8740553324,0.4215508218,-0.2414998562]
+ ],
+ "t": [
+ [36.26889278],
+ [61.41890121],
+ [327.3260635]
+ ]
+ },
+ {
+ "name": "06_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 6,
+ "K": [
+ [747.394,0,354.724],
+ [0,747.506,211.184],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329009,0.0921746,-0.00050966,0.000333806,0.021085],
+ "R": [
+ [-0.2297156979,0.02557529828,-0.9729216835],
+ [-0.3964529538,0.9104994627,0.1175405629],
+ [0.888850805,0.4127185877,-0.199016617]
+ ],
+ "t": [
+ [62.78312093],
+ [81.38139883],
+ [324.7093469]
+ ]
+ },
+ {
+ "name": "06_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 7,
+ "K": [
+ [746.623,0,374.989],
+ [0,746.758,209.923],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319339,0.0433323,-0.00139256,0.000754597,0.0938733],
+ "R": [
+ [-0.2846142448,0.03267216609,-0.9580852056],
+ [-0.3313740809,0.934457856,0.1303063082],
+ [0.8995476364,0.3545716359,-0.255133308]
+ ],
+ "t": [
+ [45.81195811],
+ [121.7115234],
+ [320.8009986]
+ ]
+ },
+ {
+ "name": "06_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 8,
+ "K": [
+ [745.971,0,357.954],
+ [0,746.024,209.947],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314348,0.0246684,-0.0014997,0.000635776,0.111152],
+ "R": [
+ [-0.3038162213,-0.0261928812,-0.9523705354],
+ [-0.3441704234,0.9351353343,0.08407512184],
+ [0.8883931693,0.3533211563,-0.2931240987]
+ ],
+ "t": [
+ [41.47715732],
+ [140.438376],
+ [322.3540865]
+ ]
+ },
+ {
+ "name": "06_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 9,
+ "K": [
+ [742.648,0,362.103],
+ [0,742.703,220.817],
+ [0,0,1]
+ ],
+ "distCoef": [-0.304218,-0.0643312,-0.000139411,-0.000234647,0.289172],
+ "R": [
+ [-0.2807259034,-0.0411671215,-0.958904706],
+ [-0.3740921558,0.9247597922,0.06981680165],
+ [0.8838823599,0.3783181134,-0.2750043253]
+ ],
+ "t": [
+ [37.64720227],
+ [153.3424109],
+ [325.0305142]
+ ]
+ },
+ {
+ "name": "06_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 10,
+ "K": [
+ [747.72,0,366.165],
+ [0,747.851,213.209],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324647,0.0523798,-0.00077308,-0.000271098,0.0916616],
+ "R": [
+ [-0.2880158499,0.02777358159,-0.957222805],
+ [-0.3788720768,0.9147158267,0.1405379157],
+ [0.8794900907,0.4031421393,-0.2529300217]
+ ],
+ "t": [
+ [33.16578395],
+ [147.9736193],
+ [327.8869733]
+ ]
+ },
+ {
+ "name": "06_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 11,
+ "K": [
+ [745.331,0,369.444],
+ [0,745.587,207.732],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317455,0.0357855,-0.00041249,0.000556817,0.0920153],
+ "R": [
+ [-0.3142048567,0.04518634316,-0.9482792323],
+ [-0.3166241188,0.9366885696,0.1495449465],
+ [0.8949997069,0.3472358248,-0.2800050117]
+ ],
+ "t": [
+ [26.61359186],
+ [187.9055539],
+ [317.8889871]
+ ]
+ },
+ {
+ "name": "06_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 12,
+ "K": [
+ [747.25,0,346.366],
+ [0,747.394,225.779],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328454,0.0750084,3.92686e-05,0.00130952,0.0669429],
+ "R": [
+ [-0.2993781475,0.05639323365,-0.9524665495],
+ [-0.3171785116,0.9355987261,0.1550897014],
+ [0.8998725002,0.3485323901,-0.2622110915]
+ ],
+ "t": [
+ [13.58039626],
+ [195.4066632],
+ [317.2443523]
+ ]
+ },
+ {
+ "name": "06_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 13,
+ "K": [
+ [743.861,0,344.414],
+ [0,743.872,231.421],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307564,-0.0231037,-0.000140407,-0.000635225,0.208058],
+ "R": [
+ [-0.2583036736,0.07116007646,-0.9634393887],
+ [-0.3357690773,0.9284960528,0.1586007776],
+ [0.905835713,0.3644603181,-0.2159405881]
+ ],
+ "t": [
+ [14.66480509],
+ [172.1699927],
+ [320.6722019]
+ ]
+ },
+ {
+ "name": "06_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 14,
+ "K": [
+ [744.949,0,378.98],
+ [0,744.921,225.408],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321047,0.0567081,-0.000162218,0.000699701,0.0634367],
+ "R": [
+ [-0.3208579847,0.07871363947,-0.9438507915],
+ [-0.3472646452,0.9173632389,0.1945557869],
+ [0.8811682132,0.3901907879,-0.267008856]
+ ],
+ "t": [
+ [-45.70363788],
+ [100.2282059],
+ [322.9364507]
+ ]
+ },
+ {
+ "name": "06_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 15,
+ "K": [
+ [745.712,0,360.895],
+ [0,745.741,234.163],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31006,-0.0103454,0.000398478,0.000813845,0.181221],
+ "R": [
+ [-0.3227895896,0.1367774117,-0.9365355415],
+ [-0.3406635237,0.9063958148,0.2497898928],
+ [0.8830375102,0.3996730746,-0.245980058]
+ ],
+ "t": [
+ [-14.93002532],
+ [154.0180569],
+ [326.396188]
+ ]
+ },
+ {
+ "name": "06_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 16,
+ "K": [
+ [745.931,0,372.193],
+ [0,746.03,212.813],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325757,0.0830346,-0.000419051,0.00216162,0.0290765],
+ "R": [
+ [-0.311559769,0.02363818266,-0.9499324958],
+ [-0.312276077,0.9416182622,0.1258518973],
+ [0.8974486961,0.3358515813,-0.2859887293]
+ ],
+ "t": [
+ [-41.03283731],
+ [153.3338286],
+ [314.9665339]
+ ]
+ },
+ {
+ "name": "06_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 17,
+ "K": [
+ [744.756,0,368.403],
+ [0,744.752,202.816],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313223,0.00720848,-0.00119606,0.000542174,0.130737],
+ "R": [
+ [-0.3236003046,0.09291211415,-0.9416210394],
+ [-0.3175516679,0.9267842511,0.2005788875],
+ [0.8913157584,0.3639207207,-0.2704032691]
+ ],
+ "t": [
+ [-41.098271],
+ [130.5289196],
+ [319.7107876]
+ ]
+ },
+ {
+ "name": "06_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 18,
+ "K": [
+ [744.889,0,373.989],
+ [0,745.092,230.989],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319065,0.0283013,-0.000935078,-0.000739787,0.111424],
+ "R": [
+ [-0.3391260928,0.0773602665,-0.9375547357],
+ [-0.3008220503,0.9353680392,0.1859911968],
+ [0.8913470633,0.3451116057,-0.2939360344]
+ ],
+ "t": [
+ [-22.38901828],
+ [189.8595323],
+ [315.0907711]
+ ]
+ },
+ {
+ "name": "06_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 19,
+ "K": [
+ [743.21,0,358.424],
+ [0,743.138,251.445],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316603,0.00648778,0.000375455,-0.000277526,0.16085],
+ "R": [
+ [-0.34774011,0.09728469559,-0.9325301624],
+ [-0.3453355468,0.9113903597,0.2238548019],
+ [0.8716766465,0.399879107,-0.2833311204]
+ ],
+ "t": [
+ [-13.32995299],
+ [105.9918293],
+ [324.8353482]
+ ]
+ },
+ {
+ "name": "06_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 20,
+ "K": [
+ [745.315,0,375.798],
+ [0,745.342,214.671],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317661,0.021421,-0.000865931,0.000266434,0.124612],
+ "R": [
+ [-0.2889220833,0.06736289331,-0.9549797225],
+ [-0.355115135,0.918816287,0.172249446],
+ [0.8890541438,0.3888944219,-0.2415447329]
+ ],
+ "t": [
+ [16.18922492],
+ [101.394333],
+ [324.5371374]
+ ]
+ },
+ {
+ "name": "06_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 21,
+ "K": [
+ [743.803,0,341.335],
+ [0,743.805,238.935],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305727,-0.0577903,-0.000702133,-0.00085287,0.249773],
+ "R": [
+ [-0.2867564999,0.0564691645,-0.9563377767],
+ [-0.3641939053,0.9168870998,0.1633427245],
+ [0.8860775977,0.3951319776,-0.24235761]
+ ],
+ "t": [
+ [29.77890794],
+ [113.785435],
+ [325.4988706]
+ ]
+ },
+ {
+ "name": "06_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 22,
+ "K": [
+ [745.285,0,373.625],
+ [0,745.232,235.431],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319503,0.0483306,-0.000362012,0.00120612,0.080115],
+ "R": [
+ [-0.3458253526,0.08893014684,-0.9340750797],
+ [-0.3902640321,0.8916714915,0.2293816395],
+ [0.8532870623,0.4438618933,-0.2736563703]
+ ],
+ "t": [
+ [18.96316513],
+ [116.1979138],
+ [333.2100324]
+ ]
+ },
+ {
+ "name": "06_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 23,
+ "K": [
+ [744.536,0,366.592],
+ [0,744.501,224.531],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312705,-0.014521,0.000375544,8.36622e-05,0.188212],
+ "R": [
+ [-0.3181142509,0.09038767844,-0.94373375],
+ [-0.4081954831,0.8853909401,0.2223945386],
+ [0.8556750382,0.455974726,-0.2447596336]
+ ],
+ "t": [
+ [6.972278595],
+ [119.3141773],
+ [334.5341124]
+ ]
+ },
+ {
+ "name": "06_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 24,
+ "K": [
+ [744.6,0,358.514],
+ [0,744.655,220.515],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30152,-0.0573254,-0.000856409,-0.000288003,0.227002],
+ "R": [
+ [-0.3545583501,0.05661769889,-0.9333181732],
+ [-0.3227337004,0.929412527,0.1789841147],
+ [0.8775712706,0.3646735401,-0.3112585327]
+ ],
+ "t": [
+ [-25.22428756],
+ [139.0090865],
+ [319.514146]
+ ]
+ },
+ {
+ "name": "07_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 1,
+ "K": [
+ [745.635,0,384.154],
+ [0,745.75,223.733],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328279,0.104082,-0.000872931,0.00144148,0.00404207],
+ "R": [
+ [-0.9078071857,0.03344162453,-0.4180523547],
+ [0.00958043905,0.9982092569,0.05904654639],
+ [0.4192783428,0.049597754,-0.9065019217]
+ ],
+ "t": [
+ [-23.31434773],
+ [152.0493649],
+ [282.3431498]
+ ]
+ },
+ {
+ "name": "07_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 2,
+ "K": [
+ [746.944,0,375.746],
+ [0,747.112,207.581],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321827,0.078307,-0.00112183,4.35862e-05,0.0396046],
+ "R": [
+ [-0.9306435439,0.005427673037,-0.3658867782],
+ [-0.02457764723,0.9967049447,0.07729936951],
+ [0.3651007167,0.08093079535,-0.9274436225]
+ ],
+ "t": [
+ [-62.01828104],
+ [131.8151818],
+ [284.3018088]
+ ]
+ },
+ {
+ "name": "07_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 3,
+ "K": [
+ [743.881,0,383.122],
+ [0,743.965,237.105],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311008,0.000325185,-0.000782967,0.00055371,0.154469],
+ "R": [
+ [-0.9217631286,0.06528892794,-0.3822173342],
+ [0.03992506463,0.996464058,0.07392814261],
+ [0.3856925251,0.05288418425,-0.9211104924]
+ ],
+ "t": [
+ [-43.22640533],
+ [121.5976731],
+ [282.3432951]
+ ]
+ },
+ {
+ "name": "07_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 4,
+ "K": [
+ [743.69,0,370.307],
+ [0,743.828,227.79],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303025,-0.0263668,-0.000445815,0.00071591,0.180166],
+ "R": [
+ [-0.9409979296,0.06863452498,-0.3313792366],
+ [0.04529042225,0.9959498431,0.07767037874],
+ [0.3353679682,0.05807936004,-0.9402952269]
+ ],
+ "t": [
+ [-38.37277115],
+ [113.0266013],
+ [281.4230584]
+ ]
+ },
+ {
+ "name": "07_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 5,
+ "K": [
+ [743.998,0,375.484],
+ [0,744.299,220.79],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310908,0.00595719,-5.69241e-05,0.000519591,0.131448],
+ "R": [
+ [-0.9269484075,0.08594630429,-0.3652121064],
+ [0.04467826469,0.9917683984,0.1199970688],
+ [0.3725191305,0.09491404865,-0.9231580692]
+ ],
+ "t": [
+ [-23.36597135],
+ [80.23534001],
+ [286.4206576]
+ ]
+ },
+ {
+ "name": "07_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 6,
+ "K": [
+ [745.602,0,379.444],
+ [0,745.67,224.268],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303286,-0.0402497,-0.00132196,0.00012981,0.210105],
+ "R": [
+ [-0.923694641,0.09319000989,-0.3716232396],
+ [0.04673933936,0.9901316615,0.1321163393],
+ [0.3802678586,0.1046657299,-0.9189349491]
+ ],
+ "t": [
+ [-0.9450645075],
+ [68.69008136],
+ [287.3198917]
+ ]
+ },
+ {
+ "name": "07_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 7,
+ "K": [
+ [745.731,0,365.823],
+ [0,745.481,229.263],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308219,-0.0231519,0.000110727,0.000180113,0.209056],
+ "R": [
+ [-0.917494877,0.04967698427,-0.3946331815],
+ [0.001316203411,0.9925436367,0.1218827179],
+ [0.3977454189,0.1113073518,-0.9107190869]
+ ],
+ "t": [
+ [18.92434207],
+ [79.05208738],
+ [288.1952445]
+ ]
+ },
+ {
+ "name": "07_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 8,
+ "K": [
+ [745.611,0,393.911],
+ [0,745.863,244.069],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318705,0.0460564,0.000184451,0.000507881,0.0745222],
+ "R": [
+ [-0.9083609307,0.09070031,-0.4082326216],
+ [0.05268537174,0.9932388068,0.1034452715],
+ [0.4148550001,0.07245775567,-0.9069979066]
+ ],
+ "t": [
+ [48.31394514],
+ [81.42535523],
+ [283.8217571]
+ ]
+ },
+ {
+ "name": "07_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 9,
+ "K": [
+ [745.77,0,370.33],
+ [0,746.047,217.48],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321786,0.069205,4.67533e-05,5.58471e-05,0.0372207],
+ "R": [
+ [-0.9211612824,0.007939579541,-0.3891000576],
+ [-0.02433705705,0.996659961,0.07795274024],
+ [0.3884193603,0.08127659646,-0.9178913418]
+ ],
+ "t": [
+ [49.65486911],
+ [97.0413663],
+ [285.6851525]
+ ]
+ },
+ {
+ "name": "07_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 10,
+ "K": [
+ [744.504,0,363.969],
+ [0,744.833,247.068],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335916,0.144192,-0.000823922,-0.000462503,-0.076361],
+ "R": [
+ [-0.9225918644,-0.01579725191,-0.3854538864],
+ [-0.05416624958,0.9945677902,0.08888716518],
+ [0.381955847,0.1028851669,-0.9184358297]
+ ],
+ "t": [
+ [40.86826856],
+ [113.0714764],
+ [288.4804376]
+ ]
+ },
+ {
+ "name": "07_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 11,
+ "K": [
+ [744.999,0,387.199],
+ [0,745.384,239.21],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313806,0.0330336,-7.01628e-05,0.00132279,0.0985619],
+ "R": [
+ [-0.9109471902,-0.006922747781,-0.4124648981],
+ [-0.04540685091,0.9954664163,0.08357530662],
+ [0.4100163832,0.09486142287,-0.9071316751]
+ ],
+ "t": [
+ [65.64483344],
+ [130.0336458],
+ [285.8729547]
+ ]
+ },
+ {
+ "name": "07_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 12,
+ "K": [
+ [743.664,0,350.646],
+ [0,743.861,222.503],
+ [0,0,1]
+ ],
+ "distCoef": [-0.300623,-0.0667329,-0.000394627,-0.00107967,0.272621],
+ "R": [
+ [-0.9268683851,0.02536908581,-0.3745282449],
+ [0.006256924582,0.9986192343,0.0521581796],
+ [0.3753343145,0.04600037271,-0.9257473295]
+ ],
+ "t": [
+ [57.10937388],
+ [163.0891099],
+ [280.8513179]
+ ]
+ },
+ {
+ "name": "07_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 13,
+ "K": [
+ [744.176,0,390.977],
+ [0,744.332,246.666],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327257,0.10216,-0.000582688,0.00201022,0.0126373],
+ "R": [
+ [-0.9290120658,-0.01909429991,-0.3695564765],
+ [-0.04453762663,0.9971777882,0.06043888335],
+ [0.3673594716,0.07260762025,-0.9272406117]
+ ],
+ "t": [
+ [26.5211548],
+ [160.1280328],
+ [285.2494721]
+ ]
+ },
+ {
+ "name": "07_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 14,
+ "K": [
+ [744.044,0,360.721],
+ [0,744.333,226.474],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311296,-0.00746755,-0.00165304,-0.000168766,0.17966],
+ "R": [
+ [-0.9305033137,0.06302128148,-0.3608211486],
+ [0.03165130136,0.9952368859,0.09220485899],
+ [0.3649133847,0.07437646791,-0.9280659258]
+ ],
+ "t": [
+ [37.8814582],
+ [178.0304645],
+ [285.6034633]
+ ]
+ },
+ {
+ "name": "07_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 15,
+ "K": [
+ [744.03,0,362.147],
+ [0,744.447,229.329],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314413,0.0379836,-0.000745365,2.01034e-05,0.0898919],
+ "R": [
+ [-0.9265853662,0.03975182478,-0.373977742],
+ [0.01411888978,0.9973739765,0.07103385017],
+ [0.3758193929,0.06053877555,-0.9247133829]
+ ],
+ "t": [
+ [16.14446289],
+ [185.021862],
+ [282.5666312]
+ ]
+ },
+ {
+ "name": "07_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 16,
+ "K": [
+ [743.673,0,368.897],
+ [0,743.962,238.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314216,0.0200058,-0.0002257,-0.000345788,0.11969],
+ "R": [
+ [-0.9350006114,0.024774913,-0.3537796777],
+ [-0.006073372197,0.9962920776,0.08582080369],
+ [0.354594093,0.08239113958,-0.9313832344]
+ ],
+ "t": [
+ [-10.51100446],
+ [168.6528502],
+ [285.9762696]
+ ]
+ },
+ {
+ "name": "07_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 17,
+ "K": [
+ [744.686,0,385.346],
+ [0,745.049,227.767],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317176,0.0455424,-0.000136917,0.000534438,0.0739505],
+ "R": [
+ [-0.908638426,0.05327873405,-0.4141709639],
+ [0.04010861029,0.9983767379,0.04043746577],
+ [0.4156531128,0.02013121347,-0.9093004036]
+ ],
+ "t": [
+ [-7.322164421],
+ [189.4505625],
+ [275.8940033]
+ ]
+ },
+ {
+ "name": "07_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 18,
+ "K": [
+ [746.282,0,378.432],
+ [0,746.624,237.775],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320382,0.058651,0.000451819,0.000534403,0.062414],
+ "R": [
+ [-0.916555331,0.01769811564,-0.3995160846],
+ [-0.01470055472,0.9968539618,0.07788499561],
+ [0.3996376094,0.077259016,-0.9134116408]
+ ],
+ "t": [
+ [-37.37478029],
+ [164.0712496],
+ [285.8486829]
+ ]
+ },
+ {
+ "name": "07_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 19,
+ "K": [
+ [743.687,0,374.362],
+ [0,743.883,225.048],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322503,0.0715253,7.77555e-05,0.000517375,0.0539586],
+ "R": [
+ [-0.9239544056,0.01616424802,-0.3821609261],
+ [-0.020576852,0.9955594902,0.09185801365],
+ [0.3819487525,0.09273628522,-0.9195189677]
+ ],
+ "t": [
+ [-17.14443298],
+ [133.4982453],
+ [287.2304165]
+ ]
+ },
+ {
+ "name": "07_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 20,
+ "K": [
+ [745.801,0,368.555],
+ [0,746.033,233.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317685,0.0475287,-3.52395e-05,0.000512076,0.0805211],
+ "R": [
+ [-0.9241543321,-0.01069440692,-0.3818696113],
+ [-0.04324692472,0.9961108974,0.076764468],
+ [0.3795635307,0.08745690199,-0.9210227014]
+ ],
+ "t": [
+ [-16.56758847],
+ [113.8864258],
+ [286.5218078]
+ ]
+ },
+ {
+ "name": "07_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 21,
+ "K": [
+ [744.1,0,390.405],
+ [0,744.284,237.593],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322514,0.0588182,0.000321804,0.00147162,0.0689104],
+ "R": [
+ [-0.9369369296,0.006948104691,-0.3494294118],
+ [-0.02026391849,0.9970404822,0.07415962808],
+ [0.3489105381,0.07656370335,-0.9340232522]
+ ],
+ "t": [
+ [-3.618393153],
+ [111.1940513],
+ [285.5030449]
+ ]
+ },
+ {
+ "name": "07_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 22,
+ "K": [
+ [747.001,0,381.032],
+ [0,747.132,234.437],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324882,0.0577225,-0.00134011,-0.00135265,0.0819201],
+ "R": [
+ [-0.9282296861,0.06047570579,-0.3670590401],
+ [0.02337036389,0.9942284933,0.1047068731],
+ [0.3712727784,0.08861372459,-0.9242857414]
+ ],
+ "t": [
+ [25.6408869],
+ [119.8980517],
+ [286.9452799]
+ ]
+ },
+ {
+ "name": "07_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 23,
+ "K": [
+ [743.981,0,363.51],
+ [0,744.339,258.582],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313768,0.0101513,0.00111395,-0.00104272,0.1345],
+ "R": [
+ [-0.9138255678,-0.001018785166,-0.4061056435],
+ [-0.03060482875,0.9973259054,0.06636552484],
+ [0.4049520663,0.0730753071,-0.9114130916]
+ ],
+ "t": [
+ [24.3580015],
+ [146.5427691],
+ [284.2261849]
+ ]
+ },
+ {
+ "name": "07_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 24,
+ "K": [
+ [744.847,0,398.685],
+ [0,745.01,270.264],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328511,0.106892,0.000179407,0.00152869,-0.00291861],
+ "R": [
+ [-0.915939158,0.01937877811,-0.4008490012],
+ [-0.01852012751,0.9957282098,0.09045627137],
+ [0.4008895904,0.09027621565,-0.9116675607]
+ ],
+ "t": [
+ [6.147743662],
+ [145.7157982],
+ [287.1579534]
+ ]
+ },
+ {
+ "name": "08_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 1,
+ "K": [
+ [743.703,0,360.221],
+ [0,744.108,227.682],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309411,-0.0239561,-0.001159,0.000249551,0.191643],
+ "R": [
+ [-0.6256262875,-0.004424555618,-0.7801103586],
+ [-0.1745259617,0.9754325172,0.134432485],
+ [0.7603502068,0.2202540071,-0.6110284243]
+ ],
+ "t": [
+ [5.656398722],
+ [175.9817187],
+ [302.7764948]
+ ]
+ },
+ {
+ "name": "08_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 2,
+ "K": [
+ [747.203,0,376.344],
+ [0,747.435,209.923],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331616,0.11313,4.7739e-05,0.00134479,-0.0154118],
+ "R": [
+ [-0.6724252099,0.1092176997,-0.7320627235],
+ [-0.09964199407,0.9666926758,0.2357472025],
+ [0.7334274403,0.2314665517,-0.6391458561]
+ ],
+ "t": [
+ [-0.9742570867],
+ [185.4525058],
+ [305.0714088]
+ ]
+ },
+ {
+ "name": "08_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 3,
+ "K": [
+ [747.234,0,368.091],
+ [0,747.404,224.293],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329137,0.0905459,-0.000565165,-0.000329878,0.0231933],
+ "R": [
+ [-0.656899377,0.0205246652,-0.7536988435],
+ [-0.2005757989,0.9588523348,0.2009267253],
+ [0.7268098496,0.2831623883,-0.6257527502]
+ ],
+ "t": [
+ [-32.7353206],
+ [153.4285774],
+ [313.8994992]
+ ]
+ },
+ {
+ "name": "08_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 4,
+ "K": [
+ [747.386,0,362.788],
+ [0,747.713,235.953],
+ [0,0,1]
+ ],
+ "distCoef": [-0.341304,0.154379,-0.000777774,-0.000654564,-0.0867958],
+ "R": [
+ [-0.6631685233,0.06657565756,-0.7455033143],
+ [-0.1433461882,0.9663011288,0.2138083224],
+ [0.7346151238,0.2486560079,-0.6312771259]
+ ],
+ "t": [
+ [-22.98714967],
+ [144.6795235],
+ [307.788251]
+ ]
+ },
+ {
+ "name": "08_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 5,
+ "K": [
+ [745.746,0,376.748],
+ [0,745.752,233.642],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32088,0.0642866,0.000720856,0.00118823,0.0489989],
+ "R": [
+ [-0.6568191598,0.04935682433,-0.7524310568],
+ [-0.1452125328,0.970898021,0.19044777],
+ [0.7399337211,0.2343521638,-0.6305371929]
+ ],
+ "t": [
+ [-42.15667108],
+ [135.9397275],
+ [306.138018]
+ ]
+ },
+ {
+ "name": "08_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 6,
+ "K": [
+ [743.581,0,359.642],
+ [0,743.625,223.766],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309434,-0.0145066,-0.000137344,-0.000208072,0.169515],
+ "R": [
+ [-0.6714433509,-0.01781555577,-0.7408417054],
+ [-0.2359597182,0.9528188479,0.1909430659],
+ [0.7024861834,0.3030162521,-0.6439676336]
+ ],
+ "t": [
+ [-57.25895983],
+ [89.79547495],
+ [311.6502108]
+ ]
+ },
+ {
+ "name": "08_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 7,
+ "K": [
+ [745.148,0,371.237],
+ [0,745.103,220.621],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318768,0.034703,-0.000217256,0.000447556,0.0954449],
+ "R": [
+ [-0.7012843801,0.01049644172,-0.7128043511],
+ [-0.1276034542,0.9818947595,0.1400001421],
+ [0.7013683602,0.1891362102,-0.6872480755]
+ ],
+ "t": [
+ [-43.70728874],
+ [118.2041714],
+ [298.0588141]
+ ]
+ },
+ {
+ "name": "08_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 8,
+ "K": [
+ [743.06,0,391.891],
+ [0,743.237,230.861],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322908,0.0553375,0.000339696,0.00130059,0.0777268],
+ "R": [
+ [-0.6299217379,0.07604043096,-0.7729272003],
+ [-0.1362742651,0.9689348188,0.2063846932],
+ [0.7646096578,0.2353362908,-0.5999907511]
+ ],
+ "t": [
+ [-3.915515028],
+ [82.19520224],
+ [306.2551203]
+ ]
+ },
+ {
+ "name": "08_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 9,
+ "K": [
+ [746.456,0,356.955],
+ [0,746.592,233.352],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320498,0.0507213,0.000550471,0.000126643,0.0741224],
+ "R": [
+ [-0.684872543,0.06612723284,-0.7256561093],
+ [-0.09767122593,0.9785553778,0.1813551881],
+ [0.7220872049,0.1950809107,-0.6637269822]
+ ],
+ "t": [
+ [-6.194765679],
+ [87.40737989],
+ [301.7039487]
+ ]
+ },
+ {
+ "name": "08_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 10,
+ "K": [
+ [747.33,0,361.528],
+ [0,747.71,220.883],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322455,0.0389243,0.00118705,0.000768992,0.12227],
+ "R": [
+ [-0.6055801648,0.01225702185,-0.7956899079],
+ [-0.1760343759,0.973047512,0.1489645524],
+ [0.7760699469,0.2302787546,-0.5871006154]
+ ],
+ "t": [
+ [32.64204154],
+ [89.24589085],
+ [303.2777117]
+ ]
+ },
+ {
+ "name": "08_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 11,
+ "K": [
+ [747.774,0,350.264],
+ [0,747.981,233.163],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312094,-0.0263709,0.00148203,-0.000526901,0.233175],
+ "R": [
+ [-0.6738094891,0.06987822761,-0.7355935058],
+ [-0.1142917175,0.9736808734,0.1971876265],
+ [0.730012449,0.216939139,-0.6480889092]
+ ],
+ "t": [
+ [35.79986479],
+ [83.7107121],
+ [303.8218457]
+ ]
+ },
+ {
+ "name": "08_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 12,
+ "K": [
+ [744.899,0,366.47],
+ [0,744.848,222.726],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30396,-0.0418844,-0.00058576,-0.000160605,0.231689],
+ "R": [
+ [-0.6160341517,-0.01803679921,-0.7875129191],
+ [-0.1884772348,0.9740736778,0.1251271436],
+ [0.7648387123,0.2255108512,-0.6034621779]
+ ],
+ "t": [
+ [61.57356311],
+ [97.36793025],
+ [301.4047959]
+ ]
+ },
+ {
+ "name": "08_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 13,
+ "K": [
+ [746.859,0,368.586],
+ [0,747.139,224.684],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318047,0.0428323,-0.000551709,0.000692584,0.0895927],
+ "R": [
+ [-0.6485099772,-0.04236983322,-0.7600260566],
+ [-0.2235198928,0.9650338886,0.1369249841],
+ [0.7276494121,0.258678161,-0.6353046057]
+ ],
+ "t": [
+ [38.13208236],
+ [106.9572182],
+ [307.8393222]
+ ]
+ },
+ {
+ "name": "08_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 14,
+ "K": [
+ [744.505,0,357.32],
+ [0,744.53,228.165],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303025,-0.0702212,0.000533599,-0.000753966,0.269146],
+ "R": [
+ [-0.6825611814,-0.04644305139,-0.729351271],
+ [-0.1871280484,0.9758162042,0.1129859684],
+ [0.7064653757,0.213601916,-0.6747450588]
+ ],
+ "t": [
+ [41.82592662],
+ [132.5834032],
+ [304.3020009]
+ ]
+ },
+ {
+ "name": "08_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 15,
+ "K": [
+ [745.837,0,357.73],
+ [0,745.88,221.629],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3197,0.0439542,-0.00136466,0.00170195,0.109142],
+ "R": [
+ [-0.6069626381,-0.02117938565,-0.7944481037],
+ [-0.2107505505,0.968144583,0.1352045554],
+ [0.7662770787,0.2494944888,-0.5920911574]
+ ],
+ "t": [
+ [64.87618524],
+ [141.1933336],
+ [303.6799609]
+ ]
+ },
+ {
+ "name": "08_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 16,
+ "K": [
+ [744.767,0,345.102],
+ [0,744.781,229.581],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307131,-0.033453,0.0002274,-0.000565369,0.224073],
+ "R": [
+ [-0.6350262321,-0.03398669713,-0.7717425665],
+ [-0.2527580664,0.9531820242,0.1660041824],
+ [0.7299692079,0.3004811693,-0.6138860012]
+ ],
+ "t": [
+ [34.611726],
+ [134.434862],
+ [314.3473002]
+ ]
+ },
+ {
+ "name": "08_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 17,
+ "K": [
+ [743.543,0,370.548],
+ [0,743.847,224.118],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308645,-0.0111516,9.80345e-05,-0.000744439,0.160705],
+ "R": [
+ [-0.6124225565,-0.05791042639,-0.7884066177],
+ [-0.1936876385,0.977907652,0.07862393367],
+ [0.7664357188,0.2008556864,-0.610109238]
+ ],
+ "t": [
+ [28.62018644],
+ [186.6213498],
+ [297.6164741]
+ ]
+ },
+ {
+ "name": "08_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 18,
+ "K": [
+ [743.39,0,376.249],
+ [0,743.751,216.723],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319375,0.0602092,-1.05699e-05,0.00110696,0.0487054],
+ "R": [
+ [-0.6887185447,0.08181736584,-0.720397588],
+ [-0.1043667464,0.9720764384,0.2101784484],
+ [0.7174777686,0.2199393475,-0.6609480577]
+ ],
+ "t": [
+ [20.48604056],
+ [189.7333893],
+ [302.8177068]
+ ]
+ },
+ {
+ "name": "08_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 19,
+ "K": [
+ [747.038,0,360.923],
+ [0,747.259,204.023],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32724,0.0825647,-0.000697091,0.000733699,0.0397455],
+ "R": [
+ [-0.6726100217,0.03848005322,-0.7389959704],
+ [-0.1487286588,0.9712392562,0.1859411014],
+ [0.7248969201,0.2349757278,-0.6475421705]
+ ],
+ "t": [
+ [3.177324598],
+ [151.0352965],
+ [305.3818706]
+ ]
+ },
+ {
+ "name": "08_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 20,
+ "K": [
+ [747.914,0,388.693],
+ [0,747.835,242.83],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338429,0.134609,0.00136964,0.000561914,-0.0365273],
+ "R": [
+ [-0.6685313457,0.02780025068,-0.7431641715],
+ [-0.1765857142,0.9647874561,0.194942684],
+ [0.722414926,0.2615574708,-0.6400815293]
+ ],
+ "t": [
+ [-14.15175066],
+ [129.456494],
+ [308.9585645]
+ ]
+ },
+ {
+ "name": "08_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 21,
+ "K": [
+ [746.296,0,369.274],
+ [0,746.424,219.198],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312598,-0.010091,-0.000298989,-0.000771876,0.160922],
+ "R": [
+ [-0.6341455554,-0.01222382885,-0.7731170626],
+ [-0.1896201401,0.9718007188,0.1401697733],
+ [0.7496023059,0.2354866044,-0.6185809907]
+ ],
+ "t": [
+ [-6.414673774],
+ [116.5175191],
+ [305.5663378]
+ ]
+ },
+ {
+ "name": "08_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 22,
+ "K": [
+ [743.609,0,361.562],
+ [0,743.794,221.87],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314273,0.00142644,4.14402e-05,0.000150079,0.159707],
+ "R": [
+ [-0.6552794634,-0.0176584532,-0.7551801135],
+ [-0.2007508014,0.9678470127,0.1515627784],
+ [0.7282224527,0.2509189891,-0.6377552198]
+ ],
+ "t": [
+ [4.541098798],
+ [103.6271831],
+ [307.0310837]
+ ]
+ },
+ {
+ "name": "08_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 23,
+ "K": [
+ [748.435,0,354.117],
+ [0,748.457,219.552],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324308,0.0627041,-0.000215295,-0.000444561,0.0758056],
+ "R": [
+ [-0.6485698923,-0.03356212054,-0.7604148071],
+ [-0.2015811272,0.9709293787,0.1290782349],
+ [0.733976937,0.2370015309,-0.6364810526]
+ ],
+ "t": [
+ [20.56445448],
+ [121.4098798],
+ [305.3725739]
+ ]
+ },
+ {
+ "name": "08_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 24,
+ "K": [
+ [745.572,0,350.678],
+ [0,745.729,218.826],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313081,0.00890587,-0.000465969,-0.00023462,0.141032],
+ "R": [
+ [-0.6716141,0.00283216084,-0.7408957278],
+ [-0.1390702972,0.9817365211,0.1298185488],
+ [0.7277320613,0.1902245569,-0.6589542206]
+ ],
+ "t": [
+ [13.95231346],
+ [154.9907046],
+ [298.6967118]
+ ]
+ },
+ {
+ "name": "09_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 1,
+ "K": [
+ [745.377,0,383.314],
+ [0,745.581,229.65],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311824,0.0113225,-0.000890232,0.000288511,0.13186],
+ "R": [
+ [-0.9888207636,0.1490770148,-0.003088867539],
+ [0.1339941062,0.8974831076,0.420201917],
+ [0.06541465384,0.4150904904,-0.9074253732]
+ ],
+ "t": [
+ [-5.5065201],
+ [83.70733211],
+ [330.6651976]
+ ]
+ },
+ {
+ "name": "09_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 2,
+ "K": [
+ [745.133,0,380.598],
+ [0,746.347,248.499],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340543,0.0603048,-0.00219925,-0.00194065,0.128165],
+ "R": [
+ [-0.9728033822,0.2090533065,0.09975116351],
+ [0.2316107347,0.8720009628,0.4312433055],
+ [0.003169728315,0.4426183864,-0.8967044758]
+ ],
+ "t": [
+ [-23.76195567],
+ [58.26386366],
+ [329.69794]
+ ]
+ },
+ {
+ "name": "09_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 3,
+ "K": [
+ [745.787,0,382.41],
+ [0,745.973,216.203],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309439,0.00115788,-0.000439278,0.00154239,0.140783],
+ "R": [
+ [-0.995096801,0.09728424012,-0.01783629191],
+ [0.08253738581,0.9161639792,0.3922131349],
+ [0.05449712496,0.3888178749,-0.9197014317]
+ ],
+ "t": [
+ [6.72584843],
+ [65.39953055],
+ [327.4514754]
+ ]
+ },
+ {
+ "name": "09_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 4,
+ "K": [
+ [744.782,0,384.335],
+ [0,745.051,230.833],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319171,0.0452003,0.000841339,0.00114337,0.0902557],
+ "R": [
+ [-0.9962766095,0.08536470964,0.01207409478],
+ [0.0830687393,0.9129812009,0.3994557689],
+ [0.02307600417,0.3989714189,-0.9166729542]
+ ],
+ "t": [
+ [12.91980994],
+ [75.72355875],
+ [328.4117918]
+ ]
+ },
+ {
+ "name": "09_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 5,
+ "K": [
+ [745.938,0,386.124],
+ [0,746.151,234.663],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322825,0.0563734,0.000659785,0.00216478,0.0846192],
+ "R": [
+ [-0.9996885429,0.02460566921,0.004168718214],
+ [0.02372582958,0.8852416043,0.464525981],
+ [0.007739649829,0.4644802074,-0.8855496794]
+ ],
+ "t": [
+ [23.79490616],
+ [45.57973364],
+ [333.4360246]
+ ]
+ },
+ {
+ "name": "09_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 6,
+ "K": [
+ [745.533,0,376.456],
+ [0,745.938,237.583],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324418,0.0645728,-2.52302e-05,0.000695669,0.0784542],
+ "R": [
+ [-0.9996292032,0.0242501169,-0.01238498622],
+ [0.01720849374,0.9151046106,0.4028491273],
+ [0.02110269642,0.4024866252,-0.9151826008]
+ ],
+ "t": [
+ [44.50201086],
+ [83.15135806],
+ [329.4460526]
+ ]
+ },
+ {
+ "name": "09_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 7,
+ "K": [
+ [745.538,0,357.165],
+ [0,745.859,222.198],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30448,-0.0356601,-0.000261684,-0.000249049,0.226264],
+ "R": [
+ [-0.9994703128,-0.005373675551,-0.03209699996],
+ [-0.01769948118,0.9174086112,0.3975527241],
+ [0.02730974481,0.3979102457,-0.9170177829]
+ ],
+ "t": [
+ [39.28939518],
+ [107.3778293],
+ [329.1138759]
+ ]
+ },
+ {
+ "name": "09_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 8,
+ "K": [
+ [746.393,0,361.584],
+ [0,746.73,220.937],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31726,0.0513551,0.000643529,-0.000795525,0.0635312],
+ "R": [
+ [-0.9973050313,-0.005865573042,-0.0731318648],
+ [-0.03181904441,0.9327538711,0.3591068981],
+ [0.06610766226,0.3604661023,-0.9304267656]
+ ],
+ "t": [
+ [64.05594666],
+ [137.6750859],
+ [322.0323762]
+ ]
+ },
+ {
+ "name": "09_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 9,
+ "K": [
+ [750.271,0,344.156],
+ [0,750.817,228.346],
+ [0,0,1]
+ ],
+ "distCoef": [-0.379154,0.391779,0.000225814,-0.000528714,-0.53339],
+ "R": [
+ [-0.9991212371,-0.002089946585,-0.04186150665],
+ [-0.01685937738,0.9344344151,0.355735977],
+ [0.03837336329,0.3561291283,-0.933648504]
+ ],
+ "t": [
+ [51.49527243],
+ [159.1149955],
+ [322.66132]
+ ]
+ },
+ {
+ "name": "09_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 10,
+ "K": [
+ [744.897,0,366.998],
+ [0,745.389,227.752],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317307,0.0499201,-0.000255849,-0.000414203,0.0689696],
+ "R": [
+ [-0.9956077306,0.03830608065,-0.08542769468],
+ [0.005132094192,0.9334237661,0.3587390896],
+ [0.093482129,0.3567249879,-0.9295205079]
+ ],
+ "t": [
+ [51.9897871],
+ [163.3127669],
+ [320.2676037]
+ ]
+ },
+ {
+ "name": "09_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 11,
+ "K": [
+ [745.812,0,365.568],
+ [0,746.463,243.927],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334591,0.135033,-0.000586766,0.000648781,-0.0516408],
+ "R": [
+ [-0.998272905,0.02856351314,-0.05133549401],
+ [0.007150624435,0.926422355,0.3764179707],
+ [0.05831016891,0.3754007803,-0.9250265825]
+ ],
+ "t": [
+ [35.7749059],
+ [177.7642897],
+ [325.0135255]
+ ]
+ },
+ {
+ "name": "09_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 12,
+ "K": [
+ [743.195,0,380.908],
+ [0,743.577,227.789],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308886,-0.0148964,-0.00146189,1.64512e-05,0.167268],
+ "R": [
+ [-0.9994731762,0.02727182579,0.01759595347],
+ [0.03184982914,0.9284235071,0.3701558858],
+ [-0.006241669996,0.370521307,-0.9288029945]
+ ],
+ "t": [
+ [-0.9618436208],
+ [187.4005014],
+ [324.424529]
+ ]
+ },
+ {
+ "name": "09_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 13,
+ "K": [
+ [745.52,0,396.637],
+ [0,745.641,231.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327971,0.0908214,-0.00010844,0.00165709,0.0286999],
+ "R": [
+ [-0.9916965419,0.1263943494,0.02371575794],
+ [0.1244737261,0.8970729317,0.4239887342],
+ [0.03231501572,0.4234201503,-0.9053568998]
+ ],
+ "t": [
+ [12.62306638],
+ [150.537484],
+ [333.7640249]
+ ]
+ },
+ {
+ "name": "09_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 14,
+ "K": [
+ [744.91,0,372.463],
+ [0,744.965,226.423],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308854,-0.0214085,8.99951e-05,0.000256405,0.180188],
+ "R": [
+ [-0.9924146786,0.1180105859,0.03444716585],
+ [0.1215225705,0.8993517426,0.4199984619],
+ [0.01858414592,0.4209987468,-0.9068708203]
+ ],
+ "t": [
+ [-10.68067405],
+ [162.2988485],
+ [333.0026074]
+ ]
+ },
+ {
+ "name": "09_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 15,
+ "K": [
+ [747.246,0,368.718],
+ [0,747.604,232.745],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3413,0.139342,-0.00187439,-0.000934376,-0.0485015],
+ "R": [
+ [-0.9858543141,0.1593536378,0.05193928607],
+ [0.1663907088,0.8933064559,0.4175137217],
+ [0.02013463084,0.4202499184,-0.9071849882]
+ ],
+ "t": [
+ [-16.61956214],
+ [147.1949584],
+ [331.9981158]
+ ]
+ },
+ {
+ "name": "09_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 16,
+ "K": [
+ [743.705,0,367.288],
+ [0,743.835,246.124],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316616,0.0215265,-3.02132e-05,0.000242548,0.131229],
+ "R": [
+ [-0.9974602961,0.07055123587,0.009771425173],
+ [0.06902048446,0.9235857212,0.3771280794],
+ [0.01758210332,0.3768447143,-0.9261095675]
+ ],
+ "t": [
+ [-30.73982653],
+ [139.9628037],
+ [324.9351286]
+ ]
+ },
+ {
+ "name": "09_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 17,
+ "K": [
+ [742.776,0,376.251],
+ [0,742.956,242.934],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317736,0.0249159,0.000195501,0.000659428,0.110976],
+ "R": [
+ [-0.9810894361,0.1806813104,0.06941024814],
+ [0.1934432758,0.9031273242,0.3833284952],
+ [0.006574003146,0.389506483,-0.9210002618]
+ ],
+ "t": [
+ [-32.91453507],
+ [125.2651482],
+ [325.9500645]
+ ]
+ },
+ {
+ "name": "09_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 18,
+ "K": [
+ [744.563,0,383.579],
+ [0,744.554,245.613],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324188,0.0688729,0.000784842,0.000316148,0.0548859],
+ "R": [
+ [-0.970594512,0.2257141743,0.08366244524],
+ [0.2406675117,0.9026066179,0.3569039677],
+ [0.005044007626,0.3665438649,-0.9303870985]
+ ],
+ "t": [
+ [-30.64851648],
+ [114.5848432],
+ [323.1694161]
+ ]
+ },
+ {
+ "name": "09_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 19,
+ "K": [
+ [745.897,0,369.27],
+ [0,746.007,226.27],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314378,0.0131268,-0.000749673,-0.000436078,0.140449],
+ "R": [
+ [-0.9929061616,0.1118291068,0.04039313118],
+ [0.1187797946,0.9175946163,0.3793566667],
+ [0.005358597494,0.3814634596,-0.9243683867]
+ ],
+ "t": [
+ [-9.348770156],
+ [111.4514571],
+ [325.9373984]
+ ]
+ },
+ {
+ "name": "09_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 20,
+ "K": [
+ [743.647,0,378.532],
+ [0,743.859,221.629],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312883,-0.00145442,-0.000725648,-1.91192e-05,0.160115],
+ "R": [
+ [-0.9995005243,0.01416777706,-0.02824846864],
+ [0.002450265794,0.9259270935,0.3776943389],
+ [0.03150711165,0.3774364735,-0.9254993303]
+ ],
+ "t": [
+ [6.861259295],
+ [105.360829],
+ [326.1962043]
+ ]
+ },
+ {
+ "name": "09_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 21,
+ "K": [
+ [745.35,0,364.423],
+ [0,745.51,242.824],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317615,0.0309367,1.60295e-05,-0.00084218,0.138729],
+ "R": [
+ [-0.9983267687,0.03243769532,-0.0478691851],
+ [0.01510269673,0.9453721551,0.3256430514],
+ [0.05581730476,0.3243752215,-0.9442802255]
+ ],
+ "t": [
+ [30.85545331],
+ [138.1219419],
+ [318.1793043]
+ ]
+ },
+ {
+ "name": "09_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 22,
+ "K": [
+ [744.248,0,356.027],
+ [0,744.436,238.226],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308137,-0.0481761,0.000357682,-8.3696e-05,0.245728],
+ "R": [
+ [-0.9955839097,0.09158830299,-0.0205976113],
+ [0.07579544873,0.9137019347,0.3992540852],
+ [0.05538708142,0.3959297379,-0.9166089209]
+ ],
+ "t": [
+ [35.25988756],
+ [131.4528362],
+ [328.3382973]
+ ]
+ },
+ {
+ "name": "09_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 23,
+ "K": [
+ [744.535,0,363.359],
+ [0,744.632,254.668],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311847,-0.00198079,0.000462082,-0.000460419,0.174118],
+ "R": [
+ [-0.9946906764,0.1028474748,0.003585412436],
+ [0.09771594436,0.9329851386,0.346396197],
+ [0.03228083764,0.3449074195,-0.9380814567]
+ ],
+ "t": [
+ [12.3985171],
+ [157.8437238],
+ [320.5381764]
+ ]
+ },
+ {
+ "name": "09_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 24,
+ "K": [
+ [743.311,0,385.98],
+ [0,743.511,229.743],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319602,0.0480118,-0.000790169,0.000699953,0.0704098],
+ "R": [
+ [-0.9986396845,0.04700092247,-0.02257640097],
+ [0.03617494752,0.9363507866,0.3491970469],
+ [0.03755201414,0.3479053287,-0.93677731]
+ ],
+ "t": [
+ [-8.936415104],
+ [142.1371611],
+ [321.4431282]
+ ]
+ },
+ {
+ "name": "10_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 1,
+ "K": [
+ [744.128,0,369.511],
+ [0,744.056,233.67],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31156,0.00550691,-0.000430053,0.000410016,0.149166],
+ "R": [
+ [-0.6229970612,0.0209936641,0.781942407],
+ [0.05250109858,0.9985078863,0.01502117145],
+ [-0.7804603106,0.05041098106,-0.6231696692]
+ ],
+ "t": [
+ [-46.84686717],
+ [150.7389104],
+ [280.0083694]
+ ]
+ },
+ {
+ "name": "10_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 2,
+ "K": [
+ [743.282,0,357.827],
+ [0,743.347,211.632],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30948,-0.00718458,0.000285593,0.000547399,0.164062],
+ "R": [
+ [-0.6512046155,0.0977241901,0.7525839032],
+ [0.103617117,0.9938368806,-0.03939223155],
+ [-0.7517952126,0.05232817138,-0.6573170626]
+ ],
+ "t": [
+ [-42.32005533],
+ [143.0774393],
+ [282.200902]
+ ]
+ },
+ {
+ "name": "10_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 3,
+ "K": [
+ [744.012,0,361.17],
+ [0,744.101,225.217],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303567,-0.0563565,0.000757602,-0.000519388,0.263551],
+ "R": [
+ [-0.6320598226,0.04182219841,0.773790207],
+ [0.06737176964,0.9977273282,0.001106034268],
+ [-0.771985379,0.05283069539,-0.6334409935]
+ ],
+ "t": [
+ [-54.02554254],
+ [119.7786683],
+ [280.9354705]
+ ]
+ },
+ {
+ "name": "10_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 4,
+ "K": [
+ [744.209,0,380.966],
+ [0,744.256,205.476],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315194,0.0249601,-0.000765583,0.001001,0.10286],
+ "R": [
+ [-0.6566261636,0.06356030055,0.7515332125],
+ [0.0713368826,0.9972094103,-0.02201002698],
+ [-0.7508349555,0.03915967697,-0.6593279831]
+ ],
+ "t": [
+ [-22.38173011],
+ [115.5645607],
+ [280.9145253]
+ ]
+ },
+ {
+ "name": "10_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 5,
+ "K": [
+ [744.499,0,353.834],
+ [0,744.652,215.524],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317042,0.0236932,-0.00147688,-0.000206715,0.11602],
+ "R": [
+ [-0.6480155592,0.1057846486,0.754244949],
+ [0.1559047408,0.9877614348,-0.004589090624],
+ [-0.7454995284,0.1146165612,-0.6565771067]
+ ],
+ "t": [
+ [-17.37690425],
+ [72.84298088],
+ [287.4167752]
+ ]
+ },
+ {
+ "name": "10_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 6,
+ "K": [
+ [746.493,0,367.328],
+ [0,746.754,207.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323089,0.0587326,-0.000981175,-0.000221417,0.0550321],
+ "R": [
+ [-0.6607542091,0.07289791872,0.74705406],
+ [0.1340507848,0.9907326878,0.02188900409],
+ [-0.738535214,0.1146064347,-0.6644028167]
+ ],
+ "t": [
+ [3.021864726],
+ [64.04371811],
+ [286.9062935]
+ ]
+ },
+ {
+ "name": "10_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 7,
+ "K": [
+ [744.949,0,365.308],
+ [0,744.944,217.014],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320697,0.0459897,0.000335318,2.89241e-06,0.0947246],
+ "R": [
+ [-0.643287111,0.03528116955,0.764811697],
+ [0.0902182212,0.9954712387,0.02996140018],
+ [-0.7602909742,0.08827373343,-0.6435568215]
+ ],
+ "t": [
+ [9.776307982],
+ [84.51813798],
+ [285.3816638]
+ ]
+ },
+ {
+ "name": "10_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 8,
+ "K": [
+ [748.112,0,395.78],
+ [0,748.17,229.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325424,0.0774932,-0.000546,0.000524276,0.0351183],
+ "R": [
+ [-0.6241633069,0.05185263499,0.7795713377],
+ [0.04102617023,0.9985938587,-0.03357318505],
+ [-0.7802160084,0.0110276762,-0.6254129601]
+ ],
+ "t": [
+ [-46.24758235],
+ [183.5392889],
+ [272.6641799]
+ ]
+ },
+ {
+ "name": "10_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 9,
+ "K": [
+ [746.122,0,370.333],
+ [0,746.261,210.753],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323285,0.0813962,-0.00031195,0.00117949,0.0118242],
+ "R": [
+ [-0.6717702835,0.002860846795,0.7407540089],
+ [0.1085475528,0.9895782107,0.09461708989],
+ [-0.7327633417,0.1439679842,-0.6650797731]
+ ],
+ "t": [
+ [53.6134591],
+ [78.01841366],
+ [288.9552018]
+ ]
+ },
+ {
+ "name": "10_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 10,
+ "K": [
+ [746.498,0,355.775],
+ [0,746.616,218.183],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320479,0.0482256,-0.000295345,0.000515541,0.088746],
+ "R": [
+ [-0.6274497943,0.01735785812,0.7784635254],
+ [0.05740772193,0.9980618939,0.02401685623],
+ [-0.7765378993,0.0597591891,-0.6272302051]
+ ],
+ "t": [
+ [35.32452291],
+ [122.8912729],
+ [283.9520693]
+ ]
+ },
+ {
+ "name": "10_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 11,
+ "K": [
+ [745.209,0,387.948],
+ [0,745.058,237.868],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312054,0.0106095,2.04654e-05,-0.000407432,0.122509],
+ "R": [
+ [-0.663538187,0.0558857692,0.74605218],
+ [0.09086672278,0.9958436408,0.006219474654],
+ [-0.742603739,0.07191817555,-0.6658584406]
+ ],
+ "t": [
+ [70.41193089],
+ [130.903078],
+ [283.3216663]
+ ]
+ },
+ {
+ "name": "10_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 12,
+ "K": [
+ [746.923,0,359.191],
+ [0,746.955,219.728],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34193,0.180291,-0.0011698,0.000387434,-0.142263],
+ "R": [
+ [-0.6573529902,0.02662022179,0.7531124817],
+ [0.0203979596,0.9996382488,-0.01752982786],
+ [-0.7533066902,0.003838673213,-0.6576581901]
+ ],
+ "t": [
+ [61.18715226],
+ [173.543055],
+ [273.2477614]
+ ]
+ },
+ {
+ "name": "10_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 13,
+ "K": [
+ [747.063,0,362.554],
+ [0,747.091,228.588],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334743,0.115617,-0.000133435,0.000763825,-0.0142674],
+ "R": [
+ [-0.6314178936,0.07344004486,0.771957255],
+ [0.07624079511,0.9965613541,-0.03244701456],
+ [-0.7716856775,0.03836700932,-0.6348457984]
+ ],
+ "t": [
+ [39.63694261],
+ [165.7689372],
+ [279.8275089]
+ ]
+ },
+ {
+ "name": "10_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 14,
+ "K": [
+ [745.722,0,380.721],
+ [0,745.932,237.231],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319645,0.0532601,-0.00105825,0.00148804,0.0812854],
+ "R": [
+ [-0.6464741699,0.0407242176,0.7618482039],
+ [0.05782238306,0.998317631,-0.004298792509],
+ [-0.7607415591,0.04127282036,-0.6477413331]
+ ],
+ "t": [
+ [37.16059778],
+ [187.0284564],
+ [279.5510011]
+ ]
+ },
+ {
+ "name": "10_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 15,
+ "K": [
+ [745.212,0,345.945],
+ [0,745.407,234.052],
+ [0,0,1]
+ ],
+ "distCoef": [-0.345973,0.208044,0.00063894,-0.000591324,-0.26389],
+ "R": [
+ [-0.6892736753,0.06991501806,0.7211197479],
+ [0.04097555303,0.9975016565,-0.0575451947],
+ [-0.7233414164,-0.01011610737,-0.6904164394]
+ ],
+ "t": [
+ [38.38229011],
+ [201.7157692],
+ [268.6124541]
+ ]
+ },
+ {
+ "name": "10_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 16,
+ "K": [
+ [746.402,0,351.743],
+ [0,746.432,235.34],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332074,0.123634,0.000553061,0.000200886,-0.050504],
+ "R": [
+ [-0.6626903808,0.1069713565,0.7412142659],
+ [0.1159650419,0.9924654921,-0.03955194002],
+ [-0.7398605059,0.05974425322,-0.6701022728]
+ ],
+ "t": [
+ [18.24762504],
+ [172.5928493],
+ [282.9657885]
+ ]
+ },
+ {
+ "name": "10_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 17,
+ "K": [
+ [745.425,0,381.954],
+ [0,745.576,234.397],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316953,0.0361047,-0.000329948,0.00146685,0.0995591],
+ "R": [
+ [-0.6439914485,0.08005681888,0.7608323863],
+ [0.04150323442,0.9967010496,-0.06974596286],
+ [-0.7639060779,-0.01333879876,-0.6451895695]
+ ],
+ "t": [
+ [-14.39474973],
+ [198.5707312],
+ [268.934139]
+ ]
+ },
+ {
+ "name": "10_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 18,
+ "K": [
+ [742.866,0,374.357],
+ [0,743.163,216.484],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313801,-0.00472223,0.00105562,-0.000883374,0.146196],
+ "R": [
+ [-0.6735625977,0.03695414336,0.7382058102],
+ [0.08136680684,0.9963864104,0.02436316713],
+ [-0.7346379174,0.07647556771,-0.6741354596]
+ ],
+ "t": [
+ [41.81793908],
+ [81.57199105],
+ [283.0241236]
+ ]
+ },
+ {
+ "name": "10_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 19,
+ "K": [
+ [747.195,0,374.317],
+ [0,747.324,252.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325848,0.0754879,0.000850799,-0.000494425,0.0423325],
+ "R": [
+ [-0.6398121174,0.03550225829,0.7677109118],
+ [0.06489671873,0.9978603994,0.00793971962],
+ [-0.7657864391,0.05490184793,-0.6407471551]
+ ],
+ "t": [
+ [-18.67539454],
+ [143.739157],
+ [281.6554752]
+ ]
+ },
+ {
+ "name": "10_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 20,
+ "K": [
+ [744.074,0,359.595],
+ [0,744.232,222.54],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312038,-0.00652471,0.000517579,-0.000473896,0.154037],
+ "R": [
+ [-0.6341018605,0.07503908623,0.769599874],
+ [0.1134623387,0.9935365213,-0.003387984729],
+ [-0.7648798129,0.08517227417,-0.6385174669]
+ ],
+ "t": [
+ [-10.64771601],
+ [114.6784971],
+ [285.5473806]
+ ]
+ },
+ {
+ "name": "10_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 21,
+ "K": [
+ [745.669,0,353.595],
+ [0,745.986,221.41],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331248,0.0956435,-0.00124938,0.0010706,0.0394747],
+ "R": [
+ [-0.618235149,0.02815342604,0.7854888192],
+ [0.09838720035,0.994269895,0.04180113162],
+ [-0.7798110408,0.1031249747,-0.6174625335]
+ ],
+ "t": [
+ [-3.462045404],
+ [102.4105128],
+ [287.5712577]
+ ]
+ },
+ {
+ "name": "10_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 22,
+ "K": [
+ [745.836,0,367.536],
+ [0,745.883,217.602],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306908,-0.0326669,-0.000283909,0.000278093,0.200484],
+ "R": [
+ [-0.6189078213,0.03804187807,0.7845418563],
+ [0.07413417155,0.9971968305,0.01012945108],
+ [-0.7819573092,0.06443055706,-0.6199931209]
+ ],
+ "t": [
+ [14.73270812],
+ [126.5060302],
+ [283.9045417]
+ ]
+ },
+ {
+ "name": "10_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 23,
+ "K": [
+ [742.749,0,379.273],
+ [0,742.868,231.204],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310394,-0.00460726,-0.000822068,-0.000336616,0.147608],
+ "R": [
+ [-0.6037549899,0.1086195044,0.7897352186],
+ [0.1215591915,0.9916324658,-0.04345590495],
+ [-0.787847241,0.0697628552,-0.6119067485]
+ ],
+ "t": [
+ [19.26192194],
+ [145.0128457],
+ [284.7838402]
+ ]
+ },
+ {
+ "name": "10_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 24,
+ "K": [
+ [745.597,0,368.627],
+ [0,745.598,227.731],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309585,-0.00749389,-0.000770097,-0.000330202,0.147896],
+ "R": [
+ [-0.6450785239,0.075478584,0.760379301],
+ [0.07622559694,0.9965021766,-0.03425011393],
+ [-0.7603047786,0.03586635318,-0.6485755533]
+ ],
+ "t": [
+ [7.856697427],
+ [160.1393432],
+ [279.1413867]
+ ]
+ },
+ {
+ "name": "11_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 1,
+ "K": [
+ [742.855,0,374.596],
+ [0,743.116,213.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312561,0.00631745,-0.000399255,9.31566e-05,0.13435],
+ "R": [
+ [-0.9229364354,0.00164792287,0.3849488544],
+ [0.08421827064,0.9766305816,0.1977371741],
+ [-0.3756269679,0.2149185694,-0.9015067329]
+ ],
+ "t": [
+ [-1.777017447],
+ [176.3500352],
+ [303.9155303]
+ ]
+ },
+ {
+ "name": "11_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 2,
+ "K": [
+ [743.543,0,362.467],
+ [0,743.612,228.587],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311508,-0.0063044,0.000209199,0.000389142,0.157517],
+ "R": [
+ [-0.9382305089,-0.009495783218,0.3458805319],
+ [0.07354737957,0.9713073762,0.226169768],
+ [-0.338103971,0.2376379833,-0.9106118238]
+ ],
+ "t": [
+ [-11.88478771],
+ [180.6527832],
+ [308.9268929]
+ ]
+ },
+ {
+ "name": "11_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 3,
+ "K": [
+ [749.382,0,384.698],
+ [0,749.44,241.756],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334994,0.135003,0.000819921,0.00199466,-0.05032],
+ "R": [
+ [-0.9215516186,0.03410543981,0.3867550042],
+ [0.1287847641,0.966589567,0.2216282778],
+ [-0.3662746221,0.2540500501,-0.895154441]
+ ],
+ "t": [
+ [-28.84627719],
+ [162.2565593],
+ [311.7587167]
+ ]
+ },
+ {
+ "name": "11_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 4,
+ "K": [
+ [747.478,0,355.1],
+ [0,747.786,237.425],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332665,0.125805,0.000559145,-0.000285828,-0.0488142],
+ "R": [
+ [-0.9186497576,-0.03493542623,0.3935252708],
+ [0.05923251482,0.9726444983,0.2246200995],
+ [-0.3906073886,0.2296566914,-0.8914503195]
+ ],
+ "t": [
+ [-43.73591523],
+ [146.455357],
+ [306.7233507]
+ ]
+ },
+ {
+ "name": "11_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 5,
+ "K": [
+ [744.546,0,358.346],
+ [0,744.606,240.06],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319412,0.0357687,0.00118284,-0.000939418,0.105494],
+ "R": [
+ [-0.9252091585,0.02778676908,0.3784387777],
+ [0.1130706466,0.9721977994,0.2050523536],
+ [-0.3622196044,0.2325066328,-0.9026281759]
+ ],
+ "t": [
+ [-43.43063623],
+ [134.4377466],
+ [308.7383564]
+ ]
+ },
+ {
+ "name": "11_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 6,
+ "K": [
+ [744.682,0,386.644],
+ [0,744.47,247.576],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310524,-0.0156223,-0.000288596,-3.26402e-05,0.156674],
+ "R": [
+ [-0.9144551399,0.0484228537,0.4017798207],
+ [0.1449564791,0.9661327489,0.2134833264],
+ [-0.3778351707,0.2534615133,-0.8905042645]
+ ],
+ "t": [
+ [-44.21957265],
+ [107.5274508],
+ [309.8949628]
+ ]
+ },
+ {
+ "name": "11_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 7,
+ "K": [
+ [746.436,0,349.001],
+ [0,746.553,211.863],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330393,0.0902383,-0.000783974,-0.000712996,0.00481592],
+ "R": [
+ [-0.9105637485,0.003264968682,0.4133557789],
+ [0.1001837456,0.9718993559,0.2130137535],
+ [-0.401044732,0.2353741321,-0.8853034174]
+ ],
+ "t": [
+ [-36.21090107],
+ [102.2867759],
+ [306.6852556]
+ ]
+ },
+ {
+ "name": "11_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 8,
+ "K": [
+ [745.743,0,370.625],
+ [0,745.85,233.671],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3257,0.0614375,0.00126654,-0.000627381,0.0722474],
+ "R": [
+ [-0.8981193216,-0.01090147501,0.4396166989],
+ [0.09488580103,0.9713398361,0.2179348702],
+ [-0.4293930238,0.2374449004,-0.8713446794]
+ ],
+ "t": [
+ [-42.17364239],
+ [80.07059019],
+ [305.3107943]
+ ]
+ },
+ {
+ "name": "11_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 9,
+ "K": [
+ [743.294,0,376.993],
+ [0,743.306,225.516],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315184,-0.00458353,0.00085295,-0.000315923,0.19344],
+ "R": [
+ [-0.9287334953,0.02657190893,0.369794576],
+ [0.1072763174,0.9740215576,0.1994336907],
+ [-0.354888555,0.2248909489,-0.9074569822]
+ ],
+ "t": [
+ [4.627896612],
+ [76.0139061],
+ [305.925361]
+ ]
+ },
+ {
+ "name": "11_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 10,
+ "K": [
+ [746.981,0,373.015],
+ [0,746.916,231.087],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31553,-0.0133214,-7.49701e-05,-0.000474937,0.183355],
+ "R": [
+ [-0.897589008,-0.01428097087,0.4406018914],
+ [0.092180686,0.9712994893,0.219271574],
+ [-0.431087803,0.2374307391,-0.8705113154]
+ ],
+ "t": [
+ [-5.834972436],
+ [85.69962032],
+ [306.7617687]
+ ]
+ },
+ {
+ "name": "11_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 11,
+ "K": [
+ [743.956,0,385.014],
+ [0,743.968,233.944],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321873,0.0619652,-0.000204505,0.000631491,0.0680901],
+ "R": [
+ [-0.9171447001,-0.01735780695,0.3981762243],
+ [0.08629809142,0.9667012777,0.2409175774],
+ [-0.3890992656,0.2553181275,-0.8851070078]
+ ],
+ "t": [
+ [26.82061991],
+ [73.01187567],
+ [307.7528197]
+ ]
+ },
+ {
+ "name": "11_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 12,
+ "K": [
+ [749.192,0,349.167],
+ [0,749.113,221.266],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334032,0.094759,-0.000689735,0.000727903,0.0409048],
+ "R": [
+ [-0.937850977,-0.03419002209,0.345349949],
+ [0.06230645433,0.9623765935,0.2644791068],
+ [-0.341399254,0.2695595196,-0.9004355695]
+ ],
+ "t": [
+ [57.17130279],
+ [82.80130245],
+ [306.825197]
+ ]
+ },
+ {
+ "name": "11_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 13,
+ "K": [
+ [744.715,0,367.122],
+ [0,744.786,220.538],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315954,0.0180051,3.91318e-05,0.000697083,0.145396],
+ "R": [
+ [-0.9312656673,-0.01667316508,0.3639591494],
+ [0.07039560041,0.9718946087,0.2246448954],
+ [-0.3574754765,0.2348252013,-0.9039183639]
+ ],
+ "t": [
+ [46.96203938],
+ [112.2947483],
+ [304.8878272]
+ ]
+ },
+ {
+ "name": "11_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 14,
+ "K": [
+ [746.505,0,367.697],
+ [0,746.62,222.237],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323622,0.0629014,0.000917096,0.00064017,0.0716359],
+ "R": [
+ [-0.9260527677,-0.07925799212,0.3689775632],
+ [0.02937617957,0.9595934278,0.279852628],
+ [-0.3762490021,0.2699974518,-0.8863058527]
+ ],
+ "t": [
+ [50.81898209],
+ [116.0290364],
+ [310.1255555]
+ ]
+ },
+ {
+ "name": "11_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 15,
+ "K": [
+ [746.042,0,355.995],
+ [0,745.821,261.077],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321065,0.0443736,0.000927074,0.000280863,0.106789],
+ "R": [
+ [-0.9208600933,-0.04678508348,0.387076019],
+ [0.03581020852,0.9784294414,0.2034538209],
+ [-0.3882451771,0.2012137775,-0.8993212431]
+ ],
+ "t": [
+ [43.08113165],
+ [154.6066575],
+ [301.5640854]
+ ]
+ },
+ {
+ "name": "11_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 16,
+ "K": [
+ [741.668,0,363.735],
+ [0,741.796,217.06],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309875,-0.0179015,-1.19394e-05,-0.000437783,0.188022],
+ "R": [
+ [-0.8991061052,-0.0185684781,0.437336739],
+ [0.0842559957,0.9730755765,0.214534029],
+ [-0.4295452698,0.2297370977,-0.873333686]
+ ],
+ "t": [
+ [16.70791642],
+ [154.14567],
+ [307.2679797]
+ ]
+ },
+ {
+ "name": "11_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 17,
+ "K": [
+ [747.822,0,361.761],
+ [0,747.76,222.34],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334628,0.097635,0.00152491,-0.000486737,0.0213673],
+ "R": [
+ [-0.9162397179,0.01033450945,0.4004971626],
+ [0.1187416248,0.9617552428,0.2468345183],
+ [-0.3826293322,0.2737152732,-0.8824254888]
+ ],
+ "t": [
+ [27.8785048],
+ [159.3368695],
+ [313.9971646]
+ ]
+ },
+ {
+ "name": "11_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 18,
+ "K": [
+ [745.448,0,360.818],
+ [0,745.84,214.85],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329534,0.0903331,0.00014069,0.000717079,0.0211508],
+ "R": [
+ [-0.9101418911,0.04432675398,0.411918532],
+ [0.1391589893,0.9692024732,0.2031781034],
+ [-0.3902262342,0.2422430698,-0.888280238]
+ ],
+ "t": [
+ [16.35209076],
+ [181.679224],
+ [308.9632727]
+ ]
+ },
+ {
+ "name": "11_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 19,
+ "K": [
+ [746.167,0,363.996],
+ [0,746.229,234.387],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310901,-0.0147285,-0.000729007,-0.000655789,0.178193],
+ "R": [
+ [-0.9157731435,-0.03755396433,0.3999365568],
+ [0.06406747528,0.9692207168,0.2377110865],
+ [-0.3965537899,0.2433123544,-0.8851803149]
+ ],
+ "t": [
+ [-10.79527777],
+ [146.8696803],
+ [308.5271108]
+ ]
+ },
+ {
+ "name": "11_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 20,
+ "K": [
+ [744.588,0,384.664],
+ [0,744.662,240.853],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307863,-0.0295446,-0.000517465,0.000242427,0.189333],
+ "R": [
+ [-0.9170523574,0.0431160901,0.396429031],
+ [0.124694228,0.9752892469,0.1823793695],
+ [-0.3787694858,0.2166838427,-0.8997676305]
+ ],
+ "t": [
+ [-9.200936127],
+ [142.5227957],
+ [304.9039442]
+ ]
+ },
+ {
+ "name": "11_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 21,
+ "K": [
+ [745.832,0,378.426],
+ [0,745.825,230.649],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317765,0.041948,0.000140897,0.000331931,0.0876249],
+ "R": [
+ [-0.903416406,0.009580467792,0.4286572198],
+ [0.1299134284,0.9588705554,0.2523683006],
+ [-0.4086089801,0.2836819921,-0.8675040223]
+ ],
+ "t": [
+ [-22.38884391],
+ [100.2357286],
+ [311.942278]
+ ]
+ },
+ {
+ "name": "11_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 22,
+ "K": [
+ [745.759,0,381.189],
+ [0,746.033,229.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307738,-0.0303832,0.000694314,-0.000395606,0.211723],
+ "R": [
+ [-0.9121889441,-0.007451044875,0.4097021017],
+ [0.1102495844,0.9585035751,0.2628990789],
+ [-0.394659802,0.2849831196,-0.8735148895]
+ ],
+ "t": [
+ [-0.4671669308],
+ [91.25062129],
+ [311.8622342]
+ ]
+ },
+ {
+ "name": "11_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 23,
+ "K": [
+ [748.678,0,358.839],
+ [0,748.651,239.635],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328983,0.0919887,-1.22475e-05,-0.000911096,0.0194744],
+ "R": [
+ [-0.9251940915,-0.06790089301,0.3733702744],
+ [0.01633387562,0.9758259889,0.2179377065],
+ [-0.3791425821,0.207733262,-0.9017193545]
+ ],
+ "t": [
+ [15.23843998],
+ [129.776393],
+ [302.9631654]
+ ]
+ },
+ {
+ "name": "11_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 24,
+ "K": [
+ [747.741,0,374.843],
+ [0,747.8,238.972],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320184,0.0453956,8.07771e-05,-0.000586724,0.0799959],
+ "R": [
+ [-0.901120423,0.005145678853,0.4335383549],
+ [0.1030532182,0.9738156258,0.2026404726],
+ [-0.4211437016,0.2272809911,-0.8780554275]
+ ],
+ "t": [
+ [6.522845915],
+ [142.0951003],
+ [306.255293]
+ ]
+ },
+ {
+ "name": "12_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 1,
+ "K": [
+ [745.397,0,350.188],
+ [0,745.422,244.528],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318784,0.0421446,0.000567418,-0.000208,0.092208],
+ "R": [
+ [-0.2717431751,0.1656287556,0.9480098956],
+ [0.4128654434,0.9098857043,-0.04062180222],
+ [-0.86930879,0.3803618284,-0.3156376199]
+ ],
+ "t": [
+ [-13.70303847],
+ [97.1923903],
+ [326.2673629]
+ ]
+ },
+ {
+ "name": "12_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 2,
+ "K": [
+ [747.727,0,370.501],
+ [0,747.788,234.298],
+ [0,0,1]
+ ],
+ "distCoef": [-0.349811,0.202844,-0.00194754,-0.000389321,-0.178679],
+ "R": [
+ [-0.3883456032,0.1438043201,0.9102241537],
+ [0.3131714459,0.9495549238,-0.01640403197],
+ [-0.8666667975,0.2786857806,-0.4137908865]
+ ],
+ "t": [
+ [13.37192963],
+ [105.5473845],
+ [318.08591]
+ ]
+ },
+ {
+ "name": "12_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 3,
+ "K": [
+ [746.831,0,387.09],
+ [0,746.752,242.092],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338844,0.109538,-0.000689346,-0.00140957,-0.0011227],
+ "R": [
+ [-0.2489409576,0.07810816372,0.9653639285],
+ [0.3865744043,0.9219167609,0.0250941395],
+ [-0.8880251289,0.3794319447,-0.2596974581]
+ ],
+ "t": [
+ [-20.03334166],
+ [70.50216381],
+ [325.3775618]
+ ]
+ },
+ {
+ "name": "12_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 4,
+ "K": [
+ [746.601,0,360.45],
+ [0,746.776,222.063],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336822,0.124774,0.000206697,-0.000417774,-0.0398672],
+ "R": [
+ [-0.3081671276,0.03567998316,0.9506629057],
+ [0.4212102042,0.9011275261,0.1027187694],
+ [-0.8530035084,0.4320834647,-0.2927266543]
+ ],
+ "t": [
+ [4.764737811],
+ [63.41476985],
+ [331.1517594]
+ ]
+ },
+ {
+ "name": "12_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 5,
+ "K": [
+ [748.2,0,362.212],
+ [0,748.363,218.877],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337789,0.133894,-0.000945522,-0.000498923,-0.0570031],
+ "R": [
+ [-0.2841336654,-0.004801876737,0.9587726541],
+ [0.3831436474,0.9161034097,0.118133349],
+ [-0.8789021593,0.4009133132,-0.2584560111]
+ ],
+ "t": [
+ [10.92507323],
+ [68.32263664],
+ [329.7866549]
+ ]
+ },
+ {
+ "name": "12_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 6,
+ "K": [
+ [747.371,0,350.388],
+ [0,747.497,231.124],
+ [0,0,1]
+ ],
+ "distCoef": [-0.351189,0.233364,-0.000450075,-0.00118874,-0.265042],
+ "R": [
+ [-0.3878504716,-0.01635524947,0.9215771902],
+ [0.3346075558,0.9291346168,0.1573106717],
+ [-0.8588421248,0.3693797093,-0.3548927092]
+ ],
+ "t": [
+ [53.76493542],
+ [97.09757883],
+ [324.1315487]
+ ]
+ },
+ {
+ "name": "12_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 7,
+ "K": [
+ [747.196,0,383.602],
+ [0,747.258,260.076],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340453,0.149462,7.57635e-05,-0.00150211,-0.0810731],
+ "R": [
+ [-0.3567494973,0.01375486298,0.934098817],
+ [0.3428523716,0.9320474424,0.1172169629],
+ [-0.8690121101,0.3620750873,-0.3372233439]
+ ],
+ "t": [
+ [46.87962376],
+ [118.8343508],
+ [324.070693]
+ ]
+ },
+ {
+ "name": "12_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 8,
+ "K": [
+ [748.388,0,360.952],
+ [0,748.584,220.934],
+ [0,0,1]
+ ],
+ "distCoef": [-0.353387,0.236369,0.000317101,-0.000350889,-0.25062],
+ "R": [
+ [-0.3882650784,-0.0538394581,0.9199736636],
+ [0.3529834406,0.9134681838,0.2024316376],
+ [-0.8512654812,0.4033326047,-0.3356633588]
+ ],
+ "t": [
+ [53.63586961],
+ [124.5990463],
+ [329.2926486]
+ ]
+ },
+ {
+ "name": "12_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 9,
+ "K": [
+ [745.023,0,373.202],
+ [0,745.321,253.183],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310235,-0.0270349,0.000213071,-0.0010354,0.204812],
+ "R": [
+ [-0.3615436505,-0.1034754049,0.9265953968],
+ [0.3189620476,0.9201303682,0.2272076531],
+ [-0.8760989676,0.3776942494,-0.2996625652]
+ ],
+ "t": [
+ [26.36947949],
+ [154.1173845],
+ [328.14772]
+ ]
+ },
+ {
+ "name": "12_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 10,
+ "K": [
+ [743.497,0,337.094],
+ [0,743.775,230.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323522,0.0697077,-0.000922284,-0.00112939,0.0376595],
+ "R": [
+ [-0.409013364,-0.03192166586,0.9119698873],
+ [0.3635432206,0.9109541012,0.1949331996],
+ [-0.8369853014,0.4112707536,-0.3609874961]
+ ],
+ "t": [
+ [36.39561956],
+ [146.2733377],
+ [330.6860766]
+ ]
+ },
+ {
+ "name": "12_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 11,
+ "K": [
+ [744.432,0,350.161],
+ [0,744.664,216.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3138,0.0423232,-0.000980128,0.000347352,0.0411803],
+ "R": [
+ [-0.3625324698,0.01191238118,0.9318950067],
+ [0.4332658145,0.8874493782,0.157207936],
+ [-0.8251369234,0.4607512304,-0.3268904424]
+ ],
+ "t": [
+ [30.02223667],
+ [146.021886],
+ [340.9352409]
+ ]
+ },
+ {
+ "name": "12_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 12,
+ "K": [
+ [745.59,0,349.499],
+ [0,745.978,243.824],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328804,0.102744,-0.00034172,-0.00160085,-0.0230968],
+ "R": [
+ [-0.3184962228,0.07265474811,0.9451356747],
+ [0.3862627531,0.9204738181,0.05940568743],
+ [-0.8656565379,0.3839911948,-0.3212312573]
+ ],
+ "t": [
+ [17.04074577],
+ [180.9741057],
+ [327.7548666]
+ ]
+ },
+ {
+ "name": "12_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 13,
+ "K": [
+ [744.766,0,364.423],
+ [0,744.926,205.341],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32165,0.0514735,-0.000885848,-0.00113933,0.0656482],
+ "R": [
+ [-0.2748509499,0.06379038152,0.9593684081],
+ [0.3894986417,0.919644886,0.05043898999],
+ [-0.8790607279,0.3875358962,-0.2776115375]
+ ],
+ "t": [
+ [-9.802475588],
+ [164.1613661],
+ [327.7325897]
+ ]
+ },
+ {
+ "name": "12_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 14,
+ "K": [
+ [744.556,0,345.329],
+ [0,744.551,253.003],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311027,-0.00213006,0.0011289,-0.000863959,0.162024],
+ "R": [
+ [-0.3202755169,0.1244082889,0.9391198917],
+ [0.4530679872,0.8907277919,0.0365157459],
+ [-0.831957326,0.4371802584,-0.3416437171]
+ ],
+ "t": [
+ [0.5161253202],
+ [152.8799295],
+ [338.113135]
+ ]
+ },
+ {
+ "name": "12_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 15,
+ "K": [
+ [747.233,0,347.644],
+ [0,747.329,227.375],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323105,0.049287,-0.00101918,5.08353e-05,0.100564],
+ "R": [
+ [-0.2639942301,0.1219548974,0.9567831779],
+ [0.4010015368,0.9160569375,-0.006120025947],
+ [-0.8772142349,0.3820558732,-0.2907378472]
+ ],
+ "t": [
+ [-27.43280694],
+ [159.7105652],
+ [325.8203908]
+ ]
+ },
+ {
+ "name": "12_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 16,
+ "K": [
+ [744.634,0,382.866],
+ [0,744.52,241.14],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320913,0.0518689,0.000556907,0.000900625,0.0851061],
+ "R": [
+ [-0.2918914105,0.1153635448,0.9494686183],
+ [0.4055533141,0.9139698053,0.01362734066],
+ [-0.8662135499,0.3890378484,-0.3135660035]
+ ],
+ "t": [
+ [-22.908528],
+ [135.1916248],
+ [327.5972929]
+ ]
+ },
+ {
+ "name": "12_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 17,
+ "K": [
+ [745.929,0,399.922],
+ [0,745.76,235.115],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324412,0.0924767,0.000808772,0.00160345,0.0125449],
+ "R": [
+ [-0.2332319969,0.1531844985,0.9602798264],
+ [0.4252056559,0.9041694633,-0.04096012482],
+ [-0.8745301515,0.3987632018,-0.2760161646]
+ ],
+ "t": [
+ [-42.90434909],
+ [120.9469461],
+ [326.5490528]
+ ]
+ },
+ {
+ "name": "12_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 18,
+ "K": [
+ [745.596,0,390.427],
+ [0,745.457,235.855],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331545,0.0834192,0.000515021,-0.000851112,0.0388274],
+ "R": [
+ [-0.2198853867,0.1587089693,0.9625288982],
+ [0.4990272732,0.8661072571,-0.02880971702],
+ [-0.8382256244,0.4739933356,-0.2696444333]
+ ],
+ "t": [
+ [-48.83152805],
+ [73.52609427],
+ [332.6787653]
+ ]
+ },
+ {
+ "name": "12_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 19,
+ "K": [
+ [744.284,0,396.863],
+ [0,744.47,248.804],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318049,0.0444362,0.000417829,0.000948817,0.0847095],
+ "R": [
+ [-0.2972813843,0.0975420226,0.9497943632],
+ [0.4134272643,0.9098266462,0.03596346693],
+ [-0.8606402708,0.4033621545,-0.3108010564]
+ ],
+ "t": [
+ [-6.347004052],
+ [101.4062297],
+ [328.9550302]
+ ]
+ },
+ {
+ "name": "12_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 20,
+ "K": [
+ [745.173,0,391.68],
+ [0,745.292,239.851],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316891,0.030971,0.000827356,0.00064571,0.114679],
+ "R": [
+ [-0.3480625566,0.05516818218,0.9358466372],
+ [0.3680676982,0.9261498325,0.08229615655],
+ [-0.8621940769,0.3730991283,-0.3426637043]
+ ],
+ "t": [
+ [18.00373906],
+ [105.1024652],
+ [325.6162418]
+ ]
+ },
+ {
+ "name": "12_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 21,
+ "K": [
+ [744.07,0,385.155],
+ [0,744.184,238.534],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325321,0.0749068,6.22505e-05,8.78769e-06,0.0274316],
+ "R": [
+ [-0.2944173655,-0.00519814937,0.9556628036],
+ [0.365777539,0.9232287513,0.117709238],
+ [-0.882907247,0.3842156322,-0.2699132104]
+ ],
+ "t": [
+ [4.17424328],
+ [116.8807078],
+ [328.2455421]
+ ]
+ },
+ {
+ "name": "12_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 22,
+ "K": [
+ [747.36,0,358.25],
+ [0,747.451,237.291],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329867,0.116416,-0.000580151,-0.000763801,-0.0625995],
+ "R": [
+ [-0.323867873,0.0530845029,0.9446118972],
+ [0.387407199,0.9183241349,0.08121850418],
+ [-0.8631484594,0.3922535134,-0.3179810029]
+ ],
+ "t": [
+ [22.53106717],
+ [133.6738778],
+ [328.8995429]
+ ]
+ },
+ {
+ "name": "12_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 23,
+ "K": [
+ [748.813,0,380.156],
+ [0,748.859,237.356],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333932,0.115832,0.000621747,-0.000254241,-0.0140772],
+ "R": [
+ [-0.3097958639,0.0326105921,0.9502436908],
+ [0.3550951383,0.9310652686,0.08381472691],
+ [-0.8820056493,0.3633923705,-0.3000200319]
+ ],
+ "t": [
+ [-6.485061334],
+ [151.418855],
+ [323.8858443]
+ ]
+ },
+ {
+ "name": "12_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 24,
+ "K": [
+ [745.33,0,360.408],
+ [0,745.472,237.433],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321653,0.057929,3.69615e-05,-0.000478596,0.0560779],
+ "R": [
+ [-0.3250711399,0.1046959739,0.9398763254],
+ [0.4072848242,0.9124585149,0.03922410658],
+ [-0.8534915501,0.395547989,-0.3392550109]
+ ],
+ "t": [
+ [2.217299854],
+ [123.8595425],
+ [329.2221602]
+ ]
+ },
+ {
+ "name": "13_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 1,
+ "K": [
+ [747.6,0,355.92],
+ [0,747.783,249.853],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333712,0.144699,-6.46303e-05,-0.0011294,-0.0924471],
+ "R": [
+ [0.5138271048,0.01100033104,0.857823233],
+ [0.08358608019,0.9945184566,-0.06282043172],
+ [-0.8538120833,0.1039809221,0.5100910647]
+ ],
+ "t": [
+ [-37.95328646],
+ [135.6435695],
+ [289.9999799]
+ ]
+ },
+ {
+ "name": "13_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 2,
+ "K": [
+ [743.227,0,372.15],
+ [0,743.265,265.407],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306942,-0.0266079,0.000311285,0.000595534,0.199806],
+ "R": [
+ [0.4485620057,-0.005900946102,0.8937322339],
+ [0.06601293956,0.9974655925,-0.02654587691],
+ [-0.8913105064,0.07090536373,0.4478147055]
+ ],
+ "t": [
+ [-38.28645032],
+ [133.2984516],
+ [288.856211]
+ ]
+ },
+ {
+ "name": "13_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 3,
+ "K": [
+ [746.538,0,387.516],
+ [0,746.833,233.181],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322577,0.0715483,-4.90461e-05,0.000787497,0.0326639],
+ "R": [
+ [0.5260210271,0.02315422103,0.8501563157],
+ [0.07372016672,0.9946254291,-0.07270208278],
+ [-0.8472704504,0.1009164896,0.5214869567]
+ ],
+ "t": [
+ [-53.0750023],
+ [105.7642054],
+ [287.8235486]
+ ]
+ },
+ {
+ "name": "13_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 4,
+ "K": [
+ [744.864,0,367.763],
+ [0,745.005,229.771],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318118,0.0367901,0.000364188,-0.000713933,0.0879467],
+ "R": [
+ [0.4575577495,0.1623260474,0.8742374736],
+ [-0.0244195278,0.9851184177,-0.1701334469],
+ [-0.8888445267,0.05649741078,0.4547124916]
+ ],
+ "t": [
+ [4.756699591],
+ [110.8595803],
+ [285.3944853]
+ ]
+ },
+ {
+ "name": "13_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 5,
+ "K": [
+ [744.026,0,374.462],
+ [0,744.21,219.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309274,-0.00813814,-0.000611939,0.000562163,0.16533],
+ "R": [
+ [0.5236500196,-0.01990538858,0.8517009055],
+ [0.0479853053,0.9988290545,-0.006158764858],
+ [-0.8505810176,0.04409416531,0.5239920201]
+ ],
+ "t": [
+ [-32.80347729],
+ [91.75629107],
+ [282.6719703]
+ ]
+ },
+ {
+ "name": "13_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 6,
+ "K": [
+ [746.172,0,347.715],
+ [0,746.412,223.735],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315889,0.0243673,0.00083413,-0.000596366,0.129203],
+ "R": [
+ [0.489601615,0.07237643337,0.8689372305],
+ [-0.010214584,0.9969567785,-0.07728417735],
+ [-0.8718864151,0.02896262571,0.488850944]
+ ],
+ "t": [
+ [7.55259059],
+ [89.5920217],
+ [281.8493454]
+ ]
+ },
+ {
+ "name": "13_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 7,
+ "K": [
+ [745.619,0,383.372],
+ [0,745.683,224.508],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315816,0.0424659,0.000456201,0.000714024,0.0879752],
+ "R": [
+ [0.5142457137,-0.005076098829,0.8576278792],
+ [0.07753605572,0.9961627141,-0.04059565316],
+ [-0.8541308483,0.08737322366,0.5126659866]
+ ],
+ "t": [
+ [9.165152848],
+ [86.80281732],
+ [287.1451009]
+ ]
+ },
+ {
+ "name": "13_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 8,
+ "K": [
+ [746.151,0,390.693],
+ [0,746.159,238.847],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312796,0.0112848,0.00109903,0.000945928,0.138088],
+ "R": [
+ [0.5333632905,-0.08775347438,0.841322131],
+ [0.13459771,0.9907366672,0.0180086874],
+ [-0.8351090089,0.1036348594,0.5402339855]
+ ],
+ "t": [
+ [14.59630248],
+ [78.12680456],
+ [289.302137]
+ ]
+ },
+ {
+ "name": "13_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 9,
+ "K": [
+ [744.811,0,365.557],
+ [0,745.05,239.01],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302561,-0.0588071,-0.000331846,-0.00065645,0.252299],
+ "R": [
+ [0.515993865,0.007464548532,0.8565597538],
+ [0.05311793688,0.9977587535,-0.04069342277],
+ [-0.8549437502,0.06649624343,0.5144408941]
+ ],
+ "t": [
+ [47.02842806],
+ [101.5821868],
+ [285.7219747]
+ ]
+ },
+ {
+ "name": "13_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 10,
+ "K": [
+ [744.185,0,393.537],
+ [0,744.44,231.354],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321367,0.0639595,-3.49657e-05,0.000800078,0.0579089],
+ "R": [
+ [0.5364096096,-0.02345912583,0.8436316733],
+ [0.07330244032,0.9971310212,-0.01888064639],
+ [-0.8407683884,0.07196802054,0.536590273]
+ ],
+ "t": [
+ [31.38919798],
+ [122.486781],
+ [287.1552388]
+ ]
+ },
+ {
+ "name": "13_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 11,
+ "K": [
+ [745.973,0,365.594],
+ [0,746.037,211.677],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32905,0.0977698,-0.000962762,0.000946642,0.0190885],
+ "R": [
+ [0.5178117038,0.00482526951,0.8554810087],
+ [0.01921134431,0.9996663333,-0.01726691564],
+ [-0.8552788806,0.02537595122,0.5175462273]
+ ],
+ "t": [
+ [57.16543019],
+ [149.3252564],
+ [279.6241941]
+ ]
+ },
+ {
+ "name": "13_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 12,
+ "K": [
+ [745.909,0,358.218],
+ [0,746.022,220.333],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338571,0.148871,-0.00100229,-0.000678393,-0.0710162],
+ "R": [
+ [0.5368407815,0.02503814463,0.8433119628],
+ [-0.01156171997,0.9996840035,-0.02232083821],
+ [-0.8436043516,0.002232599467,0.5369606257]
+ ],
+ "t": [
+ [51.57359577],
+ [176.1957711],
+ [275.7319623]
+ ]
+ },
+ {
+ "name": "13_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 13,
+ "K": [
+ [743.068,0,370.139],
+ [0,743.357,232.303],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302401,-0.0553181,-0.00107418,-0.000672395,0.220417],
+ "R": [
+ [0.5299693687,-0.06080201885,0.8458342525],
+ [0.13849556,0.9902402801,-0.01559383094],
+ [-0.8366310107,0.1254085412,0.5332178257]
+ ],
+ "t": [
+ [16.99243391],
+ [145.7883087],
+ [295.0494301]
+ ]
+ },
+ {
+ "name": "13_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 14,
+ "K": [
+ [743.724,0,347.611],
+ [0,743.902,235.434],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315484,0.0296225,-0.000529931,-0.000276443,0.110913],
+ "R": [
+ [0.5388576125,-0.001120175332,0.8423961174],
+ [0.06888686412,0.9967085439,-0.04273965901],
+ [-0.8395755317,0.08106061749,0.5371611517]
+ ],
+ "t": [
+ [22.68047362],
+ [178.4537167],
+ [288.5132471]
+ ]
+ },
+ {
+ "name": "13_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 15,
+ "K": [
+ [748.48,0,370.578],
+ [0,748.498,231.761],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333743,0.123731,0.000274987,0.00129665,-0.0264397],
+ "R": [
+ [0.5569883215,-0.02228411773,0.8302213126],
+ [0.06483002391,0.9977563557,-0.01671294857],
+ [-0.827986158,0.06313218472,0.5571833177]
+ ],
+ "t": [
+ [-8.30154925],
+ [184.6918205],
+ [284.5865319]
+ ]
+ },
+ {
+ "name": "13_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 16,
+ "K": [
+ [748.413,0,364.616],
+ [0,748.358,230.166],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337541,0.138107,0.000557985,-0.000490808,-0.0648839],
+ "R": [
+ [0.5035312414,0.04830043061,0.8626258501],
+ [0.03089895722,0.996790644,-0.07384894344],
+ [-0.8634243125,0.06383948941,0.5004227975]
+ ],
+ "t": [
+ [5.312179267],
+ [173.5565462],
+ [284.5085099]
+ ]
+ },
+ {
+ "name": "13_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 17,
+ "K": [
+ [745.143,0,372.782],
+ [0,745.112,223.2],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321603,0.0646008,-0.000584526,0.000805086,0.0603349],
+ "R": [
+ [0.5471603314,0.02993221277,0.8364924593],
+ [0.06649342528,0.9946477166,-0.07908567611],
+ [-0.8343825239,0.09889379359,0.5422414789]
+ ],
+ "t": [
+ [-32.63653561],
+ [167.4383368],
+ [289.2367997]
+ ]
+ },
+ {
+ "name": "13_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 18,
+ "K": [
+ [745.136,0,373.506],
+ [0,745.259,215.704],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333755,0.12331,-0.00049301,0.00138004,-0.0323155],
+ "R": [
+ [0.5039095131,0.07384116584,0.8605943788],
+ [0.02822760746,0.9943991795,-0.1018502524],
+ [-0.8632950856,0.07561583139,0.4990028469]
+ ],
+ "t": [
+ [-29.61131213],
+ [166.0398843],
+ [286.9453226]
+ ]
+ },
+ {
+ "name": "13_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 19,
+ "K": [
+ [743.638,0,344.046],
+ [0,743.783,238.416],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319291,0.0355055,-0.000169258,0.000161892,0.118247],
+ "R": [
+ [0.5180347054,0.01180967192,0.8552780692],
+ [0.1057363227,0.9913513706,-0.07773216881],
+ [-0.8487990775,0.1307019191,0.512305704]
+ ],
+ "t": [
+ [-19.08174331],
+ [122.2280138],
+ [293.3272927]
+ ]
+ },
+ {
+ "name": "13_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 20,
+ "K": [
+ [745.321,0,372.761],
+ [0,745.559,236.547],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320489,0.0479206,-9.03328e-05,-0.000256288,0.0784864],
+ "R": [
+ [0.4966252135,-0.01754426777,0.8677877598],
+ [0.06583916704,0.9976766247,-0.01750875645],
+ [-0.8654643848,0.06582971318,0.4966264667]
+ ],
+ "t": [
+ [-11.61163777],
+ [120.2765647],
+ [285.1928757]
+ ]
+ },
+ {
+ "name": "13_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 21,
+ "K": [
+ [745.539,0,371.886],
+ [0,745.656,230.519],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326644,0.0839413,-0.000557984,0.000204085,0.0126328],
+ "R": [
+ [0.5330371562,-0.03752357961,0.8452593514],
+ [0.08887796824,0.9959722199,-0.01183402057],
+ [-0.8414107777,0.08143290645,0.5342252193]
+ ],
+ "t": [
+ [-6.03247131],
+ [109.6165459],
+ [286.9430377]
+ ]
+ },
+ {
+ "name": "13_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 22,
+ "K": [
+ [744.018,0,396.717],
+ [0,744.224,249.141],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315372,0.0205822,-0.000440151,0.000134817,0.105074],
+ "R": [
+ [0.4984198723,-0.001673636668,0.8669341554],
+ [0.03130878513,0.9993805529,-0.01607079461],
+ [-0.8663702389,0.03515265859,0.4981635271]
+ ],
+ "t": [
+ [26.09238071],
+ [136.8142763],
+ [280.4949188]
+ ]
+ },
+ {
+ "name": "13_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 23,
+ "K": [
+ [744.884,0,382.514],
+ [0,744.877,235.74],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326378,0.0966908,-9.48994e-05,0.00105607,0.00534895],
+ "R": [
+ [0.4908089633,-0.01723518027,0.8710967283],
+ [0.04978157704,0.9987257364,-0.008288432131],
+ [-0.8698438688,0.04743260567,0.4910415377]
+ ],
+ "t": [
+ [21.95453226],
+ [154.6836493],
+ [281.6596012]
+ ]
+ },
+ {
+ "name": "13_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 24,
+ "K": [
+ [744.481,0,341.813],
+ [0,744.509,213.322],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310201,-0.0109775,-0.00130948,-0.000370453,0.189258],
+ "R": [
+ [0.5283332962,-0.01827851401,0.8488402818],
+ [0.07383881778,0.996969434,-0.02449033896],
+ [-0.8458201683,0.0756164244,0.5280818111]
+ ],
+ "t": [
+ [-10.59416721],
+ [149.8670778],
+ [286.3856475]
+ ]
+ },
+ {
+ "name": "14_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 1,
+ "K": [
+ [745.639,0,394.42],
+ [0,745.872,232.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317821,0.05701,0.000216723,0.00145431,0.0516441],
+ "R": [
+ [0.1117244957,0.006687085701,0.9937167202],
+ [0.1929264895,0.9808052728,-0.02829110459],
+ [-0.9748317838,0.1948750877,0.1082898585]
+ ],
+ "t": [
+ [-10.76838593],
+ [183.2092961],
+ [300.2249606]
+ ]
+ },
+ {
+ "name": "14_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 2,
+ "K": [
+ [744.265,0,384.24],
+ [0,744.607,234.555],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314122,0.0172489,-0.000351192,-3.05431e-05,0.116521],
+ "R": [
+ [0.09126102309,0.01926845044,0.9956405739],
+ [0.1889483007,0.9813154942,-0.03631033643],
+ [-0.9777371658,0.191438313,0.08591511501]
+ ],
+ "t": [
+ [-20.54744948],
+ [195.8515337],
+ [299.6149103]
+ ]
+ },
+ {
+ "name": "14_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 3,
+ "K": [
+ [742.909,0,383.13],
+ [0,743.051,234.161],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311566,0.0211516,-0.000212815,-9.64233e-05,0.110817],
+ "R": [
+ [0.07658267666,-0.01244461629,0.9969855692],
+ [0.2193131093,0.9756433613,-0.004668149478],
+ [-0.9726442586,0.2190095044,0.07744664757]
+ ],
+ "t": [
+ [-39.95619704],
+ [171.7405641],
+ [305.3439137]
+ ]
+ },
+ {
+ "name": "14_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 4,
+ "K": [
+ [745.057,0,349.277],
+ [0,745.321,214.2],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31581,0.0237721,-0.00140945,-0.000667487,0.124292],
+ "R": [
+ [0.09341145846,-0.02354383001,0.9953491787],
+ [0.2305453591,0.9730606003,0.001380415192],
+ [-0.9685675696,0.2293441873,0.09632293059]
+ ],
+ "t": [
+ [-43.73412593],
+ [146.7921304],
+ [306.2893961]
+ ]
+ },
+ {
+ "name": "14_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 5,
+ "K": [
+ [744.634,0,387.597],
+ [0,744.752,225.246],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315944,0.0434616,-0.000268259,0.00110436,0.0780237],
+ "R": [
+ [0.1133728096,0.0374780752,0.9928454059],
+ [0.2222309073,0.973014014,-0.06210597779],
+ [-0.9683801061,0.2276820645,0.1019845459]
+ ],
+ "t": [
+ [-53.79623552],
+ [137.113178],
+ [305.5099477]
+ ]
+ },
+ {
+ "name": "14_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 6,
+ "K": [
+ [744.759,0,388.645],
+ [0,744.666,221.73],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306159,-0.0283273,-0.000508774,0.00094455,0.192402],
+ "R": [
+ [0.1564984143,0.01913164242,0.9874928995],
+ [0.2309282446,0.9713913042,-0.05541732523],
+ [-0.96030224,0.2367127254,0.1476031622]
+ ],
+ "t": [
+ [-66.24261018],
+ [112.7515407],
+ [303.5978047]
+ ]
+ },
+ {
+ "name": "14_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 7,
+ "K": [
+ [744.959,0,375.286],
+ [0,745.092,235.744],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302136,-0.0624017,-0.000302824,-0.00146028,0.239945],
+ "R": [
+ [0.0628689268,0.03077162571,0.9975472947],
+ [0.2444661638,0.9685997585,-0.04528578729],
+ [-0.967617586,0.2467136292,0.05337220603]
+ ],
+ "t": [
+ [-19.11814477],
+ [98.74694092],
+ [308.9777955]
+ ]
+ },
+ {
+ "name": "14_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 8,
+ "K": [
+ [746.649,0,384.752],
+ [0,746.836,237.267],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321628,0.0600031,0.000104796,0.000953791,0.0524376],
+ "R": [
+ [0.1158239713,-0.07384920575,0.9905206219],
+ [0.2473198554,0.9679682291,0.043248082],
+ [-0.9619863288,0.2399662524,0.1303782992]
+ ],
+ "t": [
+ [-45.76229918],
+ [76.40869106],
+ [305.3733784]
+ ]
+ },
+ {
+ "name": "14_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 9,
+ "K": [
+ [745.672,0,372.774],
+ [0,745.737,209.129],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30917,-0.00857977,-4.68803e-05,-0.000521617,0.17194],
+ "R": [
+ [0.1233501146,0.01050711315,0.9923075883],
+ [0.2153087978,0.9758411417,-0.0370970036],
+ [-0.9687243523,0.2182284735,0.1181078428]
+ ],
+ "t": [
+ [-15.44854612],
+ [78.73632155],
+ [304.5944309]
+ ]
+ },
+ {
+ "name": "14_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 10,
+ "K": [
+ [744.36,0,350.493],
+ [0,744.605,227.167],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324539,0.0696676,-0.000964917,-0.000688724,0.0453805],
+ "R": [
+ [0.0653712546,0.005547467364,0.9978455916],
+ [0.2748842968,0.9611936881,-0.02335203178],
+ [-0.9592524289,0.2758186354,0.06130952564]
+ ],
+ "t": [
+ [17.36142141],
+ [73.86484437],
+ [309.5485763]
+ ]
+ },
+ {
+ "name": "14_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 11,
+ "K": [
+ [744.072,0,352.953],
+ [0,744.032,218.847],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310531,-0.00866492,-5.61729e-06,0.000627577,0.179884],
+ "R": [
+ [0.08325845442,0.01268657881,0.9964472292],
+ [0.1993298125,0.97949952,-0.02912586749],
+ [-0.9763890903,0.2010466141,0.07902280276]
+ ],
+ "t": [
+ [33.26019053],
+ [89.58305599],
+ [303.0664402]
+ ]
+ },
+ {
+ "name": "14_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 12,
+ "K": [
+ [743.677,0,359.077],
+ [0,743.623,233.815],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305265,-0.0518121,0.000714314,0.000432839,0.265088],
+ "R": [
+ [0.06818541392,0.004787243789,0.9976611808],
+ [0.2533830838,0.9671167716,-0.02195821049],
+ [-0.9649599796,0.2542876962,0.06473025078]
+ ],
+ "t": [
+ [54.03449748],
+ [85.53998459],
+ [306.9876015]
+ ]
+ },
+ {
+ "name": "14_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 13,
+ "K": [
+ [742.736,0,368.122],
+ [0,742.832,238.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303469,-0.0412536,1.82225e-05,-0.000473228,0.205739],
+ "R": [
+ [0.1225239282,-0.0735967149,0.9897329996],
+ [0.2305366224,0.9720798639,0.0437447595],
+ [-0.9653189902,0.222809923,0.1360697815]
+ ],
+ "t": [
+ [17.43625272],
+ [116.7070017],
+ [307.0317679]
+ ]
+ },
+ {
+ "name": "14_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 14,
+ "K": [
+ [745.328,0,371.219],
+ [0,745.487,209.713],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318297,0.0286867,-0.0013247,0.000626009,0.137928],
+ "R": [
+ [0.06972690557,-0.0276618613,0.9971825209],
+ [0.2175762615,0.9759712693,0.01185967683],
+ [-0.9735495514,0.2161363064,0.0740700209]
+ ],
+ "t": [
+ [57.75964066],
+ [131.0709572],
+ [303.578107]
+ ]
+ },
+ {
+ "name": "14_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 15,
+ "K": [
+ [743.637,0,370.163],
+ [0,743.479,235.403],
+ [0,0,1]
+ ],
+ "distCoef": [-0.301307,-0.0600698,0.000220332,0.000264974,0.263845],
+ "R": [
+ [0.0871387997,-0.1078492175,0.9903410402],
+ [0.2171380052,0.9722761796,0.08677624828],
+ [-0.9722437535,0.2074790999,0.1081411432]
+ ],
+ "t": [
+ [27.10934266],
+ [155.0300785],
+ [303.8314173]
+ ]
+ },
+ {
+ "name": "14_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 16,
+ "K": [
+ [747.749,0,388.765],
+ [0,747.73,234.855],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320028,0.057848,-0.00103044,0.00101463,0.0716113],
+ "R": [
+ [0.09276252326,-0.02731891999,0.9953134134],
+ [0.2004837996,0.9796626634,0.008204393401],
+ [-0.9752955246,0.1987831547,0.09635298148]
+ ],
+ "t": [
+ [25.02944215],
+ [165.1686099],
+ [301.5459594]
+ ]
+ },
+ {
+ "name": "14_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 17,
+ "K": [
+ [745.477,0,358.035],
+ [0,745.633,228.78],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315933,0.0359808,-0.000244793,0.00106736,0.101835],
+ "R": [
+ [0.09323456203,-0.04884472803,0.9944453273],
+ [0.1997864834,0.9793990461,0.02937464128],
+ [-0.9753936013,0.1959380031,0.1010723576]
+ ],
+ "t": [
+ [12.52671676],
+ [185.8338565],
+ [300.6683817]
+ ]
+ },
+ {
+ "name": "14_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 19,
+ "K": [
+ [746.962,0,392.223],
+ [0,747.34,219.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325078,0.0885503,-0.00165532,0.000580691,0.0160315],
+ "R": [
+ [0.129696032,0.03909405168,0.990782819],
+ [0.1776002444,0.9821476201,-0.06200165731],
+ [-0.9755188837,0.1840046397,0.1204375361]
+ ],
+ "t": [
+ [-4.746570817],
+ [166.089254],
+ [298.9402723]
+ ]
+ },
+ {
+ "name": "14_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 20,
+ "K": [
+ [744.91,0,339.915],
+ [0,744.956,221.133],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306862,-0.0244375,-6.76743e-05,-0.000102471,0.205298],
+ "R": [
+ [0.09943504227,-0.007298095184,0.9950172914],
+ [0.2125993636,0.9770380132,-0.01407946415],
+ [-0.9720669642,0.212940035,0.09870338653]
+ ],
+ "t": [
+ [-22.7866272],
+ [143.0595857],
+ [303.8181509]
+ ]
+ },
+ {
+ "name": "14_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 21,
+ "K": [
+ [743.577,0,349.797],
+ [0,743.73,227.793],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307046,-0.0206712,-0.000861395,-9.97172e-05,0.196115],
+ "R": [
+ [0.09969364468,-0.01462231859,0.9949107322],
+ [0.2541863771,0.9670897407,-0.01125696175],
+ [-0.9620033591,0.2540150021,0.1001294952]
+ ],
+ "t": [
+ [-20.43364439],
+ [109.4423166],
+ [308.9174676]
+ ]
+ },
+ {
+ "name": "14_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 22,
+ "K": [
+ [745.066,0,381.498],
+ [0,745.047,229.678],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314894,0.0257947,-0.000483886,0.00117112,0.111876],
+ "R": [
+ [0.08696832552,-0.05294226024,0.9948033109],
+ [0.2154078845,0.9759627551,0.03310806346],
+ [-0.9726437959,0.2114091239,0.09628202687]
+ ],
+ "t": [
+ [-4.298071534],
+ [115.0382234],
+ [303.8536261]
+ ]
+ },
+ {
+ "name": "14_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 23,
+ "K": [
+ [746.602,0,379.206],
+ [0,746.635,260.689],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319922,0.0568918,0.00103779,-0.000422086,0.0766843],
+ "R": [
+ [0.09129519856,-0.01052008078,0.9957683037],
+ [0.2195471399,0.9755524467,-0.009822274065],
+ [-0.9713208739,0.2195148095,0.09137290798]
+ ],
+ "t": [
+ [18.69590833],
+ [125.3942709],
+ [304.7857903]
+ ]
+ },
+ {
+ "name": "14_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 24,
+ "K": [
+ [745.388,0,382.392],
+ [0,745.496,224.015],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302393,-0.0525763,-0.000559682,-6.77e-05,0.234314],
+ "R": [
+ [0.08118536371,-0.04636746828,0.9956199047],
+ [0.1796446798,0.9832385033,0.03114216711],
+ [-0.9803758084,0.1763295309,0.0881542445]
+ ],
+ "t": [
+ [8.147122648],
+ [159.0280693],
+ [298.1193244]
+ ]
+ },
+ {
+ "name": "15_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 1,
+ "K": [
+ [747.532,0,374.739],
+ [0,747.668,233.944],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331439,0.109037,-0.000609362,0.000392501,-0.000621335],
+ "R": [
+ [0.7848571462,0.05717032211,0.6170338843],
+ [0.1817012858,0.9307358272,-0.3173569956],
+ [-0.5924389444,0.3611957561,0.7201067442]
+ ],
+ "t": [
+ [-19.59276639],
+ [102.5270366],
+ [325.6365462]
+ ]
+ },
+ {
+ "name": "15_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 2,
+ "K": [
+ [743.597,0,385.764],
+ [0,743.786,211.188],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307778,-0.0279819,-0.000454196,0.00143268,0.205643],
+ "R": [
+ [0.7963392439,-0.01332837804,0.6047033677],
+ [0.2601504211,0.910106147,-0.3225345868],
+ [-0.5460453892,0.4141607847,0.7282206241]
+ ],
+ "t": [
+ [-38.00771612],
+ [61.10094736],
+ [329.1235579]
+ ]
+ },
+ {
+ "name": "15_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 3,
+ "K": [
+ [746.709,0,382.284],
+ [0,746.792,243.451],
+ [0,0,1]
+ ],
+ "distCoef": [-0.343209,0.149416,0.000603517,0.00195788,-0.0395936],
+ "R": [
+ [0.7773715491,0.01124156294,0.6289412548],
+ [0.2547080739,0.908583342,-0.3310590698],
+ [-0.5751671686,0.4175523175,0.7034435232]
+ ],
+ "t": [
+ [-3.435783379],
+ [55.70511308],
+ [330.3798829]
+ ]
+ },
+ {
+ "name": "15_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 4,
+ "K": [
+ [743.976,0,365.248],
+ [0,744.344,229.757],
+ [0,0,1]
+ ],
+ "distCoef": [-0.297483,-0.106842,0.000162294,-0.00147347,0.393874],
+ "R": [
+ [0.7524447247,-0.05297584633,0.6565215122],
+ [0.2825071426,0.9263759092,-0.2490329079],
+ [-0.5949929838,0.3728555143,0.7120127209]
+ ],
+ "t": [
+ [9.049706825],
+ [87.26745214],
+ [326.8342451]
+ ]
+ },
+ {
+ "name": "15_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 5,
+ "K": [
+ [748.766,0,349.367],
+ [0,748.975,233.229],
+ [0,0,1]
+ ],
+ "distCoef": [-0.341466,0.149186,0.00133441,-0.000377568,-0.0615035],
+ "R": [
+ [0.7609990379,-0.1304343502,0.6355055818],
+ [0.3323849453,0.9196335935,-0.2092708816],
+ [-0.5571361704,0.3704874276,0.7431946943]
+ ],
+ "t": [
+ [9.029843232],
+ [83.469382],
+ [327.9910328]
+ ]
+ },
+ {
+ "name": "15_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 6,
+ "K": [
+ [747.104,0,395.739],
+ [0,747.205,237.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337038,0.14046,-0.00100634,0.00170735,-0.0468264],
+ "R": [
+ [0.7339738121,-0.1238803965,0.6677844641],
+ [0.3595276943,0.9050347286,-0.227270713],
+ [-0.5762137452,0.4068977603,0.7088102232]
+ ],
+ "t": [
+ [34.88470946],
+ [89.42074723],
+ [330.2467181]
+ ]
+ },
+ {
+ "name": "15_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 7,
+ "K": [
+ [743.991,0,393.18],
+ [0,744.112,255.459],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325283,0.0732539,0.00077889,1.70805e-05,0.0462558],
+ "R": [
+ [0.7496842409,-0.1571943749,0.6428557128],
+ [0.3434403747,0.9227495198,-0.1748771933],
+ [-0.5657050892,0.3518852828,0.7457576683]
+ ],
+ "t": [
+ [12.35233863],
+ [128.2674639],
+ [324.6313017]
+ ]
+ },
+ {
+ "name": "15_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 8,
+ "K": [
+ [744.616,0,369.102],
+ [0,744.835,223.742],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336732,0.141968,-0.000206183,0.000677154,-0.0657397],
+ "R": [
+ [0.7264947252,-0.2131742795,0.6532703428],
+ [0.4249899792,0.8864309285,-0.1833677358],
+ [-0.5399897516,0.4108490422,0.7345843265]
+ ],
+ "t": [
+ [15.28675757],
+ [126.0458703],
+ [333.4285141]
+ ]
+ },
+ {
+ "name": "15_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 9,
+ "K": [
+ [747.517,0,392.733],
+ [0,747.836,218.574],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334626,0.113242,0.000443349,0.00121381,-0.00550976],
+ "R": [
+ [0.8000319441,0.07155257429,0.5956753458],
+ [0.1937456116,0.9088549369,-0.3693850858],
+ [-0.5678129326,0.4109293525,0.7132499848]
+ ],
+ "t": [
+ [-44.09712116],
+ [90.97242653],
+ [330.2186197]
+ ]
+ },
+ {
+ "name": "15_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 10,
+ "K": [
+ [743.904,0,354.135],
+ [0,744.494,220.038],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309276,-0.0261099,-0.00127318,0.000283377,0.220693],
+ "R": [
+ [0.7314656006,-0.1499734814,0.6651812009],
+ [0.3639090401,0.9108337109,-0.1948131455],
+ [-0.576652656,0.3845645668,0.720820233]
+ ],
+ "t": [
+ [2.360923884],
+ [158.0207055],
+ [327.7017732]
+ ]
+ },
+ {
+ "name": "15_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 11,
+ "K": [
+ [745.441,0,366.024],
+ [0,745.471,238.165],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311636,0.00305556,-0.00136926,0.00112458,0.163822],
+ "R": [
+ [0.743215427,-0.1065195831,0.660518287],
+ [0.3430146167,0.9082888556,-0.2394834597],
+ [-0.5744317207,0.4045552288,0.7115920636]
+ ],
+ "t": [
+ [3.38448511],
+ [170.5922255],
+ [331.2143489]
+ ]
+ },
+ {
+ "name": "15_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 12,
+ "K": [
+ [743.816,0,384.478],
+ [0,744.21,221.813],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309294,-0.0116228,-0.000777235,0.00017565,0.174372],
+ "R": [
+ [0.799529392,-0.03302696284,0.5997182431],
+ [0.261290645,0.91817945,-0.2977812898],
+ [-0.540814155,0.3947856601,0.7427410938]
+ ],
+ "t": [
+ [-15.11731065],
+ [179.1857595],
+ [329.2699106]
+ ]
+ },
+ {
+ "name": "15_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 13,
+ "K": [
+ [744.594,0,366.809],
+ [0,744.805,211.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313339,0.0076854,-0.000770441,0.000328229,0.137582],
+ "R": [
+ [0.7697001229,-0.07364256128,0.6341439064],
+ [0.280866324,0.9310898592,-0.2327783971],
+ [-0.5733025631,0.3572792288,0.7373436945]
+ ],
+ "t": [
+ [-27.06753178],
+ [173.6081799],
+ [322.2797536]
+ ]
+ },
+ {
+ "name": "15_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 14,
+ "K": [
+ [744.088,0,376.311],
+ [0,744.421,235.85],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308902,-0.0157485,-0.000258056,-0.00040893,0.167363],
+ "R": [
+ [0.8019727226,0.02030217439,0.5970155559],
+ [0.20788107,0.9274680659,-0.31078682],
+ [-0.5600225111,0.3733507848,0.7395836522]
+ ],
+ "t": [
+ [-32.35663304],
+ [177.8511702],
+ [324.3990212]
+ ]
+ },
+ {
+ "name": "15_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 15,
+ "K": [
+ [745.471,0,391.786],
+ [0,745.597,244.782],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319471,0.0520955,-9.03549e-05,0.00103599,0.0679082],
+ "R": [
+ [0.7993824794,0.07801580494,0.5957358356],
+ [0.170767806,0.9211391478,-0.3497728217],
+ [-0.5760434082,0.3813347671,0.723019908]
+ ],
+ "t": [
+ [-27.66881494],
+ [158.8808021],
+ [326.8395357]
+ ]
+ },
+ {
+ "name": "15_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 16,
+ "K": [
+ [744.688,0,372.572],
+ [0,744.687,232.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313079,0.00611683,0.000601543,0.00134427,0.153664],
+ "R": [
+ [0.8032635264,0.07397377164,0.5910123419],
+ [0.1542914416,0.9325457224,-0.3264239985],
+ [-0.5752928456,0.3533926383,0.7376664456]
+ ],
+ "t": [
+ [-29.95169554],
+ [148.2901373],
+ [322.192073]
+ ]
+ },
+ {
+ "name": "15_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 17,
+ "K": [
+ [746.029,0,371.631],
+ [0,745.957,227.751],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328618,0.10871,0.000376647,0.00140085,-0.015131],
+ "R": [
+ [0.7930332571,0.09578045983,0.6016014933],
+ [0.1573865304,0.9218193412,-0.3542295616],
+ [-0.5884961625,0.3755997947,0.7159588403]
+ ],
+ "t": [
+ [-34.37744536],
+ [124.5681533],
+ [326.9926029]
+ ]
+ },
+ {
+ "name": "15_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 18,
+ "K": [
+ [745.728,0,355.008],
+ [0,745.836,235.366],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326785,0.0753795,-0.00141997,0.000421746,0.0593081],
+ "R": [
+ [0.7423074724,-0.1183757606,0.6595201254],
+ [0.3246236378,0.9245812728,-0.1994215728],
+ [-0.5861732766,0.362127946,0.7247511576]
+ ],
+ "t": [
+ [30.16113415],
+ [163.1800117],
+ [323.8887405]
+ ]
+ },
+ {
+ "name": "15_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 19,
+ "K": [
+ [745.415,0,362.511],
+ [0,745.431,246.567],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31824,0.0392935,0.000511921,2.0382e-05,0.0980721],
+ "R": [
+ [0.7792023734,-0.03485918818,0.6258022837],
+ [0.250771695,0.9323920084,-0.2603050127],
+ [-0.5744190268,0.3597637832,0.7352637636]
+ ],
+ "t": [
+ [-23.21577405],
+ [116.3982595],
+ [324.3931588]
+ ]
+ },
+ {
+ "name": "15_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 20,
+ "K": [
+ [745.757,0,370.457],
+ [0,745.798,252.296],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322058,0.058259,0.000816175,0.000770211,0.0698692],
+ "R": [
+ [0.7754488131,-0.03297117701,0.6305489986],
+ [0.2704225106,0.9197540051,-0.2844718542],
+ [-0.5705705951,0.391108005,0.7221383001]
+ ],
+ "t": [
+ [-0.5150360293],
+ [101.3336776],
+ [328.6175717]
+ ]
+ },
+ {
+ "name": "15_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 21,
+ "K": [
+ [746.009,0,385.23],
+ [0,746.113,244.377],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328614,0.0717398,0.00119782,0.000153035,0.0631847],
+ "R": [
+ [0.7150247804,-0.1629175474,0.6798510396],
+ [0.3900461789,0.9000077369,-0.194550898],
+ [-0.5801754405,0.4042820134,0.7070732013]
+ ],
+ "t": [
+ [2.095653738],
+ [113.9962742],
+ [330.0144097]
+ ]
+ },
+ {
+ "name": "15_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 22,
+ "K": [
+ [747.044,0,384.928],
+ [0,747.43,218.136],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332061,0.0970763,-0.00131827,0.000796644,0.024739],
+ "R": [
+ [0.7476996574,-0.1120966581,0.6545071135],
+ [0.3349363173,0.9147459603,-0.2259590484],
+ [-0.5733784838,0.3881677053,0.7215004829]
+ ],
+ "t": [
+ [-3.202807266],
+ [138.4357179],
+ [328.3283502]
+ ]
+ },
+ {
+ "name": "15_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 23,
+ "K": [
+ [746.525,0,381.586],
+ [0,746.566,231.744],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323751,0.0809499,0.00143311,0.000786746,0.0334271],
+ "R": [
+ [0.7874675535,-0.04961201835,0.6143561669],
+ [0.2785108695,0.9178324582,-0.2828697124],
+ [-0.5498422936,0.3938555906,0.7365807667]
+ ],
+ "t": [
+ [-21.67007007],
+ [141.1281207],
+ [328.549187]
+ ]
+ },
+ {
+ "name": "15_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 24,
+ "K": [
+ [744.493,0,392.291],
+ [0,744.573,223.193],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308278,-0.0176562,-0.000671893,0.00116828,0.17277],
+ "R": [
+ [0.7758686755,-0.01407586642,0.6307374005],
+ [0.2927445364,0.8936390769,-0.3401614861],
+ [-0.5588635207,0.4485655695,0.6974672]
+ ],
+ "t": [
+ [-20.05926183],
+ [105.1778582],
+ [335.8474538]
+ ]
+ },
+ {
+ "name": "16_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 1,
+ "K": [
+ [745.918,0,380.409],
+ [0,745.86,226.454],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329171,0.0901569,-0.000500393,-0.000311386,0.0200307],
+ "R": [
+ [0.8121486446,0.04341076946,0.5818333819],
+ [-0.0759194996,0.9966126489,0.03161419974],
+ [-0.5784901112,-0.06984792866,0.8126933358]
+ ],
+ "t": [
+ [55.6088262],
+ [125.3657692],
+ [265.9940479]
+ ]
+ },
+ {
+ "name": "16_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 2,
+ "K": [
+ [747.364,0,392.411],
+ [0,747.161,225.523],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325367,0.0819479,0.000479765,0.00158774,0.0247525],
+ "R": [
+ [0.8168932447,0.07701494166,0.5716241121],
+ [-0.08391193553,0.9963702084,-0.01432462351],
+ [-0.5706524458,-0.03626439747,0.8203905653]
+ ],
+ "t": [
+ [75.42528996],
+ [124.1426197],
+ [270.1790967]
+ ]
+ },
+ {
+ "name": "16_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 3,
+ "K": [
+ [744.743,0,378.771],
+ [0,744.551,249.858],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319546,0.0369202,-5.08119e-05,0.00111176,0.115068],
+ "R": [
+ [0.8437113062,0.07102371173,0.5320778742],
+ [-0.08587784221,0.9963005803,0.003185889303],
+ [-0.5298832211,-0.04838167055,0.8466894271]
+ ],
+ "t": [
+ [57.15960424],
+ [150.0301024],
+ [271.4615922]
+ ]
+ },
+ {
+ "name": "16_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 4,
+ "K": [
+ [745.916,0,377.522],
+ [0,746.078,215.704],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32195,0.0590592,-0.000295617,0.000900619,0.0691531],
+ "R": [
+ [0.8298382679,0.121110683,0.5447023514],
+ [-0.1306769278,0.9911961099,-0.02130286834],
+ [-0.5424868568,-0.05350209448,0.8383588349]
+ ],
+ "t": [
+ [50.00635036],
+ [157.1807453],
+ [269.6015294]
+ ]
+ },
+ {
+ "name": "16_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 5,
+ "K": [
+ [745.303,0,378.655],
+ [0,745.572,246.962],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315703,0.0277156,6.06815e-05,0.000389915,0.121683],
+ "R": [
+ [0.8187116226,0.05412921644,0.5716478872],
+ [-0.09011941267,0.9953220251,0.0348218015],
+ [-0.5670888559,-0.08002558546,0.8197598034]
+ ],
+ "t": [
+ [44.81120287],
+ [188.347539],
+ [263.8787228]
+ ]
+ },
+ {
+ "name": "16_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 6,
+ "K": [
+ [745.606,0,364.995],
+ [0,745.957,239.275],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315328,0.0257972,-0.000148911,-0.000553771,0.11289],
+ "R": [
+ [0.8250072615,0.03741598225,0.5638821355],
+ [-0.06134414867,0.997839028,0.02354080738],
+ [-0.5617827996,-0.05401220659,0.8255196955]
+ ],
+ "t": [
+ [18.96573731],
+ [189.9536973],
+ [269.3804852]
+ ]
+ },
+ {
+ "name": "16_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 7,
+ "K": [
+ [748.144,0,375.351],
+ [0,748.158,222.981],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330846,0.0923667,0.000924419,-0.000952259,0.0155541],
+ "R": [
+ [0.837010476,0.04764620621,0.5451085232],
+ [-0.06946161724,0.9973944363,0.0194787641],
+ [-0.542760119,-0.05416804921,0.8381391744]
+ ],
+ "t": [
+ [-3.044263505],
+ [177.2440129],
+ [269.3681033]
+ ]
+ },
+ {
+ "name": "16_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 8,
+ "K": [
+ [744.865,0,367.243],
+ [0,744.958,216.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318901,0.0494498,-4.02299e-05,-0.00132469,0.0675277],
+ "R": [
+ [0.820488273,0.02086231711,0.571282555],
+ [-0.05401864215,0.9976917237,0.04114864192],
+ [-0.569105421,-0.06462188605,0.8197213134]
+ ],
+ "t": [
+ [-19.55260409],
+ [185.7078501],
+ [268.0867658]
+ ]
+ },
+ {
+ "name": "16_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 9,
+ "K": [
+ [747.002,0,387.115],
+ [0,747.11,221.005],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330535,0.106093,-0.000909516,-0.000158007,-0.000767667],
+ "R": [
+ [0.7988895638,0.03324884852,0.6005580562],
+ [-0.04929092881,0.9987315997,0.01027599727],
+ [-0.5994546431,-0.03781145137,0.7995151187]
+ ],
+ "t": [
+ [-23.46737596],
+ [164.4653247],
+ [274.3468777]
+ ]
+ },
+ {
+ "name": "16_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 10,
+ "K": [
+ [747.13,0,370.332],
+ [0,747.181,215.13],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317083,0.0321021,0.000973109,0.00011315,0.117938],
+ "R": [
+ [0.8533830718,-0.04475694932,0.5193593633],
+ [-0.01101437775,0.9945367161,0.1038046423],
+ [-0.5211679348,-0.09430554471,0.8482278279]
+ ],
+ "t": [
+ [-57.15311463],
+ [154.6074069],
+ [261.7210039]
+ ]
+ },
+ {
+ "name": "16_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 11,
+ "K": [
+ [743.847,0,352.444],
+ [0,743.813,257.427],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317406,0.0378558,0.000559662,0.00156409,0.0978841],
+ "R": [
+ [0.8306368039,-0.006305585156,0.5567788965],
+ [-0.01286906876,0.999451376,0.03051776569],
+ [-0.5566658666,-0.03251440526,0.8300999496]
+ ],
+ "t": [
+ [-55.68789985],
+ [125.5954887],
+ [272.609285]
+ ]
+ },
+ {
+ "name": "16_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 12,
+ "K": [
+ [744.746,0,358.295],
+ [0,744.902,240.075],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311924,0.00313238,0.000282789,0.000109914,0.161883],
+ "R": [
+ [0.8248636519,0.04296544146,0.5636966618],
+ [-0.06337887364,0.9978500361,0.01668603434],
+ [-0.5617678116,-0.04949016272,0.8258133262]
+ ],
+ "t": [
+ [-45.5470475],
+ [111.3455785],
+ [270.6081331]
+ ]
+ },
+ {
+ "name": "16_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 13,
+ "K": [
+ [742.599,0,373.118],
+ [0,742.696,232.489],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30659,-0.0244311,-0.000674534,-0.000450328,0.198624],
+ "R": [
+ [0.8431633834,0.1596479738,0.5134082522],
+ [-0.1755645793,0.9843078819,-0.01775026834],
+ [-0.5081855837,-0.07516992751,0.8579608934]
+ ],
+ "t": [
+ [-27.27822308],
+ [119.4613899],
+ [265.3318331]
+ ]
+ },
+ {
+ "name": "16_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 14,
+ "K": [
+ [745.804,0,370.921],
+ [0,745.998,236.13],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32821,0.0986121,-0.000141995,-6.949e-05,-0.000912797],
+ "R": [
+ [0.8387309717,0.02755081107,0.5438486094],
+ [-0.05712815546,0.9976599438,0.03756341813],
+ [-0.5415410705,-0.06257467009,0.8383422211]
+ ],
+ "t": [
+ [-30.56519475],
+ [90.10611059],
+ [268.3571691]
+ ]
+ },
+ {
+ "name": "16_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 15,
+ "K": [
+ [746.816,0,365.456],
+ [0,746.849,225.794],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313831,-0.00769663,-0.000408313,0.00132145,0.204366],
+ "R": [
+ [0.832563643,0.03033638007,0.5530980784],
+ [-0.06055031945,0.9974999941,0.03643378343],
+ [-0.5506100609,-0.06382370879,0.8323191065]
+ ],
+ "t": [
+ [-6.42740827],
+ [88.69840867],
+ [268.7038743]
+ ]
+ },
+ {
+ "name": "16_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 16,
+ "K": [
+ [745.958,0,362.302],
+ [0,745.997,246.977],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334292,0.102923,-0.000499879,-0.000549652,0.00793805],
+ "R": [
+ [0.8469636173,0.04048111503,0.5301074517],
+ [-0.08872767491,0.9938758,0.0658657255],
+ [-0.5241946497,-0.1028210748,0.8453684379]
+ ],
+ "t": [
+ [4.584618298],
+ [109.8657875],
+ [264.6056558]
+ ]
+ },
+ {
+ "name": "16_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 17,
+ "K": [
+ [743.409,0,347.233],
+ [0,743.501,244.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321337,0.060438,0.000289347,-0.000274585,0.0540146],
+ "R": [
+ [0.8338949711,0.06176137043,0.5484566622],
+ [-0.07967791451,0.9967809419,0.008898524832],
+ [-0.5461415633,-0.05112031815,0.8361316319]
+ ],
+ "t": [
+ [32.73506114],
+ [91.25662398],
+ [270.2531272]
+ ]
+ },
+ {
+ "name": "16_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 18,
+ "K": [
+ [745.291,0,372.769],
+ [0,745.233,242.994],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333422,0.127228,0.000470045,-0.000171948,-0.0533425],
+ "R": [
+ [0.83476387,0.01583088955,0.5503804723],
+ [-0.006383142992,0.9997976531,-0.01907638369],
+ [-0.5505711006,0.01241111862,0.8346960089]
+ ],
+ "t": [
+ [48.20146308],
+ [84.31846371],
+ [276.1979749]
+ ]
+ },
+ {
+ "name": "16_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 19,
+ "K": [
+ [746.318,0,365.802],
+ [0,746.439,228.058],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329752,0.106043,0.000413141,0.00102356,-0.00232913],
+ "R": [
+ [0.812564017,0.08482803737,0.576666214],
+ [-0.09768913876,0.9951785947,-0.008740529432],
+ [-0.5746273144,-0.04923178609,0.8169330944]
+ ],
+ "t": [
+ [39.50134988],
+ [124.7306793],
+ [269.4016435]
+ ]
+ },
+ {
+ "name": "16_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 20,
+ "K": [
+ [745.104,0,371.377],
+ [0,745.158,252.192],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317414,0.0233642,0.000269725,0.000539732,0.145301],
+ "R": [
+ [0.8445515108,0.05428741136,0.5327153297],
+ [-0.06949119822,0.9975462456,0.00851241329],
+ [-0.5309460603,-0.04420819807,0.8462516862]
+ ],
+ "t": [
+ [17.33430135],
+ [146.0606392],
+ [271.3134014]
+ ]
+ },
+ {
+ "name": "16_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 21,
+ "K": [
+ [744.321,0,365.126],
+ [0,744.44,221.253],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310945,0.00293318,4.64093e-05,-0.000454281,0.146346],
+ "R": [
+ [0.8382052649,0.09941648006,0.5362166515],
+ [-0.1229674254,0.9923765769,0.008230548616],
+ [-0.531310593,-0.07283607028,0.8440402601]
+ ],
+ "t": [
+ [5.636303812],
+ [160.8368098],
+ [266.310691]
+ ]
+ },
+ {
+ "name": "16_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 22,
+ "K": [
+ [745.695,0,387.973],
+ [0,745.975,222.039],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325844,0.0780224,-0.000861123,0.000487347,0.0459906],
+ "R": [
+ [0.8503320636,-0.003175777979,0.52623692],
+ [-0.02504000004,0.9986049625,0.04648792516],
+ [-0.5256504352,-0.05270714583,0.8490662971]
+ ],
+ "t": [
+ [-29.03965018],
+ [141.2975723],
+ [268.9897195]
+ ]
+ },
+ {
+ "name": "16_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 23,
+ "K": [
+ [746.757,0,385.384],
+ [0,746.697,250.739],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330103,0.0993513,0.000581277,0.0005991,0.0043047],
+ "R": [
+ [0.8172674448,0.1129970073,0.565071323],
+ [-0.1204798393,0.992420693,-0.02420281713],
+ [-0.5635233199,-0.0482995277,0.8246869852]
+ ],
+ "t": [
+ [1.484048414],
+ [120.2737991],
+ [270.3939501]
+ ]
+ },
+ {
+ "name": "16_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 24,
+ "K": [
+ [743.909,0,365.262],
+ [0,744.1,225.983],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309366,-0.0151251,-0.000569796,0.000128233,0.192772],
+ "R": [
+ [0.8488529257,0.0258708029,0.5279956553],
+ [-0.02681353424,0.9996232069,-0.005871843729],
+ [-0.5279486195,-0.009173097852,0.8492267715]
+ ],
+ "t": [
+ [-1.170097817],
+ [104.9858918],
+ [274.723166]
+ ]
+ },
+ {
+ "name": "17_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 1,
+ "K": [
+ [743.511,0,382.741],
+ [0,744.07,233.668],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303608,-0.0460126,4.19904e-05,0.000729649,0.232264],
+ "R": [
+ [0.7426987355,0.03664601822,-0.6686222084],
+ [-0.01756201576,0.9992239229,0.035258014],
+ [0.6693953719,-0.01444372865,0.742765922]
+ ],
+ "t": [
+ [27.30884403],
+ [110.2809812],
+ [269.7471778]
+ ]
+ },
+ {
+ "name": "17_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 2,
+ "K": [
+ [744.491,0,371.868],
+ [0,744.58,223.545],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320104,0.0388113,-0.000303412,-0.00118762,0.0743207],
+ "R": [
+ [0.773334615,0.1038173874,-0.6254402635],
+ [-0.04654036662,0.9931361468,0.107306049],
+ [0.6322875671,-0.05387526291,0.7728582591]
+ ],
+ "t": [
+ [68.17402308],
+ [125.7906344],
+ [263.8293382]
+ ]
+ },
+ {
+ "name": "17_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 3,
+ "K": [
+ [744.096,0,373.775],
+ [0,744.072,232.317],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314223,0.0332024,-0.000194112,2.11963e-05,0.079313],
+ "R": [
+ [0.7946878724,-0.02084896757,-0.6066601239],
+ [0.03470365887,0.999335828,0.01111570764],
+ [0.6060254462,-0.02988684405,0.7948835985]
+ ],
+ "t": [
+ [55.17367606],
+ [148.0232969],
+ [266.1261169]
+ ]
+ },
+ {
+ "name": "17_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 4,
+ "K": [
+ [748.225,0,373.118],
+ [0,748.618,236.287],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325852,0.0883394,-0.000431944,-0.00077703,0.0075009],
+ "R": [
+ [0.7874797118,0.07165214706,-0.6121614766],
+ [-0.03177741847,0.9966185482,0.07577377574],
+ [0.6155208357,-0.04021739967,0.7870938073]
+ ],
+ "t": [
+ [46.04066644],
+ [153.679907],
+ [265.8341529]
+ ]
+ },
+ {
+ "name": "17_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 5,
+ "K": [
+ [745.23,0,378.585],
+ [0,745.614,229.474],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323397,0.071697,-0.000659822,0.000678056,0.0530686],
+ "R": [
+ [0.7680042357,0.04160049173,-0.6390922414],
+ [0.01355248597,0.9966090615,0.08115854064],
+ [0.6403013541,-0.07099139161,0.7648361904]
+ ],
+ "t": [
+ [29.31016003],
+ [185.453895],
+ [261.9380867]
+ ]
+ },
+ {
+ "name": "17_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 6,
+ "K": [
+ [742.876,0,352.101],
+ [0,743.303,231.794],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319343,0.0421325,-0.000546468,-1.33187e-05,0.10149],
+ "R": [
+ [0.8064347587,0.08751734637,-0.584810819],
+ [-0.03388642915,0.9942014648,0.1020546777],
+ [0.5903513275,-0.062483289,0.8047242688]
+ ],
+ "t": [
+ [35.39857301],
+ [188.6248332],
+ [262.8234665]
+ ]
+ },
+ {
+ "name": "17_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 7,
+ "K": [
+ [745.054,0,358.779],
+ [0,745.36,231.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309912,-0.00132311,-0.00013553,-0.000280643,0.151777],
+ "R": [
+ [0.7882500993,-0.004275732235,-0.615340149],
+ [0.05540043824,0.996408109,0.06404429605],
+ [0.612856078,-0.08457303664,0.7856556683]
+ ],
+ "t": [
+ [-7.246792888],
+ [183.4614511],
+ [259.402568]
+ ]
+ },
+ {
+ "name": "17_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 8,
+ "K": [
+ [745.254,0,343.02],
+ [0,745.689,227.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309897,-0.0109758,-0.00111103,0.000256129,0.180098],
+ "R": [
+ [0.7946287881,0.03514926038,-0.6060772382],
+ [0.01090423253,0.9973351466,0.07213669658],
+ [0.6069976827,-0.06393070292,0.7921279432]
+ ],
+ "t": [
+ [-18.41109561],
+ [184.5517176],
+ [263.9542066]
+ ]
+ },
+ {
+ "name": "17_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 9,
+ "K": [
+ [745.379,0,338.137],
+ [0,745.543,245.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314138,0.0142784,0.00088856,-0.00114362,0.123117],
+ "R": [
+ [0.7570044814,0.09852948519,-0.6459381981],
+ [-0.05745310106,0.9947735679,0.08440787789],
+ [0.6508789107,-0.02678598925,0.7587088733]
+ ],
+ "t": [
+ [-40.16389387],
+ [164.132571],
+ [267.7674295]
+ ]
+ },
+ {
+ "name": "17_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 10,
+ "K": [
+ [743.633,0,369.381],
+ [0,743.739,253.863],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313678,0.00191444,-0.000367883,0.000526793,0.16208],
+ "R": [
+ [0.7732990879,0.03177464522,-0.6332447335],
+ [0.01440724919,0.9976050167,0.06765102948],
+ [0.6338777104,-0.06143779407,0.7709892643]
+ ],
+ "t": [
+ [-41.17430449],
+ [148.5957101],
+ [262.973747]
+ ]
+ },
+ {
+ "name": "17_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 11,
+ "K": [
+ [749.691,0,360.347],
+ [0,749.465,221.979],
+ [0,0,1]
+ ],
+ "distCoef": [-0.36212,0.288042,0.00167589,0.000680745,-0.303613],
+ "R": [
+ [0.7747984815,0.06051645956,-0.629305229],
+ [-0.01350572868,0.9967652932,0.07922465313],
+ [0.6320640066,-0.05288391526,0.7731095544]
+ ],
+ "t": [
+ [-52.93053536],
+ [133.9502209],
+ [264.0833713]
+ ]
+ },
+ {
+ "name": "17_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 12,
+ "K": [
+ [746.505,0,357.704],
+ [0,746.569,217.534],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312272,-0.0352904,0.000404412,-0.00107082,0.237629],
+ "R": [
+ [0.7725304823,-0.04233401582,-0.633564902],
+ [0.05994143841,0.9981814314,0.006391704783],
+ [0.6321421342,-0.04291457833,0.7736631445]
+ ],
+ "t": [
+ [-62.64410987],
+ [104.0188122],
+ [265.010728]
+ ]
+ },
+ {
+ "name": "17_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 13,
+ "K": [
+ [745.264,0,354.32],
+ [0,745.302,226.261],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318398,0.0346929,0.000845692,0.000532231,0.122684],
+ "R": [
+ [0.7851484689,0.03204817868,-0.6184778056],
+ [-0.002225165301,0.9987996914,0.04893081946],
+ [0.619303585,-0.03704174263,0.784277361]
+ ],
+ "t": [
+ [-29.19489341],
+ [103.2650402],
+ [265.9795804]
+ ]
+ },
+ {
+ "name": "17_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 14,
+ "K": [
+ [744.589,0,353.058],
+ [0,744.664,227.639],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324606,0.0822873,0.00100728,-0.000415736,0.0203245],
+ "R": [
+ [0.7765409088,-0.02900211747,-0.6293989944],
+ [0.06862390156,0.9968904955,0.03873112579],
+ [0.6263185908,-0.07326811825,0.7761164898]
+ ],
+ "t": [
+ [-35.65491372],
+ [89.93385082],
+ [261.6973052]
+ ]
+ },
+ {
+ "name": "17_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 15,
+ "K": [
+ [744.009,0,351.118],
+ [0,743.982,227.187],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31768,0.0289626,0.000394183,-0.00106594,0.077624],
+ "R": [
+ [0.7703409519,0.009578036972,-0.6375602553],
+ [0.03762675731,0.9974619202,0.06044786963],
+ [0.6365210484,-0.07055479443,0.7680253746]
+ ],
+ "t": [
+ [-14.94306331],
+ [88.85755459],
+ [261.4804843]
+ ]
+ },
+ {
+ "name": "17_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 16,
+ "K": [
+ [745.298,0,365.044],
+ [0,745.641,201.543],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315769,0.0139989,-0.000983596,0.000497246,0.155532],
+ "R": [
+ [0.7668905855,0.04755147693,-0.6400138177],
+ [0.009922268647,0.9962536216,0.0859084976],
+ [0.6417011597,-0.07223280706,0.7635457047]
+ ],
+ "t": [
+ [4.594602528],
+ [99.8882812],
+ [261.439958]
+ ]
+ },
+ {
+ "name": "17_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 17,
+ "K": [
+ [744.772,0,356.238],
+ [0,744.946,209.811],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307562,-0.0273551,-0.000331097,0.000403566,0.231396],
+ "R": [
+ [0.7386328767,0.1026186384,-0.6662513704],
+ [-0.03586762178,0.992927984,0.1131703685],
+ [0.6731530192,-0.05969450264,0.7370899397]
+ ],
+ "t": [
+ [18.92063539],
+ [92.1220326],
+ [263.1909682]
+ ]
+ },
+ {
+ "name": "17_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 18,
+ "K": [
+ [746.696,0,345.664],
+ [0,746.883,230.9],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332087,0.135716,-0.000396371,4.15402e-05,-0.0769473],
+ "R": [
+ [0.7676740293,0.0869303765,-0.6349170767],
+ [-0.05592901251,0.9960646798,0.06875390322],
+ [0.6383952774,-0.01727030079,0.7695149163]
+ ],
+ "t": [
+ [48.13164066],
+ [87.731429],
+ [267.0873794]
+ ]
+ },
+ {
+ "name": "17_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 19,
+ "K": [
+ [743.785,0,363.137],
+ [0,743.962,239.724],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322076,0.0699752,0.00130957,8.28091e-06,0.0447641],
+ "R": [
+ [0.7666015958,0.09362030423,-0.6352615462],
+ [-0.01827880108,0.9920950944,0.1241499457],
+ [0.6418628193,-0.08356172708,0.7622529495]
+ ],
+ "t": [
+ [25.25313987],
+ [133.2656265],
+ [259.9680703]
+ ]
+ },
+ {
+ "name": "17_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 20,
+ "K": [
+ [747.071,0,344.427],
+ [0,747.404,242.981],
+ [0,0,1]
+ ],
+ "distCoef": [-0.349964,0.20917,0.0008789,-0.000586258,-0.211765],
+ "R": [
+ [0.7775513873,0.03007697302,-0.6280996862],
+ [-0.01270805589,0.999403059,0.03212523871],
+ [0.6286909777,-0.01699709801,0.7774694548]
+ ],
+ "t": [
+ [17.35278566],
+ [137.2956705],
+ [269.3773006]
+ ]
+ },
+ {
+ "name": "17_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 21,
+ "K": [
+ [744.669,0,371.314],
+ [0,744.881,251.475],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32107,0.0528121,0.000172414,0.000961494,0.0921892],
+ "R": [
+ [0.7854342878,0.01663631847,-0.6187214337],
+ [0.02446292583,0.9980232337,0.05788946549],
+ [0.6184614336,-0.06060410764,0.7834746947]
+ ],
+ "t": [
+ [-1.039205356],
+ [155.8049723],
+ [263.425936]
+ ]
+ },
+ {
+ "name": "17_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 22,
+ "K": [
+ [744.126,0,368.359],
+ [0,744.205,218.365],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306681,-0.0309893,-0.000506643,-0.000551257,0.209183],
+ "R": [
+ [0.7742934088,0.08491898973,-0.6271032469],
+ [-0.02171436959,0.9939373135,0.1077826651],
+ [0.6324541115,-0.06983825553,0.771443073]
+ ],
+ "t": [
+ [-12.48615074],
+ [146.2169272],
+ [261.8070617]
+ ]
+ },
+ {
+ "name": "17_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 23,
+ "K": [
+ [746.439,0,363.854],
+ [0,746.575,224.032],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333494,0.127943,0.00111227,0.000376509,-0.0438307],
+ "R": [
+ [0.7741360077,0.05745954338,-0.6304060933],
+ [-0.01777243196,0.9974520988,0.06909016755],
+ [0.6327697704,-0.04228133707,0.7731847814]
+ ],
+ "t": [
+ [-14.18178238],
+ [117.4047924],
+ [265.0998909]
+ ]
+ },
+ {
+ "name": "17_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 24,
+ "K": [
+ [745.824,0,346.505],
+ [0,746.017,224.098],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317434,0.0247137,-0.000866957,0.000304145,0.138958],
+ "R": [
+ [0.7656627697,0.09930116127,-0.6355311184],
+ [-0.04982185052,0.99419918,0.09531932471],
+ [0.6413098365,-0.04131912178,0.7661686654]
+ ],
+ "t": [
+ [7.35512715],
+ [111.8344509],
+ [265.0127015]
+ ]
+ },
+ {
+ "name": "18_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 1,
+ "K": [
+ [744.96,0,372.705],
+ [0,744.564,226.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321978,0.0724692,0.000483988,0.000458946,0.0380169],
+ "R": [
+ [-0.3520669355,0.03279886428,-0.9353999719],
+ [0.04913052402,0.9986556534,0.01652505738],
+ [0.9346844732,-0.04013876447,-0.3532050609]
+ ],
+ "t": [
+ [47.10128491],
+ [117.3460549],
+ [266.6541908]
+ ]
+ },
+ {
+ "name": "18_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 2,
+ "K": [
+ [748.843,0,358.358],
+ [0,748.813,225.018],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335266,0.148062,0.000634215,-0.00153008,-0.105518],
+ "R": [
+ [-0.3389880085,0.04020239671,-0.9399313259],
+ [0.04795713663,0.9985260662,0.02541275744],
+ [0.9395675831,-0.03646179499,-0.3404163544]
+ ],
+ "t": [
+ [70.51461434],
+ [125.984952],
+ [266.5287049]
+ ]
+ },
+ {
+ "name": "18_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 3,
+ "K": [
+ [746.557,0,370.525],
+ [0,746.643,239.094],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336876,0.137869,0.0006954,0.000424607,-0.0538424],
+ "R": [
+ [-0.3751735108,0.06869685522,-0.9244055273],
+ [0.01802710881,0.9976021763,0.06682006625],
+ [0.9267792942,0.008404759824,-0.3755123165]
+ ],
+ "t": [
+ [58.58769651],
+ [133.6261971],
+ [275.7276294]
+ ]
+ },
+ {
+ "name": "18_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 4,
+ "K": [
+ [744.71,0,356.151],
+ [0,744.769,223.97],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312604,0.00791514,0.000747313,-0.000519594,0.158336],
+ "R": [
+ [-0.3438161676,0.01243889994,-0.9389545871],
+ [0.0251972518,0.9996744288,0.00401683712],
+ [0.9386988555,-0.02227802162,-0.344017657]
+ ],
+ "t": [
+ [40.26546697],
+ [152.0702476],
+ [270.0686857]
+ ]
+ },
+ {
+ "name": "18_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 5,
+ "K": [
+ [743.927,0,355.392],
+ [0,744.057,262.153],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316206,0.0381773,0.00109867,0.000112775,0.102099],
+ "R": [
+ [-0.3913025917,0.04706716523,-0.9190576498],
+ [0.07535158968,0.9969764632,0.0189755056],
+ [0.9171719684,-0.0618272904,-0.3936660596]
+ ],
+ "t": [
+ [27.50168157],
+ [183.5367771],
+ [265.1462318]
+ ]
+ },
+ {
+ "name": "18_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 6,
+ "K": [
+ [744.89,0,353.646],
+ [0,744.816,246.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311434,-0.0151537,0.000898898,0.00113623,0.19919],
+ "R": [
+ [-0.3540366423,0.02766248657,-0.9348223589],
+ [0.06855079724,0.9976412764,0.003559761167],
+ [0.9327158432,-0.06282253209,-0.3550978532]
+ ],
+ "t": [
+ [15.12228299],
+ [191.0759947],
+ [263.959739]
+ ]
+ },
+ {
+ "name": "18_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 7,
+ "K": [
+ [744.21,0,382.066],
+ [0,744.474,221.564],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318836,0.0439442,-0.000310088,0.000693195,0.0844966],
+ "R": [
+ [-0.3784097731,0.01208936744,-0.9255592314],
+ [0.03775536538,0.9992841689,-0.002383732641],
+ [0.9248678695,-0.03584685469,-0.3785953341]
+ ],
+ "t": [
+ [-11.73143391],
+ [170.7040215],
+ [268.2801795]
+ ]
+ },
+ {
+ "name": "18_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 8,
+ "K": [
+ [744.996,0,378.911],
+ [0,745.249,217.173],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317298,0.0439499,-0.000470842,0.000645598,0.0800391],
+ "R": [
+ [-0.3573644405,-0.02168005213,-0.9337133564],
+ [0.09030348924,0.9942444419,-0.05764780686],
+ [0.9295891224,-0.1049188503,-0.3533498244]
+ ],
+ "t": [
+ [-32.18764663],
+ [193.5958696],
+ [255.9258617]
+ ]
+ },
+ {
+ "name": "18_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 9,
+ "K": [
+ [745.488,0,367.703],
+ [0,745.136,254.274],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333608,0.117291,0.00107107,0.000590786,-0.0167148],
+ "R": [
+ [-0.3755971335,-0.01611847579,-0.9266428589],
+ [0.03486308067,0.9988953473,-0.03150636014],
+ [0.9261270749,-0.0441393233,-0.3746202894]
+ ],
+ "t": [
+ [-52.11061688],
+ [162.8813669],
+ [265.66749]
+ ]
+ },
+ {
+ "name": "18_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 10,
+ "K": [
+ [746.691,0,377.016],
+ [0,746.35,247.895],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324348,0.0759263,0.000632098,0.000973799,0.0365142],
+ "R": [
+ [-0.3979832561,-0.05264507275,-0.9158809007],
+ [0.03842303812,0.9965195246,-0.07397639654],
+ [0.9165876925,-0.06463229393,-0.3945753015]
+ ],
+ "t": [
+ [-58.47639535],
+ [144.7851801],
+ [261.4908418]
+ ]
+ },
+ {
+ "name": "18_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 11,
+ "K": [
+ [743.499,0,383.73],
+ [0,743.269,228.607],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318101,0.0343673,-0.000192972,9.02677e-05,0.0940376],
+ "R": [
+ [-0.3591156591,-0.0799459609,-0.9298626709],
+ [0.01693912278,0.9956019804,-0.09213990831],
+ [0.9331393302,-0.04883994185,-0.356182047]
+ ],
+ "t": [
+ [-65.19666066],
+ [124.1115675],
+ [265.1913912]
+ ]
+ },
+ {
+ "name": "18_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 12,
+ "K": [
+ [744.847,0,377.843],
+ [0,744.539,240.133],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322594,0.0777366,0.000608553,0.000730506,0.0395492],
+ "R": [
+ [-0.3599917326,-0.04959232233,-0.9316364924],
+ [0.02914279324,0.9975011607,-0.0643593979],
+ [0.9325002145,-0.05031934083,-0.3576469123]
+ ],
+ "t": [
+ [-57.61171896],
+ [105.5688064],
+ [264.3974594]
+ ]
+ },
+ {
+ "name": "18_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 13,
+ "K": [
+ [742.264,0,386.065],
+ [0,742.375,236.247],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316238,0.0182785,-0.000395794,0.00144239,0.136479],
+ "R": [
+ [-0.3232019546,0.03338047233,-0.9457411066],
+ [0.05161368011,0.9985119503,0.01760435083],
+ [0.9449214383,-0.04312341834,-0.324443903]
+ ],
+ "t": [
+ [61.04698375],
+ [97.35388185],
+ [264.1973208]
+ ]
+ },
+ {
+ "name": "18_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 14,
+ "K": [
+ [744.531,0,362.517],
+ [0,744.694,222.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323155,0.0551,-0.000315217,0.00114443,0.0791805],
+ "R": [
+ [-0.3124904102,0.02154150537,-0.9496766329],
+ [-0.004629448499,0.999696432,0.02419942065],
+ [0.9499096335,0.01195856595,-0.3122958229]
+ ],
+ "t": [
+ [-14.02426098],
+ [68.46079663],
+ [270.3325449]
+ ]
+ },
+ {
+ "name": "18_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 15,
+ "K": [
+ [747.429,0,398.562],
+ [0,747.425,233.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333617,0.122405,0.000303778,0.00134383,-0.0202721],
+ "R": [
+ [-0.358025731,-0.0142572014,-0.9336028643],
+ [0.04081564607,0.9986886699,-0.03090345813],
+ [0.9328191995,-0.04916983726,-0.3569743242]
+ ],
+ "t": [
+ [-8.683192747],
+ [83.02873835],
+ [264.4620974]
+ ]
+ },
+ {
+ "name": "18_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 16,
+ "K": [
+ [742.757,0,357.304],
+ [0,742.66,220.331],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305443,-0.0527047,-0.000521453,0.00022453,0.250047],
+ "R": [
+ [-0.3364590891,0.05374146283,-0.9401633563],
+ [0.05791647683,0.99766121,0.03630140184],
+ [0.9399154021,-0.04223701264,-0.3387846981]
+ ],
+ "t": [
+ [20.062846],
+ [91.33983095],
+ [265.2581766]
+ ]
+ },
+ {
+ "name": "18_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 17,
+ "K": [
+ [750.787,0,361.922],
+ [0,750.723,216.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.368257,0.303211,-0.00101236,-0.000679192,-0.335284],
+ "R": [
+ [-0.3521002367,0.0154136189,-0.9358353721],
+ [0.04957845599,0.9987678018,-0.002203336065],
+ [0.9346482761,-0.04717306796,-0.3524305629]
+ ],
+ "t": [
+ [32.75189895],
+ [90.38015946],
+ [265.2110414]
+ ]
+ },
+ {
+ "name": "18_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 18,
+ "K": [
+ [745.69,0,366.196],
+ [0,745.645,224.452],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325076,0.0695314,0.000207452,8.09151e-05,0.0569118],
+ "R": [
+ [-0.369329094,-0.008664471876,-0.929258278],
+ [0.06369637747,0.997368813,-0.03461534879],
+ [0.9271131494,-0.07197484145,-0.3678054246]
+ ],
+ "t": [
+ [-35.28307581],
+ [111.055802],
+ [261.8818226]
+ ]
+ },
+ {
+ "name": "18_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 19,
+ "K": [
+ [745.552,0,357.301],
+ [0,745.545,223.113],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320101,0.042192,0.00043748,0.000103204,0.104558],
+ "R": [
+ [-0.3584191226,-0.04877846794,-0.9322855752],
+ [0.07086164718,0.9943315632,-0.07926770686],
+ [0.9308675306,-0.09447435344,-0.3529309238]
+ ],
+ "t": [
+ [16.14340371],
+ [139.4376601],
+ [259.6452388]
+ ]
+ },
+ {
+ "name": "18_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 20,
+ "K": [
+ [746.078,0,363.03],
+ [0,746.077,221.582],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321359,0.0569666,0.000169599,0.000938787,0.0797635],
+ "R": [
+ [-0.3631410096,0.0448531679,-0.9306539639],
+ [0.06634832184,0.9975497918,0.02218813063],
+ [0.9293688758,-0.05368990856,-0.3652271709]
+ ],
+ "t": [
+ [21.37501917],
+ [147.345749],
+ [265.5705493]
+ ]
+ },
+ {
+ "name": "18_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 21,
+ "K": [
+ [745.043,0,372.293],
+ [0,745.076,222.901],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317484,0.0404748,0.000192535,-0.000111527,0.0957966],
+ "R": [
+ [-0.3461967977,-0.005928135698,-0.9381431844],
+ [0.04577092509,0.9986824948,-0.02320122706],
+ [0.937044716,-0.05097187193,-0.3454693453]
+ ],
+ "t": [
+ [-0.5259425122],
+ [153.3372726],
+ [265.7616305]
+ ]
+ },
+ {
+ "name": "18_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 22,
+ "K": [
+ [745.252,0,401.788],
+ [0,745.346,245.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315494,0.0267895,-0.000624877,0.000210937,0.0993279],
+ "R": [
+ [-0.3267831921,-0.004575639121,-0.9450882546],
+ [0.07739750703,0.9964998407,-0.03158628616],
+ [0.9419248225,-0.08346934224,-0.3252852558]
+ ],
+ "t": [
+ [-10.3938656],
+ [148.3069178],
+ [261.1183693]
+ ]
+ },
+ {
+ "name": "18_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 23,
+ "K": [
+ [747.114,0,358.608],
+ [0,746.941,217.398],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324507,0.0792141,-0.000227367,0.0013287,0.0357905],
+ "R": [
+ [-0.356358404,-0.03218270054,-0.9337949248],
+ [0.02645826287,0.9986582749,-0.04451528213],
+ [0.9339746507,-0.04056998648,-0.3550287707]
+ ],
+ "t": [
+ [-18.04448695],
+ [115.7023496],
+ [266.3010308]
+ ]
+ },
+ {
+ "name": "18_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 24,
+ "K": [
+ [747.28,0,383.407],
+ [0,747.414,233.333],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321806,0.0494121,-0.000677773,0.00106862,0.0725344],
+ "R": [
+ [-0.3696831614,0.01690678518,-0.9290040478],
+ [0.03916078476,0.9992295361,0.002601362608],
+ [0.9283322644,-0.03541884761,-0.3700604169]
+ ],
+ "t": [
+ [3.487638933],
+ [110.8874693],
+ [266.9764809]
+ ]
+ },
+ {
+ "name": "19_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 1,
+ "K": [
+ [742.815,0,376.349],
+ [0,742.96,226.412],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311242,0.000676611,0.00127048,0.000398816,0.145683],
+ "R": [
+ [-0.9986287013,0.0334613179,0.04026235479],
+ [0.03051664863,0.9969627365,-0.07165218936],
+ [-0.04253764409,-0.07032526067,-0.99661673]
+ ],
+ "t": [
+ [47.87451164],
+ [124.5257469],
+ [265.3025885]
+ ]
+ },
+ {
+ "name": "19_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 2,
+ "K": [
+ [746.352,0,362.211],
+ [0,746.799,224.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.33354,0.113916,-0.000650978,0.00200875,0.00369896],
+ "R": [
+ [-0.9978769066,0.0627015602,0.01761231284],
+ [0.06225819076,0.9977547513,-0.02468550225],
+ [-0.01912058832,-0.02353658189,-0.9995401105]
+ ],
+ "t": [
+ [76.18899734],
+ [119.4504319],
+ [269.470097]
+ ]
+ },
+ {
+ "name": "19_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 3,
+ "K": [
+ [744.923,0,335.897],
+ [0,744.843,232.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310786,-0.00740435,0.000477261,-0.00048183,0.169837],
+ "R": [
+ [-0.9959217828,0.05942221639,0.06788816328],
+ [0.05820019172,0.9981077555,-0.01984051806],
+ [-0.06893866983,-0.0158085,-0.9974956397]
+ ],
+ "t": [
+ [57.6907282],
+ [139.716188],
+ [274.5941587]
+ ]
+ },
+ {
+ "name": "19_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 4,
+ "K": [
+ [745.3,0,371.455],
+ [0,745.339,223.979],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316788,0.039021,-0.00160053,-0.000126119,0.09467],
+ "R": [
+ [-0.995350133,0.07444232287,0.06112653567],
+ [0.06997485872,0.994930028,-0.0722340534],
+ [-0.06619389658,-0.06762085396,-0.9955128267]
+ ],
+ "t": [
+ [42.04206067],
+ [161.4993909],
+ [266.5642499]
+ ]
+ },
+ {
+ "name": "19_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 5,
+ "K": [
+ [741.339,0,353.354],
+ [0,741.563,231.192],
+ [0,0,1]
+ ],
+ "distCoef": [-0.304803,-0.0634451,-0.00114618,-0.000982934,0.282182],
+ "R": [
+ [-0.9964181101,0.07478982294,0.03946431643],
+ [0.07096423127,0.993341211,-0.09075966339],
+ [-0.04598943103,-0.08763401739,-0.9950905744]
+ ],
+ "t": [
+ [45.56899486],
+ [188.2245222],
+ [262.1501617]
+ ]
+ },
+ {
+ "name": "19_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 6,
+ "K": [
+ [745.947,0,350.894],
+ [0,746.217,234.332],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313212,0.0178381,0.000340441,0.00055626,0.126083],
+ "R": [
+ [-0.9969018679,0.07865171151,0.0007576151751],
+ [0.07854654264,0.9959829876,-0.04299219736],
+ [-0.004135981729,-0.0427994938,-0.9990751208]
+ ],
+ "t": [
+ [37.2742824],
+ [183.4195047],
+ [270.0123608]
+ ]
+ },
+ {
+ "name": "19_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 7,
+ "K": [
+ [748.821,0,355.822],
+ [0,748.684,217.17],
+ [0,0,1]
+ ],
+ "distCoef": [-0.342444,0.16602,-0.000477836,-0.000195363,-0.106824],
+ "R": [
+ [-0.9928808048,-0.04900785176,0.10856306],
+ [-0.05236016128,0.998228751,-0.02824489671],
+ [-0.106986546,-0.0337281951,-0.9936882247]
+ ],
+ "t": [
+ [-31.49326377],
+ [168.7489309],
+ [271.4480177]
+ ]
+ },
+ {
+ "name": "19_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 8,
+ "K": [
+ [747.238,0,359.034],
+ [0,747.474,233.038],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313675,0.00436645,0.000419802,0.000604189,0.154068],
+ "R": [
+ [-0.9913876468,0.02931278851,0.127637354],
+ [0.0192008625,0.9966303068,-0.07974558542],
+ [-0.1295448208,-0.07660804099,-0.9886098055]
+ ],
+ "t": [
+ [-44.88902211],
+ [188.5485089],
+ [261.5304555]
+ ]
+ },
+ {
+ "name": "19_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 9,
+ "K": [
+ [743.415,0,332.333],
+ [0,743.715,235.337],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308464,-0.0208585,-0.00102455,0.000256502,0.207947],
+ "R": [
+ [-0.9954977047,0.04566149696,0.08306231217],
+ [0.04175753042,0.9979670543,-0.04814631117],
+ [-0.08509188364,-0.04446106523,-0.9953806232]
+ ],
+ "t": [
+ [-46.35184093],
+ [166.6378451],
+ [268.6077116]
+ ]
+ },
+ {
+ "name": "19_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 10,
+ "K": [
+ [747.206,0,362.728],
+ [0,747.412,248.496],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340118,0.138855,0.000965068,4.5306e-05,-0.0441245],
+ "R": [
+ [-0.9935175509,0.05252798067,0.1008151146],
+ [0.05439486481,0.9983935823,0.01585728578],
+ [-0.09982021218,0.02123831626,-0.9947787991]
+ ],
+ "t": [
+ [-46.95074625],
+ [127.5778656],
+ [276.6370715]
+ ]
+ },
+ {
+ "name": "19_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 11,
+ "K": [
+ [745.45,0,355.141],
+ [0,745.641,249.232],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326245,0.10077,0.000216744,-2.37583e-05,-0.0259903],
+ "R": [
+ [-0.9983050345,-0.001439505441,0.05818063101],
+ [-0.002578079686,0.9998065462,-0.01949932386],
+ [-0.05814130636,-0.01961626748,-0.9981156198]
+ ],
+ "t": [
+ [-58.09544547],
+ [121.7224759],
+ [272.659258]
+ ]
+ },
+ {
+ "name": "19_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 12,
+ "K": [
+ [743.805,0,368.42],
+ [0,744.013,242.015],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323306,0.0785457,-0.00106293,0.000187763,0.0236672],
+ "R": [
+ [-0.9954771119,0.0748660766,0.05848410323],
+ [0.07512966129,0.9971710788,0.002318097681],
+ [-0.05814510944,0.006701504052,-0.9982856485]
+ ],
+ "t": [
+ [-47.8147621],
+ [97.15541342],
+ [274.4212668]
+ ]
+ },
+ {
+ "name": "19_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 13,
+ "K": [
+ [742.693,0,353.966],
+ [0,742.776,227.014],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307193,-0.0103139,0.000109263,-0.000950495,0.159317],
+ "R": [
+ [-0.9933059489,0.1045971031,0.04901773034],
+ [0.1016362638,0.9930442478,-0.05944065861],
+ [-0.05489409585,-0.05406078084,-0.9970276176]
+ ],
+ "t": [
+ [-21.5323637],
+ [109.7713479],
+ [268.3161895]
+ ]
+ },
+ {
+ "name": "19_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 14,
+ "K": [
+ [742.837,0,362.248],
+ [0,743.502,226.37],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308934,-0.00321353,-0.0010059,0.000705591,0.156528],
+ "R": [
+ [-0.9919154966,0.0987006026,0.07976113456],
+ [0.09553429302,0.9945144894,-0.04259259489],
+ [-0.08352751879,-0.03462833131,-0.995903626]
+ ],
+ "t": [
+ [-30.66946365],
+ [84.06052642],
+ [268.8728165]
+ ]
+ },
+ {
+ "name": "19_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 15,
+ "K": [
+ [742.618,0,345.237],
+ [0,742.923,230.439],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302695,-0.0546693,-0.000167537,-0.000784726,0.259585],
+ "R": [
+ [-0.9885523252,0.1391044686,0.05843155954],
+ [0.1381120085,0.9902000007,-0.02071308279],
+ [-0.06074021267,-0.01240586611,-0.9980765106]
+ ],
+ "t": [
+ [-1.26146274],
+ [74.12977283],
+ [271.0351679]
+ ]
+ },
+ {
+ "name": "19_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 16,
+ "K": [
+ [744.088,0,370.473],
+ [0,744.417,231.755],
+ [0,0,1]
+ ],
+ "distCoef": [-0.300902,-0.0664899,-0.000333311,0.000589361,0.253926],
+ "R": [
+ [-0.9917390399,0.06178336486,0.1124121551],
+ [0.06447509535,0.9977094298,0.02046596672],
+ [-0.1108902109,0.02754468261,-0.9934508803]
+ ],
+ "t": [
+ [-3.269853258],
+ [73.62667861],
+ [274.8694227]
+ ]
+ },
+ {
+ "name": "19_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 17,
+ "K": [
+ [745.582,0,373.528],
+ [0,745.86,237.254],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322134,0.0530706,-0.000603814,0.00101303,0.0846746],
+ "R": [
+ [-0.9897330936,0.1313546283,0.05634150462],
+ [0.1318000226,0.9912672261,0.00424742025],
+ [-0.05529156869,0.01162962396,-0.9984025212]
+ ],
+ "t": [
+ [37.3391924],
+ [70.20661568],
+ [273.1392775]
+ ]
+ },
+ {
+ "name": "19_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 18,
+ "K": [
+ [742.542,0,374.105],
+ [0,742.758,223.273],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306762,-0.0452572,-0.00032402,-0.000364469,0.245651],
+ "R": [
+ [-0.9920842372,0.1065981921,0.06637538524],
+ [0.106818653,0.9942784937,-0.0002288198192],
+ [-0.06602000984,0.006863120707,-0.9977946963]
+ ],
+ "t": [
+ [52.26513597],
+ [79.91641464],
+ [273.9509772]
+ ]
+ },
+ {
+ "name": "19_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 19,
+ "K": [
+ [744.378,0,361.433],
+ [0,744.589,244.618],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310422,-0.000364242,-0.000710118,0.000839407,0.169675],
+ "R": [
+ [-0.9919054981,0.126974259,0.001010166835],
+ [0.1269495258,0.9918188066,-0.01338927975],
+ [-0.002701996339,-0.01315266,-0.9999098493]
+ ],
+ "t": [
+ [49.23489662],
+ [110.9052228],
+ [271.6142806]
+ ]
+ },
+ {
+ "name": "19_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 20,
+ "K": [
+ [745.72,0,364.99],
+ [0,745.913,248.461],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32476,0.0791445,0.000409065,0.000522525,0.0385155],
+ "R": [
+ [-0.9808466558,0.1869185946,0.05478391053],
+ [0.1851721888,0.9820671342,-0.03543168776],
+ [-0.06042431929,-0.02460859583,-0.9978693896]
+ ],
+ "t": [
+ [40.23583817],
+ [134.9359413],
+ [272.7493911]
+ ]
+ },
+ {
+ "name": "19_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 21,
+ "K": [
+ [745.966,0,347.023],
+ [0,745.905,254.016],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312122,-0.0171046,0.00101358,-9.38575e-05,0.213424],
+ "R": [
+ [-0.9944456328,0.07811965146,0.07053512206],
+ [0.07435713108,0.9957422838,-0.0544823029],
+ [-0.07449094204,-0.04893489886,-0.9960203187]
+ ],
+ "t": [
+ [2.247391851],
+ [153.0572023],
+ [268.8284628]
+ ]
+ },
+ {
+ "name": "19_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 22,
+ "K": [
+ [743.607,0,364.935],
+ [0,743.756,243.53],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311531,0.000696399,0.00010932,-0.000314324,0.159615],
+ "R": [
+ [-0.9924188487,0.09367860135,0.07955594568],
+ [0.08900119243,0.9941960017,-0.06044086279],
+ [-0.0847562186,-0.05290207743,-0.9949963586]
+ ],
+ "t": [
+ [-15.3150092],
+ [142.5037842],
+ [267.7211288]
+ ]
+ },
+ {
+ "name": "19_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 23,
+ "K": [
+ [743.508,0,369.721],
+ [0,743.449,243.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309744,-0.0191119,0.000292611,0.000847107,0.198605],
+ "R": [
+ [-0.9987856124,0.03694807636,0.03259049098],
+ [0.03470669556,0.9971594314,-0.06684694127],
+ [-0.03496778135,-0.06563465492,-0.997230839]
+ ],
+ "t": [
+ [-6.799650163],
+ [123.3743131],
+ [267.1549958]
+ ]
+ },
+ {
+ "name": "19_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 24,
+ "K": [
+ [742.775,0,379.613],
+ [0,742.864,224.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316586,0.0333112,-0.000180777,0.00112675,0.112087],
+ "R": [
+ [-0.9947573056,0.06853183176,0.07590316848],
+ [0.05765365411,0.9888586451,-0.1372393391],
+ [-0.08446276764,-0.1321437401,-0.9876254719]
+ ],
+ "t": [
+ [4.340029177],
+ [136.5307812],
+ [258.2193706]
+ ]
+ },
+ {
+ "name": "20_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 1,
+ "K": [
+ [745.267,0,367.511],
+ [0,745.253,228.976],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316421,0.0232694,0.000233523,0.00095017,0.129164],
+ "R": [
+ [-0.2595515744,0.03264633198,0.965177288],
+ [-0.02439656235,0.9988878376,-0.04034718866],
+ [-0.9654210418,-0.03401918423,-0.2584664527]
+ ],
+ "t": [
+ [43.91564589],
+ [114.6472759],
+ [269.2437955]
+ ]
+ },
+ {
+ "name": "20_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 2,
+ "K": [
+ [746.737,0,383.621],
+ [0,746.553,234.139],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330711,0.126048,0.000259954,-0.000232797,-0.067441],
+ "R": [
+ [-0.2600597375,0.03354081135,0.965009817],
+ [-0.06475754991,0.9965406566,-0.05208818886],
+ [-0.9634185968,-0.07603771211,-0.2569880808]
+ ],
+ "t": [
+ [63.03617994],
+ [136.0112472],
+ [264.2112923]
+ ]
+ },
+ {
+ "name": "20_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 3,
+ "K": [
+ [748.567,0,371.842],
+ [0,748.646,223.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332561,0.132401,-0.000978802,0.0010132,-0.0596871],
+ "R": [
+ [-0.2517963519,0.03200567411,0.967250864],
+ [0.0115205721,0.9994813079,-0.03007310314],
+ [-0.9677116686,0.003570985655,-0.2520344708]
+ ],
+ "t": [
+ [55.32226207],
+ [135.5872215],
+ [276.5287505]
+ ]
+ },
+ {
+ "name": "20_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 4,
+ "K": [
+ [747.412,0,375.731],
+ [0,747.545,213.638],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324984,0.0823763,-0.00190711,0.0010176,0.0382164],
+ "R": [
+ [-0.2864406942,-0.001302983566,0.9580970885],
+ [-0.1193951903,0.9922525608,-0.03434594761],
+ [-0.9506295373,-0.1242302613,-0.2843770823]
+ ],
+ "t": [
+ [40.5108683],
+ [178.4576708],
+ [254.9563649]
+ ]
+ },
+ {
+ "name": "20_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 5,
+ "K": [
+ [747.818,0,377.646],
+ [0,748.63,232.294],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327048,0.100477,-0.00250563,-0.000951363,0.00505748],
+ "R": [
+ [-0.2682590325,-0.01756457816,0.9631866782],
+ [-0.1175373506,0.9929607203,-0.014628026],
+ [-0.9561496027,-0.1171345104,-0.2684351761]
+ ],
+ "t": [
+ [28.10870602],
+ [198.6254244],
+ [256.0861594]
+ ]
+ },
+ {
+ "name": "20_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 6,
+ "K": [
+ [744.281,0,376.164],
+ [0,744.733,212.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314115,0.0261091,-0.00186017,0.000146826,0.111047],
+ "R": [
+ [-0.2995512244,0.02650351378,0.9537120256],
+ [-0.1164678133,0.9911222418,-0.06412449085],
+ [-0.9469447251,-0.1302853239,-0.2938050747]
+ ],
+ "t": [
+ [24.38602287],
+ [207.7342285],
+ [252.6787249]
+ ]
+ },
+ {
+ "name": "20_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 7,
+ "K": [
+ [744.844,0,367.199],
+ [0,744.885,234.874],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307447,-0.0235368,-0.000447762,-0.000552595,0.198481],
+ "R": [
+ [-0.2246138655,-0.03605175288,0.9737807158],
+ [-0.1345418425,0.9908917963,0.005651603877],
+ [-0.965115073,-0.1297448231,-0.2274185059]
+ ],
+ "t": [
+ [-24.57828512],
+ [193.807989],
+ [253.6581871]
+ ]
+ },
+ {
+ "name": "20_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 8,
+ "K": [
+ [745.265,0,373.297],
+ [0,745.204,222.406],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322725,0.0753011,-0.00198414,9.48962e-05,0.0496562],
+ "R": [
+ [-0.2740281164,0.007089557403,0.9616955493],
+ [-0.08615117171,0.9957715968,-0.0318889104],
+ [-0.9578551911,-0.09158965645,-0.2722586413]
+ ],
+ "t": [
+ [-24.40184383],
+ [190.6520913],
+ [261.5790911]
+ ]
+ },
+ {
+ "name": "20_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 9,
+ "K": [
+ [743.742,0,376.404],
+ [0,743.442,252.182],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310951,0.0101818,-0.000165117,0.000699519,0.141452],
+ "R": [
+ [-0.234740558,-0.05401621619,0.9705560874],
+ [-0.06709368181,0.9969740023,0.03925909634],
+ [-0.9697398147,-0.05590247913,-0.2376543804]
+ ],
+ "t": [
+ [-60.89112675],
+ [163.1020008],
+ [266.420435]
+ ]
+ },
+ {
+ "name": "20_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 10,
+ "K": [
+ [746.237,0,381.452],
+ [0,745.998,235.104],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321635,0.0804606,-0.000793429,0.000500703,0.0308776],
+ "R": [
+ [-0.2327490461,-0.03063038999,0.9720543507],
+ [-0.1073579574,0.9942045343,0.005622535858],
+ [-0.9665930636,-0.1030491297,-0.2346885731]
+ ],
+ "t": [
+ [-52.7687065],
+ [155.650502],
+ [258.7092289]
+ ]
+ },
+ {
+ "name": "20_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 11,
+ "K": [
+ [744.465,0,352.406],
+ [0,744.368,231.635],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307896,-0.0267024,-0.00138959,-0.000489454,0.213952],
+ "R": [
+ [-0.2568719183,-0.003646201445,0.9664385768],
+ [-0.06909534804,0.997503196,-0.01460160774],
+ [-0.9639723287,-0.07052715282,-0.256482495]
+ ],
+ "t": [
+ [-58.11810551],
+ [133.8270577],
+ [264.378006]
+ ]
+ },
+ {
+ "name": "20_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 12,
+ "K": [
+ [744.557,0,351.376],
+ [0,744.424,216.683],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317479,0.0158652,-0.000659121,-0.00059258,0.147681],
+ "R": [
+ [-0.2372383683,-0.02274879941,0.9711850744],
+ [-0.1004253449,0.9949438408,-0.001226302928],
+ [-0.9662467111,-0.09782252214,-0.2383234094]
+ ],
+ "t": [
+ [-62.35654103],
+ [118.4734964],
+ [259.8400796]
+ ]
+ },
+ {
+ "name": "20_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 13,
+ "K": [
+ [743.07,0,377.102],
+ [0,743.158,222.988],
+ [0,0,1]
+ ],
+ "distCoef": [-0.29868,-0.0827266,-0.00133003,-0.00119832,0.273178],
+ "R": [
+ [-0.2367527853,-0.03686088138,0.9708704311],
+ [-0.08746956632,0.9960307636,0.01648614259],
+ [-0.9676245107,-0.08101847538,-0.2390372628]
+ ],
+ "t": [
+ [-42.43038274],
+ [111.3831569],
+ [262.4188123]
+ ]
+ },
+ {
+ "name": "20_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 14,
+ "K": [
+ [745.597,0,372.306],
+ [0,745.414,237.499],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320131,0.0615197,0.00113665,-0.000991542,0.0414761],
+ "R": [
+ [-0.2769894269,0.05383368349,0.9593637433],
+ [-0.05406721308,0.9959742516,-0.07149843787],
+ [-0.9593506105,-0.07167443526,-0.2729636999]
+ ],
+ "t": [
+ [-21.49417033],
+ [90.7530727],
+ [264.2254974]
+ ]
+ },
+ {
+ "name": "20_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 15,
+ "K": [
+ [746.296,0,380.788],
+ [0,746.161,226.883],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321885,0.0553182,0.000132369,-0.000878491,0.0778662],
+ "R": [
+ [-0.2870302882,0.01079685294,0.9578606588],
+ [-0.05665486447,0.9979947406,-0.02822630231],
+ [-0.9562446549,-0.06236926949,-0.2858430237]
+ ],
+ "t": [
+ [-1.106709776],
+ [85.82297146],
+ [264.8070963]
+ ]
+ },
+ {
+ "name": "20_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 16,
+ "K": [
+ [744.119,0,345.288],
+ [0,744.112,227.607],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302547,-0.0664079,0.000893953,-0.000627784,0.303861],
+ "R": [
+ [-0.252548592,0.05539030986,0.9659974753],
+ [-0.08640189331,0.9930807476,-0.07953201617],
+ [-0.963718798,-0.1035497095,-0.2460153169]
+ ],
+ "t": [
+ [10.51473419],
+ [107.4721829],
+ [260.872486]
+ ]
+ },
+ {
+ "name": "20_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 17,
+ "K": [
+ [745.831,0,353.784],
+ [0,745.87,219.754],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321082,0.0599511,-0.000750204,0.000386726,0.0615888],
+ "R": [
+ [-0.3124433364,0.0857084176,0.9460619582],
+ [-0.03834810703,0.9939715084,-0.1027135007],
+ [-0.9491620432,-0.06837183409,-0.3072730188]
+ ],
+ "t": [
+ [50.17882687],
+ [91.39390134],
+ [262.9120903]
+ ]
+ },
+ {
+ "name": "20_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 18,
+ "K": [
+ [745.227,0,385.13],
+ [0,745.129,233.897],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311291,0.0180828,0.00116452,0.000576614,0.0928398],
+ "R": [
+ [-0.2786751196,0.05379991941,0.9588773365],
+ [-0.03740853519,0.9970639104,-0.06681437094],
+ [-0.9596565944,-0.0544896994,-0.2758443282]
+ ],
+ "t": [
+ [57.04086511],
+ [98.35557378],
+ [265.4113916]
+ ]
+ },
+ {
+ "name": "20_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 19,
+ "K": [
+ [746.424,0,373.724],
+ [0,746.378,215.089],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317589,0.0452179,0.000839363,0.00087423,0.0858828],
+ "R": [
+ [-0.2053627335,-0.023863444,0.9783949528],
+ [-0.1366627843,0.9906072975,-0.004523879826],
+ [-0.9690972248,-0.1346392148,-0.2066950671]
+ ],
+ "t": [
+ [2.454839771],
+ [148.020868],
+ [256.5149472]
+ ]
+ },
+ {
+ "name": "20_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 20,
+ "K": [
+ [744.35,0,378.361],
+ [0,744.386,245.706],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305792,-0.0298413,-5.26611e-05,9.57392e-05,0.206854],
+ "R": [
+ [-0.2653224987,0.04663873586,0.9630310483],
+ [-0.08123292055,0.9941966424,-0.07052835541],
+ [-0.9607315881,-0.09694258412,-0.2599941366]
+ ],
+ "t": [
+ [23.42848118],
+ [157.616994],
+ [260.7931406]
+ ]
+ },
+ {
+ "name": "20_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 21,
+ "K": [
+ [747.371,0,368.768],
+ [0,747.344,231.897],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308946,-0.0139041,-0.000755627,-0.000244894,0.190547],
+ "R": [
+ [-0.2375675449,-0.01520768023,0.9712519694],
+ [-0.09352440886,0.9955903179,-0.007287238765],
+ [-0.966858235,-0.09256697771,-0.2379422368]
+ ],
+ "t": [
+ [-12.76210059],
+ [163.3748289],
+ [261.1782343]
+ ]
+ },
+ {
+ "name": "20_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 22,
+ "K": [
+ [746.314,0,371.788],
+ [0,745.992,237.732],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315167,0.0352154,-0.000828301,0.000312219,0.0891012],
+ "R": [
+ [-0.2145858088,0.0004599306573,0.9767050318],
+ [-0.07749764501,0.9968390076,-0.017495939],
+ [-0.9736257216,-0.07944672006,-0.2138718611]
+ ],
+ "t": [
+ [-33.0373727],
+ [146.3668194],
+ [262.1626174]
+ ]
+ },
+ {
+ "name": "20_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 23,
+ "K": [
+ [746.318,0,371.868],
+ [0,746.096,236.531],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318459,0.0405311,0.000489761,-0.000285822,0.0876741],
+ "R": [
+ [-0.2554085937,0.004734611177,0.9668216142],
+ [-0.07039835709,0.9972425561,-0.02348096154],
+ [-0.9642668311,-0.0740598926,-0.25437101]
+ ],
+ "t": [
+ [-17.40671779],
+ [124.2252344],
+ [264.0602836]
+ ]
+ },
+ {
+ "name": "20_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 24,
+ "K": [
+ [745.832,0,382.965],
+ [0,745.816,231.317],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320385,0.0446211,0.00028801,0.00167617,0.104376],
+ "R": [
+ [-0.2362773498,-0.02089730322,0.9714609188],
+ [-0.1013714927,0.9948433166,-0.003255144035],
+ [-0.9663833786,-0.09924756028,-0.2371773332]
+ ],
+ "t": [
+ [-5.093436327],
+ [126.6662443],
+ [260.9183094]
+ ]
+ },
+ {
+ "name": "00_00",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 0,
+ "K": [
+ [1634.03,0,942.792],
+ [0,1629.73,558.29],
+ [0,0,1]
+ ],
+ "distCoef": [-0.222445,0.199192,8.73054e-05,0.000982243,0.0238445],
+ "R": [
+ [0.1369296663,0.03357591931,-0.9900115778],
+ [-0.09021094677,0.9956950625,0.02129149064],
+ [0.9864645212,0.08639444504,0.1393691081]
+ ],
+ "t": [
+ [20.90028135],
+ [127.2202879],
+ [283.1159034]
+ ]
+ },
+ {
+ "name": "00_01",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 1,
+ "K": [
+ [1395.91,0,951.559],
+ [0,1392.24,561.398],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286227,0.183082,-4.29815e-05,0.000644874,-0.0479635],
+ "R": [
+ [0.05337497606,0.02479711619,0.9982666052],
+ [0.6376765256,0.7684660834,-0.05318390075],
+ [-0.7684528356,0.6394098699,0.0252043199]
+ ],
+ "t": [
+ [6.299256813],
+ [104.397182],
+ [363.078698]
+ ]
+ },
+ {
+ "name": "00_02",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 2,
+ "K": [
+ [1397.02,0,939.355],
+ [0,1394.04,556.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28229,0.173658,-0.000610716,0.000955319,-0.0398628],
+ "R": [
+ [-0.9970491806,0.05290586318,-0.05562284625],
+ [-0.01182874156,0.6100448884,0.792278559],
+ [0.07584861407,0.7905986364,-0.6076189463]
+ ],
+ "t": [
+ [-16.22360931],
+ [63.30660163],
+ [381.0181823]
+ ]
+ },
+ {
+ "name": "00_03",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 3,
+ "K": [
+ [1395.71,0,949.456],
+ [0,1392.06,566.648],
+ [0,0,1]
+ ],
+ "distCoef": [-0.281728,0.168097,-0.00021431,1.8072e-05,-0.0371786],
+ "R": [
+ [-0.6216465312,-0.0285781748,0.7827763909],
+ [0.07448493547,0.9926490654,0.09539301533],
+ [-0.7797484111,0.117605786,-0.6149482047]
+ ],
+ "t": [
+ [-14.50346059],
+ [117.4297203],
+ [290.1984382]
+ ]
+ },
+ {
+ "name": "00_04",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 4,
+ "K": [
+ [1633.26,0,949.479],
+ [0,1629.32,572.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.223003,0.185095,-0.000261654,0.00109433,0.0657602],
+ "R": [
+ [-0.5292732399,-0.01229259603,0.8483623811],
+ [0.636650989,0.6551966806,0.4066851706],
+ [-0.5608434325,0.7553583268,-0.3389519765]
+ ],
+ "t": [
+ [-5.411400695],
+ [80.12176746],
+ [379.8488129]
+ ]
+ },
+ {
+ "name": "00_05",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 5,
+ "K": [
+ [1396.29,0,933.34],
+ [0,1392.95,560.462],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28733,0.185523,-0.000225825,-0.000143128,-0.0508452],
+ "R": [
+ [-0.9314658579,-0.01073438439,-0.363670357],
+ [-0.021313424,0.9994579907,0.02508909603],
+ [0.3632039283,0.03112069687,-0.9311897813]
+ ],
+ "t": [
+ [-6.050515741],
+ [143.9213951],
+ [280.3813532]
+ ]
+ },
+ {
+ "name": "00_06",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 6,
+ "K": [
+ [1396.11,0,950.228],
+ [0,1392.54,548.78],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286481,0.183173,-0.000152555,0.0010664,-0.0482263],
+ "R": [
+ [0.9448241112,-0.04876703013,-0.3239277321],
+ [-0.2141569626,0.6563150135,-0.7234551806],
+ [0.2478793944,0.7529092773,0.6096584503]
+ ],
+ "t": [
+ [-10.023614],
+ [84.45695974],
+ [376.925635]
+ ]
+ },
+ {
+ "name": "00_07",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 7,
+ "K": [
+ [1395.51,0,947.67],
+ [0,1392.41,549.081],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286691,0.185163,-6.53256e-05,4.32858e-06,-0.052639],
+ "R": [
+ [-0.9419632708,-0.03700247277,0.3336705164],
+ [0.180351898,0.7825307202,0.5959185052],
+ [-0.2831578878,0.6215114552,-0.7304417305]
+ ],
+ "t": [
+ [-5.250326149],
+ [112.5645453],
+ [360.2387508]
+ ]
+ },
+ {
+ "name": "00_08",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 8,
+ "K": [
+ [1642.7,0,945.082],
+ [0,1638.64,562.465],
+ [0,0,1]
+ ],
+ "distCoef": [-0.22444,0.208938,-0.000569838,0.000484927,0.0287248],
+ "R": [
+ [0.9544726119,0.01685383959,-0.2978220632],
+ [-0.03362017317,0.9981191009,-0.05126347965],
+ [0.2963979035,0.05894241665,0.9532439742]
+ ],
+ "t": [
+ [-19.67808464],
+ [136.6798831],
+ [282.6801175]
+ ]
+ },
+ {
+ "name": "00_09",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 9,
+ "K": [
+ [1396.79,0,945.482],
+ [0,1393.03,542.64],
+ [0,0,1]
+ ],
+ "distCoef": [-0.284259,0.175176,-0.000406823,0.000640552,-0.0406716],
+ "R": [
+ [-0.3169419478,-0.08460972789,0.9446634298],
+ [-0.1243350249,0.9911238917,0.04705563528],
+ [-0.9402598595,-0.1025408464,-0.3246486894]
+ ],
+ "t": [
+ [6.780958613],
+ [147.0057696],
+ [260.6395044]
+ ]
+ },
+ {
+ "name": "00_10",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 10,
+ "K": [
+ [1393.87,0,944.546],
+ [0,1390.36,563.199],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285353,0.177704,-0.000109708,0.000471392,-0.0432146],
+ "R": [
+ [0.9503475669,0.04849461332,0.3073886376],
+ [0.1560494297,0.7803459045,-0.6055648973],
+ [-0.2692360999,0.6234649483,0.734032275]
+ ],
+ "t": [
+ [22.71992555],
+ [112.7759402],
+ [360.0009328]
+ ]
+ },
+ {
+ "name": "00_11",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 11,
+ "K": [
+ [1492.96,0,934.544],
+ [0,1489.74,547.466],
+ [0,0,1]
+ ],
+ "distCoef": [-0.259288,0.190057,-5.50625e-05,0.00031915,-0.0281283],
+ "R": [
+ [0.8129763959,0.04080422416,-0.5808652124],
+ [-0.2848486357,0.8979062573,-0.3355973896],
+ [0.5078687177,0.4382914196,0.7415996205]
+ ],
+ "t": [
+ [-0.03199165418],
+ [105.1487628],
+ [331.4862369]
+ ]
+ },
+ {
+ "name": "00_12",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 12,
+ "K": [
+ [1395.93,0,964.611],
+ [0,1392.67,564.875],
+ [0,0,1]
+ ],
+ "distCoef": [-0.290995,0.19463,-0.000241491,0.000727782,-0.0582663],
+ "R": [
+ [-0.9950957343,0.04321912909,-0.08897520145],
+ [-0.001969290489,0.8906636271,0.454658581],
+ [0.09889692354,0.4526040326,-0.886210465]
+ ],
+ "t": [
+ [24.66653867],
+ [97.49188585],
+ [334.8897626]
+ ]
+ },
+ {
+ "name": "00_13",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 13,
+ "K": [
+ [1592.21,0,937.375],
+ [0,1588.39,560.919],
+ [0,0,1]
+ ],
+ "distCoef": [-0.239248,0.229218,0.000137317,0.000315934,-0.0358302],
+ "R": [
+ [-0.2862766934,0.07452649614,-0.9552441867],
+ [-0.7557457469,0.5952786327,0.2729317047],
+ [0.588977097,0.8000557173,-0.1140913162]
+ ],
+ "t": [
+ [-15.47943966],
+ [60.20818768],
+ [381.0821849]
+ ]
+ },
+ {
+ "name": "00_14",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 14,
+ "K": [
+ [1649.51,0,934.882],
+ [0,1644.85,568.024],
+ [0,0,1]
+ ],
+ "distCoef": [-0.22365,0.220791,-0.000591343,0.000286172,0.0121962],
+ "R": [
+ [0.827339054,-0.07848137689,0.5561930989],
+ [0.02005408661,0.9936867625,0.110383204],
+ [-0.5613447456,-0.08017039095,0.8236897383]
+ ],
+ "t": [
+ [-7.23447972],
+ [142.1657406],
+ [267.9541185]
+ ]
+ },
+ {
+ "name": "00_15",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 15,
+ "K": [
+ [1430.11,0,948.926],
+ [0,1426.48,561.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.277948,0.185701,0.000192514,0.000149713,-0.0424254],
+ "R": [
+ [-0.9997414125,0.006454955712,0.02180462522],
+ [0.005192647027,0.9983342904,-0.05746025644],
+ [-0.02213920846,-0.05733217422,-0.9981096519]
+ ],
+ "t": [
+ [9.642162177],
+ [134.9258555],
+ [268.2324221]
+ ]
+ },
+ {
+ "name": "00_16",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 16,
+ "K": [
+ [1427.34,0,949.618],
+ [0,1423.13,548.132],
+ [0,0,1]
+ ],
+ "distCoef": [-0.279453,0.188683,-0.000345265,0.000583475,-0.0479414],
+ "R": [
+ [0.7694875517,0.002369830201,0.6386574134],
+ [0.2539259376,0.9164213706,-0.3093436433],
+ [-0.586012394,0.4002077652,0.7045730755]
+ ],
+ "t": [
+ [4.866150988],
+ [118.1652356],
+ [330.6340665]
+ ]
+ },
+ {
+ "name": "00_17",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 17,
+ "K": [
+ [1393.35,0,916.395],
+ [0,1390.34,563.652],
+ [0,0,1]
+ ],
+ "distCoef": [-0.287138,0.186145,7.50854e-05,0.000557424,-0.0513205],
+ "R": [
+ [0.5039250676,0.09465184024,-0.8585456047],
+ [-0.6050310345,0.7480627966,-0.2726527087],
+ [0.6164389455,0.6568432701,0.4342348962]
+ ],
+ "t": [
+ [18.2296155],
+ [97.71531857],
+ [361.6667015]
+ ]
+ },
+ {
+ "name": "00_18",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 18,
+ "K": [
+ [1542.2,0,947.567],
+ [0,1538.02,555.168],
+ [0,0,1]
+ ],
+ "distCoef": [-0.245751,0.182006,3.81269e-06,0.000651097,0.00472657],
+ "R": [
+ [-0.4048875531,-0.001022756131,0.9143659133],
+ [0.3656410889,0.9163838146,0.1629334173],
+ [-0.8380767647,0.4002994608,-0.3706584387]
+ ],
+ "t": [
+ [16.25260358],
+ [116.7586119],
+ [329.7529305]
+ ]
+ },
+ {
+ "name": "00_19",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 19,
+ "K": [
+ [1396.57,0,949.242],
+ [0,1393.19,554.872],
+ [0,0,1]
+ ],
+ "distCoef": [-0.280864,0.167216,-6.6519e-05,0.000917406,-0.0342733],
+ "R": [
+ [0.7360342296,0.009501079563,0.6768776421],
+ [0.5173282683,0.6370082142,-0.5714822813],
+ [-0.4366063167,0.7707984591,0.4639446731]
+ ],
+ "t": [
+ [-24.15514071],
+ [74.04862943],
+ [379.5076537]
+ ]
+ },
+ {
+ "name": "00_20",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 20,
+ "K": [
+ [1403.46,0,940.386],
+ [0,1400.1,552.684],
+ [0,0,1]
+ ],
+ "distCoef": [-0.287177,0.194004,-0.000120001,8.41526e-05,-0.0604614],
+ "R": [
+ [-0.6201222217,0.04052054618,-0.7834580496],
+ [-0.1302964194,0.9794749929,0.1537907063],
+ [0.773609251,0.1974508131,-0.6021145267]
+ ],
+ "t": [
+ [24.4496252],
+ [140.6900046],
+ [300.8290806]
+ ]
+ },
+ {
+ "name": "00_21",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 21,
+ "K": [
+ [1397.56,0,932.828],
+ [0,1393.91,562.186],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28642,0.185674,-0.000229601,1.91211e-05,-0.052608],
+ "R": [
+ [-0.2617478675,-0.05032313647,-0.9638234464],
+ [-0.4532392419,0.8880813121,0.07671878938],
+ [0.8520928608,0.4569235877,-0.2552618099]
+ ],
+ "t": [
+ [-8.784671236],
+ [98.11062797],
+ [332.9193692]
+ ]
+ },
+ {
+ "name": "00_22",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 22,
+ "K": [
+ [1514.1,0,945.861],
+ [0,1510.18,558.694],
+ [0,0,1]
+ ],
+ "distCoef": [-0.260535,0.216046,-0.000156491,0.000677315,-0.0506741],
+ "R": [
+ [-0.9239818557,-0.0613765916,0.3774790647],
+ [0.05486070575,0.9555572213,0.289656175],
+ [-0.3784809549,0.288345818,-0.8795503715]
+ ],
+ "t": [
+ [-5.224239691],
+ [110.7456244],
+ [313.8855054]
+ ]
+ },
+ {
+ "name": "00_23",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 23,
+ "K": [
+ [1572.86,0,941.716],
+ [0,1568.17,560.048],
+ [0,0,1]
+ ],
+ "distCoef": [-0.240801,0.195963,-0.000444179,0.000458513,0.00455186],
+ "R": [
+ [0.5162966551,0.01335424781,0.856305686],
+ [0.1418829708,0.9847272537,-0.100903213],
+ [-0.8445750331,0.173591186,0.506516647]
+ ],
+ "t": [
+ [2.417701344],
+ [102.3557555],
+ [298.3746617]
+ ]
+ },
+ {
+ "name": "00_24",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 24,
+ "K": [
+ [1399.63,0,954.539],
+ [0,1396.27,546.388],
+ [0,0,1]
+ ],
+ "distCoef": [-0.288761,0.190789,4.23479e-05,6.78832e-05,-0.0577764],
+ "R": [
+ [-0.388991142,-0.05987834367,-0.9192934653],
+ [0.02928793432,0.9965772059,-0.07730517199],
+ [0.9207758187,-0.05699523376,-0.3859059924]
+ ],
+ "t": [
+ [-15.12220678],
+ [134.1751339],
+ [265.239245]
+ ]
+ },
+ {
+ "name": "00_25",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 25,
+ "K": [
+ [1397.66,0,935.585],
+ [0,1394.65,559.251],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285722,0.183994,-0.000502702,0.000494145,-0.0515729],
+ "R": [
+ [0.7926422733,0.00130484237,-0.6096855943],
+ [0.04487405742,0.9971605675,0.06047414042],
+ [0.6080333424,-0.07529342651,0.7903330655]
+ ],
+ "t": [
+ [4.539475053],
+ [139.2223569],
+ [261.6293171]
+ ]
+ },
+ {
+ "name": "00_26",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 26,
+ "K": [
+ [1616.8,0,950.116],
+ [0,1613.47,551.417],
+ [0,0,1]
+ ],
+ "distCoef": [-0.223464,0.185279,-0.00090721,0.000127112,0.0351947],
+ "R": [
+ [-0.7556190155,-0.04350579001,-0.6535649545],
+ [0.1389994774,0.9644159151,-0.2249023966],
+ [0.6400930001,-0.2607857146,-0.7226837222]
+ ],
+ "t": [
+ [-12.5475419],
+ [141.1612209],
+ [240.8579734]
+ ]
+ },
+ {
+ "name": "00_27",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 27,
+ "K": [
+ [1861.86,0,934.556],
+ [0,1857.26,552.106],
+ [0,0,1]
+ ],
+ "distCoef": [-0.171511,0.209759,-1.83176e-05,-3.41566e-05,0.211418],
+ "R": [
+ [0.9782876177,0.02697940456,0.2054883178],
+ [0.02691509764,0.9665557486,-0.2550403151],
+ [-0.2054967507,0.2550335204,0.9448433674]
+ ],
+ "t": [
+ [-0.5131666478],
+ [123.4498457],
+ [311.6401591]
+ ]
+ },
+ {
+ "name": "00_28",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 28,
+ "K": [
+ [1395.57,0,953.143],
+ [0,1392.36,561.982],
+ [0,0,1]
+ ],
+ "distCoef": [-0.284934,0.181016,0.000127361,0.000271191,-0.0471616],
+ "R": [
+ [-0.6310677524,-0.02949081954,-0.775166939],
+ [-0.5128354354,0.7656140117,0.3883748207],
+ [0.5820251782,0.6426238999,-0.4982782509]
+ ],
+ "t": [
+ [-8.508070023],
+ [104.2896072],
+ [361.3816814]
+ ]
+ },
+ {
+ "name": "00_29",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 29,
+ "K": [
+ [1400.36,0,939.608],
+ [0,1397.25,572.603],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286109,0.1878,-0.000309515,0.000886248,-0.0523515],
+ "R": [
+ [0.4887300705,-0.07268882749,-0.8694016635],
+ [-0.08227020668,0.9882426049,-0.1288726774],
+ [0.8685473685,0.1345098073,0.4770037531]
+ ],
+ "t": [
+ [-20.72850042],
+ [158.8912224],
+ [289.281465]
+ ]
+ },
+ {
+ "name": "00_30",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 30,
+ "K": [
+ [1407.21,0,946.883],
+ [0,1403.86,563.032],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285813,0.195568,-0.000394067,0.000468367,-0.0600751],
+ "R": [
+ [0.08635045426,0.06174190292,0.9943498059],
+ [0.2147800801,0.9734543185,-0.07909618832],
+ [-0.9728376618,0.2203965227,0.07079729175]
+ ],
+ "t": [
+ [13.79078928],
+ [132.1300437],
+ [306.0754676]
+ ]
+ },
+ {
+ "name": "50_01",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 1,
+ "K": [
+ [1053.92,0,947.294],
+ [0,1054.32,535.405],
+ [0,0,1]
+ ],
+ "distCoef": [0.0476403,-0.053786,0.000733314,-0.000579648,0.0122759],
+ "R": [
+ [0.9095307192,0.0006254166507,-0.4156362348],
+ [-0.003349684277,0.999977422,-0.0058253781],
+ [0.4156232073,0.006690610494,0.9095122788]
+ ],
+ "t": [
+ [-15.84850815],
+ [103.1392168],
+ [269.3362326]
+ ]
+ },
+ {
+ "name": "50_02",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 2,
+ "K": [
+ [1058.92,0,971.224],
+ [0,1059.3,541.276],
+ [0,0,1]
+ ],
+ "distCoef": [0.0485216,-0.0529886,-0.000413578,-0.000171659,0.00909728],
+ "R": [
+ [-0.08404700998,-0.006825065684,-0.9964384169],
+ [-0.04073006897,0.9991643735,-0.003408260769],
+ [0.9956290281,0.04029855131,-0.08425476347]
+ ],
+ "t": [
+ [-4.246538185],
+ [93.69672118],
+ [271.0169727]
+ ]
+ },
+ {
+ "name": "50_03",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 3,
+ "K": [
+ [1050.35,0,971.069],
+ [0,1050.88,535.343],
+ [0,0,1]
+ ],
+ "distCoef": [0.0482196,-0.0555053,0.000460862,0.000594278,0.0128034],
+ "R": [
+ [-0.9791929995,-0.0009192386581,-0.2029291126],
+ [0.004325206908,0.9996680429,-0.02539875018],
+ [0.2028850964,-0.02574798878,-0.9788639736]
+ ],
+ "t": [
+ [-10.71273011],
+ [112.0293664],
+ [269.2258843]
+ ]
+ },
+ {
+ "name": "50_04",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 4,
+ "K": [
+ [1053.76,0,952.563],
+ [0,1053.62,535.073],
+ [0,0,1]
+ ],
+ "distCoef": [0.0534802,-0.059505,0.000265754,-0.00038559,0.0128987],
+ "R": [
+ [-0.4973721867,-0.01252789009,0.8674468052],
+ [-0.05725964091,0.9981894693,-0.01841512904],
+ [-0.8656455634,-0.05882886558,-0.4971890215]
+ ],
+ "t": [
+ [-12.12207689],
+ [119.639642],
+ [263.8142799]
+ ]
+ },
+ {
+ "name": "50_05",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 5,
+ "K": [
+ [1061.53,0,963.346],
+ [0,1061.99,535.689],
+ [0,0,1]
+ ],
+ "distCoef": [0.0450742,-0.0483577,0.000117724,0.00131017,0.00746483],
+ "R": [
+ [0.6332975321,0.02789684006,0.7734054578],
+ [-0.04440403331,0.9990136015,0.0003253688515],
+ [-0.772633495,-0.034548377,0.6339115806]
+ ],
+ "t": [
+ [4.398197962],
+ [114.449943],
+ [269.0646085]
+ ]
+ },
+ {
+ "name": "50_06",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 6,
+ "K": [
+ [1053.8,0,975.87],
+ [0,1054.44,518.546],
+ [0,0,1]
+ ],
+ "distCoef": [0.0608578,-0.0758877,0.000572907,0.000423304,0.0232485],
+ "R": [
+ [0.9936973916,-0.01776547634,0.1106791841],
+ [0.08238304881,0.7853099766,-0.6135969963],
+ [-0.07601662453,0.6188478234,0.7818240495]
+ ],
+ "t": [
+ [-23.36095562],
+ [58.01362542],
+ [350.0526212]
+ ]
+ },
+ {
+ "name": "50_07",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 7,
+ "K": [
+ [1058.37,0,951.456],
+ [0,1058.06,537.752],
+ [0,0,1]
+ ],
+ "distCoef": [0.0510704,-0.0625189,-0.000144014,6.68608e-05,0.016463],
+ "R": [
+ [0.4325769754,-0.03234243573,-0.9010167186],
+ [-0.4868424381,0.832758343,-0.2636247005],
+ [0.7588554545,0.5526911516,0.344486415]
+ ],
+ "t": [
+ [-19.0385587],
+ [87.13576568],
+ [341.2560709]
+ ]
+ },
+ {
+ "name": "50_08",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 8,
+ "K": [
+ [1051.92,0,937.937],
+ [0,1051.86,554.246],
+ [0,0,1]
+ ],
+ "distCoef": [0.0499863,-0.0613843,-4.12419e-05,-0.000155211,0.0174279],
+ "R": [
+ [-0.7043873056,-0.07078753835,-0.7062773168],
+ [-0.4398115151,0.8245196459,0.3559960458],
+ [0.5571394394,0.5613879923,-0.6119143463]
+ ],
+ "t": [
+ [-21.03532832],
+ [82.26745729],
+ [344.5100871]
+ ]
+ },
+ {
+ "name": "50_09",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 9,
+ "K": [
+ [1054,0,961.563],
+ [0,1054.08,544.179],
+ [0,0,1]
+ ],
+ "distCoef": [0.0446773,-0.0530941,0.000226286,-0.000324258,0.0121913],
+ "R": [
+ [-0.8728623151,-0.0989156561,0.4778358211],
+ [0.2068965126,0.8118396582,0.5459946908],
+ [-0.4419334927,0.5754407548,-0.6881589393]
+ ],
+ "t": [
+ [-36.30074608],
+ [73.0041962],
+ [346.5857858]
+ ]
+ },
+ {
+ "name": "50_10",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 10,
+ "K": [
+ [1050.04,0,941.59],
+ [0,1050.6,559.398],
+ [0,0,1]
+ ],
+ "distCoef": [0.0506861,-0.0636966,0.000195295,-6.41025e-06,0.0181857],
+ "R": [
+ [0.1849149694,0.002001709126,0.9827524852],
+ [0.5894867579,0.7998990427,-0.1125472514],
+ [-0.786328059,0.6001312479,0.146733326]
+ ],
+ "t": [
+ [-12.26435316],
+ [64.88453925],
+ [349.5293231]
+ ]
+ }
+ ]
+}
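
The calibration file that ends above stores, for each camera, an intrinsic matrix `K`, a 5-element `distCoef` vector, a rotation `R` and a translation `t`. The sketch below (not part of the diff) shows one way such an entry could be consumed; the file path, and the world-to-camera convention (camera coordinates = R·X + t, with OpenCV's 5-parameter distortion model for `distCoef`), are assumptions based on the CMU Panoptic Studio calibration format rather than anything stated in these files.

```python
# Minimal sketch, assuming the CMU Panoptic Studio convention:
#   x_cam = R @ X_world + t,  then pinhole projection through K with
#   OpenCV-style distortion (k1, k2, p1, p2, k3) from "distCoef".
import json

import cv2
import numpy as np

with open("calibration_160906_band1.json") as f:   # illustrative path
    calib = json.load(f)

cam = next(c for c in calib["cameras"] if c["name"] == "00_00")  # an HD camera
K = np.array(cam["K"], dtype=np.float64)            # 3x3 intrinsics
R = np.array(cam["R"], dtype=np.float64)            # 3x3 world-to-camera rotation
t = np.array(cam["t"], dtype=np.float64)            # 3x1 translation (assumed cm)
dist = np.array(cam["distCoef"], dtype=np.float64)  # 5 distortion coefficients

X_world = np.array([[0.0, 0.0, 0.0]])               # a world point to project
rvec, _ = cv2.Rodrigues(R)                          # rotation matrix -> rotation vector
pixels, _ = cv2.projectPoints(X_world, rvec, t, K, dist)
print(pixels.reshape(-1, 2))                        # (u, v) in the 1920x1080 image
```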
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000168.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000168.json
new file mode 100644
index 0000000000000000000000000000000000000000..9fd1245a01cf78417ff0fbbe2a42f77a09d54a0f
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000168.json
@@ -0,0 +1,15 @@
+{ "version": 0.7,
+"univTime" :47896.621,
+"fpsType" :"hd_29_97",
+"bodies" :
+[
+{ "id": 0,
+"joints19": [124.136, -105.991, -30.2854, 0.709717, 104.413, -117.401, -25.8053, 0.686829, 127.045, -63.7183, -29.9739, 0.446899, 120.248, -106.955, -45.6063, 0.575806, 111.254, -82.5488, -60.3606, 0.579651, 97.5534, -67.5661, -41.9563, 0.475403, 124.375, -63.6942, -41.0662, 0.417236, 83.4269, -47.7217, -38.6838, 0.203247, 0, 0, 0, -1, 128.266, -105.85, -14.7877, 0.602417, 127.309, -81.7486, 0.513331, 0.651978, 111.735, -70.4044, -14.4066, 0.506653, 129.716, -63.7424, -18.8814, 0.444885, 96.4134, -42.8902, 0.798849, 0.232483, 69.8839, -9.69057, 2.89286, 0.157349, 104.805, -121.062, -29.0914, 0.532288, 112.683, -122.17, -35.3427, 0.396851, 106.05, -121.316, -23.1694, 0.572449, 115.614, -122.634, -20.4819, 0.516724]
+},
+{ "id": 1,
+"joints19": [5.6087, -91.7251, 5.67075, 0.482361, 9.08691, -85.5182, -11.6583, 0.199097, 1.66692, -67.5066, 46.3826, 0.227905, 19.3811, -92.0237, 9.78557, 0.396667, 16.9647, -71.8503, 24.8387, 0.320435, 12.7281, -59.21, 8.82053, 0.295654, 9.3541, -67.7975, 46.523, 0.236145, 18.8347, -41.2812, 16.8548, 0.281738, 12.0261, -7.27261, 26.7646, 0.333557, -7.55927, -92.2895, 3.18333, 0.382874, -27.683, -73.6808, 6.9749, 0.261841, -13.6486, -60.4171, 9.29562, 0.138, -6.02027, -67.2158, 46.2422, 0.18158, -16.8764, -40.5201, 19.3464, 0.276733, -8.86912, -6.4542, 26.0121, 0.28595, 9.95262, -88.5757, -11.2162, 0.143005, 13.9014, -94.8183, -5.44828, 0.313904, 4.81003, -88.1194, -11.7807, 0.144226, -0.772542, -94.5733, -6.32695, 0.250549]
+},
+{ "id": 2,
+"joints19": [-46.623, -91.9748, -46.2094, 0.432495, -24.8662, -90.1627, -46.21, 0.392395, -75.3649, -54.9062, -38.0726, 0.157349, -43.6166, -91.045, -29.2171, 0.349304, -39.1777, -59.7671, -26.5732, 0.293274, -27.2637, -45.0868, -41.3075, 0.192017, -74.8994, -55.0912, -27.8377, 0.165894, -31.1378, -46.3092, -19.0025, 0.293518, -55.7049, -13.0915, -25.5646, 0.224976, -48.6966, -91.4388, -65.3742, 0.311768, -41.5079, -71.8104, -84.3137, 0.330261, -38.8408, -64.6871, -61.4434, 0.23877, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, -24.2805, -94.9466, -43.9519, 0.356812, -31.8517, -101.665, -40.2811, 0.385071, -24.9661, -93.5623, -49.2523, 0.2229, -31.0232, -98.0451, -55.4795, 0.279053]
+}
+] }
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000169.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000169.json
new file mode 100644
index 0000000000000000000000000000000000000000..e37c96c80a99b7016b80eaaa35094d99b992f789
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band1/hdPose3d_stage1_coco19/body3DScene_00000169.json
@@ -0,0 +1,15 @@
+{ "version": 0.7,
+"univTime" :47929.977,
+"fpsType" :"hd_29_97",
+"bodies" :
+[
+{ "id": 0,
+"joints19": [124.147, -105.973, -30.3072, 0.709717, 104.374, -117.36, -25.733, 0.686829, 127.183, -63.4893, -30.0803, 0.446899, 120.073, -106.823, -45.7773, 0.575806, 111.752, -82.4054, -59.6925, 0.579651, 98.0636, -68.2993, -41.7845, 0.475403, 124.546, -63.5584, -41.3032, 0.417236, 82.7474, -47.6873, -38.9817, 0.203247, 0, 0, 0, -1, 128.539, -105.963, -14.9304, 0.602417, 127.104, -81.4156, 0.247136, 0.651978, 111.693, -68.1563, -14.442, 0.506653, 129.82, -63.4201, -18.8574, 0.444885, 96.2478, -42.7821, 0.805848, 0.232483, 69.9732, -9.8122, 2.74933, 0.157349, 104.724, -121.033, -28.9868, 0.532288, 112.609, -122.205, -35.2695, 0.396851, 105.982, -121.283, -23.115, 0.572449, 115.59, -122.596, -20.5341, 0.516724]
+},
+{ "id": 1,
+"joints19": [5.04299, -94.3889, 5.11889, 0.482361, 7.96533, -87.0756, -12.5883, 0.199097, 1.01761, -67.6827, 45.8372, 0.227905, 18.8279, -94.4092, 9.16816, 0.396667, 16.9833, -73.798, 24.051, 0.320435, 13.7023, -60.619, 8.29119, 0.295654, 9.38785, -67.9091, 45.5134, 0.236145, 18.7713, -41.7337, 15.9782, 0.281738, 12.1838, -7.47727, 26.3543, 0.333557, -8.15184, -94.0344, 2.74916, 0.382874, -27.5733, -74.6123, 6.71778, 0.261841, -11.5391, -62.3807, 6.49904, 0.138, -7.35263, -67.4562, 46.1611, 0.18158, -16.292, -40.381, 19.0151, 0.276733, -8.81955, -6.61585, 25.9785, 0.28595, 9.40427, -90.5736, -11.3424, 0.143005, 13.4035, -96.5672, -5.99329, 0.313904, 3.80369, -90.1657, -12.6569, 0.144226, -1.05309, -95.823, -6.96162, 0.250549]
+},
+{ "id": 2,
+"joints19": [-47.6019, -93.4704, -46.6587, 0.432495, -26.2199, -91.5537, -46.2747, 0.392395, -75.3649, -54.9062, -38.0726, 0.157349, -44.5263, -91.3939, -29.3512, 0.349304, -40.4455, -62.5189, -26.2502, 0.293274, -28.7968, -47.0727, -40.3408, 0.192017, -75.2261, -55.2629, -28.1895, 0.165894, -31.459, -46.5397, -18.7491, 0.293518, -55.7543, -13.1495, -25.2371, 0.224976, -50.2673, -93.2394, -65.8827, 0.311768, -41.5254, -72.5218, -84.4612, 0.330261, -38.9596, -65.8381, -61.5105, 0.23877, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, -25.5114, -96.8998, -43.1621, 0.356812, -33.2547, -102.681, -40.4011, 0.385071, -26.3223, -94.9484, -49.0802, 0.2229, -32.1236, -99.3278, -55.4444, 0.279053]
+}
+] }
\ No newline at end of file
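
The `body3DScene_*.json` frames above store each body's pose as a flat `joints19` list. A small sketch of parsing it follows; the (x, y, z, confidence) layout and the use of a non-positive confidence for missing joints are assumptions based on the CMU Panoptic Studio "coco19" skeleton format, not something these test files declare.

```python
# Minimal sketch: reshape each "joints19" list into a (19, 4) array of
# (x, y, z, confidence) rows and count the joints flagged as valid.
import json

import numpy as np

with open("body3DScene_00000168.json") as f:        # illustrative path
    frame = json.load(f)

for body in frame["bodies"]:
    joints = np.asarray(body["joints19"], dtype=np.float64).reshape(19, 4)
    valid = joints[:, 3] > 0                        # assumed: conf <= 0 means missing
    print(body["id"], int(valid.sum()), "valid joints")
```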
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/calibration_160906_band2.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/calibration_160906_band2.json
new file mode 100644
index 0000000000000000000000000000000000000000..31c0429b03ee8fe8c6b4680e6205056c397af2b4
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/calibration_160906_band2.json
@@ -0,0 +1,11965 @@
+{
+ "calibDataSource": "160906_calib_norm",
+ "cameras": [
+ {
+ "name": "01_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 1,
+ "K": [
+ [745.698,0,375.512],
+ [0,745.89,226.023],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324009,0.0732398,-0.000601245,0.000808154,0.0311011],
+ "R": [
+ [0.9609979695,0.02878724306,-0.2750530807],
+ [-0.05024448072,0.9961896773,-0.07128547526],
+ [0.2719529274,0.08232509619,0.9587826572]
+ ],
+ "t": [
+ [-51.56945892],
+ [143.9587601],
+ [282.5664691]
+ ]
+ },
+ {
+ "name": "01_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 2,
+ "K": [
+ [745.462,0,369.225],
+ [0,745.627,226.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336594,0.141798,-0.000612176,0.000160485,-0.0646767],
+ "R": [
+ [0.9715220842,-0.01574832828,-0.2364251047],
+ [0.005323209906,0.998987679,-0.04466856407],
+ [0.2368892218,0.042137956,0.9706224236]
+ ],
+ "t": [
+ [-66.22242206],
+ [142.1317177],
+ [278.6626087]
+ ]
+ },
+ {
+ "name": "01_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 3,
+ "K": [
+ [746.261,0,378.952],
+ [0,746.496,239.595],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322069,0.0440329,-0.000951664,0.000892653,0.103376],
+ "R": [
+ [0.9665011873,0.05534363601,-0.2506242943],
+ [-0.07024277085,0.996230894,-0.05089164033],
+ [0.2468631364,0.06679137568,0.9667458322]
+ ],
+ "t": [
+ [-54.75524211],
+ [118.3584455],
+ [281.78809]
+ ]
+ },
+ {
+ "name": "01_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 4,
+ "K": [
+ [747.661,0,366.929],
+ [0,747.759,234.022],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32333,0.0462607,-0.000972333,-0.000898261,0.102804],
+ "R": [
+ [0.9662588837,0.08601234823,-0.2427872436],
+ [-0.1112831564,0.9894890375,-0.09234448444],
+ [0.23229255,0.1162468093,0.9656742984]
+ ],
+ "t": [
+ [-29.08626445],
+ [96.75744843],
+ [287.7183779]
+ ]
+ },
+ {
+ "name": "01_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 5,
+ "K": [
+ [742.413,0,353.224],
+ [0,742.622,209.478],
+ [0,0,1]
+ ],
+ "distCoef": [-0.297729,-0.0985766,-0.000505185,-0.000773418,0.328727],
+ "R": [
+ [0.9718071292,0.05098345905,-0.2301990238],
+ [-0.07271497659,0.9935575811,-0.0869244798],
+ [0.2242842746,0.1012127458,0.9692536016]
+ ],
+ "t": [
+ [-26.91018729],
+ [77.97642882],
+ [285.7140393]
+ ]
+ },
+ {
+ "name": "01_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 6,
+ "K": [
+ [743.487,0,372.277],
+ [0,743.725,241.821],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317534,0.0281748,0.00130284,-0.000186889,0.119129],
+ "R": [
+ [0.9681278444,0.07458666466,-0.2390926732],
+ [-0.09383510211,0.9931135585,-0.07014580141],
+ [0.2322142341,0.09034538891,0.968459736]
+ ],
+ "t": [
+ [-7.038020326],
+ [73.51221006],
+ [284.7303027]
+ ]
+ },
+ {
+ "name": "01_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 7,
+ "K": [
+ [748.393,0,380.919],
+ [0,748.388,229.353],
+ [0,0,1]
+ ],
+ "distCoef": [-0.344193,0.174813,-0.00034307,0.00107023,-0.0968505],
+ "R": [
+ [0.9670535143,-0.02995409712,-0.2528047715],
+ [0.01712365053,0.9984582116,-0.0528013286],
+ [0.2539966162,0.04673276982,0.9660754459]
+ ],
+ "t": [
+ [-4.52170598],
+ [98.55800179],
+ [280.6705064]
+ ]
+ },
+ {
+ "name": "01_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 8,
+ "K": [
+ [745.37,0,362.362],
+ [0,745.56,217.483],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326014,0.0789588,-0.000462463,-0.00138061,0.0222432],
+ "R": [
+ [0.9652282485,0.06485174985,-0.2532364089],
+ [-0.07898708824,0.9958116468,-0.0460456736],
+ [0.2491896228,0.06444699145,0.9663079826]
+ ],
+ "t": [
+ [26.28384049],
+ [86.2200762],
+ [282.8912643]
+ ]
+ },
+ {
+ "name": "01_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 9,
+ "K": [
+ [746.037,0,338.236],
+ [0,746.053,236.859],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314486,0.0395532,0.000625849,-0.000232478,0.0599275],
+ "R": [
+ [0.9656569777,0.07278005487,-0.2494186543],
+ [-0.09030273149,0.9941334749,-0.05953193019],
+ [0.2436226964,0.08001060955,0.9665641645]
+ ],
+ "t": [
+ [45.35508632],
+ [94.7965848],
+ [284.0947744]
+ ]
+ },
+ {
+ "name": "01_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 10,
+ "K": [
+ [747.938,0,379.271],
+ [0,748.269,227.432],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3484,0.205218,-0.00110069,0.000562921,-0.151344],
+ "R": [
+ [0.9662738854,-0.001312373382,-0.2575132151],
+ [-0.009587322107,0.9991104143,-0.04106657164],
+ [0.2573380297,0.04215041788,0.9654017199]
+ ],
+ "t": [
+ [30.05861189],
+ [130.0028668],
+ [279.9552314]
+ ]
+ },
+ {
+ "name": "01_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 11,
+ "K": [
+ [746.12,0,364.693],
+ [0,745.844,223.621],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335335,0.119703,0.000192218,0.00118296,-0.00812072],
+ "R": [
+ [0.9869891455,-0.01212212734,-0.1603292883],
+ [0.00355647539,0.9985558958,-0.05360479805],
+ [0.1607475603,0.05233714665,0.9856069424]
+ ],
+ "t": [
+ [71.07099717],
+ [142.6182462],
+ [275.3539702]
+ ]
+ },
+ {
+ "name": "01_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 12,
+ "K": [
+ [745.407,0,358.691],
+ [0,745.503,226.329],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325389,0.0923962,-0.00061832,-0.00189678,-0.0159561],
+ "R": [
+ [0.9589650047,0.08538224277,-0.2703627054],
+ [-0.09708669181,0.9948178626,-0.03019262438],
+ [0.2663837347,0.05520229083,0.9622849957]
+ ],
+ "t": [
+ [54.63033668],
+ [157.9150468],
+ [281.9236261]
+ ]
+ },
+ {
+ "name": "01_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 13,
+ "K": [
+ [744.389,0,339.442],
+ [0,744.512,216.258],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320138,0.0543285,-0.000196977,-0.00116274,0.0473598],
+ "R": [
+ [0.9724830194,-0.06319437739,-0.2242392645],
+ [0.03959405574,0.9933373951,-0.1082272161],
+ [0.2295845984,0.09637058799,0.9685058709]
+ ],
+ "t": [
+ [19.90234626],
+ [154.6647449],
+ [286.7518211]
+ ]
+ },
+ {
+ "name": "01_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 14,
+ "K": [
+ [746.213,0,363.165],
+ [0,746.641,235.418],
+ [0,0,1]
+ ],
+ "distCoef": [-0.33414,0.127633,-0.000792357,0.000136075,-0.0405619],
+ "R": [
+ [0.9643490552,0.006836134333,-0.2645452079],
+ [-0.02440508255,0.9977035557,-0.06318233054],
+ [0.2635057717,0.0673860684,0.9623013177]
+ ],
+ "t": [
+ [19.24633902],
+ [182.0747755],
+ [282.9928946]
+ ]
+ },
+ {
+ "name": "01_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 15,
+ "K": [
+ [745.225,0,366.568],
+ [0,745.569,216.05],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319743,0.046174,-0.00158438,-0.000953331,0.0743504],
+ "R": [
+ [0.9602661069,0.03565913048,-0.2767985376],
+ [-0.06162250151,0.9944158624,-0.08567239854],
+ [0.2721978533,0.09932531892,0.9571012536]
+ ],
+ "t": [
+ [0.9330302863],
+ [174.5612072],
+ [288.1067574]
+ ]
+ },
+ {
+ "name": "01_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 16,
+ "K": [
+ [747.633,0,371.752],
+ [0,747.88,230.613],
+ [0,0,1]
+ ],
+ "distCoef": [-0.347758,0.198029,0.00072103,0.00029865,-0.136932],
+ "R": [
+ [0.9682573711,0.05614690975,-0.2435676248],
+ [-0.07153002565,0.9959334273,-0.05477283913],
+ [0.2395018137,0.07045660367,0.968336072]
+ ],
+ "t": [
+ [-3.74774],
+ [172.5737662],
+ [282.7618788]
+ ]
+ },
+ {
+ "name": "01_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 17,
+ "K": [
+ [748.152,0,373.9],
+ [0,748.508,234.452],
+ [0,0,1]
+ ],
+ "distCoef": [-0.345127,0.177692,-0.00116897,0.00210199,-0.0818461],
+ "R": [
+ [0.9639501783,0.02458774974,-0.264944327],
+ [-0.04477053879,0.9965129817,-0.07040934697],
+ [0.2622892538,0.07973280283,0.9616896732]
+ ],
+ "t": [
+ [-36.08309916],
+ [173.4726636],
+ [283.4522322]
+ ]
+ },
+ {
+ "name": "01_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 18,
+ "K": [
+ [743.791,0,363.617],
+ [0,744.126,236.963],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312734,0.0122172,-0.00120247,-0.000963953,0.133944],
+ "R": [
+ [0.9523198878,0.06045552763,-0.2990517689],
+ [-0.07234112338,0.9969633514,-0.02882425707],
+ [0.2964010681,0.04908365416,0.9538014478]
+ ],
+ "t": [
+ [-57.80984395],
+ [175.8598769],
+ [275.2458542]
+ ]
+ },
+ {
+ "name": "01_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 19,
+ "K": [
+ [743.162,0,364.748],
+ [0,743.331,220.785],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311505,0.00290054,-0.000860754,-0.000437091,0.146397],
+ "R": [
+ [0.9677776267,0.05243241618,-0.246287042],
+ [-0.06515666231,0.9969134625,-0.04379677618],
+ [0.243230497,0.05843278173,0.968206866]
+ ],
+ "t": [
+ [-19.88792012],
+ [144.796335],
+ [280.8929426]
+ ]
+ },
+ {
+ "name": "01_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 20,
+ "K": [
+ [744.661,0,343.237],
+ [0,744.907,246.044],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326994,0.0904776,0.000984855,-0.00107766,-0.0214165],
+ "R": [
+ [0.9717064093,0.03462931454,-0.2336396043],
+ [-0.0436324388,0.998486683,-0.03347468014],
+ [0.2321268283,0.04272182698,0.9717468709]
+ ],
+ "t": [
+ [-15.15244103],
+ [127.7778149],
+ [279.5122056]
+ ]
+ },
+ {
+ "name": "01_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 21,
+ "K": [
+ [742.462,0,365.246],
+ [0,742.468,221.387],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311193,-0.0017069,-0.0010044,-5.33063e-05,0.168374],
+ "R": [
+ [0.9650420793,0.04068979072,-0.2589172188],
+ [-0.04945049005,0.9984003719,-0.02741069744],
+ [0.257387712,0.03925605981,0.965510501]
+ ],
+ "t": [
+ [-1.672862451],
+ [122.1992626],
+ [279.1232554]
+ ]
+ },
+ {
+ "name": "01_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 22,
+ "K": [
+ [744.021,0,363.587],
+ [0,744.301,226.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330855,0.115198,-0.00111581,-0.000578883,-0.0257811],
+ "R": [
+ [0.9624230562,-0.007741542698,-0.2714441553],
+ [-0.003557050749,0.9991484058,-0.04110730506],
+ [0.271531229,0.0405281588,0.9615759252]
+ ],
+ "t": [
+ [4.289641778],
+ [135.1743597],
+ [279.2863723]
+ ]
+ },
+ {
+ "name": "01_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 23,
+ "K": [
+ [745.029,0,358.645],
+ [0,745.162,224.101],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31925,0.0412999,-0.000788365,0.000625647,0.108146],
+ "R": [
+ [0.9553340738,0.01211961015,-0.2952793973],
+ [-0.03701510886,0.9961975848,-0.07886858543],
+ [0.293200766,0.08627564605,0.9521501057]
+ ],
+ "t": [
+ [-2.968489269],
+ [143.230855],
+ [285.3382881]
+ ]
+ },
+ {
+ "name": "01_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 1,
+ "node": 24,
+ "K": [
+ [744.501,0,369.38],
+ [0,744.575,244.409],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317214,0.0306635,-5.65201e-05,-0.000305408,0.106933],
+ "R": [
+ [0.9627375442,0.05351140442,-0.2650904574],
+ [-0.07422624073,0.9948691584,-0.06874462026],
+ [0.2600516991,0.08585969499,0.9617698408]
+ ],
+ "t": [
+ [-7.333655278],
+ [148.0612654],
+ [284.8699573]
+ ]
+ },
+ {
+ "name": "02_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 1,
+ "K": [
+ [746.79,0,376.022],
+ [0,747.048,234.17],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317408,0.0301922,-0.000108969,-0.00027109,0.105931],
+ "R": [
+ [0.977473966,0.04697618088,0.2057617172],
+ [0.001487552662,0.9733575223,-0.2292878562],
+ [-0.211050783,0.2244289915,0.9513617581]
+ ],
+ "t": [
+ [-1.729507611],
+ [175.3460492],
+ [304.9109171]
+ ]
+ },
+ {
+ "name": "02_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 2,
+ "K": [
+ [747.689,0,367.065],
+ [0,747.811,212.158],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333664,0.117162,0.000577725,-0.000310896,-0.0327554],
+ "R": [
+ [0.9812751339,-0.05714257326,0.183939767],
+ [0.09271495859,0.9771941455,-0.1910380552],
+ [-0.1688284573,0.2045148611,0.9641942873]
+ ],
+ "t": [
+ [-50.62568249],
+ [190.9654762],
+ [299.6250374]
+ ]
+ },
+ {
+ "name": "02_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 3,
+ "K": [
+ [745.627,0,353.486],
+ [0,745.817,252.683],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321416,0.0392112,-0.00107045,-0.00134198,0.0908854],
+ "R": [
+ [0.9757098845,0.1270834984,0.1784376802],
+ [-0.07601456941,0.9603325594,-0.2682967771],
+ [-0.2054556071,0.248215954,0.946666168]
+ ],
+ "t": [
+ [-23.13649132],
+ [169.3490841],
+ [309.2380875]
+ ]
+ },
+ {
+ "name": "02_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 4,
+ "K": [
+ [746.11,0,381.584],
+ [0,746.321,224.917],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323963,0.0585021,-0.000871966,0.000552522,0.0715102],
+ "R": [
+ [0.979331342,0.07410153523,0.1881995881],
+ [-0.02608477747,0.9689731658,-0.2457856551],
+ [-0.2005734451,0.2357964511,0.950878713]
+ ],
+ "t": [
+ [-32.63906075],
+ [150.8763932],
+ [306.9317958]
+ ]
+ },
+ {
+ "name": "02_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 5,
+ "K": [
+ [744.11,0,378.377],
+ [0,744.035,244.823],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323078,0.0494134,-0.000238923,-0.000981516,0.0727453],
+ "R": [
+ [0.9857440106,0.05652749171,0.1584720428],
+ [-0.01525193411,0.9680163878,-0.250422945],
+ [-0.1675593154,0.244435913,0.95507851]
+ ],
+ "t": [
+ [-62.3494258],
+ [135.8190029],
+ [306.0165552]
+ ]
+ },
+ {
+ "name": "02_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 6,
+ "K": [
+ [743.928,0,352.844],
+ [0,744.181,228.627],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303908,-0.0528673,-0.000528541,8.08764e-05,0.267531],
+ "R": [
+ [0.9814194485,0.06212733968,0.1815380393],
+ [-0.0101664424,0.9616367605,-0.2741375282],
+ [-0.1916050874,0.2671983057,0.9444006332]
+ ],
+ "t": [
+ [-53.86742917],
+ [106.6702196],
+ [310.2214119]
+ ]
+ },
+ {
+ "name": "02_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 7,
+ "K": [
+ [746.501,0,376.178],
+ [0,746.591,217.394],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323449,0.0621904,-0.000592526,0.000355354,0.0689781],
+ "R": [
+ [0.9775323693,0.09704954661,0.1871145437],
+ [-0.05094527723,0.9701636443,-0.2370381445],
+ [-0.2045361721,0.2221798567,0.9533105819]
+ ],
+ "t": [
+ [-27.21830655],
+ [111.2122483],
+ [305.8578091]
+ ]
+ },
+ {
+ "name": "02_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 8,
+ "K": [
+ [747.056,0,346.722],
+ [0,747.425,231.954],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331626,0.0978711,0.000923123,-0.00170198,0.0128988],
+ "R": [
+ [0.9738310577,0.04398424166,0.222976361],
+ [0.006459505741,0.9753414162,-0.2206068824],
+ [-0.2271813062,0.2162741507,0.9495336465]
+ ],
+ "t": [
+ [-23.1615402],
+ [89.62617671],
+ [306.715437]
+ ]
+ },
+ {
+ "name": "02_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 9,
+ "K": [
+ [746.084,0,344.827],
+ [0,746.456,222.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31385,0.00765504,0.000335804,0.000338293,0.157318],
+ "R": [
+ [0.9708044988,0.02558390192,0.2385038556],
+ [0.01777728087,0.9838878899,-0.1779005014],
+ [-0.2392124442,0.1769465571,0.9547079776]
+ ],
+ "t": [
+ [-1.622489705],
+ [92.86686988],
+ [302.6276511]
+ ]
+ },
+ {
+ "name": "02_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 10,
+ "K": [
+ [743.875,0,345.16],
+ [0,744.131,231.932],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309364,-0.0158069,0.000435688,-0.000318284,0.167974],
+ "R": [
+ [0.9837217555,0.04774800386,0.1732386674],
+ [-0.008457215477,0.9752859506,-0.220784488],
+ [-0.179499257,0.2157253874,0.9598138226]
+ ],
+ "t": [
+ [0.6070589451],
+ [94.58504844],
+ [305.3954199]
+ ]
+ },
+ {
+ "name": "02_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 11,
+ "K": [
+ [748.642,0,372.727],
+ [0,749.029,221.349],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329743,0.0894243,0.000705225,0.000452301,0.0255748],
+ "R": [
+ [0.9762818677,-0.03993432779,0.2127885436],
+ [0.08495434643,0.9746762651,-0.20685487],
+ [-0.1991393328,0.2200259705,0.9549513592]
+ ],
+ "t": [
+ [18.17502224],
+ [86.30258496],
+ [305.899008]
+ ]
+ },
+ {
+ "name": "02_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 12,
+ "K": [
+ [746.297,0,386.393],
+ [0,746.341,223.432],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329805,0.088881,-0.000101498,-0.000342857,0.0238941],
+ "R": [
+ [0.9769251111,-0.05225372472,0.2070914666],
+ [0.09392861168,0.9759243238,-0.1968479875],
+ [-0.1918195589,0.211757556,0.9583130982]
+ ],
+ "t": [
+ [31.97904484],
+ [101.8192368],
+ [305.2554798]
+ ]
+ },
+ {
+ "name": "02_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 13,
+ "K": [
+ [746.887,0,386.903],
+ [0,746.77,241.912],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330222,0.0894843,0.000608161,-0.000202457,0.0188277],
+ "R": [
+ [0.9805035597,0.07291108666,0.1824739514],
+ [-0.03359954242,0.9771464723,-0.2098948364],
+ [-0.1936074385,0.199671593,0.9605453736]
+ ],
+ "t": [
+ [39.8755561],
+ [121.0360498],
+ [302.8306622]
+ ]
+ },
+ {
+ "name": "02_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 14,
+ "K": [
+ [745.399,0,359.381],
+ [0,745.103,221.453],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32351,0.0564367,0.000553752,0.000358328,0.0789504],
+ "R": [
+ [0.9639890244,-0.01369700088,0.2655890681],
+ [0.06651808592,0.9793475216,-0.1909287203],
+ [-0.2574888447,0.2017196672,0.9449913601]
+ ],
+ "t": [
+ [64.66924198],
+ [136.2834945],
+ [299.1868513]
+ ]
+ },
+ {
+ "name": "02_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 15,
+ "K": [
+ [746.343,0,376.035],
+ [0,746.136,233.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332319,0.10939,0.000552685,0.00121175,-0.00685584],
+ "R": [
+ [0.9739293667,-0.02993852249,0.2248672353],
+ [0.07982373372,0.9730868608,-0.2161715356],
+ [-0.2123434957,0.2284855491,0.9501076748]
+ ],
+ "t": [
+ [41.67937397],
+ [146.9667487],
+ [305.3208703]
+ ]
+ },
+ {
+ "name": "02_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 16,
+ "K": [
+ [747.983,0,369.069],
+ [0,747.865,212.357],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333814,0.119177,-0.00123283,0.000206724,-0.0313224],
+ "R": [
+ [0.9828420813,0.01261378295,0.1840172159],
+ [0.03080156014,0.9724259604,-0.2311688027],
+ [-0.181859031,0.2328704445,0.9553526307]
+ ],
+ "t": [
+ [22.33056427],
+ [154.6384713],
+ [307.0242051]
+ ]
+ },
+ {
+ "name": "02_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 17,
+ "K": [
+ [743.255,0,372.405],
+ [0,743.629,259.514],
+ [0,0,1]
+ ],
+ "distCoef": [-0.301911,-0.0577323,-0.000292445,-0.000537705,0.240913],
+ "R": [
+ [0.9702237144,0.05425789408,0.2360551311],
+ [-0.004184220731,0.978195713,-0.2076430576],
+ [-0.2421743923,0.2004725119,0.9492957051]
+ ],
+ "t": [
+ [39.95715372],
+ [182.9757461],
+ [299.4720725]
+ ]
+ },
+ {
+ "name": "02_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 18,
+ "K": [
+ [746.171,0,380.016],
+ [0,746.628,215.7],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310416,0.0111871,-0.00156578,-0.000885002,0.110566],
+ "R": [
+ [0.9751942313,0.01121985931,0.2210663386],
+ [0.02134458651,0.9892938663,-0.1443677759],
+ [-0.220319359,0.1455051918,0.9645141882]
+ ],
+ "t": [
+ [9.159436194],
+ [213.6293599],
+ [288.3403437]
+ ]
+ },
+ {
+ "name": "02_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 19,
+ "K": [
+ [745.09,0,380.114],
+ [0,745.176,232.983],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31746,0.043353,-0.000108725,0.000220738,0.0862213],
+ "R": [
+ [0.9809185988,0.05584586521,0.1862255137],
+ [-0.01423917048,0.975920974,-0.2176591338],
+ [-0.1938967473,0.2108541957,0.9580942331]
+ ],
+ "t": [
+ [-1.989355998],
+ [159.4183424],
+ [303.0216832]
+ ]
+ },
+ {
+ "name": "02_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 20,
+ "K": [
+ [746.359,0,393.165],
+ [0,746.438,228.007],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32236,0.0673245,-0.000115957,0.00130444,0.0588071],
+ "R": [
+ [0.9826018096,0.03015545669,0.1832602856],
+ [0.01576123022,0.9696317731,-0.2440610748],
+ [-0.1850547688,0.2427032613,0.9522866477]
+ ],
+ "t": [
+ [-25.36954265],
+ [136.7143691],
+ [307.7149997]
+ ]
+ },
+ {
+ "name": "02_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 21,
+ "K": [
+ [747.137,0,358.509],
+ [0,747.202,238.678],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327929,0.0852816,0.000460613,0.000357406,0.0365027],
+ "R": [
+ [0.9780966382,0.08951991601,0.1879179366],
+ [-0.04045439222,0.9673344336,-0.2502549415],
+ [-0.2041822921,0.2371714111,0.9497680314]
+ ],
+ "t": [
+ [-10.00427836],
+ [118.005594],
+ [307.3165834]
+ ]
+ },
+ {
+ "name": "02_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 22,
+ "K": [
+ [745.847,0,374.568],
+ [0,746.074,247.807],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32052,0.063252,0.000743322,-0.000945252,0.0534877],
+ "R": [
+ [0.9839840132,0.07804627455,0.160263036],
+ [-0.03749054936,0.9695570383,-0.2419785283],
+ [-0.1742696772,0.2320946541,0.9569546233]
+ ],
+ "t": [
+ [-1.458572059],
+ [110.2636917],
+ [306.6072245]
+ ]
+ },
+ {
+ "name": "02_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 23,
+ "K": [
+ [744.851,0,375.128],
+ [0,744.899,236.672],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328747,0.0731957,0.000409854,0.000115616,0.0573405],
+ "R": [
+ [0.9798731388,0.006836815724,0.1995041098],
+ [0.04188111895,0.9701291749,-0.2389463451],
+ [-0.1951783896,0.2424925605,0.9503171862]
+ ],
+ "t": [
+ [13.92766978],
+ [118.8861106],
+ [308.0337581]
+ ]
+ },
+ {
+ "name": "02_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 2,
+ "node": 24,
+ "K": [
+ [748.108,0,365.63],
+ [0,748.409,236.546],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337502,0.145226,-9.99404e-05,-0.000712599,-0.0768278],
+ "R": [
+ [0.9858983234,-0.01937546959,0.166219996],
+ [0.057736328,0.9716683618,-0.2291879382],
+ [-0.1570700873,0.2355529362,0.9590848773]
+ ],
+ "t": [
+ [-5.69779309],
+ [141.0775615],
+ [307.1963385]
+ ]
+ },
+ {
+ "name": "03_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 1,
+ "K": [
+ [745.205,0,364.445],
+ [0,745.671,223.278],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321278,0.0550501,-0.000663141,0.000431329,0.0680735],
+ "R": [
+ [0.789168654,0.1464091436,-0.5964706181],
+ [-0.3274382264,0.921936374,-0.2069239719],
+ [0.5196123973,0.3586051937,0.7755032377]
+ ],
+ "t": [
+ [-15.48720347],
+ [106.8731646],
+ [321.197831]
+ ]
+ },
+ {
+ "name": "03_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 2,
+ "K": [
+ [746.402,0,367.989],
+ [0,746.656,218.884],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319108,0.0415571,-0.000289565,0.00121415,0.0978966],
+ "R": [
+ [0.7844411333,0.123213727,-0.6078408392],
+ [-0.3461950886,0.9001611021,-0.2643084389],
+ [0.5145882519,0.4177659246,0.7487793823]
+ ],
+ "t": [
+ [-25.69855827],
+ [65.19717944],
+ [326.035328]
+ ]
+ },
+ {
+ "name": "03_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 3,
+ "K": [
+ [747.999,0,350.415],
+ [0,748.222,213.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322361,0.0444301,-0.000132478,-4.14576e-05,0.110213],
+ "R": [
+ [0.8075592295,0.0617799019,-0.5865418439],
+ [-0.2672496857,0.9248714179,-0.2705373648],
+ [0.525762015,0.3752280693,0.763399109]
+ ],
+ "t": [
+ [-8.799326732],
+ [72.40249706],
+ [323.1224723]
+ ]
+ },
+ {
+ "name": "03_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 4,
+ "K": [
+ [744.819,0,376.394],
+ [0,744.912,212.894],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335892,0.121706,-0.00015411,0.0017688,-0.0013985],
+ "R": [
+ [0.8410364559,-0.03582960221,-0.5397906256],
+ [-0.192384631,0.9127679401,-0.3603371217],
+ [0.5056143132,0.4069040761,0.7607780486]
+ ],
+ "t": [
+ [3.728898504],
+ [75.32503712],
+ [325.8417248]
+ ]
+ },
+ {
+ "name": "03_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 5,
+ "K": [
+ [746.446,0,376.523],
+ [0,746.682,251.012],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330943,0.0996499,0.00144142,-0.000113946,0.0131394],
+ "R": [
+ [0.8610606531,-0.05437396314,-0.5055868113],
+ [-0.176556083,0.9004429458,-0.3975304402],
+ [0.4768673833,0.4315622475,0.7657359371]
+ ],
+ "t": [
+ [31.93527518],
+ [62.43528973],
+ [326.764058]
+ ]
+ },
+ {
+ "name": "03_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 6,
+ "K": [
+ [744.998,0,378.484],
+ [0,744.973,240.788],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31652,0.0338012,-0.0010118,-0.000122735,0.0959735],
+ "R": [
+ [0.8769583834,-0.06555368648,-0.4760742674],
+ [-0.1128149484,0.9348860407,-0.3365425358],
+ [0.4671367907,0.348842092,0.8124607151]
+ ],
+ "t": [
+ [52.69213606],
+ [109.2131316],
+ [317.2562433]
+ ]
+ },
+ {
+ "name": "03_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 7,
+ "K": [
+ [744.942,0,394.454],
+ [0,745.513,230.902],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322593,0.0669124,0.000685625,0.000650135,0.0435827],
+ "R": [
+ [0.8511772215,-0.03734239681,-0.5235483579],
+ [-0.1521244983,0.9371023984,-0.3141611561],
+ [0.5023499524,0.3470513512,0.7919595223]
+ ],
+ "t": [
+ [39.57000229],
+ [127.8421428],
+ [318.5564893]
+ ]
+ },
+ {
+ "name": "03_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 8,
+ "K": [
+ [744.592,0,375.596],
+ [0,744.695,234.586],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314208,0.0115966,-0.0002404,-0.00129875,0.131833],
+ "R": [
+ [0.863242284,-0.08735605341,-0.4971736911],
+ [-0.1241310572,0.9179337282,-0.3768144785],
+ [0.4892895255,0.386996887,0.7815556088]
+ ],
+ "t": [
+ [48.3076273],
+ [133.8669044],
+ [323.1008342]
+ ]
+ },
+ {
+ "name": "03_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 9,
+ "K": [
+ [746.083,0,388.49],
+ [0,746.196,219.485],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327776,0.0952708,0.000477894,0.00116098,0.0130168],
+ "R": [
+ [0.8627791791,-0.162720556,-0.478679547],
+ [-0.06768333431,0.9010943873,-0.4283081501],
+ [0.5010299935,0.401933982,0.766432006]
+ ],
+ "t": [
+ [23.91664651],
+ [150.3571005],
+ [326.7446808]
+ ]
+ },
+ {
+ "name": "03_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 10,
+ "K": [
+ [744.984,0,374.291],
+ [0,745.244,231.69],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317288,0.0201616,0.000340337,0.000302133,0.135473],
+ "R": [
+ [0.8433461687,-0.104156761,-0.5271798639],
+ [-0.1611508321,0.8868626272,-0.433018579],
+ [0.5126379318,0.4501400333,0.7311472501]
+ ],
+ "t": [
+ [5.809004706],
+ [133.1751931],
+ [335.4888131]
+ ]
+ },
+ {
+ "name": "03_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 11,
+ "K": [
+ [746.325,0,369.755],
+ [0,746.606,238.315],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330117,0.107892,0.000853042,-0.00148033,-0.0192727],
+ "R": [
+ [0.8487877999,-0.06352852013,-0.5249032272],
+ [-0.1660312052,0.9105147821,-0.3786772643],
+ [0.5019889537,0.4085669574,0.7622861219]
+ ],
+ "t": [
+ [10.90299391],
+ [168.9126588],
+ [328.8547345]
+ ]
+ },
+ {
+ "name": "03_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 12,
+ "K": [
+ [745.397,0,373.191],
+ [0,745.394,241.989],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315431,0.0239438,0.00152043,8.78247e-05,0.132462],
+ "R": [
+ [0.7899500519,0.01447673769,-0.613000277],
+ [-0.2772192125,0.9001468868,-0.3359837649],
+ [0.5469263421,0.4353458466,0.7150843098]
+ ],
+ "t": [
+ [-11.01289772],
+ [165.4412244],
+ [333.9391633]
+ ]
+ },
+ {
+ "name": "03_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 13,
+ "K": [
+ [746.289,0,356.696],
+ [0,746.559,221.83],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307674,-0.0320128,-0.000713248,-0.000212304,0.187939],
+ "R": [
+ [0.7812025858,0.003231301473,-0.6242692358],
+ [-0.256925784,0.9130359895,-0.316787663],
+ [0.5689566429,0.4078662043,0.7140962805]
+ ],
+ "t": [
+ [-30.04397497],
+ [158.6113997],
+ [327.0561852]
+ ]
+ },
+ {
+ "name": "03_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 14,
+ "K": [
+ [744.216,0,367.374],
+ [0,744.503,234.384],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313106,0.0107213,0.00051099,0.000391129,0.137335],
+ "R": [
+ [0.7647493291,0.08765142393,-0.6383382266],
+ [-0.3090501184,0.9192036391,-0.2440342068],
+ [0.5653728752,0.3839035005,0.7300490493]
+ ],
+ "t": [
+ [-30.23656889],
+ [178.7825502],
+ [321.7207122]
+ ]
+ },
+ {
+ "name": "03_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 15,
+ "K": [
+ [747.827,0,380.852],
+ [0,747.806,237.021],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329904,0.102056,0.000500868,0.000776535,0.0163276],
+ "R": [
+ [0.8420936086,0.09442452017,-0.5310012847],
+ [-0.2692856411,0.9266613257,-0.2622670985],
+ [0.4672939095,0.3638444688,0.8057627471]
+ ],
+ "t": [
+ [-9.683781844],
+ [164.2881649],
+ [322.7392687]
+ ]
+ },
+ {
+ "name": "03_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 16,
+ "K": [
+ [745.289,0,371.652],
+ [0,745.447,216.538],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317152,0.0301694,-0.000847782,0.000226416,0.100881],
+ "R": [
+ [0.7751085928,0.08020770062,-0.6267163586],
+ [-0.2817854267,0.9316829094,-0.2292682483],
+ [0.5655118413,0.3543073259,0.74475679]
+ ],
+ "t": [
+ [-42.18053512],
+ [150.9579844],
+ [316.9204289]
+ ]
+ },
+ {
+ "name": "03_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 17,
+ "K": [
+ [744.591,0,386.471],
+ [0,744.601,243.766],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308716,-0.020066,-0.000742984,7.36231e-05,0.18193],
+ "R": [
+ [0.8000888793,0.13985822,-0.5833502066],
+ [-0.3086873752,0.9298003917,-0.2004578159],
+ [0.5143635773,0.3404569133,0.7870954202]
+ ],
+ "t": [
+ [-29.24407076],
+ [139.76037],
+ [318.5389184]
+ ]
+ },
+ {
+ "name": "03_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 18,
+ "K": [
+ [747.091,0,388.41],
+ [0,747.213,245.147],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331947,0.109947,-0.00018029,-0.000335458,-0.0100282],
+ "R": [
+ [0.7812031275,0.143907843,-0.6074637489],
+ [-0.3493109676,0.9072427652,-0.2342912992],
+ [0.5174007358,0.3952228456,0.7590094735]
+ ],
+ "t": [
+ [-39.38157975],
+ [101.9329028],
+ [324.6812046]
+ ]
+ },
+ {
+ "name": "03_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 19,
+ "K": [
+ [743.815,0,380.782],
+ [0,743.921,233.579],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31618,0.0384848,0.000240219,0.000426998,0.0977231],
+ "R": [
+ [0.8097086682,0.09665101941,-0.578818152],
+ [-0.2718115959,0.9359285209,-0.2239559336],
+ [0.5200868476,0.3386685464,0.784100304]
+ ],
+ "t": [
+ [-3.817362892],
+ [126.1763792],
+ [318.2990602]
+ ]
+ },
+ {
+ "name": "03_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 20,
+ "K": [
+ [746.163,0,356.033],
+ [0,746.281,215.327],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323416,0.0556958,5.62358e-06,-0.000684023,0.0815018],
+ "R": [
+ [0.8690981447,0.003405692177,-0.4946279574],
+ [-0.1831744592,0.9310985933,-0.3154402114],
+ [0.4594731031,0.3647517111,0.8098398958]
+ ],
+ "t": [
+ [22.15812523],
+ [111.197586],
+ [320.9871724]
+ ]
+ },
+ {
+ "name": "03_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 21,
+ "K": [
+ [745.277,0,370.698],
+ [0,745.633,251.594],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309423,-0.0154759,-0.000871178,-0.000110471,0.185828],
+ "R": [
+ [0.8519925598,-0.01534543221,-0.5233289556],
+ [-0.157671027,0.9456449668,-0.2844212441],
+ [0.4992479597,0.3248385977,0.8032629458]
+ ],
+ "t": [
+ [23.66925749],
+ [140.0971121],
+ [315.3107012]
+ ]
+ },
+ {
+ "name": "03_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 22,
+ "K": [
+ [749.812,0,361.025],
+ [0,750.052,224.033],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333335,0.0892582,3.32371e-05,-0.00136116,0.0353235],
+ "R": [
+ [0.8242021998,-0.0118106517,-0.5661724493],
+ [-0.2609232338,0.8794144434,-0.3981824994],
+ [0.5026030242,0.4759104383,0.7217336453]
+ ],
+ "t": [
+ [6.739100305],
+ [105.8858326],
+ [336.9710973]
+ ]
+ },
+ {
+ "name": "03_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 23,
+ "K": [
+ [744.781,0,365.976],
+ [0,744.836,235.682],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319452,0.032528,0.000754874,-0.000913445,0.102166],
+ "R": [
+ [0.8233335342,0.02583843362,-0.5669693703],
+ [-0.2570181529,0.9076367155,-0.3318693443],
+ [0.506027233,0.4189605805,0.7539286912]
+ ],
+ "t": [
+ [-4.103462359],
+ [133.5127669],
+ [329.5726238]
+ ]
+ },
+ {
+ "name": "03_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 3,
+ "node": 24,
+ "K": [
+ [746.135,0,373.553],
+ [0,746.515,225.298],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323756,0.0623909,2.70614e-05,0.000962707,0.0761173],
+ "R": [
+ [0.8557458945,0.0294251088,-0.5165589289],
+ [-0.2234217673,0.921515875,-0.3176337608],
+ [0.4666708454,0.3872242956,0.7951576366]
+ ],
+ "t": [
+ [-1.49693002],
+ [128.5290469],
+ [325.1203285]
+ ]
+ },
+ {
+ "name": "04_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 1,
+ "K": [
+ [745.756,0,368.953],
+ [0,745.945,245.188],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3245,0.0724334,-0.000312337,0.000678015,0.0415529],
+ "R": [
+ [0.04501388353,-0.06073969189,-0.9971381249],
+ [-0.08162898106,0.9945884367,-0.06426936354],
+ [0.9956457501,0.08428838276,0.03981216889]
+ ],
+ "t": [
+ [-59.71104012],
+ [137.3658878],
+ [280.4259077]
+ ]
+ },
+ {
+ "name": "04_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 2,
+ "K": [
+ [745.144,0,382.474],
+ [0,745.286,222.525],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322843,0.0690658,-0.000684608,-0.000275864,0.0370253],
+ "R": [
+ [0.1096717734,-0.01795980665,-0.9938055884],
+ [-0.007042199406,0.9997976117,-0.01884523745],
+ [0.9939429106,0.009065367736,0.1095231006]
+ ],
+ "t": [
+ [-53.83503278],
+ [149.6185443],
+ [272.7820927]
+ ]
+ },
+ {
+ "name": "04_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 3,
+ "K": [
+ [742.832,0,377.499],
+ [0,742.665,258.984],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312355,-0.00257413,0.000454129,0.00111055,0.151137],
+ "R": [
+ [0.07040546321,0.04162572676,-0.9966495721],
+ [-0.08610880414,0.9956530214,0.03550119457],
+ [0.9937949208,0.08332082476,0.07368375372]
+ ],
+ "t": [
+ [-50.21742462],
+ [111.4103034],
+ [280.5940976]
+ ]
+ },
+ {
+ "name": "04_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 4,
+ "K": [
+ [743.339,0,393.561],
+ [0,743.571,223.626],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307228,-0.0295629,-0.000661125,6.4492e-05,0.183577],
+ "R": [
+ [0.09450112049,0.05679880598,-0.993903131],
+ [-0.03670643306,0.9978910099,0.05353662459],
+ [0.9948478155,0.03142336774,0.09638670013]
+ ],
+ "t": [
+ [-21.9069],
+ [118.1273376],
+ [275.8163164]
+ ]
+ },
+ {
+ "name": "04_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 5,
+ "K": [
+ [746.019,0,364.58],
+ [0,746.273,258.887],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327759,0.0738839,0.000801649,0.000211169,0.0604088],
+ "R": [
+ [0.135847977,0.01131634816,-0.9906650632],
+ [-0.049797809,0.9987488181,0.004580011864],
+ [0.98947739,0.04871076425,0.1362415358]
+ ],
+ "t": [
+ [-12.12624478],
+ [90.71810202],
+ [278.5550143]
+ ]
+ },
+ {
+ "name": "04_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 6,
+ "K": [
+ [745.588,0,362.328],
+ [0,745.695,224.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317313,0.0342325,-0.00011624,0.00140051,0.0955503],
+ "R": [
+ [0.09768474559,0.09486669264,-0.9906856217],
+ [-0.08671696061,0.9924717325,0.0864871607],
+ [0.9914322262,0.07746076975,0.1051758999]
+ ],
+ "t": [
+ [6.120914551],
+ [75.66522558],
+ [280.1538331]
+ ]
+ },
+ {
+ "name": "04_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 7,
+ "K": [
+ [744.949,0,374.902],
+ [0,744.948,218.152],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307279,-0.0368619,-0.000928182,-0.000206153,0.214368],
+ "R": [
+ [0.08413477249,-0.05845821559,-0.994738145],
+ [-0.03729096802,0.9973936317,-0.06176833509],
+ [0.9957563576,0.04229161317,0.08173552284]
+ ],
+ "t": [
+ [3.352563309],
+ [99.7043349],
+ [277.3248716]
+ ]
+ },
+ {
+ "name": "04_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 8,
+ "K": [
+ [744.851,0,365.832],
+ [0,744.82,236.655],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313642,0.00106915,0.000461187,-0.00049658,0.163492],
+ "R": [
+ [0.1068294918,-0.02053293437,-0.9940653189],
+ [-0.04471775106,0.998675844,-0.02543386204],
+ [0.9932712532,0.04716945203,0.1057698462]
+ ],
+ "t": [
+ [34.88142403],
+ [92.93282517],
+ [277.1804593]
+ ]
+ },
+ {
+ "name": "04_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 9,
+ "K": [
+ [745.947,0,354.92],
+ [0,745.962,217.292],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332252,0.114802,-0.000779302,-0.000175195,-0.0220414],
+ "R": [
+ [0.0951039165,0.01286389124,-0.99538423],
+ [-0.04378002227,0.9990030715,0.008727700331],
+ [0.9945041753,0.04274790527,0.09557228614]
+ ],
+ "t": [
+ [51.3876018],
+ [107.4685168],
+ [276.8925649]
+ ]
+ },
+ {
+ "name": "04_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 10,
+ "K": [
+ [743.419,0,373.623],
+ [0,743.493,209.714],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312784,-0.00205334,-0.00151839,-4.48796e-05,0.146707],
+ "R": [
+ [0.07554192003,-0.02015366607,-0.996938939],
+ [-0.05402378201,0.9982445697,-0.02427365106],
+ [0.9956780852,0.05569209012,0.07432053419]
+ ],
+ "t": [
+ [36.95032578],
+ [126.4783785],
+ [278.9862968]
+ ]
+ },
+ {
+ "name": "04_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 11,
+ "K": [
+ [743.168,0,378.723],
+ [0,743.196,231.359],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312654,0.00616666,0.000125459,-0.000163635,0.137741],
+ "R": [
+ [0.104627794,-0.01026277171,-0.994458496],
+ [-0.05855646041,0.9981483637,-0.01646162423],
+ [0.9927860624,0.05995431298,0.1038331098]
+ ],
+ "t": [
+ [61.78762978],
+ [139.882294],
+ [278.0088471]
+ ]
+ },
+ {
+ "name": "04_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 12,
+ "K": [
+ [746.755,0,377.564],
+ [0,747.014,231.526],
+ [0,0,1]
+ ],
+ "distCoef": [-0.342661,0.169314,0.000669193,0.000564241,-0.092518],
+ "R": [
+ [0.09069981891,0.03748374052,-0.9951726041],
+ [-0.02832816732,0.9989841486,0.03504548138],
+ [0.9954752924,0.02501279723,0.09166952704]
+ ],
+ "t": [
+ [63.18640006],
+ [168.1511303],
+ [272.7093484]
+ ]
+ },
+ {
+ "name": "04_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 13,
+ "K": [
+ [745.766,0,371.377],
+ [0,745.897,229.211],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323265,0.06437,0.000357726,0.000480753,0.061899],
+ "R": [
+ [0.03414536791,0.03842962758,-0.9986777546],
+ [-0.02717943982,0.9989265658,0.03750992125],
+ [0.9990472321,0.02586271187,0.03515321085]
+ ],
+ "t": [
+ [27.04698548],
+ [171.5967975],
+ [274.5649723]
+ ]
+ },
+ {
+ "name": "04_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 14,
+ "K": [
+ [744.965,0,366.266],
+ [0,745.319,235.632],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317134,0.0349168,5.85303e-05,0.000379707,0.110605],
+ "R": [
+ [0.05221731101,0.04748668842,-0.9975060736],
+ [0.03426805086,0.9981953182,0.04931335942],
+ [0.9980476207,-0.03675759989,0.05049579913]
+ ],
+ "t": [
+ [31.93275734],
+ [208.7852536],
+ [260.7309393]
+ ]
+ },
+ {
+ "name": "04_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 15,
+ "K": [
+ [744.586,0,371.051],
+ [0,745.106,212.085],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332822,0.11382,-0.000911903,0.000640183,-0.00904196],
+ "R": [
+ [0.0693166226,0.04834029473,-0.9964228127],
+ [-0.01396942206,0.9987743784,0.04748258878],
+ [0.9974968978,0.01062811814,0.06990695264]
+ ],
+ "t": [
+ [16.12425569],
+ [198.357827],
+ [269.7404532]
+ ]
+ },
+ {
+ "name": "04_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 16,
+ "K": [
+ [742.58,0,362.432],
+ [0,742.717,222.722],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316061,0.0181932,0.000637155,-0.000119442,0.122715],
+ "R": [
+ [0.07545496093,-0.0349426896,-0.9965367817],
+ [-0.03652359913,0.9986183515,-0.03778114217],
+ [0.9964800929,0.03924788454,0.07407447592]
+ ],
+ "t": [
+ [-15.86676392],
+ [179.6369531],
+ [275.0674259]
+ ]
+ },
+ {
+ "name": "04_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 17,
+ "K": [
+ [745.044,0,350.241],
+ [0,745.211,214.104],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330556,0.0995367,-0.000406045,-3.83783e-05,-0.00374247],
+ "R": [
+ [0.0837025501,0.02221656332,-0.9962430965],
+ [-0.04478154079,0.9988252756,0.01851168242],
+ [0.9954840515,0.04306382584,0.08459911461]
+ ],
+ "t": [
+ [-23.0620205],
+ [182.4550181],
+ [276.0013748]
+ ]
+ },
+ {
+ "name": "04_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 18,
+ "K": [
+ [747.543,0,399.307],
+ [0,747.43,229.515],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337874,0.152604,0.000377489,0.002871,-0.0603327],
+ "R": [
+ [0.03967719066,0.06607189882,-0.9970256891],
+ [-0.02383145062,0.9975901546,0.06516091958],
+ [0.998928317,0.02117516625,0.04115616396]
+ ],
+ "t": [
+ [-45.47747339],
+ [181.8911988],
+ [269.8403328]
+ ]
+ },
+ {
+ "name": "04_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 19,
+ "K": [
+ [743.963,0,369.391],
+ [0,744.08,218.072],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320196,0.0539371,0.000417857,0.00192962,0.0700112],
+ "R": [
+ [0.0434323362,0.03783761887,-0.9983395949],
+ [-0.08481170801,0.9958149524,0.03405223652],
+ [0.9954499517,0.08319191804,0.04645964289]
+ ],
+ "t": [
+ [-24.42650241],
+ [136.5925943],
+ [281.0885176]
+ ]
+ },
+ {
+ "name": "04_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 20,
+ "K": [
+ [745.858,0,356.253],
+ [0,746.045,207.418],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328012,0.0801152,-7.74627e-05,-0.000454429,0.0269942],
+ "R": [
+ [0.0976780849,0.06705669278,-0.9929563896],
+ [-0.1171365339,0.9915671608,0.05544004021],
+ [0.9883005738,0.1108961929,0.1047091699]
+ ],
+ "t": [
+ [-1.775430866],
+ [107.2147587],
+ [285.054156]
+ ]
+ },
+ {
+ "name": "04_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 21,
+ "K": [
+ [746.156,0,369.678],
+ [0,746.129,226.325],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331296,0.10434,-0.000526263,0.0017798,0.0107539],
+ "R": [
+ [0.06864954522,0.009029787974,-0.9975999714],
+ [-0.09824772164,0.9951594531,0.00224680986],
+ [0.9927913301,0.09785768182,0.06920439997]
+ ],
+ "t": [
+ [2.330018678],
+ [104.6606406],
+ [283.2576255]
+ ]
+ },
+ {
+ "name": "04_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 22,
+ "K": [
+ [746.305,0,363.016],
+ [0,746.511,222.294],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313633,0.00103632,0.000318828,-0.000294887,0.154057],
+ "R": [
+ [0.08441946195,-0.0784287402,-0.9933389588],
+ [-0.07957536672,0.9931828981,-0.08517917513],
+ [0.9932477614,0.08623609206,0.07760297012]
+ ],
+ "t": [
+ [9.995164317],
+ [122.6888691],
+ [282.4272415]
+ ]
+ },
+ {
+ "name": "04_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 23,
+ "K": [
+ [745.178,0,358.539],
+ [0,745.299,233.674],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315081,0.0210219,-6.99317e-06,-0.000330658,0.115227],
+ "R": [
+ [0.1162513982,0.03935918122,-0.9924396542],
+ [-0.02556811677,0.999001962,0.03662446354],
+ [0.9928906706,0.02111716788,0.117141715]
+ ],
+ "t": [
+ [32.91845612],
+ [159.7823772],
+ [272.1694603]
+ ]
+ },
+ {
+ "name": "04_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 4,
+ "node": 24,
+ "K": [
+ [746.014,0,365.199],
+ [0,746.411,216.584],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320661,0.0432533,-0.00136099,-0.000113861,0.0956118],
+ "R": [
+ [0.1001711426,-0.0639180002,-0.9929150172],
+ [-0.0054812292,0.9978838124,-0.06479084071],
+ [0.9949551238,0.01193256733,0.09960881242]
+ ],
+ "t": [
+ [-9.066812064],
+ [167.2144724],
+ [271.0944115]
+ ]
+ },
+ {
+ "name": "05_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 1,
+ "K": [
+ [744.506,0,379.212],
+ [0,745.093,221.816],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322425,0.0503962,-0.00139268,-0.000488272,0.0792831],
+ "R": [
+ [0.4832137358,-0.07031409603,-0.8726742883],
+ [-0.1214142278,0.9817563233,-0.14633218],
+ [0.8670427157,0.1766647942,0.465861009]
+ ],
+ "t": [
+ [-31.81590772],
+ [187.5269902],
+ [291.8752718]
+ ]
+ },
+ {
+ "name": "05_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 2,
+ "K": [
+ [746.146,0,379.909],
+ [0,746.274,243.237],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327102,0.0750235,0.00051439,0.000830868,0.0552106],
+ "R": [
+ [0.559561068,-0.04316954181,-0.8276640634],
+ [-0.1711397799,0.9711012062,-0.1663539088],
+ [0.8109269924,0.2347314165,0.5360024022]
+ ],
+ "t": [
+ [-21.47998338],
+ [182.028679],
+ [304.5116426]
+ ]
+ },
+ {
+ "name": "05_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 3,
+ "K": [
+ [746.598,0,366.137],
+ [0,746.916,245.497],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34673,0.191883,-0.000717065,0.000142378,-0.151818],
+ "R": [
+ [0.4493443217,0.06721032382,-0.8908268367],
+ [-0.2833621033,0.9563979118,-0.07077395533],
+ [0.8472281859,0.2842284411,0.4487968296]
+ ],
+ "t": [
+ [-42.79170468],
+ [156.78227],
+ [309.5144468]
+ ]
+ },
+ {
+ "name": "05_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 4,
+ "K": [
+ [744.97,0,361.533],
+ [0,745.268,216.194],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320215,0.0355127,-0.000935438,6.82351e-05,0.107335],
+ "R": [
+ [0.5139859054,0.07264601249,-0.8547169391],
+ [-0.2477501277,0.96651576,-0.06683681477],
+ [0.8212419639,0.2461094116,0.5147735369]
+ ],
+ "t": [
+ [-21.66847624],
+ [145.8563675],
+ [305.5618637]
+ ]
+ },
+ {
+ "name": "05_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 5,
+ "K": [
+ [743.904,0,367.466],
+ [0,744.108,216.808],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328736,0.086922,-0.000934339,0.000214876,0.0243362],
+ "R": [
+ [0.4889793362,0.07185582001,-0.8693307483],
+ [-0.2209595119,0.9743010874,-0.0437525441],
+ [0.8438460185,0.2134809878,0.4922903259]
+ ],
+ "t": [
+ [-47.80972546],
+ [144.3254019],
+ [299.7644507]
+ ]
+ },
+ {
+ "name": "05_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 6,
+ "K": [
+ [745.323,0,383.952],
+ [0,745.526,234.808],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334223,0.133657,-0.000107051,0.00148947,-0.0461754],
+ "R": [
+ [0.4969854565,0.0559027949,-0.8659563116],
+ [-0.2018212488,0.978003949,-0.05269211703],
+ [0.8439630558,0.2009556001,0.4973361109]
+ ],
+ "t": [
+ [-46.56558119],
+ [125.7186081],
+ [298.6423415]
+ ]
+ },
+ {
+ "name": "05_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 7,
+ "K": [
+ [746.158,0,356.674],
+ [0,746.317,240.893],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334568,0.11153,0.000321304,-0.000871385,-0.0157856],
+ "R": [
+ [0.5541201274,0.02610072644,-0.8320274253],
+ [-0.1769665492,0.9803549196,-0.08710380092],
+ [0.8134087072,0.1955069916,0.5478533484]
+ ],
+ "t": [
+ [-14.70019562],
+ [115.5481293],
+ [299.4445791]
+ ]
+ },
+ {
+ "name": "05_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 8,
+ "K": [
+ [744.96,0,386.044],
+ [0,745.46,258.776],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325919,0.068823,-0.000458274,0.000477805,0.0465958],
+ "R": [
+ [0.4763065258,-0.004539644313,-0.8792675845],
+ [-0.1710253429,0.980409884,-0.09770768372],
+ [0.8624861886,0.1969158475,0.4661992314]
+ ],
+ "t": [
+ [-40.46029545],
+ [93.91456762],
+ [297.4902987]
+ ]
+ },
+ {
+ "name": "05_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 9,
+ "K": [
+ [745.188,0,367.116],
+ [0,745.437,236.843],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328194,0.058828,0.000388874,-0.00143808,0.0829656],
+ "R": [
+ [0.5065601345,-0.04543027129,-0.8610069225],
+ [-0.1705921502,0.9735884993,-0.1517357977],
+ [0.845159836,0.2237443283,0.4854310735]
+ ],
+ "t": [
+ [-16.55300824],
+ [76.93410209],
+ [300.8962768]
+ ]
+ },
+ {
+ "name": "05_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 10,
+ "K": [
+ [747.452,0,374.886],
+ [0,747.648,257.28],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337728,0.123608,0.00138141,5.97732e-05,-0.0225942],
+ "R": [
+ [0.4549222289,-0.02855444123,-0.8900732608],
+ [-0.1699899924,0.9783230281,-0.1182685721],
+ [0.8741562607,0.2051065493,0.4402069233]
+ ],
+ "t": [
+ [-13.61854908],
+ [96.6157071],
+ [299.0141417]
+ ]
+ },
+ {
+ "name": "05_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 11,
+ "K": [
+ [746.39,0,405.604],
+ [0,746.458,241.87],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333064,0.100943,0.000870611,0.00103156,0.0180409],
+ "R": [
+ [0.5002384593,-0.05591048228,-0.8640807264],
+ [-0.1916757277,0.9660062257,-0.1734715752],
+ [0.8444062406,0.2524004556,0.4725167836]
+ ],
+ "t": [
+ [16.55277765],
+ [75.44647006],
+ [303.7304898]
+ ]
+ },
+ {
+ "name": "05_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 12,
+ "K": [
+ [745.943,0,392.757],
+ [0,746.143,272.1],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323245,0.0770562,0.00168738,0.000666505,0.0382015],
+ "R": [
+ [0.5344619138,-0.0483612619,-0.8438078283],
+ [-0.2099054746,0.9594877737,-0.1879438847],
+ [0.818712498,0.277568731,0.5026583782]
+ ],
+ "t": [
+ [45.5535171],
+ [81.37072912],
+ [304.8427161]
+ ]
+ },
+ {
+ "name": "05_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 13,
+ "K": [
+ [748.463,0,383.471],
+ [0,748.465,243.614],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34071,0.149034,0.000455623,0.000254671,-0.0668973],
+ "R": [
+ [0.550270912,-0.09726860505,-0.8293013577],
+ [-0.1127468592,0.975440235,-0.1892207537],
+ [0.82733915,0.1976238001,0.525789658]
+ ],
+ "t": [
+ [34.15956958],
+ [127.9842494],
+ [295.9545727]
+ ]
+ },
+ {
+ "name": "05_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 14,
+ "K": [
+ [744.467,0,372.192],
+ [0,744.287,242.67],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321164,0.0557106,-0.000170048,0.000249902,0.0584864],
+ "R": [
+ [0.5607110475,-0.1151130063,-0.8199708025],
+ [-0.101866971,0.9731761842,-0.2062795062],
+ [0.8217215109,0.1991911399,0.5339444244]
+ ],
+ "t": [
+ [50.41224037],
+ [142.3474205],
+ [294.74195]
+ ]
+ },
+ {
+ "name": "05_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 15,
+ "K": [
+ [746.542,0,352.38],
+ [0,746.666,240.759],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327959,0.100036,-0.000636984,-0.00122606,-0.0366604],
+ "R": [
+ [0.5029624145,-0.05772144518,-0.8623787128],
+ [-0.198700467,0.9633205664,-0.180365215],
+ [0.8411580909,0.262071977,0.4730447599]
+ ],
+ "t": [
+ [34.04469815],
+ [136.31759],
+ [307.4406203]
+ ]
+ },
+ {
+ "name": "05_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 16,
+ "K": [
+ [747.042,0,371.719],
+ [0,747.231,244.896],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323957,0.0675271,-0.000219383,0.00030566,0.0452733],
+ "R": [
+ [0.5145114331,-0.105655334,-0.8509494319],
+ [-0.1209004538,0.9735279663,-0.1939752023],
+ [0.8489175846,0.2026826318,0.4881174913]
+ ],
+ "t": [
+ [9.341169646],
+ [165.8735131],
+ [297.8569993]
+ ]
+ },
+ {
+ "name": "05_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 17,
+ "K": [
+ [745.814,0,386.675],
+ [0,746.085,252.153],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320652,0.0597547,0.000647483,5.56623e-05,0.0523558],
+ "R": [
+ [0.5123119379,-0.06682282728,-0.856195765],
+ [-0.1341513719,0.9785027468,-0.1566390244],
+ [0.8482569703,0.1951078787,0.4923342645]
+ ],
+ "t": [
+ [9.076647729],
+ [186.6487394],
+ [296.0424945]
+ ]
+ },
+ {
+ "name": "05_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 18,
+ "K": [
+ [744.362,0,367.747],
+ [0,744.705,261.961],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317525,0.0240072,0.000331,-0.000409781,0.122239],
+ "R": [
+ [0.5214772573,-0.05602259067,-0.8514240656],
+ [-0.1526209796,0.9756261952,-0.1576716965],
+ [0.8395047985,0.2121673788,0.5002166498]
+ ],
+ "t": [
+ [-2.829687906],
+ [192.8140289],
+ [298.6606918]
+ ]
+ },
+ {
+ "name": "05_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 19,
+ "K": [
+ [744.259,0,353.379],
+ [0,744.524,245.823],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320328,0.0298824,0.00026675,-0.00161079,0.123162],
+ "R": [
+ [0.5556726344,-0.05485450779,-0.8295896012],
+ [-0.2099711545,0.9562161648,-0.2038694692],
+ [0.8044501462,0.2874745713,0.519825291]
+ ],
+ "t": [
+ [-1.476630227],
+ [134.2745178],
+ [310.4571486]
+ ]
+ },
+ {
+ "name": "05_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 20,
+ "K": [
+ [743.679,0,405.845],
+ [0,743.856,234.88],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326644,0.0646831,0.000108119,5.73367e-05,0.058946],
+ "R": [
+ [0.447769915,-0.01338423954,-0.894048637],
+ [-0.18660487,0.9764723016,-0.1080762074],
+ [0.8744602482,0.2152271039,0.4347373552]
+ ],
+ "t": [
+ [-41.39083575],
+ [143.2049031],
+ [297.8732354]
+ ]
+ },
+ {
+ "name": "05_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 21,
+ "K": [
+ [746.956,0,354.763],
+ [0,747.081,232.068],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333648,0.0797639,-0.000768992,-0.00091097,0.0508097],
+ "R": [
+ [0.5053420531,-0.009379958189,-0.8628681393],
+ [-0.2526298673,0.9545207072,-0.1583299394],
+ [0.8251106347,0.2979970402,0.4799897963]
+ ],
+ "t": [
+ [-19.66925616],
+ [96.29580053],
+ [309.4868577]
+ ]
+ },
+ {
+ "name": "05_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 22,
+ "K": [
+ [748.369,0,375.575],
+ [0,748.642,247.648],
+ [0,0,1]
+ ],
+ "distCoef": [-0.339087,0.143465,-0.000470446,0.00132222,-0.0624301],
+ "R": [
+ [0.54260376,-0.05746408722,-0.8380209057],
+ [-0.1470082191,0.975763273,-0.1620944744],
+ [0.8270246327,0.2111490322,0.5210051277]
+ ],
+ "t": [
+ [3.173863757],
+ [116.0988382],
+ [299.4207466]
+ ]
+ },
+ {
+ "name": "05_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 23,
+ "K": [
+ [744.544,0,368.615],
+ [0,744.426,281.181],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322575,0.0664483,0.00114224,0.000391788,0.0483369],
+ "R": [
+ [0.5347472888,-0.05715349527,-0.8430769924],
+ [-0.1466458645,0.9762943366,-0.1591991164],
+ [0.832190079,0.2087650503,0.5136894259]
+ ],
+ "t": [
+ [16.7223507],
+ [130.5590862],
+ [298.5444367]
+ ]
+ },
+ {
+ "name": "05_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 5,
+ "node": 24,
+ "K": [
+ [743.308,0,356.74],
+ [0,743.243,228.93],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321093,0.0447792,0.000127467,-8.40104e-05,0.095825],
+ "R": [
+ [0.5706235669,-0.133891243,-0.8102233519],
+ [-0.1678811389,0.9467635938,-0.2746900447],
+ [0.8038685639,0.2927658322,0.5177678046]
+ ],
+ "t": [
+ [6.742844805],
+ [124.9131408],
+ [309.8640068]
+ ]
+ },
+ {
+ "name": "06_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 1,
+ "K": [
+ [744.518,0,344.042],
+ [0,744.512,240.289],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313532,-0.0139368,0.00116047,-0.000125352,0.195046],
+ "R": [
+ [-0.3305715804,0.1011846603,-0.9383411399],
+ [-0.314462461,0.9256148845,0.2105954561],
+ [0.8898515555,0.3646899369,-0.2741631979]
+ ],
+ "t": [
+ [-23.56718534],
+ [104.1648487],
+ [320.754952]
+ ]
+ },
+ {
+ "name": "06_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 2,
+ "K": [
+ [748.956,0,345.566],
+ [0,748.875,227.82],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335662,0.0955564,-6.0167e-05,-0.0012999,0.0278092],
+ "R": [
+ [-0.2903396332,0.1603112194,-0.9433998147],
+ [-0.341086429,0.9037763758,0.2585504022],
+ [0.8940709957,0.3968483028,-0.2077221201]
+ ],
+ "t": [
+ [-2.499901432],
+ [69.14355517],
+ [325.2941984]
+ ]
+ },
+ {
+ "name": "06_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 3,
+ "K": [
+ [743.901,0,369.68],
+ [0,743.816,251.042],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320568,0.044977,0.000366128,-0.00033077,0.103335],
+ "R": [
+ [-0.3123459653,0.110763308,-0.943488997],
+ [-0.3278062139,0.9196080197,0.216481353],
+ [0.891618239,0.3768986331,-0.250926954]
+ ],
+ "t": [
+ [2.578346941],
+ [71.05917793],
+ [323.4074447]
+ ]
+ },
+ {
+ "name": "06_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 4,
+ "K": [
+ [745.814,0,378.476],
+ [0,745.908,222.393],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316287,0.0251632,0.000357033,0.00145486,0.13215],
+ "R": [
+ [-0.2756543214,0.09031338143,-0.9570048005],
+ [-0.3333214643,0.9248259371,0.1832860813],
+ [0.9016160472,0.3695138418,-0.2248288776]
+ ],
+ "t": [
+ [26.15902854],
+ [86.10496093],
+ [322.4382284]
+ ]
+ },
+ {
+ "name": "06_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 5,
+ "K": [
+ [750.419,0,363.736],
+ [0,750.614,222.964],
+ [0,0,1]
+ ],
+ "distCoef": [-0.344753,0.14329,-0.000836382,-0.000451111,-0.060951],
+ "R": [
+ [-0.2930259634,0.06094491301,-0.9541601031],
+ [-0.3875087878,0.9047544541,0.1767945619],
+ [0.8740553324,0.4215508218,-0.2414998562]
+ ],
+ "t": [
+ [36.26889278],
+ [61.41890121],
+ [327.3260635]
+ ]
+ },
+ {
+ "name": "06_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 6,
+ "K": [
+ [747.394,0,354.724],
+ [0,747.506,211.184],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329009,0.0921746,-0.00050966,0.000333806,0.021085],
+ "R": [
+ [-0.2297156979,0.02557529828,-0.9729216835],
+ [-0.3964529538,0.9104994627,0.1175405629],
+ [0.888850805,0.4127185877,-0.199016617]
+ ],
+ "t": [
+ [62.78312093],
+ [81.38139883],
+ [324.7093469]
+ ]
+ },
+ {
+ "name": "06_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 7,
+ "K": [
+ [746.623,0,374.989],
+ [0,746.758,209.923],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319339,0.0433323,-0.00139256,0.000754597,0.0938733],
+ "R": [
+ [-0.2846142448,0.03267216609,-0.9580852056],
+ [-0.3313740809,0.934457856,0.1303063082],
+ [0.8995476364,0.3545716359,-0.255133308]
+ ],
+ "t": [
+ [45.81195811],
+ [121.7115234],
+ [320.8009986]
+ ]
+ },
+ {
+ "name": "06_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 8,
+ "K": [
+ [745.971,0,357.954],
+ [0,746.024,209.947],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314348,0.0246684,-0.0014997,0.000635776,0.111152],
+ "R": [
+ [-0.3038162213,-0.0261928812,-0.9523705354],
+ [-0.3441704234,0.9351353343,0.08407512184],
+ [0.8883931693,0.3533211563,-0.2931240987]
+ ],
+ "t": [
+ [41.47715732],
+ [140.438376],
+ [322.3540865]
+ ]
+ },
+ {
+ "name": "06_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 9,
+ "K": [
+ [742.648,0,362.103],
+ [0,742.703,220.817],
+ [0,0,1]
+ ],
+ "distCoef": [-0.304218,-0.0643312,-0.000139411,-0.000234647,0.289172],
+ "R": [
+ [-0.2807259034,-0.0411671215,-0.958904706],
+ [-0.3740921558,0.9247597922,0.06981680165],
+ [0.8838823599,0.3783181134,-0.2750043253]
+ ],
+ "t": [
+ [37.64720227],
+ [153.3424109],
+ [325.0305142]
+ ]
+ },
+ {
+ "name": "06_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 10,
+ "K": [
+ [747.72,0,366.165],
+ [0,747.851,213.209],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324647,0.0523798,-0.00077308,-0.000271098,0.0916616],
+ "R": [
+ [-0.2880158499,0.02777358159,-0.957222805],
+ [-0.3788720768,0.9147158267,0.1405379157],
+ [0.8794900907,0.4031421393,-0.2529300217]
+ ],
+ "t": [
+ [33.16578395],
+ [147.9736193],
+ [327.8869733]
+ ]
+ },
+ {
+ "name": "06_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 11,
+ "K": [
+ [745.331,0,369.444],
+ [0,745.587,207.732],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317455,0.0357855,-0.00041249,0.000556817,0.0920153],
+ "R": [
+ [-0.3142048567,0.04518634316,-0.9482792323],
+ [-0.3166241188,0.9366885696,0.1495449465],
+ [0.8949997069,0.3472358248,-0.2800050117]
+ ],
+ "t": [
+ [26.61359186],
+ [187.9055539],
+ [317.8889871]
+ ]
+ },
+ {
+ "name": "06_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 12,
+ "K": [
+ [747.25,0,346.366],
+ [0,747.394,225.779],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328454,0.0750084,3.92686e-05,0.00130952,0.0669429],
+ "R": [
+ [-0.2993781475,0.05639323365,-0.9524665495],
+ [-0.3171785116,0.9355987261,0.1550897014],
+ [0.8998725002,0.3485323901,-0.2622110915]
+ ],
+ "t": [
+ [13.58039626],
+ [195.4066632],
+ [317.2443523]
+ ]
+ },
+ {
+ "name": "06_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 13,
+ "K": [
+ [743.861,0,344.414],
+ [0,743.872,231.421],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307564,-0.0231037,-0.000140407,-0.000635225,0.208058],
+ "R": [
+ [-0.2583036736,0.07116007646,-0.9634393887],
+ [-0.3357690773,0.9284960528,0.1586007776],
+ [0.905835713,0.3644603181,-0.2159405881]
+ ],
+ "t": [
+ [14.66480509],
+ [172.1699927],
+ [320.6722019]
+ ]
+ },
+ {
+ "name": "06_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 14,
+ "K": [
+ [744.949,0,378.98],
+ [0,744.921,225.408],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321047,0.0567081,-0.000162218,0.000699701,0.0634367],
+ "R": [
+ [-0.3208579847,0.07871363947,-0.9438507915],
+ [-0.3472646452,0.9173632389,0.1945557869],
+ [0.8811682132,0.3901907879,-0.267008856]
+ ],
+ "t": [
+ [-45.70363788],
+ [100.2282059],
+ [322.9364507]
+ ]
+ },
+ {
+ "name": "06_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 15,
+ "K": [
+ [745.712,0,360.895],
+ [0,745.741,234.163],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31006,-0.0103454,0.000398478,0.000813845,0.181221],
+ "R": [
+ [-0.3227895896,0.1367774117,-0.9365355415],
+ [-0.3406635237,0.9063958148,0.2497898928],
+ [0.8830375102,0.3996730746,-0.245980058]
+ ],
+ "t": [
+ [-14.93002532],
+ [154.0180569],
+ [326.396188]
+ ]
+ },
+ {
+ "name": "06_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 16,
+ "K": [
+ [745.931,0,372.193],
+ [0,746.03,212.813],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325757,0.0830346,-0.000419051,0.00216162,0.0290765],
+ "R": [
+ [-0.311559769,0.02363818266,-0.9499324958],
+ [-0.312276077,0.9416182622,0.1258518973],
+ [0.8974486961,0.3358515813,-0.2859887293]
+ ],
+ "t": [
+ [-41.03283731],
+ [153.3338286],
+ [314.9665339]
+ ]
+ },
+ {
+ "name": "06_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 17,
+ "K": [
+ [744.756,0,368.403],
+ [0,744.752,202.816],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313223,0.00720848,-0.00119606,0.000542174,0.130737],
+ "R": [
+ [-0.3236003046,0.09291211415,-0.9416210394],
+ [-0.3175516679,0.9267842511,0.2005788875],
+ [0.8913157584,0.3639207207,-0.2704032691]
+ ],
+ "t": [
+ [-41.098271],
+ [130.5289196],
+ [319.7107876]
+ ]
+ },
+ {
+ "name": "06_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 18,
+ "K": [
+ [744.889,0,373.989],
+ [0,745.092,230.989],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319065,0.0283013,-0.000935078,-0.000739787,0.111424],
+ "R": [
+ [-0.3391260928,0.0773602665,-0.9375547357],
+ [-0.3008220503,0.9353680392,0.1859911968],
+ [0.8913470633,0.3451116057,-0.2939360344]
+ ],
+ "t": [
+ [-22.38901828],
+ [189.8595323],
+ [315.0907711]
+ ]
+ },
+ {
+ "name": "06_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 19,
+ "K": [
+ [743.21,0,358.424],
+ [0,743.138,251.445],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316603,0.00648778,0.000375455,-0.000277526,0.16085],
+ "R": [
+ [-0.34774011,0.09728469559,-0.9325301624],
+ [-0.3453355468,0.9113903597,0.2238548019],
+ [0.8716766465,0.399879107,-0.2833311204]
+ ],
+ "t": [
+ [-13.32995299],
+ [105.9918293],
+ [324.8353482]
+ ]
+ },
+ {
+ "name": "06_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 20,
+ "K": [
+ [745.315,0,375.798],
+ [0,745.342,214.671],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317661,0.021421,-0.000865931,0.000266434,0.124612],
+ "R": [
+ [-0.2889220833,0.06736289331,-0.9549797225],
+ [-0.355115135,0.918816287,0.172249446],
+ [0.8890541438,0.3888944219,-0.2415447329]
+ ],
+ "t": [
+ [16.18922492],
+ [101.394333],
+ [324.5371374]
+ ]
+ },
+ {
+ "name": "06_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 21,
+ "K": [
+ [743.803,0,341.335],
+ [0,743.805,238.935],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305727,-0.0577903,-0.000702133,-0.00085287,0.249773],
+ "R": [
+ [-0.2867564999,0.0564691645,-0.9563377767],
+ [-0.3641939053,0.9168870998,0.1633427245],
+ [0.8860775977,0.3951319776,-0.24235761]
+ ],
+ "t": [
+ [29.77890794],
+ [113.785435],
+ [325.4988706]
+ ]
+ },
+ {
+ "name": "06_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 22,
+ "K": [
+ [745.285,0,373.625],
+ [0,745.232,235.431],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319503,0.0483306,-0.000362012,0.00120612,0.080115],
+ "R": [
+ [-0.3458253526,0.08893014684,-0.9340750797],
+ [-0.3902640321,0.8916714915,0.2293816395],
+ [0.8532870623,0.4438618933,-0.2736563703]
+ ],
+ "t": [
+ [18.96316513],
+ [116.1979138],
+ [333.2100324]
+ ]
+ },
+ {
+ "name": "06_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 23,
+ "K": [
+ [744.536,0,366.592],
+ [0,744.501,224.531],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312705,-0.014521,0.000375544,8.36622e-05,0.188212],
+ "R": [
+ [-0.3181142509,0.09038767844,-0.94373375],
+ [-0.4081954831,0.8853909401,0.2223945386],
+ [0.8556750382,0.455974726,-0.2447596336]
+ ],
+ "t": [
+ [6.972278595],
+ [119.3141773],
+ [334.5341124]
+ ]
+ },
+ {
+ "name": "06_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 6,
+ "node": 24,
+ "K": [
+ [744.6,0,358.514],
+ [0,744.655,220.515],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30152,-0.0573254,-0.000856409,-0.000288003,0.227002],
+ "R": [
+ [-0.3545583501,0.05661769889,-0.9333181732],
+ [-0.3227337004,0.929412527,0.1789841147],
+ [0.8775712706,0.3646735401,-0.3112585327]
+ ],
+ "t": [
+ [-25.22428756],
+ [139.0090865],
+ [319.514146]
+ ]
+ },
+ {
+ "name": "07_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 1,
+ "K": [
+ [745.635,0,384.154],
+ [0,745.75,223.733],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328279,0.104082,-0.000872931,0.00144148,0.00404207],
+ "R": [
+ [-0.9078071857,0.03344162453,-0.4180523547],
+ [0.00958043905,0.9982092569,0.05904654639],
+ [0.4192783428,0.049597754,-0.9065019217]
+ ],
+ "t": [
+ [-23.31434773],
+ [152.0493649],
+ [282.3431498]
+ ]
+ },
+ {
+ "name": "07_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 2,
+ "K": [
+ [746.944,0,375.746],
+ [0,747.112,207.581],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321827,0.078307,-0.00112183,4.35862e-05,0.0396046],
+ "R": [
+ [-0.9306435439,0.005427673037,-0.3658867782],
+ [-0.02457764723,0.9967049447,0.07729936951],
+ [0.3651007167,0.08093079535,-0.9274436225]
+ ],
+ "t": [
+ [-62.01828104],
+ [131.8151818],
+ [284.3018088]
+ ]
+ },
+ {
+ "name": "07_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 3,
+ "K": [
+ [743.881,0,383.122],
+ [0,743.965,237.105],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311008,0.000325185,-0.000782967,0.00055371,0.154469],
+ "R": [
+ [-0.9217631286,0.06528892794,-0.3822173342],
+ [0.03992506463,0.996464058,0.07392814261],
+ [0.3856925251,0.05288418425,-0.9211104924]
+ ],
+ "t": [
+ [-43.22640533],
+ [121.5976731],
+ [282.3432951]
+ ]
+ },
+ {
+ "name": "07_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 4,
+ "K": [
+ [743.69,0,370.307],
+ [0,743.828,227.79],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303025,-0.0263668,-0.000445815,0.00071591,0.180166],
+ "R": [
+ [-0.9409979296,0.06863452498,-0.3313792366],
+ [0.04529042225,0.9959498431,0.07767037874],
+ [0.3353679682,0.05807936004,-0.9402952269]
+ ],
+ "t": [
+ [-38.37277115],
+ [113.0266013],
+ [281.4230584]
+ ]
+ },
+ {
+ "name": "07_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 5,
+ "K": [
+ [743.998,0,375.484],
+ [0,744.299,220.79],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310908,0.00595719,-5.69241e-05,0.000519591,0.131448],
+ "R": [
+ [-0.9269484075,0.08594630429,-0.3652121064],
+ [0.04467826469,0.9917683984,0.1199970688],
+ [0.3725191305,0.09491404865,-0.9231580692]
+ ],
+ "t": [
+ [-23.36597135],
+ [80.23534001],
+ [286.4206576]
+ ]
+ },
+ {
+ "name": "07_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 6,
+ "K": [
+ [745.602,0,379.444],
+ [0,745.67,224.268],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303286,-0.0402497,-0.00132196,0.00012981,0.210105],
+ "R": [
+ [-0.923694641,0.09319000989,-0.3716232396],
+ [0.04673933936,0.9901316615,0.1321163393],
+ [0.3802678586,0.1046657299,-0.9189349491]
+ ],
+ "t": [
+ [-0.9450645075],
+ [68.69008136],
+ [287.3198917]
+ ]
+ },
+ {
+ "name": "07_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 7,
+ "K": [
+ [745.731,0,365.823],
+ [0,745.481,229.263],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308219,-0.0231519,0.000110727,0.000180113,0.209056],
+ "R": [
+ [-0.917494877,0.04967698427,-0.3946331815],
+ [0.001316203411,0.9925436367,0.1218827179],
+ [0.3977454189,0.1113073518,-0.9107190869]
+ ],
+ "t": [
+ [18.92434207],
+ [79.05208738],
+ [288.1952445]
+ ]
+ },
+ {
+ "name": "07_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 8,
+ "K": [
+ [745.611,0,393.911],
+ [0,745.863,244.069],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318705,0.0460564,0.000184451,0.000507881,0.0745222],
+ "R": [
+ [-0.9083609307,0.09070031,-0.4082326216],
+ [0.05268537174,0.9932388068,0.1034452715],
+ [0.4148550001,0.07245775567,-0.9069979066]
+ ],
+ "t": [
+ [48.31394514],
+ [81.42535523],
+ [283.8217571]
+ ]
+ },
+ {
+ "name": "07_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 9,
+ "K": [
+ [745.77,0,370.33],
+ [0,746.047,217.48],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321786,0.069205,4.67533e-05,5.58471e-05,0.0372207],
+ "R": [
+ [-0.9211612824,0.007939579541,-0.3891000576],
+ [-0.02433705705,0.996659961,0.07795274024],
+ [0.3884193603,0.08127659646,-0.9178913418]
+ ],
+ "t": [
+ [49.65486911],
+ [97.0413663],
+ [285.6851525]
+ ]
+ },
+ {
+ "name": "07_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 10,
+ "K": [
+ [744.504,0,363.969],
+ [0,744.833,247.068],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335916,0.144192,-0.000823922,-0.000462503,-0.076361],
+ "R": [
+ [-0.9225918644,-0.01579725191,-0.3854538864],
+ [-0.05416624958,0.9945677902,0.08888716518],
+ [0.381955847,0.1028851669,-0.9184358297]
+ ],
+ "t": [
+ [40.86826856],
+ [113.0714764],
+ [288.4804376]
+ ]
+ },
+ {
+ "name": "07_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 11,
+ "K": [
+ [744.999,0,387.199],
+ [0,745.384,239.21],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313806,0.0330336,-7.01628e-05,0.00132279,0.0985619],
+ "R": [
+ [-0.9109471902,-0.006922747781,-0.4124648981],
+ [-0.04540685091,0.9954664163,0.08357530662],
+ [0.4100163832,0.09486142287,-0.9071316751]
+ ],
+ "t": [
+ [65.64483344],
+ [130.0336458],
+ [285.8729547]
+ ]
+ },
+ {
+ "name": "07_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 12,
+ "K": [
+ [743.664,0,350.646],
+ [0,743.861,222.503],
+ [0,0,1]
+ ],
+ "distCoef": [-0.300623,-0.0667329,-0.000394627,-0.00107967,0.272621],
+ "R": [
+ [-0.9268683851,0.02536908581,-0.3745282449],
+ [0.006256924582,0.9986192343,0.0521581796],
+ [0.3753343145,0.04600037271,-0.9257473295]
+ ],
+ "t": [
+ [57.10937388],
+ [163.0891099],
+ [280.8513179]
+ ]
+ },
+ {
+ "name": "07_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 13,
+ "K": [
+ [744.176,0,390.977],
+ [0,744.332,246.666],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327257,0.10216,-0.000582688,0.00201022,0.0126373],
+ "R": [
+ [-0.9290120658,-0.01909429991,-0.3695564765],
+ [-0.04453762663,0.9971777882,0.06043888335],
+ [0.3673594716,0.07260762025,-0.9272406117]
+ ],
+ "t": [
+ [26.5211548],
+ [160.1280328],
+ [285.2494721]
+ ]
+ },
+ {
+ "name": "07_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 14,
+ "K": [
+ [744.044,0,360.721],
+ [0,744.333,226.474],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311296,-0.00746755,-0.00165304,-0.000168766,0.17966],
+ "R": [
+ [-0.9305033137,0.06302128148,-0.3608211486],
+ [0.03165130136,0.9952368859,0.09220485899],
+ [0.3649133847,0.07437646791,-0.9280659258]
+ ],
+ "t": [
+ [37.8814582],
+ [178.0304645],
+ [285.6034633]
+ ]
+ },
+ {
+ "name": "07_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 15,
+ "K": [
+ [744.03,0,362.147],
+ [0,744.447,229.329],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314413,0.0379836,-0.000745365,2.01034e-05,0.0898919],
+ "R": [
+ [-0.9265853662,0.03975182478,-0.373977742],
+ [0.01411888978,0.9973739765,0.07103385017],
+ [0.3758193929,0.06053877555,-0.9247133829]
+ ],
+ "t": [
+ [16.14446289],
+ [185.021862],
+ [282.5666312]
+ ]
+ },
+ {
+ "name": "07_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 16,
+ "K": [
+ [743.673,0,368.897],
+ [0,743.962,238.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314216,0.0200058,-0.0002257,-0.000345788,0.11969],
+ "R": [
+ [-0.9350006114,0.024774913,-0.3537796777],
+ [-0.006073372197,0.9962920776,0.08582080369],
+ [0.354594093,0.08239113958,-0.9313832344]
+ ],
+ "t": [
+ [-10.51100446],
+ [168.6528502],
+ [285.9762696]
+ ]
+ },
+ {
+ "name": "07_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 17,
+ "K": [
+ [744.686,0,385.346],
+ [0,745.049,227.767],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317176,0.0455424,-0.000136917,0.000534438,0.0739505],
+ "R": [
+ [-0.908638426,0.05327873405,-0.4141709639],
+ [0.04010861029,0.9983767379,0.04043746577],
+ [0.4156531128,0.02013121347,-0.9093004036]
+ ],
+ "t": [
+ [-7.322164421],
+ [189.4505625],
+ [275.8940033]
+ ]
+ },
+ {
+ "name": "07_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 18,
+ "K": [
+ [746.282,0,378.432],
+ [0,746.624,237.775],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320382,0.058651,0.000451819,0.000534403,0.062414],
+ "R": [
+ [-0.916555331,0.01769811564,-0.3995160846],
+ [-0.01470055472,0.9968539618,0.07788499561],
+ [0.3996376094,0.077259016,-0.9134116408]
+ ],
+ "t": [
+ [-37.37478029],
+ [164.0712496],
+ [285.8486829]
+ ]
+ },
+ {
+ "name": "07_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 19,
+ "K": [
+ [743.687,0,374.362],
+ [0,743.883,225.048],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322503,0.0715253,7.77555e-05,0.000517375,0.0539586],
+ "R": [
+ [-0.9239544056,0.01616424802,-0.3821609261],
+ [-0.020576852,0.9955594902,0.09185801365],
+ [0.3819487525,0.09273628522,-0.9195189677]
+ ],
+ "t": [
+ [-17.14443298],
+ [133.4982453],
+ [287.2304165]
+ ]
+ },
+ {
+ "name": "07_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 20,
+ "K": [
+ [745.801,0,368.555],
+ [0,746.033,233.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317685,0.0475287,-3.52395e-05,0.000512076,0.0805211],
+ "R": [
+ [-0.9241543321,-0.01069440692,-0.3818696113],
+ [-0.04324692472,0.9961108974,0.076764468],
+ [0.3795635307,0.08745690199,-0.9210227014]
+ ],
+ "t": [
+ [-16.56758847],
+ [113.8864258],
+ [286.5218078]
+ ]
+ },
+ {
+ "name": "07_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 21,
+ "K": [
+ [744.1,0,390.405],
+ [0,744.284,237.593],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322514,0.0588182,0.000321804,0.00147162,0.0689104],
+ "R": [
+ [-0.9369369296,0.006948104691,-0.3494294118],
+ [-0.02026391849,0.9970404822,0.07415962808],
+ [0.3489105381,0.07656370335,-0.9340232522]
+ ],
+ "t": [
+ [-3.618393153],
+ [111.1940513],
+ [285.5030449]
+ ]
+ },
+ {
+ "name": "07_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 22,
+ "K": [
+ [747.001,0,381.032],
+ [0,747.132,234.437],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324882,0.0577225,-0.00134011,-0.00135265,0.0819201],
+ "R": [
+ [-0.9282296861,0.06047570579,-0.3670590401],
+ [0.02337036389,0.9942284933,0.1047068731],
+ [0.3712727784,0.08861372459,-0.9242857414]
+ ],
+ "t": [
+ [25.6408869],
+ [119.8980517],
+ [286.9452799]
+ ]
+ },
+ {
+ "name": "07_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 23,
+ "K": [
+ [743.981,0,363.51],
+ [0,744.339,258.582],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313768,0.0101513,0.00111395,-0.00104272,0.1345],
+ "R": [
+ [-0.9138255678,-0.001018785166,-0.4061056435],
+ [-0.03060482875,0.9973259054,0.06636552484],
+ [0.4049520663,0.0730753071,-0.9114130916]
+ ],
+ "t": [
+ [24.3580015],
+ [146.5427691],
+ [284.2261849]
+ ]
+ },
+ {
+ "name": "07_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 7,
+ "node": 24,
+ "K": [
+ [744.847,0,398.685],
+ [0,745.01,270.264],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328511,0.106892,0.000179407,0.00152869,-0.00291861],
+ "R": [
+ [-0.915939158,0.01937877811,-0.4008490012],
+ [-0.01852012751,0.9957282098,0.09045627137],
+ [0.4008895904,0.09027621565,-0.9116675607]
+ ],
+ "t": [
+ [6.147743662],
+ [145.7157982],
+ [287.1579534]
+ ]
+ },
+ {
+ "name": "08_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 1,
+ "K": [
+ [743.703,0,360.221],
+ [0,744.108,227.682],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309411,-0.0239561,-0.001159,0.000249551,0.191643],
+ "R": [
+ [-0.6256262875,-0.004424555618,-0.7801103586],
+ [-0.1745259617,0.9754325172,0.134432485],
+ [0.7603502068,0.2202540071,-0.6110284243]
+ ],
+ "t": [
+ [5.656398722],
+ [175.9817187],
+ [302.7764948]
+ ]
+ },
+ {
+ "name": "08_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 2,
+ "K": [
+ [747.203,0,376.344],
+ [0,747.435,209.923],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331616,0.11313,4.7739e-05,0.00134479,-0.0154118],
+ "R": [
+ [-0.6724252099,0.1092176997,-0.7320627235],
+ [-0.09964199407,0.9666926758,0.2357472025],
+ [0.7334274403,0.2314665517,-0.6391458561]
+ ],
+ "t": [
+ [-0.9742570867],
+ [185.4525058],
+ [305.0714088]
+ ]
+ },
+ {
+ "name": "08_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 3,
+ "K": [
+ [747.234,0,368.091],
+ [0,747.404,224.293],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329137,0.0905459,-0.000565165,-0.000329878,0.0231933],
+ "R": [
+ [-0.656899377,0.0205246652,-0.7536988435],
+ [-0.2005757989,0.9588523348,0.2009267253],
+ [0.7268098496,0.2831623883,-0.6257527502]
+ ],
+ "t": [
+ [-32.7353206],
+ [153.4285774],
+ [313.8994992]
+ ]
+ },
+ {
+ "name": "08_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 4,
+ "K": [
+ [747.386,0,362.788],
+ [0,747.713,235.953],
+ [0,0,1]
+ ],
+ "distCoef": [-0.341304,0.154379,-0.000777774,-0.000654564,-0.0867958],
+ "R": [
+ [-0.6631685233,0.06657565756,-0.7455033143],
+ [-0.1433461882,0.9663011288,0.2138083224],
+ [0.7346151238,0.2486560079,-0.6312771259]
+ ],
+ "t": [
+ [-22.98714967],
+ [144.6795235],
+ [307.788251]
+ ]
+ },
+ {
+ "name": "08_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 5,
+ "K": [
+ [745.746,0,376.748],
+ [0,745.752,233.642],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32088,0.0642866,0.000720856,0.00118823,0.0489989],
+ "R": [
+ [-0.6568191598,0.04935682433,-0.7524310568],
+ [-0.1452125328,0.970898021,0.19044777],
+ [0.7399337211,0.2343521638,-0.6305371929]
+ ],
+ "t": [
+ [-42.15667108],
+ [135.9397275],
+ [306.138018]
+ ]
+ },
+ {
+ "name": "08_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 6,
+ "K": [
+ [743.581,0,359.642],
+ [0,743.625,223.766],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309434,-0.0145066,-0.000137344,-0.000208072,0.169515],
+ "R": [
+ [-0.6714433509,-0.01781555577,-0.7408417054],
+ [-0.2359597182,0.9528188479,0.1909430659],
+ [0.7024861834,0.3030162521,-0.6439676336]
+ ],
+ "t": [
+ [-57.25895983],
+ [89.79547495],
+ [311.6502108]
+ ]
+ },
+ {
+ "name": "08_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 7,
+ "K": [
+ [745.148,0,371.237],
+ [0,745.103,220.621],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318768,0.034703,-0.000217256,0.000447556,0.0954449],
+ "R": [
+ [-0.7012843801,0.01049644172,-0.7128043511],
+ [-0.1276034542,0.9818947595,0.1400001421],
+ [0.7013683602,0.1891362102,-0.6872480755]
+ ],
+ "t": [
+ [-43.70728874],
+ [118.2041714],
+ [298.0588141]
+ ]
+ },
+ {
+ "name": "08_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 8,
+ "K": [
+ [743.06,0,391.891],
+ [0,743.237,230.861],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322908,0.0553375,0.000339696,0.00130059,0.0777268],
+ "R": [
+ [-0.6299217379,0.07604043096,-0.7729272003],
+ [-0.1362742651,0.9689348188,0.2063846932],
+ [0.7646096578,0.2353362908,-0.5999907511]
+ ],
+ "t": [
+ [-3.915515028],
+ [82.19520224],
+ [306.2551203]
+ ]
+ },
+ {
+ "name": "08_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 9,
+ "K": [
+ [746.456,0,356.955],
+ [0,746.592,233.352],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320498,0.0507213,0.000550471,0.000126643,0.0741224],
+ "R": [
+ [-0.684872543,0.06612723284,-0.7256561093],
+ [-0.09767122593,0.9785553778,0.1813551881],
+ [0.7220872049,0.1950809107,-0.6637269822]
+ ],
+ "t": [
+ [-6.194765679],
+ [87.40737989],
+ [301.7039487]
+ ]
+ },
+ {
+ "name": "08_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 10,
+ "K": [
+ [747.33,0,361.528],
+ [0,747.71,220.883],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322455,0.0389243,0.00118705,0.000768992,0.12227],
+ "R": [
+ [-0.6055801648,0.01225702185,-0.7956899079],
+ [-0.1760343759,0.973047512,0.1489645524],
+ [0.7760699469,0.2302787546,-0.5871006154]
+ ],
+ "t": [
+ [32.64204154],
+ [89.24589085],
+ [303.2777117]
+ ]
+ },
+ {
+ "name": "08_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 11,
+ "K": [
+ [747.774,0,350.264],
+ [0,747.981,233.163],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312094,-0.0263709,0.00148203,-0.000526901,0.233175],
+ "R": [
+ [-0.6738094891,0.06987822761,-0.7355935058],
+ [-0.1142917175,0.9736808734,0.1971876265],
+ [0.730012449,0.216939139,-0.6480889092]
+ ],
+ "t": [
+ [35.79986479],
+ [83.7107121],
+ [303.8218457]
+ ]
+ },
+ {
+ "name": "08_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 12,
+ "K": [
+ [744.899,0,366.47],
+ [0,744.848,222.726],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30396,-0.0418844,-0.00058576,-0.000160605,0.231689],
+ "R": [
+ [-0.6160341517,-0.01803679921,-0.7875129191],
+ [-0.1884772348,0.9740736778,0.1251271436],
+ [0.7648387123,0.2255108512,-0.6034621779]
+ ],
+ "t": [
+ [61.57356311],
+ [97.36793025],
+ [301.4047959]
+ ]
+ },
+ {
+ "name": "08_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 13,
+ "K": [
+ [746.859,0,368.586],
+ [0,747.139,224.684],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318047,0.0428323,-0.000551709,0.000692584,0.0895927],
+ "R": [
+ [-0.6485099772,-0.04236983322,-0.7600260566],
+ [-0.2235198928,0.9650338886,0.1369249841],
+ [0.7276494121,0.258678161,-0.6353046057]
+ ],
+ "t": [
+ [38.13208236],
+ [106.9572182],
+ [307.8393222]
+ ]
+ },
+ {
+ "name": "08_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 14,
+ "K": [
+ [744.505,0,357.32],
+ [0,744.53,228.165],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303025,-0.0702212,0.000533599,-0.000753966,0.269146],
+ "R": [
+ [-0.6825611814,-0.04644305139,-0.729351271],
+ [-0.1871280484,0.9758162042,0.1129859684],
+ [0.7064653757,0.213601916,-0.6747450588]
+ ],
+ "t": [
+ [41.82592662],
+ [132.5834032],
+ [304.3020009]
+ ]
+ },
+ {
+ "name": "08_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 15,
+ "K": [
+ [745.837,0,357.73],
+ [0,745.88,221.629],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3197,0.0439542,-0.00136466,0.00170195,0.109142],
+ "R": [
+ [-0.6069626381,-0.02117938565,-0.7944481037],
+ [-0.2107505505,0.968144583,0.1352045554],
+ [0.7662770787,0.2494944888,-0.5920911574]
+ ],
+ "t": [
+ [64.87618524],
+ [141.1933336],
+ [303.6799609]
+ ]
+ },
+ {
+ "name": "08_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 16,
+ "K": [
+ [744.767,0,345.102],
+ [0,744.781,229.581],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307131,-0.033453,0.0002274,-0.000565369,0.224073],
+ "R": [
+ [-0.6350262321,-0.03398669713,-0.7717425665],
+ [-0.2527580664,0.9531820242,0.1660041824],
+ [0.7299692079,0.3004811693,-0.6138860012]
+ ],
+ "t": [
+ [34.611726],
+ [134.434862],
+ [314.3473002]
+ ]
+ },
+ {
+ "name": "08_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 17,
+ "K": [
+ [743.543,0,370.548],
+ [0,743.847,224.118],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308645,-0.0111516,9.80345e-05,-0.000744439,0.160705],
+ "R": [
+ [-0.6124225565,-0.05791042639,-0.7884066177],
+ [-0.1936876385,0.977907652,0.07862393367],
+ [0.7664357188,0.2008556864,-0.610109238]
+ ],
+ "t": [
+ [28.62018644],
+ [186.6213498],
+ [297.6164741]
+ ]
+ },
+ {
+ "name": "08_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 18,
+ "K": [
+ [743.39,0,376.249],
+ [0,743.751,216.723],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319375,0.0602092,-1.05699e-05,0.00110696,0.0487054],
+ "R": [
+ [-0.6887185447,0.08181736584,-0.720397588],
+ [-0.1043667464,0.9720764384,0.2101784484],
+ [0.7174777686,0.2199393475,-0.6609480577]
+ ],
+ "t": [
+ [20.48604056],
+ [189.7333893],
+ [302.8177068]
+ ]
+ },
+ {
+ "name": "08_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 19,
+ "K": [
+ [747.038,0,360.923],
+ [0,747.259,204.023],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32724,0.0825647,-0.000697091,0.000733699,0.0397455],
+ "R": [
+ [-0.6726100217,0.03848005322,-0.7389959704],
+ [-0.1487286588,0.9712392562,0.1859411014],
+ [0.7248969201,0.2349757278,-0.6475421705]
+ ],
+ "t": [
+ [3.177324598],
+ [151.0352965],
+ [305.3818706]
+ ]
+ },
+ {
+ "name": "08_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 20,
+ "K": [
+ [747.914,0,388.693],
+ [0,747.835,242.83],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338429,0.134609,0.00136964,0.000561914,-0.0365273],
+ "R": [
+ [-0.6685313457,0.02780025068,-0.7431641715],
+ [-0.1765857142,0.9647874561,0.194942684],
+ [0.722414926,0.2615574708,-0.6400815293]
+ ],
+ "t": [
+ [-14.15175066],
+ [129.456494],
+ [308.9585645]
+ ]
+ },
+ {
+ "name": "08_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 21,
+ "K": [
+ [746.296,0,369.274],
+ [0,746.424,219.198],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312598,-0.010091,-0.000298989,-0.000771876,0.160922],
+ "R": [
+ [-0.6341455554,-0.01222382885,-0.7731170626],
+ [-0.1896201401,0.9718007188,0.1401697733],
+ [0.7496023059,0.2354866044,-0.6185809907]
+ ],
+ "t": [
+ [-6.414673774],
+ [116.5175191],
+ [305.5663378]
+ ]
+ },
+ {
+ "name": "08_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 22,
+ "K": [
+ [743.609,0,361.562],
+ [0,743.794,221.87],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314273,0.00142644,4.14402e-05,0.000150079,0.159707],
+ "R": [
+ [-0.6552794634,-0.0176584532,-0.7551801135],
+ [-0.2007508014,0.9678470127,0.1515627784],
+ [0.7282224527,0.2509189891,-0.6377552198]
+ ],
+ "t": [
+ [4.541098798],
+ [103.6271831],
+ [307.0310837]
+ ]
+ },
+ {
+ "name": "08_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 23,
+ "K": [
+ [748.435,0,354.117],
+ [0,748.457,219.552],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324308,0.0627041,-0.000215295,-0.000444561,0.0758056],
+ "R": [
+ [-0.6485698923,-0.03356212054,-0.7604148071],
+ [-0.2015811272,0.9709293787,0.1290782349],
+ [0.733976937,0.2370015309,-0.6364810526]
+ ],
+ "t": [
+ [20.56445448],
+ [121.4098798],
+ [305.3725739]
+ ]
+ },
+ {
+ "name": "08_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 8,
+ "node": 24,
+ "K": [
+ [745.572,0,350.678],
+ [0,745.729,218.826],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313081,0.00890587,-0.000465969,-0.00023462,0.141032],
+ "R": [
+ [-0.6716141,0.00283216084,-0.7408957278],
+ [-0.1390702972,0.9817365211,0.1298185488],
+ [0.7277320613,0.1902245569,-0.6589542206]
+ ],
+ "t": [
+ [13.95231346],
+ [154.9907046],
+ [298.6967118]
+ ]
+ },
+ {
+ "name": "09_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 1,
+ "K": [
+ [745.377,0,383.314],
+ [0,745.581,229.65],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311824,0.0113225,-0.000890232,0.000288511,0.13186],
+ "R": [
+ [-0.9888207636,0.1490770148,-0.003088867539],
+ [0.1339941062,0.8974831076,0.420201917],
+ [0.06541465384,0.4150904904,-0.9074253732]
+ ],
+ "t": [
+ [-5.5065201],
+ [83.70733211],
+ [330.6651976]
+ ]
+ },
+ {
+ "name": "09_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 2,
+ "K": [
+ [745.133,0,380.598],
+ [0,746.347,248.499],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340543,0.0603048,-0.00219925,-0.00194065,0.128165],
+ "R": [
+ [-0.9728033822,0.2090533065,0.09975116351],
+ [0.2316107347,0.8720009628,0.4312433055],
+ [0.003169728315,0.4426183864,-0.8967044758]
+ ],
+ "t": [
+ [-23.76195567],
+ [58.26386366],
+ [329.69794]
+ ]
+ },
+ {
+ "name": "09_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 3,
+ "K": [
+ [745.787,0,382.41],
+ [0,745.973,216.203],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309439,0.00115788,-0.000439278,0.00154239,0.140783],
+ "R": [
+ [-0.995096801,0.09728424012,-0.01783629191],
+ [0.08253738581,0.9161639792,0.3922131349],
+ [0.05449712496,0.3888178749,-0.9197014317]
+ ],
+ "t": [
+ [6.72584843],
+ [65.39953055],
+ [327.4514754]
+ ]
+ },
+ {
+ "name": "09_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 4,
+ "K": [
+ [744.782,0,384.335],
+ [0,745.051,230.833],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319171,0.0452003,0.000841339,0.00114337,0.0902557],
+ "R": [
+ [-0.9962766095,0.08536470964,0.01207409478],
+ [0.0830687393,0.9129812009,0.3994557689],
+ [0.02307600417,0.3989714189,-0.9166729542]
+ ],
+ "t": [
+ [12.91980994],
+ [75.72355875],
+ [328.4117918]
+ ]
+ },
+ {
+ "name": "09_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 5,
+ "K": [
+ [745.938,0,386.124],
+ [0,746.151,234.663],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322825,0.0563734,0.000659785,0.00216478,0.0846192],
+ "R": [
+ [-0.9996885429,0.02460566921,0.004168718214],
+ [0.02372582958,0.8852416043,0.464525981],
+ [0.007739649829,0.4644802074,-0.8855496794]
+ ],
+ "t": [
+ [23.79490616],
+ [45.57973364],
+ [333.4360246]
+ ]
+ },
+ {
+ "name": "09_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 6,
+ "K": [
+ [745.533,0,376.456],
+ [0,745.938,237.583],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324418,0.0645728,-2.52302e-05,0.000695669,0.0784542],
+ "R": [
+ [-0.9996292032,0.0242501169,-0.01238498622],
+ [0.01720849374,0.9151046106,0.4028491273],
+ [0.02110269642,0.4024866252,-0.9151826008]
+ ],
+ "t": [
+ [44.50201086],
+ [83.15135806],
+ [329.4460526]
+ ]
+ },
+ {
+ "name": "09_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 7,
+ "K": [
+ [745.538,0,357.165],
+ [0,745.859,222.198],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30448,-0.0356601,-0.000261684,-0.000249049,0.226264],
+ "R": [
+ [-0.9994703128,-0.005373675551,-0.03209699996],
+ [-0.01769948118,0.9174086112,0.3975527241],
+ [0.02730974481,0.3979102457,-0.9170177829]
+ ],
+ "t": [
+ [39.28939518],
+ [107.3778293],
+ [329.1138759]
+ ]
+ },
+ {
+ "name": "09_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 8,
+ "K": [
+ [746.393,0,361.584],
+ [0,746.73,220.937],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31726,0.0513551,0.000643529,-0.000795525,0.0635312],
+ "R": [
+ [-0.9973050313,-0.005865573042,-0.0731318648],
+ [-0.03181904441,0.9327538711,0.3591068981],
+ [0.06610766226,0.3604661023,-0.9304267656]
+ ],
+ "t": [
+ [64.05594666],
+ [137.6750859],
+ [322.0323762]
+ ]
+ },
+ {
+ "name": "09_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 9,
+ "K": [
+ [750.271,0,344.156],
+ [0,750.817,228.346],
+ [0,0,1]
+ ],
+ "distCoef": [-0.379154,0.391779,0.000225814,-0.000528714,-0.53339],
+ "R": [
+ [-0.9991212371,-0.002089946585,-0.04186150665],
+ [-0.01685937738,0.9344344151,0.355735977],
+ [0.03837336329,0.3561291283,-0.933648504]
+ ],
+ "t": [
+ [51.49527243],
+ [159.1149955],
+ [322.66132]
+ ]
+ },
+ {
+ "name": "09_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 10,
+ "K": [
+ [744.897,0,366.998],
+ [0,745.389,227.752],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317307,0.0499201,-0.000255849,-0.000414203,0.0689696],
+ "R": [
+ [-0.9956077306,0.03830608065,-0.08542769468],
+ [0.005132094192,0.9334237661,0.3587390896],
+ [0.093482129,0.3567249879,-0.9295205079]
+ ],
+ "t": [
+ [51.9897871],
+ [163.3127669],
+ [320.2676037]
+ ]
+ },
+ {
+ "name": "09_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 11,
+ "K": [
+ [745.812,0,365.568],
+ [0,746.463,243.927],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334591,0.135033,-0.000586766,0.000648781,-0.0516408],
+ "R": [
+ [-0.998272905,0.02856351314,-0.05133549401],
+ [0.007150624435,0.926422355,0.3764179707],
+ [0.05831016891,0.3754007803,-0.9250265825]
+ ],
+ "t": [
+ [35.7749059],
+ [177.7642897],
+ [325.0135255]
+ ]
+ },
+ {
+ "name": "09_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 12,
+ "K": [
+ [743.195,0,380.908],
+ [0,743.577,227.789],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308886,-0.0148964,-0.00146189,1.64512e-05,0.167268],
+ "R": [
+ [-0.9994731762,0.02727182579,0.01759595347],
+ [0.03184982914,0.9284235071,0.3701558858],
+ [-0.006241669996,0.370521307,-0.9288029945]
+ ],
+ "t": [
+ [-0.9618436208],
+ [187.4005014],
+ [324.424529]
+ ]
+ },
+ {
+ "name": "09_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 13,
+ "K": [
+ [745.52,0,396.637],
+ [0,745.641,231.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327971,0.0908214,-0.00010844,0.00165709,0.0286999],
+ "R": [
+ [-0.9916965419,0.1263943494,0.02371575794],
+ [0.1244737261,0.8970729317,0.4239887342],
+ [0.03231501572,0.4234201503,-0.9053568998]
+ ],
+ "t": [
+ [12.62306638],
+ [150.537484],
+ [333.7640249]
+ ]
+ },
+ {
+ "name": "09_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 14,
+ "K": [
+ [744.91,0,372.463],
+ [0,744.965,226.423],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308854,-0.0214085,8.99951e-05,0.000256405,0.180188],
+ "R": [
+ [-0.9924146786,0.1180105859,0.03444716585],
+ [0.1215225705,0.8993517426,0.4199984619],
+ [0.01858414592,0.4209987468,-0.9068708203]
+ ],
+ "t": [
+ [-10.68067405],
+ [162.2988485],
+ [333.0026074]
+ ]
+ },
+ {
+ "name": "09_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 15,
+ "K": [
+ [747.246,0,368.718],
+ [0,747.604,232.745],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3413,0.139342,-0.00187439,-0.000934376,-0.0485015],
+ "R": [
+ [-0.9858543141,0.1593536378,0.05193928607],
+ [0.1663907088,0.8933064559,0.4175137217],
+ [0.02013463084,0.4202499184,-0.9071849882]
+ ],
+ "t": [
+ [-16.61956214],
+ [147.1949584],
+ [331.9981158]
+ ]
+ },
+ {
+ "name": "09_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 16,
+ "K": [
+ [743.705,0,367.288],
+ [0,743.835,246.124],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316616,0.0215265,-3.02132e-05,0.000242548,0.131229],
+ "R": [
+ [-0.9974602961,0.07055123587,0.009771425173],
+ [0.06902048446,0.9235857212,0.3771280794],
+ [0.01758210332,0.3768447143,-0.9261095675]
+ ],
+ "t": [
+ [-30.73982653],
+ [139.9628037],
+ [324.9351286]
+ ]
+ },
+ {
+ "name": "09_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 17,
+ "K": [
+ [742.776,0,376.251],
+ [0,742.956,242.934],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317736,0.0249159,0.000195501,0.000659428,0.110976],
+ "R": [
+ [-0.9810894361,0.1806813104,0.06941024814],
+ [0.1934432758,0.9031273242,0.3833284952],
+ [0.006574003146,0.389506483,-0.9210002618]
+ ],
+ "t": [
+ [-32.91453507],
+ [125.2651482],
+ [325.9500645]
+ ]
+ },
+ {
+ "name": "09_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 18,
+ "K": [
+ [744.563,0,383.579],
+ [0,744.554,245.613],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324188,0.0688729,0.000784842,0.000316148,0.0548859],
+ "R": [
+ [-0.970594512,0.2257141743,0.08366244524],
+ [0.2406675117,0.9026066179,0.3569039677],
+ [0.005044007626,0.3665438649,-0.9303870985]
+ ],
+ "t": [
+ [-30.64851648],
+ [114.5848432],
+ [323.1694161]
+ ]
+ },
+ {
+ "name": "09_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 19,
+ "K": [
+ [745.897,0,369.27],
+ [0,746.007,226.27],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314378,0.0131268,-0.000749673,-0.000436078,0.140449],
+ "R": [
+ [-0.9929061616,0.1118291068,0.04039313118],
+ [0.1187797946,0.9175946163,0.3793566667],
+ [0.005358597494,0.3814634596,-0.9243683867]
+ ],
+ "t": [
+ [-9.348770156],
+ [111.4514571],
+ [325.9373984]
+ ]
+ },
+ {
+ "name": "09_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 20,
+ "K": [
+ [743.647,0,378.532],
+ [0,743.859,221.629],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312883,-0.00145442,-0.000725648,-1.91192e-05,0.160115],
+ "R": [
+ [-0.9995005243,0.01416777706,-0.02824846864],
+ [0.002450265794,0.9259270935,0.3776943389],
+ [0.03150711165,0.3774364735,-0.9254993303]
+ ],
+ "t": [
+ [6.861259295],
+ [105.360829],
+ [326.1962043]
+ ]
+ },
+ {
+ "name": "09_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 21,
+ "K": [
+ [745.35,0,364.423],
+ [0,745.51,242.824],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317615,0.0309367,1.60295e-05,-0.00084218,0.138729],
+ "R": [
+ [-0.9983267687,0.03243769532,-0.0478691851],
+ [0.01510269673,0.9453721551,0.3256430514],
+ [0.05581730476,0.3243752215,-0.9442802255]
+ ],
+ "t": [
+ [30.85545331],
+ [138.1219419],
+ [318.1793043]
+ ]
+ },
+ {
+ "name": "09_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 22,
+ "K": [
+ [744.248,0,356.027],
+ [0,744.436,238.226],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308137,-0.0481761,0.000357682,-8.3696e-05,0.245728],
+ "R": [
+ [-0.9955839097,0.09158830299,-0.0205976113],
+ [0.07579544873,0.9137019347,0.3992540852],
+ [0.05538708142,0.3959297379,-0.9166089209]
+ ],
+ "t": [
+ [35.25988756],
+ [131.4528362],
+ [328.3382973]
+ ]
+ },
+ {
+ "name": "09_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 23,
+ "K": [
+ [744.535,0,363.359],
+ [0,744.632,254.668],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311847,-0.00198079,0.000462082,-0.000460419,0.174118],
+ "R": [
+ [-0.9946906764,0.1028474748,0.003585412436],
+ [0.09771594436,0.9329851386,0.346396197],
+ [0.03228083764,0.3449074195,-0.9380814567]
+ ],
+ "t": [
+ [12.3985171],
+ [157.8437238],
+ [320.5381764]
+ ]
+ },
+ {
+ "name": "09_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 9,
+ "node": 24,
+ "K": [
+ [743.311,0,385.98],
+ [0,743.511,229.743],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319602,0.0480118,-0.000790169,0.000699953,0.0704098],
+ "R": [
+ [-0.9986396845,0.04700092247,-0.02257640097],
+ [0.03617494752,0.9363507866,0.3491970469],
+ [0.03755201414,0.3479053287,-0.93677731]
+ ],
+ "t": [
+ [-8.936415104],
+ [142.1371611],
+ [321.4431282]
+ ]
+ },
+ {
+ "name": "10_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 1,
+ "K": [
+ [744.128,0,369.511],
+ [0,744.056,233.67],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31156,0.00550691,-0.000430053,0.000410016,0.149166],
+ "R": [
+ [-0.6229970612,0.0209936641,0.781942407],
+ [0.05250109858,0.9985078863,0.01502117145],
+ [-0.7804603106,0.05041098106,-0.6231696692]
+ ],
+ "t": [
+ [-46.84686717],
+ [150.7389104],
+ [280.0083694]
+ ]
+ },
+ {
+ "name": "10_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 2,
+ "K": [
+ [743.282,0,357.827],
+ [0,743.347,211.632],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30948,-0.00718458,0.000285593,0.000547399,0.164062],
+ "R": [
+ [-0.6512046155,0.0977241901,0.7525839032],
+ [0.103617117,0.9938368806,-0.03939223155],
+ [-0.7517952126,0.05232817138,-0.6573170626]
+ ],
+ "t": [
+ [-42.32005533],
+ [143.0774393],
+ [282.200902]
+ ]
+ },
+ {
+ "name": "10_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 3,
+ "K": [
+ [744.012,0,361.17],
+ [0,744.101,225.217],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303567,-0.0563565,0.000757602,-0.000519388,0.263551],
+ "R": [
+ [-0.6320598226,0.04182219841,0.773790207],
+ [0.06737176964,0.9977273282,0.001106034268],
+ [-0.771985379,0.05283069539,-0.6334409935]
+ ],
+ "t": [
+ [-54.02554254],
+ [119.7786683],
+ [280.9354705]
+ ]
+ },
+ {
+ "name": "10_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 4,
+ "K": [
+ [744.209,0,380.966],
+ [0,744.256,205.476],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315194,0.0249601,-0.000765583,0.001001,0.10286],
+ "R": [
+ [-0.6566261636,0.06356030055,0.7515332125],
+ [0.0713368826,0.9972094103,-0.02201002698],
+ [-0.7508349555,0.03915967697,-0.6593279831]
+ ],
+ "t": [
+ [-22.38173011],
+ [115.5645607],
+ [280.9145253]
+ ]
+ },
+ {
+ "name": "10_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 5,
+ "K": [
+ [744.499,0,353.834],
+ [0,744.652,215.524],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317042,0.0236932,-0.00147688,-0.000206715,0.11602],
+ "R": [
+ [-0.6480155592,0.1057846486,0.754244949],
+ [0.1559047408,0.9877614348,-0.004589090624],
+ [-0.7454995284,0.1146165612,-0.6565771067]
+ ],
+ "t": [
+ [-17.37690425],
+ [72.84298088],
+ [287.4167752]
+ ]
+ },
+ {
+ "name": "10_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 6,
+ "K": [
+ [746.493,0,367.328],
+ [0,746.754,207.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323089,0.0587326,-0.000981175,-0.000221417,0.0550321],
+ "R": [
+ [-0.6607542091,0.07289791872,0.74705406],
+ [0.1340507848,0.9907326878,0.02188900409],
+ [-0.738535214,0.1146064347,-0.6644028167]
+ ],
+ "t": [
+ [3.021864726],
+ [64.04371811],
+ [286.9062935]
+ ]
+ },
+ {
+ "name": "10_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 7,
+ "K": [
+ [744.949,0,365.308],
+ [0,744.944,217.014],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320697,0.0459897,0.000335318,2.89241e-06,0.0947246],
+ "R": [
+ [-0.643287111,0.03528116955,0.764811697],
+ [0.0902182212,0.9954712387,0.02996140018],
+ [-0.7602909742,0.08827373343,-0.6435568215]
+ ],
+ "t": [
+ [9.776307982],
+ [84.51813798],
+ [285.3816638]
+ ]
+ },
+ {
+ "name": "10_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 8,
+ "K": [
+ [748.112,0,395.78],
+ [0,748.17,229.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325424,0.0774932,-0.000546,0.000524276,0.0351183],
+ "R": [
+ [-0.6241633069,0.05185263499,0.7795713377],
+ [0.04102617023,0.9985938587,-0.03357318505],
+ [-0.7802160084,0.0110276762,-0.6254129601]
+ ],
+ "t": [
+ [-46.24758235],
+ [183.5392889],
+ [272.6641799]
+ ]
+ },
+ {
+ "name": "10_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 9,
+ "K": [
+ [746.122,0,370.333],
+ [0,746.261,210.753],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323285,0.0813962,-0.00031195,0.00117949,0.0118242],
+ "R": [
+ [-0.6717702835,0.002860846795,0.7407540089],
+ [0.1085475528,0.9895782107,0.09461708989],
+ [-0.7327633417,0.1439679842,-0.6650797731]
+ ],
+ "t": [
+ [53.6134591],
+ [78.01841366],
+ [288.9552018]
+ ]
+ },
+ {
+ "name": "10_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 10,
+ "K": [
+ [746.498,0,355.775],
+ [0,746.616,218.183],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320479,0.0482256,-0.000295345,0.000515541,0.088746],
+ "R": [
+ [-0.6274497943,0.01735785812,0.7784635254],
+ [0.05740772193,0.9980618939,0.02401685623],
+ [-0.7765378993,0.0597591891,-0.6272302051]
+ ],
+ "t": [
+ [35.32452291],
+ [122.8912729],
+ [283.9520693]
+ ]
+ },
+ {
+ "name": "10_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 11,
+ "K": [
+ [745.209,0,387.948],
+ [0,745.058,237.868],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312054,0.0106095,2.04654e-05,-0.000407432,0.122509],
+ "R": [
+ [-0.663538187,0.0558857692,0.74605218],
+ [0.09086672278,0.9958436408,0.006219474654],
+ [-0.742603739,0.07191817555,-0.6658584406]
+ ],
+ "t": [
+ [70.41193089],
+ [130.903078],
+ [283.3216663]
+ ]
+ },
+ {
+ "name": "10_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 12,
+ "K": [
+ [746.923,0,359.191],
+ [0,746.955,219.728],
+ [0,0,1]
+ ],
+ "distCoef": [-0.34193,0.180291,-0.0011698,0.000387434,-0.142263],
+ "R": [
+ [-0.6573529902,0.02662022179,0.7531124817],
+ [0.0203979596,0.9996382488,-0.01752982786],
+ [-0.7533066902,0.003838673213,-0.6576581901]
+ ],
+ "t": [
+ [61.18715226],
+ [173.543055],
+ [273.2477614]
+ ]
+ },
+ {
+ "name": "10_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 13,
+ "K": [
+ [747.063,0,362.554],
+ [0,747.091,228.588],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334743,0.115617,-0.000133435,0.000763825,-0.0142674],
+ "R": [
+ [-0.6314178936,0.07344004486,0.771957255],
+ [0.07624079511,0.9965613541,-0.03244701456],
+ [-0.7716856775,0.03836700932,-0.6348457984]
+ ],
+ "t": [
+ [39.63694261],
+ [165.7689372],
+ [279.8275089]
+ ]
+ },
+ {
+ "name": "10_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 14,
+ "K": [
+ [745.722,0,380.721],
+ [0,745.932,237.231],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319645,0.0532601,-0.00105825,0.00148804,0.0812854],
+ "R": [
+ [-0.6464741699,0.0407242176,0.7618482039],
+ [0.05782238306,0.998317631,-0.004298792509],
+ [-0.7607415591,0.04127282036,-0.6477413331]
+ ],
+ "t": [
+ [37.16059778],
+ [187.0284564],
+ [279.5510011]
+ ]
+ },
+ {
+ "name": "10_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 15,
+ "K": [
+ [745.212,0,345.945],
+ [0,745.407,234.052],
+ [0,0,1]
+ ],
+ "distCoef": [-0.345973,0.208044,0.00063894,-0.000591324,-0.26389],
+ "R": [
+ [-0.6892736753,0.06991501806,0.7211197479],
+ [0.04097555303,0.9975016565,-0.0575451947],
+ [-0.7233414164,-0.01011610737,-0.6904164394]
+ ],
+ "t": [
+ [38.38229011],
+ [201.7157692],
+ [268.6124541]
+ ]
+ },
+ {
+ "name": "10_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 16,
+ "K": [
+ [746.402,0,351.743],
+ [0,746.432,235.34],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332074,0.123634,0.000553061,0.000200886,-0.050504],
+ "R": [
+ [-0.6626903808,0.1069713565,0.7412142659],
+ [0.1159650419,0.9924654921,-0.03955194002],
+ [-0.7398605059,0.05974425322,-0.6701022728]
+ ],
+ "t": [
+ [18.24762504],
+ [172.5928493],
+ [282.9657885]
+ ]
+ },
+ {
+ "name": "10_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 17,
+ "K": [
+ [745.425,0,381.954],
+ [0,745.576,234.397],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316953,0.0361047,-0.000329948,0.00146685,0.0995591],
+ "R": [
+ [-0.6439914485,0.08005681888,0.7608323863],
+ [0.04150323442,0.9967010496,-0.06974596286],
+ [-0.7639060779,-0.01333879876,-0.6451895695]
+ ],
+ "t": [
+ [-14.39474973],
+ [198.5707312],
+ [268.934139]
+ ]
+ },
+ {
+ "name": "10_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 18,
+ "K": [
+ [742.866,0,374.357],
+ [0,743.163,216.484],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313801,-0.00472223,0.00105562,-0.000883374,0.146196],
+ "R": [
+ [-0.6735625977,0.03695414336,0.7382058102],
+ [0.08136680684,0.9963864104,0.02436316713],
+ [-0.7346379174,0.07647556771,-0.6741354596]
+ ],
+ "t": [
+ [41.81793908],
+ [81.57199105],
+ [283.0241236]
+ ]
+ },
+ {
+ "name": "10_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 19,
+ "K": [
+ [747.195,0,374.317],
+ [0,747.324,252.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325848,0.0754879,0.000850799,-0.000494425,0.0423325],
+ "R": [
+ [-0.6398121174,0.03550225829,0.7677109118],
+ [0.06489671873,0.9978603994,0.00793971962],
+ [-0.7657864391,0.05490184793,-0.6407471551]
+ ],
+ "t": [
+ [-18.67539454],
+ [143.739157],
+ [281.6554752]
+ ]
+ },
+ {
+ "name": "10_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 20,
+ "K": [
+ [744.074,0,359.595],
+ [0,744.232,222.54],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312038,-0.00652471,0.000517579,-0.000473896,0.154037],
+ "R": [
+ [-0.6341018605,0.07503908623,0.769599874],
+ [0.1134623387,0.9935365213,-0.003387984729],
+ [-0.7648798129,0.08517227417,-0.6385174669]
+ ],
+ "t": [
+ [-10.64771601],
+ [114.6784971],
+ [285.5473806]
+ ]
+ },
+ {
+ "name": "10_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 21,
+ "K": [
+ [745.669,0,353.595],
+ [0,745.986,221.41],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331248,0.0956435,-0.00124938,0.0010706,0.0394747],
+ "R": [
+ [-0.618235149,0.02815342604,0.7854888192],
+ [0.09838720035,0.994269895,0.04180113162],
+ [-0.7798110408,0.1031249747,-0.6174625335]
+ ],
+ "t": [
+ [-3.462045404],
+ [102.4105128],
+ [287.5712577]
+ ]
+ },
+ {
+ "name": "10_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 22,
+ "K": [
+ [745.836,0,367.536],
+ [0,745.883,217.602],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306908,-0.0326669,-0.000283909,0.000278093,0.200484],
+ "R": [
+ [-0.6189078213,0.03804187807,0.7845418563],
+ [0.07413417155,0.9971968305,0.01012945108],
+ [-0.7819573092,0.06443055706,-0.6199931209]
+ ],
+ "t": [
+ [14.73270812],
+ [126.5060302],
+ [283.9045417]
+ ]
+ },
+ {
+ "name": "10_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 23,
+ "K": [
+ [742.749,0,379.273],
+ [0,742.868,231.204],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310394,-0.00460726,-0.000822068,-0.000336616,0.147608],
+ "R": [
+ [-0.6037549899,0.1086195044,0.7897352186],
+ [0.1215591915,0.9916324658,-0.04345590495],
+ [-0.787847241,0.0697628552,-0.6119067485]
+ ],
+ "t": [
+ [19.26192194],
+ [145.0128457],
+ [284.7838402]
+ ]
+ },
+ {
+ "name": "10_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 10,
+ "node": 24,
+ "K": [
+ [745.597,0,368.627],
+ [0,745.598,227.731],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309585,-0.00749389,-0.000770097,-0.000330202,0.147896],
+ "R": [
+ [-0.6450785239,0.075478584,0.760379301],
+ [0.07622559694,0.9965021766,-0.03425011393],
+ [-0.7603047786,0.03586635318,-0.6485755533]
+ ],
+ "t": [
+ [7.856697427],
+ [160.1393432],
+ [279.1413867]
+ ]
+ },
+ {
+ "name": "11_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 1,
+ "K": [
+ [742.855,0,374.596],
+ [0,743.116,213.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312561,0.00631745,-0.000399255,9.31566e-05,0.13435],
+ "R": [
+ [-0.9229364354,0.00164792287,0.3849488544],
+ [0.08421827064,0.9766305816,0.1977371741],
+ [-0.3756269679,0.2149185694,-0.9015067329]
+ ],
+ "t": [
+ [-1.777017447],
+ [176.3500352],
+ [303.9155303]
+ ]
+ },
+ {
+ "name": "11_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 2,
+ "K": [
+ [743.543,0,362.467],
+ [0,743.612,228.587],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311508,-0.0063044,0.000209199,0.000389142,0.157517],
+ "R": [
+ [-0.9382305089,-0.009495783218,0.3458805319],
+ [0.07354737957,0.9713073762,0.226169768],
+ [-0.338103971,0.2376379833,-0.9106118238]
+ ],
+ "t": [
+ [-11.88478771],
+ [180.6527832],
+ [308.9268929]
+ ]
+ },
+ {
+ "name": "11_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 3,
+ "K": [
+ [749.382,0,384.698],
+ [0,749.44,241.756],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334994,0.135003,0.000819921,0.00199466,-0.05032],
+ "R": [
+ [-0.9215516186,0.03410543981,0.3867550042],
+ [0.1287847641,0.966589567,0.2216282778],
+ [-0.3662746221,0.2540500501,-0.895154441]
+ ],
+ "t": [
+ [-28.84627719],
+ [162.2565593],
+ [311.7587167]
+ ]
+ },
+ {
+ "name": "11_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 4,
+ "K": [
+ [747.478,0,355.1],
+ [0,747.786,237.425],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332665,0.125805,0.000559145,-0.000285828,-0.0488142],
+ "R": [
+ [-0.9186497576,-0.03493542623,0.3935252708],
+ [0.05923251482,0.9726444983,0.2246200995],
+ [-0.3906073886,0.2296566914,-0.8914503195]
+ ],
+ "t": [
+ [-43.73591523],
+ [146.455357],
+ [306.7233507]
+ ]
+ },
+ {
+ "name": "11_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 5,
+ "K": [
+ [744.546,0,358.346],
+ [0,744.606,240.06],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319412,0.0357687,0.00118284,-0.000939418,0.105494],
+ "R": [
+ [-0.9252091585,0.02778676908,0.3784387777],
+ [0.1130706466,0.9721977994,0.2050523536],
+ [-0.3622196044,0.2325066328,-0.9026281759]
+ ],
+ "t": [
+ [-43.43063623],
+ [134.4377466],
+ [308.7383564]
+ ]
+ },
+ {
+ "name": "11_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 6,
+ "K": [
+ [744.682,0,386.644],
+ [0,744.47,247.576],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310524,-0.0156223,-0.000288596,-3.26402e-05,0.156674],
+ "R": [
+ [-0.9144551399,0.0484228537,0.4017798207],
+ [0.1449564791,0.9661327489,0.2134833264],
+ [-0.3778351707,0.2534615133,-0.8905042645]
+ ],
+ "t": [
+ [-44.21957265],
+ [107.5274508],
+ [309.8949628]
+ ]
+ },
+ {
+ "name": "11_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 7,
+ "K": [
+ [746.436,0,349.001],
+ [0,746.553,211.863],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330393,0.0902383,-0.000783974,-0.000712996,0.00481592],
+ "R": [
+ [-0.9105637485,0.003264968682,0.4133557789],
+ [0.1001837456,0.9718993559,0.2130137535],
+ [-0.401044732,0.2353741321,-0.8853034174]
+ ],
+ "t": [
+ [-36.21090107],
+ [102.2867759],
+ [306.6852556]
+ ]
+ },
+ {
+ "name": "11_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 8,
+ "K": [
+ [745.743,0,370.625],
+ [0,745.85,233.671],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3257,0.0614375,0.00126654,-0.000627381,0.0722474],
+ "R": [
+ [-0.8981193216,-0.01090147501,0.4396166989],
+ [0.09488580103,0.9713398361,0.2179348702],
+ [-0.4293930238,0.2374449004,-0.8713446794]
+ ],
+ "t": [
+ [-42.17364239],
+ [80.07059019],
+ [305.3107943]
+ ]
+ },
+ {
+ "name": "11_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 9,
+ "K": [
+ [743.294,0,376.993],
+ [0,743.306,225.516],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315184,-0.00458353,0.00085295,-0.000315923,0.19344],
+ "R": [
+ [-0.9287334953,0.02657190893,0.369794576],
+ [0.1072763174,0.9740215576,0.1994336907],
+ [-0.354888555,0.2248909489,-0.9074569822]
+ ],
+ "t": [
+ [4.627896612],
+ [76.0139061],
+ [305.925361]
+ ]
+ },
+ {
+ "name": "11_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 10,
+ "K": [
+ [746.981,0,373.015],
+ [0,746.916,231.087],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31553,-0.0133214,-7.49701e-05,-0.000474937,0.183355],
+ "R": [
+ [-0.897589008,-0.01428097087,0.4406018914],
+ [0.092180686,0.9712994893,0.219271574],
+ [-0.431087803,0.2374307391,-0.8705113154]
+ ],
+ "t": [
+ [-5.834972436],
+ [85.69962032],
+ [306.7617687]
+ ]
+ },
+ {
+ "name": "11_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 11,
+ "K": [
+ [743.956,0,385.014],
+ [0,743.968,233.944],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321873,0.0619652,-0.000204505,0.000631491,0.0680901],
+ "R": [
+ [-0.9171447001,-0.01735780695,0.3981762243],
+ [0.08629809142,0.9667012777,0.2409175774],
+ [-0.3890992656,0.2553181275,-0.8851070078]
+ ],
+ "t": [
+ [26.82061991],
+ [73.01187567],
+ [307.7528197]
+ ]
+ },
+ {
+ "name": "11_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 12,
+ "K": [
+ [749.192,0,349.167],
+ [0,749.113,221.266],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334032,0.094759,-0.000689735,0.000727903,0.0409048],
+ "R": [
+ [-0.937850977,-0.03419002209,0.345349949],
+ [0.06230645433,0.9623765935,0.2644791068],
+ [-0.341399254,0.2695595196,-0.9004355695]
+ ],
+ "t": [
+ [57.17130279],
+ [82.80130245],
+ [306.825197]
+ ]
+ },
+ {
+ "name": "11_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 13,
+ "K": [
+ [744.715,0,367.122],
+ [0,744.786,220.538],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315954,0.0180051,3.91318e-05,0.000697083,0.145396],
+ "R": [
+ [-0.9312656673,-0.01667316508,0.3639591494],
+ [0.07039560041,0.9718946087,0.2246448954],
+ [-0.3574754765,0.2348252013,-0.9039183639]
+ ],
+ "t": [
+ [46.96203938],
+ [112.2947483],
+ [304.8878272]
+ ]
+ },
+ {
+ "name": "11_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 14,
+ "K": [
+ [746.505,0,367.697],
+ [0,746.62,222.237],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323622,0.0629014,0.000917096,0.00064017,0.0716359],
+ "R": [
+ [-0.9260527677,-0.07925799212,0.3689775632],
+ [0.02937617957,0.9595934278,0.279852628],
+ [-0.3762490021,0.2699974518,-0.8863058527]
+ ],
+ "t": [
+ [50.81898209],
+ [116.0290364],
+ [310.1255555]
+ ]
+ },
+ {
+ "name": "11_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 15,
+ "K": [
+ [746.042,0,355.995],
+ [0,745.821,261.077],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321065,0.0443736,0.000927074,0.000280863,0.106789],
+ "R": [
+ [-0.9208600933,-0.04678508348,0.387076019],
+ [0.03581020852,0.9784294414,0.2034538209],
+ [-0.3882451771,0.2012137775,-0.8993212431]
+ ],
+ "t": [
+ [43.08113165],
+ [154.6066575],
+ [301.5640854]
+ ]
+ },
+ {
+ "name": "11_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 16,
+ "K": [
+ [741.668,0,363.735],
+ [0,741.796,217.06],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309875,-0.0179015,-1.19394e-05,-0.000437783,0.188022],
+ "R": [
+ [-0.8991061052,-0.0185684781,0.437336739],
+ [0.0842559957,0.9730755765,0.214534029],
+ [-0.4295452698,0.2297370977,-0.873333686]
+ ],
+ "t": [
+ [16.70791642],
+ [154.14567],
+ [307.2679797]
+ ]
+ },
+ {
+ "name": "11_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 17,
+ "K": [
+ [747.822,0,361.761],
+ [0,747.76,222.34],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334628,0.097635,0.00152491,-0.000486737,0.0213673],
+ "R": [
+ [-0.9162397179,0.01033450945,0.4004971626],
+ [0.1187416248,0.9617552428,0.2468345183],
+ [-0.3826293322,0.2737152732,-0.8824254888]
+ ],
+ "t": [
+ [27.8785048],
+ [159.3368695],
+ [313.9971646]
+ ]
+ },
+ {
+ "name": "11_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 18,
+ "K": [
+ [745.448,0,360.818],
+ [0,745.84,214.85],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329534,0.0903331,0.00014069,0.000717079,0.0211508],
+ "R": [
+ [-0.9101418911,0.04432675398,0.411918532],
+ [0.1391589893,0.9692024732,0.2031781034],
+ [-0.3902262342,0.2422430698,-0.888280238]
+ ],
+ "t": [
+ [16.35209076],
+ [181.679224],
+ [308.9632727]
+ ]
+ },
+ {
+ "name": "11_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 19,
+ "K": [
+ [746.167,0,363.996],
+ [0,746.229,234.387],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310901,-0.0147285,-0.000729007,-0.000655789,0.178193],
+ "R": [
+ [-0.9157731435,-0.03755396433,0.3999365568],
+ [0.06406747528,0.9692207168,0.2377110865],
+ [-0.3965537899,0.2433123544,-0.8851803149]
+ ],
+ "t": [
+ [-10.79527777],
+ [146.8696803],
+ [308.5271108]
+ ]
+ },
+ {
+ "name": "11_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 20,
+ "K": [
+ [744.588,0,384.664],
+ [0,744.662,240.853],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307863,-0.0295446,-0.000517465,0.000242427,0.189333],
+ "R": [
+ [-0.9170523574,0.0431160901,0.396429031],
+ [0.124694228,0.9752892469,0.1823793695],
+ [-0.3787694858,0.2166838427,-0.8997676305]
+ ],
+ "t": [
+ [-9.200936127],
+ [142.5227957],
+ [304.9039442]
+ ]
+ },
+ {
+ "name": "11_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 21,
+ "K": [
+ [745.832,0,378.426],
+ [0,745.825,230.649],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317765,0.041948,0.000140897,0.000331931,0.0876249],
+ "R": [
+ [-0.903416406,0.009580467792,0.4286572198],
+ [0.1299134284,0.9588705554,0.2523683006],
+ [-0.4086089801,0.2836819921,-0.8675040223]
+ ],
+ "t": [
+ [-22.38884391],
+ [100.2357286],
+ [311.942278]
+ ]
+ },
+ {
+ "name": "11_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 22,
+ "K": [
+ [745.759,0,381.189],
+ [0,746.033,229.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307738,-0.0303832,0.000694314,-0.000395606,0.211723],
+ "R": [
+ [-0.9121889441,-0.007451044875,0.4097021017],
+ [0.1102495844,0.9585035751,0.2628990789],
+ [-0.394659802,0.2849831196,-0.8735148895]
+ ],
+ "t": [
+ [-0.4671669308],
+ [91.25062129],
+ [311.8622342]
+ ]
+ },
+ {
+ "name": "11_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 23,
+ "K": [
+ [748.678,0,358.839],
+ [0,748.651,239.635],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328983,0.0919887,-1.22475e-05,-0.000911096,0.0194744],
+ "R": [
+ [-0.9251940915,-0.06790089301,0.3733702744],
+ [0.01633387562,0.9758259889,0.2179377065],
+ [-0.3791425821,0.207733262,-0.9017193545]
+ ],
+ "t": [
+ [15.23843998],
+ [129.776393],
+ [302.9631654]
+ ]
+ },
+ {
+ "name": "11_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 11,
+ "node": 24,
+ "K": [
+ [747.741,0,374.843],
+ [0,747.8,238.972],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320184,0.0453956,8.07771e-05,-0.000586724,0.0799959],
+ "R": [
+ [-0.901120423,0.005145678853,0.4335383549],
+ [0.1030532182,0.9738156258,0.2026404726],
+ [-0.4211437016,0.2272809911,-0.8780554275]
+ ],
+ "t": [
+ [6.522845915],
+ [142.0951003],
+ [306.255293]
+ ]
+ },
+ {
+ "name": "12_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 1,
+ "K": [
+ [745.397,0,350.188],
+ [0,745.422,244.528],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318784,0.0421446,0.000567418,-0.000208,0.092208],
+ "R": [
+ [-0.2717431751,0.1656287556,0.9480098956],
+ [0.4128654434,0.9098857043,-0.04062180222],
+ [-0.86930879,0.3803618284,-0.3156376199]
+ ],
+ "t": [
+ [-13.70303847],
+ [97.1923903],
+ [326.2673629]
+ ]
+ },
+ {
+ "name": "12_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 2,
+ "K": [
+ [747.727,0,370.501],
+ [0,747.788,234.298],
+ [0,0,1]
+ ],
+ "distCoef": [-0.349811,0.202844,-0.00194754,-0.000389321,-0.178679],
+ "R": [
+ [-0.3883456032,0.1438043201,0.9102241537],
+ [0.3131714459,0.9495549238,-0.01640403197],
+ [-0.8666667975,0.2786857806,-0.4137908865]
+ ],
+ "t": [
+ [13.37192963],
+ [105.5473845],
+ [318.08591]
+ ]
+ },
+ {
+ "name": "12_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 3,
+ "K": [
+ [746.831,0,387.09],
+ [0,746.752,242.092],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338844,0.109538,-0.000689346,-0.00140957,-0.0011227],
+ "R": [
+ [-0.2489409576,0.07810816372,0.9653639285],
+ [0.3865744043,0.9219167609,0.0250941395],
+ [-0.8880251289,0.3794319447,-0.2596974581]
+ ],
+ "t": [
+ [-20.03334166],
+ [70.50216381],
+ [325.3775618]
+ ]
+ },
+ {
+ "name": "12_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 4,
+ "K": [
+ [746.601,0,360.45],
+ [0,746.776,222.063],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336822,0.124774,0.000206697,-0.000417774,-0.0398672],
+ "R": [
+ [-0.3081671276,0.03567998316,0.9506629057],
+ [0.4212102042,0.9011275261,0.1027187694],
+ [-0.8530035084,0.4320834647,-0.2927266543]
+ ],
+ "t": [
+ [4.764737811],
+ [63.41476985],
+ [331.1517594]
+ ]
+ },
+ {
+ "name": "12_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 5,
+ "K": [
+ [748.2,0,362.212],
+ [0,748.363,218.877],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337789,0.133894,-0.000945522,-0.000498923,-0.0570031],
+ "R": [
+ [-0.2841336654,-0.004801876737,0.9587726541],
+ [0.3831436474,0.9161034097,0.118133349],
+ [-0.8789021593,0.4009133132,-0.2584560111]
+ ],
+ "t": [
+ [10.92507323],
+ [68.32263664],
+ [329.7866549]
+ ]
+ },
+ {
+ "name": "12_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 6,
+ "K": [
+ [747.371,0,350.388],
+ [0,747.497,231.124],
+ [0,0,1]
+ ],
+ "distCoef": [-0.351189,0.233364,-0.000450075,-0.00118874,-0.265042],
+ "R": [
+ [-0.3878504716,-0.01635524947,0.9215771902],
+ [0.3346075558,0.9291346168,0.1573106717],
+ [-0.8588421248,0.3693797093,-0.3548927092]
+ ],
+ "t": [
+ [53.76493542],
+ [97.09757883],
+ [324.1315487]
+ ]
+ },
+ {
+ "name": "12_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 7,
+ "K": [
+ [747.196,0,383.602],
+ [0,747.258,260.076],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340453,0.149462,7.57635e-05,-0.00150211,-0.0810731],
+ "R": [
+ [-0.3567494973,0.01375486298,0.934098817],
+ [0.3428523716,0.9320474424,0.1172169629],
+ [-0.8690121101,0.3620750873,-0.3372233439]
+ ],
+ "t": [
+ [46.87962376],
+ [118.8343508],
+ [324.070693]
+ ]
+ },
+ {
+ "name": "12_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 8,
+ "K": [
+ [748.388,0,360.952],
+ [0,748.584,220.934],
+ [0,0,1]
+ ],
+ "distCoef": [-0.353387,0.236369,0.000317101,-0.000350889,-0.25062],
+ "R": [
+ [-0.3882650784,-0.0538394581,0.9199736636],
+ [0.3529834406,0.9134681838,0.2024316376],
+ [-0.8512654812,0.4033326047,-0.3356633588]
+ ],
+ "t": [
+ [53.63586961],
+ [124.5990463],
+ [329.2926486]
+ ]
+ },
+ {
+ "name": "12_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 9,
+ "K": [
+ [745.023,0,373.202],
+ [0,745.321,253.183],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310235,-0.0270349,0.000213071,-0.0010354,0.204812],
+ "R": [
+ [-0.3615436505,-0.1034754049,0.9265953968],
+ [0.3189620476,0.9201303682,0.2272076531],
+ [-0.8760989676,0.3776942494,-0.2996625652]
+ ],
+ "t": [
+ [26.36947949],
+ [154.1173845],
+ [328.14772]
+ ]
+ },
+ {
+ "name": "12_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 10,
+ "K": [
+ [743.497,0,337.094],
+ [0,743.775,230.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323522,0.0697077,-0.000922284,-0.00112939,0.0376595],
+ "R": [
+ [-0.409013364,-0.03192166586,0.9119698873],
+ [0.3635432206,0.9109541012,0.1949331996],
+ [-0.8369853014,0.4112707536,-0.3609874961]
+ ],
+ "t": [
+ [36.39561956],
+ [146.2733377],
+ [330.6860766]
+ ]
+ },
+ {
+ "name": "12_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 11,
+ "K": [
+ [744.432,0,350.161],
+ [0,744.664,216.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.3138,0.0423232,-0.000980128,0.000347352,0.0411803],
+ "R": [
+ [-0.3625324698,0.01191238118,0.9318950067],
+ [0.4332658145,0.8874493782,0.157207936],
+ [-0.8251369234,0.4607512304,-0.3268904424]
+ ],
+ "t": [
+ [30.02223667],
+ [146.021886],
+ [340.9352409]
+ ]
+ },
+ {
+ "name": "12_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 12,
+ "K": [
+ [745.59,0,349.499],
+ [0,745.978,243.824],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328804,0.102744,-0.00034172,-0.00160085,-0.0230968],
+ "R": [
+ [-0.3184962228,0.07265474811,0.9451356747],
+ [0.3862627531,0.9204738181,0.05940568743],
+ [-0.8656565379,0.3839911948,-0.3212312573]
+ ],
+ "t": [
+ [17.04074577],
+ [180.9741057],
+ [327.7548666]
+ ]
+ },
+ {
+ "name": "12_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 13,
+ "K": [
+ [744.766,0,364.423],
+ [0,744.926,205.341],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32165,0.0514735,-0.000885848,-0.00113933,0.0656482],
+ "R": [
+ [-0.2748509499,0.06379038152,0.9593684081],
+ [0.3894986417,0.919644886,0.05043898999],
+ [-0.8790607279,0.3875358962,-0.2776115375]
+ ],
+ "t": [
+ [-9.802475588],
+ [164.1613661],
+ [327.7325897]
+ ]
+ },
+ {
+ "name": "12_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 14,
+ "K": [
+ [744.556,0,345.329],
+ [0,744.551,253.003],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311027,-0.00213006,0.0011289,-0.000863959,0.162024],
+ "R": [
+ [-0.3202755169,0.1244082889,0.9391198917],
+ [0.4530679872,0.8907277919,0.0365157459],
+ [-0.831957326,0.4371802584,-0.3416437171]
+ ],
+ "t": [
+ [0.5161253202],
+ [152.8799295],
+ [338.113135]
+ ]
+ },
+ {
+ "name": "12_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 15,
+ "K": [
+ [747.233,0,347.644],
+ [0,747.329,227.375],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323105,0.049287,-0.00101918,5.08353e-05,0.100564],
+ "R": [
+ [-0.2639942301,0.1219548974,0.9567831779],
+ [0.4010015368,0.9160569375,-0.006120025947],
+ [-0.8772142349,0.3820558732,-0.2907378472]
+ ],
+ "t": [
+ [-27.43280694],
+ [159.7105652],
+ [325.8203908]
+ ]
+ },
+ {
+ "name": "12_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 16,
+ "K": [
+ [744.634,0,382.866],
+ [0,744.52,241.14],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320913,0.0518689,0.000556907,0.000900625,0.0851061],
+ "R": [
+ [-0.2918914105,0.1153635448,0.9494686183],
+ [0.4055533141,0.9139698053,0.01362734066],
+ [-0.8662135499,0.3890378484,-0.3135660035]
+ ],
+ "t": [
+ [-22.908528],
+ [135.1916248],
+ [327.5972929]
+ ]
+ },
+ {
+ "name": "12_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 17,
+ "K": [
+ [745.929,0,399.922],
+ [0,745.76,235.115],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324412,0.0924767,0.000808772,0.00160345,0.0125449],
+ "R": [
+ [-0.2332319969,0.1531844985,0.9602798264],
+ [0.4252056559,0.9041694633,-0.04096012482],
+ [-0.8745301515,0.3987632018,-0.2760161646]
+ ],
+ "t": [
+ [-42.90434909],
+ [120.9469461],
+ [326.5490528]
+ ]
+ },
+ {
+ "name": "12_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 18,
+ "K": [
+ [745.596,0,390.427],
+ [0,745.457,235.855],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331545,0.0834192,0.000515021,-0.000851112,0.0388274],
+ "R": [
+ [-0.2198853867,0.1587089693,0.9625288982],
+ [0.4990272732,0.8661072571,-0.02880971702],
+ [-0.8382256244,0.4739933356,-0.2696444333]
+ ],
+ "t": [
+ [-48.83152805],
+ [73.52609427],
+ [332.6787653]
+ ]
+ },
+ {
+ "name": "12_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 19,
+ "K": [
+ [744.284,0,396.863],
+ [0,744.47,248.804],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318049,0.0444362,0.000417829,0.000948817,0.0847095],
+ "R": [
+ [-0.2972813843,0.0975420226,0.9497943632],
+ [0.4134272643,0.9098266462,0.03596346693],
+ [-0.8606402708,0.4033621545,-0.3108010564]
+ ],
+ "t": [
+ [-6.347004052],
+ [101.4062297],
+ [328.9550302]
+ ]
+ },
+ {
+ "name": "12_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 20,
+ "K": [
+ [745.173,0,391.68],
+ [0,745.292,239.851],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316891,0.030971,0.000827356,0.00064571,0.114679],
+ "R": [
+ [-0.3480625566,0.05516818218,0.9358466372],
+ [0.3680676982,0.9261498325,0.08229615655],
+ [-0.8621940769,0.3730991283,-0.3426637043]
+ ],
+ "t": [
+ [18.00373906],
+ [105.1024652],
+ [325.6162418]
+ ]
+ },
+ {
+ "name": "12_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 21,
+ "K": [
+ [744.07,0,385.155],
+ [0,744.184,238.534],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325321,0.0749068,6.22505e-05,8.78769e-06,0.0274316],
+ "R": [
+ [-0.2944173655,-0.00519814937,0.9556628036],
+ [0.365777539,0.9232287513,0.117709238],
+ [-0.882907247,0.3842156322,-0.2699132104]
+ ],
+ "t": [
+ [4.17424328],
+ [116.8807078],
+ [328.2455421]
+ ]
+ },
+ {
+ "name": "12_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 22,
+ "K": [
+ [747.36,0,358.25],
+ [0,747.451,237.291],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329867,0.116416,-0.000580151,-0.000763801,-0.0625995],
+ "R": [
+ [-0.323867873,0.0530845029,0.9446118972],
+ [0.387407199,0.9183241349,0.08121850418],
+ [-0.8631484594,0.3922535134,-0.3179810029]
+ ],
+ "t": [
+ [22.53106717],
+ [133.6738778],
+ [328.8995429]
+ ]
+ },
+ {
+ "name": "12_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 23,
+ "K": [
+ [748.813,0,380.156],
+ [0,748.859,237.356],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333932,0.115832,0.000621747,-0.000254241,-0.0140772],
+ "R": [
+ [-0.3097958639,0.0326105921,0.9502436908],
+ [0.3550951383,0.9310652686,0.08381472691],
+ [-0.8820056493,0.3633923705,-0.3000200319]
+ ],
+ "t": [
+ [-6.485061334],
+ [151.418855],
+ [323.8858443]
+ ]
+ },
+ {
+ "name": "12_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 12,
+ "node": 24,
+ "K": [
+ [745.33,0,360.408],
+ [0,745.472,237.433],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321653,0.057929,3.69615e-05,-0.000478596,0.0560779],
+ "R": [
+ [-0.3250711399,0.1046959739,0.9398763254],
+ [0.4072848242,0.9124585149,0.03922410658],
+ [-0.8534915501,0.395547989,-0.3392550109]
+ ],
+ "t": [
+ [2.217299854],
+ [123.8595425],
+ [329.2221602]
+ ]
+ },
+ {
+ "name": "13_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 1,
+ "K": [
+ [747.6,0,355.92],
+ [0,747.783,249.853],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333712,0.144699,-6.46303e-05,-0.0011294,-0.0924471],
+ "R": [
+ [0.5138271048,0.01100033104,0.857823233],
+ [0.08358608019,0.9945184566,-0.06282043172],
+ [-0.8538120833,0.1039809221,0.5100910647]
+ ],
+ "t": [
+ [-37.95328646],
+ [135.6435695],
+ [289.9999799]
+ ]
+ },
+ {
+ "name": "13_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 2,
+ "K": [
+ [743.227,0,372.15],
+ [0,743.265,265.407],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306942,-0.0266079,0.000311285,0.000595534,0.199806],
+ "R": [
+ [0.4485620057,-0.005900946102,0.8937322339],
+ [0.06601293956,0.9974655925,-0.02654587691],
+ [-0.8913105064,0.07090536373,0.4478147055]
+ ],
+ "t": [
+ [-38.28645032],
+ [133.2984516],
+ [288.856211]
+ ]
+ },
+ {
+ "name": "13_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 3,
+ "K": [
+ [746.538,0,387.516],
+ [0,746.833,233.181],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322577,0.0715483,-4.90461e-05,0.000787497,0.0326639],
+ "R": [
+ [0.5260210271,0.02315422103,0.8501563157],
+ [0.07372016672,0.9946254291,-0.07270208278],
+ [-0.8472704504,0.1009164896,0.5214869567]
+ ],
+ "t": [
+ [-53.0750023],
+ [105.7642054],
+ [287.8235486]
+ ]
+ },
+ {
+ "name": "13_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 4,
+ "K": [
+ [744.864,0,367.763],
+ [0,745.005,229.771],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318118,0.0367901,0.000364188,-0.000713933,0.0879467],
+ "R": [
+ [0.4575577495,0.1623260474,0.8742374736],
+ [-0.0244195278,0.9851184177,-0.1701334469],
+ [-0.8888445267,0.05649741078,0.4547124916]
+ ],
+ "t": [
+ [4.756699591],
+ [110.8595803],
+ [285.3944853]
+ ]
+ },
+ {
+ "name": "13_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 5,
+ "K": [
+ [744.026,0,374.462],
+ [0,744.21,219.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309274,-0.00813814,-0.000611939,0.000562163,0.16533],
+ "R": [
+ [0.5236500196,-0.01990538858,0.8517009055],
+ [0.0479853053,0.9988290545,-0.006158764858],
+ [-0.8505810176,0.04409416531,0.5239920201]
+ ],
+ "t": [
+ [-32.80347729],
+ [91.75629107],
+ [282.6719703]
+ ]
+ },
+ {
+ "name": "13_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 6,
+ "K": [
+ [746.172,0,347.715],
+ [0,746.412,223.735],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315889,0.0243673,0.00083413,-0.000596366,0.129203],
+ "R": [
+ [0.489601615,0.07237643337,0.8689372305],
+ [-0.010214584,0.9969567785,-0.07728417735],
+ [-0.8718864151,0.02896262571,0.488850944]
+ ],
+ "t": [
+ [7.55259059],
+ [89.5920217],
+ [281.8493454]
+ ]
+ },
+ {
+ "name": "13_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 7,
+ "K": [
+ [745.619,0,383.372],
+ [0,745.683,224.508],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315816,0.0424659,0.000456201,0.000714024,0.0879752],
+ "R": [
+ [0.5142457137,-0.005076098829,0.8576278792],
+ [0.07753605572,0.9961627141,-0.04059565316],
+ [-0.8541308483,0.08737322366,0.5126659866]
+ ],
+ "t": [
+ [9.165152848],
+ [86.80281732],
+ [287.1451009]
+ ]
+ },
+ {
+ "name": "13_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 8,
+ "K": [
+ [746.151,0,390.693],
+ [0,746.159,238.847],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312796,0.0112848,0.00109903,0.000945928,0.138088],
+ "R": [
+ [0.5333632905,-0.08775347438,0.841322131],
+ [0.13459771,0.9907366672,0.0180086874],
+ [-0.8351090089,0.1036348594,0.5402339855]
+ ],
+ "t": [
+ [14.59630248],
+ [78.12680456],
+ [289.302137]
+ ]
+ },
+ {
+ "name": "13_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 9,
+ "K": [
+ [744.811,0,365.557],
+ [0,745.05,239.01],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302561,-0.0588071,-0.000331846,-0.00065645,0.252299],
+ "R": [
+ [0.515993865,0.007464548532,0.8565597538],
+ [0.05311793688,0.9977587535,-0.04069342277],
+ [-0.8549437502,0.06649624343,0.5144408941]
+ ],
+ "t": [
+ [47.02842806],
+ [101.5821868],
+ [285.7219747]
+ ]
+ },
+ {
+ "name": "13_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 10,
+ "K": [
+ [744.185,0,393.537],
+ [0,744.44,231.354],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321367,0.0639595,-3.49657e-05,0.000800078,0.0579089],
+ "R": [
+ [0.5364096096,-0.02345912583,0.8436316733],
+ [0.07330244032,0.9971310212,-0.01888064639],
+ [-0.8407683884,0.07196802054,0.536590273]
+ ],
+ "t": [
+ [31.38919798],
+ [122.486781],
+ [287.1552388]
+ ]
+ },
+ {
+ "name": "13_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 11,
+ "K": [
+ [745.973,0,365.594],
+ [0,746.037,211.677],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32905,0.0977698,-0.000962762,0.000946642,0.0190885],
+ "R": [
+ [0.5178117038,0.00482526951,0.8554810087],
+ [0.01921134431,0.9996663333,-0.01726691564],
+ [-0.8552788806,0.02537595122,0.5175462273]
+ ],
+ "t": [
+ [57.16543019],
+ [149.3252564],
+ [279.6241941]
+ ]
+ },
+ {
+ "name": "13_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 12,
+ "K": [
+ [745.909,0,358.218],
+ [0,746.022,220.333],
+ [0,0,1]
+ ],
+ "distCoef": [-0.338571,0.148871,-0.00100229,-0.000678393,-0.0710162],
+ "R": [
+ [0.5368407815,0.02503814463,0.8433119628],
+ [-0.01156171997,0.9996840035,-0.02232083821],
+ [-0.8436043516,0.002232599467,0.5369606257]
+ ],
+ "t": [
+ [51.57359577],
+ [176.1957711],
+ [275.7319623]
+ ]
+ },
+ {
+ "name": "13_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 13,
+ "K": [
+ [743.068,0,370.139],
+ [0,743.357,232.303],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302401,-0.0553181,-0.00107418,-0.000672395,0.220417],
+ "R": [
+ [0.5299693687,-0.06080201885,0.8458342525],
+ [0.13849556,0.9902402801,-0.01559383094],
+ [-0.8366310107,0.1254085412,0.5332178257]
+ ],
+ "t": [
+ [16.99243391],
+ [145.7883087],
+ [295.0494301]
+ ]
+ },
+ {
+ "name": "13_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 14,
+ "K": [
+ [743.724,0,347.611],
+ [0,743.902,235.434],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315484,0.0296225,-0.000529931,-0.000276443,0.110913],
+ "R": [
+ [0.5388576125,-0.001120175332,0.8423961174],
+ [0.06888686412,0.9967085439,-0.04273965901],
+ [-0.8395755317,0.08106061749,0.5371611517]
+ ],
+ "t": [
+ [22.68047362],
+ [178.4537167],
+ [288.5132471]
+ ]
+ },
+ {
+ "name": "13_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 15,
+ "K": [
+ [748.48,0,370.578],
+ [0,748.498,231.761],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333743,0.123731,0.000274987,0.00129665,-0.0264397],
+ "R": [
+ [0.5569883215,-0.02228411773,0.8302213126],
+ [0.06483002391,0.9977563557,-0.01671294857],
+ [-0.827986158,0.06313218472,0.5571833177]
+ ],
+ "t": [
+ [-8.30154925],
+ [184.6918205],
+ [284.5865319]
+ ]
+ },
+ {
+ "name": "13_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 16,
+ "K": [
+ [748.413,0,364.616],
+ [0,748.358,230.166],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337541,0.138107,0.000557985,-0.000490808,-0.0648839],
+ "R": [
+ [0.5035312414,0.04830043061,0.8626258501],
+ [0.03089895722,0.996790644,-0.07384894344],
+ [-0.8634243125,0.06383948941,0.5004227975]
+ ],
+ "t": [
+ [5.312179267],
+ [173.5565462],
+ [284.5085099]
+ ]
+ },
+ {
+ "name": "13_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 17,
+ "K": [
+ [745.143,0,372.782],
+ [0,745.112,223.2],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321603,0.0646008,-0.000584526,0.000805086,0.0603349],
+ "R": [
+ [0.5471603314,0.02993221277,0.8364924593],
+ [0.06649342528,0.9946477166,-0.07908567611],
+ [-0.8343825239,0.09889379359,0.5422414789]
+ ],
+ "t": [
+ [-32.63653561],
+ [167.4383368],
+ [289.2367997]
+ ]
+ },
+ {
+ "name": "13_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 18,
+ "K": [
+ [745.136,0,373.506],
+ [0,745.259,215.704],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333755,0.12331,-0.00049301,0.00138004,-0.0323155],
+ "R": [
+ [0.5039095131,0.07384116584,0.8605943788],
+ [0.02822760746,0.9943991795,-0.1018502524],
+ [-0.8632950856,0.07561583139,0.4990028469]
+ ],
+ "t": [
+ [-29.61131213],
+ [166.0398843],
+ [286.9453226]
+ ]
+ },
+ {
+ "name": "13_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 19,
+ "K": [
+ [743.638,0,344.046],
+ [0,743.783,238.416],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319291,0.0355055,-0.000169258,0.000161892,0.118247],
+ "R": [
+ [0.5180347054,0.01180967192,0.8552780692],
+ [0.1057363227,0.9913513706,-0.07773216881],
+ [-0.8487990775,0.1307019191,0.512305704]
+ ],
+ "t": [
+ [-19.08174331],
+ [122.2280138],
+ [293.3272927]
+ ]
+ },
+ {
+ "name": "13_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 20,
+ "K": [
+ [745.321,0,372.761],
+ [0,745.559,236.547],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320489,0.0479206,-9.03328e-05,-0.000256288,0.0784864],
+ "R": [
+ [0.4966252135,-0.01754426777,0.8677877598],
+ [0.06583916704,0.9976766247,-0.01750875645],
+ [-0.8654643848,0.06582971318,0.4966264667]
+ ],
+ "t": [
+ [-11.61163777],
+ [120.2765647],
+ [285.1928757]
+ ]
+ },
+ {
+ "name": "13_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 21,
+ "K": [
+ [745.539,0,371.886],
+ [0,745.656,230.519],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326644,0.0839413,-0.000557984,0.000204085,0.0126328],
+ "R": [
+ [0.5330371562,-0.03752357961,0.8452593514],
+ [0.08887796824,0.9959722199,-0.01183402057],
+ [-0.8414107777,0.08143290645,0.5342252193]
+ ],
+ "t": [
+ [-6.03247131],
+ [109.6165459],
+ [286.9430377]
+ ]
+ },
+ {
+ "name": "13_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 22,
+ "K": [
+ [744.018,0,396.717],
+ [0,744.224,249.141],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315372,0.0205822,-0.000440151,0.000134817,0.105074],
+ "R": [
+ [0.4984198723,-0.001673636668,0.8669341554],
+ [0.03130878513,0.9993805529,-0.01607079461],
+ [-0.8663702389,0.03515265859,0.4981635271]
+ ],
+ "t": [
+ [26.09238071],
+ [136.8142763],
+ [280.4949188]
+ ]
+ },
+ {
+ "name": "13_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 23,
+ "K": [
+ [744.884,0,382.514],
+ [0,744.877,235.74],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326378,0.0966908,-9.48994e-05,0.00105607,0.00534895],
+ "R": [
+ [0.4908089633,-0.01723518027,0.8710967283],
+ [0.04978157704,0.9987257364,-0.008288432131],
+ [-0.8698438688,0.04743260567,0.4910415377]
+ ],
+ "t": [
+ [21.95453226],
+ [154.6836493],
+ [281.6596012]
+ ]
+ },
+ {
+ "name": "13_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 13,
+ "node": 24,
+ "K": [
+ [744.481,0,341.813],
+ [0,744.509,213.322],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310201,-0.0109775,-0.00130948,-0.000370453,0.189258],
+ "R": [
+ [0.5283332962,-0.01827851401,0.8488402818],
+ [0.07383881778,0.996969434,-0.02449033896],
+ [-0.8458201683,0.0756164244,0.5280818111]
+ ],
+ "t": [
+ [-10.59416721],
+ [149.8670778],
+ [286.3856475]
+ ]
+ },
+ {
+ "name": "14_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 1,
+ "K": [
+ [745.639,0,394.42],
+ [0,745.872,232.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317821,0.05701,0.000216723,0.00145431,0.0516441],
+ "R": [
+ [0.1117244957,0.006687085701,0.9937167202],
+ [0.1929264895,0.9808052728,-0.02829110459],
+ [-0.9748317838,0.1948750877,0.1082898585]
+ ],
+ "t": [
+ [-10.76838593],
+ [183.2092961],
+ [300.2249606]
+ ]
+ },
+ {
+ "name": "14_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 2,
+ "K": [
+ [744.265,0,384.24],
+ [0,744.607,234.555],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314122,0.0172489,-0.000351192,-3.05431e-05,0.116521],
+ "R": [
+ [0.09126102309,0.01926845044,0.9956405739],
+ [0.1889483007,0.9813154942,-0.03631033643],
+ [-0.9777371658,0.191438313,0.08591511501]
+ ],
+ "t": [
+ [-20.54744948],
+ [195.8515337],
+ [299.6149103]
+ ]
+ },
+ {
+ "name": "14_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 3,
+ "K": [
+ [742.909,0,383.13],
+ [0,743.051,234.161],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311566,0.0211516,-0.000212815,-9.64233e-05,0.110817],
+ "R": [
+ [0.07658267666,-0.01244461629,0.9969855692],
+ [0.2193131093,0.9756433613,-0.004668149478],
+ [-0.9726442586,0.2190095044,0.07744664757]
+ ],
+ "t": [
+ [-39.95619704],
+ [171.7405641],
+ [305.3439137]
+ ]
+ },
+ {
+ "name": "14_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 4,
+ "K": [
+ [745.057,0,349.277],
+ [0,745.321,214.2],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31581,0.0237721,-0.00140945,-0.000667487,0.124292],
+ "R": [
+ [0.09341145846,-0.02354383001,0.9953491787],
+ [0.2305453591,0.9730606003,0.001380415192],
+ [-0.9685675696,0.2293441873,0.09632293059]
+ ],
+ "t": [
+ [-43.73412593],
+ [146.7921304],
+ [306.2893961]
+ ]
+ },
+ {
+ "name": "14_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 5,
+ "K": [
+ [744.634,0,387.597],
+ [0,744.752,225.246],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315944,0.0434616,-0.000268259,0.00110436,0.0780237],
+ "R": [
+ [0.1133728096,0.0374780752,0.9928454059],
+ [0.2222309073,0.973014014,-0.06210597779],
+ [-0.9683801061,0.2276820645,0.1019845459]
+ ],
+ "t": [
+ [-53.79623552],
+ [137.113178],
+ [305.5099477]
+ ]
+ },
+ {
+ "name": "14_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 6,
+ "K": [
+ [744.759,0,388.645],
+ [0,744.666,221.73],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306159,-0.0283273,-0.000508774,0.00094455,0.192402],
+ "R": [
+ [0.1564984143,0.01913164242,0.9874928995],
+ [0.2309282446,0.9713913042,-0.05541732523],
+ [-0.96030224,0.2367127254,0.1476031622]
+ ],
+ "t": [
+ [-66.24261018],
+ [112.7515407],
+ [303.5978047]
+ ]
+ },
+ {
+ "name": "14_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 7,
+ "K": [
+ [744.959,0,375.286],
+ [0,745.092,235.744],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302136,-0.0624017,-0.000302824,-0.00146028,0.239945],
+ "R": [
+ [0.0628689268,0.03077162571,0.9975472947],
+ [0.2444661638,0.9685997585,-0.04528578729],
+ [-0.967617586,0.2467136292,0.05337220603]
+ ],
+ "t": [
+ [-19.11814477],
+ [98.74694092],
+ [308.9777955]
+ ]
+ },
+ {
+ "name": "14_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 8,
+ "K": [
+ [746.649,0,384.752],
+ [0,746.836,237.267],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321628,0.0600031,0.000104796,0.000953791,0.0524376],
+ "R": [
+ [0.1158239713,-0.07384920575,0.9905206219],
+ [0.2473198554,0.9679682291,0.043248082],
+ [-0.9619863288,0.2399662524,0.1303782992]
+ ],
+ "t": [
+ [-45.76229918],
+ [76.40869106],
+ [305.3733784]
+ ]
+ },
+ {
+ "name": "14_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 9,
+ "K": [
+ [745.672,0,372.774],
+ [0,745.737,209.129],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30917,-0.00857977,-4.68803e-05,-0.000521617,0.17194],
+ "R": [
+ [0.1233501146,0.01050711315,0.9923075883],
+ [0.2153087978,0.9758411417,-0.0370970036],
+ [-0.9687243523,0.2182284735,0.1181078428]
+ ],
+ "t": [
+ [-15.44854612],
+ [78.73632155],
+ [304.5944309]
+ ]
+ },
+ {
+ "name": "14_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 10,
+ "K": [
+ [744.36,0,350.493],
+ [0,744.605,227.167],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324539,0.0696676,-0.000964917,-0.000688724,0.0453805],
+ "R": [
+ [0.0653712546,0.005547467364,0.9978455916],
+ [0.2748842968,0.9611936881,-0.02335203178],
+ [-0.9592524289,0.2758186354,0.06130952564]
+ ],
+ "t": [
+ [17.36142141],
+ [73.86484437],
+ [309.5485763]
+ ]
+ },
+ {
+ "name": "14_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 11,
+ "K": [
+ [744.072,0,352.953],
+ [0,744.032,218.847],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310531,-0.00866492,-5.61729e-06,0.000627577,0.179884],
+ "R": [
+ [0.08325845442,0.01268657881,0.9964472292],
+ [0.1993298125,0.97949952,-0.02912586749],
+ [-0.9763890903,0.2010466141,0.07902280276]
+ ],
+ "t": [
+ [33.26019053],
+ [89.58305599],
+ [303.0664402]
+ ]
+ },
+ {
+ "name": "14_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 12,
+ "K": [
+ [743.677,0,359.077],
+ [0,743.623,233.815],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305265,-0.0518121,0.000714314,0.000432839,0.265088],
+ "R": [
+ [0.06818541392,0.004787243789,0.9976611808],
+ [0.2533830838,0.9671167716,-0.02195821049],
+ [-0.9649599796,0.2542876962,0.06473025078]
+ ],
+ "t": [
+ [54.03449748],
+ [85.53998459],
+ [306.9876015]
+ ]
+ },
+ {
+ "name": "14_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 13,
+ "K": [
+ [742.736,0,368.122],
+ [0,742.832,238.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303469,-0.0412536,1.82225e-05,-0.000473228,0.205739],
+ "R": [
+ [0.1225239282,-0.0735967149,0.9897329996],
+ [0.2305366224,0.9720798639,0.0437447595],
+ [-0.9653189902,0.222809923,0.1360697815]
+ ],
+ "t": [
+ [17.43625272],
+ [116.7070017],
+ [307.0317679]
+ ]
+ },
+ {
+ "name": "14_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 14,
+ "K": [
+ [745.328,0,371.219],
+ [0,745.487,209.713],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318297,0.0286867,-0.0013247,0.000626009,0.137928],
+ "R": [
+ [0.06972690557,-0.0276618613,0.9971825209],
+ [0.2175762615,0.9759712693,0.01185967683],
+ [-0.9735495514,0.2161363064,0.0740700209]
+ ],
+ "t": [
+ [57.75964066],
+ [131.0709572],
+ [303.578107]
+ ]
+ },
+ {
+ "name": "14_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 15,
+ "K": [
+ [743.637,0,370.163],
+ [0,743.479,235.403],
+ [0,0,1]
+ ],
+ "distCoef": [-0.301307,-0.0600698,0.000220332,0.000264974,0.263845],
+ "R": [
+ [0.0871387997,-0.1078492175,0.9903410402],
+ [0.2171380052,0.9722761796,0.08677624828],
+ [-0.9722437535,0.2074790999,0.1081411432]
+ ],
+ "t": [
+ [27.10934266],
+ [155.0300785],
+ [303.8314173]
+ ]
+ },
+ {
+ "name": "14_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 16,
+ "K": [
+ [747.749,0,388.765],
+ [0,747.73,234.855],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320028,0.057848,-0.00103044,0.00101463,0.0716113],
+ "R": [
+ [0.09276252326,-0.02731891999,0.9953134134],
+ [0.2004837996,0.9796626634,0.008204393401],
+ [-0.9752955246,0.1987831547,0.09635298148]
+ ],
+ "t": [
+ [25.02944215],
+ [165.1686099],
+ [301.5459594]
+ ]
+ },
+ {
+ "name": "14_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 17,
+ "K": [
+ [745.477,0,358.035],
+ [0,745.633,228.78],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315933,0.0359808,-0.000244793,0.00106736,0.101835],
+ "R": [
+ [0.09323456203,-0.04884472803,0.9944453273],
+ [0.1997864834,0.9793990461,0.02937464128],
+ [-0.9753936013,0.1959380031,0.1010723576]
+ ],
+ "t": [
+ [12.52671676],
+ [185.8338565],
+ [300.6683817]
+ ]
+ },
+ {
+ "name": "14_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 19,
+ "K": [
+ [746.962,0,392.223],
+ [0,747.34,219.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325078,0.0885503,-0.00165532,0.000580691,0.0160315],
+ "R": [
+ [0.129696032,0.03909405168,0.990782819],
+ [0.1776002444,0.9821476201,-0.06200165731],
+ [-0.9755188837,0.1840046397,0.1204375361]
+ ],
+ "t": [
+ [-4.746570817],
+ [166.089254],
+ [298.9402723]
+ ]
+ },
+ {
+ "name": "14_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 20,
+ "K": [
+ [744.91,0,339.915],
+ [0,744.956,221.133],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306862,-0.0244375,-6.76743e-05,-0.000102471,0.205298],
+ "R": [
+ [0.09943504227,-0.007298095184,0.9950172914],
+ [0.2125993636,0.9770380132,-0.01407946415],
+ [-0.9720669642,0.212940035,0.09870338653]
+ ],
+ "t": [
+ [-22.7866272],
+ [143.0595857],
+ [303.8181509]
+ ]
+ },
+ {
+ "name": "14_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 21,
+ "K": [
+ [743.577,0,349.797],
+ [0,743.73,227.793],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307046,-0.0206712,-0.000861395,-9.97172e-05,0.196115],
+ "R": [
+ [0.09969364468,-0.01462231859,0.9949107322],
+ [0.2541863771,0.9670897407,-0.01125696175],
+ [-0.9620033591,0.2540150021,0.1001294952]
+ ],
+ "t": [
+ [-20.43364439],
+ [109.4423166],
+ [308.9174676]
+ ]
+ },
+ {
+ "name": "14_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 22,
+ "K": [
+ [745.066,0,381.498],
+ [0,745.047,229.678],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314894,0.0257947,-0.000483886,0.00117112,0.111876],
+ "R": [
+ [0.08696832552,-0.05294226024,0.9948033109],
+ [0.2154078845,0.9759627551,0.03310806346],
+ [-0.9726437959,0.2114091239,0.09628202687]
+ ],
+ "t": [
+ [-4.298071534],
+ [115.0382234],
+ [303.8536261]
+ ]
+ },
+ {
+ "name": "14_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 23,
+ "K": [
+ [746.602,0,379.206],
+ [0,746.635,260.689],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319922,0.0568918,0.00103779,-0.000422086,0.0766843],
+ "R": [
+ [0.09129519856,-0.01052008078,0.9957683037],
+ [0.2195471399,0.9755524467,-0.009822274065],
+ [-0.9713208739,0.2195148095,0.09137290798]
+ ],
+ "t": [
+ [18.69590833],
+ [125.3942709],
+ [304.7857903]
+ ]
+ },
+ {
+ "name": "14_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 14,
+ "node": 24,
+ "K": [
+ [745.388,0,382.392],
+ [0,745.496,224.015],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302393,-0.0525763,-0.000559682,-6.77e-05,0.234314],
+ "R": [
+ [0.08118536371,-0.04636746828,0.9956199047],
+ [0.1796446798,0.9832385033,0.03114216711],
+ [-0.9803758084,0.1763295309,0.0881542445]
+ ],
+ "t": [
+ [8.147122648],
+ [159.0280693],
+ [298.1193244]
+ ]
+ },
+ {
+ "name": "15_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 1,
+ "K": [
+ [747.532,0,374.739],
+ [0,747.668,233.944],
+ [0,0,1]
+ ],
+ "distCoef": [-0.331439,0.109037,-0.000609362,0.000392501,-0.000621335],
+ "R": [
+ [0.7848571462,0.05717032211,0.6170338843],
+ [0.1817012858,0.9307358272,-0.3173569956],
+ [-0.5924389444,0.3611957561,0.7201067442]
+ ],
+ "t": [
+ [-19.59276639],
+ [102.5270366],
+ [325.6365462]
+ ]
+ },
+ {
+ "name": "15_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 2,
+ "K": [
+ [743.597,0,385.764],
+ [0,743.786,211.188],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307778,-0.0279819,-0.000454196,0.00143268,0.205643],
+ "R": [
+ [0.7963392439,-0.01332837804,0.6047033677],
+ [0.2601504211,0.910106147,-0.3225345868],
+ [-0.5460453892,0.4141607847,0.7282206241]
+ ],
+ "t": [
+ [-38.00771612],
+ [61.10094736],
+ [329.1235579]
+ ]
+ },
+ {
+ "name": "15_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 3,
+ "K": [
+ [746.709,0,382.284],
+ [0,746.792,243.451],
+ [0,0,1]
+ ],
+ "distCoef": [-0.343209,0.149416,0.000603517,0.00195788,-0.0395936],
+ "R": [
+ [0.7773715491,0.01124156294,0.6289412548],
+ [0.2547080739,0.908583342,-0.3310590698],
+ [-0.5751671686,0.4175523175,0.7034435232]
+ ],
+ "t": [
+ [-3.435783379],
+ [55.70511308],
+ [330.3798829]
+ ]
+ },
+ {
+ "name": "15_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 4,
+ "K": [
+ [743.976,0,365.248],
+ [0,744.344,229.757],
+ [0,0,1]
+ ],
+ "distCoef": [-0.297483,-0.106842,0.000162294,-0.00147347,0.393874],
+ "R": [
+ [0.7524447247,-0.05297584633,0.6565215122],
+ [0.2825071426,0.9263759092,-0.2490329079],
+ [-0.5949929838,0.3728555143,0.7120127209]
+ ],
+ "t": [
+ [9.049706825],
+ [87.26745214],
+ [326.8342451]
+ ]
+ },
+ {
+ "name": "15_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 5,
+ "K": [
+ [748.766,0,349.367],
+ [0,748.975,233.229],
+ [0,0,1]
+ ],
+ "distCoef": [-0.341466,0.149186,0.00133441,-0.000377568,-0.0615035],
+ "R": [
+ [0.7609990379,-0.1304343502,0.6355055818],
+ [0.3323849453,0.9196335935,-0.2092708816],
+ [-0.5571361704,0.3704874276,0.7431946943]
+ ],
+ "t": [
+ [9.029843232],
+ [83.469382],
+ [327.9910328]
+ ]
+ },
+ {
+ "name": "15_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 6,
+ "K": [
+ [747.104,0,395.739],
+ [0,747.205,237.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.337038,0.14046,-0.00100634,0.00170735,-0.0468264],
+ "R": [
+ [0.7339738121,-0.1238803965,0.6677844641],
+ [0.3595276943,0.9050347286,-0.227270713],
+ [-0.5762137452,0.4068977603,0.7088102232]
+ ],
+ "t": [
+ [34.88470946],
+ [89.42074723],
+ [330.2467181]
+ ]
+ },
+ {
+ "name": "15_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 7,
+ "K": [
+ [743.991,0,393.18],
+ [0,744.112,255.459],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325283,0.0732539,0.00077889,1.70805e-05,0.0462558],
+ "R": [
+ [0.7496842409,-0.1571943749,0.6428557128],
+ [0.3434403747,0.9227495198,-0.1748771933],
+ [-0.5657050892,0.3518852828,0.7457576683]
+ ],
+ "t": [
+ [12.35233863],
+ [128.2674639],
+ [324.6313017]
+ ]
+ },
+ {
+ "name": "15_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 8,
+ "K": [
+ [744.616,0,369.102],
+ [0,744.835,223.742],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336732,0.141968,-0.000206183,0.000677154,-0.0657397],
+ "R": [
+ [0.7264947252,-0.2131742795,0.6532703428],
+ [0.4249899792,0.8864309285,-0.1833677358],
+ [-0.5399897516,0.4108490422,0.7345843265]
+ ],
+ "t": [
+ [15.28675757],
+ [126.0458703],
+ [333.4285141]
+ ]
+ },
+ {
+ "name": "15_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 9,
+ "K": [
+ [747.517,0,392.733],
+ [0,747.836,218.574],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334626,0.113242,0.000443349,0.00121381,-0.00550976],
+ "R": [
+ [0.8000319441,0.07155257429,0.5956753458],
+ [0.1937456116,0.9088549369,-0.3693850858],
+ [-0.5678129326,0.4109293525,0.7132499848]
+ ],
+ "t": [
+ [-44.09712116],
+ [90.97242653],
+ [330.2186197]
+ ]
+ },
+ {
+ "name": "15_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 10,
+ "K": [
+ [743.904,0,354.135],
+ [0,744.494,220.038],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309276,-0.0261099,-0.00127318,0.000283377,0.220693],
+ "R": [
+ [0.7314656006,-0.1499734814,0.6651812009],
+ [0.3639090401,0.9108337109,-0.1948131455],
+ [-0.576652656,0.3845645668,0.720820233]
+ ],
+ "t": [
+ [2.360923884],
+ [158.0207055],
+ [327.7017732]
+ ]
+ },
+ {
+ "name": "15_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 11,
+ "K": [
+ [745.441,0,366.024],
+ [0,745.471,238.165],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311636,0.00305556,-0.00136926,0.00112458,0.163822],
+ "R": [
+ [0.743215427,-0.1065195831,0.660518287],
+ [0.3430146167,0.9082888556,-0.2394834597],
+ [-0.5744317207,0.4045552288,0.7115920636]
+ ],
+ "t": [
+ [3.38448511],
+ [170.5922255],
+ [331.2143489]
+ ]
+ },
+ {
+ "name": "15_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 12,
+ "K": [
+ [743.816,0,384.478],
+ [0,744.21,221.813],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309294,-0.0116228,-0.000777235,0.00017565,0.174372],
+ "R": [
+ [0.799529392,-0.03302696284,0.5997182431],
+ [0.261290645,0.91817945,-0.2977812898],
+ [-0.540814155,0.3947856601,0.7427410938]
+ ],
+ "t": [
+ [-15.11731065],
+ [179.1857595],
+ [329.2699106]
+ ]
+ },
+ {
+ "name": "15_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 13,
+ "K": [
+ [744.594,0,366.809],
+ [0,744.805,211.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313339,0.0076854,-0.000770441,0.000328229,0.137582],
+ "R": [
+ [0.7697001229,-0.07364256128,0.6341439064],
+ [0.280866324,0.9310898592,-0.2327783971],
+ [-0.5733025631,0.3572792288,0.7373436945]
+ ],
+ "t": [
+ [-27.06753178],
+ [173.6081799],
+ [322.2797536]
+ ]
+ },
+ {
+ "name": "15_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 14,
+ "K": [
+ [744.088,0,376.311],
+ [0,744.421,235.85],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308902,-0.0157485,-0.000258056,-0.00040893,0.167363],
+ "R": [
+ [0.8019727226,0.02030217439,0.5970155559],
+ [0.20788107,0.9274680659,-0.31078682],
+ [-0.5600225111,0.3733507848,0.7395836522]
+ ],
+ "t": [
+ [-32.35663304],
+ [177.8511702],
+ [324.3990212]
+ ]
+ },
+ {
+ "name": "15_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 15,
+ "K": [
+ [745.471,0,391.786],
+ [0,745.597,244.782],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319471,0.0520955,-9.03549e-05,0.00103599,0.0679082],
+ "R": [
+ [0.7993824794,0.07801580494,0.5957358356],
+ [0.170767806,0.9211391478,-0.3497728217],
+ [-0.5760434082,0.3813347671,0.723019908]
+ ],
+ "t": [
+ [-27.66881494],
+ [158.8808021],
+ [326.8395357]
+ ]
+ },
+ {
+ "name": "15_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 16,
+ "K": [
+ [744.688,0,372.572],
+ [0,744.687,232.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313079,0.00611683,0.000601543,0.00134427,0.153664],
+ "R": [
+ [0.8032635264,0.07397377164,0.5910123419],
+ [0.1542914416,0.9325457224,-0.3264239985],
+ [-0.5752928456,0.3533926383,0.7376664456]
+ ],
+ "t": [
+ [-29.95169554],
+ [148.2901373],
+ [322.192073]
+ ]
+ },
+ {
+ "name": "15_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 17,
+ "K": [
+ [746.029,0,371.631],
+ [0,745.957,227.751],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328618,0.10871,0.000376647,0.00140085,-0.015131],
+ "R": [
+ [0.7930332571,0.09578045983,0.6016014933],
+ [0.1573865304,0.9218193412,-0.3542295616],
+ [-0.5884961625,0.3755997947,0.7159588403]
+ ],
+ "t": [
+ [-34.37744536],
+ [124.5681533],
+ [326.9926029]
+ ]
+ },
+ {
+ "name": "15_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 18,
+ "K": [
+ [745.728,0,355.008],
+ [0,745.836,235.366],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326785,0.0753795,-0.00141997,0.000421746,0.0593081],
+ "R": [
+ [0.7423074724,-0.1183757606,0.6595201254],
+ [0.3246236378,0.9245812728,-0.1994215728],
+ [-0.5861732766,0.362127946,0.7247511576]
+ ],
+ "t": [
+ [30.16113415],
+ [163.1800117],
+ [323.8887405]
+ ]
+ },
+ {
+ "name": "15_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 19,
+ "K": [
+ [745.415,0,362.511],
+ [0,745.431,246.567],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31824,0.0392935,0.000511921,2.0382e-05,0.0980721],
+ "R": [
+ [0.7792023734,-0.03485918818,0.6258022837],
+ [0.250771695,0.9323920084,-0.2603050127],
+ [-0.5744190268,0.3597637832,0.7352637636]
+ ],
+ "t": [
+ [-23.21577405],
+ [116.3982595],
+ [324.3931588]
+ ]
+ },
+ {
+ "name": "15_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 20,
+ "K": [
+ [745.757,0,370.457],
+ [0,745.798,252.296],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322058,0.058259,0.000816175,0.000770211,0.0698692],
+ "R": [
+ [0.7754488131,-0.03297117701,0.6305489986],
+ [0.2704225106,0.9197540051,-0.2844718542],
+ [-0.5705705951,0.391108005,0.7221383001]
+ ],
+ "t": [
+ [-0.5150360293],
+ [101.3336776],
+ [328.6175717]
+ ]
+ },
+ {
+ "name": "15_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 21,
+ "K": [
+ [746.009,0,385.23],
+ [0,746.113,244.377],
+ [0,0,1]
+ ],
+ "distCoef": [-0.328614,0.0717398,0.00119782,0.000153035,0.0631847],
+ "R": [
+ [0.7150247804,-0.1629175474,0.6798510396],
+ [0.3900461789,0.9000077369,-0.194550898],
+ [-0.5801754405,0.4042820134,0.7070732013]
+ ],
+ "t": [
+ [2.095653738],
+ [113.9962742],
+ [330.0144097]
+ ]
+ },
+ {
+ "name": "15_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 22,
+ "K": [
+ [747.044,0,384.928],
+ [0,747.43,218.136],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332061,0.0970763,-0.00131827,0.000796644,0.024739],
+ "R": [
+ [0.7476996574,-0.1120966581,0.6545071135],
+ [0.3349363173,0.9147459603,-0.2259590484],
+ [-0.5733784838,0.3881677053,0.7215004829]
+ ],
+ "t": [
+ [-3.202807266],
+ [138.4357179],
+ [328.3283502]
+ ]
+ },
+ {
+ "name": "15_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 23,
+ "K": [
+ [746.525,0,381.586],
+ [0,746.566,231.744],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323751,0.0809499,0.00143311,0.000786746,0.0334271],
+ "R": [
+ [0.7874675535,-0.04961201835,0.6143561669],
+ [0.2785108695,0.9178324582,-0.2828697124],
+ [-0.5498422936,0.3938555906,0.7365807667]
+ ],
+ "t": [
+ [-21.67007007],
+ [141.1281207],
+ [328.549187]
+ ]
+ },
+ {
+ "name": "15_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 15,
+ "node": 24,
+ "K": [
+ [744.493,0,392.291],
+ [0,744.573,223.193],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308278,-0.0176562,-0.000671893,0.00116828,0.17277],
+ "R": [
+ [0.7758686755,-0.01407586642,0.6307374005],
+ [0.2927445364,0.8936390769,-0.3401614861],
+ [-0.5588635207,0.4485655695,0.6974672]
+ ],
+ "t": [
+ [-20.05926183],
+ [105.1778582],
+ [335.8474538]
+ ]
+ },
+ {
+ "name": "16_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 1,
+ "K": [
+ [745.918,0,380.409],
+ [0,745.86,226.454],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329171,0.0901569,-0.000500393,-0.000311386,0.0200307],
+ "R": [
+ [0.8121486446,0.04341076946,0.5818333819],
+ [-0.0759194996,0.9966126489,0.03161419974],
+ [-0.5784901112,-0.06984792866,0.8126933358]
+ ],
+ "t": [
+ [55.6088262],
+ [125.3657692],
+ [265.9940479]
+ ]
+ },
+ {
+ "name": "16_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 2,
+ "K": [
+ [747.364,0,392.411],
+ [0,747.161,225.523],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325367,0.0819479,0.000479765,0.00158774,0.0247525],
+ "R": [
+ [0.8168932447,0.07701494166,0.5716241121],
+ [-0.08391193553,0.9963702084,-0.01432462351],
+ [-0.5706524458,-0.03626439747,0.8203905653]
+ ],
+ "t": [
+ [75.42528996],
+ [124.1426197],
+ [270.1790967]
+ ]
+ },
+ {
+ "name": "16_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 3,
+ "K": [
+ [744.743,0,378.771],
+ [0,744.551,249.858],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319546,0.0369202,-5.08119e-05,0.00111176,0.115068],
+ "R": [
+ [0.8437113062,0.07102371173,0.5320778742],
+ [-0.08587784221,0.9963005803,0.003185889303],
+ [-0.5298832211,-0.04838167055,0.8466894271]
+ ],
+ "t": [
+ [57.15960424],
+ [150.0301024],
+ [271.4615922]
+ ]
+ },
+ {
+ "name": "16_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 4,
+ "K": [
+ [745.916,0,377.522],
+ [0,746.078,215.704],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32195,0.0590592,-0.000295617,0.000900619,0.0691531],
+ "R": [
+ [0.8298382679,0.121110683,0.5447023514],
+ [-0.1306769278,0.9911961099,-0.02130286834],
+ [-0.5424868568,-0.05350209448,0.8383588349]
+ ],
+ "t": [
+ [50.00635036],
+ [157.1807453],
+ [269.6015294]
+ ]
+ },
+ {
+ "name": "16_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 5,
+ "K": [
+ [745.303,0,378.655],
+ [0,745.572,246.962],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315703,0.0277156,6.06815e-05,0.000389915,0.121683],
+ "R": [
+ [0.8187116226,0.05412921644,0.5716478872],
+ [-0.09011941267,0.9953220251,0.0348218015],
+ [-0.5670888559,-0.08002558546,0.8197598034]
+ ],
+ "t": [
+ [44.81120287],
+ [188.347539],
+ [263.8787228]
+ ]
+ },
+ {
+ "name": "16_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 6,
+ "K": [
+ [745.606,0,364.995],
+ [0,745.957,239.275],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315328,0.0257972,-0.000148911,-0.000553771,0.11289],
+ "R": [
+ [0.8250072615,0.03741598225,0.5638821355],
+ [-0.06134414867,0.997839028,0.02354080738],
+ [-0.5617827996,-0.05401220659,0.8255196955]
+ ],
+ "t": [
+ [18.96573731],
+ [189.9536973],
+ [269.3804852]
+ ]
+ },
+ {
+ "name": "16_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 7,
+ "K": [
+ [748.144,0,375.351],
+ [0,748.158,222.981],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330846,0.0923667,0.000924419,-0.000952259,0.0155541],
+ "R": [
+ [0.837010476,0.04764620621,0.5451085232],
+ [-0.06946161724,0.9973944363,0.0194787641],
+ [-0.542760119,-0.05416804921,0.8381391744]
+ ],
+ "t": [
+ [-3.044263505],
+ [177.2440129],
+ [269.3681033]
+ ]
+ },
+ {
+ "name": "16_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 8,
+ "K": [
+ [744.865,0,367.243],
+ [0,744.958,216.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318901,0.0494498,-4.02299e-05,-0.00132469,0.0675277],
+ "R": [
+ [0.820488273,0.02086231711,0.571282555],
+ [-0.05401864215,0.9976917237,0.04114864192],
+ [-0.569105421,-0.06462188605,0.8197213134]
+ ],
+ "t": [
+ [-19.55260409],
+ [185.7078501],
+ [268.0867658]
+ ]
+ },
+ {
+ "name": "16_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 9,
+ "K": [
+ [747.002,0,387.115],
+ [0,747.11,221.005],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330535,0.106093,-0.000909516,-0.000158007,-0.000767667],
+ "R": [
+ [0.7988895638,0.03324884852,0.6005580562],
+ [-0.04929092881,0.9987315997,0.01027599727],
+ [-0.5994546431,-0.03781145137,0.7995151187]
+ ],
+ "t": [
+ [-23.46737596],
+ [164.4653247],
+ [274.3468777]
+ ]
+ },
+ {
+ "name": "16_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 10,
+ "K": [
+ [747.13,0,370.332],
+ [0,747.181,215.13],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317083,0.0321021,0.000973109,0.00011315,0.117938],
+ "R": [
+ [0.8533830718,-0.04475694932,0.5193593633],
+ [-0.01101437775,0.9945367161,0.1038046423],
+ [-0.5211679348,-0.09430554471,0.8482278279]
+ ],
+ "t": [
+ [-57.15311463],
+ [154.6074069],
+ [261.7210039]
+ ]
+ },
+ {
+ "name": "16_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 11,
+ "K": [
+ [743.847,0,352.444],
+ [0,743.813,257.427],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317406,0.0378558,0.000559662,0.00156409,0.0978841],
+ "R": [
+ [0.8306368039,-0.006305585156,0.5567788965],
+ [-0.01286906876,0.999451376,0.03051776569],
+ [-0.5566658666,-0.03251440526,0.8300999496]
+ ],
+ "t": [
+ [-55.68789985],
+ [125.5954887],
+ [272.609285]
+ ]
+ },
+ {
+ "name": "16_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 12,
+ "K": [
+ [744.746,0,358.295],
+ [0,744.902,240.075],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311924,0.00313238,0.000282789,0.000109914,0.161883],
+ "R": [
+ [0.8248636519,0.04296544146,0.5636966618],
+ [-0.06337887364,0.9978500361,0.01668603434],
+ [-0.5617678116,-0.04949016272,0.8258133262]
+ ],
+ "t": [
+ [-45.5470475],
+ [111.3455785],
+ [270.6081331]
+ ]
+ },
+ {
+ "name": "16_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 13,
+ "K": [
+ [742.599,0,373.118],
+ [0,742.696,232.489],
+ [0,0,1]
+ ],
+ "distCoef": [-0.30659,-0.0244311,-0.000674534,-0.000450328,0.198624],
+ "R": [
+ [0.8431633834,0.1596479738,0.5134082522],
+ [-0.1755645793,0.9843078819,-0.01775026834],
+ [-0.5081855837,-0.07516992751,0.8579608934]
+ ],
+ "t": [
+ [-27.27822308],
+ [119.4613899],
+ [265.3318331]
+ ]
+ },
+ {
+ "name": "16_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 14,
+ "K": [
+ [745.804,0,370.921],
+ [0,745.998,236.13],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32821,0.0986121,-0.000141995,-6.949e-05,-0.000912797],
+ "R": [
+ [0.8387309717,0.02755081107,0.5438486094],
+ [-0.05712815546,0.9976599438,0.03756341813],
+ [-0.5415410705,-0.06257467009,0.8383422211]
+ ],
+ "t": [
+ [-30.56519475],
+ [90.10611059],
+ [268.3571691]
+ ]
+ },
+ {
+ "name": "16_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 15,
+ "K": [
+ [746.816,0,365.456],
+ [0,746.849,225.794],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313831,-0.00769663,-0.000408313,0.00132145,0.204366],
+ "R": [
+ [0.832563643,0.03033638007,0.5530980784],
+ [-0.06055031945,0.9974999941,0.03643378343],
+ [-0.5506100609,-0.06382370879,0.8323191065]
+ ],
+ "t": [
+ [-6.42740827],
+ [88.69840867],
+ [268.7038743]
+ ]
+ },
+ {
+ "name": "16_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 16,
+ "K": [
+ [745.958,0,362.302],
+ [0,745.997,246.977],
+ [0,0,1]
+ ],
+ "distCoef": [-0.334292,0.102923,-0.000499879,-0.000549652,0.00793805],
+ "R": [
+ [0.8469636173,0.04048111503,0.5301074517],
+ [-0.08872767491,0.9938758,0.0658657255],
+ [-0.5241946497,-0.1028210748,0.8453684379]
+ ],
+ "t": [
+ [4.584618298],
+ [109.8657875],
+ [264.6056558]
+ ]
+ },
+ {
+ "name": "16_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 17,
+ "K": [
+ [743.409,0,347.233],
+ [0,743.501,244.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321337,0.060438,0.000289347,-0.000274585,0.0540146],
+ "R": [
+ [0.8338949711,0.06176137043,0.5484566622],
+ [-0.07967791451,0.9967809419,0.008898524832],
+ [-0.5461415633,-0.05112031815,0.8361316319]
+ ],
+ "t": [
+ [32.73506114],
+ [91.25662398],
+ [270.2531272]
+ ]
+ },
+ {
+ "name": "16_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 18,
+ "K": [
+ [745.291,0,372.769],
+ [0,745.233,242.994],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333422,0.127228,0.000470045,-0.000171948,-0.0533425],
+ "R": [
+ [0.83476387,0.01583088955,0.5503804723],
+ [-0.006383142992,0.9997976531,-0.01907638369],
+ [-0.5505711006,0.01241111862,0.8346960089]
+ ],
+ "t": [
+ [48.20146308],
+ [84.31846371],
+ [276.1979749]
+ ]
+ },
+ {
+ "name": "16_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 19,
+ "K": [
+ [746.318,0,365.802],
+ [0,746.439,228.058],
+ [0,0,1]
+ ],
+ "distCoef": [-0.329752,0.106043,0.000413141,0.00102356,-0.00232913],
+ "R": [
+ [0.812564017,0.08482803737,0.576666214],
+ [-0.09768913876,0.9951785947,-0.008740529432],
+ [-0.5746273144,-0.04923178609,0.8169330944]
+ ],
+ "t": [
+ [39.50134988],
+ [124.7306793],
+ [269.4016435]
+ ]
+ },
+ {
+ "name": "16_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 20,
+ "K": [
+ [745.104,0,371.377],
+ [0,745.158,252.192],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317414,0.0233642,0.000269725,0.000539732,0.145301],
+ "R": [
+ [0.8445515108,0.05428741136,0.5327153297],
+ [-0.06949119822,0.9975462456,0.00851241329],
+ [-0.5309460603,-0.04420819807,0.8462516862]
+ ],
+ "t": [
+ [17.33430135],
+ [146.0606392],
+ [271.3134014]
+ ]
+ },
+ {
+ "name": "16_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 21,
+ "K": [
+ [744.321,0,365.126],
+ [0,744.44,221.253],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310945,0.00293318,4.64093e-05,-0.000454281,0.146346],
+ "R": [
+ [0.8382052649,0.09941648006,0.5362166515],
+ [-0.1229674254,0.9923765769,0.008230548616],
+ [-0.531310593,-0.07283607028,0.8440402601]
+ ],
+ "t": [
+ [5.636303812],
+ [160.8368098],
+ [266.310691]
+ ]
+ },
+ {
+ "name": "16_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 22,
+ "K": [
+ [745.695,0,387.973],
+ [0,745.975,222.039],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325844,0.0780224,-0.000861123,0.000487347,0.0459906],
+ "R": [
+ [0.8503320636,-0.003175777979,0.52623692],
+ [-0.02504000004,0.9986049625,0.04648792516],
+ [-0.5256504352,-0.05270714583,0.8490662971]
+ ],
+ "t": [
+ [-29.03965018],
+ [141.2975723],
+ [268.9897195]
+ ]
+ },
+ {
+ "name": "16_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 23,
+ "K": [
+ [746.757,0,385.384],
+ [0,746.697,250.739],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330103,0.0993513,0.000581277,0.0005991,0.0043047],
+ "R": [
+ [0.8172674448,0.1129970073,0.565071323],
+ [-0.1204798393,0.992420693,-0.02420281713],
+ [-0.5635233199,-0.0482995277,0.8246869852]
+ ],
+ "t": [
+ [1.484048414],
+ [120.2737991],
+ [270.3939501]
+ ]
+ },
+ {
+ "name": "16_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 16,
+ "node": 24,
+ "K": [
+ [743.909,0,365.262],
+ [0,744.1,225.983],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309366,-0.0151251,-0.000569796,0.000128233,0.192772],
+ "R": [
+ [0.8488529257,0.0258708029,0.5279956553],
+ [-0.02681353424,0.9996232069,-0.005871843729],
+ [-0.5279486195,-0.009173097852,0.8492267715]
+ ],
+ "t": [
+ [-1.170097817],
+ [104.9858918],
+ [274.723166]
+ ]
+ },
+ {
+ "name": "17_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 1,
+ "K": [
+ [743.511,0,382.741],
+ [0,744.07,233.668],
+ [0,0,1]
+ ],
+ "distCoef": [-0.303608,-0.0460126,4.19904e-05,0.000729649,0.232264],
+ "R": [
+ [0.7426987355,0.03664601822,-0.6686222084],
+ [-0.01756201576,0.9992239229,0.035258014],
+ [0.6693953719,-0.01444372865,0.742765922]
+ ],
+ "t": [
+ [27.30884403],
+ [110.2809812],
+ [269.7471778]
+ ]
+ },
+ {
+ "name": "17_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 2,
+ "K": [
+ [744.491,0,371.868],
+ [0,744.58,223.545],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320104,0.0388113,-0.000303412,-0.00118762,0.0743207],
+ "R": [
+ [0.773334615,0.1038173874,-0.6254402635],
+ [-0.04654036662,0.9931361468,0.107306049],
+ [0.6322875671,-0.05387526291,0.7728582591]
+ ],
+ "t": [
+ [68.17402308],
+ [125.7906344],
+ [263.8293382]
+ ]
+ },
+ {
+ "name": "17_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 3,
+ "K": [
+ [744.096,0,373.775],
+ [0,744.072,232.317],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314223,0.0332024,-0.000194112,2.11963e-05,0.079313],
+ "R": [
+ [0.7946878724,-0.02084896757,-0.6066601239],
+ [0.03470365887,0.999335828,0.01111570764],
+ [0.6060254462,-0.02988684405,0.7948835985]
+ ],
+ "t": [
+ [55.17367606],
+ [148.0232969],
+ [266.1261169]
+ ]
+ },
+ {
+ "name": "17_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 4,
+ "K": [
+ [748.225,0,373.118],
+ [0,748.618,236.287],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325852,0.0883394,-0.000431944,-0.00077703,0.0075009],
+ "R": [
+ [0.7874797118,0.07165214706,-0.6121614766],
+ [-0.03177741847,0.9966185482,0.07577377574],
+ [0.6155208357,-0.04021739967,0.7870938073]
+ ],
+ "t": [
+ [46.04066644],
+ [153.679907],
+ [265.8341529]
+ ]
+ },
+ {
+ "name": "17_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 5,
+ "K": [
+ [745.23,0,378.585],
+ [0,745.614,229.474],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323397,0.071697,-0.000659822,0.000678056,0.0530686],
+ "R": [
+ [0.7680042357,0.04160049173,-0.6390922414],
+ [0.01355248597,0.9966090615,0.08115854064],
+ [0.6403013541,-0.07099139161,0.7648361904]
+ ],
+ "t": [
+ [29.31016003],
+ [185.453895],
+ [261.9380867]
+ ]
+ },
+ {
+ "name": "17_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 6,
+ "K": [
+ [742.876,0,352.101],
+ [0,743.303,231.794],
+ [0,0,1]
+ ],
+ "distCoef": [-0.319343,0.0421325,-0.000546468,-1.33187e-05,0.10149],
+ "R": [
+ [0.8064347587,0.08751734637,-0.584810819],
+ [-0.03388642915,0.9942014648,0.1020546777],
+ [0.5903513275,-0.062483289,0.8047242688]
+ ],
+ "t": [
+ [35.39857301],
+ [188.6248332],
+ [262.8234665]
+ ]
+ },
+ {
+ "name": "17_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 7,
+ "K": [
+ [745.054,0,358.779],
+ [0,745.36,231.687],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309912,-0.00132311,-0.00013553,-0.000280643,0.151777],
+ "R": [
+ [0.7882500993,-0.004275732235,-0.615340149],
+ [0.05540043824,0.996408109,0.06404429605],
+ [0.612856078,-0.08457303664,0.7856556683]
+ ],
+ "t": [
+ [-7.246792888],
+ [183.4614511],
+ [259.402568]
+ ]
+ },
+ {
+ "name": "17_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 8,
+ "K": [
+ [745.254,0,343.02],
+ [0,745.689,227.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309897,-0.0109758,-0.00111103,0.000256129,0.180098],
+ "R": [
+ [0.7946287881,0.03514926038,-0.6060772382],
+ [0.01090423253,0.9973351466,0.07213669658],
+ [0.6069976827,-0.06393070292,0.7921279432]
+ ],
+ "t": [
+ [-18.41109561],
+ [184.5517176],
+ [263.9542066]
+ ]
+ },
+ {
+ "name": "17_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 9,
+ "K": [
+ [745.379,0,338.137],
+ [0,745.543,245.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314138,0.0142784,0.00088856,-0.00114362,0.123117],
+ "R": [
+ [0.7570044814,0.09852948519,-0.6459381981],
+ [-0.05745310106,0.9947735679,0.08440787789],
+ [0.6508789107,-0.02678598925,0.7587088733]
+ ],
+ "t": [
+ [-40.16389387],
+ [164.132571],
+ [267.7674295]
+ ]
+ },
+ {
+ "name": "17_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 10,
+ "K": [
+ [743.633,0,369.381],
+ [0,743.739,253.863],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313678,0.00191444,-0.000367883,0.000526793,0.16208],
+ "R": [
+ [0.7732990879,0.03177464522,-0.6332447335],
+ [0.01440724919,0.9976050167,0.06765102948],
+ [0.6338777104,-0.06143779407,0.7709892643]
+ ],
+ "t": [
+ [-41.17430449],
+ [148.5957101],
+ [262.973747]
+ ]
+ },
+ {
+ "name": "17_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 11,
+ "K": [
+ [749.691,0,360.347],
+ [0,749.465,221.979],
+ [0,0,1]
+ ],
+ "distCoef": [-0.36212,0.288042,0.00167589,0.000680745,-0.303613],
+ "R": [
+ [0.7747984815,0.06051645956,-0.629305229],
+ [-0.01350572868,0.9967652932,0.07922465313],
+ [0.6320640066,-0.05288391526,0.7731095544]
+ ],
+ "t": [
+ [-52.93053536],
+ [133.9502209],
+ [264.0833713]
+ ]
+ },
+ {
+ "name": "17_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 12,
+ "K": [
+ [746.505,0,357.704],
+ [0,746.569,217.534],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312272,-0.0352904,0.000404412,-0.00107082,0.237629],
+ "R": [
+ [0.7725304823,-0.04233401582,-0.633564902],
+ [0.05994143841,0.9981814314,0.006391704783],
+ [0.6321421342,-0.04291457833,0.7736631445]
+ ],
+ "t": [
+ [-62.64410987],
+ [104.0188122],
+ [265.010728]
+ ]
+ },
+ {
+ "name": "17_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 13,
+ "K": [
+ [745.264,0,354.32],
+ [0,745.302,226.261],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318398,0.0346929,0.000845692,0.000532231,0.122684],
+ "R": [
+ [0.7851484689,0.03204817868,-0.6184778056],
+ [-0.002225165301,0.9987996914,0.04893081946],
+ [0.619303585,-0.03704174263,0.784277361]
+ ],
+ "t": [
+ [-29.19489341],
+ [103.2650402],
+ [265.9795804]
+ ]
+ },
+ {
+ "name": "17_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 14,
+ "K": [
+ [744.589,0,353.058],
+ [0,744.664,227.639],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324606,0.0822873,0.00100728,-0.000415736,0.0203245],
+ "R": [
+ [0.7765409088,-0.02900211747,-0.6293989944],
+ [0.06862390156,0.9968904955,0.03873112579],
+ [0.6263185908,-0.07326811825,0.7761164898]
+ ],
+ "t": [
+ [-35.65491372],
+ [89.93385082],
+ [261.6973052]
+ ]
+ },
+ {
+ "name": "17_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 15,
+ "K": [
+ [744.009,0,351.118],
+ [0,743.982,227.187],
+ [0,0,1]
+ ],
+ "distCoef": [-0.31768,0.0289626,0.000394183,-0.00106594,0.077624],
+ "R": [
+ [0.7703409519,0.009578036972,-0.6375602553],
+ [0.03762675731,0.9974619202,0.06044786963],
+ [0.6365210484,-0.07055479443,0.7680253746]
+ ],
+ "t": [
+ [-14.94306331],
+ [88.85755459],
+ [261.4804843]
+ ]
+ },
+ {
+ "name": "17_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 16,
+ "K": [
+ [745.298,0,365.044],
+ [0,745.641,201.543],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315769,0.0139989,-0.000983596,0.000497246,0.155532],
+ "R": [
+ [0.7668905855,0.04755147693,-0.6400138177],
+ [0.009922268647,0.9962536216,0.0859084976],
+ [0.6417011597,-0.07223280706,0.7635457047]
+ ],
+ "t": [
+ [4.594602528],
+ [99.8882812],
+ [261.439958]
+ ]
+ },
+ {
+ "name": "17_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 17,
+ "K": [
+ [744.772,0,356.238],
+ [0,744.946,209.811],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307562,-0.0273551,-0.000331097,0.000403566,0.231396],
+ "R": [
+ [0.7386328767,0.1026186384,-0.6662513704],
+ [-0.03586762178,0.992927984,0.1131703685],
+ [0.6731530192,-0.05969450264,0.7370899397]
+ ],
+ "t": [
+ [18.92063539],
+ [92.1220326],
+ [263.1909682]
+ ]
+ },
+ {
+ "name": "17_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 18,
+ "K": [
+ [746.696,0,345.664],
+ [0,746.883,230.9],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332087,0.135716,-0.000396371,4.15402e-05,-0.0769473],
+ "R": [
+ [0.7676740293,0.0869303765,-0.6349170767],
+ [-0.05592901251,0.9960646798,0.06875390322],
+ [0.6383952774,-0.01727030079,0.7695149163]
+ ],
+ "t": [
+ [48.13164066],
+ [87.731429],
+ [267.0873794]
+ ]
+ },
+ {
+ "name": "17_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 19,
+ "K": [
+ [743.785,0,363.137],
+ [0,743.962,239.724],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322076,0.0699752,0.00130957,8.28091e-06,0.0447641],
+ "R": [
+ [0.7666015958,0.09362030423,-0.6352615462],
+ [-0.01827880108,0.9920950944,0.1241499457],
+ [0.6418628193,-0.08356172708,0.7622529495]
+ ],
+ "t": [
+ [25.25313987],
+ [133.2656265],
+ [259.9680703]
+ ]
+ },
+ {
+ "name": "17_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 20,
+ "K": [
+ [747.071,0,344.427],
+ [0,747.404,242.981],
+ [0,0,1]
+ ],
+ "distCoef": [-0.349964,0.20917,0.0008789,-0.000586258,-0.211765],
+ "R": [
+ [0.7775513873,0.03007697302,-0.6280996862],
+ [-0.01270805589,0.999403059,0.03212523871],
+ [0.6286909777,-0.01699709801,0.7774694548]
+ ],
+ "t": [
+ [17.35278566],
+ [137.2956705],
+ [269.3773006]
+ ]
+ },
+ {
+ "name": "17_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 21,
+ "K": [
+ [744.669,0,371.314],
+ [0,744.881,251.475],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32107,0.0528121,0.000172414,0.000961494,0.0921892],
+ "R": [
+ [0.7854342878,0.01663631847,-0.6187214337],
+ [0.02446292583,0.9980232337,0.05788946549],
+ [0.6184614336,-0.06060410764,0.7834746947]
+ ],
+ "t": [
+ [-1.039205356],
+ [155.8049723],
+ [263.425936]
+ ]
+ },
+ {
+ "name": "17_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 22,
+ "K": [
+ [744.126,0,368.359],
+ [0,744.205,218.365],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306681,-0.0309893,-0.000506643,-0.000551257,0.209183],
+ "R": [
+ [0.7742934088,0.08491898973,-0.6271032469],
+ [-0.02171436959,0.9939373135,0.1077826651],
+ [0.6324541115,-0.06983825553,0.771443073]
+ ],
+ "t": [
+ [-12.48615074],
+ [146.2169272],
+ [261.8070617]
+ ]
+ },
+ {
+ "name": "17_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 23,
+ "K": [
+ [746.439,0,363.854],
+ [0,746.575,224.032],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333494,0.127943,0.00111227,0.000376509,-0.0438307],
+ "R": [
+ [0.7741360077,0.05745954338,-0.6304060933],
+ [-0.01777243196,0.9974520988,0.06909016755],
+ [0.6327697704,-0.04228133707,0.7731847814]
+ ],
+ "t": [
+ [-14.18178238],
+ [117.4047924],
+ [265.0998909]
+ ]
+ },
+ {
+ "name": "17_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 17,
+ "node": 24,
+ "K": [
+ [745.824,0,346.505],
+ [0,746.017,224.098],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317434,0.0247137,-0.000866957,0.000304145,0.138958],
+ "R": [
+ [0.7656627697,0.09930116127,-0.6355311184],
+ [-0.04982185052,0.99419918,0.09531932471],
+ [0.6413098365,-0.04131912178,0.7661686654]
+ ],
+ "t": [
+ [7.35512715],
+ [111.8344509],
+ [265.0127015]
+ ]
+ },
+ {
+ "name": "18_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 1,
+ "K": [
+ [744.96,0,372.705],
+ [0,744.564,226.392],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321978,0.0724692,0.000483988,0.000458946,0.0380169],
+ "R": [
+ [-0.3520669355,0.03279886428,-0.9353999719],
+ [0.04913052402,0.9986556534,0.01652505738],
+ [0.9346844732,-0.04013876447,-0.3532050609]
+ ],
+ "t": [
+ [47.10128491],
+ [117.3460549],
+ [266.6541908]
+ ]
+ },
+ {
+ "name": "18_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 2,
+ "K": [
+ [748.843,0,358.358],
+ [0,748.813,225.018],
+ [0,0,1]
+ ],
+ "distCoef": [-0.335266,0.148062,0.000634215,-0.00153008,-0.105518],
+ "R": [
+ [-0.3389880085,0.04020239671,-0.9399313259],
+ [0.04795713663,0.9985260662,0.02541275744],
+ [0.9395675831,-0.03646179499,-0.3404163544]
+ ],
+ "t": [
+ [70.51461434],
+ [125.984952],
+ [266.5287049]
+ ]
+ },
+ {
+ "name": "18_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 3,
+ "K": [
+ [746.557,0,370.525],
+ [0,746.643,239.094],
+ [0,0,1]
+ ],
+ "distCoef": [-0.336876,0.137869,0.0006954,0.000424607,-0.0538424],
+ "R": [
+ [-0.3751735108,0.06869685522,-0.9244055273],
+ [0.01802710881,0.9976021763,0.06682006625],
+ [0.9267792942,0.008404759824,-0.3755123165]
+ ],
+ "t": [
+ [58.58769651],
+ [133.6261971],
+ [275.7276294]
+ ]
+ },
+ {
+ "name": "18_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 4,
+ "K": [
+ [744.71,0,356.151],
+ [0,744.769,223.97],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312604,0.00791514,0.000747313,-0.000519594,0.158336],
+ "R": [
+ [-0.3438161676,0.01243889994,-0.9389545871],
+ [0.0251972518,0.9996744288,0.00401683712],
+ [0.9386988555,-0.02227802162,-0.344017657]
+ ],
+ "t": [
+ [40.26546697],
+ [152.0702476],
+ [270.0686857]
+ ]
+ },
+ {
+ "name": "18_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 5,
+ "K": [
+ [743.927,0,355.392],
+ [0,744.057,262.153],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316206,0.0381773,0.00109867,0.000112775,0.102099],
+ "R": [
+ [-0.3913025917,0.04706716523,-0.9190576498],
+ [0.07535158968,0.9969764632,0.0189755056],
+ [0.9171719684,-0.0618272904,-0.3936660596]
+ ],
+ "t": [
+ [27.50168157],
+ [183.5367771],
+ [265.1462318]
+ ]
+ },
+ {
+ "name": "18_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 6,
+ "K": [
+ [744.89,0,353.646],
+ [0,744.816,246.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311434,-0.0151537,0.000898898,0.00113623,0.19919],
+ "R": [
+ [-0.3540366423,0.02766248657,-0.9348223589],
+ [0.06855079724,0.9976412764,0.003559761167],
+ [0.9327158432,-0.06282253209,-0.3550978532]
+ ],
+ "t": [
+ [15.12228299],
+ [191.0759947],
+ [263.959739]
+ ]
+ },
+ {
+ "name": "18_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 7,
+ "K": [
+ [744.21,0,382.066],
+ [0,744.474,221.564],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318836,0.0439442,-0.000310088,0.000693195,0.0844966],
+ "R": [
+ [-0.3784097731,0.01208936744,-0.9255592314],
+ [0.03775536538,0.9992841689,-0.002383732641],
+ [0.9248678695,-0.03584685469,-0.3785953341]
+ ],
+ "t": [
+ [-11.73143391],
+ [170.7040215],
+ [268.2801795]
+ ]
+ },
+ {
+ "name": "18_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 8,
+ "K": [
+ [744.996,0,378.911],
+ [0,745.249,217.173],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317298,0.0439499,-0.000470842,0.000645598,0.0800391],
+ "R": [
+ [-0.3573644405,-0.02168005213,-0.9337133564],
+ [0.09030348924,0.9942444419,-0.05764780686],
+ [0.9295891224,-0.1049188503,-0.3533498244]
+ ],
+ "t": [
+ [-32.18764663],
+ [193.5958696],
+ [255.9258617]
+ ]
+ },
+ {
+ "name": "18_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 9,
+ "K": [
+ [745.488,0,367.703],
+ [0,745.136,254.274],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333608,0.117291,0.00107107,0.000590786,-0.0167148],
+ "R": [
+ [-0.3755971335,-0.01611847579,-0.9266428589],
+ [0.03486308067,0.9988953473,-0.03150636014],
+ [0.9261270749,-0.0441393233,-0.3746202894]
+ ],
+ "t": [
+ [-52.11061688],
+ [162.8813669],
+ [265.66749]
+ ]
+ },
+ {
+ "name": "18_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 10,
+ "K": [
+ [746.691,0,377.016],
+ [0,746.35,247.895],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324348,0.0759263,0.000632098,0.000973799,0.0365142],
+ "R": [
+ [-0.3979832561,-0.05264507275,-0.9158809007],
+ [0.03842303812,0.9965195246,-0.07397639654],
+ [0.9165876925,-0.06463229393,-0.3945753015]
+ ],
+ "t": [
+ [-58.47639535],
+ [144.7851801],
+ [261.4908418]
+ ]
+ },
+ {
+ "name": "18_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 11,
+ "K": [
+ [743.499,0,383.73],
+ [0,743.269,228.607],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318101,0.0343673,-0.000192972,9.02677e-05,0.0940376],
+ "R": [
+ [-0.3591156591,-0.0799459609,-0.9298626709],
+ [0.01693912278,0.9956019804,-0.09213990831],
+ [0.9331393302,-0.04883994185,-0.356182047]
+ ],
+ "t": [
+ [-65.19666066],
+ [124.1115675],
+ [265.1913912]
+ ]
+ },
+ {
+ "name": "18_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 12,
+ "K": [
+ [744.847,0,377.843],
+ [0,744.539,240.133],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322594,0.0777366,0.000608553,0.000730506,0.0395492],
+ "R": [
+ [-0.3599917326,-0.04959232233,-0.9316364924],
+ [0.02914279324,0.9975011607,-0.0643593979],
+ [0.9325002145,-0.05031934083,-0.3576469123]
+ ],
+ "t": [
+ [-57.61171896],
+ [105.5688064],
+ [264.3974594]
+ ]
+ },
+ {
+ "name": "18_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 13,
+ "K": [
+ [742.264,0,386.065],
+ [0,742.375,236.247],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316238,0.0182785,-0.000395794,0.00144239,0.136479],
+ "R": [
+ [-0.3232019546,0.03338047233,-0.9457411066],
+ [0.05161368011,0.9985119503,0.01760435083],
+ [0.9449214383,-0.04312341834,-0.324443903]
+ ],
+ "t": [
+ [61.04698375],
+ [97.35388185],
+ [264.1973208]
+ ]
+ },
+ {
+ "name": "18_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 14,
+ "K": [
+ [744.531,0,362.517],
+ [0,744.694,222.936],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323155,0.0551,-0.000315217,0.00114443,0.0791805],
+ "R": [
+ [-0.3124904102,0.02154150537,-0.9496766329],
+ [-0.004629448499,0.999696432,0.02419942065],
+ [0.9499096335,0.01195856595,-0.3122958229]
+ ],
+ "t": [
+ [-14.02426098],
+ [68.46079663],
+ [270.3325449]
+ ]
+ },
+ {
+ "name": "18_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 15,
+ "K": [
+ [747.429,0,398.562],
+ [0,747.425,233.615],
+ [0,0,1]
+ ],
+ "distCoef": [-0.333617,0.122405,0.000303778,0.00134383,-0.0202721],
+ "R": [
+ [-0.358025731,-0.0142572014,-0.9336028643],
+ [0.04081564607,0.9986886699,-0.03090345813],
+ [0.9328191995,-0.04916983726,-0.3569743242]
+ ],
+ "t": [
+ [-8.683192747],
+ [83.02873835],
+ [264.4620974]
+ ]
+ },
+ {
+ "name": "18_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 16,
+ "K": [
+ [742.757,0,357.304],
+ [0,742.66,220.331],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305443,-0.0527047,-0.000521453,0.00022453,0.250047],
+ "R": [
+ [-0.3364590891,0.05374146283,-0.9401633563],
+ [0.05791647683,0.99766121,0.03630140184],
+ [0.9399154021,-0.04223701264,-0.3387846981]
+ ],
+ "t": [
+ [20.062846],
+ [91.33983095],
+ [265.2581766]
+ ]
+ },
+ {
+ "name": "18_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 17,
+ "K": [
+ [750.787,0,361.922],
+ [0,750.723,216.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.368257,0.303211,-0.00101236,-0.000679192,-0.335284],
+ "R": [
+ [-0.3521002367,0.0154136189,-0.9358353721],
+ [0.04957845599,0.9987678018,-0.002203336065],
+ [0.9346482761,-0.04717306796,-0.3524305629]
+ ],
+ "t": [
+ [32.75189895],
+ [90.38015946],
+ [265.2110414]
+ ]
+ },
+ {
+ "name": "18_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 18,
+ "K": [
+ [745.69,0,366.196],
+ [0,745.645,224.452],
+ [0,0,1]
+ ],
+ "distCoef": [-0.325076,0.0695314,0.000207452,8.09151e-05,0.0569118],
+ "R": [
+ [-0.369329094,-0.008664471876,-0.929258278],
+ [0.06369637747,0.997368813,-0.03461534879],
+ [0.9271131494,-0.07197484145,-0.3678054246]
+ ],
+ "t": [
+ [-35.28307581],
+ [111.055802],
+ [261.8818226]
+ ]
+ },
+ {
+ "name": "18_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 19,
+ "K": [
+ [745.552,0,357.301],
+ [0,745.545,223.113],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320101,0.042192,0.00043748,0.000103204,0.104558],
+ "R": [
+ [-0.3584191226,-0.04877846794,-0.9322855752],
+ [0.07086164718,0.9943315632,-0.07926770686],
+ [0.9308675306,-0.09447435344,-0.3529309238]
+ ],
+ "t": [
+ [16.14340371],
+ [139.4376601],
+ [259.6452388]
+ ]
+ },
+ {
+ "name": "18_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 20,
+ "K": [
+ [746.078,0,363.03],
+ [0,746.077,221.582],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321359,0.0569666,0.000169599,0.000938787,0.0797635],
+ "R": [
+ [-0.3631410096,0.0448531679,-0.9306539639],
+ [0.06634832184,0.9975497918,0.02218813063],
+ [0.9293688758,-0.05368990856,-0.3652271709]
+ ],
+ "t": [
+ [21.37501917],
+ [147.345749],
+ [265.5705493]
+ ]
+ },
+ {
+ "name": "18_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 21,
+ "K": [
+ [745.043,0,372.293],
+ [0,745.076,222.901],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317484,0.0404748,0.000192535,-0.000111527,0.0957966],
+ "R": [
+ [-0.3461967977,-0.005928135698,-0.9381431844],
+ [0.04577092509,0.9986824948,-0.02320122706],
+ [0.937044716,-0.05097187193,-0.3454693453]
+ ],
+ "t": [
+ [-0.5259425122],
+ [153.3372726],
+ [265.7616305]
+ ]
+ },
+ {
+ "name": "18_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 22,
+ "K": [
+ [745.252,0,401.788],
+ [0,745.346,245.295],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315494,0.0267895,-0.000624877,0.000210937,0.0993279],
+ "R": [
+ [-0.3267831921,-0.004575639121,-0.9450882546],
+ [0.07739750703,0.9964998407,-0.03158628616],
+ [0.9419248225,-0.08346934224,-0.3252852558]
+ ],
+ "t": [
+ [-10.3938656],
+ [148.3069178],
+ [261.1183693]
+ ]
+ },
+ {
+ "name": "18_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 23,
+ "K": [
+ [747.114,0,358.608],
+ [0,746.941,217.398],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324507,0.0792141,-0.000227367,0.0013287,0.0357905],
+ "R": [
+ [-0.356358404,-0.03218270054,-0.9337949248],
+ [0.02645826287,0.9986582749,-0.04451528213],
+ [0.9339746507,-0.04056998648,-0.3550287707]
+ ],
+ "t": [
+ [-18.04448695],
+ [115.7023496],
+ [266.3010308]
+ ]
+ },
+ {
+ "name": "18_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 18,
+ "node": 24,
+ "K": [
+ [747.28,0,383.407],
+ [0,747.414,233.333],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321806,0.0494121,-0.000677773,0.00106862,0.0725344],
+ "R": [
+ [-0.3696831614,0.01690678518,-0.9290040478],
+ [0.03916078476,0.9992295361,0.002601362608],
+ [0.9283322644,-0.03541884761,-0.3700604169]
+ ],
+ "t": [
+ [3.487638933],
+ [110.8874693],
+ [266.9764809]
+ ]
+ },
+ {
+ "name": "19_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 1,
+ "K": [
+ [742.815,0,376.349],
+ [0,742.96,226.412],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311242,0.000676611,0.00127048,0.000398816,0.145683],
+ "R": [
+ [-0.9986287013,0.0334613179,0.04026235479],
+ [0.03051664863,0.9969627365,-0.07165218936],
+ [-0.04253764409,-0.07032526067,-0.99661673]
+ ],
+ "t": [
+ [47.87451164],
+ [124.5257469],
+ [265.3025885]
+ ]
+ },
+ {
+ "name": "19_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 2,
+ "K": [
+ [746.352,0,362.211],
+ [0,746.799,224.495],
+ [0,0,1]
+ ],
+ "distCoef": [-0.33354,0.113916,-0.000650978,0.00200875,0.00369896],
+ "R": [
+ [-0.9978769066,0.0627015602,0.01761231284],
+ [0.06225819076,0.9977547513,-0.02468550225],
+ [-0.01912058832,-0.02353658189,-0.9995401105]
+ ],
+ "t": [
+ [76.18899734],
+ [119.4504319],
+ [269.470097]
+ ]
+ },
+ {
+ "name": "19_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 3,
+ "K": [
+ [744.923,0,335.897],
+ [0,744.843,232.622],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310786,-0.00740435,0.000477261,-0.00048183,0.169837],
+ "R": [
+ [-0.9959217828,0.05942221639,0.06788816328],
+ [0.05820019172,0.9981077555,-0.01984051806],
+ [-0.06893866983,-0.0158085,-0.9974956397]
+ ],
+ "t": [
+ [57.6907282],
+ [139.716188],
+ [274.5941587]
+ ]
+ },
+ {
+ "name": "19_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 4,
+ "K": [
+ [745.3,0,371.455],
+ [0,745.339,223.979],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316788,0.039021,-0.00160053,-0.000126119,0.09467],
+ "R": [
+ [-0.995350133,0.07444232287,0.06112653567],
+ [0.06997485872,0.994930028,-0.0722340534],
+ [-0.06619389658,-0.06762085396,-0.9955128267]
+ ],
+ "t": [
+ [42.04206067],
+ [161.4993909],
+ [266.5642499]
+ ]
+ },
+ {
+ "name": "19_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 5,
+ "K": [
+ [741.339,0,353.354],
+ [0,741.563,231.192],
+ [0,0,1]
+ ],
+ "distCoef": [-0.304803,-0.0634451,-0.00114618,-0.000982934,0.282182],
+ "R": [
+ [-0.9964181101,0.07478982294,0.03946431643],
+ [0.07096423127,0.993341211,-0.09075966339],
+ [-0.04598943103,-0.08763401739,-0.9950905744]
+ ],
+ "t": [
+ [45.56899486],
+ [188.2245222],
+ [262.1501617]
+ ]
+ },
+ {
+ "name": "19_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 6,
+ "K": [
+ [745.947,0,350.894],
+ [0,746.217,234.332],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313212,0.0178381,0.000340441,0.00055626,0.126083],
+ "R": [
+ [-0.9969018679,0.07865171151,0.0007576151751],
+ [0.07854654264,0.9959829876,-0.04299219736],
+ [-0.004135981729,-0.0427994938,-0.9990751208]
+ ],
+ "t": [
+ [37.2742824],
+ [183.4195047],
+ [270.0123608]
+ ]
+ },
+ {
+ "name": "19_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 7,
+ "K": [
+ [748.821,0,355.822],
+ [0,748.684,217.17],
+ [0,0,1]
+ ],
+ "distCoef": [-0.342444,0.16602,-0.000477836,-0.000195363,-0.106824],
+ "R": [
+ [-0.9928808048,-0.04900785176,0.10856306],
+ [-0.05236016128,0.998228751,-0.02824489671],
+ [-0.106986546,-0.0337281951,-0.9936882247]
+ ],
+ "t": [
+ [-31.49326377],
+ [168.7489309],
+ [271.4480177]
+ ]
+ },
+ {
+ "name": "19_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 8,
+ "K": [
+ [747.238,0,359.034],
+ [0,747.474,233.038],
+ [0,0,1]
+ ],
+ "distCoef": [-0.313675,0.00436645,0.000419802,0.000604189,0.154068],
+ "R": [
+ [-0.9913876468,0.02931278851,0.127637354],
+ [0.0192008625,0.9966303068,-0.07974558542],
+ [-0.1295448208,-0.07660804099,-0.9886098055]
+ ],
+ "t": [
+ [-44.88902211],
+ [188.5485089],
+ [261.5304555]
+ ]
+ },
+ {
+ "name": "19_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 9,
+ "K": [
+ [743.415,0,332.333],
+ [0,743.715,235.337],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308464,-0.0208585,-0.00102455,0.000256502,0.207947],
+ "R": [
+ [-0.9954977047,0.04566149696,0.08306231217],
+ [0.04175753042,0.9979670543,-0.04814631117],
+ [-0.08509188364,-0.04446106523,-0.9953806232]
+ ],
+ "t": [
+ [-46.35184093],
+ [166.6378451],
+ [268.6077116]
+ ]
+ },
+ {
+ "name": "19_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 10,
+ "K": [
+ [747.206,0,362.728],
+ [0,747.412,248.496],
+ [0,0,1]
+ ],
+ "distCoef": [-0.340118,0.138855,0.000965068,4.5306e-05,-0.0441245],
+ "R": [
+ [-0.9935175509,0.05252798067,0.1008151146],
+ [0.05439486481,0.9983935823,0.01585728578],
+ [-0.09982021218,0.02123831626,-0.9947787991]
+ ],
+ "t": [
+ [-46.95074625],
+ [127.5778656],
+ [276.6370715]
+ ]
+ },
+ {
+ "name": "19_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 11,
+ "K": [
+ [745.45,0,355.141],
+ [0,745.641,249.232],
+ [0,0,1]
+ ],
+ "distCoef": [-0.326245,0.10077,0.000216744,-2.37583e-05,-0.0259903],
+ "R": [
+ [-0.9983050345,-0.001439505441,0.05818063101],
+ [-0.002578079686,0.9998065462,-0.01949932386],
+ [-0.05814130636,-0.01961626748,-0.9981156198]
+ ],
+ "t": [
+ [-58.09544547],
+ [121.7224759],
+ [272.659258]
+ ]
+ },
+ {
+ "name": "19_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 12,
+ "K": [
+ [743.805,0,368.42],
+ [0,744.013,242.015],
+ [0,0,1]
+ ],
+ "distCoef": [-0.323306,0.0785457,-0.00106293,0.000187763,0.0236672],
+ "R": [
+ [-0.9954771119,0.0748660766,0.05848410323],
+ [0.07512966129,0.9971710788,0.002318097681],
+ [-0.05814510944,0.006701504052,-0.9982856485]
+ ],
+ "t": [
+ [-47.8147621],
+ [97.15541342],
+ [274.4212668]
+ ]
+ },
+ {
+ "name": "19_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 13,
+ "K": [
+ [742.693,0,353.966],
+ [0,742.776,227.014],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307193,-0.0103139,0.000109263,-0.000950495,0.159317],
+ "R": [
+ [-0.9933059489,0.1045971031,0.04901773034],
+ [0.1016362638,0.9930442478,-0.05944065861],
+ [-0.05489409585,-0.05406078084,-0.9970276176]
+ ],
+ "t": [
+ [-21.5323637],
+ [109.7713479],
+ [268.3161895]
+ ]
+ },
+ {
+ "name": "19_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 14,
+ "K": [
+ [742.837,0,362.248],
+ [0,743.502,226.37],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308934,-0.00321353,-0.0010059,0.000705591,0.156528],
+ "R": [
+ [-0.9919154966,0.0987006026,0.07976113456],
+ [0.09553429302,0.9945144894,-0.04259259489],
+ [-0.08352751879,-0.03462833131,-0.995903626]
+ ],
+ "t": [
+ [-30.66946365],
+ [84.06052642],
+ [268.8728165]
+ ]
+ },
+ {
+ "name": "19_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 15,
+ "K": [
+ [742.618,0,345.237],
+ [0,742.923,230.439],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302695,-0.0546693,-0.000167537,-0.000784726,0.259585],
+ "R": [
+ [-0.9885523252,0.1391044686,0.05843155954],
+ [0.1381120085,0.9902000007,-0.02071308279],
+ [-0.06074021267,-0.01240586611,-0.9980765106]
+ ],
+ "t": [
+ [-1.26146274],
+ [74.12977283],
+ [271.0351679]
+ ]
+ },
+ {
+ "name": "19_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 16,
+ "K": [
+ [744.088,0,370.473],
+ [0,744.417,231.755],
+ [0,0,1]
+ ],
+ "distCoef": [-0.300902,-0.0664899,-0.000333311,0.000589361,0.253926],
+ "R": [
+ [-0.9917390399,0.06178336486,0.1124121551],
+ [0.06447509535,0.9977094298,0.02046596672],
+ [-0.1108902109,0.02754468261,-0.9934508803]
+ ],
+ "t": [
+ [-3.269853258],
+ [73.62667861],
+ [274.8694227]
+ ]
+ },
+ {
+ "name": "19_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 17,
+ "K": [
+ [745.582,0,373.528],
+ [0,745.86,237.254],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322134,0.0530706,-0.000603814,0.00101303,0.0846746],
+ "R": [
+ [-0.9897330936,0.1313546283,0.05634150462],
+ [0.1318000226,0.9912672261,0.00424742025],
+ [-0.05529156869,0.01162962396,-0.9984025212]
+ ],
+ "t": [
+ [37.3391924],
+ [70.20661568],
+ [273.1392775]
+ ]
+ },
+ {
+ "name": "19_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 18,
+ "K": [
+ [742.542,0,374.105],
+ [0,742.758,223.273],
+ [0,0,1]
+ ],
+ "distCoef": [-0.306762,-0.0452572,-0.00032402,-0.000364469,0.245651],
+ "R": [
+ [-0.9920842372,0.1065981921,0.06637538524],
+ [0.106818653,0.9942784937,-0.0002288198192],
+ [-0.06602000984,0.006863120707,-0.9977946963]
+ ],
+ "t": [
+ [52.26513597],
+ [79.91641464],
+ [273.9509772]
+ ]
+ },
+ {
+ "name": "19_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 19,
+ "K": [
+ [744.378,0,361.433],
+ [0,744.589,244.618],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310422,-0.000364242,-0.000710118,0.000839407,0.169675],
+ "R": [
+ [-0.9919054981,0.126974259,0.001010166835],
+ [0.1269495258,0.9918188066,-0.01338927975],
+ [-0.002701996339,-0.01315266,-0.9999098493]
+ ],
+ "t": [
+ [49.23489662],
+ [110.9052228],
+ [271.6142806]
+ ]
+ },
+ {
+ "name": "19_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 20,
+ "K": [
+ [745.72,0,364.99],
+ [0,745.913,248.461],
+ [0,0,1]
+ ],
+ "distCoef": [-0.32476,0.0791445,0.000409065,0.000522525,0.0385155],
+ "R": [
+ [-0.9808466558,0.1869185946,0.05478391053],
+ [0.1851721888,0.9820671342,-0.03543168776],
+ [-0.06042431929,-0.02460859583,-0.9978693896]
+ ],
+ "t": [
+ [40.23583817],
+ [134.9359413],
+ [272.7493911]
+ ]
+ },
+ {
+ "name": "19_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 21,
+ "K": [
+ [745.966,0,347.023],
+ [0,745.905,254.016],
+ [0,0,1]
+ ],
+ "distCoef": [-0.312122,-0.0171046,0.00101358,-9.38575e-05,0.213424],
+ "R": [
+ [-0.9944456328,0.07811965146,0.07053512206],
+ [0.07435713108,0.9957422838,-0.0544823029],
+ [-0.07449094204,-0.04893489886,-0.9960203187]
+ ],
+ "t": [
+ [2.247391851],
+ [153.0572023],
+ [268.8284628]
+ ]
+ },
+ {
+ "name": "19_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 22,
+ "K": [
+ [743.607,0,364.935],
+ [0,743.756,243.53],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311531,0.000696399,0.00010932,-0.000314324,0.159615],
+ "R": [
+ [-0.9924188487,0.09367860135,0.07955594568],
+ [0.08900119243,0.9941960017,-0.06044086279],
+ [-0.0847562186,-0.05290207743,-0.9949963586]
+ ],
+ "t": [
+ [-15.3150092],
+ [142.5037842],
+ [267.7211288]
+ ]
+ },
+ {
+ "name": "19_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 23,
+ "K": [
+ [743.508,0,369.721],
+ [0,743.449,243.575],
+ [0,0,1]
+ ],
+ "distCoef": [-0.309744,-0.0191119,0.000292611,0.000847107,0.198605],
+ "R": [
+ [-0.9987856124,0.03694807636,0.03259049098],
+ [0.03470669556,0.9971594314,-0.06684694127],
+ [-0.03496778135,-0.06563465492,-0.997230839]
+ ],
+ "t": [
+ [-6.799650163],
+ [123.3743131],
+ [267.1549958]
+ ]
+ },
+ {
+ "name": "19_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 19,
+ "node": 24,
+ "K": [
+ [742.775,0,379.613],
+ [0,742.864,224.449],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316586,0.0333112,-0.000180777,0.00112675,0.112087],
+ "R": [
+ [-0.9947573056,0.06853183176,0.07590316848],
+ [0.05765365411,0.9888586451,-0.1372393391],
+ [-0.08446276764,-0.1321437401,-0.9876254719]
+ ],
+ "t": [
+ [4.340029177],
+ [136.5307812],
+ [258.2193706]
+ ]
+ },
+ {
+ "name": "20_01",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 1,
+ "K": [
+ [745.267,0,367.511],
+ [0,745.253,228.976],
+ [0,0,1]
+ ],
+ "distCoef": [-0.316421,0.0232694,0.000233523,0.00095017,0.129164],
+ "R": [
+ [-0.2595515744,0.03264633198,0.965177288],
+ [-0.02439656235,0.9988878376,-0.04034718866],
+ [-0.9654210418,-0.03401918423,-0.2584664527]
+ ],
+ "t": [
+ [43.91564589],
+ [114.6472759],
+ [269.2437955]
+ ]
+ },
+ {
+ "name": "20_02",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 2,
+ "K": [
+ [746.737,0,383.621],
+ [0,746.553,234.139],
+ [0,0,1]
+ ],
+ "distCoef": [-0.330711,0.126048,0.000259954,-0.000232797,-0.067441],
+ "R": [
+ [-0.2600597375,0.03354081135,0.965009817],
+ [-0.06475754991,0.9965406566,-0.05208818886],
+ [-0.9634185968,-0.07603771211,-0.2569880808]
+ ],
+ "t": [
+ [63.03617994],
+ [136.0112472],
+ [264.2112923]
+ ]
+ },
+ {
+ "name": "20_03",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 3,
+ "K": [
+ [748.567,0,371.842],
+ [0,748.646,223.378],
+ [0,0,1]
+ ],
+ "distCoef": [-0.332561,0.132401,-0.000978802,0.0010132,-0.0596871],
+ "R": [
+ [-0.2517963519,0.03200567411,0.967250864],
+ [0.0115205721,0.9994813079,-0.03007310314],
+ [-0.9677116686,0.003570985655,-0.2520344708]
+ ],
+ "t": [
+ [55.32226207],
+ [135.5872215],
+ [276.5287505]
+ ]
+ },
+ {
+ "name": "20_04",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 4,
+ "K": [
+ [747.412,0,375.731],
+ [0,747.545,213.638],
+ [0,0,1]
+ ],
+ "distCoef": [-0.324984,0.0823763,-0.00190711,0.0010176,0.0382164],
+ "R": [
+ [-0.2864406942,-0.001302983566,0.9580970885],
+ [-0.1193951903,0.9922525608,-0.03434594761],
+ [-0.9506295373,-0.1242302613,-0.2843770823]
+ ],
+ "t": [
+ [40.5108683],
+ [178.4576708],
+ [254.9563649]
+ ]
+ },
+ {
+ "name": "20_05",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 5,
+ "K": [
+ [747.818,0,377.646],
+ [0,748.63,232.294],
+ [0,0,1]
+ ],
+ "distCoef": [-0.327048,0.100477,-0.00250563,-0.000951363,0.00505748],
+ "R": [
+ [-0.2682590325,-0.01756457816,0.9631866782],
+ [-0.1175373506,0.9929607203,-0.014628026],
+ [-0.9561496027,-0.1171345104,-0.2684351761]
+ ],
+ "t": [
+ [28.10870602],
+ [198.6254244],
+ [256.0861594]
+ ]
+ },
+ {
+ "name": "20_06",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 6,
+ "K": [
+ [744.281,0,376.164],
+ [0,744.733,212.764],
+ [0,0,1]
+ ],
+ "distCoef": [-0.314115,0.0261091,-0.00186017,0.000146826,0.111047],
+ "R": [
+ [-0.2995512244,0.02650351378,0.9537120256],
+ [-0.1164678133,0.9911222418,-0.06412449085],
+ [-0.9469447251,-0.1302853239,-0.2938050747]
+ ],
+ "t": [
+ [24.38602287],
+ [207.7342285],
+ [252.6787249]
+ ]
+ },
+ {
+ "name": "20_07",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 7,
+ "K": [
+ [744.844,0,367.199],
+ [0,744.885,234.874],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307447,-0.0235368,-0.000447762,-0.000552595,0.198481],
+ "R": [
+ [-0.2246138655,-0.03605175288,0.9737807158],
+ [-0.1345418425,0.9908917963,0.005651603877],
+ [-0.965115073,-0.1297448231,-0.2274185059]
+ ],
+ "t": [
+ [-24.57828512],
+ [193.807989],
+ [253.6581871]
+ ]
+ },
+ {
+ "name": "20_08",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 8,
+ "K": [
+ [745.265,0,373.297],
+ [0,745.204,222.406],
+ [0,0,1]
+ ],
+ "distCoef": [-0.322725,0.0753011,-0.00198414,9.48962e-05,0.0496562],
+ "R": [
+ [-0.2740281164,0.007089557403,0.9616955493],
+ [-0.08615117171,0.9957715968,-0.0318889104],
+ [-0.9578551911,-0.09158965645,-0.2722586413]
+ ],
+ "t": [
+ [-24.40184383],
+ [190.6520913],
+ [261.5790911]
+ ]
+ },
+ {
+ "name": "20_09",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 9,
+ "K": [
+ [743.742,0,376.404],
+ [0,743.442,252.182],
+ [0,0,1]
+ ],
+ "distCoef": [-0.310951,0.0101818,-0.000165117,0.000699519,0.141452],
+ "R": [
+ [-0.234740558,-0.05401621619,0.9705560874],
+ [-0.06709368181,0.9969740023,0.03925909634],
+ [-0.9697398147,-0.05590247913,-0.2376543804]
+ ],
+ "t": [
+ [-60.89112675],
+ [163.1020008],
+ [266.420435]
+ ]
+ },
+ {
+ "name": "20_10",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 10,
+ "K": [
+ [746.237,0,381.452],
+ [0,745.998,235.104],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321635,0.0804606,-0.000793429,0.000500703,0.0308776],
+ "R": [
+ [-0.2327490461,-0.03063038999,0.9720543507],
+ [-0.1073579574,0.9942045343,0.005622535858],
+ [-0.9665930636,-0.1030491297,-0.2346885731]
+ ],
+ "t": [
+ [-52.7687065],
+ [155.650502],
+ [258.7092289]
+ ]
+ },
+ {
+ "name": "20_11",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 11,
+ "K": [
+ [744.465,0,352.406],
+ [0,744.368,231.635],
+ [0,0,1]
+ ],
+ "distCoef": [-0.307896,-0.0267024,-0.00138959,-0.000489454,0.213952],
+ "R": [
+ [-0.2568719183,-0.003646201445,0.9664385768],
+ [-0.06909534804,0.997503196,-0.01460160774],
+ [-0.9639723287,-0.07052715282,-0.256482495]
+ ],
+ "t": [
+ [-58.11810551],
+ [133.8270577],
+ [264.378006]
+ ]
+ },
+ {
+ "name": "20_12",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 12,
+ "K": [
+ [744.557,0,351.376],
+ [0,744.424,216.683],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317479,0.0158652,-0.000659121,-0.00059258,0.147681],
+ "R": [
+ [-0.2372383683,-0.02274879941,0.9711850744],
+ [-0.1004253449,0.9949438408,-0.001226302928],
+ [-0.9662467111,-0.09782252214,-0.2383234094]
+ ],
+ "t": [
+ [-62.35654103],
+ [118.4734964],
+ [259.8400796]
+ ]
+ },
+ {
+ "name": "20_13",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 13,
+ "K": [
+ [743.07,0,377.102],
+ [0,743.158,222.988],
+ [0,0,1]
+ ],
+ "distCoef": [-0.29868,-0.0827266,-0.00133003,-0.00119832,0.273178],
+ "R": [
+ [-0.2367527853,-0.03686088138,0.9708704311],
+ [-0.08746956632,0.9960307636,0.01648614259],
+ [-0.9676245107,-0.08101847538,-0.2390372628]
+ ],
+ "t": [
+ [-42.43038274],
+ [111.3831569],
+ [262.4188123]
+ ]
+ },
+ {
+ "name": "20_14",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 14,
+ "K": [
+ [745.597,0,372.306],
+ [0,745.414,237.499],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320131,0.0615197,0.00113665,-0.000991542,0.0414761],
+ "R": [
+ [-0.2769894269,0.05383368349,0.9593637433],
+ [-0.05406721308,0.9959742516,-0.07149843787],
+ [-0.9593506105,-0.07167443526,-0.2729636999]
+ ],
+ "t": [
+ [-21.49417033],
+ [90.7530727],
+ [264.2254974]
+ ]
+ },
+ {
+ "name": "20_15",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 15,
+ "K": [
+ [746.296,0,380.788],
+ [0,746.161,226.883],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321885,0.0553182,0.000132369,-0.000878491,0.0778662],
+ "R": [
+ [-0.2870302882,0.01079685294,0.9578606588],
+ [-0.05665486447,0.9979947406,-0.02822630231],
+ [-0.9562446549,-0.06236926949,-0.2858430237]
+ ],
+ "t": [
+ [-1.106709776],
+ [85.82297146],
+ [264.8070963]
+ ]
+ },
+ {
+ "name": "20_16",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 16,
+ "K": [
+ [744.119,0,345.288],
+ [0,744.112,227.607],
+ [0,0,1]
+ ],
+ "distCoef": [-0.302547,-0.0664079,0.000893953,-0.000627784,0.303861],
+ "R": [
+ [-0.252548592,0.05539030986,0.9659974753],
+ [-0.08640189331,0.9930807476,-0.07953201617],
+ [-0.963718798,-0.1035497095,-0.2460153169]
+ ],
+ "t": [
+ [10.51473419],
+ [107.4721829],
+ [260.872486]
+ ]
+ },
+ {
+ "name": "20_17",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 17,
+ "K": [
+ [745.831,0,353.784],
+ [0,745.87,219.754],
+ [0,0,1]
+ ],
+ "distCoef": [-0.321082,0.0599511,-0.000750204,0.000386726,0.0615888],
+ "R": [
+ [-0.3124433364,0.0857084176,0.9460619582],
+ [-0.03834810703,0.9939715084,-0.1027135007],
+ [-0.9491620432,-0.06837183409,-0.3072730188]
+ ],
+ "t": [
+ [50.17882687],
+ [91.39390134],
+ [262.9120903]
+ ]
+ },
+ {
+ "name": "20_18",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 18,
+ "K": [
+ [745.227,0,385.13],
+ [0,745.129,233.897],
+ [0,0,1]
+ ],
+ "distCoef": [-0.311291,0.0180828,0.00116452,0.000576614,0.0928398],
+ "R": [
+ [-0.2786751196,0.05379991941,0.9588773365],
+ [-0.03740853519,0.9970639104,-0.06681437094],
+ [-0.9596565944,-0.0544896994,-0.2758443282]
+ ],
+ "t": [
+ [57.04086511],
+ [98.35557378],
+ [265.4113916]
+ ]
+ },
+ {
+ "name": "20_19",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 19,
+ "K": [
+ [746.424,0,373.724],
+ [0,746.378,215.089],
+ [0,0,1]
+ ],
+ "distCoef": [-0.317589,0.0452179,0.000839363,0.00087423,0.0858828],
+ "R": [
+ [-0.2053627335,-0.023863444,0.9783949528],
+ [-0.1366627843,0.9906072975,-0.004523879826],
+ [-0.9690972248,-0.1346392148,-0.2066950671]
+ ],
+ "t": [
+ [2.454839771],
+ [148.020868],
+ [256.5149472]
+ ]
+ },
+ {
+ "name": "20_20",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 20,
+ "K": [
+ [744.35,0,378.361],
+ [0,744.386,245.706],
+ [0,0,1]
+ ],
+ "distCoef": [-0.305792,-0.0298413,-5.26611e-05,9.57392e-05,0.206854],
+ "R": [
+ [-0.2653224987,0.04663873586,0.9630310483],
+ [-0.08123292055,0.9941966424,-0.07052835541],
+ [-0.9607315881,-0.09694258412,-0.2599941366]
+ ],
+ "t": [
+ [23.42848118],
+ [157.616994],
+ [260.7931406]
+ ]
+ },
+ {
+ "name": "20_21",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 21,
+ "K": [
+ [747.371,0,368.768],
+ [0,747.344,231.897],
+ [0,0,1]
+ ],
+ "distCoef": [-0.308946,-0.0139041,-0.000755627,-0.000244894,0.190547],
+ "R": [
+ [-0.2375675449,-0.01520768023,0.9712519694],
+ [-0.09352440886,0.9955903179,-0.007287238765],
+ [-0.966858235,-0.09256697771,-0.2379422368]
+ ],
+ "t": [
+ [-12.76210059],
+ [163.3748289],
+ [261.1782343]
+ ]
+ },
+ {
+ "name": "20_22",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 22,
+ "K": [
+ [746.314,0,371.788],
+ [0,745.992,237.732],
+ [0,0,1]
+ ],
+ "distCoef": [-0.315167,0.0352154,-0.000828301,0.000312219,0.0891012],
+ "R": [
+ [-0.2145858088,0.0004599306573,0.9767050318],
+ [-0.07749764501,0.9968390076,-0.017495939],
+ [-0.9736257216,-0.07944672006,-0.2138718611]
+ ],
+ "t": [
+ [-33.0373727],
+ [146.3668194],
+ [262.1626174]
+ ]
+ },
+ {
+ "name": "20_23",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 23,
+ "K": [
+ [746.318,0,371.868],
+ [0,746.096,236.531],
+ [0,0,1]
+ ],
+ "distCoef": [-0.318459,0.0405311,0.000489761,-0.000285822,0.0876741],
+ "R": [
+ [-0.2554085937,0.004734611177,0.9668216142],
+ [-0.07039835709,0.9972425561,-0.02348096154],
+ [-0.9642668311,-0.0740598926,-0.25437101]
+ ],
+ "t": [
+ [-17.40671779],
+ [124.2252344],
+ [264.0602836]
+ ]
+ },
+ {
+ "name": "20_24",
+ "type": "vga",
+ "resolution": [640,480],
+ "panel": 20,
+ "node": 24,
+ "K": [
+ [745.832,0,382.965],
+ [0,745.816,231.317],
+ [0,0,1]
+ ],
+ "distCoef": [-0.320385,0.0446211,0.00028801,0.00167617,0.104376],
+ "R": [
+ [-0.2362773498,-0.02089730322,0.9714609188],
+ [-0.1013714927,0.9948433166,-0.003255144035],
+ [-0.9663833786,-0.09924756028,-0.2371773332]
+ ],
+ "t": [
+ [-5.093436327],
+ [126.6662443],
+ [260.9183094]
+ ]
+ },
+ {
+ "name": "00_00",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 0,
+ "K": [
+ [1634.03,0,942.792],
+ [0,1629.73,558.29],
+ [0,0,1]
+ ],
+ "distCoef": [-0.222445,0.199192,8.73054e-05,0.000982243,0.0238445],
+ "R": [
+ [0.1369296663,0.03357591931,-0.9900115778],
+ [-0.09021094677,0.9956950625,0.02129149064],
+ [0.9864645212,0.08639444504,0.1393691081]
+ ],
+ "t": [
+ [20.90028135],
+ [127.2202879],
+ [283.1159034]
+ ]
+ },
+ {
+ "name": "00_01",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 1,
+ "K": [
+ [1395.91,0,951.559],
+ [0,1392.24,561.398],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286227,0.183082,-4.29815e-05,0.000644874,-0.0479635],
+ "R": [
+ [0.05337497606,0.02479711619,0.9982666052],
+ [0.6376765256,0.7684660834,-0.05318390075],
+ [-0.7684528356,0.6394098699,0.0252043199]
+ ],
+ "t": [
+ [6.299256813],
+ [104.397182],
+ [363.078698]
+ ]
+ },
+ {
+ "name": "00_02",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 2,
+ "K": [
+ [1397.02,0,939.355],
+ [0,1394.04,556.611],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28229,0.173658,-0.000610716,0.000955319,-0.0398628],
+ "R": [
+ [-0.9970491806,0.05290586318,-0.05562284625],
+ [-0.01182874156,0.6100448884,0.792278559],
+ [0.07584861407,0.7905986364,-0.6076189463]
+ ],
+ "t": [
+ [-16.22360931],
+ [63.30660163],
+ [381.0181823]
+ ]
+ },
+ {
+ "name": "00_03",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 3,
+ "K": [
+ [1395.71,0,949.456],
+ [0,1392.06,566.648],
+ [0,0,1]
+ ],
+ "distCoef": [-0.281728,0.168097,-0.00021431,1.8072e-05,-0.0371786],
+ "R": [
+ [-0.6216465312,-0.0285781748,0.7827763909],
+ [0.07448493547,0.9926490654,0.09539301533],
+ [-0.7797484111,0.117605786,-0.6149482047]
+ ],
+ "t": [
+ [-14.50346059],
+ [117.4297203],
+ [290.1984382]
+ ]
+ },
+ {
+ "name": "00_04",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 4,
+ "K": [
+ [1633.26,0,949.479],
+ [0,1629.32,572.374],
+ [0,0,1]
+ ],
+ "distCoef": [-0.223003,0.185095,-0.000261654,0.00109433,0.0657602],
+ "R": [
+ [-0.5292732399,-0.01229259603,0.8483623811],
+ [0.636650989,0.6551966806,0.4066851706],
+ [-0.5608434325,0.7553583268,-0.3389519765]
+ ],
+ "t": [
+ [-5.411400695],
+ [80.12176746],
+ [379.8488129]
+ ]
+ },
+ {
+ "name": "00_05",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 5,
+ "K": [
+ [1396.29,0,933.34],
+ [0,1392.95,560.462],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28733,0.185523,-0.000225825,-0.000143128,-0.0508452],
+ "R": [
+ [-0.9314658579,-0.01073438439,-0.363670357],
+ [-0.021313424,0.9994579907,0.02508909603],
+ [0.3632039283,0.03112069687,-0.9311897813]
+ ],
+ "t": [
+ [-6.050515741],
+ [143.9213951],
+ [280.3813532]
+ ]
+ },
+ {
+ "name": "00_06",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 6,
+ "K": [
+ [1396.11,0,950.228],
+ [0,1392.54,548.78],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286481,0.183173,-0.000152555,0.0010664,-0.0482263],
+ "R": [
+ [0.9448241112,-0.04876703013,-0.3239277321],
+ [-0.2141569626,0.6563150135,-0.7234551806],
+ [0.2478793944,0.7529092773,0.6096584503]
+ ],
+ "t": [
+ [-10.023614],
+ [84.45695974],
+ [376.925635]
+ ]
+ },
+ {
+ "name": "00_07",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 7,
+ "K": [
+ [1395.51,0,947.67],
+ [0,1392.41,549.081],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286691,0.185163,-6.53256e-05,4.32858e-06,-0.052639],
+ "R": [
+ [-0.9419632708,-0.03700247277,0.3336705164],
+ [0.180351898,0.7825307202,0.5959185052],
+ [-0.2831578878,0.6215114552,-0.7304417305]
+ ],
+ "t": [
+ [-5.250326149],
+ [112.5645453],
+ [360.2387508]
+ ]
+ },
+ {
+ "name": "00_08",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 8,
+ "K": [
+ [1642.7,0,945.082],
+ [0,1638.64,562.465],
+ [0,0,1]
+ ],
+ "distCoef": [-0.22444,0.208938,-0.000569838,0.000484927,0.0287248],
+ "R": [
+ [0.9544726119,0.01685383959,-0.2978220632],
+ [-0.03362017317,0.9981191009,-0.05126347965],
+ [0.2963979035,0.05894241665,0.9532439742]
+ ],
+ "t": [
+ [-19.67808464],
+ [136.6798831],
+ [282.6801175]
+ ]
+ },
+ {
+ "name": "00_09",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 9,
+ "K": [
+ [1396.79,0,945.482],
+ [0,1393.03,542.64],
+ [0,0,1]
+ ],
+ "distCoef": [-0.284259,0.175176,-0.000406823,0.000640552,-0.0406716],
+ "R": [
+ [-0.3169419478,-0.08460972789,0.9446634298],
+ [-0.1243350249,0.9911238917,0.04705563528],
+ [-0.9402598595,-0.1025408464,-0.3246486894]
+ ],
+ "t": [
+ [6.780958613],
+ [147.0057696],
+ [260.6395044]
+ ]
+ },
+ {
+ "name": "00_10",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 10,
+ "K": [
+ [1393.87,0,944.546],
+ [0,1390.36,563.199],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285353,0.177704,-0.000109708,0.000471392,-0.0432146],
+ "R": [
+ [0.9503475669,0.04849461332,0.3073886376],
+ [0.1560494297,0.7803459045,-0.6055648973],
+ [-0.2692360999,0.6234649483,0.734032275]
+ ],
+ "t": [
+ [22.71992555],
+ [112.7759402],
+ [360.0009328]
+ ]
+ },
+ {
+ "name": "00_11",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 11,
+ "K": [
+ [1492.96,0,934.544],
+ [0,1489.74,547.466],
+ [0,0,1]
+ ],
+ "distCoef": [-0.259288,0.190057,-5.50625e-05,0.00031915,-0.0281283],
+ "R": [
+ [0.8129763959,0.04080422416,-0.5808652124],
+ [-0.2848486357,0.8979062573,-0.3355973896],
+ [0.5078687177,0.4382914196,0.7415996205]
+ ],
+ "t": [
+ [-0.03199165418],
+ [105.1487628],
+ [331.4862369]
+ ]
+ },
+ {
+ "name": "00_12",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 12,
+ "K": [
+ [1395.93,0,964.611],
+ [0,1392.67,564.875],
+ [0,0,1]
+ ],
+ "distCoef": [-0.290995,0.19463,-0.000241491,0.000727782,-0.0582663],
+ "R": [
+ [-0.9950957343,0.04321912909,-0.08897520145],
+ [-0.001969290489,0.8906636271,0.454658581],
+ [0.09889692354,0.4526040326,-0.886210465]
+ ],
+ "t": [
+ [24.66653867],
+ [97.49188585],
+ [334.8897626]
+ ]
+ },
+ {
+ "name": "00_13",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 13,
+ "K": [
+ [1592.21,0,937.375],
+ [0,1588.39,560.919],
+ [0,0,1]
+ ],
+ "distCoef": [-0.239248,0.229218,0.000137317,0.000315934,-0.0358302],
+ "R": [
+ [-0.2862766934,0.07452649614,-0.9552441867],
+ [-0.7557457469,0.5952786327,0.2729317047],
+ [0.588977097,0.8000557173,-0.1140913162]
+ ],
+ "t": [
+ [-15.47943966],
+ [60.20818768],
+ [381.0821849]
+ ]
+ },
+ {
+ "name": "00_14",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 14,
+ "K": [
+ [1649.51,0,934.882],
+ [0,1644.85,568.024],
+ [0,0,1]
+ ],
+ "distCoef": [-0.22365,0.220791,-0.000591343,0.000286172,0.0121962],
+ "R": [
+ [0.827339054,-0.07848137689,0.5561930989],
+ [0.02005408661,0.9936867625,0.110383204],
+ [-0.5613447456,-0.08017039095,0.8236897383]
+ ],
+ "t": [
+ [-7.23447972],
+ [142.1657406],
+ [267.9541185]
+ ]
+ },
+ {
+ "name": "00_15",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 15,
+ "K": [
+ [1430.11,0,948.926],
+ [0,1426.48,561.705],
+ [0,0,1]
+ ],
+ "distCoef": [-0.277948,0.185701,0.000192514,0.000149713,-0.0424254],
+ "R": [
+ [-0.9997414125,0.006454955712,0.02180462522],
+ [0.005192647027,0.9983342904,-0.05746025644],
+ [-0.02213920846,-0.05733217422,-0.9981096519]
+ ],
+ "t": [
+ [9.642162177],
+ [134.9258555],
+ [268.2324221]
+ ]
+ },
+ {
+ "name": "00_16",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 16,
+ "K": [
+ [1427.34,0,949.618],
+ [0,1423.13,548.132],
+ [0,0,1]
+ ],
+ "distCoef": [-0.279453,0.188683,-0.000345265,0.000583475,-0.0479414],
+ "R": [
+ [0.7694875517,0.002369830201,0.6386574134],
+ [0.2539259376,0.9164213706,-0.3093436433],
+ [-0.586012394,0.4002077652,0.7045730755]
+ ],
+ "t": [
+ [4.866150988],
+ [118.1652356],
+ [330.6340665]
+ ]
+ },
+ {
+ "name": "00_17",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 17,
+ "K": [
+ [1393.35,0,916.395],
+ [0,1390.34,563.652],
+ [0,0,1]
+ ],
+ "distCoef": [-0.287138,0.186145,7.50854e-05,0.000557424,-0.0513205],
+ "R": [
+ [0.5039250676,0.09465184024,-0.8585456047],
+ [-0.6050310345,0.7480627966,-0.2726527087],
+ [0.6164389455,0.6568432701,0.4342348962]
+ ],
+ "t": [
+ [18.2296155],
+ [97.71531857],
+ [361.6667015]
+ ]
+ },
+ {
+ "name": "00_18",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 18,
+ "K": [
+ [1542.2,0,947.567],
+ [0,1538.02,555.168],
+ [0,0,1]
+ ],
+ "distCoef": [-0.245751,0.182006,3.81269e-06,0.000651097,0.00472657],
+ "R": [
+ [-0.4048875531,-0.001022756131,0.9143659133],
+ [0.3656410889,0.9163838146,0.1629334173],
+ [-0.8380767647,0.4002994608,-0.3706584387]
+ ],
+ "t": [
+ [16.25260358],
+ [116.7586119],
+ [329.7529305]
+ ]
+ },
+ {
+ "name": "00_19",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 19,
+ "K": [
+ [1396.57,0,949.242],
+ [0,1393.19,554.872],
+ [0,0,1]
+ ],
+ "distCoef": [-0.280864,0.167216,-6.6519e-05,0.000917406,-0.0342733],
+ "R": [
+ [0.7360342296,0.009501079563,0.6768776421],
+ [0.5173282683,0.6370082142,-0.5714822813],
+ [-0.4366063167,0.7707984591,0.4639446731]
+ ],
+ "t": [
+ [-24.15514071],
+ [74.04862943],
+ [379.5076537]
+ ]
+ },
+ {
+ "name": "00_20",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 20,
+ "K": [
+ [1403.46,0,940.386],
+ [0,1400.1,552.684],
+ [0,0,1]
+ ],
+ "distCoef": [-0.287177,0.194004,-0.000120001,8.41526e-05,-0.0604614],
+ "R": [
+ [-0.6201222217,0.04052054618,-0.7834580496],
+ [-0.1302964194,0.9794749929,0.1537907063],
+ [0.773609251,0.1974508131,-0.6021145267]
+ ],
+ "t": [
+ [24.4496252],
+ [140.6900046],
+ [300.8290806]
+ ]
+ },
+ {
+ "name": "00_21",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 21,
+ "K": [
+ [1397.56,0,932.828],
+ [0,1393.91,562.186],
+ [0,0,1]
+ ],
+ "distCoef": [-0.28642,0.185674,-0.000229601,1.91211e-05,-0.052608],
+ "R": [
+ [-0.2617478675,-0.05032313647,-0.9638234464],
+ [-0.4532392419,0.8880813121,0.07671878938],
+ [0.8520928608,0.4569235877,-0.2552618099]
+ ],
+ "t": [
+ [-8.784671236],
+ [98.11062797],
+ [332.9193692]
+ ]
+ },
+ {
+ "name": "00_22",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 22,
+ "K": [
+ [1514.1,0,945.861],
+ [0,1510.18,558.694],
+ [0,0,1]
+ ],
+ "distCoef": [-0.260535,0.216046,-0.000156491,0.000677315,-0.0506741],
+ "R": [
+ [-0.9239818557,-0.0613765916,0.3774790647],
+ [0.05486070575,0.9555572213,0.289656175],
+ [-0.3784809549,0.288345818,-0.8795503715]
+ ],
+ "t": [
+ [-5.224239691],
+ [110.7456244],
+ [313.8855054]
+ ]
+ },
+ {
+ "name": "00_23",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 23,
+ "K": [
+ [1572.86,0,941.716],
+ [0,1568.17,560.048],
+ [0,0,1]
+ ],
+ "distCoef": [-0.240801,0.195963,-0.000444179,0.000458513,0.00455186],
+ "R": [
+ [0.5162966551,0.01335424781,0.856305686],
+ [0.1418829708,0.9847272537,-0.100903213],
+ [-0.8445750331,0.173591186,0.506516647]
+ ],
+ "t": [
+ [2.417701344],
+ [102.3557555],
+ [298.3746617]
+ ]
+ },
+ {
+ "name": "00_24",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 24,
+ "K": [
+ [1399.63,0,954.539],
+ [0,1396.27,546.388],
+ [0,0,1]
+ ],
+ "distCoef": [-0.288761,0.190789,4.23479e-05,6.78832e-05,-0.0577764],
+ "R": [
+ [-0.388991142,-0.05987834367,-0.9192934653],
+ [0.02928793432,0.9965772059,-0.07730517199],
+ [0.9207758187,-0.05699523376,-0.3859059924]
+ ],
+ "t": [
+ [-15.12220678],
+ [134.1751339],
+ [265.239245]
+ ]
+ },
+ {
+ "name": "00_25",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 25,
+ "K": [
+ [1397.66,0,935.585],
+ [0,1394.65,559.251],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285722,0.183994,-0.000502702,0.000494145,-0.0515729],
+ "R": [
+ [0.7926422733,0.00130484237,-0.6096855943],
+ [0.04487405742,0.9971605675,0.06047414042],
+ [0.6080333424,-0.07529342651,0.7903330655]
+ ],
+ "t": [
+ [4.539475053],
+ [139.2223569],
+ [261.6293171]
+ ]
+ },
+ {
+ "name": "00_26",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 26,
+ "K": [
+ [1616.8,0,950.116],
+ [0,1613.47,551.417],
+ [0,0,1]
+ ],
+ "distCoef": [-0.223464,0.185279,-0.00090721,0.000127112,0.0351947],
+ "R": [
+ [-0.7556190155,-0.04350579001,-0.6535649545],
+ [0.1389994774,0.9644159151,-0.2249023966],
+ [0.6400930001,-0.2607857146,-0.7226837222]
+ ],
+ "t": [
+ [-12.5475419],
+ [141.1612209],
+ [240.8579734]
+ ]
+ },
+ {
+ "name": "00_27",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 27,
+ "K": [
+ [1861.86,0,934.556],
+ [0,1857.26,552.106],
+ [0,0,1]
+ ],
+ "distCoef": [-0.171511,0.209759,-1.83176e-05,-3.41566e-05,0.211418],
+ "R": [
+ [0.9782876177,0.02697940456,0.2054883178],
+ [0.02691509764,0.9665557486,-0.2550403151],
+ [-0.2054967507,0.2550335204,0.9448433674]
+ ],
+ "t": [
+ [-0.5131666478],
+ [123.4498457],
+ [311.6401591]
+ ]
+ },
+ {
+ "name": "00_28",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 28,
+ "K": [
+ [1395.57,0,953.143],
+ [0,1392.36,561.982],
+ [0,0,1]
+ ],
+ "distCoef": [-0.284934,0.181016,0.000127361,0.000271191,-0.0471616],
+ "R": [
+ [-0.6310677524,-0.02949081954,-0.775166939],
+ [-0.5128354354,0.7656140117,0.3883748207],
+ [0.5820251782,0.6426238999,-0.4982782509]
+ ],
+ "t": [
+ [-8.508070023],
+ [104.2896072],
+ [361.3816814]
+ ]
+ },
+ {
+ "name": "00_29",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 29,
+ "K": [
+ [1400.36,0,939.608],
+ [0,1397.25,572.603],
+ [0,0,1]
+ ],
+ "distCoef": [-0.286109,0.1878,-0.000309515,0.000886248,-0.0523515],
+ "R": [
+ [0.4887300705,-0.07268882749,-0.8694016635],
+ [-0.08227020668,0.9882426049,-0.1288726774],
+ [0.8685473685,0.1345098073,0.4770037531]
+ ],
+ "t": [
+ [-20.72850042],
+ [158.8912224],
+ [289.281465]
+ ]
+ },
+ {
+ "name": "00_30",
+ "type": "hd",
+ "resolution": [1920,1080],
+ "panel": 0,
+ "node": 30,
+ "K": [
+ [1407.21,0,946.883],
+ [0,1403.86,563.032],
+ [0,0,1]
+ ],
+ "distCoef": [-0.285813,0.195568,-0.000394067,0.000468367,-0.0600751],
+ "R": [
+ [0.08635045426,0.06174190292,0.9943498059],
+ [0.2147800801,0.9734543185,-0.07909618832],
+ [-0.9728376618,0.2203965227,0.07079729175]
+ ],
+ "t": [
+ [13.79078928],
+ [132.1300437],
+ [306.0754676]
+ ]
+ },
+ {
+ "name": "50_01",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 1,
+ "K": [
+ [1053.92,0,947.294],
+ [0,1054.32,535.405],
+ [0,0,1]
+ ],
+ "distCoef": [0.0476403,-0.053786,0.000733314,-0.000579648,0.0122759],
+ "R": [
+ [0.9095307192,0.0006254166507,-0.4156362348],
+ [-0.003349684277,0.999977422,-0.0058253781],
+ [0.4156232073,0.006690610494,0.9095122788]
+ ],
+ "t": [
+ [-15.84850815],
+ [103.1392168],
+ [269.3362326]
+ ]
+ },
+ {
+ "name": "50_02",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 2,
+ "K": [
+ [1058.92,0,971.224],
+ [0,1059.3,541.276],
+ [0,0,1]
+ ],
+ "distCoef": [0.0485216,-0.0529886,-0.000413578,-0.000171659,0.00909728],
+ "R": [
+ [-0.08404700998,-0.006825065684,-0.9964384169],
+ [-0.04073006897,0.9991643735,-0.003408260769],
+ [0.9956290281,0.04029855131,-0.08425476347]
+ ],
+ "t": [
+ [-4.246538185],
+ [93.69672118],
+ [271.0169727]
+ ]
+ },
+ {
+ "name": "50_03",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 3,
+ "K": [
+ [1050.35,0,971.069],
+ [0,1050.88,535.343],
+ [0,0,1]
+ ],
+ "distCoef": [0.0482196,-0.0555053,0.000460862,0.000594278,0.0128034],
+ "R": [
+ [-0.9791929995,-0.0009192386581,-0.2029291126],
+ [0.004325206908,0.9996680429,-0.02539875018],
+ [0.2028850964,-0.02574798878,-0.9788639736]
+ ],
+ "t": [
+ [-10.71273011],
+ [112.0293664],
+ [269.2258843]
+ ]
+ },
+ {
+ "name": "50_04",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 4,
+ "K": [
+ [1053.76,0,952.563],
+ [0,1053.62,535.073],
+ [0,0,1]
+ ],
+ "distCoef": [0.0534802,-0.059505,0.000265754,-0.00038559,0.0128987],
+ "R": [
+ [-0.4973721867,-0.01252789009,0.8674468052],
+ [-0.05725964091,0.9981894693,-0.01841512904],
+ [-0.8656455634,-0.05882886558,-0.4971890215]
+ ],
+ "t": [
+ [-12.12207689],
+ [119.639642],
+ [263.8142799]
+ ]
+ },
+ {
+ "name": "50_05",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 5,
+ "K": [
+ [1061.53,0,963.346],
+ [0,1061.99,535.689],
+ [0,0,1]
+ ],
+ "distCoef": [0.0450742,-0.0483577,0.000117724,0.00131017,0.00746483],
+ "R": [
+ [0.6332975321,0.02789684006,0.7734054578],
+ [-0.04440403331,0.9990136015,0.0003253688515],
+ [-0.772633495,-0.034548377,0.6339115806]
+ ],
+ "t": [
+ [4.398197962],
+ [114.449943],
+ [269.0646085]
+ ]
+ },
+ {
+ "name": "50_06",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 6,
+ "K": [
+ [1053.8,0,975.87],
+ [0,1054.44,518.546],
+ [0,0,1]
+ ],
+ "distCoef": [0.0608578,-0.0758877,0.000572907,0.000423304,0.0232485],
+ "R": [
+ [0.9936973916,-0.01776547634,0.1106791841],
+ [0.08238304881,0.7853099766,-0.6135969963],
+ [-0.07601662453,0.6188478234,0.7818240495]
+ ],
+ "t": [
+ [-23.36095562],
+ [58.01362542],
+ [350.0526212]
+ ]
+ },
+ {
+ "name": "50_07",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 7,
+ "K": [
+ [1058.37,0,951.456],
+ [0,1058.06,537.752],
+ [0,0,1]
+ ],
+ "distCoef": [0.0510704,-0.0625189,-0.000144014,6.68608e-05,0.016463],
+ "R": [
+ [0.4325769754,-0.03234243573,-0.9010167186],
+ [-0.4868424381,0.832758343,-0.2636247005],
+ [0.7588554545,0.5526911516,0.344486415]
+ ],
+ "t": [
+ [-19.0385587],
+ [87.13576568],
+ [341.2560709]
+ ]
+ },
+ {
+ "name": "50_08",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 8,
+ "K": [
+ [1051.92,0,937.937],
+ [0,1051.86,554.246],
+ [0,0,1]
+ ],
+ "distCoef": [0.0499863,-0.0613843,-4.12419e-05,-0.000155211,0.0174279],
+ "R": [
+ [-0.7043873056,-0.07078753835,-0.7062773168],
+ [-0.4398115151,0.8245196459,0.3559960458],
+ [0.5571394394,0.5613879923,-0.6119143463]
+ ],
+ "t": [
+ [-21.03532832],
+ [82.26745729],
+ [344.5100871]
+ ]
+ },
+ {
+ "name": "50_09",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 9,
+ "K": [
+ [1054,0,961.563],
+ [0,1054.08,544.179],
+ [0,0,1]
+ ],
+ "distCoef": [0.0446773,-0.0530941,0.000226286,-0.000324258,0.0121913],
+ "R": [
+ [-0.8728623151,-0.0989156561,0.4778358211],
+ [0.2068965126,0.8118396582,0.5459946908],
+ [-0.4419334927,0.5754407548,-0.6881589393]
+ ],
+ "t": [
+ [-36.30074608],
+ [73.0041962],
+ [346.5857858]
+ ]
+ },
+ {
+ "name": "50_10",
+ "type": "kinect-color",
+ "resolution": [1920,1080],
+ "panel": 50,
+ "node": 10,
+ "K": [
+ [1050.04,0,941.59],
+ [0,1050.6,559.398],
+ [0,0,1]
+ ],
+ "distCoef": [0.0506861,-0.0636966,0.000195295,-6.41025e-06,0.0181857],
+ "R": [
+ [0.1849149694,0.002001709126,0.9827524852],
+ [0.5894867579,0.7998990427,-0.1125472514],
+ [-0.786328059,0.6001312479,0.146733326]
+ ],
+ "t": [
+ [-12.26435316],
+ [64.88453925],
+ [349.5293231]
+ ]
+ }
+ ]
+}
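
The calibration records above appear to follow the CMU Panoptic Studio convention: per-camera intrinsics `K`, a five-term OpenCV-style distortion vector `distCoef` (assumed here to be `[k1, k2, p1, p2, k3]`), and extrinsics `R`, `t` taking world coordinates (in centimeters) into the camera frame. A minimal sketch of how one of these entries might be consumed is below; the file path, the top-level `cameras` key, and the function name are assumptions for illustration, not something this diff defines.

```python
import json
import numpy as np

def project_points(points_world, cam):
    """Project Nx3 world points into pixels for one calibration record.

    `cam` is one dict shaped like the entries above; the distortion model
    is assumed to be the OpenCV 5-term Brown-Conrady form [k1, k2, p1, p2, k3].
    """
    K = np.array(cam["K"])
    R = np.array(cam["R"])
    t = np.array(cam["t"])                      # 3x1 translation (assumed cm)
    k1, k2, p1, p2, k3 = cam["distCoef"]

    X_cam = R @ points_world.T + t              # 3xN points in the camera frame
    x, y = X_cam[0] / X_cam[2], X_cam[1] / X_cam[2]   # normalized coordinates

    r2 = x * x + y * y                          # squared radial distance
    radial = 1 + k1 * r2 + k2 * r2**2 + k3 * r2**3
    x_d = x * radial + 2 * p1 * x * y + p2 * (r2 + 2 * x * x)
    y_d = y * radial + p1 * (r2 + 2 * y * y) + 2 * p2 * x * y

    u = K[0][0] * x_d + K[0][2]                 # fx * x + cx
    v = K[1][1] * y_d + K[1][2]                 # fy * y + cy
    return np.stack([u, v], axis=1)

# Illustrative usage; the path and the "cameras" key are hypothetical.
with open("calibration_160906_band2.json") as f:
    calib = json.load(f)
hd_cams = [c for c in calib["cameras"] if c["type"] == "hd"]
pixels = project_points(np.array([[0.0, -100.0, 0.0]]), hd_cams[0])
```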
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000139.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000139.json
new file mode 100644
index 0000000000000000000000000000000000000000..2625941cdc11aa88bde28760a9e83279f01b6974
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000139.json
@@ -0,0 +1,15 @@
+{ "version": 0.7,
+"univTime" :47884.218,
+"fpsType" :"hd_29_97",
+"bodies" :
+[
+{ "id": 0,
+"joints19": [112.193, -105.597, -63.2943, 0.681274, 98.5895, -126.086, -55.7911, 0.631165, 109.902, -62.3343, -62.7694, 0.434326, 103.926, -106.634, -77.9832, 0.634766, 96.8314, -78.8763, -80.315, 0.630554, 81.2761, -65.6605, -63.3084, 0.521851, 104.519, -62.7352, -72.4198, 0.444824, 65.6156, -49.5546, -67.2074, 0.308655, 54.2865, -10.3131, -52.1117, 0.236267, 120.42, -105.112, -49.3556, 0.60022, 116.189, -76.1983, -41.2718, 0.653992, 92.821, -64.5032, -42.2247, 0.527283, 115.285, -61.9334, -53.119, 0.411194, 85.1507, -47.7375, -27.3165, 0.324036, 68.5293, -8.10239, -40.0008, 0.237915, 99.4248, -129.754, -59.6533, 0.558838, 105.207, -126.2, -67.7812, 0.423035, 102.014, -129.179, -53.6288, 0.597534, 112.005, -124.794, -53.0751, 0.523254]
+},
+{ "id": 1,
+"joints19": [-75.6724, -98.3814, -40.586, 0.645996, -56.0144, -116.916, -44.6227, 0.601746, -74.2896, -55.1045, -36.5893, 0.251404, -83.9112, -98.0817, -25.7277, 0.517944, -88.2289, -74.2988, -14.0067, 0.396973, -68.2309, -74.1222, -1.47647, 0.329041, -79.1849, -55.0298, -28.0693, 0.249634, -51.5633, -47.6808, -7.04466, 0.285828, -74.7285, -17.5206, -29.2095, 0.225037, -67.3391, -99.4683, -53.6127, 0.477478, -47.0624, -92.1391, -77.8037, 0.564758, -44.4238, -79.2347, -57.8838, 0.42804, -69.3944, -55.1793, -45.1093, 0.227051, -34.6453, -49.6836, -25.2735, 0.309937, -40.5958, -7.8462, -31.1836, 0.256836, -56.8233, -120.322, -40.7627, 0.527283, -62.646, -116.933, -32.5876, 0.487427, -59.8079, -119.867, -46.6254, 0.471802, -69.67, -116.407, -47.7538, 0.328979]
+},
+{ "id": 2,
+"joints19": [-2.94539, -95.1867, 36.3111, 0.628723, 2.07299, -104.457, 17.1551, 0.454163, -7.05924, -50.5435, 42.1746, 0.244141, 9.71628, -93.5102, 43.4675, 0.522705, 12.0947, -69.463, 41.455, 0.387512, 19.2916, -69.8677, 21.9048, 0.328552, 1.02674, -50.5267, 46.2205, 0.253113, 26.6747, -43.1457, 17.5558, 0.371948, 27.6632, -7.91068, 13.5889, 0.338867, -15.3606, -97.7204, 30.3576, 0.512207, -37.812, -87.1386, 18.9922, 0.449524, -22.3384, -73.7289, 12.8309, 0.288696, -15.1452, -50.5603, 38.1288, 0.238159, 5.46492, -46.765, 4.57304, 0.31604, 4.54105, -11.5529, 11.0104, 0.225098, 5.10417, -106.784, 19.1646, 0.338989, 8.38426, -105.863, 28.9746, 0.374878, -0.147999, -108.49, 17.901, 0.389282, -6.41806, -109.461, 25.684, 0.374512]
+}
+] }
\ No newline at end of file
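
The body3DScene frame above stores each tracked person as a flat `joints19` list, which in the Panoptic `coco19` layout appears to be 19 joints with four values each (x, y, z, then a per-joint confidence). A small parsing sketch follows, assuming that layout; the path and helper name are illustrative only.

```python
import json
import numpy as np

def load_skeletons(path):
    """Read one body3DScene_*.json frame and return {body_id: (19, 4) array}.

    Each row is assumed to be (x, y, z, confidence) for one coco19 joint,
    so low-confidence joints can be masked out downstream.
    """
    with open(path) as f:
        frame = json.load(f)
    skeletons = {}
    for body in frame["bodies"]:
        joints = np.array(body["joints19"], dtype=float).reshape(-1, 4)
        skeletons[body["id"]] = joints
    return skeletons

# Illustrative usage (relative path is hypothetical):
skels = load_skeletons("hdPose3d_stage1_coco19/body3DScene_00000139.json")
for body_id, joints in skels.items():
    visible = joints[:, 3] > 0.1                      # simple confidence gate
    print(body_id, joints[visible, :3].mean(axis=0))  # centroid of confident joints
```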
diff --git a/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000140.json b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000140.json
new file mode 100644
index 0000000000000000000000000000000000000000..e069734797ef5615dd6f326134aec5b1d86ec3b6
--- /dev/null
+++ b/vendor/ViTPose/tests/data/panoptic_body3d/160906_band2/hdPose3d_stage1_coco19/body3DScene_00000140.json
@@ -0,0 +1,15 @@
+{ "version": 0.7,
+"univTime" :47917.574,
+"fpsType" :"hd_29_97",
+"bodies" :
+[
+{ "id": 0,
+"joints19": [112.162, -105.636, -63.2601, 0.683655, 98.6079, -126.096, -55.7691, 0.633362, 109.895, -61.989, -62.8249, 0.434998, 103.92, -106.612, -77.9614, 0.633362, 96.8156, -78.8938, -80.3215, 0.628052, 81.2649, -65.6074, -63.3025, 0.5224, 104.579, -62.7546, -72.4201, 0.444275, 65.7338, -49.5044, -67.2157, 0.308167, 54.3536, -10.2596, -52.2195, 0.234131, 120.391, -105.108, -49.299, 0.603271, 116.181, -76.2786, -41.2666, 0.655334, 92.8451, -64.4865, -42.2389, 0.527039, 115.21, -61.2235, -53.2298, 0.411194, 85.1888, -47.835, -27.3393, 0.316833, 68.6198, -8.08162, -40.0417, 0.240723, 99.4313, -129.72, -59.6381, 0.558228, 105.205, -126.164, -67.7647, 0.423279, 102.011, -129.182, -53.6361, 0.597229, 111.982, -124.783, -53.0672, 0.522705]
+},
+{ "id": 1,
+"joints19": [-75.6746, -98.3656, -40.5723, 0.641663, -56.0544, -116.939, -44.5928, 0.603577, -74.4415, -55.0317, -36.6536, 0.252808, -83.918, -98.0843, -25.7209, 0.519165, -88.2603, -74.1941, -13.9948, 0.396057, -68.2309, -74.0839, -1.42833, 0.328918, -79.5003, -55.0092, -28.0401, 0.25, -51.5172, -47.7041, -7.04263, 0.294495, -74.7647, -17.5892, -29.1887, 0.228638, -67.361, -99.4319, -53.7077, 0.481934, -47.0466, -92.2037, -77.7492, 0.570923, -44.4639, -79.2762, -57.8438, 0.434448, -69.3827, -55.0541, -45.2672, 0.228271, -34.7812, -49.6926, -25.257, 0.32843, -40.4408, -8.21801, -31.4407, 0.283936, -56.906, -120.336, -40.6846, 0.523743, -62.7131, -116.956, -32.5538, 0.486816, -59.8485, -119.899, -46.5796, 0.469604, -69.74, -116.42, -47.7167, 0.333618]
+},
+{ "id": 2,
+"joints19": [-2.68829, -95.166, 36.4048, 0.630371, 2.45545, -104.642, 17.3385, 0.4422, -7.03441, -50.7646, 42.0224, 0.242737, 9.85479, -93.5391, 43.7496, 0.508667, 12.1837, -69.2865, 41.4611, 0.386414, 19.4591, -69.7157, 22.0246, 0.327209, 0.906971, -50.8216, 45.9673, 0.251282, 26.7278, -43.1852, 17.6063, 0.375671, 27.6219, -7.85445, 13.6172, 0.336487, -15.0965, -97.8212, 30.3863, 0.518982, -37.313, -87.4077, 18.8649, 0.450623, -22.0828, -73.86, 12.723, 0.291931, -14.9758, -50.7076, 38.0776, 0.235413, 5.50862, -46.8061, 4.64948, 0.321838, 4.38506, -11.3184, 11.4198, 0.237366, 5.50586, -106.921, 19.3049, 0.338745, 8.65006, -105.825, 29.1475, 0.375244, 0.254989, -108.642, 18.0692, 0.389526, -5.97449, -109.525, 25.9132, 0.369568]
+}
+] }
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_human_detections.json b/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_human_detections.json
new file mode 100644
index 0000000000000000000000000000000000000000..fb1bcf31512bab48c9da45571a13df3bc7ef671d
--- /dev/null
+++ b/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_human_detections.json
@@ -0,0 +1,3061 @@
+[
+ {
+ "bbox": [
+ 1475.2755126953125,
+ 2.719658136367798,
+ 96.9671630859375,
+ 252.88242316246033
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.9290200471878052
+ },
+ {
+ "bbox": [
+ 279.2542419433594,
+ 201.43528747558594,
+ 215.51690673828125,
+ 277.4363555908203
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.8697755932807922
+ },
+ {
+ "bbox": [
+ 375.3135070800781,
+ 1.6077430248260498,
+ 102.83343505859375,
+ 205.19831776618958
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.8078259229660034
+ },
+ {
+ "bbox": [
+ 1372.4200439453125,
+ 0.0,
+ 105.89013671875,
+ 242.61294555664062
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.7359948754310608
+ },
+ {
+ "bbox": [
+ 879.8322143554688,
+ 166.1944122314453,
+ 129.68414306640625,
+ 265.45030212402344
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.7012330293655396
+ },
+ {
+ "bbox": [
+ 1565.218994140625,
+ 0.6250243186950684,
+ 94.249267578125,
+ 251.48860788345337
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.6708132028579712
+ },
+ {
+ "bbox": [
+ 1625.5699462890625,
+ 34.00221633911133,
+ 113.07080078125,
+ 336.9929618835449
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.6564908027648926
+ },
+ {
+ "bbox": [
+ 1767.4072265625,
+ 0.0,
+ 94.924560546875,
+ 229.85476684570312
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.6467881202697754
+ },
+ {
+ "bbox": [
+ 956.6194458007812,
+ 900.006103515625,
+ 149.72381591796875,
+ 173.7783203125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.6429733037948608
+ },
+ {
+ "bbox": [
+ 574.7518310546875,
+ 876.6203002929688,
+ 133.7698974609375,
+ 200.78741455078125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.6194133758544922
+ },
+ {
+ "bbox": [
+ 467.8788146972656,
+ 776.9996948242188,
+ 108.48025512695312,
+ 287.51483154296875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.608767032623291
+ },
+ {
+ "bbox": [
+ 302.0422058105469,
+ 732.33837890625,
+ 124.57574462890625,
+ 331.01220703125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.5625099539756775
+ },
+ {
+ "bbox": [
+ 638.8469848632812,
+ 743.0866088867188,
+ 117.85137939453125,
+ 317.97259521484375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.5567368268966675
+ },
+ {
+ "bbox": [
+ 335.7384948730469,
+ 507.2187194824219,
+ 145.80545043945312,
+ 159.55679321289062
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.5184996724128723
+ },
+ {
+ "bbox": [
+ 1330.8204345703125,
+ 838.9266357421875,
+ 140.44580078125,
+ 240.1510009765625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.5148675441741943
+ },
+ {
+ "bbox": [
+ 720.7056884765625,
+ 2.9743223190307617,
+ 104.3197021484375,
+ 150.11820697784424
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.5129923820495605
+ },
+ {
+ "bbox": [
+ 196.63421630859375,
+ 693.4352416992188,
+ 119.49697875976562,
+ 362.00836181640625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.472736656665802
+ },
+ {
+ "bbox": [
+ 666.0804443359375,
+ 180.66146850585938,
+ 95.970458984375,
+ 213.87698364257812
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.4722053110599518
+ },
+ {
+ "bbox": [
+ 876.128173828125,
+ 339.4115905761719,
+ 135.45379638671875,
+ 319.6487121582031
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.4647904336452484
+ },
+ {
+ "bbox": [
+ 667.529296875,
+ 415.2683410644531,
+ 104.7076416015625,
+ 229.71560668945312
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.45972582697868347
+ },
+ {
+ "bbox": [
+ 112.86947631835938,
+ 264.6505432128906,
+ 144.888671875,
+ 191.26544189453125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.45595934987068176
+ },
+ {
+ "bbox": [
+ 1701.4876708984375,
+ 0.0,
+ 90.152587890625,
+ 221.60284423828125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.45339658856391907
+ },
+ {
+ "bbox": [
+ 1177.0682373046875,
+ 808.5385131835938,
+ 118.4273681640625,
+ 265.73162841796875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.4308188259601593
+ },
+ {
+ "bbox": [
+ 1581.5089111328125,
+ 773.6590576171875,
+ 153.54052734375,
+ 289.6710205078125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.4269048273563385
+ },
+ {
+ "bbox": [
+ 531.0040893554688,
+ 437.7104187011719,
+ 127.3616943359375,
+ 280.2588806152344
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.42152199149131775
+ },
+ {
+ "bbox": [
+ 1797.8150634765625,
+ 778.5232543945312,
+ 102.983642578125,
+ 292.46649169921875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.411865234375
+ },
+ {
+ "bbox": [
+ 1084.093505859375,
+ 2.85404109954834,
+ 93.6932373046875,
+ 210.73848819732666
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.40260007977485657
+ },
+ {
+ "bbox": [
+ 920.5157470703125,
+ 832.7113037109375,
+ 94.4918212890625,
+ 221.5032958984375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3867260217666626
+ },
+ {
+ "bbox": [
+ 1115.3507080078125,
+ 847.74365234375,
+ 109.4945068359375,
+ 226.804931640625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3844665586948395
+ },
+ {
+ "bbox": [
+ 1872.486083984375,
+ 19.00360679626465,
+ 42.8349609375,
+ 236.63503456115723
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.37733739614486694
+ },
+ {
+ "bbox": [
+ 1349.9853515625,
+ 210.24911499023438,
+ 131.93798828125,
+ 167.93081665039062
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3761371970176697
+ },
+ {
+ "bbox": [
+ 766.0445556640625,
+ 879.2682495117188,
+ 124.82427978515625,
+ 201.08441162109375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3682442009449005
+ },
+ {
+ "bbox": [
+ 817.4657592773438,
+ 0.0,
+ 80.7606201171875,
+ 168.49359130859375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3530486524105072
+ },
+ {
+ "bbox": [
+ 147.0262451171875,
+ 1.8125637769699097,
+ 79.67684936523438,
+ 99.51723968982697
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.31355297565460205
+ },
+ {
+ "bbox": [
+ 1159.018310546875,
+ 750.4727172851562,
+ 109.84375,
+ 160.12939453125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3134245276451111
+ },
+ {
+ "bbox": [
+ 201.1594696044922,
+ 625.8055419921875,
+ 77.64781188964844,
+ 134.331787109375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3070683181285858
+ },
+ {
+ "bbox": [
+ 1473.18359375,
+ 651.7177124023438,
+ 82.4835205078125,
+ 130.7080078125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.30168840289115906
+ },
+ {
+ "bbox": [
+ 932.6547241210938,
+ 0.0,
+ 94.53363037109375,
+ 160.51365661621094
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.3008910119533539
+ },
+ {
+ "bbox": [
+ 1700.9190673828125,
+ 828.179931640625,
+ 121.2147216796875,
+ 245.9788818359375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.29163801670074463
+ },
+ {
+ "bbox": [
+ 1634.7724609375,
+ 446.2858581542969,
+ 132.4085693359375,
+ 209.66311645507812
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.291547566652298
+ },
+ {
+ "bbox": [
+ 1556.4608154296875,
+ 473.771728515625,
+ 112.165283203125,
+ 180.64654541015625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2879399359226227
+ },
+ {
+ "bbox": [
+ 583.9107055664062,
+ 1.929314374923706,
+ 73.5870361328125,
+ 123.53908467292786
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.28340914845466614
+ },
+ {
+ "bbox": [
+ 1498.50634765625,
+ 698.7794799804688,
+ 96.718505859375,
+ 314.76446533203125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.28129440546035767
+ },
+ {
+ "bbox": [
+ 1280.0792236328125,
+ 775.8158569335938,
+ 76.7454833984375,
+ 188.51519775390625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.27848634123802185
+ },
+ {
+ "bbox": [
+ 1718.6058349609375,
+ 226.6940460205078,
+ 160.0238037109375,
+ 177.1758575439453
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.27552416920661926
+ },
+ {
+ "bbox": [
+ 756.9520263671875,
+ 810.5991821289062,
+ 83.45086669921875,
+ 189.677001953125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.27519550919532776
+ },
+ {
+ "bbox": [
+ 1728.245849609375,
+ 640.5650024414062,
+ 117.093994140625,
+ 210.5716552734375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.272867351770401
+ },
+ {
+ "bbox": [
+ 1772.5546875,
+ 525.9481201171875,
+ 132.1446533203125,
+ 174.74395751953125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2701846957206726
+ },
+ {
+ "bbox": [
+ 1305.05224609375,
+ 209.34393310546875,
+ 184.050048828125,
+ 414.58587646484375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.26895296573638916
+ },
+ {
+ "bbox": [
+ 810.69287109375,
+ 790.5480346679688,
+ 89.7996826171875,
+ 185.0943603515625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.26855093240737915
+ },
+ {
+ "bbox": [
+ 95.97314453125,
+ 724.7075805664062,
+ 114.75672912597656,
+ 298.14398193359375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.26742294430732727
+ },
+ {
+ "bbox": [
+ 1261.4110107421875,
+ 909.4841918945312,
+ 118.9820556640625,
+ 164.47723388671875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2666778564453125
+ },
+ {
+ "bbox": [
+ 1339.5250244140625,
+ 434.0279846191406,
+ 87.82666015625,
+ 147.42294311523438
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.26228952407836914
+ },
+ {
+ "bbox": [
+ 63.43070983886719,
+ 664.1151733398438,
+ 82.15074157714844,
+ 128.1494140625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.26013079285621643
+ },
+ {
+ "bbox": [
+ 1.3776787519454956,
+ 679.18505859375,
+ 111.62459480762482,
+ 224.9747314453125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2587812840938568
+ },
+ {
+ "bbox": [
+ 1439.8868408203125,
+ 816.7938842773438,
+ 97.72802734375,
+ 256.11944580078125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.256550669670105
+ },
+ {
+ "bbox": [
+ 660.9515380859375,
+ 744.8563842773438,
+ 94.61444091796875,
+ 115.916259765625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2563660442829132
+ },
+ {
+ "bbox": [
+ 556.6321411132812,
+ 0.0,
+ 31.12762451171875,
+ 77.6491470336914
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2539074718952179
+ },
+ {
+ "bbox": [
+ 414.3009948730469,
+ 682.0269165039062,
+ 92.76937866210938,
+ 310.0914306640625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.25366705656051636
+ },
+ {
+ "bbox": [
+ 1823.6094970703125,
+ 520.3126831054688,
+ 74.411865234375,
+ 80.507080078125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2529422640800476
+ },
+ {
+ "bbox": [
+ 258.0948486328125,
+ 2.8098771572113037,
+ 73.0369873046875,
+ 90.99600052833557
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.25058287382125854
+ },
+ {
+ "bbox": [
+ 508.9549560546875,
+ 714.0374145507812,
+ 132.6729736328125,
+ 206.59674072265625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.24579626321792603
+ },
+ {
+ "bbox": [
+ 1647.6907958984375,
+ 387.5267639160156,
+ 117.0858154296875,
+ 134.33120727539062
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2425207644701004
+ },
+ {
+ "bbox": [
+ 1445.354248046875,
+ 761.0438842773438,
+ 91.1209716796875,
+ 122.70550537109375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2403791844844818
+ },
+ {
+ "bbox": [
+ 1028.0394287109375,
+ 751.615478515625,
+ 101.6038818359375,
+ 172.39617919921875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.23425403237342834
+ },
+ {
+ "bbox": [
+ 10.321240425109863,
+ 668.003173828125,
+ 92.43458843231201,
+ 93.92236328125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.23368315398693085
+ },
+ {
+ "bbox": [
+ 480.19140625,
+ 3.0881388187408447,
+ 101.267578125,
+ 78.71852469444275
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2329442799091339
+ },
+ {
+ "bbox": [
+ 1319.99755859375,
+ 813.53125,
+ 58.90185546875,
+ 112.30328369140625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.23115667700767517
+ },
+ {
+ "bbox": [
+ 0.0,
+ 628.298828125,
+ 47.96708679199219,
+ 120.50457763671875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2270287126302719
+ },
+ {
+ "bbox": [
+ 298.7027893066406,
+ 666.9664306640625,
+ 119.76385498046875,
+ 144.8203125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2235877364873886
+ },
+ {
+ "bbox": [
+ 1054.49609375,
+ 1.8778526782989502,
+ 65.3221435546875,
+ 154.7142035961151
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.22313834726810455
+ },
+ {
+ "bbox": [
+ 296.7391052246094,
+ 680.0767822265625,
+ 35.053375244140625,
+ 69.30267333984375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.21813228726387024
+ },
+ {
+ "bbox": [
+ 1811.36962890625,
+ 285.1565246582031,
+ 102.1195068359375,
+ 269.7958679199219
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.21760663390159607
+ },
+ {
+ "bbox": [
+ 114.75823974609375,
+ 719.09228515625,
+ 74.72804260253906,
+ 83.634765625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2161155790090561
+ },
+ {
+ "bbox": [
+ 991.546875,
+ 1.210024356842041,
+ 59.4659423828125,
+ 152.63245916366577
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.2096937894821167
+ },
+ {
+ "bbox": [
+ 1852.13916015625,
+ 519.2532958984375,
+ 38.265380859375,
+ 43.08807373046875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.18011623620986938
+ },
+ {
+ "bbox": [
+ 316.677978515625,
+ 0.0,
+ 44.184600830078125,
+ 62.04084396362305
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.17839768528938293
+ },
+ {
+ "bbox": [
+ 1023.7964477539062,
+ 0.0,
+ 45.53558349609375,
+ 87.68540954589844
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.1771439015865326
+ },
+ {
+ "bbox": [
+ 0.0,
+ 690.8153076171875,
+ 27.172204971313477,
+ 55.42034912109375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.17463117837905884
+ },
+ {
+ "bbox": [
+ 1663.4932861328125,
+ 4.420060634613037,
+ 65.2760009765625,
+ 114.99270486831665
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.1590556651353836
+ },
+ {
+ "bbox": [
+ 1578.5491943359375,
+ 454.1618347167969,
+ 74.5714111328125,
+ 104.37033081054688
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.15501607954502106
+ },
+ {
+ "bbox": [
+ 544.5846557617188,
+ 697.2288208007812,
+ 35.70989990234375,
+ 26.73150634765625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.15327082574367523
+ },
+ {
+ "bbox": [
+ 534.465087890625,
+ 881.8455200195312,
+ 78.7249755859375,
+ 172.04473876953125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.14815860986709595
+ },
+ {
+ "bbox": [
+ 1873.2293701171875,
+ 834.9508056640625,
+ 45.2706298828125,
+ 230.974609375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.1479007452726364
+ },
+ {
+ "bbox": [
+ 146.6645965576172,
+ 723.4815673828125,
+ 30.512222290039062,
+ 41.179443359375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.13243095576763153
+ },
+ {
+ "bbox": [
+ 740.52490234375,
+ 10.856040000915527,
+ 38.1209716796875,
+ 77.29609775543213
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.1309206336736679
+ },
+ {
+ "bbox": [
+ 1783.414794921875,
+ 856.5660400390625,
+ 51.0806884765625,
+ 216.032958984375
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.13079363107681274
+ },
+ {
+ "bbox": [
+ 1353.722900390625,
+ 4.124818801879883,
+ 26.04736328125,
+ 36.974050521850586
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.12728439271450043
+ },
+ {
+ "bbox": [
+ 1423.4942626953125,
+ 875.3924560546875,
+ 16.2568359375,
+ 29.398681640625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.1250089704990387
+ },
+ {
+ "bbox": [
+ 1592.7584228515625,
+ 1.329086184501648,
+ 55.0660400390625,
+ 54.82293713092804
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.11483781039714813
+ },
+ {
+ "bbox": [
+ 1385.247314453125,
+ 7.618640422821045,
+ 19.5557861328125,
+ 37.21356248855591
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.11478649824857712
+ },
+ {
+ "bbox": [
+ 774.5552978515625,
+ 0.0,
+ 32.50115966796875,
+ 48.10002899169922
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.11244752258062363
+ },
+ {
+ "bbox": [
+ 1030.501953125,
+ 792.454833984375,
+ 44.9681396484375,
+ 111.78228759765625
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10898905247449875
+ },
+ {
+ "bbox": [
+ 302.1847229003906,
+ 695.43701171875,
+ 20.343109130859375,
+ 28.063720703125
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10741319507360458
+ },
+ {
+ "bbox": [
+ 1729.3040771484375,
+ 2.0999855995178223,
+ 26.806884765625,
+ 36.02122259140015
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10721274465322495
+ },
+ {
+ "bbox": [
+ 1762.438720703125,
+ 4.751255989074707,
+ 24.288818359375,
+ 40.14107036590576
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10624366253614426
+ },
+ {
+ "bbox": [
+ 211.49954223632812,
+ 328.7121887207031,
+ 56.994140625,
+ 60.76922607421875
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10590028017759323
+ },
+ {
+ "bbox": [
+ 1792.0831298828125,
+ 261.65728759765625,
+ 92.417236328125,
+ 84.54769897460938
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10410129278898239
+ },
+ {
+ "bbox": [
+ 1547.43359375,
+ 4.291971683502197,
+ 28.6832275390625,
+ 69.40435552597046
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10200422257184982
+ },
+ {
+ "bbox": [
+ 1335.0888671875,
+ 3.258249282836914,
+ 23.91845703125,
+ 32.369855880737305
+ ],
+ "category_id": 1,
+ "image_id": 10128340000,
+ "score": 0.10069120675325394
+ },
+ {
+ "bbox": [
+ 1283.4007568359375,
+ 6.713701248168945,
+ 629.122802734375,
+ 1056.8606395721436
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.9853803515434265
+ },
+ {
+ "bbox": [
+ 288.9501647949219,
+ 42.40924835205078,
+ 1185.7618713378906,
+ 999.2054977416992
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.9629650115966797
+ },
+ {
+ "bbox": [
+ 649.4730834960938,
+ 315.6942138671875,
+ 143.35650634765625,
+ 229.676513671875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8901010751724243
+ },
+ {
+ "bbox": [
+ 1058.3331298828125,
+ 258.07269287109375,
+ 310.98046875,
+ 259.15057373046875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8752242922782898
+ },
+ {
+ "bbox": [
+ 790.96240234375,
+ 182.09800720214844,
+ 105.51129150390625,
+ 97.01622009277344
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.872738242149353
+ },
+ {
+ "bbox": [
+ 777.576416015625,
+ 274.9346618652344,
+ 119.44439697265625,
+ 178.85000610351562
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8679455518722534
+ },
+ {
+ "bbox": [
+ 2.3131344318389893,
+ 412.2568054199219,
+ 273.67606234550476,
+ 235.93026733398438
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8616952300071716
+ },
+ {
+ "bbox": [
+ 8.783040046691895,
+ 198.89437866210938,
+ 196.3238935470581,
+ 266.4853515625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8512702584266663
+ },
+ {
+ "bbox": [
+ 220.74649047851562,
+ 94.02008056640625,
+ 98.13226318359375,
+ 124.78965759277344
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.7501042485237122
+ },
+ {
+ "bbox": [
+ 164.27354431152344,
+ 83.04096984863281,
+ 88.21920776367188,
+ 127.46699523925781
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.7067092061042786
+ },
+ {
+ "bbox": [
+ 1087.515625,
+ 181.69656372070312,
+ 87.4686279296875,
+ 72.61752319335938
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.702244758605957
+ },
+ {
+ "bbox": [
+ 1074.9063720703125,
+ 472.5963439941406,
+ 124.1480712890625,
+ 110.47763061523438
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.628270149230957
+ },
+ {
+ "bbox": [
+ 343.7706604003906,
+ 30.924612045288086,
+ 59.412750244140625,
+ 86.91977119445801
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5943357944488525
+ },
+ {
+ "bbox": [
+ 69.42112731933594,
+ 103.34648132324219,
+ 112.67413330078125,
+ 108.37942504882812
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5710238218307495
+ },
+ {
+ "bbox": [
+ 79.45482635498047,
+ 437.8648376464844,
+ 270.02677154541016,
+ 180.55715942382812
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.40784332156181335
+ },
+ {
+ "bbox": [
+ 1225.6717529296875,
+ 162.2100830078125,
+ 78.9639892578125,
+ 132.47430419921875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3427259922027588
+ },
+ {
+ "bbox": [
+ 0.9485000371932983,
+ 54.5380973815918,
+ 92.79364931583405,
+ 115.03351211547852
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.33483877778053284
+ },
+ {
+ "bbox": [
+ 1105.8240966796875,
+ 281.7027282714844,
+ 76.47314453125,
+ 55.8577880859375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3022329807281494
+ },
+ {
+ "bbox": [
+ 0.0,
+ 258.510498046875,
+ 85.2731704711914,
+ 205.99591064453125
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.277988463640213
+ },
+ {
+ "bbox": [
+ 1069.812255859375,
+ 430.1299133300781,
+ 178.785888671875,
+ 54.991607666015625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.25947925448417664
+ },
+ {
+ "bbox": [
+ 681.9738159179688,
+ 208.11050415039062,
+ 87.06488037109375,
+ 76.40863037109375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2577346861362457
+ },
+ {
+ "bbox": [
+ 684.65625,
+ 209.45753479003906,
+ 65.76763916015625,
+ 48.37471008300781
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.25362637639045715
+ },
+ {
+ "bbox": [
+ 1770.093017578125,
+ 45.35274887084961,
+ 148.260986328125,
+ 1012.7648048400879
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.23887047171592712
+ },
+ {
+ "bbox": [
+ 167.9042510986328,
+ 22.85419273376465,
+ 81.45010375976562,
+ 74.9856128692627
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.23093517124652863
+ },
+ {
+ "bbox": [
+ 686.263671875,
+ 45.065853118896484,
+ 418.443603515625,
+ 672.8133583068848
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.22159330546855927
+ },
+ {
+ "bbox": [
+ 1190.727783203125,
+ 260.0331115722656,
+ 45.408203125,
+ 42.90838623046875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2191120684146881
+ },
+ {
+ "bbox": [
+ 1051.7967529296875,
+ 212.4822998046875,
+ 37.3897705078125,
+ 71.61709594726562
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.13527318835258484
+ },
+ {
+ "bbox": [
+ 906.1925659179688,
+ 454.3064880371094,
+ 249.45501708984375,
+ 209.19338989257812
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.13330410420894623
+ },
+ {
+ "bbox": [
+ 852.9170532226562,
+ 360.49078369140625,
+ 25.87530517578125,
+ 70.86614990234375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.15234917402267456
+ },
+ {
+ "bbox": [
+ 609.119140625,
+ 295.8336181640625,
+ 98.669677734375,
+ 86.77999877929688
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.8445025086402893
+ },
+ {
+ "bbox": [
+ 378.2210693359375,
+ 156.46856689453125,
+ 79.51510620117188,
+ 59.65052795410156
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.7748774886131287
+ },
+ {
+ "bbox": [
+ 198.08822631835938,
+ 305.9843444824219,
+ 122.8443603515625,
+ 100.4822998046875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.7065314054489136
+ },
+ {
+ "bbox": [
+ 135.3995819091797,
+ 208.8668670654297,
+ 82.15673828125,
+ 32.42308044433594
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6814215779304504
+ },
+ {
+ "bbox": [
+ 535.6635131835938,
+ 300.5378112792969,
+ 94.14208984375,
+ 83.1962890625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6654942035675049
+ },
+ {
+ "bbox": [
+ 483.58563232421875,
+ 197.45590209960938,
+ 74.43743896484375,
+ 57.176239013671875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6608478426933289
+ },
+ {
+ "bbox": [
+ 215.0618896484375,
+ 210.8956756591797,
+ 69.7735595703125,
+ 29.752822875976562
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6438001394271851
+ },
+ {
+ "bbox": [
+ 166.78993225097656,
+ 260.73162841796875,
+ 81.71955871582031,
+ 33.886688232421875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6426426768302917
+ },
+ {
+ "bbox": [
+ 194.13543701171875,
+ 302.4077453613281,
+ 132.185302734375,
+ 203.56118774414062
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.64094477891922
+ },
+ {
+ "bbox": [
+ 24.686168670654297,
+ 160.48495483398438,
+ 65.35156631469727,
+ 43.957122802734375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6141790747642517
+ },
+ {
+ "bbox": [
+ 61.93497848510742,
+ 206.81692504882812,
+ 67.95804214477539,
+ 35.73725891113281
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.6034325361251831
+ },
+ {
+ "bbox": [
+ 684.8605346679688,
+ 296.6944274902344,
+ 60.11041259765625,
+ 79.523681640625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5703861713409424
+ },
+ {
+ "bbox": [
+ 277.9051818847656,
+ 118.02881622314453,
+ 75.3424072265625,
+ 74.72411346435547
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5354023575782776
+ },
+ {
+ "bbox": [
+ 557.520751953125,
+ 208.25003051757812,
+ 63.16949462890625,
+ 47.47157287597656
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5207008719444275
+ },
+ {
+ "bbox": [
+ 389.46875,
+ 260.3998718261719,
+ 95.03842163085938,
+ 28.859283447265625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.5194308757781982
+ },
+ {
+ "bbox": [
+ 246.87026977539062,
+ 258.12652587890625,
+ 83.399658203125,
+ 36.68548583984375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.47507211565971375
+ },
+ {
+ "bbox": [
+ 230.82713317871094,
+ 51.341026306152344,
+ 59.52711486816406,
+ 42.373046875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.4719221889972687
+ },
+ {
+ "bbox": [
+ 371.5136413574219,
+ 302.7303771972656,
+ 84.49050903320312,
+ 68.41122436523438
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.44887304306030273
+ },
+ {
+ "bbox": [
+ 449.14666748046875,
+ 303.34552001953125,
+ 95.31640625,
+ 48.94390869140625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.42651283740997314
+ },
+ {
+ "bbox": [
+ 59.20182800292969,
+ 77.63203430175781,
+ 69.07972717285156,
+ 36.52244567871094
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.42590340971946716
+ },
+ {
+ "bbox": [
+ 370.47991943359375,
+ 210.2904510498047,
+ 66.41464233398438,
+ 33.1710205078125
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.4237402677536011
+ },
+ {
+ "bbox": [
+ 475.22509765625,
+ 124.54940032958984,
+ 57.011474609375,
+ 40.61431121826172
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3908300995826721
+ },
+ {
+ "bbox": [
+ 467.0397033691406,
+ 66.16106414794922,
+ 47.917999267578125,
+ 27.583763122558594
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.38647398352622986
+ },
+ {
+ "bbox": [
+ 288.4964904785156,
+ 305.16815185546875,
+ 99.31219482421875,
+ 87.7886962890625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3735053241252899
+ },
+ {
+ "bbox": [
+ 444.114990234375,
+ 90.43252563476562,
+ 51.553955078125,
+ 31.16741943359375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.37254029512405396
+ },
+ {
+ "bbox": [
+ 99.98625183105469,
+ 40.55061340332031,
+ 76.22004699707031,
+ 65.01245880126953
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3680468797683716
+ },
+ {
+ "bbox": [
+ 294.51318359375,
+ 54.41352844238281,
+ 54.0465087890625,
+ 41.265953063964844
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3454741835594177
+ },
+ {
+ "bbox": [
+ 264.3034362792969,
+ 83.36378479003906,
+ 58.63067626953125,
+ 45.3909912109375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.33034616708755493
+ },
+ {
+ "bbox": [
+ 875.2257690429688,
+ 294.2908020019531,
+ 63.034912109375,
+ 73.73040771484375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.31166598200798035
+ },
+ {
+ "bbox": [
+ 552.3424072265625,
+ 102.28469848632812,
+ 53.5325927734375,
+ 32.012359619140625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.31135886907577515
+ },
+ {
+ "bbox": [
+ 447.3630065917969,
+ 159.95870971679688,
+ 75.57168579101562,
+ 79.81913757324219
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3080102503299713
+ },
+ {
+ "bbox": [
+ 744.2843627929688,
+ 170.82386779785156,
+ 48.20263671875,
+ 32.58000183105469
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.3024618923664093
+ },
+ {
+ "bbox": [
+ 518.8668823242188,
+ 173.53623962402344,
+ 57.2681884765625,
+ 28.869842529296875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.28725939989089966
+ },
+ {
+ "bbox": [
+ 578.883056640625,
+ 242.28355407714844,
+ 105.27862548828125,
+ 45.62568664550781
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2870064973831177
+ },
+ {
+ "bbox": [
+ 620.3238525390625,
+ 214.0165557861328,
+ 57.0029296875,
+ 29.954849243164062
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.27958208322525024
+ },
+ {
+ "bbox": [
+ 346.06988525390625,
+ 128.56320190429688,
+ 70.56277465820312,
+ 74.94837951660156
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2788334786891937
+ },
+ {
+ "bbox": [
+ 414.5040588378906,
+ 125.69651794433594,
+ 59.56060791015625,
+ 34.760101318359375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.27825745940208435
+ },
+ {
+ "bbox": [
+ 345.8397216796875,
+ 258.0870056152344,
+ 194.8671875,
+ 35.27862548828125
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2586188018321991
+ },
+ {
+ "bbox": [
+ 687.569091796875,
+ 163.837158203125,
+ 51.50909423828125,
+ 39.52703857421875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.24999305605888367
+ },
+ {
+ "bbox": [
+ 625.0399780273438,
+ 392.7872314453125,
+ 67.018310546875,
+ 72.13482666015625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2429981678724289
+ },
+ {
+ "bbox": [
+ 498.5255432128906,
+ 99.42186737060547,
+ 53.512054443359375,
+ 31.006126403808594
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.24067141115665436
+ },
+ {
+ "bbox": [
+ 142.8480224609375,
+ 309.98309326171875,
+ 82.30924987792969,
+ 98.9852294921875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.23763252794742584
+ },
+ {
+ "bbox": [
+ 536.9259643554688,
+ 133.77972412109375,
+ 53.9805908203125,
+ 43.579833984375
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2375190556049347
+ },
+ {
+ "bbox": [
+ 885.564453125,
+ 239.24940490722656,
+ 57.38165283203125,
+ 37.30012512207031
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.23535390198230743
+ },
+ {
+ "bbox": [
+ 395.301513671875,
+ 92.57003784179688,
+ 47.01910400390625,
+ 38.36552429199219
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.23471194505691528
+ },
+ {
+ "bbox": [
+ 409.6800537109375,
+ 60.70526123046875,
+ 51.487091064453125,
+ 32.35259246826172
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.21594807505607605
+ },
+ {
+ "bbox": [
+ 590.739013671875,
+ 132.8422393798828,
+ 55.618408203125,
+ 34.99034118652344
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.21444948017597198
+ },
+ {
+ "bbox": [
+ 142.70018005371094,
+ 14.566540718078613,
+ 56.78106689453125,
+ 33.07197093963623
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.2036537081003189
+ },
+ {
+ "bbox": [
+ 320.72296142578125,
+ 194.36314392089844,
+ 42.888824462890625,
+ 34.97528076171875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.20269575715065002
+ },
+ {
+ "bbox": [
+ 479.15374755859375,
+ 264.8033142089844,
+ 71.17230224609375,
+ 25.205291748046875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.1989617943763733
+ },
+ {
+ "bbox": [
+ 0.3339415192604065,
+ 187.03533935546875,
+ 50.64700025320053,
+ 20.45751953125
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.19690930843353271
+ },
+ {
+ "bbox": [
+ 74.00901794433594,
+ 105.07601165771484,
+ 66.710693359375,
+ 56.327720642089844
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.19045573472976685
+ },
+ {
+ "bbox": [
+ 347.0372314453125,
+ 259.55914306640625,
+ 53.66485595703125,
+ 32.394195556640625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.18698135018348694
+ },
+ {
+ "bbox": [
+ 67.07357025146484,
+ 9.42569351196289,
+ 74.41902923583984,
+ 62.75996780395508
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.1855248659849167
+ },
+ {
+ "bbox": [
+ 893.28857421875,
+ 213.1145782470703,
+ 46.3870849609375,
+ 34.87232971191406
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.17870844900608063
+ },
+ {
+ "bbox": [
+ 611.6231079101562,
+ 106.5094223022461,
+ 44.85430908203125,
+ 29.061744689941406
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.17700931429862976
+ },
+ {
+ "bbox": [
+ 847.1093139648438,
+ 286.3870849609375,
+ 56.32452392578125,
+ 86.06158447265625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.16932892799377441
+ },
+ {
+ "bbox": [
+ 445.4731140136719,
+ 97.76200103759766,
+ 49.56451416015625,
+ 45.203514099121094
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.16094166040420532
+ },
+ {
+ "bbox": [
+ 83.2696304321289,
+ 238.672607421875,
+ 87.30387115478516,
+ 59.288787841796875
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.1571291834115982
+ },
+ {
+ "bbox": [
+ 644.8650512695312,
+ 134.5099639892578,
+ 52.570556640625,
+ 45.77696228027344
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.14659520983695984
+ },
+ {
+ "bbox": [
+ 798.9510498046875,
+ 176.64842224121094,
+ 34.15826416015625,
+ 27.026199340820312
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.14340169727802277
+ },
+ {
+ "bbox": [
+ 289.8072204589844,
+ 2.8699655532836914,
+ 57.560302734375,
+ 31.036349296569824
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.12905792891979218
+ },
+ {
+ "bbox": [
+ 273.2252502441406,
+ 120.26922607421875,
+ 33.325103759765625,
+ 36.83570861816406
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.12813062965869904
+ },
+ {
+ "bbox": [
+ 536.1267700195312,
+ 301.2402038574219,
+ 105.0225830078125,
+ 164.69992065429688
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.1251327097415924
+ },
+ {
+ "bbox": [
+ 577.738037109375,
+ 167.33460998535156,
+ 52.75921630859375,
+ 43.77146911621094
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.1169745996594429
+ },
+ {
+ "bbox": [
+ 10.653980255126953,
+ 1.5155118703842163,
+ 64.12058639526367,
+ 63.142767548561096
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.11120772361755371
+ },
+ {
+ "bbox": [
+ 290.7361145019531,
+ 305.92962646484375,
+ 81.94302368164062,
+ 186.35324096679688
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.10804451256990433
+ },
+ {
+ "bbox": [
+ 383.0464172363281,
+ 33.47468948364258,
+ 42.016937255859375,
+ 40.26395034790039
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.10608372837305069
+ },
+ {
+ "bbox": [
+ 373.3436279296875,
+ 299.032470703125,
+ 162.34857177734375,
+ 71.123291015625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.10598088800907135
+ },
+ {
+ "bbox": [
+ 347.5797424316406,
+ 7.471529960632324,
+ 51.544647216796875,
+ 25.57726001739502
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.10507849603891373
+ },
+ {
+ "bbox": [
+ 9.35350513458252,
+ 944.8892211914062,
+ 1300.14759349823,
+ 121.89459228515625
+ ],
+ "category_id": 1,
+ "image_id": 10034180000,
+ "score": 0.21530765295028687
+ },
+ {
+ "bbox": [
+ 639.7239379882812,
+ 226.8717498779297,
+ 344.6689453125,
+ 663.6336212158203
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.988675594329834
+ },
+ {
+ "bbox": [
+ 6.2749924659729,
+ 351.6357116699219,
+ 243.3602614402771,
+ 364.3725280761719
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.956828773021698
+ },
+ {
+ "bbox": [
+ 461.7480163574219,
+ 277.44110107421875,
+ 115.16329956054688,
+ 186.4822998046875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.9538608193397522
+ },
+ {
+ "bbox": [
+ 1768.55322265625,
+ 245.51446533203125,
+ 138.985595703125,
+ 304.20843505859375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.9133968949317932
+ },
+ {
+ "bbox": [
+ 1155.5684814453125,
+ 359.0439453125,
+ 191.2630615234375,
+ 272.81744384765625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.9098905920982361
+ },
+ {
+ "bbox": [
+ 1259.7314453125,
+ 366.961181640625,
+ 90.6544189453125,
+ 138.16278076171875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.7968080043792725
+ },
+ {
+ "bbox": [
+ 480.37066650390625,
+ 386.0138854980469,
+ 150.568115234375,
+ 280.1358337402344
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.7637147307395935
+ },
+ {
+ "bbox": [
+ 263.7475280761719,
+ 188.89967346191406,
+ 90.03085327148438,
+ 113.91123962402344
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.7468248605728149
+ },
+ {
+ "bbox": [
+ 162.36859130859375,
+ 187.40757751464844,
+ 105.68603515625,
+ 143.9015655517578
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.7130147814750671
+ },
+ {
+ "bbox": [
+ 139.2628936767578,
+ 291.9899597167969,
+ 106.13040161132812,
+ 205.92654418945312
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.7115177512168884
+ },
+ {
+ "bbox": [
+ 1365.2760009765625,
+ 246.45489501953125,
+ 66.708984375,
+ 145.35330200195312
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.6987277865409851
+ },
+ {
+ "bbox": [
+ 1486.121337890625,
+ 449.1069641113281,
+ 68.625732421875,
+ 118.49978637695312
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.6513593792915344
+ },
+ {
+ "bbox": [
+ 1354.540771484375,
+ 443.40478515625,
+ 147.19580078125,
+ 194.12603759765625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.6448480486869812
+ },
+ {
+ "bbox": [
+ 1363.81591796875,
+ 373.9744567871094,
+ 81.1202392578125,
+ 102.91085815429688
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.5243184566497803
+ },
+ {
+ "bbox": [
+ 1514.0146484375,
+ 319.5240783691406,
+ 75.83056640625,
+ 144.65200805664062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.504604697227478
+ },
+ {
+ "bbox": [
+ 355.92431640625,
+ 377.6044921875,
+ 114.5035400390625,
+ 120.37677001953125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.4970506429672241
+ },
+ {
+ "bbox": [
+ 1582.33203125,
+ 266.6174621582031,
+ 98.7462158203125,
+ 264.5225524902344
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.48399269580841064
+ },
+ {
+ "bbox": [
+ 353.9928283691406,
+ 371.8907470703125,
+ 121.08633422851562,
+ 262.55682373046875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.4818037748336792
+ },
+ {
+ "bbox": [
+ 362.9367370605469,
+ 147.3871612548828,
+ 75.418212890625,
+ 109.99433898925781
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.4351760447025299
+ },
+ {
+ "bbox": [
+ 1241.2064208984375,
+ 368.8930969238281,
+ 127.748291015625,
+ 264.2134704589844
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.38909056782722473
+ },
+ {
+ "bbox": [
+ 1681.270263671875,
+ 256.126220703125,
+ 83.576416015625,
+ 137.42578125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.364656925201416
+ },
+ {
+ "bbox": [
+ 0.0,
+ 167.76327514648438,
+ 91.63196563720703,
+ 236.555419921875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.35032832622528076
+ },
+ {
+ "bbox": [
+ 1439.95703125,
+ 270.9534606933594,
+ 100.35986328125,
+ 218.63064575195312
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.346635103225708
+ },
+ {
+ "bbox": [
+ 1318.2305908203125,
+ 424.5197448730469,
+ 115.10791015625,
+ 192.50259399414062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.3269309401512146
+ },
+ {
+ "bbox": [
+ 1052.64013671875,
+ 287.7257385253906,
+ 63.3641357421875,
+ 172.54461669921875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.24086904525756836
+ },
+ {
+ "bbox": [
+ 1053.502197265625,
+ 331.1842346191406,
+ 227.3038330078125,
+ 310.5895080566406
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.21309363842010498
+ },
+ {
+ "bbox": [
+ 1070.9603271484375,
+ 360.4552917480469,
+ 96.628173828125,
+ 133.9866943359375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.18517304956912994
+ },
+ {
+ "bbox": [
+ 1665.9293212890625,
+ 255.31796264648438,
+ 146.314697265625,
+ 291.3702697753906
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.17204511165618896
+ },
+ {
+ "bbox": [
+ 405.0735778808594,
+ 386.8234558105469,
+ 190.69692993164062,
+ 313.5556945800781
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.15523910522460938
+ },
+ {
+ "bbox": [
+ 1589.0211181640625,
+ 265.5631103515625,
+ 84.9398193359375,
+ 150.40841674804688
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.15313847362995148
+ },
+ {
+ "bbox": [
+ 0.9758958220481873,
+ 422.1836853027344,
+ 142.32709795236588,
+ 306.2699279785156
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.146592915058136
+ },
+ {
+ "bbox": [
+ 1419.790283203125,
+ 240.48899841308594,
+ 55.875,
+ 102.48948669433594
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.14388331770896912
+ },
+ {
+ "bbox": [
+ 1142.052001953125,
+ 372.945068359375,
+ 375.743896484375,
+ 263.99609375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1362028419971466
+ },
+ {
+ "bbox": [
+ 1149.924560546875,
+ 228.89898681640625,
+ 77.2176513671875,
+ 141.24282836914062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.13104568421840668
+ },
+ {
+ "bbox": [
+ 7.145267009735107,
+ 362.8689270019531,
+ 148.28553438186646,
+ 151.63449096679688
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.130157008767128
+ },
+ {
+ "bbox": [
+ 1115.1795654296875,
+ 359.9970703125,
+ 55.0574951171875,
+ 73.02313232421875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1132773831486702
+ },
+ {
+ "bbox": [
+ 1797.716552734375,
+ 246.42071533203125,
+ 108.528076171875,
+ 179.66299438476562
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.10333290696144104
+ },
+ {
+ "bbox": [
+ 1281.1473388671875,
+ 254.05291748046875,
+ 95.2158203125,
+ 128.24417114257812
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.09135308116674423
+ },
+ {
+ "bbox": [
+ 483.60968017578125,
+ 383.16656494140625,
+ 106.47314453125,
+ 105.37130737304688
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.08747227489948273
+ },
+ {
+ "bbox": [
+ 1183.970458984375,
+ 248.7894744873047,
+ 123.838623046875,
+ 133.18003845214844
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.07844730466604233
+ },
+ {
+ "bbox": [
+ 1157.6649169921875,
+ 358.5057678222656,
+ 153.3060302734375,
+ 142.8681640625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.07668760418891907
+ },
+ {
+ "bbox": [
+ 158.5989532470703,
+ 3.899838924407959,
+ 94.29812622070312,
+ 113.55939722061157
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.0562337264418602
+ },
+ {
+ "bbox": [
+ 1046.19189453125,
+ 303.1739196777344,
+ 146.7403564453125,
+ 295.9938049316406
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05225243791937828
+ },
+ {
+ "bbox": [
+ 1075.177490234375,
+ 351.35552978515625,
+ 187.2501220703125,
+ 145.95687866210938
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.052039798349142075
+ },
+ {
+ "bbox": [
+ 4.226436614990234,
+ 596.753662109375,
+ 145.0108528137207,
+ 141.51971435546875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.44805338978767395
+ },
+ {
+ "bbox": [
+ 1471.1275634765625,
+ 546.7749633789062,
+ 409.1026611328125,
+ 85.891845703125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.17510481178760529
+ },
+ {
+ "bbox": [
+ 9.595407485961914,
+ 136.05421447753906,
+ 273.3134059906006,
+ 50.703155517578125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.14366888999938965
+ },
+ {
+ "bbox": [
+ 921.6530151367188,
+ 497.646484375,
+ 100.19329833984375,
+ 244.272216796875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.41841089725494385
+ },
+ {
+ "bbox": [
+ 1837.094482421875,
+ 311.22064208984375,
+ 30.9761962890625,
+ 48.001678466796875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.08423541486263275
+ },
+ {
+ "bbox": [
+ 1839.4462890625,
+ 311.10064697265625,
+ 37.092529296875,
+ 71.60287475585938
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.060598306357860565
+ },
+ {
+ "bbox": [
+ 332.7347412109375,
+ 440.8306579589844,
+ 26.84356689453125,
+ 49.14508056640625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.4217357635498047
+ },
+ {
+ "bbox": [
+ 1074.7474365234375,
+ 455.2643127441406,
+ 38.0753173828125,
+ 24.68829345703125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.10941091924905777
+ },
+ {
+ "bbox": [
+ 1034.816162109375,
+ 433.4083251953125,
+ 37.64892578125,
+ 38.33526611328125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05890995264053345
+ },
+ {
+ "bbox": [
+ 1133.7620849609375,
+ 508.0845642089844,
+ 70.1640625,
+ 130.23025512695312
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.4846752882003784
+ },
+ {
+ "bbox": [
+ 3.005446195602417,
+ 553.9013671875,
+ 142.2049114704132,
+ 183.9932861328125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.3487741947174072
+ },
+ {
+ "bbox": [
+ 272.37786865234375,
+ 411.44207763671875,
+ 81.43817138671875,
+ 55.8065185546875
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.21865300834178925
+ },
+ {
+ "bbox": [
+ 0.24188603460788727,
+ 453.31536865234375,
+ 148.4058190435171,
+ 234.45562744140625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.20409443974494934
+ },
+ {
+ "bbox": [
+ 30.815982818603516,
+ 605.8007202148438,
+ 125.22259140014648,
+ 55.677490234375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1920752078294754
+ },
+ {
+ "bbox": [
+ 1133.945556640625,
+ 486.0797424316406,
+ 142.3828125,
+ 149.95669555664062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1483757048845291
+ },
+ {
+ "bbox": [
+ 1113.4261474609375,
+ 458.69744873046875,
+ 106.506103515625,
+ 181.26995849609375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.13916584849357605
+ },
+ {
+ "bbox": [
+ 436.55487060546875,
+ 457.7103576660156,
+ 156.08184814453125,
+ 253.82962036132812
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.11243928223848343
+ },
+ {
+ "bbox": [
+ 284.70098876953125,
+ 303.1107482910156,
+ 85.747802734375,
+ 79.47280883789062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.09735624492168427
+ },
+ {
+ "bbox": [
+ 341.12774658203125,
+ 492.6709289550781,
+ 93.78155517578125,
+ 206.94662475585938
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.08169478923082352
+ },
+ {
+ "bbox": [
+ 0.0,
+ 549.1785278320312,
+ 119.3069839477539,
+ 111.58819580078125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.06240745261311531
+ },
+ {
+ "bbox": [
+ 7.89318323135376,
+ 645.31689453125,
+ 136.12907934188843,
+ 87.29620361328125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.054731015115976334
+ },
+ {
+ "bbox": [
+ 213.0941619873047,
+ 411.15179443359375,
+ 42.60209655761719,
+ 32.3763427734375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05244819447398186
+ },
+ {
+ "bbox": [
+ 130.32546997070312,
+ 487.7962951660156,
+ 300.62261962890625,
+ 236.79757690429688
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.8464512825012207
+ },
+ {
+ "bbox": [
+ 943.7500610351562,
+ 463.9021911621094,
+ 207.76824951171875,
+ 177.45816040039062
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.6438319087028503
+ },
+ {
+ "bbox": [
+ 126.81778717041016,
+ 458.4678955078125,
+ 290.0162582397461,
+ 57.33453369140625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1522291600704193
+ },
+ {
+ "bbox": [
+ 936.41162109375,
+ 449.2172546386719,
+ 192.6654052734375,
+ 35.686859130859375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.14834310114383698
+ },
+ {
+ "bbox": [
+ 887.5518798828125,
+ 464.12335205078125,
+ 357.5780029296875,
+ 257.0013427734375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.1072755679488182
+ },
+ {
+ "bbox": [
+ 7.024689197540283,
+ 405.5135498046875,
+ 338.4464716911316,
+ 278.21929931640625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.09115041047334671
+ },
+ {
+ "bbox": [
+ 161.556884765625,
+ 482.5937805175781,
+ 242.09451293945312,
+ 30.944366455078125
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.06512065976858139
+ },
+ {
+ "bbox": [
+ 933.17236328125,
+ 423.4557800292969,
+ 280.425048828125,
+ 68.74118041992188
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.06368233263492584
+ },
+ {
+ "bbox": [
+ 121.80384063720703,
+ 488.6224060058594,
+ 311.62242889404297,
+ 45.982086181640625
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05987454578280449
+ },
+ {
+ "bbox": [
+ 929.5904541015625,
+ 436.67840576171875,
+ 381.5384521484375,
+ 75.44134521484375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05840197578072548
+ },
+ {
+ "bbox": [
+ 235.27882385253906,
+ 486.3999328613281,
+ 136.96372985839844,
+ 23.562835693359375
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05732391029596329
+ },
+ {
+ "bbox": [
+ 69.67058563232422,
+ 417.2310485839844,
+ 672.0211868286133,
+ 311.6492614746094
+ ],
+ "category_id": 1,
+ "image_id": 10094730000,
+ "score": 0.05091623216867447
+ }
+
+]
diff --git a/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_val.json b/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_val.json
new file mode 100644
index 0000000000000000000000000000000000000000..a419bee11a24f416712f05094b498060ccda7b29
--- /dev/null
+++ b/vendor/ViTPose/tests/data/posetrack18/annotations/test_posetrack18_val.json
@@ -0,0 +1,2382 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "head_bottom",
+ "head_top",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/012834_mpii_test/000000.jpg",
+ "nframes": 140,
+ "frame_id": 10128340000,
+ "vid_id": "012834",
+ "ignore_regions_y": [
+ [
+ 1079,
+ 615,
+ 612,
+ 674,
+ 660,
+ 664,
+ 678,
+ 713,
+ 704,
+ 667,
+ 665,
+ 678,
+ 700,
+ 729,
+ 753,
+ 740,
+ 695,
+ 668,
+ 646,
+ 623,
+ 624,
+ 659,
+ 676,
+ 685,
+ 695,
+ 678,
+ 675,
+ 673,
+ 663,
+ 693,
+ 703,
+ 732,
+ 719,
+ 690,
+ 669,
+ 660,
+ 660,
+ 663,
+ 689,
+ 697,
+ 691,
+ 697,
+ 713,
+ 736,
+ 721,
+ 703,
+ 698,
+ 708,
+ 741,
+ 758,
+ 778,
+ 795,
+ 780,
+ 757,
+ 745,
+ 737,
+ 745,
+ 782,
+ 809,
+ 850,
+ 881,
+ 835,
+ 810,
+ 806,
+ 782,
+ 782,
+ 806,
+ 832,
+ 863,
+ 897,
+ 940,
+ 931,
+ 957,
+ 976,
+ 1003,
+ 1043,
+ 1045,
+ 1008,
+ 965,
+ 963,
+ 931,
+ 895,
+ 846,
+ 832,
+ 829,
+ 869,
+ 898,
+ 904,
+ 869,
+ 836,
+ 782,
+ 752,
+ 736,
+ 748,
+ 776,
+ 832,
+ 874,
+ 862,
+ 818,
+ 793,
+ 790,
+ 756,
+ 738,
+ 737,
+ 750,
+ 791,
+ 806,
+ 820,
+ 852,
+ 879,
+ 837,
+ 794,
+ 772,
+ 766,
+ 769,
+ 790,
+ 805,
+ 829,
+ 844,
+ 866,
+ 837,
+ 804,
+ 791,
+ 773,
+ 745,
+ 706,
+ 683,
+ 644,
+ 638,
+ 662,
+ 694,
+ 716,
+ 736,
+ 777,
+ 784,
+ 815,
+ 830,
+ 813,
+ 800,
+ 813,
+ 820,
+ 847,
+ 829,
+ 781,
+ 780,
+ 801,
+ 836,
+ 886,
+ 938,
+ 1018,
+ 1029,
+ 1079
+ ],
+ [
+ 0,
+ 21,
+ 43,
+ 60,
+ 90,
+ 95,
+ 95,
+ 43,
+ 40,
+ 84,
+ 104,
+ 104,
+ 74,
+ 6,
+ 4,
+ 71,
+ 69,
+ 0
+ ],
+ [
+ 0,
+ 4,
+ 48,
+ 106,
+ 214,
+ 207,
+ 46,
+ 50,
+ 170,
+ 156,
+ 96,
+ 157,
+ 160,
+ 62,
+ 65,
+ 156,
+ 165,
+ 162,
+ 140,
+ 93,
+ 93,
+ 7,
+ 4,
+ 121,
+ 129,
+ 84,
+ 75,
+ 68
+ ],
+ [
+ 0,
+ 0,
+ 739,
+ 729,
+ 720,
+ 740,
+ 768,
+ 785,
+ 803,
+ 815,
+ 757,
+ 735,
+ 632,
+ 620,
+ 632,
+ 640,
+ 662,
+ 656,
+ 607,
+ 645,
+ 645,
+ 628,
+ 604,
+ 570,
+ 543,
+ 512,
+ 485,
+ 467,
+ 451,
+ 448,
+ 456,
+ 482,
+ 512,
+ 548,
+ 554,
+ 542,
+ 498,
+ 479,
+ 454,
+ 404,
+ 387,
+ 398,
+ 415,
+ 528,
+ 546,
+ 468,
+ 410,
+ 400,
+ 359,
+ 375,
+ 373,
+ 273,
+ 254,
+ 284,
+ 253,
+ 204,
+ 206
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 3,
+ 0,
+ 30,
+ 44,
+ 74,
+ 99,
+ 106,
+ 102,
+ 115,
+ 121,
+ 141,
+ 156,
+ 165,
+ 187,
+ 200,
+ 211,
+ 196,
+ 198,
+ 210,
+ 226,
+ 252,
+ 266,
+ 263,
+ 271,
+ 291,
+ 299,
+ 326,
+ 339,
+ 360,
+ 399,
+ 412,
+ 424,
+ 437,
+ 432,
+ 439,
+ 461,
+ 489,
+ 510,
+ 534,
+ 548,
+ 559,
+ 567,
+ 587,
+ 593,
+ 604,
+ 612,
+ 633,
+ 652,
+ 645,
+ 638,
+ 649,
+ 650,
+ 661,
+ 654,
+ 662,
+ 685,
+ 713,
+ 727,
+ 733,
+ 752,
+ 762,
+ 769,
+ 785,
+ 812,
+ 841,
+ 863,
+ 869,
+ 877,
+ 899,
+ 909,
+ 918,
+ 906,
+ 902,
+ 909,
+ 917,
+ 900,
+ 932,
+ 932,
+ 941,
+ 919,
+ 926,
+ 935,
+ 950,
+ 957,
+ 983,
+ 1002,
+ 1007,
+ 1032,
+ 1034,
+ 1018,
+ 1018,
+ 1038,
+ 1074,
+ 1106,
+ 1119,
+ 1121,
+ 1130,
+ 1148,
+ 1152,
+ 1172,
+ 1195,
+ 1199,
+ 1209,
+ 1229,
+ 1242,
+ 1240,
+ 1242,
+ 1261,
+ 1264,
+ 1277,
+ 1285,
+ 1286,
+ 1296,
+ 1313,
+ 1336,
+ 1350,
+ 1367,
+ 1403,
+ 1417,
+ 1435,
+ 1459,
+ 1456,
+ 1429,
+ 1420,
+ 1465,
+ 1492,
+ 1496,
+ 1507,
+ 1529,
+ 1553,
+ 1570,
+ 1596,
+ 1609,
+ 1610,
+ 1649,
+ 1671,
+ 1703,
+ 1740,
+ 1763,
+ 1775,
+ 1803,
+ 1809,
+ 1815,
+ 1815,
+ 1857,
+ 1874,
+ 1881,
+ 1897,
+ 1896,
+ 1899,
+ 1888,
+ 1884
+ ],
+ [
+ 378,
+ 381,
+ 365,
+ 359,
+ 334,
+ 292,
+ 257,
+ 262,
+ 231,
+ 236,
+ 219,
+ 193,
+ 196,
+ 183,
+ 154,
+ 159,
+ 140,
+ 121
+ ],
+ [
+ 451,
+ 1173,
+ 1168,
+ 1168,
+ 1170,
+ 1085,
+ 1098,
+ 1070,
+ 1043,
+ 1000,
+ 993,
+ 979,
+ 934,
+ 937,
+ 918,
+ 903,
+ 893,
+ 832,
+ 785,
+ 759,
+ 726,
+ 710,
+ 667,
+ 664,
+ 585,
+ 576,
+ 507,
+ 485
+ ],
+ [
+ 1312,
+ 1918,
+ 1917,
+ 1895,
+ 1867,
+ 1835,
+ 1804,
+ 1779,
+ 1754,
+ 1720,
+ 1726,
+ 1739,
+ 1740,
+ 1735,
+ 1701,
+ 1635,
+ 1587,
+ 1578,
+ 1587,
+ 1564,
+ 1550,
+ 1543,
+ 1562,
+ 1579,
+ 1578,
+ 1581,
+ 1584,
+ 1589,
+ 1601,
+ 1610,
+ 1621,
+ 1637,
+ 1642,
+ 1659,
+ 1673,
+ 1681,
+ 1673,
+ 1671,
+ 1664,
+ 1671,
+ 1681,
+ 1728,
+ 1734,
+ 1789,
+ 1854,
+ 1807,
+ 1820,
+ 1778,
+ 1778,
+ 1717,
+ 1642,
+ 1635,
+ 1600,
+ 1520,
+ 1454,
+ 1415,
+ 1395
+ ]
+ ],
+ "id": 10128340000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/012834_mpii_test/000000.jpg"
+ },
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/009473_mpii_test/000000.jpg",
+ "nframes": 101,
+ "frame_id": 10094730000,
+ "vid_id": "009473",
+ "ignore_regions_y": [
+ [
+ 228,
+ 237,
+ 243,
+ 230,
+ 220,
+ 230,
+ 228,
+ 232,
+ 220,
+ 211,
+ 226,
+ 258,
+ 364,
+ 417,
+ 475,
+ 562,
+ 615,
+ 646,
+ 656,
+ 637,
+ 649,
+ 645,
+ 603,
+ 585,
+ 547
+ ],
+ [
+ 0,
+ 1,
+ 137,
+ 130,
+ 166,
+ 235,
+ 309,
+ 384,
+ 452,
+ 526,
+ 583,
+ 658,
+ 694,
+ 709,
+ 599,
+ 684,
+ 707,
+ 733,
+ 660,
+ 679,
+ 762
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 1907,
+ 1820,
+ 1758,
+ 1662,
+ 1577,
+ 1492,
+ 1375,
+ 1305,
+ 1250,
+ 1177,
+ 1111,
+ 1033,
+ 1047,
+ 1062,
+ 1056,
+ 1130,
+ 1173,
+ 1232,
+ 1282,
+ 1332,
+ 1416,
+ 1471,
+ 1515,
+ 1541,
+ 1909
+ ],
+ [
+ 0,
+ 257,
+ 284,
+ 407,
+ 450,
+ 505,
+ 562,
+ 592,
+ 613,
+ 626,
+ 639,
+ 639,
+ 594,
+ 454,
+ 371,
+ 343,
+ 269,
+ 152,
+ 88,
+ 35,
+ 3
+ ]
+ ],
+ "id": 10094730000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/009473_mpii_test/000000.jpg"
+ },
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/003418_mpii_test/000000.jpg",
+ "nframes": 149,
+ "frame_id": 10034180000,
+ "vid_id": "003418",
+ "ignore_regions_y": [
+ [
+ 639,
+ 647,
+ 635,
+ 618,
+ 590,
+ 547,
+ 501,
+ 499,
+ 515,
+ 518,
+ 526,
+ 528,
+ 509,
+ 473,
+ 450,
+ 407,
+ 352,
+ 339,
+ 309,
+ 271,
+ 262,
+ 252,
+ 211,
+ 211,
+ 183,
+ 175,
+ 139,
+ 105,
+ 94,
+ 62,
+ 45,
+ 22,
+ 22,
+ 30,
+ 52,
+ 66,
+ 86,
+ 92,
+ 101,
+ 109,
+ 162,
+ 158,
+ 135,
+ 103,
+ 94,
+ 75,
+ 60,
+ 37,
+ 18,
+ 9,
+ 1,
+ 0,
+ 79,
+ 75,
+ 88,
+ 103,
+ 156,
+ 164,
+ 167,
+ 201,
+ 196,
+ 194,
+ 207,
+ 237,
+ 262,
+ 256
+ ],
+ [
+ 94,
+ 71,
+ 62,
+ 41,
+ 28,
+ 30,
+ 43,
+ 64,
+ 88,
+ 96,
+ 120,
+ 115,
+ 98
+ ],
+ [
+ 235,
+ 215,
+ 211,
+ 224,
+ 252,
+ 239,
+ 207,
+ 196,
+ 211,
+ 228,
+ 218,
+ 198,
+ 181,
+ 186,
+ 198,
+ 218,
+ 233,
+ 252,
+ 277,
+ 279,
+ 292,
+ 318,
+ 347,
+ 354,
+ 392,
+ 430,
+ 439,
+ 447,
+ 462,
+ 477,
+ 496,
+ 539,
+ 515,
+ 464,
+ 426,
+ 398,
+ 366,
+ 333,
+ 322,
+ 315,
+ 318,
+ 303,
+ 298,
+ 294,
+ 266,
+ 245
+ ],
+ [
+ 207,
+ 213,
+ 239,
+ 237,
+ 215,
+ 179,
+ 179,
+ 184,
+ 209,
+ 222,
+ 239,
+ 264,
+ 279,
+ 271,
+ 269,
+ 290,
+ 260,
+ 226
+ ],
+ [
+ 194,
+ 175,
+ 160,
+ 164,
+ 179,
+ 167,
+ 160,
+ 216,
+ 266,
+ 262,
+ 266,
+ 269,
+ 281,
+ 298,
+ 309,
+ 318,
+ 332,
+ 341,
+ 345,
+ 345,
+ 290,
+ 262,
+ 226
+ ],
+ [
+ 424,
+ 442,
+ 432,
+ 432,
+ 455,
+ 469,
+ 474,
+ 505,
+ 559,
+ 555,
+ 465,
+ 449,
+ 444
+ ],
+ [
+ 926,
+ 901,
+ 857,
+ 792,
+ 751,
+ 694,
+ 636,
+ 540,
+ 474,
+ 403,
+ 351,
+ 265,
+ 211,
+ 155,
+ 98,
+ 71,
+ 40,
+ 0,
+ 0,
+ 1078,
+ 1078,
+ 1007,
+ 924,
+ 869,
+ 807,
+ 865,
+ 892,
+ 955,
+ 1003,
+ 1057,
+ 1078,
+ 1078,
+ 1030,
+ 961,
+ 926
+ ],
+ [
+ 1050,
+ 1076,
+ 1078,
+ 1057,
+ 1032,
+ 1013,
+ 998,
+ 982,
+ 971,
+ 951,
+ 936,
+ 913,
+ 888,
+ 844,
+ 799,
+ 763,
+ 732,
+ 723,
+ 713,
+ 753,
+ 784,
+ 817,
+ 830,
+ 871,
+ 911,
+ 930
+ ],
+ [
+ 478,
+ 461,
+ 423,
+ 405,
+ 394,
+ 263,
+ 257,
+ 265,
+ 290,
+ 315,
+ 334,
+ 342,
+ 344,
+ 411,
+ 448,
+ 448,
+ 448,
+ 430,
+ 424,
+ 423,
+ 421,
+ 409,
+ 444
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 0,
+ 1,
+ 198,
+ 258,
+ 307,
+ 337,
+ 343,
+ 335,
+ 320,
+ 290,
+ 273,
+ 260,
+ 232,
+ 190,
+ 196,
+ 183,
+ 171,
+ 162,
+ 149,
+ 132,
+ 105,
+ 77,
+ 69,
+ 322,
+ 324,
+ 303,
+ 279,
+ 273,
+ 247,
+ 224,
+ 226,
+ 215,
+ 203,
+ 190,
+ 192,
+ 181,
+ 169,
+ 167,
+ 154,
+ 139,
+ 99,
+ 86,
+ 81,
+ 56,
+ 60,
+ 62,
+ 60,
+ 49,
+ 35,
+ 15,
+ 9,
+ 3,
+ 0,
+ 11,
+ 13,
+ 0,
+ 1,
+ 9,
+ 90,
+ 64,
+ 49,
+ 33,
+ 18,
+ 13,
+ 15,
+ 0
+ ],
+ [
+ 341,
+ 347,
+ 352,
+ 356,
+ 371,
+ 383,
+ 388,
+ 392,
+ 403,
+ 392,
+ 398,
+ 377,
+ 375
+ ],
+ [
+ 688,
+ 694,
+ 713,
+ 724,
+ 728,
+ 752,
+ 764,
+ 783,
+ 796,
+ 796,
+ 824,
+ 828,
+ 839,
+ 856,
+ 864,
+ 864,
+ 884,
+ 899,
+ 903,
+ 843,
+ 854,
+ 854,
+ 850,
+ 884,
+ 901,
+ 873,
+ 833,
+ 815,
+ 796,
+ 747,
+ 716,
+ 666,
+ 654,
+ 660,
+ 667,
+ 694,
+ 711,
+ 724,
+ 737,
+ 775,
+ 792,
+ 790,
+ 756,
+ 688,
+ 686,
+ 686
+ ],
+ [
+ 1047,
+ 1079,
+ 1079,
+ 1088,
+ 1099,
+ 1103,
+ 1122,
+ 1133,
+ 1141,
+ 1164,
+ 1175,
+ 1164,
+ 1181,
+ 1126,
+ 1092,
+ 1077,
+ 1069,
+ 1047
+ ],
+ [
+ 1252,
+ 1254,
+ 1258,
+ 1277,
+ 1292,
+ 1301,
+ 1322,
+ 1330,
+ 1350,
+ 1322,
+ 1296,
+ 1277,
+ 1256,
+ 1233,
+ 1213,
+ 1198,
+ 1173,
+ 1130,
+ 1098,
+ 1081,
+ 1101,
+ 1198,
+ 1232
+ ],
+ [
+ 1165,
+ 1184,
+ 1226,
+ 1246,
+ 1238,
+ 1226,
+ 1209,
+ 1215,
+ 1180,
+ 1126,
+ 1057,
+ 1053,
+ 1128
+ ],
+ [
+ 1455,
+ 1438,
+ 1438,
+ 1444,
+ 1442,
+ 1423,
+ 1426,
+ 1409,
+ 1399,
+ 1390,
+ 1374,
+ 1349,
+ 1330,
+ 1319,
+ 1307,
+ 1309,
+ 1324,
+ 1330,
+ 1919,
+ 1919,
+ 1884,
+ 1855,
+ 1830,
+ 1828,
+ 1776,
+ 1732,
+ 1734,
+ 1726,
+ 1728,
+ 1707,
+ 1713,
+ 1469,
+ 1461,
+ 1459,
+ 1455
+ ],
+ [
+ 1463,
+ 1411,
+ 1255,
+ 1250,
+ 1246,
+ 1261,
+ 1284,
+ 1280,
+ 1271,
+ 1265,
+ 1275,
+ 1299,
+ 1299,
+ 1296,
+ 1315,
+ 1340,
+ 1365,
+ 1396,
+ 1444,
+ 1428,
+ 1434,
+ 1432,
+ 1446,
+ 1440,
+ 1453,
+ 1455
+ ],
+ [
+ 1246,
+ 1271,
+ 1313,
+ 1344,
+ 1384,
+ 1346,
+ 1307,
+ 1286,
+ 1255,
+ 1203,
+ 1153,
+ 1096,
+ 1078,
+ 1061,
+ 1036,
+ 1090,
+ 1121,
+ 1148,
+ 1169,
+ 1205,
+ 1228,
+ 1265,
+ 1267
+ ]
+ ],
+ "id": 10034180000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/003418_mpii_test/000000.jpg"
+ }
+ ],
+ "annotations": [
+ {
+ "bbox_head": [
+ 378,
+ 503,
+ 44,
+ 53
+ ],
+ "keypoints": [
+ 401,
+ 530,
+ 1,
+ 409.5254211,
+ 555.3547363,
+ 1,
+ 392.8559265,
+ 510.1089478,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 377,
+ 560,
+ 1,
+ 444,
+ 556,
+ 1,
+ 353,
+ 605,
+ 1,
+ 469.5,
+ 603.5,
+ 1,
+ 341.5,
+ 653.5,
+ 1,
+ 463,
+ 635,
+ 1,
+ 389,
+ 652,
+ 1,
+ 442,
+ 646,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 0,
+ "image_id": 10128340000,
+ "bbox": [
+ 322.3,
+ 488.60028996999995,
+ 166.39999999999998,
+ 186.40836786
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000000,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 571,
+ 446,
+ 42,
+ 46
+ ],
+ "keypoints": [
+ 600.5,
+ 475.5,
+ 1,
+ 590.4649048,
+ 493.8685303,
+ 1,
+ 593.1513062,
+ 450.3486023,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 570.5,
+ 509.5,
+ 1,
+ 608.5,
+ 509.5,
+ 1,
+ 539,
+ 558.5,
+ 1,
+ 634,
+ 539,
+ 1,
+ 558.5,
+ 584.5,
+ 1,
+ 624.5,
+ 528.5,
+ 1,
+ 605,
+ 595,
+ 1,
+ 601,
+ 593,
+ 1,
+ 640,
+ 634.5,
+ 1,
+ 598,
+ 672,
+ 1,
+ 616.5,
+ 700.5,
+ 1,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 1,
+ "image_id": 10128340000,
+ "bbox": [
+ 523.85,
+ 412.825892645,
+ 131.29999999999995,
+ 325.19681700999996
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000001,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 159,
+ 259,
+ 42,
+ 47
+ ],
+ "keypoints": [
+ 201,
+ 284.5,
+ 1,
+ 169.9334106,
+ 305.6158752,
+ 1,
+ 187.549942,
+ 265.1630859,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 139.5,
+ 307.5,
+ 1,
+ 193.5,
+ 319.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 209,
+ 371,
+ 1,
+ 144,
+ 365.5,
+ 1,
+ 231,
+ 392,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 222,
+ 337,
+ 1,
+ 241,
+ 341.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 267,
+ 416,
+ 1
+ ],
+ "track_id": 2,
+ "image_id": 10128340000,
+ "bbox": [
+ 120.375,
+ 242.53754878499996,
+ 165.75,
+ 196.08798833000003
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000002,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 372,
+ 205,
+ 44,
+ 44
+ ],
+ "keypoints": [
+ 410.5,
+ 230.5,
+ 1,
+ 387.8875732,
+ 251.1279602,
+ 1,
+ 398.5843201,
+ 208.9040375,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 359.5,
+ 262.5,
+ 1,
+ 409.5,
+ 266.5,
+ 1,
+ 337.5,
+ 308.5,
+ 1,
+ 450,
+ 306,
+ 1,
+ 292,
+ 314,
+ 1,
+ 480,
+ 311.5,
+ 1,
+ 390,
+ 339,
+ 1,
+ 409,
+ 339,
+ 1,
+ 405.5,
+ 418.5,
+ 1,
+ 447.5,
+ 366.5,
+ 1,
+ 391.5,
+ 464.5,
+ 1,
+ 437.5,
+ 440.5,
+ 1
+ ],
+ "track_id": 3,
+ "image_id": 10128340000,
+ "bbox": [
+ 263.8,
+ 170.56464312499998,
+ 244.39999999999998,
+ 332.27475125
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000003,
+ "iscrowd": false,
+ "num_keypoints": 15
+ },
+ {
+ "bbox_head": [
+ 693,
+ 410,
+ 44,
+ 49
+ ],
+ "keypoints": [
+ 718.5,
+ 440.5,
+ 1,
+ 717.704834,
+ 460.703125,
+ 1,
+ 712.9713745,
+ 414.8476562,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 694.5,
+ 474,
+ 1,
+ 743.5,
+ 472.5,
+ 1,
+ 681.5,
+ 530.5,
+ 1,
+ 757.5,
+ 523.5,
+ 1,
+ 667.5,
+ 564.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 705.5,
+ 563.5,
+ 1,
+ 737.5,
+ 560.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 692.5,
+ 607.5,
+ 1,
+ 716.5,
+ 603.5,
+ 1
+ ],
+ "track_id": 4,
+ "image_id": 10128340000,
+ "bbox": [
+ 654.0,
+ 385.94980463,
+ 117.0,
+ 250.44804694000004
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000004,
+ "iscrowd": false,
+ "num_keypoints": 12
+ },
+ {
+ "bbox_head": [
+ 923,
+ 347,
+ 46,
+ 58
+ ],
+ "keypoints": [
+ 965.5,
+ 382.5,
+ 1,
+ 933.9436646,
+ 403.0452576,
+ 1,
+ 955.0422363,
+ 355.7160645,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 922.5,
+ 403.5,
+ 1,
+ 932.5,
+ 431.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 960,
+ 475.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 991.5,
+ 462.5,
+ 1,
+ 934.5,
+ 512.5,
+ 1,
+ 922.5,
+ 506.5,
+ 1,
+ 946.5,
+ 567.5,
+ 1,
+ 964,
+ 578,
+ 1,
+ 900.5,
+ 598,
+ 1,
+ 936,
+ 634.5,
+ 1
+ ],
+ "track_id": 5,
+ "image_id": 10128340000,
+ "bbox": [
+ 886.85,
+ 313.89847417500005,
+ 118.29999999999995,
+ 362.4191161499999
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000005,
+ "iscrowd": false,
+ "num_keypoints": 13
+ },
+ {
+ "bbox_head": [
+ 691,
+ 179,
+ 43,
+ 52
+ ],
+ "keypoints": [
+ 708.5,
+ 212.5,
+ 1,
+ 722.6444702,
+ 230.0113831,
+ 1,
+ 704.8916626,
+ 186.2414551,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 712,
+ 244,
+ 1,
+ 742,
+ 237.5,
+ 1,
+ 723,
+ 293.5,
+ 1,
+ 745.5,
+ 281.5,
+ 1,
+ 692,
+ 319,
+ 1,
+ 0,
+ 0,
+ 0,
+ 722,
+ 323.5,
+ 1,
+ 748.5,
+ 314,
+ 1,
+ 657.5,
+ 301.5,
+ 1,
+ 668.5,
+ 299.5,
+ 1,
+ 670.5,
+ 367.5,
+ 1,
+ 689.5,
+ 362.5,
+ 1
+ ],
+ "track_id": 6,
+ "image_id": 10128340000,
+ "bbox": [
+ 643.85,
+ 159.05267336499998,
+ 118.29999999999995,
+ 235.63610837
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000006,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 927,
+ 160,
+ 39,
+ 52
+ ],
+ "keypoints": [
+ 952,
+ 189,
+ 1,
+ 946.763916,
+ 211.9986572,
+ 1,
+ 946.302063,
+ 166.5010071,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 914.5,
+ 234,
+ 1,
+ 979.5,
+ 236.5,
+ 1,
+ 890.5,
+ 270.5,
+ 1,
+ 998.5,
+ 286.5,
+ 1,
+ 894.5,
+ 324,
+ 1,
+ 0,
+ 0,
+ 0,
+ 932,
+ 326.5,
+ 1,
+ 958.5,
+ 327.5,
+ 1,
+ 1000.5,
+ 340.5,
+ 1,
+ 993.5,
+ 372.5,
+ 1,
+ 955.5,
+ 383.5,
+ 1,
+ 959.5,
+ 446.5,
+ 1
+ ],
+ "track_id": 7,
+ "image_id": 10128340000,
+ "bbox": [
+ 874.0,
+ 124.50115816500005,
+ 143.0,
+ 363.99869076999994
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000007,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 1367,
+ 427,
+ 47,
+ 45
+ ],
+ "keypoints": [
+ 1406,
+ 451,
+ 1,
+ 1379.198608,
+ 472.946106,
+ 1,
+ 1398.976074,
+ 431.9154358,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1375.5,
+ 467.5,
+ 1,
+ 1372,
+ 501,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1343.5,
+ 534.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1339.5,
+ 573.5,
+ 1,
+ 1381.5,
+ 531.5,
+ 1,
+ 1376,
+ 539.5,
+ 1,
+ 1452.5,
+ 524.5,
+ 1,
+ 1453.5,
+ 535.5,
+ 1,
+ 1469.5,
+ 603.5,
+ 1,
+ 1466,
+ 610,
+ 1
+ ],
+ "track_id": 8,
+ "image_id": 10128340000,
+ "bbox": [
+ 1320.0,
+ 405.20275117000006,
+ 169.0,
+ 231.50993345999996
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000008,
+ "iscrowd": false,
+ "num_keypoints": 13
+ },
+ {
+ "bbox_head": [
+ 1378,
+ 204,
+ 40,
+ 44
+ ],
+ "keypoints": [
+ 1389,
+ 234,
+ 1,
+ 1404.137573,
+ 248.9802094,
+ 1,
+ 1393.396851,
+ 208.7648468,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1375,
+ 272,
+ 1,
+ 1442.5,
+ 260.5,
+ 1,
+ 1374,
+ 315,
+ 1,
+ 1468,
+ 303.5,
+ 1,
+ 1367,
+ 340.5,
+ 1,
+ 1462.5,
+ 330.5,
+ 1,
+ 1407,
+ 349.5,
+ 1,
+ 1439,
+ 340.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 9,
+ "image_id": 10128340000,
+ "bbox": [
+ 1351.85,
+ 187.65457382,
+ 131.30000000000018,
+ 182.95569916
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000009,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 407,
+ -29,
+ 35,
+ 40
+ ],
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 425.1159668,
+ 12.25136662,
+ 1,
+ 424.0380249,
+ -24.93852425,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 455.5,
+ 21.5,
+ 1,
+ 395.5,
+ 29.5,
+ 1,
+ 474.5,
+ 64.5,
+ 1,
+ 391.5,
+ 67,
+ 1,
+ 474,
+ 108,
+ 1,
+ 379,
+ 107,
+ 1,
+ 446,
+ 88,
+ 1,
+ 426,
+ 88,
+ 1,
+ 424,
+ 113,
+ 1,
+ 403,
+ 113,
+ 1,
+ 430,
+ 173,
+ 1,
+ 415,
+ 171,
+ 1
+ ],
+ "track_id": 10,
+ "image_id": 10128340000,
+ "bbox": [
+ 364.675,
+ -54.62930288750002,
+ 124.14999999999998,
+ 257.32008152500003
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000010,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 648,
+ 253,
+ 138,
+ 103
+ ],
+ "keypoints": [
+ 750,
+ 297.5,
+ 1,
+ 734.1937256,
+ 371.1997375,
+ 1,
+ 704.1047363,
+ 254.4751892,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 697,
+ 352.5,
+ 1,
+ 766,
+ 369,
+ 1,
+ 0,
+ 0,
+ 0,
+ 857,
+ 419,
+ 1,
+ 766,
+ 281,
+ 1,
+ 942,
+ 422.5,
+ 1,
+ 722,
+ 525,
+ 1,
+ 792,
+ 544,
+ 1,
+ 783,
+ 697,
+ 1,
+ 755,
+ 699,
+ 1,
+ 808.5,
+ 832.5,
+ 1,
+ 672,
+ 782.5,
+ 1
+ ],
+ "track_id": 0,
+ "image_id": 10094730000,
+ "bbox": [
+ 631.5,
+ 167.77146757999992,
+ 351.0,
+ 751.4322540400001
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1009473000000,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 878,
+ 201,
+ 90,
+ 125
+ ],
+ "keypoints": [
+ 900,
+ 272,
+ 1,
+ 905.657959,
+ 322.6206665,
+ 1,
+ 936.0065308,
+ 219.1595001,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 950,
+ 366,
+ 1,
+ 852,
+ 311,
+ 1,
+ 1021,
+ 428,
+ 1,
+ 759,
+ 303,
+ 1,
+ 986,
+ 422,
+ 1,
+ 704,
+ 374,
+ 1,
+ 912,
+ 516,
+ 1,
+ 856,
+ 524,
+ 1,
+ 876,
+ 663,
+ 1,
+ 908,
+ 680,
+ 1,
+ 849,
+ 828,
+ 1,
+ 959,
+ 804,
+ 1
+ ],
+ "track_id": 1,
+ "image_id": 10094730000,
+ "bbox": [
+ 656.45,
+ 127.83342511500007,
+ 412.0999999999999,
+ 791.4926498699999
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1009473000001,
+ "iscrowd": false,
+ "num_keypoints": 15
+ },
+ {
+ "bbox_head": [
+ 346,
+ 337,
+ 296,
+ 237
+ ],
+ "keypoints": [
+ 621,
+ 471,
+ 1,
+ 542.2835693,
+ 599.2855835,
+ 1,
+ 457.787323,
+ 347.6607971,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 371,
+ 737,
+ 1,
+ 753,
+ 579,
+ 1,
+ 387,
+ 981,
+ 1,
+ 1023,
+ 353,
+ 1,
+ 0,
+ 0,
+ 0,
+ 953,
+ 141,
+ 1,
+ 968,
+ 833,
+ 1,
+ 1152,
+ 843,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1416,
+ 429,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 0,
+ "image_id": 10034180000,
+ "bbox": [
+ 214.25,
+ 15.0,
+ 1358.5,
+ 1092.0
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1003418000000,
+ "iscrowd": false,
+ "num_keypoints": 11
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/posetrack18/annotations/val/003418_mpii_test.json b/vendor/ViTPose/tests/data/posetrack18/annotations/val/003418_mpii_test.json
new file mode 100644
index 0000000000000000000000000000000000000000..00bf014969219ba959e892fd80d5f0a8ecee2e91
--- /dev/null
+++ b/vendor/ViTPose/tests/data/posetrack18/annotations/val/003418_mpii_test.json
@@ -0,0 +1,762 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "head_bottom",
+ "head_top",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/003418_mpii_test/000000.jpg",
+ "nframes": 149,
+ "frame_id": 10034180000,
+ "vid_id": "003418",
+ "ignore_regions_y": [
+ [
+ 639,
+ 647,
+ 635,
+ 618,
+ 590,
+ 547,
+ 501,
+ 499,
+ 515,
+ 518,
+ 526,
+ 528,
+ 509,
+ 473,
+ 450,
+ 407,
+ 352,
+ 339,
+ 309,
+ 271,
+ 262,
+ 252,
+ 211,
+ 211,
+ 183,
+ 175,
+ 139,
+ 105,
+ 94,
+ 62,
+ 45,
+ 22,
+ 22,
+ 30,
+ 52,
+ 66,
+ 86,
+ 92,
+ 101,
+ 109,
+ 162,
+ 158,
+ 135,
+ 103,
+ 94,
+ 75,
+ 60,
+ 37,
+ 18,
+ 9,
+ 1,
+ 0,
+ 79,
+ 75,
+ 88,
+ 103,
+ 156,
+ 164,
+ 167,
+ 201,
+ 196,
+ 194,
+ 207,
+ 237,
+ 262,
+ 256
+ ],
+ [
+ 94,
+ 71,
+ 62,
+ 41,
+ 28,
+ 30,
+ 43,
+ 64,
+ 88,
+ 96,
+ 120,
+ 115,
+ 98
+ ],
+ [
+ 235,
+ 215,
+ 211,
+ 224,
+ 252,
+ 239,
+ 207,
+ 196,
+ 211,
+ 228,
+ 218,
+ 198,
+ 181,
+ 186,
+ 198,
+ 218,
+ 233,
+ 252,
+ 277,
+ 279,
+ 292,
+ 318,
+ 347,
+ 354,
+ 392,
+ 430,
+ 439,
+ 447,
+ 462,
+ 477,
+ 496,
+ 539,
+ 515,
+ 464,
+ 426,
+ 398,
+ 366,
+ 333,
+ 322,
+ 315,
+ 318,
+ 303,
+ 298,
+ 294,
+ 266,
+ 245
+ ],
+ [
+ 207,
+ 213,
+ 239,
+ 237,
+ 215,
+ 179,
+ 179,
+ 184,
+ 209,
+ 222,
+ 239,
+ 264,
+ 279,
+ 271,
+ 269,
+ 290,
+ 260,
+ 226
+ ],
+ [
+ 194,
+ 175,
+ 160,
+ 164,
+ 179,
+ 167,
+ 160,
+ 216,
+ 266,
+ 262,
+ 266,
+ 269,
+ 281,
+ 298,
+ 309,
+ 318,
+ 332,
+ 341,
+ 345,
+ 345,
+ 290,
+ 262,
+ 226
+ ],
+ [
+ 424,
+ 442,
+ 432,
+ 432,
+ 455,
+ 469,
+ 474,
+ 505,
+ 559,
+ 555,
+ 465,
+ 449,
+ 444
+ ],
+ [
+ 926,
+ 901,
+ 857,
+ 792,
+ 751,
+ 694,
+ 636,
+ 540,
+ 474,
+ 403,
+ 351,
+ 265,
+ 211,
+ 155,
+ 98,
+ 71,
+ 40,
+ 0,
+ 0,
+ 1078,
+ 1078,
+ 1007,
+ 924,
+ 869,
+ 807,
+ 865,
+ 892,
+ 955,
+ 1003,
+ 1057,
+ 1078,
+ 1078,
+ 1030,
+ 961,
+ 926
+ ],
+ [
+ 1050,
+ 1076,
+ 1078,
+ 1057,
+ 1032,
+ 1013,
+ 998,
+ 982,
+ 971,
+ 951,
+ 936,
+ 913,
+ 888,
+ 844,
+ 799,
+ 763,
+ 732,
+ 723,
+ 713,
+ 753,
+ 784,
+ 817,
+ 830,
+ 871,
+ 911,
+ 930
+ ],
+ [
+ 478,
+ 461,
+ 423,
+ 405,
+ 394,
+ 263,
+ 257,
+ 265,
+ 290,
+ 315,
+ 334,
+ 342,
+ 344,
+ 411,
+ 448,
+ 448,
+ 448,
+ 430,
+ 424,
+ 423,
+ 421,
+ 409,
+ 444
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 0,
+ 1,
+ 198,
+ 258,
+ 307,
+ 337,
+ 343,
+ 335,
+ 320,
+ 290,
+ 273,
+ 260,
+ 232,
+ 190,
+ 196,
+ 183,
+ 171,
+ 162,
+ 149,
+ 132,
+ 105,
+ 77,
+ 69,
+ 322,
+ 324,
+ 303,
+ 279,
+ 273,
+ 247,
+ 224,
+ 226,
+ 215,
+ 203,
+ 190,
+ 192,
+ 181,
+ 169,
+ 167,
+ 154,
+ 139,
+ 99,
+ 86,
+ 81,
+ 56,
+ 60,
+ 62,
+ 60,
+ 49,
+ 35,
+ 15,
+ 9,
+ 3,
+ 0,
+ 11,
+ 13,
+ 0,
+ 1,
+ 9,
+ 90,
+ 64,
+ 49,
+ 33,
+ 18,
+ 13,
+ 15,
+ 0
+ ],
+ [
+ 341,
+ 347,
+ 352,
+ 356,
+ 371,
+ 383,
+ 388,
+ 392,
+ 403,
+ 392,
+ 398,
+ 377,
+ 375
+ ],
+ [
+ 688,
+ 694,
+ 713,
+ 724,
+ 728,
+ 752,
+ 764,
+ 783,
+ 796,
+ 796,
+ 824,
+ 828,
+ 839,
+ 856,
+ 864,
+ 864,
+ 884,
+ 899,
+ 903,
+ 843,
+ 854,
+ 854,
+ 850,
+ 884,
+ 901,
+ 873,
+ 833,
+ 815,
+ 796,
+ 747,
+ 716,
+ 666,
+ 654,
+ 660,
+ 667,
+ 694,
+ 711,
+ 724,
+ 737,
+ 775,
+ 792,
+ 790,
+ 756,
+ 688,
+ 686,
+ 686
+ ],
+ [
+ 1047,
+ 1079,
+ 1079,
+ 1088,
+ 1099,
+ 1103,
+ 1122,
+ 1133,
+ 1141,
+ 1164,
+ 1175,
+ 1164,
+ 1181,
+ 1126,
+ 1092,
+ 1077,
+ 1069,
+ 1047
+ ],
+ [
+ 1252,
+ 1254,
+ 1258,
+ 1277,
+ 1292,
+ 1301,
+ 1322,
+ 1330,
+ 1350,
+ 1322,
+ 1296,
+ 1277,
+ 1256,
+ 1233,
+ 1213,
+ 1198,
+ 1173,
+ 1130,
+ 1098,
+ 1081,
+ 1101,
+ 1198,
+ 1232
+ ],
+ [
+ 1165,
+ 1184,
+ 1226,
+ 1246,
+ 1238,
+ 1226,
+ 1209,
+ 1215,
+ 1180,
+ 1126,
+ 1057,
+ 1053,
+ 1128
+ ],
+ [
+ 1455,
+ 1438,
+ 1438,
+ 1444,
+ 1442,
+ 1423,
+ 1426,
+ 1409,
+ 1399,
+ 1390,
+ 1374,
+ 1349,
+ 1330,
+ 1319,
+ 1307,
+ 1309,
+ 1324,
+ 1330,
+ 1919,
+ 1919,
+ 1884,
+ 1855,
+ 1830,
+ 1828,
+ 1776,
+ 1732,
+ 1734,
+ 1726,
+ 1728,
+ 1707,
+ 1713,
+ 1469,
+ 1461,
+ 1459,
+ 1455
+ ],
+ [
+ 1463,
+ 1411,
+ 1255,
+ 1250,
+ 1246,
+ 1261,
+ 1284,
+ 1280,
+ 1271,
+ 1265,
+ 1275,
+ 1299,
+ 1299,
+ 1296,
+ 1315,
+ 1340,
+ 1365,
+ 1396,
+ 1444,
+ 1428,
+ 1434,
+ 1432,
+ 1446,
+ 1440,
+ 1453,
+ 1455
+ ],
+ [
+ 1246,
+ 1271,
+ 1313,
+ 1344,
+ 1384,
+ 1346,
+ 1307,
+ 1286,
+ 1255,
+ 1203,
+ 1153,
+ 1096,
+ 1078,
+ 1061,
+ 1036,
+ 1090,
+ 1121,
+ 1148,
+ 1169,
+ 1205,
+ 1228,
+ 1265,
+ 1267
+ ]
+ ],
+ "id": 10034180000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/003418_mpii_test/000000.jpg"
+ }
+ ],
+ "annotations": [
+ {
+ "bbox_head": [
+ 346,
+ 337,
+ 296,
+ 237
+ ],
+ "keypoints": [
+ 621,
+ 471,
+ 1,
+ 542.2835693,
+ 599.2855835,
+ 1,
+ 457.787323,
+ 347.6607971,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 371,
+ 737,
+ 1,
+ 753,
+ 579,
+ 1,
+ 387,
+ 981,
+ 1,
+ 1023,
+ 353,
+ 1,
+ 0,
+ 0,
+ 0,
+ 953,
+ 141,
+ 1,
+ 968,
+ 833,
+ 1,
+ 1152,
+ 843,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1416,
+ 429,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 0,
+ "image_id": 10034180000,
+ "bbox": [
+ 214.25,
+ 15.0,
+ 1358.5,
+ 1092.0
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1003418000000,
+ "iscrowd": false,
+ "num_keypoints": 11
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/posetrack18/annotations/val/009473_mpii_test.json b/vendor/ViTPose/tests/data/posetrack18/annotations/val/009473_mpii_test.json
new file mode 100644
index 0000000000000000000000000000000000000000..0c0bb2c5c1c786f9c5be423e641e297cbc6d4c5a
--- /dev/null
+++ b/vendor/ViTPose/tests/data/posetrack18/annotations/val/009473_mpii_test.json
@@ -0,0 +1,374 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "head_bottom",
+ "head_top",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/009473_mpii_test/000000.jpg",
+ "nframes": 101,
+ "frame_id": 10094730000,
+ "vid_id": "009473",
+ "ignore_regions_y": [
+ [
+ 228,
+ 237,
+ 243,
+ 230,
+ 220,
+ 230,
+ 228,
+ 232,
+ 220,
+ 211,
+ 226,
+ 258,
+ 364,
+ 417,
+ 475,
+ 562,
+ 615,
+ 646,
+ 656,
+ 637,
+ 649,
+ 645,
+ 603,
+ 585,
+ 547
+ ],
+ [
+ 0,
+ 1,
+ 137,
+ 130,
+ 166,
+ 235,
+ 309,
+ 384,
+ 452,
+ 526,
+ 583,
+ 658,
+ 694,
+ 709,
+ 599,
+ 684,
+ 707,
+ 733,
+ 660,
+ 679,
+ 762
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 1907,
+ 1820,
+ 1758,
+ 1662,
+ 1577,
+ 1492,
+ 1375,
+ 1305,
+ 1250,
+ 1177,
+ 1111,
+ 1033,
+ 1047,
+ 1062,
+ 1056,
+ 1130,
+ 1173,
+ 1232,
+ 1282,
+ 1332,
+ 1416,
+ 1471,
+ 1515,
+ 1541,
+ 1909
+ ],
+ [
+ 0,
+ 257,
+ 284,
+ 407,
+ 450,
+ 505,
+ 562,
+ 592,
+ 613,
+ 626,
+ 639,
+ 639,
+ 594,
+ 454,
+ 371,
+ 343,
+ 269,
+ 152,
+ 88,
+ 35,
+ 3
+ ]
+ ],
+ "id": 10094730000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/009473_mpii_test/000000.jpg"
+ }
+ ],
+ "annotations": [
+ {
+ "bbox_head": [
+ 648,
+ 253,
+ 138,
+ 103
+ ],
+ "keypoints": [
+ 750,
+ 297.5,
+ 1,
+ 734.1937256,
+ 371.1997375,
+ 1,
+ 704.1047363,
+ 254.4751892,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 697,
+ 352.5,
+ 1,
+ 766,
+ 369,
+ 1,
+ 0,
+ 0,
+ 0,
+ 857,
+ 419,
+ 1,
+ 766,
+ 281,
+ 1,
+ 942,
+ 422.5,
+ 1,
+ 722,
+ 525,
+ 1,
+ 792,
+ 544,
+ 1,
+ 783,
+ 697,
+ 1,
+ 755,
+ 699,
+ 1,
+ 808.5,
+ 832.5,
+ 1,
+ 672,
+ 782.5,
+ 1
+ ],
+ "track_id": 0,
+ "image_id": 10094730000,
+ "bbox": [
+ 631.5,
+ 167.77146757999992,
+ 351.0,
+ 751.4322540400001
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1009473000000,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 878,
+ 201,
+ 90,
+ 125
+ ],
+ "keypoints": [
+ 900,
+ 272,
+ 1,
+ 905.657959,
+ 322.6206665,
+ 1,
+ 936.0065308,
+ 219.1595001,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 950,
+ 366,
+ 1,
+ 852,
+ 311,
+ 1,
+ 1021,
+ 428,
+ 1,
+ 759,
+ 303,
+ 1,
+ 986,
+ 422,
+ 1,
+ 704,
+ 374,
+ 1,
+ 912,
+ 516,
+ 1,
+ 856,
+ 524,
+ 1,
+ 876,
+ 663,
+ 1,
+ 908,
+ 680,
+ 1,
+ 849,
+ 828,
+ 1,
+ 959,
+ 804,
+ 1
+ ],
+ "track_id": 1,
+ "image_id": 10094730000,
+ "bbox": [
+ 656.45,
+ 127.83342511500007,
+ 412.0999999999999,
+ 791.4926498699999
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1009473000001,
+ "iscrowd": false,
+ "num_keypoints": 15
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/posetrack18/annotations/val/012834_mpii_test.json b/vendor/ViTPose/tests/data/posetrack18/annotations/val/012834_mpii_test.json
new file mode 100644
index 0000000000000000000000000000000000000000..c97cd0b91bba6c7f3b1cee5f8d0c96e2c79ca3c4
--- /dev/null
+++ b/vendor/ViTPose/tests/data/posetrack18/annotations/val/012834_mpii_test.json
@@ -0,0 +1,1466 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "head_bottom",
+ "head_top",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle"
+ ],
+ "skeleton": [
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "has_no_densepose": true,
+ "is_labeled": true,
+ "file_name": "images/val/012834_mpii_test/000000.jpg",
+ "nframes": 140,
+ "frame_id": 10128340000,
+ "vid_id": "012834",
+ "ignore_regions_y": [
+ [
+ 1079,
+ 615,
+ 612,
+ 674,
+ 660,
+ 664,
+ 678,
+ 713,
+ 704,
+ 667,
+ 665,
+ 678,
+ 700,
+ 729,
+ 753,
+ 740,
+ 695,
+ 668,
+ 646,
+ 623,
+ 624,
+ 659,
+ 676,
+ 685,
+ 695,
+ 678,
+ 675,
+ 673,
+ 663,
+ 693,
+ 703,
+ 732,
+ 719,
+ 690,
+ 669,
+ 660,
+ 660,
+ 663,
+ 689,
+ 697,
+ 691,
+ 697,
+ 713,
+ 736,
+ 721,
+ 703,
+ 698,
+ 708,
+ 741,
+ 758,
+ 778,
+ 795,
+ 780,
+ 757,
+ 745,
+ 737,
+ 745,
+ 782,
+ 809,
+ 850,
+ 881,
+ 835,
+ 810,
+ 806,
+ 782,
+ 782,
+ 806,
+ 832,
+ 863,
+ 897,
+ 940,
+ 931,
+ 957,
+ 976,
+ 1003,
+ 1043,
+ 1045,
+ 1008,
+ 965,
+ 963,
+ 931,
+ 895,
+ 846,
+ 832,
+ 829,
+ 869,
+ 898,
+ 904,
+ 869,
+ 836,
+ 782,
+ 752,
+ 736,
+ 748,
+ 776,
+ 832,
+ 874,
+ 862,
+ 818,
+ 793,
+ 790,
+ 756,
+ 738,
+ 737,
+ 750,
+ 791,
+ 806,
+ 820,
+ 852,
+ 879,
+ 837,
+ 794,
+ 772,
+ 766,
+ 769,
+ 790,
+ 805,
+ 829,
+ 844,
+ 866,
+ 837,
+ 804,
+ 791,
+ 773,
+ 745,
+ 706,
+ 683,
+ 644,
+ 638,
+ 662,
+ 694,
+ 716,
+ 736,
+ 777,
+ 784,
+ 815,
+ 830,
+ 813,
+ 800,
+ 813,
+ 820,
+ 847,
+ 829,
+ 781,
+ 780,
+ 801,
+ 836,
+ 886,
+ 938,
+ 1018,
+ 1029,
+ 1079
+ ],
+ [
+ 0,
+ 21,
+ 43,
+ 60,
+ 90,
+ 95,
+ 95,
+ 43,
+ 40,
+ 84,
+ 104,
+ 104,
+ 74,
+ 6,
+ 4,
+ 71,
+ 69,
+ 0
+ ],
+ [
+ 0,
+ 4,
+ 48,
+ 106,
+ 214,
+ 207,
+ 46,
+ 50,
+ 170,
+ 156,
+ 96,
+ 157,
+ 160,
+ 62,
+ 65,
+ 156,
+ 165,
+ 162,
+ 140,
+ 93,
+ 93,
+ 7,
+ 4,
+ 121,
+ 129,
+ 84,
+ 75,
+ 68
+ ],
+ [
+ 0,
+ 0,
+ 739,
+ 729,
+ 720,
+ 740,
+ 768,
+ 785,
+ 803,
+ 815,
+ 757,
+ 735,
+ 632,
+ 620,
+ 632,
+ 640,
+ 662,
+ 656,
+ 607,
+ 645,
+ 645,
+ 628,
+ 604,
+ 570,
+ 543,
+ 512,
+ 485,
+ 467,
+ 451,
+ 448,
+ 456,
+ 482,
+ 512,
+ 548,
+ 554,
+ 542,
+ 498,
+ 479,
+ 454,
+ 404,
+ 387,
+ 398,
+ 415,
+ 528,
+ 546,
+ 468,
+ 410,
+ 400,
+ 359,
+ 375,
+ 373,
+ 273,
+ 254,
+ 284,
+ 253,
+ 204,
+ 206
+ ]
+ ],
+ "ignore_regions_x": [
+ [
+ 3,
+ 0,
+ 30,
+ 44,
+ 74,
+ 99,
+ 106,
+ 102,
+ 115,
+ 121,
+ 141,
+ 156,
+ 165,
+ 187,
+ 200,
+ 211,
+ 196,
+ 198,
+ 210,
+ 226,
+ 252,
+ 266,
+ 263,
+ 271,
+ 291,
+ 299,
+ 326,
+ 339,
+ 360,
+ 399,
+ 412,
+ 424,
+ 437,
+ 432,
+ 439,
+ 461,
+ 489,
+ 510,
+ 534,
+ 548,
+ 559,
+ 567,
+ 587,
+ 593,
+ 604,
+ 612,
+ 633,
+ 652,
+ 645,
+ 638,
+ 649,
+ 650,
+ 661,
+ 654,
+ 662,
+ 685,
+ 713,
+ 727,
+ 733,
+ 752,
+ 762,
+ 769,
+ 785,
+ 812,
+ 841,
+ 863,
+ 869,
+ 877,
+ 899,
+ 909,
+ 918,
+ 906,
+ 902,
+ 909,
+ 917,
+ 900,
+ 932,
+ 932,
+ 941,
+ 919,
+ 926,
+ 935,
+ 950,
+ 957,
+ 983,
+ 1002,
+ 1007,
+ 1032,
+ 1034,
+ 1018,
+ 1018,
+ 1038,
+ 1074,
+ 1106,
+ 1119,
+ 1121,
+ 1130,
+ 1148,
+ 1152,
+ 1172,
+ 1195,
+ 1199,
+ 1209,
+ 1229,
+ 1242,
+ 1240,
+ 1242,
+ 1261,
+ 1264,
+ 1277,
+ 1285,
+ 1286,
+ 1296,
+ 1313,
+ 1336,
+ 1350,
+ 1367,
+ 1403,
+ 1417,
+ 1435,
+ 1459,
+ 1456,
+ 1429,
+ 1420,
+ 1465,
+ 1492,
+ 1496,
+ 1507,
+ 1529,
+ 1553,
+ 1570,
+ 1596,
+ 1609,
+ 1610,
+ 1649,
+ 1671,
+ 1703,
+ 1740,
+ 1763,
+ 1775,
+ 1803,
+ 1809,
+ 1815,
+ 1815,
+ 1857,
+ 1874,
+ 1881,
+ 1897,
+ 1896,
+ 1899,
+ 1888,
+ 1884
+ ],
+ [
+ 378,
+ 381,
+ 365,
+ 359,
+ 334,
+ 292,
+ 257,
+ 262,
+ 231,
+ 236,
+ 219,
+ 193,
+ 196,
+ 183,
+ 154,
+ 159,
+ 140,
+ 121
+ ],
+ [
+ 451,
+ 1173,
+ 1168,
+ 1168,
+ 1170,
+ 1085,
+ 1098,
+ 1070,
+ 1043,
+ 1000,
+ 993,
+ 979,
+ 934,
+ 937,
+ 918,
+ 903,
+ 893,
+ 832,
+ 785,
+ 759,
+ 726,
+ 710,
+ 667,
+ 664,
+ 585,
+ 576,
+ 507,
+ 485
+ ],
+ [
+ 1312,
+ 1918,
+ 1917,
+ 1895,
+ 1867,
+ 1835,
+ 1804,
+ 1779,
+ 1754,
+ 1720,
+ 1726,
+ 1739,
+ 1740,
+ 1735,
+ 1701,
+ 1635,
+ 1587,
+ 1578,
+ 1587,
+ 1564,
+ 1550,
+ 1543,
+ 1562,
+ 1579,
+ 1578,
+ 1581,
+ 1584,
+ 1589,
+ 1601,
+ 1610,
+ 1621,
+ 1637,
+ 1642,
+ 1659,
+ 1673,
+ 1681,
+ 1673,
+ 1671,
+ 1664,
+ 1671,
+ 1681,
+ 1728,
+ 1734,
+ 1789,
+ 1854,
+ 1807,
+ 1820,
+ 1778,
+ 1778,
+ 1717,
+ 1642,
+ 1635,
+ 1600,
+ 1520,
+ 1454,
+ 1415,
+ 1395
+ ]
+ ],
+ "id": 10128340000,
+ "width": 1920,
+ "height": 1080,
+ "mask_file": "mask/val/012834_mpii_test/000000.jpg"
+ }
+ ],
+ "annotations": [
+ {
+ "bbox_head": [
+ 378,
+ 503,
+ 44,
+ 53
+ ],
+ "keypoints": [
+ 401,
+ 530,
+ 1,
+ 409.5254211,
+ 555.3547363,
+ 1,
+ 392.8559265,
+ 510.1089478,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 377,
+ 560,
+ 1,
+ 444,
+ 556,
+ 1,
+ 353,
+ 605,
+ 1,
+ 469.5,
+ 603.5,
+ 1,
+ 341.5,
+ 653.5,
+ 1,
+ 463,
+ 635,
+ 1,
+ 389,
+ 652,
+ 1,
+ 442,
+ 646,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 0,
+ "image_id": 10128340000,
+ "bbox": [
+ 322.3,
+ 488.60028996999995,
+ 166.39999999999998,
+ 186.40836786
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000000,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 571,
+ 446,
+ 42,
+ 46
+ ],
+ "keypoints": [
+ 600.5,
+ 475.5,
+ 1,
+ 590.4649048,
+ 493.8685303,
+ 1,
+ 593.1513062,
+ 450.3486023,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 570.5,
+ 509.5,
+ 1,
+ 608.5,
+ 509.5,
+ 1,
+ 539,
+ 558.5,
+ 1,
+ 634,
+ 539,
+ 1,
+ 558.5,
+ 584.5,
+ 1,
+ 624.5,
+ 528.5,
+ 1,
+ 605,
+ 595,
+ 1,
+ 601,
+ 593,
+ 1,
+ 640,
+ 634.5,
+ 1,
+ 598,
+ 672,
+ 1,
+ 616.5,
+ 700.5,
+ 1,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 1,
+ "image_id": 10128340000,
+ "bbox": [
+ 523.85,
+ 412.825892645,
+ 131.29999999999995,
+ 325.19681700999996
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000001,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 159,
+ 259,
+ 42,
+ 47
+ ],
+ "keypoints": [
+ 201,
+ 284.5,
+ 1,
+ 169.9334106,
+ 305.6158752,
+ 1,
+ 187.549942,
+ 265.1630859,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 139.5,
+ 307.5,
+ 1,
+ 193.5,
+ 319.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 209,
+ 371,
+ 1,
+ 144,
+ 365.5,
+ 1,
+ 231,
+ 392,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 222,
+ 337,
+ 1,
+ 241,
+ 341.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 267,
+ 416,
+ 1
+ ],
+ "track_id": 2,
+ "image_id": 10128340000,
+ "bbox": [
+ 120.375,
+ 242.53754878499996,
+ 165.75,
+ 196.08798833000003
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000002,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 372,
+ 205,
+ 44,
+ 44
+ ],
+ "keypoints": [
+ 410.5,
+ 230.5,
+ 1,
+ 387.8875732,
+ 251.1279602,
+ 1,
+ 398.5843201,
+ 208.9040375,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 359.5,
+ 262.5,
+ 1,
+ 409.5,
+ 266.5,
+ 1,
+ 337.5,
+ 308.5,
+ 1,
+ 450,
+ 306,
+ 1,
+ 292,
+ 314,
+ 1,
+ 480,
+ 311.5,
+ 1,
+ 390,
+ 339,
+ 1,
+ 409,
+ 339,
+ 1,
+ 405.5,
+ 418.5,
+ 1,
+ 447.5,
+ 366.5,
+ 1,
+ 391.5,
+ 464.5,
+ 1,
+ 437.5,
+ 440.5,
+ 1
+ ],
+ "track_id": 3,
+ "image_id": 10128340000,
+ "bbox": [
+ 263.8,
+ 170.56464312499998,
+ 244.39999999999998,
+ 332.27475125
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000003,
+ "iscrowd": false,
+ "num_keypoints": 15
+ },
+ {
+ "bbox_head": [
+ 693,
+ 410,
+ 44,
+ 49
+ ],
+ "keypoints": [
+ 718.5,
+ 440.5,
+ 1,
+ 717.704834,
+ 460.703125,
+ 1,
+ 712.9713745,
+ 414.8476562,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 694.5,
+ 474,
+ 1,
+ 743.5,
+ 472.5,
+ 1,
+ 681.5,
+ 530.5,
+ 1,
+ 757.5,
+ 523.5,
+ 1,
+ 667.5,
+ 564.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 705.5,
+ 563.5,
+ 1,
+ 737.5,
+ 560.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 692.5,
+ 607.5,
+ 1,
+ 716.5,
+ 603.5,
+ 1
+ ],
+ "track_id": 4,
+ "image_id": 10128340000,
+ "bbox": [
+ 654.0,
+ 385.94980463,
+ 117.0,
+ 250.44804694000004
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000004,
+ "iscrowd": false,
+ "num_keypoints": 12
+ },
+ {
+ "bbox_head": [
+ 923,
+ 347,
+ 46,
+ 58
+ ],
+ "keypoints": [
+ 965.5,
+ 382.5,
+ 1,
+ 933.9436646,
+ 403.0452576,
+ 1,
+ 955.0422363,
+ 355.7160645,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 922.5,
+ 403.5,
+ 1,
+ 932.5,
+ 431.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 960,
+ 475.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 991.5,
+ 462.5,
+ 1,
+ 934.5,
+ 512.5,
+ 1,
+ 922.5,
+ 506.5,
+ 1,
+ 946.5,
+ 567.5,
+ 1,
+ 964,
+ 578,
+ 1,
+ 900.5,
+ 598,
+ 1,
+ 936,
+ 634.5,
+ 1
+ ],
+ "track_id": 5,
+ "image_id": 10128340000,
+ "bbox": [
+ 886.85,
+ 313.89847417500005,
+ 118.29999999999995,
+ 362.4191161499999
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000005,
+ "iscrowd": false,
+ "num_keypoints": 13
+ },
+ {
+ "bbox_head": [
+ 691,
+ 179,
+ 43,
+ 52
+ ],
+ "keypoints": [
+ 708.5,
+ 212.5,
+ 1,
+ 722.6444702,
+ 230.0113831,
+ 1,
+ 704.8916626,
+ 186.2414551,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 712,
+ 244,
+ 1,
+ 742,
+ 237.5,
+ 1,
+ 723,
+ 293.5,
+ 1,
+ 745.5,
+ 281.5,
+ 1,
+ 692,
+ 319,
+ 1,
+ 0,
+ 0,
+ 0,
+ 722,
+ 323.5,
+ 1,
+ 748.5,
+ 314,
+ 1,
+ 657.5,
+ 301.5,
+ 1,
+ 668.5,
+ 299.5,
+ 1,
+ 670.5,
+ 367.5,
+ 1,
+ 689.5,
+ 362.5,
+ 1
+ ],
+ "track_id": 6,
+ "image_id": 10128340000,
+ "bbox": [
+ 643.85,
+ 159.05267336499998,
+ 118.29999999999995,
+ 235.63610837
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000006,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 927,
+ 160,
+ 39,
+ 52
+ ],
+ "keypoints": [
+ 952,
+ 189,
+ 1,
+ 946.763916,
+ 211.9986572,
+ 1,
+ 946.302063,
+ 166.5010071,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 914.5,
+ 234,
+ 1,
+ 979.5,
+ 236.5,
+ 1,
+ 890.5,
+ 270.5,
+ 1,
+ 998.5,
+ 286.5,
+ 1,
+ 894.5,
+ 324,
+ 1,
+ 0,
+ 0,
+ 0,
+ 932,
+ 326.5,
+ 1,
+ 958.5,
+ 327.5,
+ 1,
+ 1000.5,
+ 340.5,
+ 1,
+ 993.5,
+ 372.5,
+ 1,
+ 955.5,
+ 383.5,
+ 1,
+ 959.5,
+ 446.5,
+ 1
+ ],
+ "track_id": 7,
+ "image_id": 10128340000,
+ "bbox": [
+ 874.0,
+ 124.50115816500005,
+ 143.0,
+ 363.99869076999994
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000007,
+ "iscrowd": false,
+ "num_keypoints": 14
+ },
+ {
+ "bbox_head": [
+ 1367,
+ 427,
+ 47,
+ 45
+ ],
+ "keypoints": [
+ 1406,
+ 451,
+ 1,
+ 1379.198608,
+ 472.946106,
+ 1,
+ 1398.976074,
+ 431.9154358,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1375.5,
+ 467.5,
+ 1,
+ 1372,
+ 501,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1343.5,
+ 534.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1339.5,
+ 573.5,
+ 1,
+ 1381.5,
+ 531.5,
+ 1,
+ 1376,
+ 539.5,
+ 1,
+ 1452.5,
+ 524.5,
+ 1,
+ 1453.5,
+ 535.5,
+ 1,
+ 1469.5,
+ 603.5,
+ 1,
+ 1466,
+ 610,
+ 1
+ ],
+ "track_id": 8,
+ "image_id": 10128340000,
+ "bbox": [
+ 1320.0,
+ 405.20275117000006,
+ 169.0,
+ 231.50993345999996
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000008,
+ "iscrowd": false,
+ "num_keypoints": 13
+ },
+ {
+ "bbox_head": [
+ 1378,
+ 204,
+ 40,
+ 44
+ ],
+ "keypoints": [
+ 1389,
+ 234,
+ 1,
+ 1404.137573,
+ 248.9802094,
+ 1,
+ 1393.396851,
+ 208.7648468,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1375,
+ 272,
+ 1,
+ 1442.5,
+ 260.5,
+ 1,
+ 1374,
+ 315,
+ 1,
+ 1468,
+ 303.5,
+ 1,
+ 1367,
+ 340.5,
+ 1,
+ 1462.5,
+ 330.5,
+ 1,
+ 1407,
+ 349.5,
+ 1,
+ 1439,
+ 340.5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "track_id": 9,
+ "image_id": 10128340000,
+ "bbox": [
+ 1351.85,
+ 187.65457382,
+ 131.30000000000018,
+ 182.95569916
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000009,
+ "iscrowd": false,
+ "num_keypoints": 11
+ },
+ {
+ "bbox_head": [
+ 407,
+ -29,
+ 35,
+ 40
+ ],
+ "keypoints": [
+ 0,
+ 0,
+ 0,
+ 425.1159668,
+ 12.25136662,
+ 1,
+ 424.0380249,
+ -24.93852425,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 455.5,
+ 21.5,
+ 1,
+ 395.5,
+ 29.5,
+ 1,
+ 474.5,
+ 64.5,
+ 1,
+ 391.5,
+ 67,
+ 1,
+ 474,
+ 108,
+ 1,
+ 379,
+ 107,
+ 1,
+ 446,
+ 88,
+ 1,
+ 426,
+ 88,
+ 1,
+ 424,
+ 113,
+ 1,
+ 403,
+ 113,
+ 1,
+ 430,
+ 173,
+ 1,
+ 415,
+ 171,
+ 1
+ ],
+ "track_id": 10,
+ "image_id": 10128340000,
+ "bbox": [
+ 364.675,
+ -54.62930288750002,
+ 124.14999999999998,
+ 257.32008152500003
+ ],
+ "scores": [],
+ "category_id": 1,
+ "id": 1012834000010,
+ "iscrowd": false,
+ "num_keypoints": 14
+ }
+ ]
+}
diff --git a/vendor/ViTPose/tests/data/posetrack18/images/val/003418_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/images/val/003418_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..674831400ab364e665af6c35072fb0141f973406
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/images/val/003418_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/posetrack18/images/val/009473_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/images/val/009473_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8e5e87fd6c08ee8cda40b98e6c059aeb9d428b14
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/images/val/009473_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/posetrack18/images/val/012834_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/images/val/012834_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7a532a34571445d5fb534f8ca67edfabe46a8d72
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/images/val/012834_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/posetrack18/mask/val/003418_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/mask/val/003418_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b853b95dad8c9b445b06c250a16469e2c153fb1e
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/mask/val/003418_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/posetrack18/mask/val/009473_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/mask/val/009473_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f71150182e227807c1dd34f5598ec3d68f8f27ec
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/mask/val/009473_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/posetrack18/mask/val/012834_mpii_test/000000.jpg b/vendor/ViTPose/tests/data/posetrack18/mask/val/012834_mpii_test/000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7d0b60bfb80044b49da8a86ed8d324ceb51aabad
Binary files /dev/null and b/vendor/ViTPose/tests/data/posetrack18/mask/val/012834_mpii_test/000000.jpg differ
diff --git a/vendor/ViTPose/tests/data/rhd/00111.png b/vendor/ViTPose/tests/data/rhd/00111.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f9c3be1770586bf70cdc42adf5b638f9d9d2919
Binary files /dev/null and b/vendor/ViTPose/tests/data/rhd/00111.png differ
diff --git a/vendor/ViTPose/tests/data/rhd/01111.png b/vendor/ViTPose/tests/data/rhd/01111.png
new file mode 100644
index 0000000000000000000000000000000000000000..c2a9644495bc0d4633aeb3a61c6bc6b5d83683ca
Binary files /dev/null and b/vendor/ViTPose/tests/data/rhd/01111.png differ
diff --git a/vendor/ViTPose/tests/data/rhd/11111.png b/vendor/ViTPose/tests/data/rhd/11111.png
new file mode 100644
index 0000000000000000000000000000000000000000..e783f8af5e5c63937b92ea236bad7b3670d27d0d
Binary files /dev/null and b/vendor/ViTPose/tests/data/rhd/11111.png differ
diff --git a/vendor/ViTPose/tests/data/rhd/test_rhd.json b/vendor/ViTPose/tests/data/rhd/test_rhd.json
new file mode 100644
index 0000000000000000000000000000000000000000..d469c88192fb1e28ccb7174d12eb2ad5b33b7c1b
--- /dev/null
+++ b/vendor/ViTPose/tests/data/rhd/test_rhd.json
@@ -0,0 +1,857 @@
+{
+ "info": {
+ "description": "RHD",
+ "version": "1.1",
+ "year": "2021",
+ "date_created": "2021/03/13"
+ },
+ "licenses": "",
+ "images": [
+ {
+ "file_name": "00111.png",
+ "height": 320,
+ "width": 320,
+ "id": 111,
+ "cam_param": {
+ "focal": [
+ 299.0,
+ 299.0
+ ],
+ "princpt": [
+ 160.0,
+ 160.0
+ ]
+ }
+ },
+ {
+ "file_name": "01111.png",
+ "height": 320,
+ "width": 320,
+ "id": 1111,
+ "cam_param": {
+ "focal": [
+ 305.8999938964844,
+ 305.8999938964844
+ ],
+ "princpt": [
+ 160.0,
+ 160.0
+ ]
+ }
+ },
+ {
+ "file_name": "11111.png",
+ "height": 320,
+ "width": 320,
+ "id": 11111,
+ "cam_param": {
+ "focal": [
+ 263.20001220703125,
+ 263.20001220703125
+ ],
+ "princpt": [
+ 160.0,
+ 160.0
+ ]
+ }
+ }
+ ],
+ "annotations": [
+ {
+ "id": 111,
+ "image_id": 111,
+ "category_id": 1,
+ "iscrowd": 0,
+ "bbox": [
+ 162.5699920654297,
+ 63.858001708984375,
+ 93.96000671386719,
+ 121.4639892578125
+ ],
+ "area": 11412.757246157154,
+ "keypoints": [
+ [
+ 245.8000030517578,
+ 73.9800033569336,
+ 1
+ ],
+ [
+ 170.39999389648438,
+ 115.0,
+ 1
+ ],
+ [
+ 180.39999389648438,
+ 103.69999694824219,
+ 1
+ ],
+ [
+ 194.10000610351562,
+ 89.16999816894531,
+ 1
+ ],
+ [
+ 220.0,
+ 82.51000213623047,
+ 1
+ ],
+ [
+ 193.0,
+ 163.60000610351562,
+ 1
+ ],
+ [
+ 194.5,
+ 154.1999969482422,
+ 1
+ ],
+ [
+ 196.6999969482422,
+ 144.39999389648438,
+ 1
+ ],
+ [
+ 202.39999389648438,
+ 123.69999694824219,
+ 1
+ ],
+ [
+ 208.39999389648438,
+ 175.1999969482422,
+ 1
+ ],
+ [
+ 210.6999969482422,
+ 164.3000030517578,
+ 1
+ ],
+ [
+ 211.60000610351562,
+ 152.10000610351562,
+ 1
+ ],
+ [
+ 215.60000610351562,
+ 131.5,
+ 1
+ ],
+ [
+ 228.10000610351562,
+ 167.1999969482422,
+ 1
+ ],
+ [
+ 228.6999969482422,
+ 159.0,
+ 1
+ ],
+ [
+ 228.1999969482422,
+ 151.10000610351562,
+ 1
+ ],
+ [
+ 228.6999969482422,
+ 132.1999969482422,
+ 1
+ ],
+ [
+ 248.6999969482422,
+ 143.6999969482422,
+ 1
+ ],
+ [
+ 247.6999969482422,
+ 148.3000030517578,
+ 1
+ ],
+ [
+ 246.39999389648438,
+ 148.10000610351562,
+ 1
+ ],
+ [
+ 244.39999389648438,
+ 129.1999969482422,
+ 1
+ ]
+ ],
+ "joint_cam": [
+ [
+ 117.9000015258789,
+ -118.19999694824219,
+ 411.0
+ ],
+ [
+ 17.09000015258789,
+ -73.70999908447266,
+ 489.5
+ ],
+ [
+ 32.540000915527344,
+ -89.88999938964844,
+ 477.20001220703125
+ ],
+ [
+ 52.959999084472656,
+ -110.0,
+ 464.3999938964844
+ ],
+ [
+ 87.95999908447266,
+ -113.70000457763672,
+ 438.6999816894531
+ ],
+ [
+ 56.71000289916992,
+ 6.140999794006348,
+ 514.1000366210938
+ ],
+ [
+ 56.7599983215332,
+ -9.583000183105469,
+ 492.1000061035156
+ ],
+ [
+ 57.97999954223633,
+ -24.700000762939453,
+ 472.79998779296875
+ ],
+ [
+ 64.52000427246094,
+ -55.34000015258789,
+ 455.5
+ ],
+ [
+ 81.20000457763672,
+ 25.459999084472656,
+ 501.6999816894531
+ ],
+ [
+ 81.8800048828125,
+ 6.9070000648498535,
+ 483.3999938964844
+ ],
+ [
+ 80.05999755859375,
+ -12.220000267028809,
+ 464.1000061035156
+ ],
+ [
+ 83.37000274658203,
+ -42.70000076293945,
+ 448.0
+ ],
+ [
+ 113.20000457763672,
+ 11.960000038146973,
+ 496.6999816894531
+ ],
+ [
+ 109.0999984741211,
+ -1.5170000791549683,
+ 475.20001220703125
+ ],
+ [
+ 103.20000457763672,
+ -13.480000495910645,
+ 452.6999816894531
+ ],
+ [
+ 102.0,
+ -41.32999801635742,
+ 443.79998779296875
+ ],
+ [
+ 143.09999084472656,
+ -26.25,
+ 482.3000183105469
+ ],
+ [
+ 137.0,
+ -18.23000144958496,
+ 467.29998779296875
+ ],
+ [
+ 130.60000610351562,
+ -18.049999237060547,
+ 452.0
+ ],
+ [
+ 124.9000015258789,
+ -45.51000213623047,
+ 442.0999755859375
+ ]
+ ],
+ "hand_type": "left"
+ },
+ {
+ "id": 1111,
+ "image_id": 1111,
+ "category_id": 1,
+ "iscrowd": 0,
+ "bbox": [
+ 163.91000366210938,
+ 92.3900146484375,
+ 59.88001251220703,
+ 118.91999053955078
+ ],
+ "area": 7120.930521459843,
+ "keypoints": [
+ [
+ 218.8000030517578,
+ 201.39999389648438,
+ 1
+ ],
+ [
+ 168.89999389648438,
+ 147.89999389648438,
+ 1
+ ],
+ [
+ 171.5,
+ 161.6999969482422,
+ 1
+ ],
+ [
+ 182.3000030517578,
+ 178.0,
+ 1
+ ],
+ [
+ 202.60000610351562,
+ 191.10000610351562,
+ 1
+ ],
+ [
+ 184.6999969482422,
+ 174.1999969482422,
+ 1
+ ],
+ [
+ 175.6999969482422,
+ 162.39999389648438,
+ 1
+ ],
+ [
+ 180.60000610351562,
+ 148.10000610351562,
+ 1
+ ],
+ [
+ 200.5,
+ 153.3000030517578,
+ 1
+ ],
+ [
+ 181.5,
+ 173.89999389648438,
+ 1
+ ],
+ [
+ 180.0,
+ 156.5,
+ 1
+ ],
+ [
+ 185.10000610351562,
+ 140.89999389648438,
+ 1
+ ],
+ [
+ 205.6999969482422,
+ 146.6999969482422,
+ 1
+ ],
+ [
+ 175.39999389648438,
+ 160.3000030517578,
+ 1
+ ],
+ [
+ 183.60000610351562,
+ 145.3000030517578,
+ 1
+ ],
+ [
+ 194.89999389648438,
+ 132.6999969482422,
+ 1
+ ],
+ [
+ 208.8000030517578,
+ 145.6999969482422,
+ 1
+ ],
+ [
+ 199.39999389648438,
+ 102.30000305175781,
+ 1
+ ],
+ [
+ 201.89999389648438,
+ 116.19999694824219,
+ 1
+ ],
+ [
+ 208.10000610351562,
+ 127.4000015258789,
+ 1
+ ],
+ [
+ 210.60000610351562,
+ 148.5,
+ 1
+ ]
+ ],
+ "joint_cam": [
+ [
+ 96.12000274658203,
+ 67.63999938964844,
+ 499.70001220703125
+ ],
+ [
+ 15.380000114440918,
+ -20.969999313354492,
+ 528.0
+ ],
+ [
+ 20.209999084472656,
+ 3.059999942779541,
+ 536.0999755859375
+ ],
+ [
+ 38.869998931884766,
+ 31.470001220703125,
+ 533.2999877929688
+ ],
+ [
+ 73.62000274658203,
+ 53.81999969482422,
+ 529.2000122070312
+ ],
+ [
+ 42.380001068115234,
+ 24.42999839782715,
+ 524.7999877929688
+ ],
+ [
+ 27.260000228881836,
+ 4.211999893188477,
+ 529.7999877929688
+ ],
+ [
+ 36.38999938964844,
+ -21.010000228881836,
+ 540.0999755859375
+ ],
+ [
+ 71.79000091552734,
+ -11.949999809265137,
+ 542.7000122070312
+ ],
+ [
+ 35.5,
+ 22.989999771118164,
+ 505.5999755859375
+ ],
+ [
+ 33.540000915527344,
+ -5.878000259399414,
+ 512.9000244140625
+ ],
+ [
+ 42.69000244140625,
+ -32.55999755859375,
+ 521.0999755859375
+ ],
+ [
+ 78.16999816894531,
+ -22.75,
+ 522.8999633789062
+ ],
+ [
+ 24.560001373291016,
+ 0.47450000047683716,
+ 487.79998779296875
+ ],
+ [
+ 38.18000030517578,
+ -23.760000228881836,
+ 494.1000061035156
+ ],
+ [
+ 57.27000045776367,
+ -44.72999954223633,
+ 501.3999938964844
+ ],
+ [
+ 80.33000183105469,
+ -23.520000457763672,
+ 503.8999938964844
+ ],
+ [
+ 58.06999969482422,
+ -85.06999969482422,
+ 450.8999938964844
+ ],
+ [
+ 62.619998931884766,
+ -65.56999969482422,
+ 457.5
+ ],
+ [
+ 73.29999542236328,
+ -49.72999954223633,
+ 466.3999938964844
+ ],
+ [
+ 79.80000305175781,
+ -18.200000762939453,
+ 482.3000183105469
+ ]
+ ],
+ "hand_type": "left"
+ },
+ {
+ "id": 11111,
+ "image_id": 11111,
+ "category_id": 1,
+ "iscrowd": 0,
+ "bbox": [
+ 162.8300018310547,
+ 135.88999938964844,
+ 59.63999557495117,
+ 48.1200065612793
+ ],
+ "area": 2869.8769783813186,
+ "keypoints": [
+ [
+ 167.8000030517578,
+ 154.10000610351562,
+ 1
+ ],
+ [
+ 217.5,
+ 146.60000610351562,
+ 1
+ ],
+ [
+ 207.8000030517578,
+ 146.39999389648438,
+ 1
+ ],
+ [
+ 197.10000610351562,
+ 144.8000030517578,
+ 1
+ ],
+ [
+ 181.5,
+ 148.0,
+ 1
+ ],
+ [
+ 206.8000030517578,
+ 167.1999969482422,
+ 1
+ ],
+ [
+ 206.5,
+ 160.1999969482422,
+ 1
+ ],
+ [
+ 202.10000610351562,
+ 150.89999389648438,
+ 1
+ ],
+ [
+ 191.39999389648438,
+ 139.89999389648438,
+ 1
+ ],
+ [
+ 200.0,
+ 170.0,
+ 1
+ ],
+ [
+ 200.39999389648438,
+ 163.6999969482422,
+ 1
+ ],
+ [
+ 198.0,
+ 155.6999969482422,
+ 1
+ ],
+ [
+ 186.39999389648438,
+ 145.10000610351562,
+ 1
+ ],
+ [
+ 194.6999969482422,
+ 172.39999389648438,
+ 1
+ ],
+ [
+ 195.1999969482422,
+ 168.10000610351562,
+ 1
+ ],
+ [
+ 191.8000030517578,
+ 160.89999389648438,
+ 1
+ ],
+ [
+ 182.0,
+ 151.6999969482422,
+ 1
+ ],
+ [
+ 198.1999969482422,
+ 180.0,
+ 1
+ ],
+ [
+ 195.0,
+ 176.3000030517578,
+ 1
+ ],
+ [
+ 189.10000610351562,
+ 170.39999389648438,
+ 1
+ ],
+ [
+ 177.3000030517578,
+ 160.1999969482422,
+ 1
+ ]
+ ],
+ "joint_cam": [
+ [
+ 20.479999542236328,
+ -15.470000267028809,
+ 693.2999877929688
+ ],
+ [
+ 138.40000915527344,
+ -32.17000198364258,
+ 633.2000122070312
+ ],
+ [
+ 118.0,
+ -33.62999725341797,
+ 649.2000122070312
+ ],
+ [
+ 94.87999725341797,
+ -38.78999710083008,
+ 673.699951171875
+ ],
+ [
+ 56.01000213623047,
+ -31.309999465942383,
+ 686.2999877929688
+ ],
+ [
+ 116.80000305175781,
+ 18.049999237060547,
+ 656.7999877929688
+ ],
+ [
+ 112.80000305175781,
+ 0.48660001158714294,
+ 638.2999877929688
+ ],
+ [
+ 99.94000244140625,
+ -21.559999465942383,
+ 625.199951171875
+ ],
+ [
+ 74.3699951171875,
+ -47.75,
+ 623.7999877929688
+ ],
+ [
+ 99.95999908447266,
+ 24.979999542236328,
+ 658.5
+ ],
+ [
+ 97.34000396728516,
+ 9.000999450683594,
+ 633.4000244140625
+ ],
+ [
+ 88.5199966430664,
+ -9.983000755310059,
+ 612.7999877929688
+ ],
+ [
+ 61.19000244140625,
+ -34.4900016784668,
+ 609.6000366210938
+ ],
+ [
+ 86.45000457763672,
+ 31.0,
+ 655.6000366210938
+ ],
+ [
+ 84.11000061035156,
+ 19.43000030517578,
+ 629.7000122070312
+ ],
+ [
+ 73.44000244140625,
+ 2.186999797821045,
+ 608.6000366210938
+ ],
+ [
+ 50.53000259399414,
+ -19.139999389648438,
+ 605.7000122070312
+ ],
+ [
+ 91.06999969482422,
+ 47.60000228881836,
+ 626.6000366210938
+ ],
+ [
+ 81.22000122070312,
+ 37.78000259399414,
+ 610.800048828125
+ ],
+ [
+ 66.75,
+ 23.989999771118164,
+ 604.0999755859375
+ ],
+ [
+ 39.94999694824219,
+ 0.4115999937057495,
+ 607.7999877929688
+ ]
+ ],
+ "hand_type": "right"
+ }
+ ],
+ "categories": [
+ {
+ "id": 1,
+ "name": "hand",
+ "supercategory": "hand",
+ "keypoints": [
+ "wrist",
+ "thumb1",
+ "thumb2",
+ "thumb3",
+ "thumb4",
+ "forefinger1",
+ "forefinger2",
+ "forefinger3",
+ "forefinger4",
+ "middle_finger1",
+ "middle_finger2",
+ "middle_finger3",
+ "middle_finger4",
+ "ring_finger1",
+ "ring_finger2",
+ "ring_finger3",
+ "ring_finger4",
+ "pinky_finger1",
+ "pinky_finger2",
+ "pinky_finger3",
+ "pinky_finger4"
+ ],
+ "skeleton": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ],
+ [
+ 4,
+ 5
+ ],
+ [
+ 1,
+ 6
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 9
+ ],
+ [
+ 1,
+ 10
+ ],
+ [
+ 10,
+ 11
+ ],
+ [
+ 11,
+ 12
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 1,
+ 14
+ ],
+ [
+ 14,
+ 15
+ ],
+ [
+ 15,
+ 16
+ ],
+ [
+ 16,
+ 17
+ ],
+ [
+ 1,
+ 18
+ ],
+ [
+ 18,
+ 19
+ ],
+ [
+ 19,
+ 20
+ ],
+ [
+ 20,
+ 21
+ ]
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/wflw/36_Football_americanfootball_ball_36_415.jpg b/vendor/ViTPose/tests/data/wflw/36_Football_americanfootball_ball_36_415.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c285b42ef890a0264a6f1e9092da173de3814be1
Binary files /dev/null and b/vendor/ViTPose/tests/data/wflw/36_Football_americanfootball_ball_36_415.jpg differ
diff --git a/vendor/ViTPose/tests/data/wflw/7_Cheering_Cheering_7_16.jpg b/vendor/ViTPose/tests/data/wflw/7_Cheering_Cheering_7_16.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e8cdae16e9c28c61a64cced97b7a8999bb81a228
Binary files /dev/null and b/vendor/ViTPose/tests/data/wflw/7_Cheering_Cheering_7_16.jpg differ
diff --git a/vendor/ViTPose/tests/data/wflw/test_wflw.json b/vendor/ViTPose/tests/data/wflw/test_wflw.json
new file mode 100644
index 0000000000000000000000000000000000000000..d25c9e536088789c3fe9187ad35790d6d7545364
--- /dev/null
+++ b/vendor/ViTPose/tests/data/wflw/test_wflw.json
@@ -0,0 +1,1287 @@
+{
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "face",
+ "keypoints": [],
+ "skeleton": []
+ }
+ ],
+ "images": [
+ {
+ "id": 2,
+ "file_name": "36_Football_americanfootball_ball_36_415.jpg",
+ "height": 661,
+ "width": 1024
+ },
+ {
+ "id": 12,
+ "file_name": "7_Cheering_Cheering_7_16.jpg",
+ "height": 1024,
+ "width": 1024
+ }
+ ],
+ "annotations": [
+ {
+ "image_id": 2,
+ "id": 2,
+ "keypoints": [
+ 440.696106,
+ 132.732559,
+ 1,
+ 441.125309,
+ 138.20883600000002,
+ 1,
+ 441.775067,
+ 143.662576,
+ 1,
+ 442.838757,
+ 149.050135,
+ 1,
+ 444.330483,
+ 154.335484,
+ 1,
+ 446.199104,
+ 159.499988,
+ 1,
+ 448.359594,
+ 164.549883,
+ 1,
+ 450.707972,
+ 169.515574,
+ 1,
+ 453.152313,
+ 174.43489,
+ 1,
+ 455.663647,
+ 179.320327,
+ 1,
+ 458.272826,
+ 184.154116,
+ 1,
+ 461.018412,
+ 188.911642,
+ 1,
+ 463.94567,
+ 193.559255,
+ 1,
+ 467.22343,
+ 197.963659,
+ 1,
+ 471.117738,
+ 201.826457,
+ 1,
+ 475.751887,
+ 204.749145,
+ 1,
+ 480.989902,
+ 206.343046,
+ 1,
+ 487.065598,
+ 206.402104,
+ 1,
+ 492.947114,
+ 204.866338,
+ 1,
+ 498.297161,
+ 201.969714,
+ 1,
+ 503.013751,
+ 198.11849,
+ 1,
+ 507.157957,
+ 193.652851,
+ 1,
+ 510.843251,
+ 188.799396,
+ 1,
+ 514.1297,
+ 183.667342,
+ 1,
+ 516.955071,
+ 178.26847800000004,
+ 1,
+ 519.2164700000002,
+ 172.61085,
+ 1,
+ 520.848493,
+ 166.740574,
+ 1,
+ 521.9502669999998,
+ 160.74653700000005,
+ 1,
+ 522.708073,
+ 154.698484,
+ 1,
+ 523.292433,
+ 148.630853,
+ 1,
+ 523.782152,
+ 142.554803,
+ 1,
+ 524.208992,
+ 136.47398700000002,
+ 1,
+ 524.604004,
+ 130.391006,
+ 1,
+ 448.863007,
+ 130.600006,
+ 1,
+ 454.970001,
+ 126.268997,
+ 1,
+ 460.80896,
+ 127.033981,
+ 1,
+ 466.639008,
+ 127.488991,
+ 1,
+ 471.871002,
+ 128.024002,
+ 1,
+ 471.839966,
+ 131.30699199999998,
+ 1,
+ 466.7300110000001,
+ 130.602005,
+ 1,
+ 460.683014,
+ 129.84198,
+ 1,
+ 455.03299,
+ 128.447983,
+ 1,
+ 484.471008,
+ 126.532997,
+ 1,
+ 491.312988,
+ 124.467003,
+ 1,
+ 497.66098,
+ 122.172989,
+ 1,
+ 504.834991,
+ 123.182007,
+ 1,
+ 510.877014,
+ 127.14801,
+ 1,
+ 504.89801,
+ 125.372993,
+ 1,
+ 497.656982,
+ 124.851997,
+ 1,
+ 491.562012,
+ 127.464005,
+ 1,
+ 484.731995,
+ 129.934998,
+ 1,
+ 478.60199000000006,
+ 137.615005,
+ 1,
+ 478.797445,
+ 144.648194,
+ 1,
+ 478.710781,
+ 151.672784,
+ 1,
+ 479.648263,
+ 158.477047,
+ 1,
+ 472.342987,
+ 162.587006,
+ 1,
+ 476.080474,
+ 163.443556,
+ 1,
+ 479.895523,
+ 163.767872,
+ 1,
+ 484.256557,
+ 162.987089,
+ 1,
+ 488.335907,
+ 161.220047,
+ 1,
+ 454.908997,
+ 139.330002,
+ 1,
+ 457.994713,
+ 139.25393799999998,
+ 1,
+ 461.081185,
+ 139.258667,
+ 1,
+ 465.0926280000001,
+ 138.585731,
+ 1,
+ 469.109314,
+ 137.906326,
+ 1,
+ 465.638515,
+ 140.893484,
+ 1,
+ 461.276336,
+ 142.038503,
+ 1,
+ 457.94384,
+ 141.053913,
+ 1,
+ 488.993011,
+ 136.03999299999998,
+ 1,
+ 492.80749,
+ 136.399268,
+ 1,
+ 496.59449000000006,
+ 136.98136499999998,
+ 1,
+ 500.786029,
+ 137.41671000000002,
+ 1,
+ 504.984009,
+ 137.048096,
+ 1,
+ 501.171214,
+ 139.364812,
+ 1,
+ 496.775512,
+ 139.941385,
+ 1,
+ 492.595083,
+ 138.593753,
+ 1,
+ 468.338989,
+ 177.639496,
+ 1,
+ 472.57608,
+ 175.232479,
+ 1,
+ 477.20692,
+ 173.776125,
+ 1,
+ 480.569762,
+ 173.825898,
+ 1,
+ 483.565578,
+ 174.109503,
+ 1,
+ 490.367366,
+ 175.431598,
+ 1,
+ 496.381042,
+ 178.96504199999995,
+ 1,
+ 491.802489,
+ 182.339998,
+ 1,
+ 486.685901,
+ 184.599911,
+ 1,
+ 481.033928,
+ 184.016885,
+ 1,
+ 476.397867,
+ 182.850118,
+ 1,
+ 472.60944400000005,
+ 179.86823,
+ 1,
+ 469.541992,
+ 177.462006,
+ 1,
+ 474.959022,
+ 175.385376,
+ 1,
+ 480.747386,
+ 175.48909799999996,
+ 1,
+ 488.63756500000005,
+ 175.736854,
+ 1,
+ 495.716522,
+ 179.02507,
+ 1,
+ 488.642736,
+ 177.62467,
+ 1,
+ 481.422413,
+ 177.62252,
+ 1,
+ 475.491142,
+ 176.514907,
+ 1,
+ 461.279777,
+ 140.890199,
+ 1,
+ 496.453474,
+ 137.763648,
+ 1
+ ],
+ "num_keypoints": 98,
+ "bbox": [
+ 432.3053162,
+ 113.7500775,
+ 100.68947760000003,
+ 101.074938
+ ],
+ "iscrowd": 0,
+ "area": 10177.182705672392,
+ "category_id": 1,
+ "center": [
+ 482.5,
+ 164.5
+ ],
+ "scale": 0.425
+ },
+ {
+ "image_id": 12,
+ "id": 12,
+ "keypoints": [
+ 737.310974,
+ 302.2290040000001,
+ 1,
+ 735.612565,
+ 307.91392,
+ 1,
+ 733.935549,
+ 313.605176,
+ 1,
+ 732.301247,
+ 319.308828,
+ 1,
+ 730.746049,
+ 325.034488,
+ 1,
+ 729.333061,
+ 330.796748,
+ 1,
+ 728.130165,
+ 336.606254,
+ 1,
+ 727.2343940000002,
+ 342.470465,
+ 1,
+ 726.769826,
+ 348.383693,
+ 1,
+ 726.8573719999998,
+ 354.313744,
+ 1,
+ 727.578398,
+ 360.199602,
+ 1,
+ 729.013058,
+ 365.95307,
+ 1,
+ 731.013678,
+ 371.537441,
+ 1,
+ 733.2071559999998,
+ 377.050279,
+ 1,
+ 735.47649,
+ 382.530816,
+ 1,
+ 738.714025,
+ 387.475552,
+ 1,
+ 743.5621620000002,
+ 390.822643,
+ 1,
+ 749.412371,
+ 392.269205,
+ 1,
+ 755.438439,
+ 391.936151,
+ 1,
+ 761.2417849999998,
+ 390.247929,
+ 1,
+ 766.732837,
+ 387.708216,
+ 1,
+ 772.008913,
+ 384.743038,
+ 1,
+ 777.11136,
+ 381.48808,
+ 1,
+ 782.0033440000002,
+ 377.925076,
+ 1,
+ 786.614896,
+ 374.007594,
+ 1,
+ 790.746727,
+ 369.589677,
+ 1,
+ 794.345917,
+ 364.72535,
+ 1,
+ 797.705108,
+ 359.69007,
+ 1,
+ 800.979223,
+ 354.59913,
+ 1,
+ 804.030756,
+ 349.372408,
+ 1,
+ 806.796596,
+ 343.988855,
+ 1,
+ 809.360701,
+ 338.505917,
+ 1,
+ 811.822571,
+ 332.976135,
+ 1,
+ 738.142029,
+ 316.583008,
+ 1,
+ 745.198975,
+ 314.119995,
+ 1,
+ 749.843933,
+ 315.911957,
+ 1,
+ 754.8779910000002,
+ 317.789001,
+ 1,
+ 759.728943,
+ 321.003967,
+ 1,
+ 758.924988,
+ 323.009979,
+ 1,
+ 753.684021,
+ 320.766998,
+ 1,
+ 748.650024,
+ 318.889984,
+ 1,
+ 743.77301,
+ 317.5,
+ 1,
+ 776.567993,
+ 325.3989870000001,
+ 1,
+ 783.789917,
+ 325.703003,
+ 1,
+ 791.1229860000002,
+ 326.806976,
+ 1,
+ 797.598999,
+ 328.432007,
+ 1,
+ 802.210022,
+ 335.786987,
+ 1,
+ 796.032959,
+ 331.798981,
+ 1,
+ 789.445007,
+ 330.45401,
+ 1,
+ 782.429016,
+ 328.828003,
+ 1,
+ 775.448975,
+ 328.189972,
+ 1,
+ 766.489014,
+ 330.141998,
+ 1,
+ 763.441048,
+ 338.395354,
+ 1,
+ 760.1896519999998,
+ 346.556714,
+ 1,
+ 758.378882,
+ 354.899379,
+ 1,
+ 749.651978,
+ 347.691986,
+ 1,
+ 752.802228,
+ 352.909886,
+ 1,
+ 757.095133,
+ 357.015939,
+ 1,
+ 762.194149,
+ 356.25881,
+ 1,
+ 767.192932,
+ 354.72403,
+ 1,
+ 743.380371,
+ 322.295288,
+ 1,
+ 746.923719,
+ 321.313264,
+ 1,
+ 750.553004,
+ 321.784633,
+ 1,
+ 754.640226,
+ 323.780582,
+ 1,
+ 756.981018,
+ 327.664001,
+ 1,
+ 752.689438,
+ 328.511655,
+ 1,
+ 748.3559,
+ 328.079052,
+ 1,
+ 744.9315429999998,
+ 326.014911,
+ 1,
+ 778.2459719999998,
+ 334.537994,
+ 1,
+ 782.672983,
+ 333.246396,
+ 1,
+ 787.060109,
+ 334.610516,
+ 1,
+ 790.163963,
+ 337.265647,
+ 1,
+ 792.42627,
+ 340.685699,
+ 1,
+ 788.630666,
+ 341.780179,
+ 1,
+ 784.70712,
+ 341.598866,
+ 1,
+ 780.419418,
+ 339.058276,
+ 1,
+ 740.483521,
+ 361.065002,
+ 1,
+ 746.374246,
+ 362.133178,
+ 1,
+ 751.741875,
+ 364.488928,
+ 1,
+ 753.4344530000002,
+ 365.103217,
+ 1,
+ 755.192267,
+ 365.240915,
+ 1,
+ 759.601523,
+ 366.89777,
+ 1,
+ 763.757446,
+ 369.269043,
+ 1,
+ 759.467306,
+ 371.294422,
+ 1,
+ 755.0135389999998,
+ 372.896933,
+ 1,
+ 750.305609,
+ 372.79702,
+ 1,
+ 745.439744,
+ 370.475123,
+ 1,
+ 742.098872,
+ 366.24297,
+ 1,
+ 742.159546,
+ 363.090027,
+ 1,
+ 747.630617,
+ 364.064427,
+ 1,
+ 752.565978,
+ 366.666498,
+ 1,
+ 757.357922,
+ 367.478878,
+ 1,
+ 761.918091,
+ 369.147156,
+ 1,
+ 756.790297,
+ 369.722393,
+ 1,
+ 751.666194,
+ 369.277424,
+ 1,
+ 746.561781,
+ 366.750798,
+ 1,
+ 749.141667,
+ 325.096875,
+ 1,
+ 785.415625,
+ 337.221875,
+ 1
+ ],
+ "num_keypoints": 98,
+ "bbox": [
+ 718.2645514999999,
+ 293.2249839000001,
+ 102.06329400000016,
+ 108.0482411999999
+ ],
+ "iscrowd": 0,
+ "area": 11027.759407778518,
+ "category_id": 1,
+ "center": [
+ 769.0,
+ 347.5
+ ],
+ "scale": 0.455
+ },
+ {
+ "image_id": 12,
+ "id": 40,
+ "keypoints": [
+ 744.762024,
+ 731.096985,
+ 1,
+ 742.708957,
+ 737.737215,
+ 1,
+ 740.7710030000002,
+ 744.411776,
+ 1,
+ 739.0626599999998,
+ 751.148374,
+ 1,
+ 737.733779,
+ 757.96915,
+ 1,
+ 736.981188,
+ 764.875717,
+ 1,
+ 737.0235700000002,
+ 771.821884,
+ 1,
+ 737.765315,
+ 778.7307400000002,
+ 1,
+ 738.86145,
+ 785.593963,
+ 1,
+ 740.013747,
+ 792.448173,
+ 1,
+ 741.1454200000002,
+ 799.305824,
+ 1,
+ 742.3103629999998,
+ 806.157846,
+ 1,
+ 743.6804400000002,
+ 812.971502,
+ 1,
+ 744.630958,
+ 819.850678,
+ 1,
+ 745.515035,
+ 826.73686,
+ 1,
+ 748.690323,
+ 832.821804,
+ 1,
+ 754.099426,
+ 837.1631169999998,
+ 1,
+ 760.77823,
+ 840.673624,
+ 1,
+ 768.147343,
+ 842.162887,
+ 1,
+ 775.328568,
+ 840.156231,
+ 1,
+ 781.549446,
+ 835.8637679999998,
+ 1,
+ 787.79765,
+ 831.6084860000002,
+ 1,
+ 794.115317,
+ 827.4566940000002,
+ 1,
+ 800.175629,
+ 822.943352,
+ 1,
+ 805.771167,
+ 817.8629549999998,
+ 1,
+ 811.103558,
+ 812.504673,
+ 1,
+ 816.124275,
+ 806.855559,
+ 1,
+ 820.577538,
+ 800.750585,
+ 1,
+ 824.5104719999998,
+ 794.29608,
+ 1,
+ 828.03107,
+ 787.6072519999999,
+ 1,
+ 831.192861,
+ 780.74112,
+ 1,
+ 834.09596,
+ 773.761204,
+ 1,
+ 836.867371,
+ 766.727722,
+ 1,
+ 747.40802,
+ 744.338989,
+ 1,
+ 756.8099980000002,
+ 739.810974,
+ 1,
+ 762.8229980000001,
+ 742.584961,
+ 1,
+ 769.116028,
+ 746.8430179999998,
+ 1,
+ 774.4959719999998,
+ 750.2109379999998,
+ 1,
+ 772.661011,
+ 755.225037,
+ 1,
+ 766.9570309999998,
+ 751.564026,
+ 1,
+ 760.413025,
+ 748.731018,
+ 1,
+ 754.565979,
+ 745.8809809999998,
+ 1,
+ 794.039978,
+ 759.955017,
+ 1,
+ 802.140991,
+ 759.838989,
+ 1,
+ 809.362976,
+ 760.9539179999998,
+ 1,
+ 817.004089,
+ 762.0819700000002,
+ 1,
+ 822.989014,
+ 770.3709719999998,
+ 1,
+ 814.904053,
+ 767.382935,
+ 1,
+ 807.603088,
+ 766.098022,
+ 1,
+ 800.3809809999998,
+ 764.984009,
+ 1,
+ 792.616028,
+ 763.8099980000002,
+ 1,
+ 781.869995,
+ 762.830994,
+ 1,
+ 777.671572,
+ 775.3052809999998,
+ 1,
+ 773.599147,
+ 787.815521,
+ 1,
+ 768.793789,
+ 799.963975,
+ 1,
+ 759.9530639999998,
+ 790.217224,
+ 1,
+ 763.438017,
+ 796.8758799999998,
+ 1,
+ 768.200237,
+ 802.5832889999998,
+ 1,
+ 776.714431,
+ 800.940712,
+ 1,
+ 784.7540280000002,
+ 796.731995,
+ 1,
+ 752.452454,
+ 752.677429,
+ 1,
+ 758.142965,
+ 751.832449,
+ 1,
+ 763.787095,
+ 752.7987400000002,
+ 1,
+ 768.450332,
+ 755.789755,
+ 1,
+ 771.7440190000002,
+ 760.278992,
+ 1,
+ 766.108723,
+ 761.570158,
+ 1,
+ 760.4538719999998,
+ 760.565587,
+ 1,
+ 755.866811,
+ 757.23883,
+ 1,
+ 791.400024,
+ 769.619995,
+ 1,
+ 797.455167,
+ 766.7197309999998,
+ 1,
+ 804.060133,
+ 768.1290280000002,
+ 1,
+ 808.641021,
+ 770.830526,
+ 1,
+ 812.1015620000002,
+ 774.896179,
+ 1,
+ 807.2036360000002,
+ 776.0263259999998,
+ 1,
+ 802.194302,
+ 776.233114,
+ 1,
+ 796.1303330000002,
+ 774.055774,
+ 1,
+ 756.312012,
+ 806.9689940000002,
+ 1,
+ 761.152525,
+ 807.042413,
+ 1,
+ 765.388771,
+ 809.286819,
+ 1,
+ 766.746996,
+ 810.379537,
+ 1,
+ 768.3692599999998,
+ 811.051278,
+ 1,
+ 774.090223,
+ 811.996037,
+ 1,
+ 779.304504,
+ 814.633972,
+ 1,
+ 774.153851,
+ 817.59002,
+ 1,
+ 768.453259,
+ 819.044276,
+ 1,
+ 762.763688,
+ 817.53634,
+ 1,
+ 759.5313259999998,
+ 814.798765,
+ 1,
+ 757.4994230000002,
+ 811.065074,
+ 1,
+ 758.089478,
+ 808.210449,
+ 1,
+ 762.1575849999998,
+ 809.557143,
+ 1,
+ 765.7118929999998,
+ 811.955629,
+ 1,
+ 771.596042,
+ 812.993758,
+ 1,
+ 777.41687,
+ 814.616699,
+ 1,
+ 770.648339,
+ 816.4749009999998,
+ 1,
+ 763.8826849999998,
+ 815.569504,
+ 1,
+ 760.502713,
+ 812.2854629999998,
+ 1,
+ 762.746584,
+ 755.108075,
+ 1,
+ 802.488199,
+ 770.511801,
+ 1
+ ],
+ "num_keypoints": 98,
+ "bbox": [
+ 726.9925697,
+ 719.9903948,
+ 119.86341960000004,
+ 48.00960520000001
+ ],
+ "iscrowd": 0,
+ "area": 5754.595452917945,
+ "category_id": 1,
+ "center": [
+ 786.5,
+ 787.0
+ ],
+ "scale": 0.56
+ },
+ {
+ "image_id": 12,
+ "id": 1169,
+ "keypoints": [
+ 473.170593,
+ 353.335999,
+ 1,
+ 472.454142,
+ 358.228909,
+ 1,
+ 471.788643,
+ 363.128975,
+ 1,
+ 471.219892,
+ 368.041068,
+ 1,
+ 470.94488600000005,
+ 372.975959,
+ 1,
+ 471.550405,
+ 377.8716,
+ 1,
+ 473.340887,
+ 382.473922,
+ 1,
+ 475.160443,
+ 387.069845,
+ 1,
+ 476.591016,
+ 391.802996,
+ 1,
+ 478.183709,
+ 396.482262,
+ 1,
+ 480.41786,
+ 400.887374,
+ 1,
+ 483.28217300000006,
+ 404.915875,
+ 1,
+ 485.94821,
+ 409.072889,
+ 1,
+ 487.708742,
+ 413.688483,
+ 1,
+ 490.510995,
+ 417.693684,
+ 1,
+ 494.524824,
+ 420.571949,
+ 1,
+ 498.905613,
+ 422.85572,
+ 1,
+ 504.011519,
+ 423.758231,
+ 1,
+ 508.98311500000005,
+ 422.289137,
+ 1,
+ 512.9631360000002,
+ 418.935051,
+ 1,
+ 516.483537,
+ 415.068495,
+ 1,
+ 520.4627019999998,
+ 411.682146,
+ 1,
+ 524.5683650000002,
+ 408.443837,
+ 1,
+ 528.2940480000002,
+ 404.779843,
+ 1,
+ 531.408005,
+ 400.585369,
+ 1,
+ 533.7782599999998,
+ 395.929292,
+ 1,
+ 535.604259,
+ 391.029695,
+ 1,
+ 537.2263849999998,
+ 386.057698,
+ 1,
+ 538.779161,
+ 381.063564,
+ 1,
+ 540.257309,
+ 376.04686,
+ 1,
+ 541.658462,
+ 371.008091,
+ 1,
+ 543.005638,
+ 365.954595,
+ 1,
+ 544.3259889999998,
+ 360.894012,
+ 1,
+ 476.626984,
+ 359.039978,
+ 1,
+ 481.548981,
+ 356.339966,
+ 1,
+ 485.91098,
+ 357.414032,
+ 1,
+ 489.883972,
+ 359.63501,
+ 1,
+ 494.381958,
+ 363.002014,
+ 1,
+ 494.093964,
+ 365.890961,
+ 1,
+ 489.495972,
+ 362.60498,
+ 1,
+ 485.306,
+ 360.726959,
+ 1,
+ 481.214996,
+ 359.679962,
+ 1,
+ 506.893005,
+ 361.631012,
+ 1,
+ 512.028931,
+ 360.1489870000001,
+ 1,
+ 518.090027,
+ 359.3940120000001,
+ 1,
+ 524.357971,
+ 359.295013,
+ 1,
+ 529.7819820000002,
+ 363.101013,
+ 1,
+ 523.89093,
+ 362.149994,
+ 1,
+ 517.776001,
+ 362.536987,
+ 1,
+ 511.72399900000005,
+ 363.200958,
+ 1,
+ 506.344971,
+ 365.294006,
+ 1,
+ 501.347992,
+ 367.559998,
+ 1,
+ 500.189242,
+ 376.773265,
+ 1,
+ 498.956651,
+ 385.973584,
+ 1,
+ 498.187578,
+ 395.168944,
+ 1,
+ 491.300049,
+ 391.111084,
+ 1,
+ 494.141522,
+ 394.808003,
+ 1,
+ 498.40839000000005,
+ 396.494433,
+ 1,
+ 502.755472,
+ 394.932757,
+ 1,
+ 506.338013,
+ 391.92099,
+ 1,
+ 478.184326,
+ 363.812805,
+ 1,
+ 482.723986,
+ 362.820035,
+ 1,
+ 487.304845,
+ 363.425472,
+ 1,
+ 490.813411,
+ 365.581369,
+ 1,
+ 493.446991,
+ 368.773987,
+ 1,
+ 489.161792,
+ 369.81081,
+ 1,
+ 484.775209,
+ 369.488891,
+ 1,
+ 480.564629,
+ 367.719005,
+ 1,
+ 507.460999,
+ 371.890991,
+ 1,
+ 511.778055,
+ 367.477709,
+ 1,
+ 517.9155969999998,
+ 366.674784,
+ 1,
+ 522.678173,
+ 367.724831,
+ 1,
+ 527.087158,
+ 369.840698,
+ 1,
+ 522.757581,
+ 373.347413,
+ 1,
+ 517.304023,
+ 374.436933,
+ 1,
+ 512.204885,
+ 373.876232,
+ 1,
+ 488.688477,
+ 399.221008,
+ 1,
+ 493.442718,
+ 400.217365,
+ 1,
+ 498.155821,
+ 401.379151,
+ 1,
+ 499.542691,
+ 401.500691,
+ 1,
+ 501.246774,
+ 401.519773,
+ 1,
+ 506.836522,
+ 401.478986,
+ 1,
+ 512.330994,
+ 402.490967,
+ 1,
+ 508.472018,
+ 405.699605,
+ 1,
+ 504.486124,
+ 408.714805,
+ 1,
+ 499.5658360000001,
+ 409.33235,
+ 1,
+ 494.571667,
+ 407.645284,
+ 1,
+ 490.657115,
+ 404.125086,
+ 1,
+ 489.64199800000006,
+ 400.294006,
+ 1,
+ 494.17166900000007,
+ 403.631745,
+ 1,
+ 499.518763,
+ 405.267957,
+ 1,
+ 505.461418,
+ 404.415617,
+ 1,
+ 511.249969,
+ 402.717072,
+ 1,
+ 505.450159,
+ 404.418683,
+ 1,
+ 499.495484,
+ 405.266308,
+ 1,
+ 494.161436,
+ 403.626137,
+ 1,
+ 486.570186,
+ 366.549068,
+ 1,
+ 517.171429,
+ 369.798758,
+ 1
+ ],
+ "num_keypoints": 98,
+ "bbox": [
+ 463.60677570000007,
+ 346.2937758,
+ 88.05732359999968,
+ 84.50667840000006
+ ],
+ "iscrowd": 0,
+ "area": 7441.431926229908,
+ "category_id": 1,
+ "center": [
+ 507.5,
+ 388.5
+ ],
+ "scale": 0.375
+ }
+ ]
+}
\ No newline at end of file
diff --git a/vendor/ViTPose/tests/data/zebra/810.jpg b/vendor/ViTPose/tests/data/zebra/810.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aadc05e426839414302ce91934f218d0c0bff89a
Binary files /dev/null and b/vendor/ViTPose/tests/data/zebra/810.jpg differ
diff --git a/vendor/ViTPose/tests/data/zebra/850.jpg b/vendor/ViTPose/tests/data/zebra/850.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..90075c843e3c4a4537c64a4970f9304e01c9836c
Binary files /dev/null and b/vendor/ViTPose/tests/data/zebra/850.jpg differ
diff --git a/vendor/ViTPose/tests/data/zebra/test_zebra.json b/vendor/ViTPose/tests/data/zebra/test_zebra.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e834c554b52dcb0366b5abb4c0ada82ca2f62a0
--- /dev/null
+++ b/vendor/ViTPose/tests/data/zebra/test_zebra.json
@@ -0,0 +1,156 @@
+{
+ "categories": [
+ {
+ "supercategory": "animal",
+ "id": 1,
+ "name": "zebra",
+ "keypoints": [
+ "snout",
+ "head",
+ "neck",
+ "forelegL1",
+ "forelegR1",
+ "hindlegL1",
+ "hindlegR1",
+ "tailbase",
+ "tailtip"
+ ],
+ "skeleton": [
+ [
+ 2,
+ 1
+ ],
+ [
+ 3,
+ 2
+ ],
+ [
+ 4,
+ 3
+ ],
+ [
+ 5,
+ 3
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 8
+ ],
+ [
+ 8,
+ 3
+ ],
+ [
+ 9,
+ 8
+ ]
+ ]
+ }
+ ],
+ "images": [
+ {
+ "id": 810,
+ "file_name": "810.jpg",
+ "height": 160,
+ "width": 160
+ },
+ {
+ "id": 850,
+ "file_name": "850.jpg",
+ "height": 160,
+ "width": 160
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 121.13823384782104,
+ 64.42827920259212,
+ 2.0,
+ 117.9981442098391,
+ 70.81295036652858,
+ 2.0,
+ 101.74729396479975,
+ 80.0,
+ 2.0,
+ 95.57905809119656,
+ 75.75733930455307,
+ 2.0,
+ 95.2128993293075,
+ 82.7116929245571,
+ 2.0,
+ 62.78126573755127,
+ 75.8747890881429,
+ 2.0,
+ 62.31104503893799,
+ 83.86685797031176,
+ 2.0,
+ 58.25270603520024,
+ 80.0,
+ 2.0,
+ 53.31206457278393,
+ 85.52060239198866,
+ 2.0
+ ],
+ "image_id": 810,
+ "id": 810,
+ "num_keypoints": 9,
+ "bbox": [
+ 53.31206457278393,
+ 64.42827920259212,
+ 68.8261692750371,
+ 22.092323189396538
+ ],
+ "iscrowd": 0,
+ "area": 1520.5299755122337,
+ "category_id": 1
+ },
+ {
+ "keypoints": [
+ 122.31461535908949,
+ 89.25315845576364,
+ 2.0,
+ 117.81536523827128,
+ 87.97006030862022,
+ 2.0,
+ 101.66067429997881,
+ 80.0,
+ 2.0,
+ 97.88660503356242,
+ 74.70007144842482,
+ 2.0,
+ 96.6342743993913,
+ 81.95450979316085,
+ 2.0,
+ 62.9768902919959,
+ 75.51961961159495,
+ 2.0,
+ 63.64287080847072,
+ 83.46692756256179,
+ 2.0,
+ 58.3393257000212,
+ 80.0,
+ 2.0,
+ 55.41273077187657,
+ 77.94207820202976,
+ 2.0
+ ],
+ "image_id": 850,
+ "id": 850,
+ "num_keypoints": 9,
+ "bbox": [
+ 55.41273077187657,
+ 74.70007144842482,
+ 67.90188458721292,
+ 15.553087007338817
+ ],
+ "iscrowd": 0,
+ "area": 1056.083918947201,
+ "category_id": 1
+ }
+ ]
+}
\ No newline at end of file
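
The keypoint fixtures above follow the standard COCO annotation layout: `keypoints` is a flat list of `[x, y, visibility]` triples (visibility 2 means labeled and visible) and `bbox` is `[x, y, width, height]`. A minimal decoding sketch for the zebra fixture added in this diff (the reading path is the file path above; nothing else is assumed):

```python
import json

# Decode the flat COCO-style keypoint list into (x, y, visibility) triples.
with open('vendor/ViTPose/tests/data/zebra/test_zebra.json') as f:
    coco = json.load(f)

names = coco['categories'][0]['keypoints']   # 'snout', 'head', 'neck', ...
ann = coco['annotations'][0]                 # annotation for image 810
kpts = ann['keypoints']
triples = [kpts[i:i + 3] for i in range(0, len(kpts), 3)]
assert len(triples) == ann['num_keypoints'] == len(names)
x, y, w, h = ann['bbox']                     # bbox is [x, y, width, height]
for name, (kx, ky, vis) in zip(names, triples):
    print(f'{name}: ({kx:.1f}, {ky:.1f}), visibility={vis}')
```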
diff --git a/vendor/ViTPose/tests/test_apis/test_inference.py b/vendor/ViTPose/tests/test_apis/test_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbdb6146766940fed93d5d458e6d9ea0a2ce983c
--- /dev/null
+++ b/vendor/ViTPose/tests/test_apis/test_inference.py
@@ -0,0 +1,198 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+
+from mmpose.apis import (inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model,
+ process_mmdet_results, vis_pose_result)
+from mmpose.datasets import DatasetInfo
+
+
+def test_top_down_demo():
+ # COCO demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'coco/res50_coco_256x192.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/coco/000000000785.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+
+ person_result = []
+ person_result.append({'bbox': [50, 50, 50, 100]})
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # AIC demo
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'aic/res50_aic_256x192.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # OneHand10K demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'onehand10k/res50_onehand10k_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/onehand10k/9.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # InterHand2DDataset demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'interhand2d/res50_interhand2d_all_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/interhand2.6m/image2017.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # Face300WDataset demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ '300w/res50_300w_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/300w/indoor_020.png'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # FaceAFLWDataset demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'aflw/res50_aflw_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/aflw/image04476.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # FaceCOFWDataset demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'cofw/res50_cofw_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/cofw/001766.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+
+def test_bottom_up_demo():
+
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
+ 'coco/res50_coco_512x512.py',
+ None,
+ device='cpu')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test'].get(
+ 'dataset_info', None))
+
+ pose_results, _ = inference_bottom_up_pose_model(
+ pose_model, image_name, dataset_info=dataset_info)
+
+ # show the results
+ vis_pose_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # test dataset_info without sigmas
+ pose_model_copy = copy.deepcopy(pose_model)
+
+ pose_model_copy.cfg.data.test.dataset_info.pop('sigmas')
+ pose_results, _ = inference_bottom_up_pose_model(
+ pose_model_copy, image_name, dataset_info=dataset_info)
+
+
+def test_process_mmdet_results():
+ det_results = [np.array([0, 0, 100, 100])]
+ det_mask_results = None
+
+ _ = process_mmdet_results(
+ mmdet_results=(det_results, det_mask_results), cat_id=1)
+
+ _ = process_mmdet_results(mmdet_results=det_results, cat_id=1)
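
Taken together, the test above exercises the standard top-down workflow: detector boxes go through `process_mmdet_results`, then `inference_top_down_pose_model`, then `vis_pose_result`. A condensed sketch of that flow follows; the config path is the one used in the test, while the fake detection array and the missing checkpoint are illustrative assumptions, not values from this repo:

```python
import numpy as np

from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         process_mmdet_results, vis_pose_result)
from mmpose.datasets import DatasetInfo

# Build the pose model (pass a checkpoint path instead of None for real use).
pose_model = init_pose_model(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'coco/res50_coco_256x192.py', None, device='cpu')
dataset_info = DatasetInfo(pose_model.cfg.data['test'].get('dataset_info', None))

# Fake mmdet output: one person box in (x1, y1, x2, y2, score) format.
mmdet_results = [np.array([[50., 50., 150., 250., 0.9]])]
person_results = process_mmdet_results(mmdet_results, cat_id=1)

image_name = 'tests/data/coco/000000000785.jpg'
pose_results, _ = inference_top_down_pose_model(
    pose_model, image_name, person_results,
    format='xyxy', dataset_info=dataset_info)
vis_pose_result(pose_model, image_name, pose_results, dataset_info=dataset_info)
```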
diff --git a/vendor/ViTPose/tests/test_apis/test_inference_3d.py b/vendor/ViTPose/tests/test_apis/test_inference_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..350acd779ea7fa6f777be682a35963a3fbe1d84e
--- /dev/null
+++ b/vendor/ViTPose/tests/test_apis/test_inference_3d.py
@@ -0,0 +1,210 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+
+import mmcv
+import numpy as np
+import pytest
+import torch
+
+from mmpose.apis import (extract_pose_sequence, inference_interhand_3d_model,
+ inference_mesh_model, inference_pose_lifter_model,
+ init_pose_model, vis_3d_mesh_result,
+ vis_3d_pose_result)
+from mmpose.datasets.dataset_info import DatasetInfo
+from tests.utils.mesh_utils import generate_smpl_weight_file
+
+
+def test_pose_lifter_demo():
+ # H36M demo
+ pose_model = init_pose_model(
+ 'configs/body/3d_kpt_sview_rgb_img/pose_lift/'
+ 'h36m/simplebaseline3d_h36m.py',
+ None,
+ device='cpu')
+
+ pose_det_result = {
+ 'keypoints': np.zeros((17, 3)),
+ 'bbox': [50, 50, 50, 50],
+ 'track_id': 0,
+ 'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
+ }
+
+ pose_results_2d = [[pose_det_result]]
+
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+
+ pose_results_2d = extract_pose_sequence(
+ pose_results_2d, frame_idx=0, causal=False, seq_len=1, step=1)
+
+ _ = inference_pose_lifter_model(
+ pose_model,
+ pose_results_2d,
+ dataset_info=dataset_info,
+ with_track_id=False)
+
+ pose_lift_results = inference_pose_lifter_model(
+ pose_model,
+ pose_results_2d,
+ dataset_info=dataset_info,
+ with_track_id=True)
+
+ for res in pose_lift_results:
+ res['title'] = 'title'
+ vis_3d_pose_result(
+ pose_model,
+ pose_lift_results,
+ img=pose_results_2d[0][0]['image_name'],
+ dataset_info=dataset_info)
+
+ # test special cases
+ # Empty 2D results
+ _ = inference_pose_lifter_model(
+ pose_model, [[]], dataset_info=dataset_info, with_track_id=False)
+
+ if torch.cuda.is_available():
+ _ = inference_pose_lifter_model(
+ pose_model.cuda(),
+ pose_results_2d,
+ dataset_info=dataset_info,
+ with_track_id=False)
+
+ # test videopose3d
+ pose_model = init_pose_model(
+ 'configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/'
+ 'videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py',
+ None,
+ device='cpu')
+
+ pose_det_result_0 = {
+ 'keypoints': np.ones((17, 3)),
+ 'bbox': [50, 50, 100, 100],
+ 'track_id': 0,
+ 'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
+ }
+ pose_det_result_1 = {
+ 'keypoints': np.ones((17, 3)),
+ 'bbox': [50, 50, 100, 100],
+ 'track_id': 1,
+ 'image_name': 'tests/data/h36m/S5_SittingDown.54138969_002061.jpg',
+ }
+ pose_det_result_2 = {
+ 'keypoints': np.ones((17, 3)),
+ 'bbox': [50, 50, 100, 100],
+ 'track_id': 2,
+ 'image_name': 'tests/data/h36m/S7_Greeting.55011271_000396.jpg',
+ }
+
+ pose_results_2d = [[pose_det_result_0], [pose_det_result_1],
+ [pose_det_result_2]]
+
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+
+ seq_len = pose_model.cfg.test_data_cfg.seq_len
+ pose_results_2d_seq = extract_pose_sequence(
+ pose_results_2d, 1, causal=False, seq_len=seq_len, step=1)
+
+ pose_lift_results = inference_pose_lifter_model(
+ pose_model,
+ pose_results_2d_seq,
+ dataset_info=dataset_info,
+ with_track_id=True,
+ image_size=[1000, 1000],
+ norm_pose_2d=True)
+
+ for res in pose_lift_results:
+ res['title'] = 'title'
+ vis_3d_pose_result(
+ pose_model,
+ pose_lift_results,
+ img=pose_results_2d[0][0]['image_name'],
+ dataset_info=dataset_info,
+ )
+
+
+def test_interhand3d_demo():
+ # InterHand2.6M demo
+ pose_model = init_pose_model(
+ 'configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/'
+ 'res50_interhand3d_all_256x256.py',
+ None,
+ device='cpu')
+
+ image_name = 'tests/data/interhand2.6m/image2017.jpg'
+ det_result = {
+ 'image_name': image_name,
+ 'bbox': [50, 50, 50, 50], # bbox format is 'xywh'
+ 'camera_param': None,
+ 'keypoints_3d_gt': None
+ }
+ det_results = [det_result]
+ dataset = pose_model.cfg.data['test']['type']
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+
+ pose_results = inference_interhand_3d_model(
+ pose_model, image_name, det_results, dataset=dataset)
+
+ for res in pose_results:
+ res['title'] = 'title'
+
+ vis_3d_pose_result(
+ pose_model,
+ result=pose_results,
+ img=det_results[0]['image_name'],
+ dataset_info=dataset_info,
+ )
+
+ # test special cases
+ # Empty det results
+ _ = inference_interhand_3d_model(
+ pose_model, image_name, [], dataset=dataset)
+
+ if torch.cuda.is_available():
+ _ = inference_interhand_3d_model(
+ pose_model.cuda(), image_name, det_results, dataset=dataset)
+
+ with pytest.raises(NotImplementedError):
+ _ = inference_interhand_3d_model(
+ pose_model, image_name, det_results, dataset='test')
+
+
+def test_body_mesh_demo():
+ # H36M demo
+ config = 'configs/body/3d_mesh_sview_rgb_img/hmr' \
+ '/mixed/res50_mixed_224x224.py'
+ config = mmcv.Config.fromfile(config)
+ config.model.mesh_head.smpl_mean_params = \
+ 'tests/data/smpl/smpl_mean_params.npz'
+
+ pose_model = None
+ with tempfile.TemporaryDirectory() as tmpdir:
+ config.model.smpl.smpl_path = tmpdir
+ config.model.smpl.joints_regressor = osp.join(
+ tmpdir, 'test_joint_regressor.npy')
+ # generate weight file for SMPL model.
+ generate_smpl_weight_file(tmpdir)
+ pose_model = init_pose_model(config, device='cpu')
+
+ assert pose_model is not None, 'Fail to build pose model'
+
+ image_name = 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg'
+ det_result = {
+ 'keypoints': np.zeros((17, 3)),
+ 'bbox': [50, 50, 50, 50],
+ 'image_name': image_name,
+ }
+
+ # make person bounding boxes
+ person_results = [det_result]
+ dataset = pose_model.cfg.data['test']['type']
+
+ # test a single image, with a list of bboxes
+ pose_results = inference_mesh_model(
+ pose_model,
+ image_name,
+ person_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset=dataset)
+
+ vis_3d_mesh_result(pose_model, pose_results, image_name)
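
For orientation, the lifting test above boils down to: collect per-frame 2D results, window them with `extract_pose_sequence`, lift them with `inference_pose_lifter_model`, and render with `vis_3d_pose_result`. A condensed sketch using the same config, with dummy 2D keypoints (in practice these come from a 2D pose model):

```python
import numpy as np

from mmpose.apis import (extract_pose_sequence, inference_pose_lifter_model,
                         init_pose_model, vis_3d_pose_result)
from mmpose.datasets.dataset_info import DatasetInfo

lifter = init_pose_model(
    'configs/body/3d_kpt_sview_rgb_img/pose_lift/'
    'h36m/simplebaseline3d_h36m.py', None, device='cpu')
dataset_info = DatasetInfo(lifter.cfg.data['test']['dataset_info'])

# One frame of 2D results; keypoints are (17, 3) arrays of (x, y, score).
frame = [{
    'keypoints': np.zeros((17, 3)),
    'bbox': [50, 50, 50, 50],
    'track_id': 0,
    'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
}]
pose_results_2d = extract_pose_sequence(
    [frame], frame_idx=0, causal=False, seq_len=1, step=1)
pose_lift_results = inference_pose_lifter_model(
    lifter, pose_results_2d, dataset_info=dataset_info, with_track_id=True)
for res in pose_lift_results:
    res['title'] = 'lifted pose'
vis_3d_pose_result(lifter, pose_lift_results,
                   img=frame[0]['image_name'], dataset_info=dataset_info)
```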
diff --git a/vendor/ViTPose/tests/test_apis/test_inference_tracking.py b/vendor/ViTPose/tests/test_apis/test_inference_tracking.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ef62b771aee1047bc116b299a5ee62e6490bad6
--- /dev/null
+++ b/vendor/ViTPose/tests/test_apis/test_inference_tracking.py
@@ -0,0 +1,157 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmpose.apis import (get_track_id, inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model,
+ vis_pose_tracking_result)
+from mmpose.datasets.dataset_info import DatasetInfo
+
+
+def test_top_down_pose_tracking_demo():
+ # COCO demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'coco/res50_coco_256x192.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/coco/000000000785.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+ person_result = [{'bbox': [50, 50, 50, 100]}]
+
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ pose_results, next_id = get_track_id(pose_results, [], next_id=0)
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+ pose_results_last = pose_results
+
+ # AIC demo
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'aic/res50_aic_256x192.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset_info=dataset_info)
+ pose_results, next_id = get_track_id(pose_results, pose_results_last,
+ next_id)
+ for pose_result in pose_results:
+ del pose_result['bbox']
+ pose_results, next_id = get_track_id(pose_results, pose_results_last,
+ next_id)
+
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # OneHand10K demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'onehand10k/res50_onehand10k_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/onehand10k/9.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name, [{
+ 'bbox': [10, 10, 30, 30]
+ }],
+ format='xywh',
+ dataset_info=dataset_info)
+ pose_results, next_id = get_track_id(pose_results, pose_results_last,
+ next_id)
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ # InterHand2D demo
+ pose_model = init_pose_model(
+ 'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'interhand2d/res50_interhand2d_all_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/interhand2.6m/image2017.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name, [{
+ 'bbox': [50, 50, 0, 0]
+ }],
+ format='xywh',
+ dataset_info=dataset_info)
+ pose_results, next_id = get_track_id(pose_results, [], next_id=0)
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+ pose_results_last = pose_results
+
+ # MPII demo
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'mpii/res50_mpii_256x256.py',
+ None,
+ device='cpu')
+ image_name = 'tests/data/mpii/004645041.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+ # test a single image, with a list of bboxes.
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name, [{
+ 'bbox': [50, 50, 0, 0]
+ }],
+ format='xywh',
+ dataset_info=dataset_info)
+ pose_results, next_id = get_track_id(pose_results, pose_results_last,
+ next_id)
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+
+def test_bottom_up_pose_tracking_demo():
+ # COCO demo
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
+ 'coco/res50_coco_512x512.py',
+ None,
+ device='cpu')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+ dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
+
+ pose_results, _ = inference_bottom_up_pose_model(
+ pose_model, image_name, dataset_info=dataset_info)
+
+ pose_results, next_id = get_track_id(pose_results, [], next_id=0)
+
+ # show the results
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset_info=dataset_info)
+
+ pose_results_last = pose_results
+
+ # oks
+ pose_results, next_id = get_track_id(
+ pose_results, pose_results_last, next_id=next_id, use_oks=True)
+
+ pose_results_last = pose_results
+ # one_euro
+ pose_results, next_id = get_track_id(
+ pose_results, pose_results_last, next_id=next_id, use_one_euro=True)
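
The tracking tests reduce to a simple per-frame loop: estimate poses, then match them against the previous frame's results with `get_track_id` to carry IDs forward. A sketch of that loop; reading frames with `mmcv.VideoReader('demo.mp4')` and the fixed bounding box are illustrative assumptions (boxes would normally come from a detector):

```python
import mmcv

from mmpose.apis import (get_track_id, inference_top_down_pose_model,
                         init_pose_model, vis_pose_tracking_result)
from mmpose.datasets import DatasetInfo

pose_model = init_pose_model(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'coco/res50_coco_256x192.py', None, device='cpu')
dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

pose_results_last, next_id = [], 0
for frame in mmcv.VideoReader('demo.mp4'):          # hypothetical input video
    person_results = [{'bbox': [50, 50, 50, 100]}]  # xywh; use a detector in practice
    pose_results, _ = inference_top_down_pose_model(
        pose_model, frame, person_results,
        format='xywh', dataset_info=dataset_info)
    # Match against the previous frame; OKS-based matching as in the test.
    pose_results, next_id = get_track_id(
        pose_results, pose_results_last, next_id, use_oks=True)
    vis_pose_tracking_result(
        pose_model, frame, pose_results, dataset_info=dataset_info)
    pose_results_last = pose_results
```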
diff --git a/vendor/ViTPose/tests/test_backbones/test_alexnet.py b/vendor/ViTPose/tests/test_backbones/test_alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..a01f3e8255edadae339b8ca504459baea38a1197
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_alexnet.py
@@ -0,0 +1,21 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmpose.models.backbones import AlexNet
+
+
+def test_alexnet_backbone():
+ """Test alexnet backbone."""
+ model = AlexNet(-1)
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 192)
+ feat = model(imgs)
+ assert feat.shape == (1, 256, 7, 5)
+
+ model = AlexNet(1)
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == (1, 1)
diff --git a/vendor/ViTPose/tests/test_backbones/test_backbones_utils.py b/vendor/ViTPose/tests/test_backbones/test_backbones_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b2769eb58756a185902cbfd813694939bde1c84
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_backbones_utils.py
@@ -0,0 +1,117 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones.utils import (InvertedResidual, SELayer,
+ channel_shuffle, make_divisible)
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def test_make_divisible():
+ # test min_value is None
+ result = make_divisible(34, 8, None)
+ assert result == 32
+
+ # test when new_value < min_ratio * value
+ result = make_divisible(10, 8, min_ratio=0.9)
+ assert result == 16
+
+ # test min_ratio = 0.8
+ result = make_divisible(33, 8, min_ratio=0.8)
+ assert result == 32
+
+
+def test_channel_shuffle():
+ x = torch.randn(1, 24, 56, 56)
+ with pytest.raises(AssertionError):
+ # num_channels should be divisible by groups
+ channel_shuffle(x, 7)
+
+ groups = 3
+ batch_size, num_channels, height, width = x.size()
+ channels_per_group = num_channels // groups
+ out = channel_shuffle(x, groups)
+ # test the output value when groups = 3
+ for b in range(batch_size):
+ for c in range(num_channels):
+ c_out = c % channels_per_group * groups + c // channels_per_group
+ for i in range(height):
+ for j in range(width):
+ assert x[b, c, i, j] == out[b, c_out, i, j]
+
+
+def test_inverted_residual():
+
+ with pytest.raises(AssertionError):
+ # stride must be in [1, 2]
+ InvertedResidual(16, 16, 32, stride=3)
+
+ with pytest.raises(AssertionError):
+ # se_cfg must be None or dict
+ InvertedResidual(16, 16, 32, se_cfg=list())
+
+ with pytest.raises(AssertionError):
+ # in_channels and out_channels must be the same if
+ # with_expand_conv is False
+ InvertedResidual(16, 16, 32, with_expand_conv=False)
+
+ # Test InvertedResidual forward, stride=1
+ block = InvertedResidual(16, 16, 32, stride=1)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert getattr(block, 'se', None) is None
+ assert block.with_res_shortcut
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward, stride=2
+ block = InvertedResidual(16, 16, 32, stride=2)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert not block.with_res_shortcut
+ assert x_out.shape == torch.Size((1, 16, 28, 28))
+
+ # Test InvertedResidual forward with se layer
+ se_cfg = dict(channels=32)
+ block = InvertedResidual(16, 16, 32, stride=1, se_cfg=se_cfg)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert isinstance(block.se, SELayer)
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward, with_expand_conv=False
+ block = InvertedResidual(32, 16, 32, with_expand_conv=False)
+ x = torch.randn(1, 32, 56, 56)
+ x_out = block(x)
+ assert getattr(block, 'expand_conv', None) is None
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward with GroupNorm
+ block = InvertedResidual(
+ 16, 16, 32, norm_cfg=dict(type='GN', num_groups=2))
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ for m in block.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward with HSigmoid
+ block = InvertedResidual(16, 16, 32, act_cfg=dict(type='HSigmoid'))
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward with checkpoint
+ block = InvertedResidual(16, 16, 32, with_cp=True)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert block.with_cp
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
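
The element-wise loop in `test_channel_shuffle` spells out the usual ShuffleNet permutation. An equivalent, vectorized way to state the same check (a sketch, not part of the test suite) is a reshape/transpose round trip:

```python
import torch

from mmpose.models.backbones.utils import channel_shuffle

x = torch.randn(1, 24, 56, 56)
groups = 3
b, c, h, w = x.shape
# Group the channels, swap the (group, channel-per-group) axes, flatten back.
expected = (x.view(b, groups, c // groups, h, w)
             .transpose(1, 2).contiguous().view(b, c, h, w))
assert torch.equal(channel_shuffle(x, groups), expected)
```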
diff --git a/vendor/ViTPose/tests/test_backbones/test_cpm.py b/vendor/ViTPose/tests/test_backbones/test_cpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8ce354de6fa2d6ad5509b30238313b97f4be7fa
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_cpm.py
@@ -0,0 +1,64 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models import CPM
+from mmpose.models.backbones.cpm import CpmBlock
+
+
+def test_cpm_block():
+ with pytest.raises(AssertionError):
+ # len(channels) == len(kernels)
+ CpmBlock(
+ 3, channels=[3, 3, 3], kernels=[
+ 1,
+ ])
+
+ # Test CPM Block
+ model = CpmBlock(3, channels=[3, 3, 3], kernels=[1, 1, 1])
+ model.train()
+
+ imgs = torch.randn(1, 3, 10, 10)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 3, 10, 10])
+
+
+def test_cpm_backbone():
+ with pytest.raises(AssertionError):
+ # CPM's num_stages should be larger than 0
+ CPM(in_channels=3, out_channels=17, num_stages=-1)
+
+ with pytest.raises(AssertionError):
+ # CPM's in_channels should be 3
+ CPM(in_channels=2, out_channels=17)
+
+ # Test CPM
+ model = CPM(in_channels=3, out_channels=17, num_stages=1)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 192)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([1, 17, 32, 24])
+
+ imgs = torch.randn(1, 3, 384, 288)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([1, 17, 48, 36])
+
+ imgs = torch.randn(1, 3, 368, 368)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([1, 17, 46, 46])
+
+ # Test CPM multi-stages
+ model = CPM(in_channels=3, out_channels=17, num_stages=2)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 368, 368)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert feat[0].shape == torch.Size([1, 17, 46, 46])
+ assert feat[1].shape == torch.Size([1, 17, 46, 46])
diff --git a/vendor/ViTPose/tests/test_backbones/test_hourglass.py b/vendor/ViTPose/tests/test_backbones/test_hourglass.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a85610969dbf35e0c716f25707dbeb07a930092
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_hourglass.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models import HourglassAENet, HourglassNet
+
+
+def test_hourglass_backbone():
+ with pytest.raises(AssertionError):
+ # HourglassNet's num_stacks should be larger than 0
+ HourglassNet(num_stacks=0)
+
+ with pytest.raises(AssertionError):
+ # len(stage_channels) should equal len(stage_blocks)
+ HourglassNet(
+ stage_channels=[256, 256, 384, 384, 384],
+ stage_blocks=[2, 2, 2, 2, 2, 4])
+
+ with pytest.raises(AssertionError):
+ # len(stage_channels) should be larger than downsample_times
+ HourglassNet(
+ downsample_times=5,
+ stage_channels=[256, 256, 384, 384, 384],
+ stage_blocks=[2, 2, 2, 2, 2])
+
+ # Test HourglassNet-52
+ model = HourglassNet(num_stacks=1)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 256)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([1, 256, 64, 64])
+
+ # Test HourglassNet-104
+ model = HourglassNet(num_stacks=2)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 256)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert feat[0].shape == torch.Size([1, 256, 64, 64])
+ assert feat[1].shape == torch.Size([1, 256, 64, 64])
+
+
+def test_hourglass_ae_backbone():
+ with pytest.raises(AssertionError):
+ # HourglassAENet's num_stacks should be larger than 0
+ HourglassAENet(num_stacks=0)
+
+ with pytest.raises(AssertionError):
+ # len(stage_channels) should be larger than downsample_times
+ HourglassAENet(
+ downsample_times=5, stage_channels=[256, 256, 384, 384, 384])
+
+ # num_stack=1
+ model = HourglassAENet(num_stacks=1)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 256)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([1, 34, 64, 64])
+
+ # num_stack=2
+ model = HourglassAENet(num_stacks=2)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 256, 256)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert feat[0].shape == torch.Size([1, 34, 64, 64])
+ assert feat[1].shape == torch.Size([1, 34, 64, 64])
diff --git a/vendor/ViTPose/tests/test_backbones/test_hrformer.py b/vendor/ViTPose/tests/test_backbones/test_hrformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b9175435c440743dcf8cf40dc476601d0f427c3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_hrformer.py
@@ -0,0 +1,187 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models.backbones.hrformer import (HRFomerModule, HRFormer,
+ HRFormerBlock)
+
+
+def test_hrformer_module():
+ norm_cfg = dict(type='BN')
+ block = HRFormerBlock
+ # Test multiscale forward
+ num_channles = (32, 64)
+ num_inchannels = [c * block.expansion for c in num_channles]
+ hrmodule = HRFomerModule(
+ num_branches=2,
+ block=block,
+ num_blocks=(2, 2),
+ num_inchannels=num_inchannels,
+ num_channels=num_channles,
+ num_heads=(1, 2),
+ num_window_sizes=(7, 7),
+ num_mlp_ratios=(4, 4),
+ drop_paths=(0., 0.),
+ norm_cfg=norm_cfg)
+
+ feats = [
+ torch.randn(1, num_inchannels[0], 64, 64),
+ torch.randn(1, num_inchannels[1], 32, 32)
+ ]
+ feats = hrmodule(feats)
+
+ assert len(str(hrmodule)) > 0
+ assert len(feats) == 2
+ assert feats[0].shape == torch.Size([1, num_inchannels[0], 64, 64])
+ assert feats[1].shape == torch.Size([1, num_inchannels[1], 32, 32])
+
+ # Test single scale forward
+ num_channles = (32, 64)
+ in_channels = [c * block.expansion for c in num_channles]
+ hrmodule = HRFomerModule(
+ num_branches=2,
+ block=block,
+ num_blocks=(2, 2),
+ num_inchannels=num_inchannels,
+ num_channels=num_channles,
+ num_heads=(1, 2),
+ num_window_sizes=(7, 7),
+ num_mlp_ratios=(4, 4),
+ drop_paths=(0., 0.),
+ norm_cfg=norm_cfg,
+ multiscale_output=False,
+ )
+
+ feats = [
+ torch.randn(1, in_channels[0], 64, 64),
+ torch.randn(1, in_channels[1], 32, 32)
+ ]
+ feats = hrmodule(feats)
+
+ assert len(feats) == 1
+ assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64])
+
+ # Test single branch HRFormer module
+ hrmodule = HRFomerModule(
+ num_branches=1,
+ block=block,
+ num_blocks=(1, ),
+ num_inchannels=[num_inchannels[0]],
+ num_channels=[num_channles[0]],
+ num_heads=(1, ),
+ num_window_sizes=(7, ),
+ num_mlp_ratios=(4, ),
+ drop_paths=(0.1, ),
+ norm_cfg=norm_cfg,
+ )
+
+ feats = [
+ torch.randn(1, in_channels[0], 64, 64),
+ ]
+ feats = hrmodule(feats)
+
+ assert len(feats) == 1
+ assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64])
+
+ # Value tests
+ kwargs = dict(
+ num_branches=2,
+ block=block,
+ num_blocks=(2, 2),
+ num_inchannels=num_inchannels,
+ num_channels=num_channles,
+ num_heads=(1, 2),
+ num_window_sizes=(7, 7),
+ num_mlp_ratios=(4, 4),
+ drop_paths=(0.1, 0.1),
+ norm_cfg=norm_cfg,
+ )
+
+ with pytest.raises(ValueError):
+ # len(num_blocks) should equal num_branches
+ kwargs['num_blocks'] = [2, 2, 2]
+ HRFomerModule(**kwargs)
+ kwargs['num_blocks'] = [2, 2]
+
+ with pytest.raises(ValueError):
+ # len(num_channels) should equal num_branches
+ kwargs['num_channels'] = [2]
+ HRFomerModule(**kwargs)
+ kwargs['num_channels'] = [2, 2]
+
+ with pytest.raises(ValueError):
+ # len(num_inchannels) should equal num_branches
+ kwargs['num_inchannels'] = [2]
+ HRFomerModule(**kwargs)
+ kwargs['num_inchannels'] = [2, 2]
+
+
+def test_hrformer_backbone():
+ norm_cfg = dict(type='BN')
+ # only have 3 stages
+ extra = dict(
+ drop_path_rate=0.2,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ window_sizes=(7, 7),
+ num_heads=(1, 2),
+ mlp_ratios=(4, 4),
+ num_blocks=(2, 2),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ window_sizes=(7, 7, 7),
+ num_heads=(1, 2, 4),
+ mlp_ratios=(4, 4, 4),
+ num_blocks=(2, 2, 2),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ window_sizes=(7, 7, 7, 7),
+ num_heads=(1, 2, 4, 8),
+ mlp_ratios=(4, 4, 4, 4),
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(32, 64, 128, 256),
+ multiscale_output=True))
+
+ with pytest.raises(ValueError):
+ # len(num_blocks) should equal num_branches
+ extra['stage4']['num_branches'] = 3
+ HRFormer(extra=extra)
+ extra['stage4']['num_branches'] = 4
+
+ # Test HRFormer-S
+ model = HRFormer(extra=extra, norm_cfg=norm_cfg)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 64, 64)
+ feats = model(imgs)
+ assert len(feats) == 4
+ assert feats[0].shape == torch.Size([1, 32, 16, 16])
+ assert feats[3].shape == torch.Size([1, 256, 2, 2])
+
+ # Test single scale output and model
+ # without relative position bias
+ extra['stage4']['multiscale_output'] = False
+ extra['with_rpe'] = False
+ model = HRFormer(extra=extra, norm_cfg=norm_cfg)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 64, 64)
+ feats = model(imgs)
+ assert len(feats) == 1
+ assert feats[0].shape == torch.Size([1, 32, 16, 16])
diff --git a/vendor/ViTPose/tests/test_backbones/test_hrnet.py b/vendor/ViTPose/tests/test_backbones/test_hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb878803958defcf3d138670658e77fb85a8c9d3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_hrnet.py
@@ -0,0 +1,129 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import HRNet
+from mmpose.models.backbones.hrnet import HRModule
+from mmpose.models.backbones.resnet import BasicBlock, Bottleneck
+
+
+def is_block(modules):
+ """Check if is HRModule building block."""
+ if isinstance(modules, (HRModule, )):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (_BatchNorm, )):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight(and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def test_hrmodule():
+ # Test HRModule forward
+ block = HRModule(
+ num_branches=1,
+ blocks=BasicBlock,
+ num_blocks=(4, ),
+ in_channels=[
+ 64,
+ ],
+ num_channels=(64, ))
+
+ x = torch.randn(2, 64, 56, 56)
+ x_out = block([x])
+ assert x_out[0].shape == torch.Size([2, 64, 56, 56])
+
+
+def test_hrnet_backbone():
+ extra = dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256)))
+
+ model = HRNet(extra, in_channels=3)
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 32, 56, 56])
+
+ # Test HRNet zero initialization of residual
+ model = HRNet(extra, in_channels=3, zero_init_residual=True)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert all_zeros(m.norm3)
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 32, 56, 56])
+
+ # Test HRNet with the first three stages frozen
+ frozen_stages = 3
+ model = HRNet(extra, in_channels=3, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ if frozen_stages >= 0:
+ assert model.norm1.training is False
+ assert model.norm2.training is False
+ for layer in [model.conv1, model.norm1, model.conv2, model.norm2]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ for i in range(1, frozen_stages + 1):
+ if i == 1:
+ layer = getattr(model, 'layer1')
+ else:
+ layer = getattr(model, f'stage{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ if i < 4:
+ layer = getattr(model, f'transition{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
diff --git a/vendor/ViTPose/tests/test_backbones/test_litehrnet.py b/vendor/ViTPose/tests/test_backbones/test_litehrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..de2b6db776da70a5184b6616f61b1cd14b231e19
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_litehrnet.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import LiteHRNet
+from mmpose.models.backbones.litehrnet import LiteHRModule
+from mmpose.models.backbones.resnet import Bottleneck
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (_BatchNorm, )):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight(and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def test_litehrmodule():
+ # Test LiteHRModule forward
+ block = LiteHRModule(
+ num_branches=1,
+ num_blocks=1,
+ in_channels=[
+ 40,
+ ],
+ reduce_ratio=8,
+ module_type='LITE')
+
+ x = torch.randn(2, 40, 56, 56)
+ x_out = block([[x]])
+ assert x_out[0][0].shape == torch.Size([2, 40, 56, 56])
+
+ block = LiteHRModule(
+ num_branches=1,
+ num_blocks=1,
+ in_channels=[
+ 40,
+ ],
+ reduce_ratio=8,
+ module_type='NAIVE')
+
+ x = torch.randn(2, 40, 56, 56)
+ x_out = block([x])
+ assert x_out[0].shape == torch.Size([2, 40, 56, 56])
+
+ with pytest.raises(ValueError):
+ block = LiteHRModule(
+ num_branches=1,
+ num_blocks=1,
+ in_channels=[
+ 40,
+ ],
+ reduce_ratio=8,
+ module_type='none')
+
+
+def test_litehrnet_backbone():
+ extra = dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True)
+
+ model = LiteHRNet(extra, in_channels=3)
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 40, 56, 56])
+
+ # Test LiteHRNet zero initialization of residual
+ model = LiteHRNet(extra, in_channels=3)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert all_zeros(m.norm3)
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 40, 56, 56])
+
+ extra = dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('NAIVE', 'NAIVE', 'NAIVE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True)
+
+ model = LiteHRNet(extra, in_channels=3)
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 40, 56, 56])
+
+ # Test LiteHRNet zero initialization of residual
+ model = LiteHRNet(extra, in_channels=3)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert all_zeros(m.norm3)
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 1
+ assert feat[0].shape == torch.Size([2, 40, 56, 56])
diff --git a/vendor/ViTPose/tests/test_backbones/test_mobilenet_v2.py b/vendor/ViTPose/tests/test_backbones/test_mobilenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..1381ec2604c803447a373f95bfd5509409b9dd95
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_mobilenet_v2.py
@@ -0,0 +1,257 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import MobileNetV2
+from mmpose.models.backbones.mobilenet_v2 import InvertedResidual
+
+
+def is_block(modules):
+ """Check if is ResNet building block."""
+ if isinstance(modules, (InvertedResidual, )):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_mobilenetv2_invertedresidual():
+
+ with pytest.raises(AssertionError):
+ # stride must be in [1, 2]
+ InvertedResidual(16, 24, stride=3, expand_ratio=6)
+
+ # Test InvertedResidual forward, stride=1
+ block = InvertedResidual(16, 24, stride=1, expand_ratio=6)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 56, 56))
+
+ # Test InvertedResidual with expand_ratio=1
+ block = InvertedResidual(16, 16, stride=1, expand_ratio=1)
+ assert len(block.conv) == 2
+
+ # Test InvertedResidual with use_res_connect
+ block = InvertedResidual(16, 16, stride=1, expand_ratio=6)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert block.use_res_connect is True
+ assert x_out.shape == torch.Size((1, 16, 56, 56))
+
+ # Test InvertedResidual forward, stride=2
+ block = InvertedResidual(16, 24, stride=2, expand_ratio=6)
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 28, 28))
+
+ # Test InvertedResidual with checkpoint forward
+ block = InvertedResidual(16, 24, stride=1, expand_ratio=6, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 56, 56))
+
+ # Test InvertedResidual with act_cfg=dict(type='ReLU')
+ block = InvertedResidual(
+ 16, 24, stride=1, expand_ratio=6, act_cfg=dict(type='ReLU'))
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 56, 56))
+
+
+def test_mobilenetv2_backbone():
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = MobileNetV2()
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(ValueError):
+ # frozen_stages must be in range(-1, 8)
+ MobileNetV2(frozen_stages=8)
+
+ with pytest.raises(ValueError):
+ # out_indices must be in range(0, 8)
+ MobileNetV2(out_indices=[8])
+
+ # Test MobileNetV2 with first stage frozen
+ frozen_stages = 1
+ model = MobileNetV2(frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+
+ for mod in model.conv1.modules():
+ for param in mod.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test MobileNetV2 with norm_eval=True
+ model = MobileNetV2(norm_eval=True)
+ model.init_weights()
+ model.train()
+
+ assert check_norm_state(model.modules(), False)
+
+ # Test MobileNetV2 forward with widen_factor=1.0
+ model = MobileNetV2(widen_factor=1.0, out_indices=range(0, 8))
+ model.init_weights()
+ model.train()
+
+ assert check_norm_state(model.modules(), True)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 8
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 24, 56, 56))
+ assert feat[2].shape == torch.Size((1, 32, 28, 28))
+ assert feat[3].shape == torch.Size((1, 64, 14, 14))
+ assert feat[4].shape == torch.Size((1, 96, 14, 14))
+ assert feat[5].shape == torch.Size((1, 160, 7, 7))
+ assert feat[6].shape == torch.Size((1, 320, 7, 7))
+ assert feat[7].shape == torch.Size((1, 1280, 7, 7))
+
+ # Test MobileNetV2 forward with widen_factor=0.5
+ model = MobileNetV2(widen_factor=0.5, out_indices=range(0, 7))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 7
+ assert feat[0].shape == torch.Size((1, 8, 112, 112))
+ assert feat[1].shape == torch.Size((1, 16, 56, 56))
+ assert feat[2].shape == torch.Size((1, 16, 28, 28))
+ assert feat[3].shape == torch.Size((1, 32, 14, 14))
+ assert feat[4].shape == torch.Size((1, 48, 14, 14))
+ assert feat[5].shape == torch.Size((1, 80, 7, 7))
+ assert feat[6].shape == torch.Size((1, 160, 7, 7))
+
+ # Test MobileNetV2 forward with widen_factor=2.0
+ model = MobileNetV2(widen_factor=2.0)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size((1, 2560, 7, 7))
+
+ # Test MobileNetV2 forward with out_indices=None
+ model = MobileNetV2(widen_factor=1.0)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size((1, 1280, 7, 7))
+
+ # Test MobileNetV2 forward with dict(type='ReLU')
+ model = MobileNetV2(
+ widen_factor=1.0, act_cfg=dict(type='ReLU'), out_indices=range(0, 7))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 7
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 24, 56, 56))
+ assert feat[2].shape == torch.Size((1, 32, 28, 28))
+ assert feat[3].shape == torch.Size((1, 64, 14, 14))
+ assert feat[4].shape == torch.Size((1, 96, 14, 14))
+ assert feat[5].shape == torch.Size((1, 160, 7, 7))
+ assert feat[6].shape == torch.Size((1, 320, 7, 7))
+
+ # Test MobileNetV2 with BatchNorm forward
+ model = MobileNetV2(widen_factor=1.0, out_indices=range(0, 7))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 7
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 24, 56, 56))
+ assert feat[2].shape == torch.Size((1, 32, 28, 28))
+ assert feat[3].shape == torch.Size((1, 64, 14, 14))
+ assert feat[4].shape == torch.Size((1, 96, 14, 14))
+ assert feat[5].shape == torch.Size((1, 160, 7, 7))
+ assert feat[6].shape == torch.Size((1, 320, 7, 7))
+
+ # Test MobileNetV2 with GroupNorm forward
+ model = MobileNetV2(
+ widen_factor=1.0,
+ norm_cfg=dict(type='GN', num_groups=2, requires_grad=True),
+ out_indices=range(0, 7))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 7
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 24, 56, 56))
+ assert feat[2].shape == torch.Size((1, 32, 28, 28))
+ assert feat[3].shape == torch.Size((1, 64, 14, 14))
+ assert feat[4].shape == torch.Size((1, 96, 14, 14))
+ assert feat[5].shape == torch.Size((1, 160, 7, 7))
+ assert feat[6].shape == torch.Size((1, 320, 7, 7))
+
+ # Test MobileNetV2 with layers 1, 3, 5 out forward
+ model = MobileNetV2(widen_factor=1.0, out_indices=(0, 2, 4))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 32, 28, 28))
+ assert feat[2].shape == torch.Size((1, 96, 14, 14))
+
+ # Test MobileNetV2 with checkpoint forward
+ model = MobileNetV2(
+ widen_factor=1.0, with_cp=True, out_indices=range(0, 7))
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 7
+ assert feat[0].shape == torch.Size((1, 16, 112, 112))
+ assert feat[1].shape == torch.Size((1, 24, 56, 56))
+ assert feat[2].shape == torch.Size((1, 32, 28, 28))
+ assert feat[3].shape == torch.Size((1, 64, 14, 14))
+ assert feat[4].shape == torch.Size((1, 96, 14, 14))
+ assert feat[5].shape == torch.Size((1, 160, 7, 7))
+ assert feat[6].shape == torch.Size((1, 320, 7, 7))
diff --git a/vendor/ViTPose/tests/test_backbones/test_mobilenet_v3.py b/vendor/ViTPose/tests/test_backbones/test_mobilenet_v3.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cc00ea2a14b56b9ad989c1b9daa7d7b859369f3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_mobilenet_v3.py
@@ -0,0 +1,169 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import MobileNetV3
+from mmpose.models.backbones.utils import InvertedResidual
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_mobilenetv3_backbone():
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = MobileNetV3()
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AssertionError):
+ # arch must be in ['small', 'big']
+ MobileNetV3(arch='others')
+
+ with pytest.raises(ValueError):
+ # frozen_stages must be less than 12 when arch is small
+ MobileNetV3(arch='small', frozen_stages=12)
+
+ with pytest.raises(ValueError):
+ # frozen_stages must be less than 16 when arch is big
+ MobileNetV3(arch='big', frozen_stages=16)
+
+ with pytest.raises(ValueError):
+ # max out_indices must be less than 11 when arch is small
+ MobileNetV3(arch='small', out_indices=(11, ))
+
+ with pytest.raises(ValueError):
+ # max out_indices must be less than 15 when arch is big
+ MobileNetV3(arch='big', out_indices=(15, ))
+
+ # Test MobileNetv3
+ model = MobileNetV3()
+ model.init_weights()
+ model.train()
+
+ # Test MobileNetv3 with first stage frozen
+ frozen_stages = 1
+ model = MobileNetV3(frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ for param in model.conv1.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test MobileNetv3 with norm eval
+ model = MobileNetV3(norm_eval=True, out_indices=range(0, 11))
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test MobileNetv3 forward with small arch
+ model = MobileNetV3(out_indices=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 11
+ assert feat[0].shape == torch.Size([1, 16, 56, 56])
+ assert feat[1].shape == torch.Size([1, 24, 28, 28])
+ assert feat[2].shape == torch.Size([1, 24, 28, 28])
+ assert feat[3].shape == torch.Size([1, 40, 14, 14])
+ assert feat[4].shape == torch.Size([1, 40, 14, 14])
+ assert feat[5].shape == torch.Size([1, 40, 14, 14])
+ assert feat[6].shape == torch.Size([1, 48, 14, 14])
+ assert feat[7].shape == torch.Size([1, 48, 14, 14])
+ assert feat[8].shape == torch.Size([1, 96, 7, 7])
+ assert feat[9].shape == torch.Size([1, 96, 7, 7])
+ assert feat[10].shape == torch.Size([1, 96, 7, 7])
+
+ # Test MobileNetv3 forward with small arch and GroupNorm
+ model = MobileNetV3(
+ out_indices=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
+ norm_cfg=dict(type='GN', num_groups=2, requires_grad=True))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 11
+ assert feat[0].shape == torch.Size([1, 16, 56, 56])
+ assert feat[1].shape == torch.Size([1, 24, 28, 28])
+ assert feat[2].shape == torch.Size([1, 24, 28, 28])
+ assert feat[3].shape == torch.Size([1, 40, 14, 14])
+ assert feat[4].shape == torch.Size([1, 40, 14, 14])
+ assert feat[5].shape == torch.Size([1, 40, 14, 14])
+ assert feat[6].shape == torch.Size([1, 48, 14, 14])
+ assert feat[7].shape == torch.Size([1, 48, 14, 14])
+ assert feat[8].shape == torch.Size([1, 96, 7, 7])
+ assert feat[9].shape == torch.Size([1, 96, 7, 7])
+ assert feat[10].shape == torch.Size([1, 96, 7, 7])
+
+ # Test MobileNetv3 forward with big arch
+ model = MobileNetV3(
+ arch='big',
+ out_indices=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 15
+ assert feat[0].shape == torch.Size([1, 16, 112, 112])
+ assert feat[1].shape == torch.Size([1, 24, 56, 56])
+ assert feat[2].shape == torch.Size([1, 24, 56, 56])
+ assert feat[3].shape == torch.Size([1, 40, 28, 28])
+ assert feat[4].shape == torch.Size([1, 40, 28, 28])
+ assert feat[5].shape == torch.Size([1, 40, 28, 28])
+ assert feat[6].shape == torch.Size([1, 80, 14, 14])
+ assert feat[7].shape == torch.Size([1, 80, 14, 14])
+ assert feat[8].shape == torch.Size([1, 80, 14, 14])
+ assert feat[9].shape == torch.Size([1, 80, 14, 14])
+ assert feat[10].shape == torch.Size([1, 112, 14, 14])
+ assert feat[11].shape == torch.Size([1, 112, 14, 14])
+ assert feat[12].shape == torch.Size([1, 160, 14, 14])
+ assert feat[13].shape == torch.Size([1, 160, 7, 7])
+ assert feat[14].shape == torch.Size([1, 160, 7, 7])
+
+ # Test MobileNetv3 forward with big arch
+ model = MobileNetV3(arch='big', out_indices=(0, ))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 16, 112, 112])
+
+ # Test MobileNetv3 with checkpoint forward
+ model = MobileNetV3(with_cp=True)
+ for m in model.modules():
+ if isinstance(m, InvertedResidual):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 96, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_mspn.py b/vendor/ViTPose/tests/test_backbones/test_mspn.py
new file mode 100644
index 0000000000000000000000000000000000000000..6aca441763b4e88c06cb629d4dd549a616bb40da
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_mspn.py
@@ -0,0 +1,32 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models import MSPN
+
+
+def test_mspn_backbone():
+ with pytest.raises(AssertionError):
+ # MSPN's num_stages should be larger than 0
+ MSPN(num_stages=0)
+ with pytest.raises(AssertionError):
+ # MSPN's num_units should be larger than 1
+ MSPN(num_units=1)
+ with pytest.raises(AssertionError):
+ # len(num_blocks) should equal num_units
+ MSPN(num_units=2, num_blocks=[2, 2, 2])
+
+ # Test MSPN's outputs
+ model = MSPN(num_stages=2, num_units=2, num_blocks=[2, 2])
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 511, 511)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert len(feat[0]) == 2
+ assert len(feat[1]) == 2
+ assert feat[0][0].shape == torch.Size([1, 256, 64, 64])
+ assert feat[0][1].shape == torch.Size([1, 256, 128, 128])
+ assert feat[1][0].shape == torch.Size([1, 256, 64, 64])
+ assert feat[1][1].shape == torch.Size([1, 256, 128, 128])
diff --git a/vendor/ViTPose/tests/test_backbones/test_regnet.py b/vendor/ViTPose/tests/test_backbones/test_regnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..165aad7f2f9ad4d7dcaef87636ba333b9d7959b1
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_regnet.py
@@ -0,0 +1,92 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models.backbones import RegNet
+
+regnet_test_data = [
+ ('regnetx_400mf',
+ dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22,
+ bot_mul=1.0), [32, 64, 160, 384]),
+ ('regnetx_800mf',
+ dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16,
+ bot_mul=1.0), [64, 128, 288, 672]),
+ ('regnetx_1.6gf',
+ dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18,
+ bot_mul=1.0), [72, 168, 408, 912]),
+ ('regnetx_3.2gf',
+ dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25,
+ bot_mul=1.0), [96, 192, 432, 1008]),
+ ('regnetx_4.0gf',
+ dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23,
+ bot_mul=1.0), [80, 240, 560, 1360]),
+ ('regnetx_6.4gf',
+ dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17,
+ bot_mul=1.0), [168, 392, 784, 1624]),
+ ('regnetx_8.0gf',
+ dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23,
+ bot_mul=1.0), [80, 240, 720, 1920]),
+ ('regnetx_12gf',
+ dict(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19,
+ bot_mul=1.0), [224, 448, 896, 2240]),
+]
+
+
+@pytest.mark.parametrize('arch_name,arch,out_channels', regnet_test_data)
+def test_regnet_backbone(arch_name, arch, out_channels):
+ with pytest.raises(AssertionError):
+ # arch_name must be one of the predefined RegNet architectures
+ RegNet(arch_name + '233')
+
+ # output the last feature map
+ model = RegNet(arch_name)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert isinstance(feat, torch.Tensor)
+ assert feat.shape == (1, out_channels[-1], 7, 7)
+
+ # output feature map of all stages
+ model = RegNet(arch_name, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, out_channels[0], 56, 56)
+ assert feat[1].shape == (1, out_channels[1], 28, 28)
+ assert feat[2].shape == (1, out_channels[2], 14, 14)
+ assert feat[3].shape == (1, out_channels[3], 7, 7)
+
+
+@pytest.mark.parametrize('arch_name,arch,out_channels', regnet_test_data)
+def test_custom_arch(arch_name, arch, out_channels):
+ # output the last feature map
+ model = RegNet(arch)
+ model.init_weights()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert isinstance(feat, torch.Tensor)
+ assert feat.shape == (1, out_channels[-1], 7, 7)
+
+ # output feature map of all stages
+ model = RegNet(arch, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, out_channels[0], 56, 56)
+ assert feat[1].shape == (1, out_channels[1], 28, 28)
+ assert feat[2].shape == (1, out_channels[2], 14, 14)
+ assert feat[3].shape == (1, out_channels[3], 7, 7)
+
+
+def test_exception():
+ # arch must be a str or dict
+ with pytest.raises(TypeError):
+ _ = RegNet(50)
diff --git a/vendor/ViTPose/tests/test_backbones/test_resnest.py b/vendor/ViTPose/tests/test_backbones/test_resnest.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bb41b198b79c11831cde986ba8659a8379562f3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_resnest.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models.backbones import ResNeSt
+from mmpose.models.backbones.resnest import Bottleneck as BottleneckS
+
+
+def test_bottleneck():
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ BottleneckS(64, 64, radix=2, reduction_factor=4, style='tensorflow')
+
+ # Test ResNeSt Bottleneck structure
+ block = BottleneckS(
+ 64, 256, radix=2, reduction_factor=4, stride=2, style='pytorch')
+ assert block.avd_layer.stride == 2
+ assert block.conv2.channels == 64
+
+ # Test ResNeSt Bottleneck forward
+ block = BottleneckS(64, 64, radix=2, reduction_factor=4)
+ x = torch.randn(2, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 64, 56, 56])
+
+
+def test_resnest():
+ with pytest.raises(KeyError):
+ # ResNeSt depth should be in [50, 101, 152, 200]
+ ResNeSt(depth=18)
+
+ # Test ResNeSt with radix 2, reduction_factor 4
+ model = ResNeSt(
+ depth=50, radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([2, 256, 56, 56])
+ assert feat[1].shape == torch.Size([2, 512, 28, 28])
+ assert feat[2].shape == torch.Size([2, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([2, 2048, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_resnet.py b/vendor/ViTPose/tests/test_backbones/test_resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..036a76c19ef85cc23d29ef040e14eb6b314898bb
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_resnet.py
@@ -0,0 +1,562 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.models.backbones import ResNet, ResNetV1d
+from mmpose.models.backbones.resnet import (BasicBlock, Bottleneck, ResLayer,
+ get_expansion)
+
+
+def is_block(modules):
+ """Check if the module is a ResNet building block."""
+ if isinstance(modules, (BasicBlock, Bottleneck)):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight (and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_get_expansion():
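+ # get_expansion resolves the expansion factor in priority order: the explicit
+ # argument, the block class's `expansion` attribute, then a per-block default.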
+ assert get_expansion(Bottleneck, 2) == 2
+ assert get_expansion(BasicBlock) == 1
+ assert get_expansion(Bottleneck) == 4
+
+ class MyResBlock(nn.Module):
+
+ expansion = 8
+
+ assert get_expansion(MyResBlock) == 8
+
+ # expansion must be an integer or None
+ with pytest.raises(TypeError):
+ get_expansion(Bottleneck, '0')
+
+ # expansion is not specified and cannot be inferred
+ with pytest.raises(TypeError):
+
+ class SomeModule(nn.Module):
+ pass
+
+ get_expansion(SomeModule)
+
+
+def test_basic_block():
+ # expansion must be 1
+ with pytest.raises(AssertionError):
+ BasicBlock(64, 64, expansion=2)
+
+ # BasicBlock with stride 1, out_channels == in_channels
+ block = BasicBlock(64, 64)
+ assert block.in_channels == 64
+ assert block.mid_channels == 64
+ assert block.out_channels == 64
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 64
+ assert block.conv1.kernel_size == (3, 3)
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.in_channels == 64
+ assert block.conv2.out_channels == 64
+ assert block.conv2.kernel_size == (3, 3)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # BasicBlock with stride 1 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1, bias=False), nn.BatchNorm2d(128))
+ block = BasicBlock(64, 128, downsample=downsample)
+ assert block.in_channels == 64
+ assert block.mid_channels == 128
+ assert block.out_channels == 128
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 128
+ assert block.conv1.kernel_size == (3, 3)
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.in_channels == 128
+ assert block.conv2.out_channels == 128
+ assert block.conv2.kernel_size == (3, 3)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 128, 56, 56])
+
+ # BasicBlock with stride 2 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
+ nn.BatchNorm2d(128))
+ block = BasicBlock(64, 128, stride=2, downsample=downsample)
+ assert block.in_channels == 64
+ assert block.mid_channels == 128
+ assert block.out_channels == 128
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 128
+ assert block.conv1.kernel_size == (3, 3)
+ assert block.conv1.stride == (2, 2)
+ assert block.conv2.in_channels == 128
+ assert block.conv2.out_channels == 128
+ assert block.conv2.kernel_size == (3, 3)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 128, 28, 28])
+
+ # forward with checkpointing
+ block = BasicBlock(64, 64, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 64, 56, 56, requires_grad=True)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_bottleneck():
+ # style must be in ['pytorch', 'caffe']
+ with pytest.raises(AssertionError):
+ Bottleneck(64, 64, style='tensorflow')
+
+ # out_channels must be divisible by expansion
+ with pytest.raises(AssertionError):
+ Bottleneck(64, 64, expansion=3)
+
+ # Test Bottleneck style
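+ # 'pytorch' style places the stride on the 3x3 conv2, while 'caffe' style
+ # places it on the 1x1 conv1, as the stride checks below verify.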
+ block = Bottleneck(64, 64, stride=2, style='pytorch')
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.stride == (2, 2)
+ block = Bottleneck(64, 64, stride=2, style='caffe')
+ assert block.conv1.stride == (2, 2)
+ assert block.conv2.stride == (1, 1)
+
+ # Bottleneck with stride 1
+ block = Bottleneck(64, 64, style='pytorch')
+ assert block.in_channels == 64
+ assert block.mid_channels == 16
+ assert block.out_channels == 64
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 16
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 16
+ assert block.conv2.out_channels == 16
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 16
+ assert block.conv3.out_channels == 64
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # Bottleneck with stride 1 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1), nn.BatchNorm2d(128))
+ block = Bottleneck(64, 128, style='pytorch', downsample=downsample)
+ assert block.in_channels == 64
+ assert block.mid_channels == 32
+ assert block.out_channels == 128
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 32
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 32
+ assert block.conv2.out_channels == 32
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 32
+ assert block.conv3.out_channels == 128
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 128, 56, 56)
+
+ # Bottleneck with stride 2 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1, stride=2), nn.BatchNorm2d(128))
+ block = Bottleneck(
+ 64, 128, stride=2, style='pytorch', downsample=downsample)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 128, 28, 28)
+
+ # Bottleneck with expansion 2
+ block = Bottleneck(64, 64, style='pytorch', expansion=2)
+ assert block.in_channels == 64
+ assert block.mid_channels == 32
+ assert block.out_channels == 64
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 32
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 32
+ assert block.conv2.out_channels == 32
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 32
+ assert block.conv3.out_channels == 64
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # Test Bottleneck with checkpointing
+ block = Bottleneck(64, 64, with_cp=True)
+ block.train()
+ assert block.with_cp
+ x = torch.randn(1, 64, 56, 56, requires_grad=True)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_basicblock_reslayer():
+ # 3 BasicBlock w/o downsample
+ layer = ResLayer(BasicBlock, 3, 32, 32)
+ assert len(layer) == 3
+ for i in range(3):
+ assert layer[i].in_channels == 32
+ assert layer[i].out_channels == 32
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 32, 56, 56)
+
+ # 3 BasicBlock w/ stride 1 and downsample
+ layer = ResLayer(BasicBlock, 3, 32, 64)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (1, 1)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # 3 BasicBlock w/ stride 2 and downsample
+ layer = ResLayer(BasicBlock, 3, 32, 64, stride=2)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (2, 2)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+ # 3 BasicBlock w/ stride 2 and downsample with avg pool
+ layer = ResLayer(BasicBlock, 3, 32, 64, stride=2, avg_down=True)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 3
+ assert isinstance(layer[0].downsample[0], nn.AvgPool2d)
+ assert layer[0].downsample[0].stride == 2
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+
+def test_bottleneck_reslayer():
+ # 3 Bottleneck w/o downsample
+ layer = ResLayer(Bottleneck, 3, 32, 32)
+ assert len(layer) == 3
+ for i in range(3):
+ assert layer[i].in_channels == 32
+ assert layer[i].out_channels == 32
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 32, 56, 56)
+
+ # 3 Bottleneck w/ stride 1 and downsample
+ layer = ResLayer(Bottleneck, 3, 32, 64)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 1
+ assert layer[0].conv1.out_channels == 16
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (1, 1)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 16
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # 3 Bottleneck w/ stride 2 and downsample
+ layer = ResLayer(Bottleneck, 3, 32, 64, stride=2)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].conv1.out_channels == 16
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (2, 2)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 16
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+ # 3 Bottleneck w/ stride 2 and downsample with avg pool
+ layer = ResLayer(Bottleneck, 3, 32, 64, stride=2, avg_down=True)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].conv1.out_channels == 16
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 3
+ assert isinstance(layer[0].downsample[0], nn.AvgPool2d)
+ assert layer[0].downsample[0].stride == 2
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 16
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+ # 3 Bottleneck with custom expansion
+ layer = ResLayer(Bottleneck, 3, 32, 32, expansion=2)
+ assert len(layer) == 3
+ for i in range(3):
+ assert layer[i].in_channels == 32
+ assert layer[i].out_channels == 32
+ assert layer[i].stride == 1
+ assert layer[i].conv1.out_channels == 16
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 32, 56, 56)
+
+
+def test_resnet():
+ """Test resnet backbone."""
+ with pytest.raises(KeyError):
+ # ResNet depth should be in [18, 34, 50, 101, 152]
+ ResNet(20)
+
+ with pytest.raises(AssertionError):
+ # In ResNet: 1 <= num_stages <= 4
+ ResNet(50, num_stages=0)
+
+ with pytest.raises(AssertionError):
+ # In ResNet: 1 <= num_stages <= 4
+ ResNet(50, num_stages=5)
+
+ with pytest.raises(AssertionError):
+ # len(strides) == len(dilations) == num_stages
+ ResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = ResNet(50)
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ ResNet(50, style='tensorflow')
+
+ # Test ResNet50 norm_eval=True
+ model = ResNet(50, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test ResNet50 with torchvision pretrained weight
+ model = ResNet(depth=50, norm_eval=True)
+ model.init_weights('torchvision://resnet50')
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test ResNet50 with first stage frozen
+ frozen_stages = 1
+ model = ResNet(50, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ assert model.norm1.training is False
+ for layer in [model.conv1, model.norm1]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ResNet18 forward
+ model = ResNet(18, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 64, 56, 56)
+ assert feat[1].shape == (1, 128, 28, 28)
+ assert feat[2].shape == (1, 256, 14, 14)
+ assert feat[3].shape == (1, 512, 7, 7)
+
+ # Test ResNet50 with BatchNorm forward
+ model = ResNet(50, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 256, 56, 56)
+ assert feat[1].shape == (1, 512, 28, 28)
+ assert feat[2].shape == (1, 1024, 14, 14)
+ assert feat[3].shape == (1, 2048, 7, 7)
+
+ # Test ResNet50 with layers 1, 2, 3 out forward
+ model = ResNet(50, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == (1, 256, 56, 56)
+ assert feat[1].shape == (1, 512, 28, 28)
+ assert feat[2].shape == (1, 1024, 14, 14)
+
+ # Test ResNet50 with layers 3 (top feature maps) out forward
+ model = ResNet(50, out_indices=(3, ))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == (1, 2048, 7, 7)
+
+ # Test ResNet50 with checkpoint forward
+ model = ResNet(50, out_indices=(0, 1, 2, 3), with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 256, 56, 56)
+ assert feat[1].shape == (1, 512, 28, 28)
+ assert feat[2].shape == (1, 1024, 14, 14)
+ assert feat[3].shape == (1, 2048, 7, 7)
+
+ # zero initialization of residual blocks
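+ # Zeroing the last norm layer of each block makes the residual branch start
+ # as an identity mapping; norm3 (Bottleneck) / norm2 (BasicBlock) are checked.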
+ model = ResNet(50, out_indices=(0, 1, 2, 3), zero_init_residual=True)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert all_zeros(m.norm3)
+ elif isinstance(m, BasicBlock):
+ assert all_zeros(m.norm2)
+
+ # non-zero initialization of residual blocks
+ model = ResNet(50, out_indices=(0, 1, 2, 3), zero_init_residual=False)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert not all_zeros(m.norm3)
+ elif isinstance(m, BasicBlock):
+ assert not all_zeros(m.norm2)
+
+
+def test_resnet_v1d():
+ model = ResNetV1d(depth=50, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ assert len(model.stem) == 3
+ for i in range(3):
+ assert isinstance(model.stem[i], ConvModule)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model.stem(imgs)
+ assert feat.shape == (1, 64, 112, 112)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 256, 56, 56)
+ assert feat[1].shape == (1, 512, 28, 28)
+ assert feat[2].shape == (1, 1024, 14, 14)
+ assert feat[3].shape == (1, 2048, 7, 7)
+
+ # Test ResNet50V1d with first stage frozen
+ frozen_stages = 1
+ model = ResNetV1d(depth=50, frozen_stages=frozen_stages)
+ assert len(model.stem) == 3
+ for i in range(3):
+ assert isinstance(model.stem[i], ConvModule)
+ model.init_weights()
+ model.train()
+ check_norm_state(model.stem, False)
+ for param in model.stem.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+
+def test_resnet_half_channel():
+ model = ResNet(50, base_channels=32, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 128, 56, 56)
+ assert feat[1].shape == (1, 256, 28, 28)
+ assert feat[2].shape == (1, 512, 14, 14)
+ assert feat[3].shape == (1, 1024, 7, 7)
diff --git a/vendor/ViTPose/tests/test_backbones/test_resnext.py b/vendor/ViTPose/tests/test_backbones/test_resnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..88191e142724e1e0e35819b55a0420f4f06388ba
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_resnext.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models.backbones import ResNeXt
+from mmpose.models.backbones.resnext import Bottleneck as BottleneckX
+
+
+def test_bottleneck():
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ BottleneckX(64, 64, groups=32, width_per_group=4, style='tensorflow')
+
+ # Test ResNeXt Bottleneck structure
+ block = BottleneckX(
+ 64, 256, groups=32, width_per_group=4, stride=2, style='pytorch')
+ assert block.conv2.stride == (2, 2)
+ assert block.conv2.groups == 32
+ assert block.conv2.out_channels == 128
+
+ # Test ResNeXt Bottleneck forward
+ block = BottleneckX(64, 64, base_channels=16, groups=32, width_per_group=4)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnext():
+ with pytest.raises(KeyError):
+ # ResNeXt depth should be in [50, 101, 152]
+ ResNeXt(depth=18)
+
+ # Test ResNeXt with group 32, width_per_group 4
+ model = ResNeXt(
+ depth=50, groups=32, width_per_group=4, out_indices=(0, 1, 2, 3))
+ for m in model.modules():
+ if isinstance(m, BottleneckX):
+ assert m.conv2.groups == 32
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNeXt with group 32, width_per_group 4 and layers 3 out forward
+ model = ResNeXt(depth=50, groups=32, width_per_group=4, out_indices=(3, ))
+ for m in model.modules():
+ if isinstance(m, BottleneckX):
+ assert m.conv2.groups == 32
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 2048, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_rsn.py b/vendor/ViTPose/tests/test_backbones/test_rsn.py
new file mode 100644
index 0000000000000000000000000000000000000000..617dd9ed98c70d853488caed3eb4f08602a9a595
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_rsn.py
@@ -0,0 +1,35 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models import RSN
+
+
+def test_rsn_backbone():
+ with pytest.raises(AssertionError):
+ # RSN's num_stages should be larger than 0
+ RSN(num_stages=0)
+ with pytest.raises(AssertionError):
+ # RSN's num_steps should be larger than 1
+ RSN(num_steps=1)
+ with pytest.raises(AssertionError):
+ # RSN's num_units should be larger than 1
+ RSN(num_units=1)
+ with pytest.raises(AssertionError):
+ # len(num_blocks) should equal num_units
+ RSN(num_units=2, num_blocks=[2, 2, 2])
+
+ # Test RSN's outputs
+ model = RSN(num_stages=2, num_units=2, num_blocks=[2, 2])
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 511, 511)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert len(feat[0]) == 2
+ assert len(feat[1]) == 2
+ assert feat[0][0].shape == torch.Size([1, 256, 64, 64])
+ assert feat[0][1].shape == torch.Size([1, 256, 128, 128])
+ assert feat[1][0].shape == torch.Size([1, 256, 64, 64])
+ assert feat[1][1].shape == torch.Size([1, 256, 128, 128])
diff --git a/vendor/ViTPose/tests/test_backbones/test_scnet.py b/vendor/ViTPose/tests/test_backbones/test_scnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..e03a87ba94b08fa721fe435b572bc65bd2a567c8
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_scnet.py
@@ -0,0 +1,163 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import SCNet
+from mmpose.models.backbones.scnet import SCBottleneck, SCConv
+
+
+def is_block(modules):
+ """Check if the module is an SCNet building block."""
+ if isinstance(modules, (SCBottleneck, )):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if the module is a normalization layer."""
+ if isinstance(modules, (_BatchNorm, )):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight (and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_scnet_scconv():
+ # Test scconv forward
+ layer = SCConv(64, 64, 1, 4)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_scnet_bottleneck():
+ # Test Bottleneck forward
+ block = SCBottleneck(64, 64)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_scnet_backbone():
+ """Test scnet backbone."""
+ with pytest.raises(KeyError):
+ # SCNet depth should be in [50, 101]
+ SCNet(20)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = SCNet(50)
+ model.init_weights(pretrained=0)
+
+ # Test SCNet norm_eval=True
+ model = SCNet(50, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test SCNet50 with first stage frozen
+ frozen_stages = 1
+ model = SCNet(50, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ assert model.norm1.training is False
+ for layer in [model.conv1, model.norm1]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test SCNet with BatchNorm forward
+ model = SCNet(50, out_indices=(0, 1, 2, 3))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([2, 256, 56, 56])
+ assert feat[1].shape == torch.Size([2, 512, 28, 28])
+ assert feat[2].shape == torch.Size([2, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([2, 2048, 7, 7])
+
+ # Test SCNet with layers 1, 2, 3 out forward
+ model = SCNet(50, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size([2, 256, 56, 56])
+ assert feat[1].shape == torch.Size([2, 512, 28, 28])
+ assert feat[2].shape == torch.Size([2, 1024, 14, 14])
+
+ # Test SCNet50 with layers 3 (top feature maps) out forward
+ model = SCNet(50, out_indices=(3, ))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([2, 2048, 7, 7])
+
+ # Test SCNet50 with checkpoint forward
+ model = SCNet(50, out_indices=(0, 1, 2, 3), with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([2, 256, 56, 56])
+ assert feat[1].shape == torch.Size([2, 512, 28, 28])
+ assert feat[2].shape == torch.Size([2, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([2, 2048, 7, 7])
+
+ # Test SCNet zero initialization of residual
+ model = SCNet(50, out_indices=(0, 1, 2, 3), zero_init_residual=True)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, SCBottleneck):
+ assert all_zeros(m.norm3)
+ model.train()
+
+ imgs = torch.randn(2, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([2, 256, 56, 56])
+ assert feat[1].shape == torch.Size([2, 512, 28, 28])
+ assert feat[2].shape == torch.Size([2, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([2, 2048, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_seresnet.py b/vendor/ViTPose/tests/test_backbones/test_seresnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..4484c66ddec9e1aa38bd4797871a627a9a5e222b
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_seresnet.py
@@ -0,0 +1,243 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import AvgPool2d
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import SEResNet
+from mmpose.models.backbones.resnet import ResLayer
+from mmpose.models.backbones.seresnet import SEBottleneck, SELayer
+
+
+def all_zeros(modules):
+ """Check if the weight (and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_selayer():
+ # Test selayer forward
+ layer = SELayer(64)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test selayer forward with different ratio
+ layer = SELayer(64, ratio=8)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_bottleneck():
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ SEBottleneck(64, 64, style='tensorflow')
+
+ # Test SEBottleneck with checkpoint forward
+ block = SEBottleneck(64, 64, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck style
+ block = SEBottleneck(64, 256, stride=2, style='pytorch')
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.stride == (2, 2)
+ block = SEBottleneck(64, 256, stride=2, style='caffe')
+ assert block.conv1.stride == (2, 2)
+ assert block.conv2.stride == (1, 1)
+
+ # Test Bottleneck forward
+ block = SEBottleneck(64, 64)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_res_layer():
+ # Test ResLayer of 3 SEBottleneck w/o downsample
+ layer = ResLayer(SEBottleneck, 3, 64, 64, se_ratio=16)
+ assert len(layer) == 3
+ assert layer[0].conv1.in_channels == 64
+ assert layer[0].conv1.out_channels == 16
+ for i in range(1, len(layer)):
+ assert layer[i].conv1.in_channels == 64
+ assert layer[i].conv1.out_channels == 16
+ for i in range(len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test ResLayer of 3 SEBottleneck with downsample
+ layer = ResLayer(SEBottleneck, 3, 64, 256, se_ratio=16)
+ assert layer[0].downsample[0].out_channels == 256
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 56, 56])
+
+ # Test ResLayer of 3 SEBottleneck with stride=2
+ layer = ResLayer(SEBottleneck, 3, 64, 256, stride=2, se_ratio=8)
+ assert layer[0].downsample[0].out_channels == 256
+ assert layer[0].downsample[0].stride == (2, 2)
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 28, 28])
+
+ # Test ResLayer of 3 SEBottleneck with stride=2 and average downsample
+ layer = ResLayer(
+ SEBottleneck, 3, 64, 256, stride=2, avg_down=True, se_ratio=8)
+ assert isinstance(layer[0].downsample[0], AvgPool2d)
+ assert layer[0].downsample[1].out_channels == 256
+ assert layer[0].downsample[1].stride == (1, 1)
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 28, 28])
+
+
+def test_seresnet():
+ """Test SEResNet backbone."""
+ with pytest.raises(KeyError):
+ # SEResNet depth should be in [50, 101, 152]
+ SEResNet(20)
+
+ with pytest.raises(AssertionError):
+ # In SEResNet: 1 <= num_stages <= 4
+ SEResNet(50, num_stages=0)
+
+ with pytest.raises(AssertionError):
+ # In SEResNet: 1 <= num_stages <= 4
+ SEResNet(50, num_stages=5)
+
+ with pytest.raises(AssertionError):
+ # len(strides) == len(dilations) == num_stages
+ SEResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = SEResNet(50)
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ SEResNet(50, style='tensorflow')
+
+ # Test SEResNet50 norm_eval=True
+ model = SEResNet(50, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test SEResNet50 with torchvision pretrained weight
+ model = SEResNet(depth=50, norm_eval=True)
+ model.init_weights('torchvision://resnet50')
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test SEResNet50 with first stage frozen
+ frozen_stages = 1
+ model = SEResNet(50, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ assert model.norm1.training is False
+ for layer in [model.conv1, model.norm1]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test SEResNet50 with BatchNorm forward
+ model = SEResNet(50, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test SEResNet50 with layers 1, 2, 3 out forward
+ model = SEResNet(50, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+
+ # Test SEResNet50 with layers 3 (top feature maps) out forward
+ model = SEResNet(50, out_indices=(3, ))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 2048, 7, 7])
+
+ # Test SEResNet50 with checkpoint forward
+ model = SEResNet(50, out_indices=(0, 1, 2, 3), with_cp=True)
+ for m in model.modules():
+ if isinstance(m, SEBottleneck):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test SEResNet50 zero initialization of residual
+ model = SEResNet(50, out_indices=(0, 1, 2, 3), zero_init_residual=True)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, SEBottleneck):
+ assert all_zeros(m.norm3)
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_seresnext.py b/vendor/ViTPose/tests/test_backbones/test_seresnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c156050885a8aed4d5f04c61cec792c7aa1fd94
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_seresnext.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models.backbones import SEResNeXt
+from mmpose.models.backbones.seresnext import SEBottleneck as SEBottleneckX
+
+
+def test_bottleneck():
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ SEBottleneckX(64, 64, groups=32, width_per_group=4, style='tensorflow')
+
+ # Test SEResNeXt Bottleneck structure
+ block = SEBottleneckX(
+ 64, 256, groups=32, width_per_group=4, stride=2, style='pytorch')
+ assert block.width_per_group == 4
+ assert block.conv2.stride == (2, 2)
+ assert block.conv2.groups == 32
+ assert block.conv2.out_channels == 128
+ assert block.conv2.out_channels == block.mid_channels
+
+ # Test SEResNeXt Bottleneck structure (groups=1)
+ block = SEBottleneckX(
+ 64, 256, groups=1, width_per_group=4, stride=2, style='pytorch')
+ assert block.conv2.stride == (2, 2)
+ assert block.conv2.groups == 1
+ assert block.conv2.out_channels == 64
+ assert block.mid_channels == 64
+ assert block.conv2.out_channels == block.mid_channels
+
+ # Test SEResNeXt Bottleneck forward
+ block = SEBottleneckX(
+ 64, 64, base_channels=16, groups=32, width_per_group=4)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_seresnext():
+ with pytest.raises(KeyError):
+ # SEResNeXt depth should be in [50, 101, 152]
+ SEResNeXt(depth=18)
+
+ # Test SEResNeXt with group 32, width_per_group 4
+ model = SEResNeXt(
+ depth=50, groups=32, width_per_group=4, out_indices=(0, 1, 2, 3))
+ for m in model.modules():
+ if isinstance(m, SEBottleneckX):
+ assert m.conv2.groups == 32
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test SEResNeXt with group 32, width_per_group 4 and layers 3 out forward
+ model = SEResNeXt(
+ depth=50, groups=32, width_per_group=4, out_indices=(3, ))
+ for m in model.modules():
+ if isinstance(m, SEBottleneckX):
+ assert m.conv2.groups == 32
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 2048, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_shufflenet_v1.py b/vendor/ViTPose/tests/test_backbones/test_shufflenet_v1.py
new file mode 100644
index 0000000000000000000000000000000000000000..302d52f56a187fe9bd05f943b84d17142272f6fa
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_shufflenet_v1.py
@@ -0,0 +1,245 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import ShuffleNetV1
+from mmpose.models.backbones.shufflenet_v1 import ShuffleUnit
+
+
+def is_block(modules):
+ """Check if the module is a ShuffleNetV1 building block."""
+ if isinstance(modules, (ShuffleUnit, )):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if the module is a normalization layer."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_shufflenetv1_shuffleunit():
+
+ with pytest.raises(ValueError):
+ # combine must be in ['add', 'concat']
+ ShuffleUnit(24, 16, groups=3, first_block=True, combine='test')
+
+ with pytest.raises(AssertionError):
+ # inplanes must be equal to outplanes when combine='add'
+ ShuffleUnit(64, 24, groups=4, first_block=True, combine='add')
+
+ # Test ShuffleUnit with combine='add'
+ block = ShuffleUnit(24, 24, groups=3, first_block=True, combine='add')
+ x = torch.randn(1, 24, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 56, 56))
+
+ # Test ShuffleUnit with combine='concat'
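+ # The 'concat' unit halves the spatial size and concatenates the shortcut
+ # branch, so 24 channels at 56x56 become 240 channels at 28x28.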
+ block = ShuffleUnit(24, 240, groups=3, first_block=True, combine='concat')
+ x = torch.randn(1, 24, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 240, 28, 28))
+
+ # Test ShuffleUnit with checkpoint forward
+ block = ShuffleUnit(
+ 24, 24, groups=3, first_block=True, combine='add', with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 24, 56, 56)
+ x.requires_grad = True
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 24, 56, 56))
+
+
+def test_shufflenetv1_backbone():
+
+ with pytest.raises(ValueError):
+ # frozen_stages must be in range(-1, 4)
+ ShuffleNetV1(frozen_stages=10)
+
+ with pytest.raises(ValueError):
+ # the item in out_indices must be in range(0, 4)
+ ShuffleNetV1(out_indices=[5])
+
+ with pytest.raises(ValueError):
+ # groups must be in [1, 2, 3, 4, 8]
+ ShuffleNetV1(groups=10)
+
+ with pytest.raises(TypeError):
+ # pretrained must be str or None
+ model = ShuffleNetV1()
+ model.init_weights(pretrained=1)
+
+ # Test ShuffleNetV1 norm state
+ model = ShuffleNetV1()
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), True)
+
+ # Test ShuffleNetV1 with first stage frozen
+ frozen_stages = 1
+ model = ShuffleNetV1(frozen_stages=frozen_stages, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+ for param in model.conv1.parameters():
+ assert param.requires_grad is False
+ for i in range(frozen_stages):
+ layer = model.layers[i]
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ShuffleNetV1 forward with groups=1
+ model = ShuffleNetV1(groups=1, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 144, 28, 28))
+ assert feat[1].shape == torch.Size((1, 288, 14, 14))
+ assert feat[2].shape == torch.Size((1, 576, 7, 7))
+
+ # Test ShuffleNetV1 forward with groups=2
+ model = ShuffleNetV1(groups=2, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 200, 28, 28))
+ assert feat[1].shape == torch.Size((1, 400, 14, 14))
+ assert feat[2].shape == torch.Size((1, 800, 7, 7))
+
+ # Test ShuffleNetV1 forward with groups=3
+ model = ShuffleNetV1(groups=3, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 240, 28, 28))
+ assert feat[1].shape == torch.Size((1, 480, 14, 14))
+ assert feat[2].shape == torch.Size((1, 960, 7, 7))
+
+ # Test ShuffleNetV1 forward with groups=4
+ model = ShuffleNetV1(groups=4, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 272, 28, 28))
+ assert feat[1].shape == torch.Size((1, 544, 14, 14))
+ assert feat[2].shape == torch.Size((1, 1088, 7, 7))
+
+ # Test ShuffleNetV1 forward with groups=8
+ model = ShuffleNetV1(groups=8, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 384, 28, 28))
+ assert feat[1].shape == torch.Size((1, 768, 14, 14))
+ assert feat[2].shape == torch.Size((1, 1536, 7, 7))
+
+ # Test ShuffleNetV1 forward with GroupNorm forward
+ model = ShuffleNetV1(
+ groups=3,
+ norm_cfg=dict(type='GN', num_groups=2, requires_grad=True),
+ out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size((1, 240, 28, 28))
+ assert feat[1].shape == torch.Size((1, 480, 14, 14))
+ assert feat[2].shape == torch.Size((1, 960, 7, 7))
+
+ # Test ShuffleNetV1 forward with layers 1, 2 forward
+ model = ShuffleNetV1(groups=3, out_indices=(1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert feat[0].shape == torch.Size((1, 480, 14, 14))
+ assert feat[1].shape == torch.Size((1, 960, 7, 7))
+
+ # Test ShuffleNetV1 forward with layers 2 forward
+ model = ShuffleNetV1(groups=3, out_indices=(2, ))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert isinstance(feat, torch.Tensor)
+ assert feat.shape == torch.Size((1, 960, 7, 7))
+
+ # Test ShuffleNetV1 forward with checkpoint forward
+ model = ShuffleNetV1(groups=3, with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+
+ # Test ShuffleNetV1 with norm_eval
+ model = ShuffleNetV1(norm_eval=True)
+ model.init_weights()
+ model.train()
+
+ assert check_norm_state(model.modules(), False)
diff --git a/vendor/ViTPose/tests/test_backbones/test_shufflenet_v2.py b/vendor/ViTPose/tests/test_backbones/test_shufflenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..2af5254d874dba1a1086fb00ce542ee3757c3cd3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_shufflenet_v2.py
@@ -0,0 +1,204 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import ShuffleNetV2
+from mmpose.models.backbones.shufflenet_v2 import InvertedResidual
+
+
+def is_block(modules):
+ """Check if the module is a ShuffleNetV2 building block."""
+ if isinstance(modules, (InvertedResidual, )):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if the module is a normalization layer."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_shufflenetv2_invertedresidual():
+
+ with pytest.raises(AssertionError):
+ # when stride==1, in_channels should be equal to out_channels // 2 * 2
+ InvertedResidual(24, 32, stride=1)
+
+ with pytest.raises(AssertionError):
+ # when in_channels != out_channels // 2 * 2, stride should not be
+ # equal to 1.
+ InvertedResidual(24, 32, stride=1)
+
+ # Test InvertedResidual forward
+ block = InvertedResidual(24, 48, stride=2)
+ x = torch.randn(1, 24, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 48, 28, 28))
+
+ # Test InvertedResidual with checkpoint forward
+ block = InvertedResidual(48, 48, stride=1, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 48, 56, 56)
+ x.requires_grad = True
+ x_out = block(x)
+ assert x_out.shape == torch.Size((1, 48, 56, 56))
+
+
+def test_shufflenetv2_backbone():
+
+ with pytest.raises(ValueError):
+ # widen_factor must be in [0.5, 1.0, 1.5, 2.0]
+ ShuffleNetV2(widen_factor=3.0)
+
+ with pytest.raises(ValueError):
+ # frozen_stages must be in [0, 1, 2, 3]
+ ShuffleNetV2(widen_factor=1.0, frozen_stages=4)
+
+ with pytest.raises(ValueError):
+ # out_indices must be in [0, 1, 2, 3]
+ ShuffleNetV2(widen_factor=1.0, out_indices=(4, ))
+
+ with pytest.raises(TypeError):
+ # pretrained must be str or None
+ model = ShuffleNetV2()
+ model.init_weights(pretrained=1)
+
+ # Test ShuffleNetV2 norm state
+ model = ShuffleNetV2()
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), True)
+
+ # Test ShuffleNetV2 with first stage frozen
+ frozen_stages = 1
+ model = ShuffleNetV2(frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ for param in model.conv1.parameters():
+ assert param.requires_grad is False
+ for i in range(0, frozen_stages):
+ layer = model.layers[i]
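+ # ResNetV1d replaces the single 7x7 stem convolution with three stacked 3x3
+ # ConvModules, so the stem is expected to contain exactly three layers.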
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ShuffleNetV2 with norm_eval
+ model = ShuffleNetV2(norm_eval=True)
+ model.init_weights()
+ model.train()
+
+ assert check_norm_state(model.modules(), False)
+
+ # Test ShuffleNetV2 forward with widen_factor=0.5
+ model = ShuffleNetV2(widen_factor=0.5, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size((1, 48, 28, 28))
+ assert feat[1].shape == torch.Size((1, 96, 14, 14))
+ assert feat[2].shape == torch.Size((1, 192, 7, 7))
+
+ # Test ShuffleNetV2 forward with widen_factor=1.0
+ model = ShuffleNetV2(widen_factor=1.0, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size((1, 116, 28, 28))
+ assert feat[1].shape == torch.Size((1, 232, 14, 14))
+ assert feat[2].shape == torch.Size((1, 464, 7, 7))
+
+ # Test ShuffleNetV2 forward with widen_factor=1.5
+ model = ShuffleNetV2(widen_factor=1.5, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size((1, 176, 28, 28))
+ assert feat[1].shape == torch.Size((1, 352, 14, 14))
+ assert feat[2].shape == torch.Size((1, 704, 7, 7))
+
+ # Test ShuffleNetV2 forward with widen_factor=2.0
+ model = ShuffleNetV2(widen_factor=2.0, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size((1, 244, 28, 28))
+ assert feat[1].shape == torch.Size((1, 488, 14, 14))
+ assert feat[2].shape == torch.Size((1, 976, 7, 7))
+
+ # Test ShuffleNetV2 forward with layers 3 forward
+ model = ShuffleNetV2(widen_factor=1.0, out_indices=(2, ))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert isinstance(feat, torch.Tensor)
+ assert feat.shape == torch.Size((1, 464, 7, 7))
+
+ # Test ShuffleNetV2 forward with layers 1 2 forward
+ model = ShuffleNetV2(widen_factor=1.0, out_indices=(1, 2))
+ model.init_weights()
+ model.train()
+
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 2
+ assert feat[0].shape == torch.Size((1, 232, 14, 14))
+ assert feat[1].shape == torch.Size((1, 464, 7, 7))
+
+ # Test ShuffleNetV2 forward with checkpoint forward
+ model = ShuffleNetV2(widen_factor=1.0, with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
diff --git a/vendor/ViTPose/tests/test_backbones/test_tcn.py b/vendor/ViTPose/tests/test_backbones/test_tcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..be66a0a7d32bbacbd54ca7f94faa415e44724a92
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_tcn.py
@@ -0,0 +1,153 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+
+from mmpose.models.backbones import TCN
+from mmpose.models.backbones.tcn import BasicTemporalBlock
+
+
+def test_basic_temporal_block():
+ with pytest.raises(AssertionError):
+ # padding (+ shift) should not be larger than x.shape[2]
+ block = BasicTemporalBlock(1024, 1024, dilation=81)
+ x = torch.rand(2, 1024, 150)
+ x_out = block(x)
+
+ with pytest.raises(AssertionError):
+ # when use_stride_conv is True, shift + kernel_size // 2 should
+ # not be larger than x.shape[2]
+ block = BasicTemporalBlock(
+ 1024, 1024, kernel_size=5, causal=True, use_stride_conv=True)
+ x = torch.rand(2, 1024, 3)
+ x_out = block(x)
+
+ # BasicTemporalBlock with causal == False
+ block = BasicTemporalBlock(1024, 1024)
+ x = torch.rand(2, 1024, 241)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 1024, 235])
+
+ # BasicTemporalBlock with causal == True
+ block = BasicTemporalBlock(1024, 1024, causal=True)
+ x = torch.rand(2, 1024, 241)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 1024, 235])
+
+ # BasicTemporalBlock with residual == False
+ block = BasicTemporalBlock(1024, 1024, residual=False)
+ x = torch.rand(2, 1024, 241)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 1024, 235])
+
+ # BasicTemporalBlock, use_stride_conv == True
+ block = BasicTemporalBlock(1024, 1024, use_stride_conv=True)
+ x = torch.rand(2, 1024, 81)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 1024, 27])
+
+ # BasicTemporalBlock with use_stride_conv == True and causal == True
+ block = BasicTemporalBlock(1024, 1024, use_stride_conv=True, causal=True)
+ x = torch.rand(2, 1024, 81)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([2, 1024, 27])
+
+
+def test_tcn_backbone():
+ with pytest.raises(AssertionError):
+ # num_blocks should equal len(kernel_sizes) - 1
+ TCN(in_channels=34, num_blocks=3, kernel_sizes=(3, 3, 3))
+
+ with pytest.raises(AssertionError):
+ # kernel size should be odd
+ TCN(in_channels=34, kernel_sizes=(3, 4, 3))
+
+ # Test TCN with 2 blocks (use_stride_conv == False)
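+ # with kernel size 3 everywhere, the stem conv trims 2 frames and block i (dilation 3**i)
+ # trims another 2 * 3**i frames, giving 243 -> 241 -> 235 -> 217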
+ model = TCN(in_channels=34, num_blocks=2, kernel_sizes=(3, 3, 3))
+ pose2d = torch.rand((2, 34, 243))
+ feat = model(pose2d)
+ assert len(feat) == 2
+ assert feat[0].shape == (2, 1024, 235)
+ assert feat[1].shape == (2, 1024, 217)
+
+ # Test TCN with 4 blocks and weight norm clip
+ max_norm = 0.1
+ model = TCN(
+ in_channels=34,
+ num_blocks=4,
+ kernel_sizes=(3, 3, 3, 3, 3),
+ max_norm=max_norm)
+ pose2d = torch.rand((2, 34, 243))
+ feat = model(pose2d)
+ assert len(feat) == 4
+ assert feat[0].shape == (2, 1024, 235)
+ assert feat[1].shape == (2, 1024, 217)
+ assert feat[2].shape == (2, 1024, 163)
+ assert feat[3].shape == (2, 1024, 1)
+
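+ # with max_norm set, every conv weight norm should be clipped to at most max_norm,
+ # so max(norm, max_norm) stays equal to max_norm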
+ for module in model.modules():
+ if isinstance(module, torch.nn.modules.conv._ConvNd):
+ norm = module.weight.norm().item()
+ np.testing.assert_allclose(
+ np.maximum(norm, max_norm), max_norm, rtol=1e-4)
+
+ # Test TCN with 4 blocks (use_stride_conv == True)
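+ # in stride-conv mode the stem conv and each block downsample by 3: 243 -> 81 -> 27 -> 9 -> 3 -> 1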
+ model = TCN(
+ in_channels=34,
+ num_blocks=4,
+ kernel_sizes=(3, 3, 3, 3, 3),
+ use_stride_conv=True)
+ pose2d = torch.rand((2, 34, 243))
+ feat = model(pose2d)
+ assert len(feat) == 4
+ assert feat[0].shape == (2, 1024, 27)
+ assert feat[1].shape == (2, 1024, 9)
+ assert feat[2].shape == (2, 1024, 3)
+ assert feat[3].shape == (2, 1024, 1)
+
+ # Check that models with and without use_stride_conv produce the same
+ # output and gradients after a forward and backward pass
+ model1 = TCN(
+ in_channels=34,
+ stem_channels=4,
+ num_blocks=1,
+ kernel_sizes=(3, 3),
+ dropout=0,
+ residual=False,
+ norm_cfg=None)
+ model2 = TCN(
+ in_channels=34,
+ stem_channels=4,
+ num_blocks=1,
+ kernel_sizes=(3, 3),
+ dropout=0,
+ residual=False,
+ norm_cfg=None,
+ use_stride_conv=True)
+ for m in model1.modules():
+ if isinstance(m, nn.Conv1d):
+ nn.init.constant_(m.weight, 0.5)
+ if m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ for m in model2.modules():
+ if isinstance(m, nn.Conv1d):
+ nn.init.constant_(m.weight, 0.5)
+ if m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ input1 = torch.rand((1, 34, 9))
+ input2 = input1.clone()
+ outputs1 = model1(input1)
+ outputs2 = model2(input2)
+ for output1, output2 in zip(outputs1, outputs2):
+ assert torch.isclose(output1, output2).all()
+
+ criterion = nn.MSELoss()
+ target = torch.rand(output1.shape)
+ loss1 = criterion(output1, target)
+ loss2 = criterion(output2, target)
+ loss1.backward()
+ loss2.backward()
+ for m1, m2 in zip(model1.modules(), model2.modules()):
+ if isinstance(m1, nn.Conv1d):
+ assert torch.isclose(m1.weight.grad, m2.weight.grad).all()
diff --git a/vendor/ViTPose/tests/test_backbones/test_v2v_net.py b/vendor/ViTPose/tests/test_backbones/test_v2v_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..33c467a11275a88364c9559769c7cf7ac979c3b9
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_v2v_net.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmpose.models import builder
+
+
+def test_v2v_net():
+ """Test V2VNet."""
+ cfg = dict(type='V2VNet', input_channels=17, output_channels=15)
+ model = builder.build_backbone(cfg)
+ inputs = torch.randn(2, 17, 32, 32, 32)
+ output = model(inputs)
+ assert output.shape == (2, 15, 32, 32, 32)
diff --git a/vendor/ViTPose/tests/test_backbones/test_vgg.py b/vendor/ViTPose/tests/test_backbones/test_vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..f69e38b3a3d344668121c8633608bc4ec94198fc
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_vgg.py
@@ -0,0 +1,137 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.models.backbones import VGG
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_vgg():
+ """Test VGG backbone."""
+ with pytest.raises(KeyError):
+ # VGG depth should be in [11, 13, 16, 19]
+ VGG(18)
+
+ with pytest.raises(AssertionError):
+ # In VGG: 1 <= num_stages <= 5
+ VGG(11, num_stages=0)
+
+ with pytest.raises(AssertionError):
+ # In VGG: 1 <= num_stages <= 5
+ VGG(11, num_stages=6)
+
+ with pytest.raises(AssertionError):
+ # len(dilations) == num_stages
+ VGG(11, dilations=(1, 1), num_stages=3)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = VGG(11)
+ model.init_weights(pretrained=0)
+
+ # Test VGG11 norm_eval=True
+ model = VGG(11, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test VGG11 forward without classifiers
+ model = VGG(11, out_indices=(0, 1, 2, 3, 4))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 5
+ assert feat[0].shape == (1, 64, 112, 112)
+ assert feat[1].shape == (1, 128, 56, 56)
+ assert feat[2].shape == (1, 256, 28, 28)
+ assert feat[3].shape == (1, 512, 14, 14)
+ assert feat[4].shape == (1, 512, 7, 7)
+
+ # Test VGG11 forward with classifiers
+ model = VGG(11, num_classes=10, out_indices=(0, 1, 2, 3, 4, 5))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 6
+ assert feat[0].shape == (1, 64, 112, 112)
+ assert feat[1].shape == (1, 128, 56, 56)
+ assert feat[2].shape == (1, 256, 28, 28)
+ assert feat[3].shape == (1, 512, 14, 14)
+ assert feat[4].shape == (1, 512, 7, 7)
+ assert feat[5].shape == (1, 10)
+
+ # Test VGG11BN forward
+ model = VGG(11, norm_cfg=dict(type='BN'), out_indices=(0, 1, 2, 3, 4))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 5
+ assert feat[0].shape == (1, 64, 112, 112)
+ assert feat[1].shape == (1, 128, 56, 56)
+ assert feat[2].shape == (1, 256, 28, 28)
+ assert feat[3].shape == (1, 512, 14, 14)
+ assert feat[4].shape == (1, 512, 7, 7)
+
+ # Test VGG11BN forward with classifiers
+ model = VGG(
+ 11,
+ num_classes=10,
+ norm_cfg=dict(type='BN'),
+ out_indices=(0, 1, 2, 3, 4, 5))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 6
+ assert feat[0].shape == (1, 64, 112, 112)
+ assert feat[1].shape == (1, 128, 56, 56)
+ assert feat[2].shape == (1, 256, 28, 28)
+ assert feat[3].shape == (1, 512, 14, 14)
+ assert feat[4].shape == (1, 512, 7, 7)
+ assert feat[5].shape == (1, 10)
+
+ # Test VGG13 with layers 1, 2, 3 out forward
+ model = VGG(13, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == (1, 64, 112, 112)
+ assert feat[1].shape == (1, 128, 56, 56)
+ assert feat[2].shape == (1, 256, 28, 28)
+
+ # Test VGG16 with top feature maps out forward
+ model = VGG(16)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == (1, 512, 7, 7)
+
+ # Test VGG19 with classification score out forward
+ model = VGG(19, num_classes=10)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == (1, 10)
diff --git a/vendor/ViTPose/tests/test_backbones/test_vipnas_mbv3.py b/vendor/ViTPose/tests/test_backbones/test_vipnas_mbv3.py
new file mode 100644
index 0000000000000000000000000000000000000000..83011daf46908db675461fa62346abe4cb46cb60
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_vipnas_mbv3.py
@@ -0,0 +1,99 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from torch.nn.modules import GroupNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmpose.models.backbones import ViPNAS_MobileNetV3
+from mmpose.models.backbones.utils import InvertedResidual
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_mobilenetv3_backbone():
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = ViPNAS_MobileNetV3()
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AttributeError):
+ # frozen_stages must be no more than 21
+ model = ViPNAS_MobileNetV3(frozen_stages=22)
+ model.train()
+
+ # Test MobileNetv3
+ model = ViPNAS_MobileNetV3()
+ model.init_weights()
+ model.train()
+
+ # Test MobileNetv3 with first stage frozen
+ frozen_stages = 1
+ model = ViPNAS_MobileNetV3(frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ for param in model.conv1.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test MobileNetv3 with norm eval
+ model = ViPNAS_MobileNetV3(norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test MobileNetv3 forward
+ model = ViPNAS_MobileNetV3()
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 160, 7, 7])
+
+ # Test MobileNetv3 forward with GroupNorm
+ model = ViPNAS_MobileNetV3(
+ norm_cfg=dict(type='GN', num_groups=2, requires_grad=True))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 160, 7, 7])
+
+ # Test MobileNetv3 with checkpoint forward
+ model = ViPNAS_MobileNetV3(with_cp=True)
+ for m in model.modules():
+ if isinstance(m, InvertedResidual):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == torch.Size([1, 160, 7, 7])
diff --git a/vendor/ViTPose/tests/test_backbones/test_vipnas_resnet.py b/vendor/ViTPose/tests/test_backbones/test_vipnas_resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..279358929d79e290333594b22ce0bdc3c4ee1775
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backbones/test_vipnas_resnet.py
@@ -0,0 +1,341 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+import torch.nn as nn
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmpose.models.backbones import ViPNAS_ResNet
+from mmpose.models.backbones.vipnas_resnet import (ViPNAS_Bottleneck,
+ ViPNAS_ResLayer,
+ get_expansion)
+
+
+def is_block(modules):
+ """Check if is ViPNAS_ResNet building block."""
+ if isinstance(modules, (ViPNAS_Bottleneck)):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight(and bias) is all zero."""
+ weight_zero = torch.equal(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.equal(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_get_expansion():
+ assert get_expansion(ViPNAS_Bottleneck, 2) == 2
+ assert get_expansion(ViPNAS_Bottleneck) == 1
+
+ class MyResBlock(nn.Module):
+
+ expansion = 8
+
+ assert get_expansion(MyResBlock) == 8
+
+ # expansion must be an integer or None
+ with pytest.raises(TypeError):
+ get_expansion(ViPNAS_Bottleneck, '0')
+
+ # expansion is not specified and cannot be inferred
+ with pytest.raises(TypeError):
+
+ class SomeModule(nn.Module):
+ pass
+
+ get_expansion(SomeModule)
+
+
+def test_vipnas_bottleneck():
+ # style must be in ['pytorch', 'caffe']
+ with pytest.raises(AssertionError):
+ ViPNAS_Bottleneck(64, 64, style='tensorflow')
+
+ # out_channels must be divisible by expansion
+ with pytest.raises(AssertionError):
+ ViPNAS_Bottleneck(64, 64, expansion=3)
+
+ # Test ViPNAS_Bottleneck style
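+ # 'pytorch' style places the stride on the 3x3 conv2, 'caffe' style on the 1x1 conv1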
+ block = ViPNAS_Bottleneck(64, 64, stride=2, style='pytorch')
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.stride == (2, 2)
+ block = ViPNAS_Bottleneck(64, 64, stride=2, style='caffe')
+ assert block.conv1.stride == (2, 2)
+ assert block.conv2.stride == (1, 1)
+
+ # ViPNAS_Bottleneck with stride 1
+ block = ViPNAS_Bottleneck(64, 64, style='pytorch')
+ assert block.in_channels == 64
+ assert block.mid_channels == 16
+ assert block.out_channels == 64
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 16
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 16
+ assert block.conv2.out_channels == 16
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 16
+ assert block.conv3.out_channels == 64
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # ViPNAS_Bottleneck with stride 1 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1), nn.BatchNorm2d(128))
+ block = ViPNAS_Bottleneck(64, 128, style='pytorch', downsample=downsample)
+ assert block.in_channels == 64
+ assert block.mid_channels == 32
+ assert block.out_channels == 128
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 32
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 32
+ assert block.conv2.out_channels == 32
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 32
+ assert block.conv3.out_channels == 128
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 128, 56, 56)
+
+ # ViPNAS_Bottleneck with stride 2 and downsample
+ downsample = nn.Sequential(
+ nn.Conv2d(64, 128, kernel_size=1, stride=2), nn.BatchNorm2d(128))
+ block = ViPNAS_Bottleneck(
+ 64, 128, stride=2, style='pytorch', downsample=downsample)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 128, 28, 28)
+
+ # ViPNAS_Bottleneck with expansion 2
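+ # mid_channels = out_channels // expansion = 64 // 2 = 32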
+ block = ViPNAS_Bottleneck(64, 64, style='pytorch', expansion=2)
+ assert block.in_channels == 64
+ assert block.mid_channels == 32
+ assert block.out_channels == 64
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 32
+ assert block.conv1.kernel_size == (1, 1)
+ assert block.conv2.in_channels == 32
+ assert block.conv2.out_channels == 32
+ assert block.conv2.kernel_size == (3, 3)
+ assert block.conv3.in_channels == 32
+ assert block.conv3.out_channels == 64
+ assert block.conv3.kernel_size == (1, 1)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # Test ViPNAS_Bottleneck with checkpointing
+ block = ViPNAS_Bottleneck(64, 64, with_cp=True)
+ block.train()
+ assert block.with_cp
+ x = torch.randn(1, 64, 56, 56, requires_grad=True)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_vipnas_bottleneck_reslayer():
+ # 3 Bottleneck w/o downsample
+ layer = ViPNAS_ResLayer(ViPNAS_Bottleneck, 3, 32, 32)
+ assert len(layer) == 3
+ for i in range(3):
+ assert layer[i].in_channels == 32
+ assert layer[i].out_channels == 32
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 32, 56, 56)
+
+ # 3 ViPNAS_Bottleneck w/ stride 1 and downsample
+ layer = ViPNAS_ResLayer(ViPNAS_Bottleneck, 3, 32, 64)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 1
+ assert layer[0].conv1.out_channels == 64
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (1, 1)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 64
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 56, 56)
+
+ # 3 ViPNAS_Bottleneck w/ stride 2 and downsample
+ layer = ViPNAS_ResLayer(ViPNAS_Bottleneck, 3, 32, 64, stride=2)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].conv1.out_channels == 64
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 2
+ assert isinstance(layer[0].downsample[0], nn.Conv2d)
+ assert layer[0].downsample[0].stride == (2, 2)
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 64
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+ # 3 ViPNAS_Bottleneck w/ stride 2 and downsample with avg pool
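+ # avg_down=True prepends an AvgPool2d to the downsample branch, so it has 3 modules instead of 2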
+ layer = ViPNAS_ResLayer(
+ ViPNAS_Bottleneck, 3, 32, 64, stride=2, avg_down=True)
+ assert len(layer) == 3
+ assert layer[0].in_channels == 32
+ assert layer[0].out_channels == 64
+ assert layer[0].stride == 2
+ assert layer[0].conv1.out_channels == 64
+ assert layer[0].downsample is not None and len(layer[0].downsample) == 3
+ assert isinstance(layer[0].downsample[0], nn.AvgPool2d)
+ assert layer[0].downsample[0].stride == 2
+ for i in range(1, 3):
+ assert layer[i].in_channels == 64
+ assert layer[i].out_channels == 64
+ assert layer[i].conv1.out_channels == 64
+ assert layer[i].stride == 1
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 64, 28, 28)
+
+ # 3 ViPNAS_Bottleneck with custom expansion
+ layer = ViPNAS_ResLayer(ViPNAS_Bottleneck, 3, 32, 32, expansion=2)
+ assert len(layer) == 3
+ for i in range(3):
+ assert layer[i].in_channels == 32
+ assert layer[i].out_channels == 32
+ assert layer[i].stride == 1
+ assert layer[i].conv1.out_channels == 16
+ assert layer[i].downsample is None
+ x = torch.randn(1, 32, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == (1, 32, 56, 56)
+
+
+def test_resnet():
+ """Test ViPNAS_ResNet backbone."""
+ with pytest.raises(KeyError):
+ # ViPNAS_ResNet depth should be in [50]
+ ViPNAS_ResNet(20)
+
+ with pytest.raises(AssertionError):
+ # In ViPNAS_ResNet: 1 <= num_stages <= 4
+ ViPNAS_ResNet(50, num_stages=0)
+
+ with pytest.raises(AssertionError):
+ # In ViPNAS_ResNet: 1 <= num_stages <= 4
+ ViPNAS_ResNet(50, num_stages=5)
+
+ with pytest.raises(AssertionError):
+ # len(strides) == len(dilations) == num_stages
+ ViPNAS_ResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = ViPNAS_ResNet(50)
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ ViPNAS_ResNet(50, style='tensorflow')
+
+ # Test ViPNAS_ResNet50 norm_eval=True
+ model = ViPNAS_ResNet(50, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test ViPNAS_ResNet50 with first stage frozen
+ frozen_stages = 1
+ model = ViPNAS_ResNet(50, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ assert model.norm1.training is False
+ for layer in [model.conv1, model.norm1]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, f'layer{i}')
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ViPNAS_ResNet50 with BatchNorm forward
+ model = ViPNAS_ResNet(50, out_indices=(0, 1, 2, 3))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 80, 56, 56)
+ assert feat[1].shape == (1, 160, 28, 28)
+ assert feat[2].shape == (1, 304, 14, 14)
+ assert feat[3].shape == (1, 608, 7, 7)
+
+ # Test ViPNAS_ResNet50 with layers 1, 2, 3 out forward
+ model = ViPNAS_ResNet(50, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == (1, 80, 56, 56)
+ assert feat[1].shape == (1, 160, 28, 28)
+ assert feat[2].shape == (1, 304, 14, 14)
+
+ # Test ViPNAS_ResNet50 with layers 3 (top feature maps) out forward
+ model = ViPNAS_ResNet(50, out_indices=(3, ))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert feat.shape == (1, 608, 7, 7)
+
+ # Test ViPNAS_ResNet50 with checkpoint forward
+ model = ViPNAS_ResNet(50, out_indices=(0, 1, 2, 3), with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == (1, 80, 56, 56)
+ assert feat[1].shape == (1, 160, 28, 28)
+ assert feat[2].shape == (1, 304, 14, 14)
+ assert feat[3].shape == (1, 608, 7, 7)
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_animal_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_animal_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..393361218308a4ef178c85366473e56e3024ebb9
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_animal_dataset_compatibility.py
@@ -0,0 +1,415 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_animal_horse10_dataset_compatibility():
+ dataset = 'AnimalHorse10Dataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/horse10/test_horse10.json',
+ img_prefix='tests/data/horse10/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/horse10/test_horse10.json',
+ img_prefix='tests/data/horse10/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_animal_fly_dataset_compatibility():
+ dataset = 'AnimalFlyDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=32,
+ dataset_joints=32,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 192],
+ heatmap_size=[48, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/fly/test_fly.json',
+ img_prefix='tests/data/fly/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/fly/test_fly.json',
+ img_prefix='tests/data/fly/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_animal_locust_dataset_compatibility():
+ dataset = 'AnimalLocustDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=35,
+ dataset_joints=35,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ])
+
+ data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/locust/test_locust.json',
+ img_prefix='tests/data/locust/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/locust/test_locust.json',
+ img_prefix='tests/data/locust/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_animal_zebra_dataset_compatibility():
+ dataset = 'AnimalZebraDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=9,
+ dataset_joints=9,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
+
+ data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/zebra/test_zebra.json',
+ img_prefix='tests/data/zebra/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/zebra/test_zebra.json',
+ img_prefix='tests/data/zebra/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_animal_ATRW_dataset_compatibility():
+ dataset = 'AnimalATRWDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/atrw/test_atrw.json',
+ img_prefix='tests/data/atrw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/atrw/test_atrw.json',
+ img_prefix='tests/data/atrw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+
+
+def test_animal_Macaque_dataset_compatibility():
+ dataset = 'AnimalMacaqueDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/macaque/test_macaque.json',
+ img_prefix='tests/data/macaque/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/macaque/test_macaque.json',
+ img_prefix='tests/data/macaque/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+
+
+def test_animalpose_dataset_compatibility():
+ dataset = 'AnimalPoseDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/animalpose/test_animalpose.json',
+ img_prefix='tests/data/animalpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/animalpose/test_animalpose.json',
+ img_prefix='tests/data/animalpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_body3d_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_body3d_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7e4b7106779b163533b23474f59322249ecb50f
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_body3d_dataset_compatibility.py
@@ -0,0 +1,266 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+
+import numpy as np
+import pytest
+
+from mmpose.datasets import DATASETS
+from mmpose.datasets.builder import build_dataset
+
+
+def test_body3d_h36m_dataset_compatibility():
+ # Test Human3.6M dataset
+ dataset = 'Body3DH36MDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ # test single-frame input
+ data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ outputs = []
+ for result in custom_dataset:
+ outputs.append({
+ 'preds': result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = ['mpjpe', 'p-mpjpe', 'n-mpjpe']
+ infos = custom_dataset.evaluate(outputs, tmpdir, metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['N-MPJPE'], 0.0)
+
+ # test multi-frame input with joint_2d_src = 'detection'
+ data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=True,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file='tests/data/h36m/test_h36m_2d_detection.npy',
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ outputs = []
+ for result in custom_dataset:
+ outputs.append({
+ 'preds': result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = ['mpjpe', 'p-mpjpe', 'n-mpjpe']
+ infos = custom_dataset.evaluate(outputs, tmpdir, metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['N-MPJPE'], 0.0)
+
+
+def test_body3d_semi_supervision_dataset_compatibility():
+ # Test Body3d Semi-supervision Dataset
+
+ # load labeled dataset
+ labeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subset=1,
+ subjects=['S1'],
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+ labeled_dataset = dict(
+ type='Body3DH36MDataset',
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=labeled_data_cfg,
+ pipeline=[])
+
+ # load unlabeled data
+ unlabeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subjects=['S5', 'S7', 'S8'],
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl',
+ need_2d_label=True)
+ unlabeled_dataset = dict(
+ type='Body3DH36MDataset',
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=unlabeled_data_cfg,
+ pipeline=[
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'unlabeled_input')],
+ meta_name='metas',
+ meta_keys=[])
+ ])
+
+ # combine labeled and unlabeled dataset to form a new dataset
+ dataset = 'Body3DSemiSupervisionDataset'
+ dataset_class = DATASETS.get(dataset)
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(labeled_dataset, unlabeled_dataset)
+ item = custom_dataset[0]
+ assert 'unlabeled_input' in item.keys()
+
+ unlabeled_dataset = build_dataset(unlabeled_dataset)
+ assert len(unlabeled_dataset) == len(custom_dataset)
+
+
+def test_body3d_mpi_inf_3dhp_dataset_compatibility():
+ # Test MPI-INF-3DHP dataset
+ dataset = 'Body3DMpiInf3dhpDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ # Test single-frame input on trainset
+ single_frame_train_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_train.pkl')
+
+ # Test single-frame input on testset
+ single_frame_test_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='gt',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_test.pkl')
+
+ # Test multi-frame input on trainset
+ multi_frame_train_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ joint_2d_src='gt',
+ joint_2d_det_file=None,
+ causal=True,
+ temporal_padding=True,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_train.pkl')
+
+ # Test multi-frame input on testset
+ multi_frame_test_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ temporal_padding=True,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_test.pkl')
+
+ ann_files = [
+ 'tests/data/mpi_inf_3dhp/test_3dhp_train.npz',
+ 'tests/data/mpi_inf_3dhp/test_3dhp_test.npz'
+ ] * 2
+ data_cfgs = [
+ single_frame_train_data_cfg, single_frame_test_data_cfg,
+ multi_frame_train_data_cfg, multi_frame_test_data_cfg
+ ]
+
+ for ann_file, data_cfg in zip(ann_files, data_cfgs):
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file=ann_file,
+ img_prefix='tests/data/mpi_inf_3dhp',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file=ann_file,
+ img_prefix='tests/data/mpi_inf_3dhp',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ outputs = []
+ for result in custom_dataset:
+ outputs.append({
+ 'preds':
+ result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = [
+ 'mpjpe', 'p-mpjpe', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc'
+ ]
+ infos = custom_dataset.evaluate(outputs, tmpdir, metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['3DPCK'], 100.)
+ np.testing.assert_almost_equal(infos['P-3DPCK'], 100.)
+ np.testing.assert_almost_equal(infos['3DAUC'], 30 / 31 * 100)
+ np.testing.assert_almost_equal(infos['P-3DAUC'], 30 / 31 * 100)
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_bottom_up_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_bottom_up_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..366fcfe82a1847ba16314778ae633f476ec9b89d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_bottom_up_dataset_compatibility.py
@@ -0,0 +1,325 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+
+import numpy as np
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+
+
+def convert_coco_to_output(coco, is_wholebody=False):
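+ """Build pseudo-predictions from ground-truth keypoints so that evaluation should score perfectly (AP == 1.0)."""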
+ outputs = []
+ for img_id in coco.getImgIds():
+ preds = []
+ scores = []
+ image = coco.imgs[img_id]
+ ann_ids = coco.getAnnIds(img_id)
+ for ann_id in ann_ids:
+ obj = coco.anns[ann_id]
+ if is_wholebody:
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+ else:
+ keypoints = np.array(obj['keypoints']).reshape((-1, 3))
+ K = keypoints.shape[0]
+ if sum(keypoints[:, 2]) == 0:
+ continue
+ preds.append(
+ np.concatenate((keypoints[:, :2], np.ones(
+ [K, 1]), np.ones([K, 1]) * ann_id),
+ axis=1))
+ scores.append(1)
+ image_paths = []
+ image_paths.append(image['file_name'])
+
+ output = {}
+ output['preds'] = np.stack(preds)
+ output['scores'] = scores
+ output['image_paths'] = image_paths
+ output['output_heatmap'] = None
+
+ outputs.append(output)
+
+ return outputs
+
+
+def test_bottom_up_COCO_dataset_compatibility():
+ dataset = 'BottomUpCocoDataset'
+ # test COCO datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+ use_nms=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+ assert custom_dataset.dataset_name == 'coco'
+
+ outputs = convert_coco_to_output(custom_dataset.coco)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_bottom_up_CrowdPose_dataset_compatibility():
+ dataset = 'BottomUpCrowdPoseDataset'
+ # test CrowdPose datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ image_id = 103319
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+ assert custom_dataset.dataset_name == 'crowdpose'
+
+ outputs = convert_coco_to_output(custom_dataset.coco)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_bottom_up_MHP_dataset_compatibility():
+ dataset = 'BottomUpMhpDataset'
+ # test MHP datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+ )
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ image_id = 2889
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+ assert custom_dataset.dataset_name == 'mhp'
+
+ outputs = convert_coco_to_output(custom_dataset.coco)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_bottom_up_AIC_dataset_compatibility():
+ dataset = 'BottomUpAicDataset'
+ # test AIC datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+ )
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_coco_to_output(custom_dataset.coco)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_bottom_up_COCO_wholebody_dataset_compatibility():
+ dataset = 'BottomUpCocoWholeBodyDataset'
+ # test COCO-wholebody datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+ )
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco_wholebody'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ outputs = convert_coco_to_output(custom_dataset.coco, is_wholebody=True)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_deprecated_dataset_base.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_deprecated_dataset_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5aad98b15c6114d76b185c1a4b2abb3c9273fed
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_deprecated_dataset_base.py
@@ -0,0 +1,28 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+
+from mmpose.datasets.datasets.animal.animal_base_dataset import \
+ AnimalBaseDataset
+from mmpose.datasets.datasets.body3d.body3d_base_dataset import \
+ Body3DBaseDataset
+from mmpose.datasets.datasets.bottom_up.bottom_up_base_dataset import \
+ BottomUpBaseDataset
+from mmpose.datasets.datasets.face.face_base_dataset import FaceBaseDataset
+from mmpose.datasets.datasets.fashion.fashion_base_dataset import \
+ FashionBaseDataset
+from mmpose.datasets.datasets.hand.hand_base_dataset import HandBaseDataset
+from mmpose.datasets.datasets.top_down.topdown_base_dataset import \
+ TopDownBaseDataset
+
+
+@pytest.mark.parametrize('BaseDataset',
+ (AnimalBaseDataset, BottomUpBaseDataset,
+ FaceBaseDataset, FashionBaseDataset, HandBaseDataset,
+ TopDownBaseDataset, Body3DBaseDataset))
+def test_dataset_base_class(BaseDataset):
+ with pytest.raises(ImportError):
+
+ class Dataset(BaseDataset):
+ pass
+
+ _ = Dataset()
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_face_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_face_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..056845b357b5034a02df9402bfe086bc23e7ec59
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_face_dataset_compatibility.py
@@ -0,0 +1,170 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+from unittest.mock import MagicMock
+
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_face_300W_dataset_compatibility():
+ dataset = 'Face300WDataset'
+ # test Face 300W datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/300w/test_300w.json',
+ img_prefix='tests/data/300w/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/300w/test_300w.json',
+ img_prefix='tests/data/300w/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_face_AFLW_dataset_compatibility():
+ dataset = 'FaceAFLWDataset'
+ # test Face AFLW datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=19,
+ dataset_joints=19,
+ dataset_channel=[
+ list(range(19)),
+ ],
+ inference_channel=list(range(19)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/aflw/test_aflw.json',
+ img_prefix='tests/data/aflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aflw/test_aflw.json',
+ img_prefix='tests/data/aflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_face_WFLW_dataset_compatibility():
+ dataset = 'FaceWFLWDataset'
+ # test Face WFLW datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/wflw/test_wflw.json',
+ img_prefix='tests/data/wflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/wflw/test_wflw.json',
+ img_prefix='tests/data/wflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_fashion_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_fashion_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6471565c12154f8818ec944539bc0b181f5369f
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_fashion_dataset_compatibility.py
@@ -0,0 +1,69 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+from unittest.mock import MagicMock
+
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
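+# The test below constructs the dataset without an explicit `dataset_info`,
+# so the deprecated built-in metadata is used and a DeprecationWarning is
+# expected.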
+def test_deepfashion_dataset_compatibility():
+ dataset = 'DeepFashionDataset'
+ # test DeepFashion datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ image_thr=0.0,
+ bbox_file='')
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/fld/test_fld.json',
+ img_prefix='tests/data/fld/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'deepfashion_full'
+
+ image_id = 128
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_hand_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_hand_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..af11f248d2b0b3478ba0cf207adc3fd4f2f9ea62
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_hand_dataset_compatibility.py
@@ -0,0 +1,388 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_top_down_OneHand10K_dataset_compatibility():
+ dataset = 'OneHand10KDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/onehand10k/test_onehand10k.json',
+ img_prefix='tests/data/onehand10k/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/onehand10k/test_onehand10k.json',
+ img_prefix='tests/data/onehand10k/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_FreiHand_dataset_compatibility():
+ dataset = 'FreiHandDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[224, 224],
+ heatmap_size=[56, 56],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/freihand/test_freihand.json',
+ img_prefix='tests/data/freihand/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/freihand/test_freihand.json',
+ img_prefix='tests/data/freihand/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 8
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_RHD_dataset_compatibility():
+ dataset = 'Rhd2DDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/rhd/test_rhd.json',
+ img_prefix='tests/data/rhd/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/rhd/test_rhd.json',
+ img_prefix='tests/data/rhd/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_Panoptic_dataset_compatibility():
+ dataset = 'PanopticDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/panoptic/test_panoptic.json',
+ img_prefix='tests/data/panoptic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/panoptic/test_panoptic.json',
+ img_prefix='tests/data/panoptic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir,
+ ['PCKh', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCKh'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_InterHand2D_dataset_compatibility():
+ dataset = 'InterHand2DDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ assert len(custom_dataset.db) == 6
+
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK', 'EPE', 'AUC'])
+ print(infos, flush=True)
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_InterHand3D_dataset_compatibility():
+ dataset = 'InterHand3DDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=42,
+ dataset_joints=42,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64, 64],
+ heatmap3d_depth_bound=400.0,
+ heatmap_size_root=64,
+ root_depth_bound=400.0,
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/'
+ 'test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ assert len(custom_dataset.db) == 4
+
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(
+ custom_dataset.db, keys=['rel_root_depth', 'hand_type'], is_3d=True)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir,
+ ['MRRPE', 'MPJPE', 'Handedness_acc'])
+ assert_almost_equal(infos['MRRPE'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_all'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_single'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_interacting'], 0.0, decimal=5)
+ assert_almost_equal(infos['Handedness_acc'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_inference_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_inference_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb0988d35cbe758d842cb1b6837e0e397eca6957
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_inference_compatibility.py
@@ -0,0 +1,156 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+
+from mmpose.apis import (extract_pose_sequence, get_track_id,
+ inference_bottom_up_pose_model,
+ inference_pose_lifter_model,
+ inference_top_down_pose_model, init_pose_model,
+ vis_3d_pose_result, vis_pose_result,
+ vis_pose_tracking_result)
+
+
+def test_inference_without_dataset_info():
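+ # Each block below builds a pose model, strips `dataset_info` from its
+ # config to emulate a legacy config, and checks that inference and
+ # visualization still work while emitting DeprecationWarning (and that
+ # an unknown dataset name raises NotImplementedError).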
+ # Top down
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'coco/res50_coco_256x192.py',
+ None,
+ device='cpu')
+
+ if 'dataset_info' in pose_model.cfg:
+ _ = pose_model.cfg.pop('dataset_info')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+ person_result = []
+ person_result.append({'bbox': [50, 50, 50, 100]})
+
+ with pytest.warns(DeprecationWarning):
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model, image_name, person_result, format='xywh')
+
+ with pytest.warns(DeprecationWarning):
+ vis_pose_result(pose_model, image_name, pose_results)
+
+ with pytest.raises(NotImplementedError):
+ with pytest.warns(DeprecationWarning):
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_result,
+ format='xywh',
+ dataset='test')
+
+ # Bottom up
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
+ 'coco/res50_coco_512x512.py',
+ None,
+ device='cpu')
+ if 'dataset_info' in pose_model.cfg:
+ _ = pose_model.cfg.pop('dataset_info')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+
+ with pytest.warns(DeprecationWarning):
+ pose_results, _ = inference_bottom_up_pose_model(
+ pose_model, image_name)
+ with pytest.warns(DeprecationWarning):
+ vis_pose_result(pose_model, image_name, pose_results)
+
+ # Top down tracking
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
+ 'coco/res50_coco_256x192.py',
+ None,
+ device='cpu')
+
+ if 'dataset_info' in pose_model.cfg:
+ _ = pose_model.cfg.pop('dataset_info')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+ person_result = [{'bbox': [50, 50, 50, 100]}]
+
+ with pytest.warns(DeprecationWarning):
+ pose_results, _ = inference_top_down_pose_model(
+ pose_model, image_name, person_result, format='xywh')
+
+ pose_results, _ = get_track_id(pose_results, [], next_id=0)
+
+ with pytest.warns(DeprecationWarning):
+ vis_pose_tracking_result(pose_model, image_name, pose_results)
+
+ with pytest.raises(NotImplementedError):
+ with pytest.warns(DeprecationWarning):
+ vis_pose_tracking_result(
+ pose_model, image_name, pose_results, dataset='test')
+
+ # Bottom up tracking
+ pose_model = init_pose_model(
+ 'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
+ 'coco/res50_coco_512x512.py',
+ None,
+ device='cpu')
+
+ if 'dataset_info' in pose_model.cfg:
+ _ = pose_model.cfg.pop('dataset_info')
+
+ image_name = 'tests/data/coco/000000000785.jpg'
+ with pytest.warns(DeprecationWarning):
+ pose_results, _ = inference_bottom_up_pose_model(
+ pose_model, image_name)
+
+ pose_results, next_id = get_track_id(pose_results, [], next_id=0)
+
+ with pytest.warns(DeprecationWarning):
+ vis_pose_tracking_result(
+ pose_model,
+ image_name,
+ pose_results,
+ dataset='BottomUpCocoDataset')
+
+ # Pose lifting
+ pose_model = init_pose_model(
+ 'configs/body/3d_kpt_sview_rgb_img/pose_lift/'
+ 'h36m/simplebaseline3d_h36m.py',
+ None,
+ device='cpu')
+
+ pose_det_result = {
+ 'keypoints': np.zeros((17, 3)),
+ 'bbox': [50, 50, 50, 50],
+ 'track_id': 0,
+ 'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
+ }
+
+ if 'dataset_info' in pose_model.cfg:
+ _ = pose_model.cfg.pop('dataset_info')
+
+ pose_results_2d = [[pose_det_result]]
+
+ dataset = pose_model.cfg.data['test']['type']
+
+ pose_results_2d = extract_pose_sequence(
+ pose_results_2d, frame_idx=0, causal=False, seq_len=1, step=1)
+
+ with pytest.warns(DeprecationWarning):
+ _ = inference_pose_lifter_model(
+ pose_model, pose_results_2d, dataset, with_track_id=False)
+
+ with pytest.warns(DeprecationWarning):
+ pose_lift_results = inference_pose_lifter_model(
+ pose_model, pose_results_2d, dataset, with_track_id=True)
+
+ for res in pose_lift_results:
+ res['title'] = 'title'
+ with pytest.warns(DeprecationWarning):
+ vis_3d_pose_result(
+ pose_model,
+ pose_lift_results,
+ img=pose_results_2d[0][0]['image_name'],
+ dataset=dataset)
+
+ with pytest.raises(NotImplementedError):
+ with pytest.warns(DeprecationWarning):
+ _ = inference_pose_lifter_model(
+ pose_model, pose_results_2d, dataset='test')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_top_down_dataset_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_top_down_dataset_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a4333f6a866c8371f5bb6c6c7da23f0aad3b7b9
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_dataset_info_compatibility/test_top_down_dataset_compatibility.py
@@ -0,0 +1,748 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+from unittest.mock import MagicMock
+
+import pytest
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_top_down_COCO_dataset_compatibility():
+ dataset = 'TopDownCocoDataset'
+ # test COCO datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_MHP_dataset_compatibility():
+ dataset = 'TopDownMhpDataset'
+ # test MHP datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test det bbox
+ with pytest.raises(AssertionError):
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'mhp'
+
+ image_id = 2889
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_PoseTrack18_dataset_compatibility():
+ dataset = 'TopDownPoseTrack18Dataset'
+ # test PoseTrack datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_human_detections.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'posetrack18'
+
+ image_id = 10128340000
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+
+def test_top_down_CrowdPose_dataset_compatibility():
+ dataset = 'TopDownCrowdPoseDataset'
+ # test CrowdPose datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/crowdpose/test_crowdpose_det_AP_40.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'crowdpose'
+
+ image_id = 103319
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_COCO_wholebody_dataset_compatibility():
+ dataset = 'TopDownCocoWholeBodyDataset'
+ # test COCO-WholeBody datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco_wholebody'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_OCHuman_dataset_compatibility():
+ dataset = 'TopDownOCHumanDataset'
+ # test OCHuman datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/ochuman/test_ochuman.json',
+ img_prefix='tests/data/ochuman/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/ochuman/test_ochuman.json',
+ img_prefix='tests/data/ochuman/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'ochuman'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_MPII_dataset_compatibility():
+ dataset = 'TopDownMpiiDataset'
+ # test MPII datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mpii/test_mpii.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg_copy,
+ pipeline=[])
+
+ assert len(custom_dataset) == 5
+ assert custom_dataset.dataset_name == 'mpii'
+ _ = custom_dataset[0]
+
+
+def test_top_down_MPII_TRB_dataset_compatibility():
+ dataset = 'TopDownMpiiTrbDataset'
+ # test MPII TRB datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=40,
+ dataset_joints=40,
+ dataset_channel=[list(range(40))],
+ inference_channel=list(range(40)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/mpii/test_mpii_trb.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mpii/test_mpii_trb.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'mpii_trb'
+ _ = custom_dataset[0]
+
+
+def test_top_down_AIC_dataset_compatibility():
+ dataset = 'TopDownAicDataset'
+ # test AIC datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='')
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'aic'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'PCK')
+
+
+def test_top_down_JHMDB_dataset_compatibility():
+ dataset = 'TopDownJhmdbDataset'
+ # test JHMDB datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='')
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=True)
+
+ with pytest.warns(DeprecationWarning):
+ _ = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ test_mode=False)
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'jhmdb'
+
+ image_id = 2290001
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['PCK'])
+ assert_almost_equal(infos['Mean PCK'], 1.0)
+
+ infos = custom_dataset.evaluate(outputs, tmpdir, ['tPCK'])
+ assert_almost_equal(infos['Mean tPCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'mAP')
+
+
+def test_top_down_h36m_dataset_compatibility():
+ dataset = 'TopDownH36MDataset'
+ # test Human3.6M datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test gt bbox
+ with pytest.warns(DeprecationWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/h36m_coco.json',
+ img_prefix='tests/data/h36m/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'h36m'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ _ = custom_dataset[0]
+
+ outputs = convert_db_to_output(custom_dataset.db)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ infos = custom_dataset.evaluate(outputs, tmpdir, 'EPE')
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(outputs, tmpdir, 'AUC')
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_eval_hook_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_eval_hook_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..f62f5868aad913348f3f919537c8ace3b4d90139
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_eval_hook_compatibility.py
@@ -0,0 +1,46 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import unittest.mock as mock
+
+import pytest
+import torch
+from torch.utils.data import DataLoader, Dataset
+
+from mmpose.core import DistEvalHook, EvalHook
+
+
+class ExampleDataset(Dataset):
+
+ def __init__(self):
+ self.index = 0
+ self.eval_result = [0.1, 0.4, 0.3, 0.7, 0.2, 0.05, 0.4, 0.6]
+
+ def __getitem__(self, idx):
+ results = dict(imgs=torch.tensor([1]))
+ return results
+
+ def __len__(self):
+ return 1
+
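+ # `evaluate` is replaced by an autospec mock so the eval hooks can call
+ # it without computing real metrics.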
+ @mock.create_autospec
+ def evaluate(self, results, res_folder=None, logger=None):
+ pass
+
+
+def test_old_fashion_eval_hook_parameters():
+
+ data_loader = DataLoader(
+ ExampleDataset(),
+ batch_size=1,
+ sampler=None,
+ num_workers=0,
+ shuffle=False)
+
+ # test argument "key_indicator"
+ with pytest.warns(DeprecationWarning):
+ _ = EvalHook(data_loader, key_indicator='AP')
+ with pytest.warns(DeprecationWarning):
+ _ = DistEvalHook(data_loader, key_indicator='AP')
+
+ # test argument "gpu_collect"
+ with pytest.warns(DeprecationWarning):
+ _ = EvalHook(data_loader, save_best='AP', gpu_collect=False)
diff --git a/vendor/ViTPose/tests/test_backward_compatibility/test_registry_compatibility.py b/vendor/ViTPose/tests/test_backward_compatibility/test_registry_compatibility.py
new file mode 100644
index 0000000000000000000000000000000000000000..68a487b6b3bc5f3f43ea6fdf75b61e2273df263b
--- /dev/null
+++ b/vendor/ViTPose/tests/test_backward_compatibility/test_registry_compatibility.py
@@ -0,0 +1,10 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# flake8: noqa
+import pytest
+
+
+def test_old_fashion_registry_importing():
+ with pytest.warns(DeprecationWarning):
+ from mmpose.models.registry import BACKBONES, HEADS, LOSSES, NECKS, POSENETS # isort: skip
+ with pytest.warns(DeprecationWarning):
+ from mmpose.datasets.registry import DATASETS, PIPELINES # noqa: F401
diff --git a/vendor/ViTPose/tests/test_config.py b/vendor/ViTPose/tests/test_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbcc5995beb48ed0c9ed0f29b37c905fd602506d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_config.py
@@ -0,0 +1,54 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from os.path import dirname, exists, join, relpath
+
+import torch
+from mmcv.runner import build_optimizer
+
+
+def _get_config_directory():
+ """Find the predefined config directory."""
+ try:
+ # Assume we are running in the source mmpose repo
+ repo_dpath = dirname(dirname(__file__))
+ except NameError:
+ # For IPython development when this __file__ is not defined
+ import mmpose
+ repo_dpath = dirname(dirname(mmpose.__file__))
+ config_dpath = join(repo_dpath, 'configs')
+ if not exists(config_dpath):
+ raise Exception('Cannot find config path')
+ return config_dpath
+
+
+def test_config_build_detector():
+ """Test that all pose models defined in the configs can be
+ initialized."""
+ from mmcv import Config
+
+ from mmpose.models import build_posenet
+
+ config_dpath = _get_config_directory()
+ print(f'Found config_dpath = {config_dpath}')
+
+ import glob
+ config_fpaths = list(
+ glob.glob(join(config_dpath, '**', '*.py'), recursive=True))
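+ # Skip partial configs under '_base_'; they only provide shared
+ # settings and cannot be built on their own.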
+ config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+ config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+ print(f'Using {len(config_names)} config files')
+
+ for config_fname in config_names:
+ config_fpath = join(config_dpath, config_fname)
+ config_mod = Config.fromfile(config_fpath)
+
+ print(f'Building detector, config_fpath = {config_fpath}')
+
+ # Remove pretrained keys to allow for testing in an offline environment
+ if 'pretrained' in config_mod.model:
+ config_mod.model['pretrained'] = None
+
+ detector = build_posenet(config_mod.model)
+ assert detector is not None
+
+ optimizer = build_optimizer(detector, config_mod.optimizer)
+ assert isinstance(optimizer, torch.optim.Optimizer)
diff --git a/vendor/ViTPose/tests/test_datasets/test_animal_dataset.py b/vendor/ViTPose/tests/test_datasets/test_animal_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..328c8d5d2f1dca5103ab22153616ddcb2b9fcbdc
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_animal_dataset.py
@@ -0,0 +1,500 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
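+# Unlike the backward-compatibility tests, the tests below pass
+# `dataset_info` explicitly (loaded from the corresponding config under
+# configs/_base_/datasets), so no DeprecationWarning is expected.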
+def test_animal_horse10_dataset():
+ dataset = 'AnimalHorse10Dataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/horse10.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=22,
+ dataset_joints=22,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 21
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 21
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/horse10/test_horse10.json',
+ img_prefix='tests/data/horse10/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/horse10/test_horse10.json',
+ img_prefix='tests/data/horse10/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'horse10'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 3
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_animal_fly_dataset():
+ dataset = 'AnimalFlyDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/fly.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=32,
+ dataset_joints=32,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 192],
+ heatmap_size=[48, 48],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/fly/test_fly.json',
+ img_prefix='tests/data/fly/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/fly/test_fly.json',
+ img_prefix='tests/data/fly/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'fly'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_animal_locust_dataset():
+ dataset = 'AnimalLocustDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/locust.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=35,
+ dataset_joints=35,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
+ ])
+
+ data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/locust/test_locust.json',
+ img_prefix='tests/data/locust/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/locust/test_locust.json',
+ img_prefix='tests/data/locust/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'locust'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_animal_zebra_dataset():
+ dataset = 'AnimalZebraDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/zebra.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=9,
+ dataset_joints=9,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8])
+
+ data_cfg = dict(
+ image_size=[160, 160],
+ heatmap_size=[40, 40],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/zebra/test_zebra.json',
+ img_prefix='tests/data/zebra/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/zebra/test_zebra.json',
+ img_prefix='tests/data/zebra/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'zebra'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+ assert_almost_equal(infos['PCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_animal_ATRW_dataset():
+ dataset = 'AnimalATRWDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/atrw.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/atrw/test_atrw.json',
+ img_prefix='tests/data/atrw/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/atrw/test_atrw.json',
+ img_prefix='tests/data/atrw/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'atrw'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+
+
+def test_animal_Macaque_dataset():
+ dataset = 'AnimalMacaqueDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/macaque.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/macaque/test_macaque.json',
+ img_prefix='tests/data/macaque/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/macaque/test_macaque.json',
+ img_prefix='tests/data/macaque/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'macaque'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+
+
+def test_animalpose_dataset():
+ dataset = 'AnimalPoseDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/animalpose.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=20,
+ dataset_joints=20,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/animalpose/test_animalpose.json',
+ img_prefix='tests/data/animalpose/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/animalpose/test_animalpose.json',
+ img_prefix='tests/data/animalpose/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'animalpose'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+
+
+def test_ap10k_dataset():
+ dataset = 'AnimalAP10KDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/ap10k.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/ap10k/test_ap10k.json',
+ img_prefix='tests/data/ap10k/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/ap10k/test_ap10k.json',
+ img_prefix='tests/data/ap10k/',
+ data_cfg=data_cfg_copy,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'ap10k'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+
+ for output in results:
+ # each test image contains only one box, so reset all bbox_ids to 0
+ output['bbox_ids'] = [0 for _ in range(len(output['bbox_ids']))]
+
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
diff --git a/vendor/ViTPose/tests/test_datasets/test_body3d_dataset.py b/vendor/ViTPose/tests/test_datasets/test_body3d_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9cd94ee4d03ecac2eb2b8b41ef6ac4b611b18fd
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_body3d_dataset.py
@@ -0,0 +1,347 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import tempfile
+
+import numpy as np
+from mmcv import Config
+
+from mmpose.datasets import DATASETS
+from mmpose.datasets.builder import build_dataset
+
+
+def test_body3d_h36m_dataset():
+ # Test Human3.6M dataset
+ dataset = 'Body3DH36MDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/h36m.py').dataset_info
+
+ # test single-frame input
+ data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+
+ _ = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.dataset_name == 'h36m'
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
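+ # feed the ground-truth targets back in as predictions, so all error metrics should be ~0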
+ results = []
+ for result in custom_dataset:
+ results.append({
+ 'preds': result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = ['mpjpe', 'p-mpjpe', 'n-mpjpe']
+ infos = custom_dataset.evaluate(results, metric=metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['N-MPJPE'], 0.0)
+
+ # test multi-frame input with joint_2d_src = 'detection'
+ data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=True,
+ temporal_padding=True,
+ joint_2d_src='detection',
+ joint_2d_det_file='tests/data/h36m/test_h36m_2d_detection.npy',
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+
+ _ = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ results = []
+ for result in custom_dataset:
+ results.append({
+ 'preds': result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = ['mpjpe', 'p-mpjpe', 'n-mpjpe']
+ infos = custom_dataset.evaluate(results, metric=metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['N-MPJPE'], 0.0)
+
+
+def test_body3d_semi_supervision_dataset():
+ # Test Body3d Semi-supervision Dataset
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/h36m.py').dataset_info
+
+ # load labeled dataset
+ labeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subset=1,
+ subjects=['S1'],
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl')
+ labeled_dataset_cfg = dict(
+ type='Body3DH36MDataset',
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=labeled_data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[])
+
+ # load unlabeled dataset
+ unlabeled_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ causal=False,
+ temporal_padding=True,
+ joint_2d_src='gt',
+ subjects=['S5', 'S7', 'S8'],
+ need_camera_param=True,
+ camera_param_file='tests/data/h36m/cameras.pkl',
+ need_2d_label=True)
+ unlabeled_dataset_cfg = dict(
+ type='Body3DH36MDataset',
+ ann_file='tests/data/h36m/test_h36m_body3d.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=unlabeled_data_cfg,
+ dataset_info=dataset_info,
+ pipeline=[
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'unlabeled_input')],
+ meta_name='metas',
+ meta_keys=[])
+ ])
+
+ # combine the labeled and unlabeled datasets to form a new dataset
+ dataset = 'Body3DSemiSupervisionDataset'
+ dataset_class = DATASETS.get(dataset)
+ custom_dataset = dataset_class(labeled_dataset_cfg, unlabeled_dataset_cfg)
+ item = custom_dataset[0]
+ assert custom_dataset.labeled_dataset.dataset_name == 'h36m'
+ assert 'unlabeled_input' in item.keys()
+
+ unlabeled_dataset = build_dataset(unlabeled_dataset_cfg)
+ assert len(unlabeled_dataset) == len(custom_dataset)
+
+
+def test_body3d_mpi_inf_3dhp_dataset():
+ # Test MPI-INF-3DHP dataset
+ dataset = 'Body3DMpiInf3dhpDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/mpi_inf_3dhp.py').dataset_info
+
+ # Test single-frame input on trainset
+ single_frame_train_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_train.pkl')
+
+ # Test single-frame input on testset
+ single_frame_test_data_cfg = dict(
+ num_joints=17,
+ seq_len=1,
+ seq_frame_interval=1,
+ joint_2d_src='gt',
+ joint_2d_det_file=None,
+ causal=False,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_test.pkl')
+
+ # Test multi-frame input on trainset
+ multi_frame_train_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ joint_2d_src='gt',
+ joint_2d_det_file=None,
+ causal=True,
+ temporal_padding=True,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_train.pkl')
+
+ # Test multi-frame input on testset
+ multi_frame_test_data_cfg = dict(
+ num_joints=17,
+ seq_len=27,
+ seq_frame_interval=1,
+ joint_2d_src='pipeline',
+ joint_2d_det_file=None,
+ causal=False,
+ temporal_padding=True,
+ need_camera_param=True,
+ camera_param_file='tests/data/mpi_inf_3dhp/cameras_test.pkl')
+
+ ann_files = [
+ 'tests/data/mpi_inf_3dhp/test_3dhp_train.npz',
+ 'tests/data/mpi_inf_3dhp/test_3dhp_test.npz'
+ ] * 2
+ data_cfgs = [
+ single_frame_train_data_cfg, single_frame_test_data_cfg,
+ multi_frame_train_data_cfg, multi_frame_test_data_cfg
+ ]
+
+ for ann_file, data_cfg in zip(ann_files, data_cfgs):
+ _ = dataset_class(
+ ann_file=ann_file,
+ img_prefix='tests/data/mpi_inf_3dhp',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file=ann_file,
+ img_prefix='tests/data/mpi_inf_3dhp',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ results = []
+ for result in custom_dataset:
+ results.append({
+ 'preds': result['target'][None, ...],
+ 'target_image_paths': [result['target_image_path']],
+ })
+
+ metrics = ['mpjpe', 'p-mpjpe', '3dpck', 'p-3dpck', '3dauc', 'p-3dauc']
+ infos = custom_dataset.evaluate(results, metric=metrics)
+
+ np.testing.assert_almost_equal(infos['MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['P-MPJPE'], 0.0)
+ np.testing.assert_almost_equal(infos['3DPCK'], 100.)
+ np.testing.assert_almost_equal(infos['P-3DPCK'], 100.)
+ np.testing.assert_almost_equal(infos['3DAUC'], 30 / 31 * 100)
+ np.testing.assert_almost_equal(infos['P-3DAUC'], 30 / 31 * 100)
+
+
+def test_body3dmview_direct_panoptic_dataset():
+ # Test the multi-view Panoptic dataset
+ dataset = 'Body3DMviewDirectPanopticDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/panoptic_body3d.py').dataset_info
+ space_size = [8000, 8000, 2000]
+ space_center = [0, -500, 800]
+ cube_size = [80, 80, 20]
+ train_data_cfg = dict(
+ image_size=[960, 512],
+ heatmap_size=[[240, 128]],
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+ num_joints=15,
+ seq_list=['160906_band1', '160906_band2'],
+ cam_list=[(0, 12), (0, 6)],
+ num_cameras=2,
+ seq_frame_interval=1,
+ subset='train',
+ need_2d_label=True,
+ need_camera_param=True,
+ root_id=2)
+
+ test_data_cfg = dict(
+ image_size=[960, 512],
+ heatmap_size=[[240, 128]],
+ num_joints=15,
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+ seq_list=['160906_band1', '160906_band2'],
+ cam_list=[(0, 12), (0, 6)],
+ num_cameras=2,
+ seq_frame_interval=1,
+ subset='validation',
+ need_2d_label=True,
+ need_camera_param=True,
+ root_id=2)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ _ = dataset_class(
+ ann_file=tmpdir + '/tmp_train.pkl',
+ img_prefix='tests/data/panoptic_body3d/',
+ data_cfg=train_data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dataset = dataset_class(
+ ann_file=tmpdir + '/tmp_validation.pkl',
+ img_prefix='tests/data/panoptic_body3d',
+ data_cfg=test_data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ gt_num = test_dataset.db_size // test_dataset.num_cameras
+ results = []
+ for i in range(gt_num):
+ index = test_dataset.num_cameras * i
+ db_rec = copy.deepcopy(test_dataset.db[index])
+ joints_3d = db_rec['joints_3d']
+ joints_3d_vis = db_rec['joints_3d_visible']
+ num_gts = len(joints_3d)
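+ # build fake predictions from the ground truth: shape (1, up to 10 instances, num_joints, 5),
+ # where the last dim holds x, y, z, a visibility-derived flag and a unit confidence score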
+ gt_pose = -np.ones((1, 10, test_dataset.num_joints, 5))
+
+ if num_gts > 0:
+ gt_pose[0, :num_gts, :, :3] = np.array(joints_3d)
+ gt_pose[0, :num_gts, :, 3] = np.array(joints_3d_vis)[:, :, 0] - 1.0
+ gt_pose[0, :num_gts, :, 4] = 1.0
+
+ results.append(dict(pose_3d=gt_pose, sample_id=[i]))
+ _ = test_dataset.evaluate(results, metric=['mAP', 'mpjpe'])
diff --git a/vendor/ViTPose/tests/test_datasets/test_bottom_up_dataset.py b/vendor/ViTPose/tests/test_datasets/test_bottom_up_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceb2bac3f5d16d60e6e8f05b001ed9c07f7dbc76
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_bottom_up_dataset.py
@@ -0,0 +1,334 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+
+
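+# Convert ground-truth COCO annotations into the prediction format expected by
+# evaluate(), so evaluating against the ground truth should yield AP == 1.0.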
+def convert_coco_to_output(coco, is_wholebody=False):
+ results = []
+ for img_id in coco.getImgIds():
+ preds = []
+ scores = []
+ image = coco.imgs[img_id]
+ ann_ids = coco.getAnnIds(img_id)
+ for ann_id in ann_ids:
+ obj = coco.anns[ann_id]
+ if is_wholebody:
+ keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
+ obj['face_kpts'] + obj['lefthand_kpts'] +
+ obj['righthand_kpts']).reshape(-1, 3)
+ else:
+ keypoints = np.array(obj['keypoints']).reshape((-1, 3))
+ K = keypoints.shape[0]
+ if sum(keypoints[:, 2]) == 0:
+ continue
+ preds.append(
+ np.concatenate((keypoints[:, :2], np.ones(
+ [K, 1]), np.ones([K, 1]) * ann_id),
+ axis=1))
+ scores.append(1)
+ image_paths = []
+ image_paths.append(image['file_name'])
+
+ output = {}
+ output['preds'] = np.stack(preds)
+ output['scores'] = scores
+ output['image_paths'] = image_paths
+ output['output_heatmap'] = None
+
+ results.append(output)
+
+ return results
+
+
+def test_bottom_up_COCO_dataset():
+ dataset = 'BottomUpCocoDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco.py').dataset_info
+ # test COCO datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+ use_nms=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.dataset_name == 'coco'
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ results = convert_coco_to_output(custom_dataset.coco)
+
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_bottom_up_CrowdPose_dataset():
+ dataset = 'BottomUpCrowdPoseDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/crowdpose.py').dataset_info
+ # test CrowdPose datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False)
+
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.dataset_name == 'crowdpose'
+
+ image_id = 103319
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ results = convert_coco_to_output(custom_dataset.coco)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_bottom_up_MHP_dataset():
+ dataset = 'BottomUpMhpDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/mhp.py').dataset_info
+ # test MHP datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+ )
+
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.dataset_name == 'mhp'
+
+ image_id = 2889
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ results = convert_coco_to_output(custom_dataset.coco)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_bottom_up_AIC_dataset():
+ dataset = 'BottomUpAicDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/aic.py').dataset_info
+ # test AIC datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=1,
+ scale_aware_sigma=False,
+ )
+
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.dataset_name == 'aic'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ results = convert_coco_to_output(custom_dataset.coco)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_bottom_up_COCO_wholebody_dataset():
+ dataset = 'BottomUpCocoWholeBodyDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody.py').dataset_info
+ # test COCO-wholebody datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+ data_cfg = dict(
+ image_size=512,
+ base_size=256,
+ base_sigma=2,
+ heatmap_size=[128, 256],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ num_scales=2,
+ scale_aware_sigma=False,
+ )
+
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco_wholebody'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ results = convert_coco_to_output(custom_dataset.coco, is_wholebody=True)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
diff --git a/vendor/ViTPose/tests/test_datasets/test_dataset_info.py b/vendor/ViTPose/tests/test_datasets/test_dataset_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..d939b9dbb6ffcae494d292bdf2b5cea46d963a26
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_dataset_info.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmpose.datasets import DatasetInfo
+
+
+def test_dataset_info():
+ dataset_info = dict(
+ dataset_name='zebra',
+ paper_info=dict(
+ author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
+ 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
+ 'Couzin, Iain D',
+ title='DeepPoseKit, a software toolkit for fast and robust '
+ 'animal pose estimation using deep learning',
+ container='Elife',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='snout', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='head', id=1, color=[255, 255, 255], type='', swap=''),
+ 2:
+ dict(name='neck', id=2, color=[255, 255, 255], type='', swap=''),
+ 3:
+ dict(
+ name='forelegL1',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 4:
+ dict(
+ name='forelegR1',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 5:
+ dict(
+ name='hindlegL1',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 6:
+ dict(
+ name='hindlegR1',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 7:
+ dict(
+ name='tailbase', id=7, color=[255, 255, 255], type='',
+ swap=''),
+ 8:
+ dict(
+ name='tailtip', id=8, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={
+ 0: dict(link=('head', 'snout'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('neck', 'head'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('forelegL1', 'neck'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('forelegR1', 'neck'), id=3, color=[255, 255, 255]),
+ 4:
+ dict(link=('hindlegL1', 'tailbase'), id=4, color=[255, 255, 255]),
+ 5:
+ dict(link=('hindlegR1', 'tailbase'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('tailbase', 'neck'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('tailtip', 'tailbase'), id=7, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 9,
+ sigmas=[])
+
+ dataset_info = DatasetInfo(dataset_info)
+ assert dataset_info.keypoint_num == len(dataset_info.flip_index)
diff --git a/vendor/ViTPose/tests/test_datasets/test_dataset_wrapper.py b/vendor/ViTPose/tests/test_datasets/test_dataset_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f724d251d69499fc6e1ec87430fba69964909b5d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_dataset_wrapper.py
@@ -0,0 +1,67 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv import Config
+
+from mmpose.datasets.builder import build_dataset
+
+
+def test_concat_dataset():
+ # build COCO-like dataset config
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco.py').dataset_info
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+
+ dataset_cfg = dict(
+ type='TopDownCocoDataset',
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info)
+
+ dataset = build_dataset(dataset_cfg)
+
+ # Case 1: build ConcatDataset explicitly
+ concat_dataset_cfg = dict(
+ type='ConcatDataset', datasets=[dataset_cfg, dataset_cfg])
+ concat_dataset = build_dataset(concat_dataset_cfg)
+ assert len(concat_dataset) == 2 * len(dataset)
+
+ # Case 2: build ConcatDataset from cfg sequence
+ concat_dataset = build_dataset([dataset_cfg, dataset_cfg])
+ assert len(concat_dataset) == 2 * len(dataset)
+
+ # Case 3: build ConcatDataset from ann_file sequence
+ concat_dataset_cfg = dataset_cfg.copy()
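+ # duplicate the per-dataset fields into length-2 lists so a single cfg expands into two datasets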
+ for key in ['ann_file', 'type', 'img_prefix', 'dataset_info']:
+ val = concat_dataset_cfg[key]
+ concat_dataset_cfg[key] = [val] * 2
+ for key in ['num_joints', 'dataset_channel']:
+ val = concat_dataset_cfg['data_cfg'][key]
+ concat_dataset_cfg['data_cfg'][key] = [val] * 2
+ concat_dataset = build_dataset(concat_dataset_cfg)
+ assert len(concat_dataset) == 2 * len(dataset)
diff --git a/vendor/ViTPose/tests/test_datasets/test_face_dataset.py b/vendor/ViTPose/tests/test_datasets/test_face_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fa30b2949e3fa4efaf405b5c17f7acc0cb36b91
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_face_dataset.py
@@ -0,0 +1,284 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from unittest.mock import MagicMock
+
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_face_300W_dataset():
+ dataset = 'Face300WDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/300w.py').dataset_info
+ # test Face 300W datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/300w/test_300w.json',
+ img_prefix='tests/data/300w/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/300w/test_300w.json',
+ img_prefix='tests/data/300w/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == '300w'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_face_coco_wholebody_dataset():
+ dataset = 'FaceCocoWholeBodyDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody_face.py').dataset_info
+ # test Face wholebody datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=68,
+ dataset_joints=68,
+ dataset_channel=[
+ list(range(68)),
+ ],
+ inference_channel=list(range(68)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_face_AFLW_dataset():
+ dataset = 'FaceAFLWDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/aflw.py').dataset_info
+ # test Face AFLW datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=19,
+ dataset_joints=19,
+ dataset_channel=[
+ list(range(19)),
+ ],
+ inference_channel=list(range(19)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/aflw/test_aflw.json',
+ img_prefix='tests/data/aflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aflw/test_aflw.json',
+ img_prefix='tests/data/aflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'aflw'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_face_WFLW_dataset():
+ dataset = 'FaceWFLWDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/wflw.py').dataset_info
+ # test Face WFLW datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=98,
+ dataset_joints=98,
+ dataset_channel=[
+ list(range(98)),
+ ],
+ inference_channel=list(range(98)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/wflw/test_wflw.json',
+ img_prefix='tests/data/wflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/wflw/test_wflw.json',
+ img_prefix='tests/data/wflw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'wflw'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_face_COFW_dataset():
+ dataset = 'FaceCOFWDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/cofw.py').dataset_info
+ # test Face COFW datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=29,
+ dataset_joints=29,
+ dataset_channel=[
+ list(range(29)),
+ ],
+ inference_channel=list(range(29)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/cofw/test_cofw.json',
+ img_prefix='tests/data/cofw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/cofw/test_cofw.json',
+ img_prefix='tests/data/cofw/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'cofw'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['NME'])
+ assert_almost_equal(infos['NME'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
diff --git a/vendor/ViTPose/tests/test_datasets/test_fashion_dataset.py b/vendor/ViTPose/tests/test_datasets/test_fashion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f5cdc8a3c131a21b3d4ad428b374e832acf6bd7
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_fashion_dataset.py
@@ -0,0 +1,70 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest.mock import MagicMock
+
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_deepfashion_dataset():
+ dataset = 'DeepFashionDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/deepfashion_full.py').dataset_info
+ # test DeepFashion datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=8,
+ dataset_joints=8,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ image_thr=0.0,
+ bbox_file='')
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/fld/test_fld.json',
+ img_prefix='tests/data/fld/',
+ subset='full',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'deepfashion_full'
+
+ image_id = 128
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
diff --git a/vendor/ViTPose/tests/test_datasets/test_hand_dataset.py b/vendor/ViTPose/tests/test_datasets/test_hand_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f4bb1c03ad5d43a204bcf83ad54b98f88d1903d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_hand_dataset.py
@@ -0,0 +1,456 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_OneHand10K_dataset():
+ dataset = 'OneHand10KDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/onehand10k.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/onehand10k/test_onehand10k.json',
+ img_prefix='tests/data/onehand10k/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/onehand10k/test_onehand10k.json',
+ img_prefix='tests/data/onehand10k/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'onehand10k'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_hand_coco_wholebody_dataset():
+ dataset = 'HandCocoWholeBodyDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody_hand.py').dataset_info
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_FreiHand2D_dataset():
+ dataset = 'FreiHandDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/freihand2d.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[224, 224],
+ heatmap_size=[56, 56],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/freihand/test_freihand.json',
+ img_prefix='tests/data/freihand/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/freihand/test_freihand.json',
+ img_prefix='tests/data/freihand/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'freihand'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 8
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_RHD2D_dataset():
+ dataset = 'Rhd2DDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/rhd2d.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/rhd/test_rhd.json',
+ img_prefix='tests/data/rhd/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/rhd/test_rhd.json',
+ img_prefix='tests/data/rhd/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'rhd2d'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 3
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_Panoptic2D_dataset():
+ dataset = 'PanopticDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/panoptic_hand2d.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/panoptic/test_panoptic.json',
+ img_prefix='tests/data/panoptic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/panoptic/test_panoptic.json',
+ img_prefix='tests/data/panoptic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'panoptic_hand2d'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCKh', 'EPE', 'AUC'])
+ assert_almost_equal(infos['PCKh'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_InterHand2D_dataset():
+ dataset = 'InterHand2DDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/interhand2d.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=21,
+ dataset_joints=21,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'interhand2d'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ assert len(custom_dataset.db) == 6
+
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK', 'EPE', 'AUC'])
+ print(infos, flush=True)
+ assert_almost_equal(infos['PCK'], 1.0)
+ assert_almost_equal(infos['AUC'], 0.95)
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_InterHand3D_dataset():
+ dataset = 'InterHand3DDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/interhand3d.py').dataset_info
+
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=42,
+ dataset_joints=42,
+ dataset_channel=[
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41
+ ],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64, 64],
+ heatmap3d_depth_bound=400.0,
+ heatmap_size_root=64,
+ root_depth_bound=400.0,
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+ # Test
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/interhand2.6m/test_interhand2.6m_data.json',
+ camera_file='tests/data/interhand2.6m/test_interhand2.6m_camera.json',
+ joint_file='tests/data/interhand2.6m/test_interhand2.6m_joint_3d.json',
+ img_prefix='tests/data/interhand2.6m/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.dataset_name == 'interhand3d'
+ assert custom_dataset.test_mode is False
+ assert custom_dataset.num_images == 4
+ assert len(custom_dataset.db) == 4
+
+ _ = custom_dataset[0]
+
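+ # 3D hand outputs also carry the relative root depth and hand type required by the MRRPE and Handedness_acc metrics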
+ results = convert_db_to_output(
+ custom_dataset.db, keys=['rel_root_depth', 'hand_type'], is_3d=True)
+ infos = custom_dataset.evaluate(
+ results, metric=['MRRPE', 'MPJPE', 'Handedness_acc'])
+ assert_almost_equal(infos['MRRPE'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_all'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_single'], 0.0, decimal=5)
+ assert_almost_equal(infos['MPJPE_interacting'], 0.0, decimal=5)
+ assert_almost_equal(infos['Handedness_acc'], 1.0)
+
+ with pytest.raises(KeyError):
+ infos = custom_dataset.evaluate(results, metric='mAP')
diff --git a/vendor/ViTPose/tests/test_datasets/test_mesh_dataset.py b/vendor/ViTPose/tests/test_datasets/test_mesh_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..59938a06583564650f2c83c569dd10d2fc848dde
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_mesh_dataset.py
@@ -0,0 +1,127 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+
+from mmpose.datasets import DATASETS
+
+
+def test_mesh_Mosh_dataset():
+ # test Mosh dataset
+ dataset = 'MoshDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mosh/test_mosh.npz', pipeline=[])
+
+ _ = custom_dataset[0]
+
+
+def test_mesh_H36M_dataset():
+ # test H36M dataset
+ dataset = 'MeshH36MDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ iuv_size=[64, 64],
+ num_joints=24,
+ use_IUV=True,
+ uv_type='BF')
+ _ = dataset_class(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[],
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ _ = custom_dataset[0]
+
+ # test evaluation
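+ # use the ground-truth 3D joints as fake predictions for the evaluation round-trip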
+ outputs = []
+ for item in custom_dataset:
+ pred = dict(
+ keypoints_3d=item['joints_3d'][None, ...],
+ image_path=item['image_file'])
+ outputs.append(pred)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ eval_result = custom_dataset.evaluate(outputs, tmpdir)
+ assert 'MPJPE' in eval_result
+ assert 'MPJPE-PA' in eval_result
+
+
+def test_mesh_Mix_dataset():
+ # test mesh Mix dataset
+
+ dataset = 'MeshMixDataset'
+ dataset_class = DATASETS.get(dataset)
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ iuv_size=[64, 64],
+ num_joints=24,
+ use_IUV=True,
+ uv_type='BF')
+
+ custom_dataset = dataset_class(
+ configs=[
+ dict(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[]),
+ dict(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[]),
+ ],
+ partition=[0.6, 0.4])
+
+ _ = custom_dataset[0]
+
+
+def test_mesh_Adversarial_dataset():
+ # test mesh Adversarial dataset
+
+ # load train dataset
+ data_cfg = dict(
+ image_size=[256, 256],
+ iuv_size=[64, 64],
+ num_joints=24,
+ use_IUV=True,
+ uv_type='BF')
+ train_dataset = dict(
+ type='MeshMixDataset',
+ configs=[
+ dict(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[]),
+ dict(
+ ann_file='tests/data/h36m/test_h36m.npz',
+ img_prefix='tests/data/h36m',
+ data_cfg=data_cfg,
+ pipeline=[]),
+ ],
+ partition=[0.6, 0.4])
+
+ # load adversarial dataset
+ adversarial_dataset = dict(
+ type='MoshDataset',
+ ann_file='tests/data/mosh/test_mosh.npz',
+ pipeline=[])
+
+ # combine the train and adversarial datasets to form a new dataset
+ dataset = 'MeshAdversarialDataset'
+ dataset_class = DATASETS.get(dataset)
+ custom_dataset = dataset_class(train_dataset, adversarial_dataset)
+ item = custom_dataset[0]
+ assert 'mosh_theta' in item.keys()
diff --git a/vendor/ViTPose/tests/test_datasets/test_top_down_dataset.py b/vendor/ViTPose/tests/test_datasets/test_top_down_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..35c1a99347566d6988c06bed580a78a464d4499c
--- /dev/null
+++ b/vendor/ViTPose/tests/test_datasets/test_top_down_dataset.py
@@ -0,0 +1,1022 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from unittest.mock import MagicMock
+
+import pytest
+from mmcv import Config
+from numpy.testing import assert_almost_equal
+
+from mmpose.datasets import DATASETS
+from tests.utils.data_utils import convert_db_to_output
+
+
+def test_top_down_COCO_dataset():
+ dataset = 'TopDownCocoDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco.py').dataset_info
+ # test COCO datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_MHP_dataset():
+ dataset = 'TopDownMhpDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/mhp.py').dataset_info
+ # test MHP datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ bbox_thr=1.0,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ # Test det bbox
+ with pytest.raises(AssertionError):
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ # Test gt bbox
+ _ = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mhp/test_mhp.json',
+ img_prefix='tests/data/mhp/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'mhp'
+
+ image_id = 2889
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_PoseTrack18_dataset():
+ dataset = 'TopDownPoseTrack18Dataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/posetrack18.py').dataset_info
+    # test PoseTrack18 datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_human_detections.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'posetrack18'
+
+ image_id = 10128340000
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ assert len(custom_dataset) == 14
+ _ = custom_dataset[0]
+
+ # Test evaluate function, use gt bbox
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['Total AP'], 100)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+ # Test evaluate function, use det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert len(custom_dataset) == 278
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+    # since the det bbox input assumes each keypoint position to be (0, 0),
+    # the Total AP will be zero.
+ assert_almost_equal(infos['Total AP'], 0.)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_PoseTrack18Video_dataset():
+ dataset = 'TopDownPoseTrack18VideoDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/posetrack18.py').dataset_info
+ # test PoseTrack18Video dataset
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_nms=True,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_human_detections.json',
+ # frame-related arguments
+ frame_index_rand=True,
+ frame_index_range=[-2, 2],
+ num_adj_frames=1,
+ frame_indices_test=[-2, 2, -1, 1, 0],
+ frame_weight_train=(0.0, 1.0),
+ frame_weight_test=(0.3, 0.1, 0.25, 0.25, 0.1),
+ )
+
+    # Test that dataset_info=None raises a ValueError
+ with pytest.raises(ValueError):
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=None,
+ test_mode=False)
+
+ # Test train mode (must use gt bbox)
+ with pytest.warns(UserWarning):
+ _ = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+    # Test gt bbox + test mode
+ with pytest.warns(UserWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'posetrack18'
+ assert custom_dataset.ph_fill_len == 6
+
+ image_id = 10128340000
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ assert len(custom_dataset) == 14
+ _ = custom_dataset[0]
+
+ # Test det bbox + test mode
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(UserWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.frame_indices_test == [-2, -1, 0, 1, 2]
+ assert len(custom_dataset) == 278
+
+ # Test non-random index
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['frame_index_rand'] = False
+ data_cfg_copy['frame_indices_train'] = [0, -1]
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ assert custom_dataset.frame_indices_train == [-1, 0]
+
+ # Test evaluate function, use gt bbox
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['Total AP'], 100)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+ # Test evaluate function, use det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ with pytest.warns(UserWarning):
+ custom_dataset = dataset_class(
+ ann_file='tests/data/posetrack18/annotations/'
+ 'test_posetrack18_val.json',
+ img_prefix='tests/data/posetrack18/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+    # since the det bbox input assumes each keypoint position to be (0, 0),
+    # the Total AP will be zero.
+ assert_almost_equal(infos['Total AP'], 0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_CrowdPose_dataset():
+ dataset = 'TopDownCrowdPoseDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/crowdpose.py').dataset_info
+ # test CrowdPose datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/crowdpose/test_crowdpose_det_AP_40.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/crowdpose/test_crowdpose.json',
+ img_prefix='tests/data/crowdpose/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'crowdpose'
+
+ image_id = 103319
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 2
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_COCO_wholebody_dataset():
+ dataset = 'TopDownCocoWholeBodyDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/coco_wholebody.py').dataset_info
+    # test COCO-WholeBody datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=133,
+ dataset_joints=133,
+ dataset_channel=[
+ list(range(133)),
+ ],
+ inference_channel=list(range(133)))
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/coco/test_coco_wholebody.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'coco_wholebody'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_halpe_dataset():
+ dataset = 'TopDownHalpeDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/halpe.py').dataset_info
+ # test Halpe datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=136,
+ dataset_joints=136,
+ dataset_channel=[
+ list(range(136)),
+ ],
+ inference_channel=list(range(136)))
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='tests/data/coco/test_coco_det_AP_H_56.json',
+ )
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/halpe/test_halpe.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/halpe/test_halpe.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/halpe/test_halpe.json',
+ img_prefix='tests/data/coco/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'halpe'
+
+ image_id = 785
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 4
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_OCHuman_dataset():
+ dataset = 'TopDownOCHumanDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/ochuman.py').dataset_info
+ # test OCHuman datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='',
+ )
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/ochuman/test_ochuman.json',
+ img_prefix='tests/data/ochuman/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/ochuman/test_ochuman.json',
+ img_prefix='tests/data/ochuman/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'ochuman'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_MPII_dataset():
+ dataset = 'TopDownMpiiDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/mpii.py').dataset_info
+    # test MPII datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=16,
+ dataset_joints=16,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ )
+
+    # Test dataset loading
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mpii/test_mpii.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ )
+
+ assert len(custom_dataset) == 5
+ assert custom_dataset.dataset_name == 'mpii'
+ _ = custom_dataset[0]
+
+
+def test_top_down_MPII_TRB_dataset():
+ dataset = 'TopDownMpiiTrbDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/mpii_trb.py').dataset_info
+ # test MPII TRB datasets
+ dataset_class = DATASETS.get(dataset)
+
+ channel_cfg = dict(
+ num_output_channels=40,
+ dataset_joints=40,
+ dataset_channel=[list(range(40))],
+ inference_channel=list(range(40)))
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ _ = dataset_class(
+ ann_file='tests/data/mpii/test_mpii_trb.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ custom_dataset = dataset_class(
+ ann_file='tests/data/mpii/test_mpii_trb.json',
+ img_prefix='tests/data/mpii/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'mpii_trb'
+ _ = custom_dataset[0]
+
+
+def test_top_down_AIC_dataset():
+ dataset = 'TopDownAicDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/aic.py').dataset_info
+ # test AIC datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=14,
+ dataset_joints=14,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='')
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/aic/test_aic.json',
+ img_prefix='tests/data/aic/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'aic'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='mAP')
+ assert_almost_equal(infos['AP'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='PCK')
+
+
+def test_top_down_JHMDB_dataset():
+ dataset = 'TopDownJhmdbDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/jhmdb.py').dataset_info
+ # test JHMDB datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=15,
+ dataset_joints=15,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ ],
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+
+ data_cfg = dict(
+ image_size=[192, 256],
+ heatmap_size=[48, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.0,
+ bbox_file='')
+
+ with pytest.raises(AssertionError):
+ # Test det bbox
+ data_cfg_copy = copy.deepcopy(data_cfg)
+ data_cfg_copy['use_gt_bbox'] = False
+ _ = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ _ = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg_copy,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/jhmdb/test_jhmdb_sub1.json',
+ img_prefix='tests/data/jhmdb/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'jhmdb'
+
+ image_id = 2290001
+ assert image_id in custom_dataset.img_ids
+ assert len(custom_dataset.img_ids) == 3
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric=['PCK'])
+ assert_almost_equal(infos['Mean PCK'], 1.0)
+
+ infos = custom_dataset.evaluate(results, metric=['tPCK'])
+ assert_almost_equal(infos['Mean tPCK'], 1.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='mAP')
+
+
+def test_top_down_h36m_dataset():
+ dataset = 'TopDownH36MDataset'
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/h36m.py').dataset_info
+    # test H36M datasets
+ dataset_class = DATASETS.get(dataset)
+ dataset_class.load_annotations = MagicMock()
+ dataset_class.coco = MagicMock()
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+ data_cfg = dict(
+ image_size=[256, 256],
+ heatmap_size=[64, 64],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'])
+
+ # Test gt bbox
+ custom_dataset = dataset_class(
+ ann_file='tests/data/h36m/h36m_coco.json',
+ img_prefix='tests/data/h36m/',
+ data_cfg=data_cfg,
+ pipeline=[],
+ dataset_info=dataset_info,
+ test_mode=True)
+
+ assert custom_dataset.test_mode is True
+ assert custom_dataset.dataset_name == 'h36m'
+
+ image_id = 1
+ assert image_id in custom_dataset.img_ids
+ _ = custom_dataset[0]
+
+ results = convert_db_to_output(custom_dataset.db)
+ infos = custom_dataset.evaluate(results, metric='EPE')
+ assert_almost_equal(infos['EPE'], 0.0)
+
+ with pytest.raises(KeyError):
+ _ = custom_dataset.evaluate(results, metric='AUC')
diff --git a/vendor/ViTPose/tests/test_eval_hook.py b/vendor/ViTPose/tests/test_eval_hook.py
new file mode 100644
index 0000000000000000000000000000000000000000..f472541c9527d1958831455b8ba511b08c072cce
--- /dev/null
+++ b/vendor/ViTPose/tests/test_eval_hook.py
@@ -0,0 +1,258 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import unittest.mock as mock
+from collections import OrderedDict
+from unittest.mock import MagicMock, patch
+
+import pytest
+import torch
+import torch.nn as nn
+from mmcv.runner import EpochBasedRunner, build_optimizer
+from mmcv.utils import get_logger
+from torch.utils.data import DataLoader, Dataset
+
+from mmpose.core import DistEvalHook, EvalHook
+
+
+class ExampleDataset(Dataset):
+
+ def __init__(self):
+ self.index = 0
+ self.eval_result = [0.1, 0.4, 0.3, 0.7, 0.2, 0.05, 0.4, 0.6]
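+        # the maximum value (0.7) appears at the 4th evaluation and the
+        # minimum (0.05) at the 6th; the eval-hook tests below rely on this
+        # ordering when checking the saved best checkpoints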
+
+ def __getitem__(self, idx):
+ results = dict(imgs=torch.tensor([1]))
+ return results
+
+ def __len__(self):
+ return 1
+
+ @mock.create_autospec
+ def evaluate(self, results, res_folder=None, logger=None):
+ pass
+
+
+class EvalDataset(ExampleDataset):
+
+ def evaluate(self, results, res_folder=None, logger=None):
+ acc = self.eval_result[self.index]
+ output = OrderedDict(acc=acc, index=self.index, score=acc)
+ self.index += 1
+ return output
+
+
+class ExampleModel(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ self.conv = nn.Linear(1, 1)
+ self.test_cfg = None
+
+ def forward(self, imgs, return_loss=False):
+ return imgs
+
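+    # predictions are generated directly from the ground-truth db, so the
+    # evaluated mAP should be perfect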
+ def train_step(self, data_batch, optimizer, **kwargs):
+ outputs = {
+ 'loss': 0.5,
+ 'log_vars': {
+ 'accuracy': 0.98
+ },
+ 'num_samples': 1
+ }
+ return outputs
+
+
+@pytest.mark.skipif(
+ not torch.cuda.is_available(), reason='requires CUDA support')
+@patch('mmpose.apis.single_gpu_test', MagicMock)
+@patch('mmpose.apis.multi_gpu_test', MagicMock)
+@pytest.mark.parametrize('EvalHookCls', (EvalHook, DistEvalHook))
+def test_eval_hook(EvalHookCls):
+ with pytest.raises(TypeError):
+        # dataloader must be a PyTorch DataLoader
+ test_dataset = ExampleDataset()
+ data_loader = [
+ DataLoader(
+ test_dataset,
+ batch_size=1,
+ sampler=None,
+                num_workers=0,
+ shuffle=False)
+ ]
+ EvalHookCls(data_loader)
+
+ with pytest.raises(KeyError):
+ # rule must be in keys of rule_map
+ test_dataset = ExampleDataset()
+ data_loader = DataLoader(
+ test_dataset,
+ batch_size=1,
+ sampler=None,
+ num_workers=0,
+ shuffle=False)
+ EvalHookCls(data_loader, save_best='auto', rule='unsupport')
+
+ with pytest.raises(ValueError):
+ # save_best must be valid when rule_map is None
+ test_dataset = ExampleDataset()
+ data_loader = DataLoader(
+ test_dataset,
+ batch_size=1,
+ sampler=None,
+ num_workers=0,
+ shuffle=False)
+ EvalHookCls(data_loader, save_best='unsupport')
+
+ optimizer_cfg = dict(
+ type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+
+ test_dataset = ExampleDataset()
+ loader = DataLoader(test_dataset, batch_size=1)
+ model = ExampleModel()
+ optimizer = build_optimizer(model, optimizer_cfg)
+
+ data_loader = DataLoader(test_dataset, batch_size=1)
+ eval_hook = EvalHookCls(data_loader, save_best=None)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=1)
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)])
+ assert runner.meta is None or 'best_score' not in runner.meta[
+ 'hook_msgs']
+ assert runner.meta is None or 'best_ckpt' not in runner.meta[
+ 'hook_msgs']
+
+    # when `save_best` is set to 'auto', the first returned metric will be used.
+ loader = DataLoader(EvalDataset(), batch_size=1)
+ model = ExampleModel()
+ data_loader = DataLoader(EvalDataset(), batch_size=1)
+ eval_hook = EvalHookCls(data_loader, interval=1, save_best='auto')
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=8)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)])
+
+ real_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.7
+
+ loader = DataLoader(EvalDataset(), batch_size=1)
+ model = ExampleModel()
+ data_loader = DataLoader(EvalDataset(), batch_size=1)
+ eval_hook = EvalHookCls(data_loader, interval=1, save_best='acc')
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=8)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)])
+
+ real_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.7
+
+ data_loader = DataLoader(EvalDataset(), batch_size=1)
+ eval_hook = EvalHookCls(
+ data_loader, interval=1, save_best='score', rule='greater')
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)], 8)
+
+ real_path = osp.join(tmpdir, 'best_score_epoch_4.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.7
+
+ data_loader = DataLoader(EvalDataset(), batch_size=1)
+ eval_hook = EvalHookCls(data_loader, save_best='acc', rule='less')
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=8)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)])
+
+ real_path = osp.join(tmpdir, 'best_acc_epoch_6.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.05
+
+ data_loader = DataLoader(EvalDataset(), batch_size=1)
+ eval_hook = EvalHookCls(data_loader, save_best='acc')
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logger = get_logger('test_eval')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=2)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)])
+
+ real_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.4
+
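+        # resume from the epoch-2 checkpoint and keep training to epoch 8;
+        # the same data_loader (and its EvalDataset index) is reused, so the
+        # best accuracy (0.7) is again reached at epoch 4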
+ resume_from = osp.join(tmpdir, 'latest.pth')
+ loader = DataLoader(ExampleDataset(), batch_size=1)
+ eval_hook = EvalHookCls(data_loader, save_best='acc')
+ runner = EpochBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logger,
+ max_epochs=8)
+ runner.register_checkpoint_hook(dict(interval=1))
+ runner.register_hook(eval_hook)
+ runner.resume(resume_from)
+ runner.run([loader], [('train', 1)])
+
+ real_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')
+
+ assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)
+ assert runner.meta['hook_msgs']['best_score'] == 0.7
diff --git a/vendor/ViTPose/tests/test_evaluation/test_bottom_up_eval.py b/vendor/ViTPose/tests/test_evaluation/test_bottom_up_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..0459ae1bd979a49fb6a3a98978fe20d1442a2c4c
--- /dev/null
+++ b/vendor/ViTPose/tests/test_evaluation/test_bottom_up_eval.py
@@ -0,0 +1,102 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.core import (aggregate_scale, aggregate_stage_flip,
+ flip_feature_maps, get_group_preds, split_ae_outputs)
+
+
+def test_split_ae_outputs():
+ fake_outputs = [torch.zeros((1, 4, 2, 2))]
+ heatmaps, tags = split_ae_outputs(
+ fake_outputs,
+ num_joints=4,
+ with_heatmaps=[False],
+ with_ae=[True],
+ select_output_index=[0])
+
+
+def test_flip_feature_maps():
+ fake_outputs = [torch.zeros((1, 4, 2, 2))]
+ _ = flip_feature_maps(fake_outputs, None)
+ _ = flip_feature_maps(fake_outputs, flip_index=[1, 0])
+
+
+def test_aggregate_stage_flip():
+ fake_outputs = [torch.zeros((1, 4, 2, 2))]
+ fake_flip_outputs = [torch.ones((1, 4, 2, 2))]
+ output = aggregate_stage_flip(
+ fake_outputs,
+ fake_flip_outputs,
+ index=-1,
+ project2image=True,
+ size_projected=(4, 4),
+ align_corners=False,
+ aggregate_stage='concat',
+ aggregate_flip='average')
+ assert isinstance(output, list)
+
+ output = aggregate_stage_flip(
+ fake_outputs,
+ fake_flip_outputs,
+ index=-1,
+ project2image=True,
+ size_projected=(4, 4),
+ align_corners=False,
+ aggregate_stage='average',
+ aggregate_flip='average')
+ assert isinstance(output, list)
+
+ output = aggregate_stage_flip(
+ fake_outputs,
+ fake_flip_outputs,
+ index=-1,
+ project2image=True,
+ size_projected=(4, 4),
+ align_corners=False,
+ aggregate_stage='average',
+ aggregate_flip='concat')
+ assert isinstance(output, list)
+
+ output = aggregate_stage_flip(
+ fake_outputs,
+ fake_flip_outputs,
+ index=-1,
+ project2image=True,
+ size_projected=(4, 4),
+ align_corners=False,
+ aggregate_stage='concat',
+ aggregate_flip='concat')
+ assert isinstance(output, list)
+
+
+def test_aggregate_scale():
+ fake_outputs = [torch.zeros((1, 4, 2, 2)), torch.zeros((1, 4, 2, 2))]
+ output = aggregate_scale(
+ fake_outputs, align_corners=False, aggregate_scale='average')
+ assert isinstance(output, torch.Tensor)
+ assert output.shape == fake_outputs[0].shape
+
+ output = aggregate_scale(
+ fake_outputs, align_corners=False, aggregate_scale='unsqueeze_concat')
+
+ assert isinstance(output, torch.Tensor)
+ assert len(output.shape) == len(fake_outputs[0].shape) + 1
+
+
+def test_get_group_preds():
+ fake_grouped_joints = [np.array([[[0, 0], [1, 1]]])]
+ results = get_group_preds(
+ fake_grouped_joints,
+ center=np.array([0, 0]),
+ scale=np.array([1, 1]),
+ heatmap_size=np.array([2, 2]))
+    assert results != []
+
+ results = get_group_preds(
+ fake_grouped_joints,
+ center=np.array([0, 0]),
+ scale=np.array([1, 1]),
+ heatmap_size=np.array([2, 2]),
+ use_udp=True)
+    assert results != []
diff --git a/vendor/ViTPose/tests/test_evaluation/test_mesh_eval.py b/vendor/ViTPose/tests/test_evaluation/test_mesh_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ff4fa20e878d2f4f9db961e5415507bedfe79e6
--- /dev/null
+++ b/vendor/ViTPose/tests/test_evaluation/test_mesh_eval.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+from mmpose.core import compute_similarity_transform
+
+
+def test_compute_similarity_transform():
+ source = np.random.rand(14, 3)
+ tran = np.random.rand(1, 3)
+ scale = 0.5
+ target = source * scale + tran
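+    # target is an exact similarity transform (scale + translation) of source,
+    # so the Procrustes alignment should reproduce it almost exactly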
+ source_transformed = compute_similarity_transform(source, target)
+ assert_array_almost_equal(source_transformed, target)
diff --git a/vendor/ViTPose/tests/test_evaluation/test_pose3d_eval.py b/vendor/ViTPose/tests/test_evaluation/test_pose3d_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..80aaba57c232c629e48aa0393a53bd1bc148f403
--- /dev/null
+++ b/vendor/ViTPose/tests/test_evaluation/test_pose3d_eval.py
@@ -0,0 +1,49 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+
+from mmpose.core import keypoint_3d_auc, keypoint_3d_pck
+
+
+def test_keypoint_3d_pck():
+ target = np.random.rand(2, 5, 3)
+ output = np.copy(target)
+ mask = np.ones((output.shape[0], output.shape[1]), dtype=bool)
+
+ with pytest.raises(ValueError):
+ _ = keypoint_3d_pck(output, target, mask, alignment='norm')
+
+ pck = keypoint_3d_pck(output, target, mask, alignment='none')
+ np.testing.assert_almost_equal(pck, 100)
+
+ output[0, 0, :] = target[0, 0, :] + 1
+ pck = keypoint_3d_pck(output, target, mask, alignment='none')
+ np.testing.assert_almost_equal(pck, 90, 5)
+
+ output = target * 2
+ pck = keypoint_3d_pck(output, target, mask, alignment='scale')
+ np.testing.assert_almost_equal(pck, 100)
+
+ output = target + 2
+ pck = keypoint_3d_pck(output, target, mask, alignment='procrustes')
+ np.testing.assert_almost_equal(pck, 100)
+
+
+def test_keypoint_3d_auc():
+ target = np.random.rand(2, 5, 3)
+ output = np.copy(target)
+ mask = np.ones((output.shape[0], output.shape[1]), dtype=bool)
+
+ with pytest.raises(ValueError):
+ _ = keypoint_3d_auc(output, target, mask, alignment='norm')
+
+ auc = keypoint_3d_auc(output, target, mask, alignment='none')
+ np.testing.assert_almost_equal(auc, 30 / 31 * 100)
+
+ output = target * 2
+ auc = keypoint_3d_auc(output, target, mask, alignment='scale')
+ np.testing.assert_almost_equal(auc, 30 / 31 * 100)
+
+ output = target + 2
+ auc = keypoint_3d_auc(output, target, mask, alignment='procrustes')
+ np.testing.assert_almost_equal(auc, 30 / 31 * 100)
diff --git a/vendor/ViTPose/tests/test_evaluation/test_top_down_eval.py b/vendor/ViTPose/tests/test_evaluation/test_top_down_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cda7e141f5caa26c1dd12455e15b202b1d66bf2
--- /dev/null
+++ b/vendor/ViTPose/tests/test_evaluation/test_top_down_eval.py
@@ -0,0 +1,213 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+from numpy.testing import assert_array_almost_equal
+
+from mmpose.core import (keypoint_auc, keypoint_epe, keypoint_pck_accuracy,
+ keypoints_from_heatmaps, keypoints_from_heatmaps3d,
+ multilabel_classification_accuracy, pose_pck_accuracy)
+
+
+def test_pose_pck_accuracy():
+ output = np.zeros((1, 5, 64, 64), dtype=np.float32)
+ target = np.zeros((1, 5, 64, 64), dtype=np.float32)
+ mask = np.array([[True, True, False, False, False]])
+ # first channel
+ output[0, 0, 20, 20] = 1
+ target[0, 0, 10, 10] = 1
+ # second channel
+ output[0, 1, 30, 30] = 1
+ target[0, 1, 30, 30] = 1
+
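+    # joint 0 is mislocalized, joint 1 is correct, and joints 2-4 are masked
+    # out, so their per-joint accuracy is -1 and only 2 joints are counted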
+ acc, avg_acc, cnt = pose_pck_accuracy(output, target, mask)
+
+ assert_array_almost_equal(acc, np.array([0, 1, -1, -1, -1]), decimal=4)
+ assert abs(avg_acc - 0.5) < 1e-4
+ assert abs(cnt - 2) < 1e-4
+
+
+def test_keypoints_from_heatmaps():
+ heatmaps = np.ones((1, 1, 64, 64), dtype=np.float32)
+ heatmaps[0, 0, 31, 31] = 2
+ center = np.array([[127, 127]])
+ scale = np.array([[64 / 200.0, 64 / 200.0]])
+
+ udp_heatmaps = np.ones((32, 17, 64, 64), dtype=np.float32)
+ udp_heatmaps[:, :, 31, 31] = 2
+ udp_center = np.tile([127, 127], (32, 1))
+ udp_scale = np.tile([32, 32], (32, 1))
+
+ preds, maxvals = keypoints_from_heatmaps(heatmaps, center, scale)
+
+ assert_array_almost_equal(preds, np.array([[[126, 126]]]), decimal=4)
+ assert_array_almost_equal(maxvals, np.array([[[2]]]), decimal=4)
+ assert isinstance(preds, np.ndarray)
+ assert isinstance(maxvals, np.ndarray)
+
+ with pytest.raises(AssertionError):
+        # kernel should be > 0
+ _ = keypoints_from_heatmaps(
+ heatmaps, center, scale, post_process='unbiased', kernel=0)
+
+ preds, maxvals = keypoints_from_heatmaps(
+ heatmaps, center, scale, post_process='unbiased')
+ assert_array_almost_equal(preds, np.array([[[126, 126]]]), decimal=4)
+ assert_array_almost_equal(maxvals, np.array([[[2]]]), decimal=4)
+ assert isinstance(preds, np.ndarray)
+ assert isinstance(maxvals, np.ndarray)
+
+    # test UDP post-processing with batched, multi-joint inputs
+ preds, maxvals = keypoints_from_heatmaps(
+ udp_heatmaps,
+ udp_center,
+ udp_scale,
+ post_process='default',
+ target_type='GaussianHeatMap',
+ use_udp=True)
+ assert_array_almost_equal(preds, np.tile([76, 76], (32, 17, 1)), decimal=0)
+ assert_array_almost_equal(maxvals, np.tile([2], (32, 17, 1)), decimal=4)
+ assert isinstance(preds, np.ndarray)
+ assert isinstance(maxvals, np.ndarray)
+
+ preds1, maxvals1 = keypoints_from_heatmaps(
+ heatmaps,
+ center,
+ scale,
+ post_process='default',
+ target_type='GaussianHeatMap',
+ use_udp=True)
+ preds2, maxvals2 = keypoints_from_heatmaps(
+ heatmaps,
+ center,
+ scale,
+ post_process='default',
+ target_type='GaussianHeatmap',
+ use_udp=True)
+ assert_array_almost_equal(preds1, preds2, decimal=4)
+ assert_array_almost_equal(maxvals1, maxvals2, decimal=4)
+ assert isinstance(preds2, np.ndarray)
+ assert isinstance(maxvals2, np.ndarray)
+
+
+def test_keypoint_pck_accuracy():
+ output = np.zeros((2, 5, 2))
+ target = np.zeros((2, 5, 2))
+ mask = np.array([[True, True, False, True, True],
+ [True, True, False, True, True]])
+ thr = np.full((2, 2), 10, dtype=np.float32)
+ # first channel
+ output[0, 0] = [10, 0]
+ target[0, 0] = [10, 0]
+ # second channel
+ output[0, 1] = [20, 20]
+ target[0, 1] = [10, 10]
+ # third channel
+ output[0, 2] = [0, 0]
+ target[0, 2] = [-1, 0]
+ # fourth channel
+ output[0, 3] = [30, 30]
+ target[0, 3] = [30, 30]
+ # fifth channel
+ output[0, 4] = [0, 10]
+ target[0, 4] = [0, 10]
+
+ acc, avg_acc, cnt = keypoint_pck_accuracy(output, target, mask, 0.5, thr)
+
+ assert_array_almost_equal(acc, np.array([1, 0.5, -1, 1, 1]), decimal=4)
+ assert abs(avg_acc - 0.875) < 1e-4
+ assert abs(cnt - 4) < 1e-4
+
+ acc, avg_acc, cnt = keypoint_pck_accuracy(output, target, mask, 0.5,
+ np.zeros((2, 2)))
+ assert_array_almost_equal(acc, np.array([-1, -1, -1, -1, -1]), decimal=4)
+ assert abs(avg_acc) < 1e-4
+ assert abs(cnt) < 1e-4
+
+ acc, avg_acc, cnt = keypoint_pck_accuracy(output, target, mask, 0.5,
+ np.array([[0, 0], [10, 10]]))
+ assert_array_almost_equal(acc, np.array([1, 1, -1, 1, 1]), decimal=4)
+ assert abs(avg_acc - 1) < 1e-4
+ assert abs(cnt - 4) < 1e-4
+
+
+def test_keypoint_auc():
+ output = np.zeros((1, 5, 2))
+ target = np.zeros((1, 5, 2))
+ mask = np.array([[True, True, False, True, True]])
+ # first channel
+ output[0, 0] = [10, 4]
+ target[0, 0] = [10, 0]
+ # second channel
+ output[0, 1] = [10, 18]
+ target[0, 1] = [10, 10]
+ # third channel
+ output[0, 2] = [0, 0]
+ target[0, 2] = [0, -1]
+ # fourth channel
+ output[0, 3] = [40, 40]
+ target[0, 3] = [30, 30]
+ # fifth channel
+ output[0, 4] = [20, 10]
+ target[0, 4] = [0, 10]
+
+ auc = keypoint_auc(output, target, mask, 20, 4)
+ assert abs(auc - 0.375) < 1e-4
+
+
+def test_keypoint_epe():
+ output = np.zeros((1, 5, 2))
+ target = np.zeros((1, 5, 2))
+ mask = np.array([[True, True, False, True, True]])
+ # first channel
+ output[0, 0] = [10, 4]
+ target[0, 0] = [10, 0]
+ # second channel
+ output[0, 1] = [10, 18]
+ target[0, 1] = [10, 10]
+ # third channel
+ output[0, 2] = [0, 0]
+ target[0, 2] = [-1, -1]
+ # fourth channel
+ output[0, 3] = [40, 40]
+ target[0, 3] = [30, 30]
+ # fifth channel
+ output[0, 4] = [20, 10]
+ target[0, 4] = [0, 10]
+
+ epe = keypoint_epe(output, target, mask)
+ assert abs(epe - 11.5355339) < 1e-4
+
+
+def test_keypoints_from_heatmaps3d():
+ heatmaps = np.ones((1, 1, 64, 64, 64), dtype=np.float32)
+ heatmaps[0, 0, 10, 31, 40] = 2
+ center = np.array([[127, 127]])
+ scale = np.array([[64 / 200.0, 64 / 200.0]])
+ preds, maxvals = keypoints_from_heatmaps3d(heatmaps, center, scale)
+
+ assert_array_almost_equal(preds, np.array([[[135, 126, 10]]]), decimal=4)
+ assert_array_almost_equal(maxvals, np.array([[[2]]]), decimal=4)
+ assert isinstance(preds, np.ndarray)
+ assert isinstance(maxvals, np.ndarray)
+
+
+def test_multilabel_classification_accuracy():
+ output = np.array([[0.7, 0.8, 0.4], [0.8, 0.1, 0.1]])
+ target = np.array([[1, 0, 0], [1, 0, 1]])
+ mask = np.array([[True, True, True], [True, True, True]])
+ thr = 0.5
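+    # sample 0 predicts an extra positive (0.8 > thr at index 1) and sample 1
+    # misses one (0.1 < thr at index 2), so neither sample is fully correct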
+ acc = multilabel_classification_accuracy(output, target, mask, thr)
+ assert acc == 0
+
+ output = np.array([[0.7, 0.2, 0.4], [0.8, 0.1, 0.9]])
+ thr = 0.5
+ acc = multilabel_classification_accuracy(output, target, mask, thr)
+ assert acc == 1
+
+ thr = 0.3
+ acc = multilabel_classification_accuracy(output, target, mask, thr)
+ assert acc == 0.5
+
+ mask = np.array([[True, True, False], [True, True, True]])
+ acc = multilabel_classification_accuracy(output, target, mask, thr)
+ assert acc == 1
diff --git a/vendor/ViTPose/tests/test_external/test_smpl.py b/vendor/ViTPose/tests/test_external/test_smpl.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3e2482188a46928712937bb5c3b68aa06958ca2
--- /dev/null
+++ b/vendor/ViTPose/tests/test_external/test_smpl.py
@@ -0,0 +1,78 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+
+import numpy as np
+import torch
+
+from mmpose.models.utils import SMPL
+from tests.utils.mesh_utils import generate_smpl_weight_file
+
+
+def test_smpl():
+ """Test smpl model."""
+
+ # build smpl model
+ smpl = None
+ with tempfile.TemporaryDirectory() as tmpdir:
+ # generate weight file for SMPL model.
+ generate_smpl_weight_file(tmpdir)
+
+ smpl_cfg = dict(
+ smpl_path=tmpdir,
+ joints_regressor=osp.join(tmpdir, 'test_joint_regressor.npy'))
+ smpl = SMPL(**smpl_cfg)
+
+    assert smpl is not None, 'Failed to build SMPL model'
+
+    # test the get_faces() method
+ faces = smpl.get_faces()
+ assert isinstance(faces, np.ndarray)
+
+ betas = torch.zeros(3, 10)
+ body_pose = torch.zeros(3, 23 * 3)
+ global_orient = torch.zeros(3, 3)
+ transl = torch.zeros(3, 3)
+ gender = torch.LongTensor([-1, 0, 1])
+
+ # test forward with body_pose and global_orient in axis-angle format
+ smpl_out = smpl(
+ betas=betas, body_pose=body_pose, global_orient=global_orient)
+ assert isinstance(smpl_out, dict)
+ assert smpl_out['vertices'].shape == torch.Size([3, 6890, 3])
+ assert smpl_out['joints'].shape == torch.Size([3, 24, 3])
+
+ # test forward with body_pose and global_orient in rotation matrix format
+ body_pose = torch.eye(3).repeat([3, 23, 1, 1])
+ global_orient = torch.eye(3).repeat([3, 1, 1, 1])
+ _ = smpl(betas=betas, body_pose=body_pose, global_orient=global_orient)
+
+ # test forward with translation
+ _ = smpl(
+ betas=betas,
+ body_pose=body_pose,
+ global_orient=global_orient,
+ transl=transl)
+
+ # test forward with gender
+ _ = smpl(
+ betas=betas,
+ body_pose=body_pose,
+ global_orient=global_orient,
+ transl=transl,
+ gender=gender)
+
+    # test forward when all samples have the same gender
+ gender = torch.LongTensor([0, 0, 0])
+ _ = smpl(
+ betas=betas,
+ body_pose=body_pose,
+ global_orient=global_orient,
+ transl=transl,
+ gender=gender)
+
+ # test forward when batch size = 0
+ _ = smpl(
+ betas=torch.zeros(0, 10),
+ body_pose=torch.zeros(0, 23 * 3),
+ global_orient=torch.zeros(0, 3))
diff --git a/vendor/ViTPose/tests/test_losses/test_bottom_up_losses.py b/vendor/ViTPose/tests/test_losses/test_bottom_up_losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..803c19fa9379b8841631dc108c0f52bbe4321f10
--- /dev/null
+++ b/vendor/ViTPose/tests/test_losses/test_bottom_up_losses.py
@@ -0,0 +1,168 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+
+def test_multi_loss_factory():
+ from mmpose.models import build_loss
+
+ # test heatmap loss
+ loss_cfg = dict(type='HeatmapLoss')
+ loss = build_loss(loss_cfg)
+
+ with pytest.raises(AssertionError):
+ fake_pred = torch.zeros((2, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ fake_mask = torch.zeros((1, 64, 64))
+ loss(fake_pred, fake_label, fake_mask)
+
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ fake_mask = torch.zeros((1, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_mask), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ fake_mask = torch.zeros((1, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_mask), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ fake_mask = torch.ones((1, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_mask), torch.tensor(1.))
+
+ # test AE loss
+ fake_tags = torch.zeros((1, 18, 1))
+ fake_joints = torch.zeros((1, 3, 2, 2), dtype=torch.int)
+
+ loss_cfg = dict(type='AELoss', loss_type='exp')
+ loss = build_loss(loss_cfg)
+ assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
+ assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))
+
+ fake_tags[0, 0, 0] = 1.
+ fake_tags[0, 10, 0] = 0.
+ fake_joints[0, 0, 0, :] = torch.IntTensor((0, 1))
+ fake_joints[0, 0, 1, :] = torch.IntTensor((10, 1))
+ loss_cfg = dict(type='AELoss', loss_type='exp')
+ loss = build_loss(loss_cfg)
+ assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
+ assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.25))
+
+ fake_tags[0, 0, 0] = 0
+ fake_tags[0, 7, 0] = 1.
+ fake_tags[0, 17, 0] = 1.
+ fake_joints[0, 1, 0, :] = torch.IntTensor((7, 1))
+ fake_joints[0, 1, 1, :] = torch.IntTensor((17, 1))
+
+ loss_cfg = dict(type='AELoss', loss_type='exp')
+ loss = build_loss(loss_cfg)
+ assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))
+
+ loss_cfg = dict(type='AELoss', loss_type='max')
+ loss = build_loss(loss_cfg)
+ assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
+
+ with pytest.raises(ValueError):
+ loss_cfg = dict(type='AELoss', loss_type='min')
+ loss = build_loss(loss_cfg)
+ loss(fake_tags, fake_joints)
+
+ # test MultiLossFactory
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=2,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=True,
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=2,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=0.001,
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=2,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=0.001,
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=2,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=True,
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=2,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=1.0)
+ loss = build_loss(loss_cfg)
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[False],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[False],
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ fake_outputs = [torch.zeros((1, 34, 64, 64))]
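+    # 34 output channels = 17 heatmap channels followed by 17
+    # associative-embedding tag channels for num_joints=17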
+ fake_heatmaps = [torch.zeros((1, 17, 64, 64))]
+ fake_masks = [torch.ones((1, 64, 64))]
+ fake_joints = [torch.zeros((1, 30, 17, 2))]
+ heatmaps_losses, push_losses, pull_losses = \
+ loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
+ assert heatmaps_losses == [None]
+ assert pull_losses == [None]
+ assert push_losses == [None]
+ loss_cfg = dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])
+ loss = build_loss(loss_cfg)
+ heatmaps_losses, push_losses, pull_losses = \
+ loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
+ assert len(heatmaps_losses) == 1
diff --git a/vendor/ViTPose/tests/test_losses/test_classification_loss.py b/vendor/ViTPose/tests/test_losses/test_classification_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cda4d653bc5e01a61be783150ee79c518ea649b
--- /dev/null
+++ b/vendor/ViTPose/tests/test_losses/test_classification_loss.py
@@ -0,0 +1,40 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+
+def test_bce_loss():
+ from mmpose.models import build_loss
+
+    # test BCE loss without target weight (None)
+ loss_cfg = dict(type='BCELoss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 2))
+ fake_label = torch.zeros((1, 2))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 2)) * 0.5
+ fake_label = torch.zeros((1, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label), -torch.log(torch.tensor(0.5)))
+
+ # test BCE loss with target weight
+ loss_cfg = dict(type='BCELoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.ones((1, 2)) * 0.5
+ fake_label = torch.zeros((1, 2))
+ fake_weight = torch.ones((1, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_weight),
+ -torch.log(torch.tensor(0.5)))
+
+ fake_weight[:, 0] = 0
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_weight),
+ -0.5 * torch.log(torch.tensor(0.5)))
+
+ fake_weight = torch.ones(1)
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_weight),
+ -torch.log(torch.tensor(0.5)))
diff --git a/vendor/ViTPose/tests/test_losses/test_mesh_losses.py b/vendor/ViTPose/tests/test_losses/test_mesh_losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..98907675d26bfe65790edfc2bde7b8179aee4ad8
--- /dev/null
+++ b/vendor/ViTPose/tests/test_losses/test_mesh_losses.py
@@ -0,0 +1,163 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+from numpy.testing import assert_almost_equal
+
+from mmpose.models import build_loss
+from mmpose.models.utils.geometry import batch_rodrigues
+
+
+def test_mesh_loss():
+ """test mesh loss."""
+ loss_cfg = dict(
+ type='MeshLoss',
+ joints_2d_loss_weight=1,
+ joints_3d_loss_weight=1,
+ vertex_loss_weight=1,
+ smpl_pose_loss_weight=1,
+ smpl_beta_loss_weight=1,
+ img_res=256,
+ focal_length=5000)
+
+ loss = build_loss(loss_cfg)
+
+ smpl_pose = torch.zeros([1, 72], dtype=torch.float32)
+ smpl_rotmat = batch_rodrigues(smpl_pose.view(-1, 3)).view(-1, 24, 3, 3)
+ smpl_beta = torch.zeros([1, 10], dtype=torch.float32)
+ camera = torch.tensor([[1, 0, 0]], dtype=torch.float32)
+ vertices = torch.rand([1, 6890, 3], dtype=torch.float32)
+ joints_3d = torch.ones([1, 24, 3], dtype=torch.float32)
+ joints_2d = loss.project_points(joints_3d, camera) + (256 - 1) / 2
+
+ fake_pred = {}
+ fake_pred['pose'] = smpl_rotmat
+ fake_pred['beta'] = smpl_beta
+ fake_pred['camera'] = camera
+ fake_pred['vertices'] = vertices
+ fake_pred['joints_3d'] = joints_3d
+
+ fake_gt = {}
+ fake_gt['pose'] = smpl_pose
+ fake_gt['beta'] = smpl_beta
+ fake_gt['vertices'] = vertices
+ fake_gt['has_smpl'] = torch.ones(1, dtype=torch.float32)
+ fake_gt['joints_3d'] = joints_3d
+ fake_gt['joints_3d_visible'] = torch.ones([1, 24, 1], dtype=torch.float32)
+ fake_gt['joints_2d'] = joints_2d
+ fake_gt['joints_2d_visible'] = torch.ones([1, 24, 1], dtype=torch.float32)
+
+ losses = loss(fake_pred, fake_gt)
+ assert torch.allclose(losses['vertex_loss'], torch.tensor(0.))
+ assert torch.allclose(losses['smpl_pose_loss'], torch.tensor(0.))
+ assert torch.allclose(losses['smpl_beta_loss'], torch.tensor(0.))
+ assert torch.allclose(losses['joints_3d_loss'], torch.tensor(0.))
+ assert torch.allclose(losses['joints_2d_loss'], torch.tensor(0.))
+
+ fake_pred = {}
+ fake_pred['pose'] = smpl_rotmat + 1
+ fake_pred['beta'] = smpl_beta + 1
+ fake_pred['camera'] = camera
+ fake_pred['vertices'] = vertices + 1
+ fake_pred['joints_3d'] = joints_3d.clone()
+
+ joints_3d_t = joints_3d.clone()
+ joints_3d_t[:, 0] = joints_3d_t[:, 0] + 1
+ fake_gt = {}
+ fake_gt['pose'] = smpl_pose
+ fake_gt['beta'] = smpl_beta
+ fake_gt['vertices'] = vertices
+ fake_gt['has_smpl'] = torch.ones(1, dtype=torch.float32)
+ fake_gt['joints_3d'] = joints_3d_t
+ fake_gt['joints_3d_visible'] = torch.ones([1, 24, 1], dtype=torch.float32)
+ fake_gt['joints_2d'] = joints_2d + (256 - 1) / 2
+ fake_gt['joints_2d_visible'] = torch.ones([1, 24, 1], dtype=torch.float32)
+
+ losses = loss(fake_pred, fake_gt)
+ assert torch.allclose(losses['vertex_loss'], torch.tensor(1.))
+ assert torch.allclose(losses['smpl_pose_loss'], torch.tensor(1.))
+ assert torch.allclose(losses['smpl_beta_loss'], torch.tensor(1.))
+ assert torch.allclose(losses['joints_3d_loss'], torch.tensor(0.5 / 24))
+ assert torch.allclose(losses['joints_2d_loss'], torch.tensor(0.5))
+
+
+def test_gan_loss():
+ """test gan loss."""
+ with pytest.raises(NotImplementedError):
+ loss_cfg = dict(
+ type='GANLoss',
+ gan_type='test',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=1)
+ _ = build_loss(loss_cfg)
+
+ input_1 = torch.ones(1, 1)
+ input_2 = torch.ones(1, 3, 6, 6) * 2
+
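+ # note: loss_weight (2.0 here) only scales the generator objective
+ # (is_disc=False); discriminator losses are returned unscaled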
+ # vanilla
+ loss_cfg = dict(
+ type='GANLoss',
+ gan_type='vanilla',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=2.0)
+ gan_loss = build_loss(loss_cfg)
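+ # for all-ones logits, BCE-with-logits gives softplus(-1) ~= 0.3133 for the
+ # real target and softplus(1) ~= 1.3133 for the fake target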
+ loss = gan_loss(input_1, True, is_disc=False)
+ assert_almost_equal(loss.item(), 0.6265233)
+ loss = gan_loss(input_1, False, is_disc=False)
+ assert_almost_equal(loss.item(), 2.6265232)
+ loss = gan_loss(input_1, True, is_disc=True)
+ assert_almost_equal(loss.item(), 0.3132616)
+ loss = gan_loss(input_1, False, is_disc=True)
+ assert_almost_equal(loss.item(), 1.3132616)
+
+ # lsgan
+ loss_cfg = dict(
+ type='GANLoss',
+ gan_type='lsgan',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=2.0)
+ gan_loss = build_loss(loss_cfg)
+ loss = gan_loss(input_2, True, is_disc=False)
+ assert_almost_equal(loss.item(), 2.0)
+ loss = gan_loss(input_2, False, is_disc=False)
+ assert_almost_equal(loss.item(), 8.0)
+ loss = gan_loss(input_2, True, is_disc=True)
+ assert_almost_equal(loss.item(), 1.0)
+ loss = gan_loss(input_2, False, is_disc=True)
+ assert_almost_equal(loss.item(), 4.0)
+
+ # wgan
+ loss_cfg = dict(
+ type='GANLoss',
+ gan_type='wgan',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=2.0)
+ gan_loss = build_loss(loss_cfg)
+ loss = gan_loss(input_2, True, is_disc=False)
+ assert_almost_equal(loss.item(), -4.0)
+ loss = gan_loss(input_2, False, is_disc=False)
+ assert_almost_equal(loss.item(), 4)
+ loss = gan_loss(input_2, True, is_disc=True)
+ assert_almost_equal(loss.item(), -2.0)
+ loss = gan_loss(input_2, False, is_disc=True)
+ assert_almost_equal(loss.item(), 2.0)
+
+ # hinge
+ loss_cfg = dict(
+ type='GANLoss',
+ gan_type='hinge',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=2.0)
+ gan_loss = build_loss(loss_cfg)
+ loss = gan_loss(input_2, True, is_disc=False)
+ assert_almost_equal(loss.item(), -4.0)
+ loss = gan_loss(input_2, False, is_disc=False)
+ assert_almost_equal(loss.item(), -4.0)
+ loss = gan_loss(input_2, True, is_disc=True)
+ assert_almost_equal(loss.item(), 0.0)
+ loss = gan_loss(input_2, False, is_disc=True)
+ assert_almost_equal(loss.item(), 3.0)
diff --git a/vendor/ViTPose/tests/test_losses/test_regression_losses.py b/vendor/ViTPose/tests/test_losses/test_regression_losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..df710ba9e7dfafc8af81d54d395760eb1e95f958
--- /dev/null
+++ b/vendor/ViTPose/tests/test_losses/test_regression_losses.py
@@ -0,0 +1,185 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmpose.models import build_loss
+
+
+def test_smooth_l1_loss():
+ # test SmoothL1Loss without target weight (default None)
+ loss_cfg = dict(type='SmoothL1Loss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(.5))
+
+ # test SmoothL1Loss with target weight
+ loss_cfg = dict(type='SmoothL1Loss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(.5))
+
+
+def test_wing_loss():
+ # test WingLoss without target weight (default None)
+ loss_cfg = dict(type='WingLoss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.gt(loss(fake_pred, fake_label), torch.tensor(.5))
+
+ # test WingLoss with target weight
+ loss_cfg = dict(type='WingLoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.gt(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(.5))
+
+
+def test_soft_wing_loss():
+ # test SoftWingLoss without target weight (default None)
+ loss_cfg = dict(type='SoftWingLoss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.gt(loss(fake_pred, fake_label), torch.tensor(.5))
+
+ # test SoftWingLoss with target weight
+ loss_cfg = dict(type='SoftWingLoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 2))
+ fake_label = torch.zeros((1, 3, 2))
+ assert torch.gt(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(.5))
+
+
+def test_mse_regression_loss():
+ # w/o target weight (default None)
+ loss_cfg = dict(type='MSELoss')
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(1.))
+
+ # w/ target weight
+ loss_cfg = dict(type='MSELoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones_like(fake_label)),
+ torch.tensor(1.))
+
+
+def test_bone_loss():
+ # w/o target weight (default None)
+ loss_cfg = dict(type='BoneLoss', joint_parents=[0, 0, 1])
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(0.))
+
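+ # both bones (joint 1 - joint 0 and joint 2 - joint 1) have length sqrt(3)
+ # in the prediction and 2 * sqrt(3) in the label, so the mean bone-length
+ # error equals 3 ** 0.5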
+ fake_pred = torch.tensor([[[0, 0, 0], [1, 1, 1], [2, 2, 2]]],
+ dtype=torch.float32)
+ fake_label = fake_pred * 2
+ assert torch.allclose(loss(fake_pred, fake_label), torch.tensor(3**0.5))
+
+ # w/ target weight
+ loss_cfg = dict(
+ type='BoneLoss', joint_parents=[0, 0, 1], use_target_weight=True)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 3))
+ fake_label = torch.zeros((1, 3, 3))
+ fake_weight = torch.ones((1, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_weight), torch.tensor(0.))
+
+ fake_pred = torch.tensor([[[0, 0, 0], [1, 1, 1], [2, 2, 2]]],
+ dtype=torch.float32)
+ fake_label = fake_pred * 2
+ fake_weight = torch.ones((1, 2))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, fake_weight), torch.tensor(3**0.5))
+
+
+def test_semi_supervision_loss():
+ loss_cfg = dict(
+ type='SemiSupervisionLoss',
+ joint_parents=[0, 0, 1],
+ warmup_iterations=1)
+ loss = build_loss(loss_cfg)
+
+ unlabeled_pose = torch.rand((1, 3, 3))
+ unlabeled_traj = torch.ones((1, 1, 3))
+ labeled_pose = unlabeled_pose.clone()
+ fake_pred = dict(
+ labeled_pose=labeled_pose,
+ unlabeled_pose=unlabeled_pose,
+ unlabeled_traj=unlabeled_traj)
+
+ intrinsics = torch.tensor([[1, 1, 1, 1, 0.1, 0.1, 0.1, 0, 0]],
+ dtype=torch.float32)
+ unlabeled_target_2d = loss.project_joints(unlabeled_pose + unlabeled_traj,
+ intrinsics)
+ fake_label = dict(
+ unlabeled_target_2d=unlabeled_target_2d, intrinsics=intrinsics)
+
+ # test warmup
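+ # the first call is still within warmup_iterations=1, so no losses are
+ # returned yet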
+ losses = loss(fake_pred, fake_label)
+ assert not losses
+
+ # test semi-supervised loss
+ losses = loss(fake_pred, fake_label)
+ assert torch.allclose(losses['proj_loss'], torch.tensor(0.))
+ assert torch.allclose(losses['bone_loss'], torch.tensor(0.))
diff --git a/vendor/ViTPose/tests/test_losses/test_top_down_losses.py b/vendor/ViTPose/tests/test_losses/test_top_down_losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..a02595fa59404d48ed357fd5294c9ff22a4fab5a
--- /dev/null
+++ b/vendor/ViTPose/tests/test_losses/test_top_down_losses.py
@@ -0,0 +1,98 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmpose.models import build_loss
+
+
+def test_adaptive_wing_loss():
+ # test Adaptive WingLoss without target weight
+ loss_cfg = dict(type='AdaptiveWingLoss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.))
+
+ # test AdaptiveWingLoss with target weight
+ loss_cfg = dict(type='AdaptiveWingLoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.ones((1, 3, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, torch.ones([1, 3, 1])), torch.tensor(0.))
+
+
+def test_mse_loss():
+ # test MSE loss without target weight
+ loss_cfg = dict(type='JointsMSELoss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.))
+
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(1.))
+
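+ # only one of the two joint heatmaps differs (by 1), so the per-joint MSE
+ # averages to 0.5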
+ fake_pred = torch.zeros((1, 2, 64, 64))
+ fake_pred[0, 0] += 1
+ fake_label = torch.zeros((1, 2, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.5))
+
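+ # JointsOHKMMSELoss with the default topk (8) cannot be applied to only 3
+ # joints and should raise ValueError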
+ with pytest.raises(ValueError):
+ loss_cfg = dict(type='JointsOHKMMSELoss')
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, None), torch.tensor(0.))
+
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(type='JointsOHKMMSELoss', topk=-1)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, None), torch.tensor(0.))
+
+ loss_cfg = dict(type='JointsOHKMMSELoss', topk=2)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(1.))
+
+ loss_cfg = dict(type='JointsOHKMMSELoss', topk=2)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.zeros((1, 3, 64, 64))
+ fake_pred[0, 0] += 1
+ fake_label = torch.zeros((1, 3, 64, 64))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.5))
+
+ loss_cfg = dict(type='CombinedTargetMSELoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ target_weight = torch.ones((1, 1, 1))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, target_weight), torch.tensor(0.5))
+
+ loss_cfg = dict(type='CombinedTargetMSELoss', use_target_weight=True)
+ loss = build_loss(loss_cfg)
+ fake_pred = torch.ones((1, 3, 64, 64))
+ fake_label = torch.zeros((1, 3, 64, 64))
+ target_weight = torch.zeros((1, 1, 1))
+ assert torch.allclose(
+ loss(fake_pred, fake_label, target_weight), torch.tensor(0.))
+
+
+def test_smoothl1_loss():
+ # test SmoothL1Loss without target weight
+ loss_cfg = dict(type='SmoothL1Loss')
+ loss = build_loss(loss_cfg)
+
+ fake_pred = torch.zeros((1, 3))
+ fake_label = torch.zeros((1, 3))
+ assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.))
diff --git a/vendor/ViTPose/tests/test_models/test_bottom_up_forward.py b/vendor/ViTPose/tests/test_models/test_bottom_up_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..37e6c5ec8100dd3316bda4f781e7bba93fa1801d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_bottom_up_forward.py
@@ -0,0 +1,122 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.models.detectors import AssociativeEmbedding
+
+
+def test_ae_forward():
+ model_cfg = dict(
+ type='AssociativeEmbedding',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=18),
+ keypoint_head=dict(
+ type='AESimpleHead',
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=True,
+ with_ae_loss=[True],
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0])),
+ train_cfg=dict(),
+ test_cfg=dict(
+ num_joints=17,
+ max_num_people=30,
+ scale_factor=[1],
+ with_heatmaps=[True],
+ with_ae=[True],
+ project2image=True,
+ nms_kernel=5,
+ nms_padding=2,
+ tag_per_joint=True,
+ detection_threshold=0.1,
+ tag_threshold=1,
+ use_detection_val=True,
+ ignore_too_much=False,
+ adjust=True,
+ refine=True,
+ soft_nms=False,
+ flip_test=True,
+ post_process=True,
+ shift_heatmap=True,
+ use_gt_bbox=True,
+ flip_pairs=[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
+ [13, 14], [15, 16]],
+ ))
+
+ detector = AssociativeEmbedding(model_cfg['backbone'],
+ model_cfg['keypoint_head'],
+ model_cfg['train_cfg'],
+ model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ mask = mm_inputs.pop('mask')
+ joints = mm_inputs.pop('joints')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, mask, joints, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 256, 256)):
+ """Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+ target = np.zeros([N, 17, H // 32, W // 32], dtype=np.float32)
+ mask = np.ones([N, H // 32, W // 32], dtype=np.float32)
+ joints = np.zeros([N, 30, 17, 2], dtype=np.float32)
+
+ img_metas = [{
+ 'image_file':
+ 'test.jpg',
+ 'aug_data': [torch.zeros(1, 3, 256, 256)],
+ 'test_scale_factor': [1],
+ 'base_size': (256, 256),
+ 'center':
+ np.array([128, 128]),
+ 'scale':
+ np.array([1.28, 1.28]),
+ 'flip_index':
+ [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+ } for _ in range(N)]
+
+ mm_inputs = {
+ 'imgs': torch.FloatTensor(imgs).requires_grad_(True),
+ 'target': [torch.FloatTensor(target)],
+ 'mask': [torch.FloatTensor(mask)],
+ 'joints': [torch.FloatTensor(joints)],
+ 'img_metas': img_metas
+ }
+ return mm_inputs
diff --git a/vendor/ViTPose/tests/test_models/test_bottom_up_head.py b/vendor/ViTPose/tests/test_models/test_bottom_up_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..4748f31b1e4b8db14a633bfd3befbbcf614693f7
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_bottom_up_head.py
@@ -0,0 +1,483 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models import AEHigherResolutionHead, AESimpleHead
+
+
+def test_ae_simple_head():
+ """test bottom up AE simple head."""
+
+ with pytest.raises(TypeError):
+ # test invalid extra type (extra must be a dict or None)
+ _ = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True],
+ extra=[],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ # test final_conv_kernel
+ with pytest.raises(AssertionError):
+ _ = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': -1},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ assert head.final_layer.padding == (1, 1)
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': 1},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ assert head.final_layer.padding == (0, 0)
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ assert head.final_layer.padding == (0, 0)
+ # test with_ae_loss
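+ # with AE loss enabled the head predicts 17 heatmap channels plus 17
+ # per-joint tag channels (34 total); without it, only the 17 heatmaps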
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 34, 32, 32])
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ with_ae_loss=[False],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 17, 32, 32])
+ # test tag_per_joint
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=False,
+ with_ae_loss=[False],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 17, 32, 32])
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=False,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 18, 32, 32])
+ head = AESimpleHead(
+ in_channels=512,
+ num_joints=17,
+ num_deconv_layers=0,
+ tag_per_joint=False,
+ with_ae_loss=[True],
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=1,
+ ae_loss_type='exp',
+ with_ae_loss=[True],
+ push_loss_factor=[0.001],
+ pull_loss_factor=[0.001],
+ with_heatmaps_loss=[True],
+ heatmaps_loss_factor=[1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert out[0].shape == torch.Size([1, 18, 32, 32])
+
+
+def test_ae_higherresolution_head():
+ """test bottom up AE higherresolution head."""
+
+ # test final_conv_kernel
+ with pytest.raises(AssertionError):
+ _ = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ extra={'final_conv_kernel': 0},
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ extra={'final_conv_kernel': 3},
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.final_layers[0].padding == (1, 1)
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ extra={'final_conv_kernel': 1},
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.final_layers[0].padding == (0, 0)
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.final_layers[0].padding == (0, 0)
+ # test deconv layers
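+ # deconv kernel sizes 4, 3 and 2 are valid (output_padding 0, 1 and 0,
+ # respectively); kernel size 1 raises ValueError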
+ with pytest.raises(ValueError):
+ _ = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ num_deconv_kernels=[1],
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ num_deconv_kernels=[4],
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.deconv_layers[0][0][0].output_padding == (0, 0)
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ num_deconv_kernels=[3],
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.deconv_layers[0][0][0].output_padding == (1, 1)
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ with_ae_loss=[True, False],
+ num_deconv_kernels=[2],
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ assert head.deconv_layers[0][0][0].output_padding == (0, 0)
+ # test tag_per_joint & ae loss
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ tag_per_joint=False,
+ with_ae_loss=[False, False],
+ extra={'final_conv_kernel': 3},
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[False, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 17, 32, 32])
+ assert out[1].shape == torch.Size([1, 17, 64, 64])
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ tag_per_joint=False,
+ with_ae_loss=[True, False],
+ extra={'final_conv_kernel': 3},
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, False],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 18, 32, 32])
+ assert out[1].shape == torch.Size([1, 17, 64, 64])
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True, True],
+ extra={'final_conv_kernel': 3},
+ cat_output=[True],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, True],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 34, 32, 32])
+ assert out[1].shape == torch.Size([1, 34, 64, 64])
+ # cat_output
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True, True],
+ extra={'final_conv_kernel': 3},
+ cat_output=[False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, True],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out[0].shape == torch.Size([1, 34, 32, 32])
+ assert out[1].shape == torch.Size([1, 34, 64, 64])
+ head = AEHigherResolutionHead(
+ in_channels=512,
+ num_joints=17,
+ tag_per_joint=True,
+ with_ae_loss=[True, True],
+ extra={'final_conv_kernel': 3},
+ cat_output=[False],
+ loss_keypoint=dict(
+ type='MultiLossFactory',
+ num_joints=17,
+ num_stages=2,
+ ae_loss_type='exp',
+ with_ae_loss=[True, True],
+ push_loss_factor=[0.001, 0.001],
+ pull_loss_factor=[0.001, 0.001],
+ with_heatmaps_loss=[True, True],
+ heatmaps_loss_factor=[1.0, 1.0]))
+ head.init_weights()
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert out[0].shape == torch.Size([1, 34, 32, 32])
+ assert out[1].shape == torch.Size([1, 34, 64, 64])
+
+
+def _demo_inputs(input_shape=(1, 3, 64, 64)):
+ """Create a superset of inputs needed to run backbone.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 3, 64, 64).
+ Returns:
+ Random input tensor with the size of input_shape.
+ """
+ inps = np.random.random(input_shape)
+ inps = torch.FloatTensor(inps)
+ return inps
diff --git a/vendor/ViTPose/tests/test_models/test_interhand_3d_forward.py b/vendor/ViTPose/tests/test_models/test_interhand_3d_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2b272487d6480e4d3aab19eded077918fbf6252
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_interhand_3d_forward.py
@@ -0,0 +1,107 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.models import build_posenet
+
+
+def test_interhand3d_forward():
+ # model settings
+ model_cfg = dict(
+ type='Interhand3D',
+ pretrained='torchvision://resnet50',
+ backbone=dict(type='ResNet', depth=50),
+ keypoint_head=dict(
+ type='Interhand3DHead',
+ keypoint_head_cfg=dict(
+ in_channels=2048,
+ out_channels=21 * 64,
+ depth_size=64,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ ),
+ root_head_cfg=dict(
+ in_channels=2048,
+ heatmap_size=64,
+ hidden_dims=(512, ),
+ ),
+ hand_type_head_cfg=dict(
+ in_channels=2048,
+ num_labels=2,
+ hidden_dims=(512, ),
+ ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True),
+ loss_root_depth=dict(type='L1Loss'),
+ loss_hand_type=dict(type='BCELoss', use_target_weight=True),
+ ),
+ train_cfg={},
+ test_cfg=dict(flip_test=True, shift_heatmap=True))
+
+ detector = build_posenet(model_cfg)
+ detector.init_weights()
+
+ input_shape = (2, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 256, 256), num_outputs=None):
+ """Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+ imgs = torch.FloatTensor(imgs)
+
+ target = [
+ imgs.new_zeros(N, 42, 64, H // 4, W // 4),
+ imgs.new_zeros(N, 1),
+ imgs.new_zeros(N, 2),
+ ]
+ target_weight = [
+ imgs.new_ones(N, 42, 1),
+ imgs.new_ones(N, 1),
+ imgs.new_ones(N),
+ ]
+
+ img_metas = [{
+ 'img_shape': (H, W, C),
+ 'center': np.array([W / 2, H / 2]),
+ 'scale': np.array([0.5, 0.5]),
+ 'bbox_score': 1.0,
+ 'bbox_id': 0,
+ 'flip_pairs': [],
+ 'inference_channel': np.arange(42),
+ 'image_file': '.png',
+ 'heatmap3d_depth_bound': 400.0,
+ 'root_depth_bound': 400.0,
+ } for _ in range(N)]
+
+ mm_inputs = {
+ 'imgs': imgs.requires_grad_(True),
+ 'target': target,
+ 'target_weight': target_weight,
+ 'img_metas': img_metas
+ }
+ return mm_inputs
diff --git a/vendor/ViTPose/tests/test_models/test_interhand_3d_head.py b/vendor/ViTPose/tests/test_models/test_interhand_3d_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..69242324ee9ca6aa4b945dd9ed3e5b0d10cf31fb
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_interhand_3d_head.py
@@ -0,0 +1,91 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.models import Interhand3DHead
+
+
+def test_interhand_3d_head():
+ """Test interhand 3d head."""
+ N = 4
+ input_shape = (N, 2048, 8, 8)
+ inputs = torch.rand(input_shape, dtype=torch.float32)
+ target = [
+ inputs.new_zeros(N, 42, 64, 64, 64),
+ inputs.new_zeros(N, 1),
+ inputs.new_zeros(N, 2),
+ ]
+ target_weight = [
+ inputs.new_ones(N, 42, 1),
+ inputs.new_ones(N, 1),
+ inputs.new_ones(N),
+ ]
+
+ img_metas = [{
+ 'img_shape': (256, 256, 3),
+ 'center': np.array([112, 112]),
+ 'scale': np.array([0.5, 0.5]),
+ 'bbox_score': 1.0,
+ 'bbox_id': 0,
+ 'flip_pairs': [],
+ 'inference_channel': np.arange(42),
+ 'image_file': '.png',
+ 'heatmap3d_depth_bound': 400.0,
+ 'root_depth_bound': 400.0,
+ } for _ in range(N)]
+
+ head = Interhand3DHead(
+ keypoint_head_cfg=dict(
+ in_channels=2048,
+ out_channels=21 * 64,
+ depth_size=64,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ ),
+ root_head_cfg=dict(
+ in_channels=2048,
+ heatmap_size=64,
+ hidden_dims=(512, ),
+ ),
+ hand_type_head_cfg=dict(
+ in_channels=2048,
+ num_labels=2,
+ hidden_dims=(512, ),
+ ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True),
+ loss_root_depth=dict(type='L1Loss'),
+ loss_hand_type=dict(type='BCELoss', use_target_weight=True),
+ train_cfg={},
+ test_cfg={},
+ )
+ head.init_weights()
+
+ # test forward
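+ # the head returns 3D heatmaps for 42 joints (21 per hand) over
+ # depth_size=64 bins, a relative root depth, and a 2-dim hand-type output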
+ output = head(inputs)
+ assert isinstance(output, list)
+ assert len(output) == 3
+ assert output[0].shape == (N, 42, 64, 64, 64)
+ assert output[1].shape == (N, 1)
+ assert output[2].shape == (N, 2)
+
+ # test loss computation
+ losses = head.get_loss(output, target, target_weight)
+ assert 'hand_loss' in losses
+ assert 'rel_root_loss' in losses
+ assert 'hand_type_loss' in losses
+
+ # test inference model
+ flip_pairs = [[i, 21 + i] for i in range(21)]
+ output = head.inference_model(inputs, flip_pairs)
+ assert isinstance(output, list)
+ assert len(output) == 3
+ assert output[0].shape == (N, 42, 64, 64, 64)
+ assert output[1].shape == (N, 1)
+ assert output[2].shape == (N, 2)
+
+ # test decode
+ result = head.decode(img_metas, output)
+ assert 'preds' in result
+ assert 'rel_root_depth' in result
+ assert 'hand_type' in result
diff --git a/vendor/ViTPose/tests/test_models/test_layer.py b/vendor/ViTPose/tests/test_models/test_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b88fd1b95881946951cb65d87f3c93587815a83f
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_layer.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import build_conv_layer, build_upsample_layer
+
+
+def test_build_upsample_layer():
+ layer1 = nn.ConvTranspose2d(
+ in_channels=3,
+ out_channels=10,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1,
+ bias=False)
+
+ layer2 = build_upsample_layer(
+ dict(type='deconv'),
+ in_channels=3,
+ out_channels=10,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1,
+ bias=False)
+ layer2.load_state_dict(layer1.state_dict())
+
+ input_shape = (1, 3, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out1 = layer1(inputs)
+ out2 = layer2(inputs)
+ assert torch.equal(out1, out2)
+
+
+def test_build_conv_layer():
+ layer1 = nn.Conv2d(
+ in_channels=3, out_channels=10, kernel_size=3, stride=1, padding=1)
+
+ layer2 = build_conv_layer(
+ cfg=dict(type='Conv2d'),
+ in_channels=3,
+ out_channels=10,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+
+ layer2.load_state_dict(layer1.state_dict())
+
+ input_shape = (1, 3, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out1 = layer1(inputs)
+ out2 = layer2(inputs)
+ assert torch.equal(out1, out2)
+
+
+def _demo_inputs(input_shape=(1, 3, 64, 64)):
+ """Create a superset of inputs needed to run backbone.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 3, 64, 64).
+ Returns:
+ Random input tensor with the size of input_shape.
+ """
+ inps = np.random.random(input_shape)
+ inps = torch.FloatTensor(inps)
+ return inps
diff --git a/vendor/ViTPose/tests/test_models/test_mesh_forward.py b/vendor/ViTPose/tests/test_models/test_mesh_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..f08f7693902e0663da17c05e060b0809d5e963f6
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_mesh_forward.py
@@ -0,0 +1,153 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+
+import numpy as np
+import torch
+
+from mmpose.core.optimizer import build_optimizers
+from mmpose.models.detectors.mesh import ParametricMesh
+from tests.utils.mesh_utils import generate_smpl_weight_file
+
+
+def test_parametric_mesh_forward():
+ """Test parametric mesh forward."""
+
+ tmpdir = tempfile.TemporaryDirectory()
+ # generate weight file for SMPL model.
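+ # (assumed to write a minimal stand-in SMPL model into tmpdir so the real
+ # SMPL assets are not required by the test)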
+ generate_smpl_weight_file(tmpdir.name)
+
+ # Test ParametricMesh without discriminator
+ model_cfg = dict(
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=50),
+ mesh_head=dict(
+ type='HMRMeshHead',
+ in_channels=2048,
+ smpl_mean_params='tests/data/smpl/smpl_mean_params.npz'),
+ disc=None,
+ smpl=dict(
+ type='SMPL',
+ smpl_path=tmpdir.name,
+ joints_regressor=osp.join(tmpdir.name,
+ 'test_joint_regressor.npy')),
+ train_cfg=dict(disc_step=1),
+ test_cfg=dict(
+ flip_test=False,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11),
+ loss_mesh=dict(
+ type='MeshLoss',
+ joints_2d_loss_weight=1,
+ joints_3d_loss_weight=1,
+ vertex_loss_weight=1,
+ smpl_pose_loss_weight=1,
+ smpl_beta_loss_weight=1,
+ focal_length=5000,
+ img_res=256),
+ loss_gan=None)
+
+ detector = ParametricMesh(**model_cfg)
+ detector.init_weights()
+
+ optimizers_config = dict(generator=dict(type='Adam', lr=0.0001))
+ optims = build_optimizers(detector, optimizers_config)
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+ # Test forward train
+ output = detector.train_step(mm_inputs, optims)
+ assert isinstance(output, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ output = detector.val_step(data_batch=mm_inputs)
+ assert isinstance(output, dict)
+
+ imgs = mm_inputs.pop('img')
+ img_metas = mm_inputs.pop('img_metas')
+ output = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ assert isinstance(output, dict)
+
+ # Test ParametricMesh with discriminator
+ model_cfg['disc'] = dict()
+ model_cfg['loss_gan'] = dict(
+ type='GANLoss',
+ gan_type='lsgan',
+ real_label_val=1.0,
+ fake_label_val=0.0,
+ loss_weight=1)
+
+ optimizers_config['discriminator'] = dict(type='Adam', lr=0.0001)
+
+ detector = ParametricMesh(**model_cfg)
+ detector.init_weights()
+ optims = build_optimizers(detector, optimizers_config)
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+ # Test forward train
+ output = detector.train_step(mm_inputs, optims)
+ assert isinstance(output, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ output = detector.val_step(data_batch=mm_inputs)
+ assert isinstance(output, dict)
+
+ imgs = mm_inputs.pop('img')
+ img_metas = mm_inputs.pop('img_metas')
+ output = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ assert isinstance(output, dict)
+
+ _ = detector.forward_dummy(imgs)
+
+ tmpdir.cleanup()
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 256, 256)):
+ """Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+ joints_2d = np.zeros([N, 24, 2])
+ joints_2d_visible = np.ones([N, 24, 1])
+ joints_3d = np.zeros([N, 24, 3])
+ joints_3d_visible = np.ones([N, 24, 1])
+ pose = np.zeros([N, 72])
+ beta = np.zeros([N, 10])
+ has_smpl = np.ones([N])
+ mosh_theta = np.zeros([N, 3 + 72 + 10])
+
+ img_metas = [{
+ 'img_shape': (H, W, C),
+ 'center': np.array([W / 2, H / 2]),
+ 'scale': np.array([0.5, 0.5]),
+ 'bbox_score': 1.0,
+ 'flip_pairs': [],
+ 'inference_channel': np.arange(17),
+ 'image_file': '.png',
+ } for _ in range(N)]
+
+ mm_inputs = {
+ 'img': torch.FloatTensor(imgs).requires_grad_(True),
+ 'joints_2d': torch.FloatTensor(joints_2d),
+ 'joints_2d_visible': torch.FloatTensor(joints_2d_visible),
+ 'joints_3d': torch.FloatTensor(joints_3d),
+ 'joints_3d_visible': torch.FloatTensor(joints_3d_visible),
+ 'pose': torch.FloatTensor(pose),
+ 'beta': torch.FloatTensor(beta),
+ 'has_smpl': torch.FloatTensor(has_smpl),
+ 'img_metas': img_metas,
+ 'mosh_theta': torch.FloatTensor(mosh_theta)
+ }
+
+ return mm_inputs
diff --git a/vendor/ViTPose/tests/test_models/test_mesh_head.py b/vendor/ViTPose/tests/test_models/test_mesh_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d1fc0e188d46a2481ee3927e35681a36407e853
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_mesh_head.py
@@ -0,0 +1,76 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models import HMRMeshHead
+from mmpose.models.misc.discriminator import SMPLDiscriminator
+
+
+def test_mesh_hmr_head():
+ """Test hmr mesh head."""
+ head = HMRMeshHead(in_channels=512)
+ head.init_weights()
+
+ input_shape = (1, 512, 8, 8)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ smpl_rotmat, smpl_shape, camera = out
+ assert smpl_rotmat.shape == torch.Size([1, 24, 3, 3])
+ assert smpl_shape.shape == torch.Size([1, 10])
+ assert camera.shape == torch.Size([1, 3])
+ """Test hmr mesh head with assigned mean parameters and n_iter """
+ head = HMRMeshHead(
+ in_channels=512,
+ smpl_mean_params='tests/data/smpl/smpl_mean_params.npz',
+ n_iter=3)
+ head.init_weights()
+ input_shape = (1, 512, 8, 8)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ smpl_rotmat, smpl_shape, camera = out
+ assert smpl_rotmat.shape == torch.Size([1, 24, 3, 3])
+ assert smpl_shape.shape == torch.Size([1, 10])
+ assert camera.shape == torch.Size([1, 3])
+
+ # test discriminator with SMPL pose parameters
+ # in rotation matrix representation
+ disc = SMPLDiscriminator(
+ beta_channel=(10, 10, 5, 1),
+ per_joint_channel=(9, 32, 32, 16, 1),
+ full_pose_channel=(23 * 16, 256, 1))
+ pred_theta = (camera, smpl_rotmat, smpl_shape)
+ pred_score = disc(pred_theta)
+ assert pred_score.shape[1] == 25
+
+ # test discriminator with SMPL pose parameters
+ # in axis-angle representation
+ pred_theta = (camera, camera.new_zeros([1, 72]), smpl_shape)
+ pred_score = disc(pred_theta)
+ assert pred_score.shape[1] == 25
+
+ with pytest.raises(TypeError):
+ _ = SMPLDiscriminator(
+ beta_channel=[10, 10, 5, 1],
+ per_joint_channel=(9, 32, 32, 16, 1),
+ full_pose_channel=(23 * 16, 256, 1))
+
+ with pytest.raises(ValueError):
+ _ = SMPLDiscriminator(
+ beta_channel=(10, ),
+ per_joint_channel=(9, 32, 32, 16, 1),
+ full_pose_channel=(23 * 16, 256, 1))
+
+
+def _demo_inputs(input_shape=(1, 3, 64, 64)):
+ """Create a superset of inputs needed to run mesh head.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 3, 64, 64).
+ Returns:
+ Random input tensor with the size of input_shape.
+ """
+ inps = np.random.random(input_shape)
+ inps = torch.FloatTensor(inps)
+ return inps
diff --git a/vendor/ViTPose/tests/test_models/test_multitask_forward.py b/vendor/ViTPose/tests/test_models/test_multitask_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..97cfd7d0b0d150f8dc3439e91bbfd7f20ccaa8ac
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_multitask_forward.py
@@ -0,0 +1,116 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.models.detectors import MultiTask
+
+
+def test_multitask_forward():
+ """Test multitask forward."""
+
+ # build MultiTask detector
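+ # head2neck is expected to map a head index to a neck index, so head 0
+ # consumes the GlobalAveragePooling neck output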
+ model_cfg = dict(
+ backbone=dict(type='ResNet', depth=50),
+ heads=[
+ dict(
+ type='DeepposeRegressionHead',
+ in_channels=2048,
+ num_joints=17,
+ loss_keypoint=dict(
+ type='SmoothL1Loss', use_target_weight=False)),
+ ],
+ necks=[dict(type='GlobalAveragePooling')],
+ head2neck={0: 0},
+ pretrained=None,
+ )
+ model = MultiTask(**model_cfg)
+
+ # build inputs and target
+ mm_inputs = _demo_mm_inputs()
+ inputs = mm_inputs['img']
+ target = [mm_inputs['target_keypoints']]
+ target_weight = [mm_inputs['target_weight']]
+ img_metas = mm_inputs['img_metas']
+
+ # Test forward train
+ losses = model(inputs, target, target_weight, return_loss=True)
+ assert 'reg_loss' in losses and 'acc_pose' in losses
+
+ # Test forward test
+ outputs = model(inputs, img_metas=img_metas, return_loss=False)
+ assert 'preds' in outputs
+
+ # Test dummy forward
+ outputs = model.forward_dummy(inputs)
+ assert outputs[0].shape == torch.Size([1, 17, 2])
+
+ # Build multitask detector with no neck
+ model_cfg = dict(
+ backbone=dict(type='ResNet', depth=50),
+ heads=[
+ dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=2048,
+ out_channels=17,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ loss_keypoint=dict(
+ type='JointsMSELoss', use_target_weight=True))
+ ],
+ pretrained=None,
+ )
+ model = MultiTask(**model_cfg)
+
+ # build inputs and target
+ target = [mm_inputs['target_heatmap']]
+
+ # Test forward train
+ losses = model(inputs, target, target_weight, return_loss=True)
+ assert 'heatmap_loss' in losses and 'acc_pose' in losses
+
+ # Test forward test
+ outputs = model(inputs, img_metas=img_metas, return_loss=False)
+ assert 'preds' in outputs
+
+ # Test dummy forward
+ outputs = model.forward_dummy(inputs)
+ assert outputs[0].shape == torch.Size([1, 17, 64, 64])
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 256, 256)):
+ """Create a superset of inputs needed to run test or train.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+
+ target_keypoints = np.zeros([N, 17, 2])
+ target_heatmap = np.zeros([N, 17, H // 4, W // 4])
+ target_weight = np.ones([N, 17, 1])
+
+ img_metas = [{
+ 'img_shape': (H, W, C),
+ 'center': np.array([W / 2, H / 2]),
+ 'scale': np.array([0.5, 0.5]),
+ 'bbox_score': 1.0,
+ 'bbox_id': 0,
+ 'flip_pairs': [],
+ 'inference_channel': np.arange(17),
+ 'image_file': '.png',
+ } for _ in range(N)]
+
+ mm_inputs = {
+ 'img': torch.FloatTensor(imgs).requires_grad_(True),
+ 'target_keypoints': torch.FloatTensor(target_keypoints),
+ 'target_heatmap': torch.FloatTensor(target_heatmap),
+ 'target_weight': torch.FloatTensor(target_weight),
+ 'img_metas': img_metas,
+ }
+ return mm_inputs
diff --git a/vendor/ViTPose/tests/test_models/test_multiview_pose.py b/vendor/ViTPose/tests/test_models/test_multiview_pose.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad897775573b43db178b38bfbeec065b4f0fd017
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_multiview_pose.py
@@ -0,0 +1,129 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+
+from mmcv import Config
+
+from mmpose.datasets import DATASETS, build_dataloader
+from mmpose.models import builder
+
+
+def test_voxelpose_forward():
+ dataset = 'Body3DMviewDirectPanopticDataset'
+ dataset_class = DATASETS.get(dataset)
+ dataset_info = Config.fromfile(
+ 'configs/_base_/datasets/panoptic_body3d.py').dataset_info
+ space_size = [8000, 8000, 2000]
+ space_center = [0, -500, 800]
+ cube_size = [20, 20, 8]
+ data_cfg = dict(
+ image_size=[960, 512],
+ heatmap_size=[[240, 128]],
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+ num_joints=15,
+ seq_list=['160906_band1'],
+ cam_list=[(0, 12), (0, 6)],
+ num_cameras=2,
+ seq_frame_interval=1,
+ subset='train',
+ need_2d_label=True,
+ need_camera_param=True,
+ root_id=2)
+
+ pipeline = [
+ dict(
+ type='MultiItemProcess',
+ pipeline=[
+ dict(
+ type='BottomUpGenerateTarget', sigma=3, max_num_people=20)
+ ]),
+ dict(
+ type='DiscardDuplicatedItems',
+ keys_list=[
+ 'joints_3d', 'joints_3d_visible', 'ann_info', 'roots_3d',
+ 'num_persons', 'sample_id'
+ ]),
+ dict(
+ type='GenerateVoxel3DHeatmapTarget',
+ sigma=200.0,
+ joint_indices=[2]),
+ dict(type='RenameKeys', key_pairs=[('targets', 'input_heatmaps')]),
+ dict(
+ type='Collect',
+ keys=['targets_3d', 'input_heatmaps'],
+ meta_keys=[
+ 'camera', 'center', 'scale', 'joints_3d', 'num_persons',
+ 'joints_3d_visible', 'roots_3d', 'sample_id'
+ ]),
+ ]
+
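+ # DetectAndRegress couples a center detector with a per-person regressor:
+ # the human_detector localizes person centers in the full voxel space and
+ # the pose_regressor estimates each person's 3D pose in a local sub-volume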
+ model_cfg = dict(
+ type='DetectAndRegress',
+ backbone=None,
+ human_detector=dict(
+ type='VoxelCenterDetector',
+ image_size=[960, 512],
+ heatmap_size=[240, 128],
+ space_size=space_size,
+ cube_size=cube_size,
+ space_center=space_center,
+ center_net=dict(
+ type='V2VNet', input_channels=15, output_channels=1),
+ center_head=dict(
+ type='CuboidCenterHead',
+ space_size=space_size,
+ space_center=space_center,
+ cube_size=cube_size,
+ max_num=3,
+ max_pool_kernel=3),
+ train_cfg=dict(dist_threshold=500000000.0),
+ test_cfg=dict(center_threshold=0.0),
+ ),
+ pose_regressor=dict(
+ type='VoxelSinglePose',
+ image_size=[960, 512],
+ heatmap_size=[240, 128],
+ sub_space_size=[2000, 2000, 2000],
+ sub_cube_size=[20, 20, 8],
+ num_joints=15,
+ pose_net=dict(
+ type='V2VNet', input_channels=15, output_channels=15),
+ pose_head=dict(type='CuboidPoseHead', beta=100.0),
+ train_cfg=None,
+ test_cfg=None))
+
+ model = builder.build_posenet(model_cfg)
+ with tempfile.TemporaryDirectory() as tmpdir:
+ dataset = dataset_class(
+ ann_file=tmpdir + '/tmp_train.pkl',
+ img_prefix='tests/data/panoptic_body3d/',
+ data_cfg=data_cfg,
+ pipeline=pipeline,
+ dataset_info=dataset_info,
+ test_mode=False)
+
+ data_loader = build_dataloader(
+ dataset,
+ seed=None,
+ dist=False,
+ shuffle=False,
+ drop_last=False,
+ workers_per_gpu=1,
+ samples_per_gpu=1)
+
+ for data in data_loader:
+ # test forward_train
+ _ = model(
+ img=None,
+ img_metas=data['img_metas'].data[0],
+ return_loss=True,
+ targets_3d=data['targets_3d'],
+ input_heatmaps=data['input_heatmaps'])
+
+ # test forward_test
+ _ = model(
+ img=None,
+ img_metas=data['img_metas'].data[0],
+ return_loss=False,
+ input_heatmaps=data['input_heatmaps'])
diff --git a/vendor/ViTPose/tests/test_models/test_pose_lifter_forward.py b/vendor/ViTPose/tests/test_models/test_pose_lifter_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..04ebc658e16ca062fc211075d05096c4e3e471fc
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_pose_lifter_forward.py
@@ -0,0 +1,197 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+import torch
+
+from mmpose.models import build_posenet
+
+
+def _create_inputs(joint_num_in,
+ joint_channel_in,
+ joint_num_out,
+ joint_channel_out,
+ seq_len,
+ batch_size,
+ semi=False):
+ rng = np.random.RandomState(0)
+ pose_in = rng.rand(batch_size, joint_num_in * joint_channel_in, seq_len)
+ target = np.zeros((batch_size, joint_num_out, joint_channel_out),
+ dtype=np.float32)
+ target_weight = np.ones((batch_size, joint_num_out, joint_channel_out),
+ dtype=np.float32)
+
+ meta_info = {
+ 'root_position': np.zeros((1, joint_channel_out), np.float32),
+ 'root_position_index': 0,
+ 'target_mean': np.zeros((joint_num_out, joint_channel_out),
+ np.float32),
+ 'target_std': np.ones((joint_num_out, joint_channel_out), np.float32)
+ }
+ metas = [meta_info.copy() for _ in range(batch_size)]
+ inputs = {
+ 'input': torch.FloatTensor(pose_in).requires_grad_(True),
+ 'target': torch.FloatTensor(target),
+ 'target_weight': torch.FloatTensor(target_weight),
+ 'metas': metas,
+ }
+
+ if semi:
+ traj_target = np.zeros((batch_size, 1, joint_channel_out), np.float32)
+ unlabeled_pose_in = rng.rand(batch_size,
+ joint_num_in * joint_channel_in, seq_len)
+ unlabeled_target_2d = np.zeros(
+ (batch_size, joint_num_out, joint_channel_in), dtype=np.float32)
+ intrinsics = np.ones((batch_size, 4))
+
+ inputs['traj_target'] = torch.FloatTensor(traj_target)
+ inputs['unlabeled_input'] = torch.FloatTensor(
+ unlabeled_pose_in).requires_grad_(True)
+ inputs['unlabeled_target_2d'] = torch.FloatTensor(unlabeled_target_2d)
+ inputs['intrinsics'] = torch.FloatTensor(intrinsics)
+
+ return inputs
+
+
+def test_pose_lifter_forward():
+ # Test forward train for supervised learning with pose model only
+ model_cfg = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(type='TCN', in_channels=2 * 17),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=16,
+ max_norm=1.0,
+ loss_keypoint=dict(type='MPJPELoss'),
+ test_cfg=dict(restore_global_position=True)),
+ train_cfg=dict(),
+ test_cfg=dict())
+
+ cfg = mmcv.Config({'model': model_cfg})
+ detector = build_posenet(cfg.model)
+
+ detector.init_weights()
+
+ inputs = _create_inputs(
+ joint_num_in=17,
+ joint_channel_in=2,
+ joint_num_out=16,
+ joint_channel_out=3,
+ seq_len=27,
+ batch_size=8)
+
+ losses = detector.forward(
+ inputs['input'],
+ inputs['target'],
+ inputs['target_weight'],
+ inputs['metas'],
+ return_loss=True)
+
+ assert isinstance(losses, dict)
+
+ # Test forward test for supervised learning with pose model only
+ with torch.no_grad():
+ _ = detector.forward(
+ inputs['input'],
+ inputs['target'],
+ inputs['target_weight'],
+ inputs['metas'],
+ return_loss=False)
+ _ = detector.forward_dummy(inputs['input'])
+
+ # Test forward train for semi-supervised learning
+ model_cfg = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(type='TCN', in_channels=2 * 17),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss'),
+ test_cfg=dict(restore_global_position=True)),
+ traj_backbone=dict(type='TCN', in_channels=2 * 17),
+ traj_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=1,
+ loss_keypoint=dict(type='MPJPELoss'),
+ is_trajectory=True),
+ loss_semi=dict(
+ type='SemiSupervisionLoss',
+ joint_parents=[
+ 0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15
+ ]),
+ train_cfg=dict(),
+ test_cfg=dict())
+
+ cfg = mmcv.Config({'model': model_cfg})
+ detector = build_posenet(cfg.model)
+
+ detector.init_weights()
+
+ inputs = _create_inputs(
+ joint_num_in=17,
+ joint_channel_in=2,
+ joint_num_out=17,
+ joint_channel_out=3,
+ seq_len=27,
+ batch_size=8,
+ semi=True)
+
+ losses = detector.forward(**inputs, return_loss=True)
+
+ assert isinstance(losses, dict)
+ assert 'proj_loss' in losses
+
+ # Test forward test for semi-supervised learning
+ with torch.no_grad():
+ _ = detector.forward(**inputs, return_loss=False)
+ _ = detector.forward_dummy(inputs['input'])
+
+ # Test forward train for supervised learning with pose model and trajectory
+ # model sharing one backbone
+ model_cfg = dict(
+ type='PoseLifter',
+ pretrained=None,
+ backbone=dict(type='TCN', in_channels=2 * 17),
+ keypoint_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss'),
+ test_cfg=dict(restore_global_position=True)),
+ traj_head=dict(
+ type='TemporalRegressionHead',
+ in_channels=1024,
+ num_joints=1,
+ loss_keypoint=dict(type='MPJPELoss'),
+ is_trajectory=True),
+ train_cfg=dict(),
+ test_cfg=dict())
+
+ cfg = mmcv.Config({'model': model_cfg})
+ detector = build_posenet(cfg.model)
+
+ detector.init_weights()
+
+ inputs = _create_inputs(
+ joint_num_in=17,
+ joint_channel_in=2,
+ joint_num_out=17,
+ joint_channel_out=3,
+ seq_len=27,
+ batch_size=8,
+ semi=True)
+
+ losses = detector.forward(**inputs, return_loss=True)
+
+ assert isinstance(losses, dict)
+ assert 'traj_loss' in losses
+
+    # Test forward test for supervised learning with pose model and
+    # trajectory model sharing one backbone
+ with torch.no_grad():
+ _ = detector.forward(**inputs, return_loss=False)
+ _ = detector.forward_dummy(inputs['input'])
diff --git a/vendor/ViTPose/tests/test_models/test_temporal_regression_head.py b/vendor/ViTPose/tests/test_models/test_temporal_regression_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..65f7d7823b20946518b4e545ca7d3638f1e0fd8d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_temporal_regression_head.py
@@ -0,0 +1,104 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models import TemporalRegressionHead
+
+
+def test_temporal_regression_head():
+ """Test temporal head."""
+
+ # w/o global position restoration
+ head = TemporalRegressionHead(
+ in_channels=1024,
+ num_joints=17,
+ loss_keypoint=dict(type='MPJPELoss', use_target_weight=True),
+ test_cfg=dict(restore_global_position=False))
+
+ head.init_weights()
+
+ with pytest.raises(AssertionError):
+ # ndim of the input tensor should be 3
+ input_shape = (1, 1024, 1, 1)
+ inputs = _demo_inputs(input_shape)
+ _ = head(inputs)
+
+ with pytest.raises(AssertionError):
+ # size of the last dim should be 1
+ input_shape = (1, 1024, 3)
+ inputs = _demo_inputs(input_shape)
+ _ = head(inputs)
+
+ input_shape = (1, 1024, 1)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 17, 3])
+
+ loss = head.get_loss(out, out, None)
+ assert torch.allclose(loss['reg_loss'], torch.tensor(0.))
+
+ _ = head.inference_model(inputs)
+ _ = head.inference_model(inputs, [(0, 1), (2, 3)])
+ metas = [{}]
+
+ acc = head.get_accuracy(out, out, None, metas=metas)
+ assert acc['mpjpe'] == 0.
+ np.testing.assert_almost_equal(acc['p_mpjpe'], 0., decimal=6)
+
+ # w/ global position restoration
+ head = TemporalRegressionHead(
+ in_channels=1024,
+ num_joints=16,
+ loss_keypoint=dict(type='MPJPELoss', use_target_weight=True),
+ test_cfg=dict(restore_global_position=True))
+ head.init_weights()
+
+ input_shape = (1, 1024, 1)
+ inputs = _demo_inputs(input_shape)
+ metas = [{
+ 'root_position': np.zeros((1, 3)),
+ 'root_position_index': 0,
+ 'root_weight': 1.
+ }]
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 16, 3])
+
+ inference_out = head.inference_model(inputs)
+ acc = head.get_accuracy(out, out, torch.ones_like(out), metas)
+ assert acc['mpjpe'] == 0.
+ np.testing.assert_almost_equal(acc['p_mpjpe'], 0.)
+
+ _ = head.decode(metas, inference_out)
+
+ # trajectory model (only predict root position)
+ head = TemporalRegressionHead(
+ in_channels=1024,
+ num_joints=1,
+ loss_keypoint=dict(type='MPJPELoss', use_target_weight=True),
+ is_trajectory=True,
+ test_cfg=dict(restore_global_position=False))
+
+ head.init_weights()
+
+ input_shape = (1, 1024, 1)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 1, 3])
+
+ loss = head.get_loss(out, out.squeeze(1), torch.ones_like(out))
+ assert torch.allclose(loss['traj_loss'], torch.tensor(0.))
+
+
+def _demo_inputs(input_shape=(1, 1024, 1)):
+ """Create a superset of inputs needed to run head.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 1024, 1).
+ Returns:
+ Random input tensor with the size of input_shape.
+ """
+ inps = np.random.random(input_shape)
+ inps = torch.FloatTensor(inps)
+ return inps
diff --git a/vendor/ViTPose/tests/test_models/test_top_down_forward.py b/vendor/ViTPose/tests/test_models/test_top_down_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..eda2b8fb02f34be9de8b8510c301e3f5242c2ac1
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_top_down_forward.py
@@ -0,0 +1,517 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+import torch
+
+from mmpose.models.detectors import PoseWarper, TopDown
+
+
+def test_vipnas_forward():
+ # model settings
+
+ channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
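+    # ViPNAS_ResNet backbone paired with the matching ViPNAS heatmap head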
+ model_cfg = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ViPNAS_ResNet', depth=50),
+ keypoint_head=dict(
+ type='ViPNASHeatmapSimpleHead',
+ in_channels=608,
+ out_channels=channel_cfg['num_output_channels'],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+ detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
+ model_cfg['train_cfg'], model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+
+
+def test_topdown_forward():
+ model_cfg = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(type='ResNet', depth=18),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=512,
+ out_channels=17,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+ detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
+ model_cfg['train_cfg'], model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+
+    # HourglassNet backbone with a multi-stage heatmap head
+ model_cfg = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=False)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+ detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
+ model_cfg['train_cfg'], model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+
+ model_cfg = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapMultiStageHead',
+ in_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=[
+ dict(
+ type='JointsMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+ detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
+ model_cfg['train_cfg'], model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 256, 256)
+ mm_inputs = _demo_mm_inputs(input_shape, num_outputs=None)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+
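+    # multi-stage multi-unit (RSN) backbone with an MSMU head and OHKM loss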
+ model_cfg = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ num_steps=4,
+ norm_cfg=dict(type='BN')),
+ keypoint_head=dict(
+ type='TopdownHeatmapMSMUHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_units=4,
+ use_prm=False,
+ norm_cfg=dict(type='BN'),
+ loss_keypoint=[dict(type='JointsMSELoss', use_target_weight=True)]
+ * 3 + [dict(type='JointsOHKMMSELoss', use_target_weight=True)]),
+ train_cfg=dict(num_units=4),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=False,
+ unbiased_decoding=False,
+ modulate_kernel=5))
+
+ detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
+ model_cfg['train_cfg'], model_cfg['test_cfg'],
+ model_cfg['pretrained'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 256, 192)
+ mm_inputs = _demo_mm_inputs(input_shape, num_outputs=4)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+
+def test_posewarper_forward():
+ # test PoseWarper
+ model_cfg = dict(
+ type='PoseWarper',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ frozen_stages=4,
+ ),
+ concat_tensors=True,
+ neck=dict(
+ type='PoseWarperNeck',
+ in_channels=48,
+ freeze_trans_layer=True,
+ out_channels=17,
+ inner_channels=128,
+ deform_groups=17,
+ dilations=(3, 6, 12, 18, 24),
+ trans_conv_kernel=1,
+ res_blocks_cfg=dict(block='BASIC', num_blocks=20),
+ offsets_kernel=3,
+ deform_conv_kernel=3),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=17,
+ out_channels=17,
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=False,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+ detector = PoseWarper(model_cfg['backbone'], model_cfg['neck'],
+ model_cfg['keypoint_head'], model_cfg['train_cfg'],
+ model_cfg['test_cfg'], model_cfg['pretrained'], None,
+ model_cfg['concat_tensors'])
+ assert detector.concat_tensors
+
+ detector.init_weights()
+
+ input_shape = (2, 3, 64, 64)
+ num_frames = 2
+ mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+ # test argument 'concat_tensors'
+ model_cfg_copy = copy.deepcopy(model_cfg)
+ model_cfg_copy['concat_tensors'] = False
+
+ detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
+ model_cfg_copy['keypoint_head'],
+ model_cfg_copy['train_cfg'],
+ model_cfg_copy['test_cfg'],
+ model_cfg_copy['pretrained'], None,
+ model_cfg_copy['concat_tensors'])
+ assert not detector.concat_tensors
+
+ detector.init_weights()
+
+ input_shape = (2, 3, 64, 64)
+ num_frames = 2
+ mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+ # flip test
+ model_cfg_copy = copy.deepcopy(model_cfg)
+ model_cfg_copy['test_cfg']['flip_test'] = True
+
+ detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
+ model_cfg_copy['keypoint_head'],
+ model_cfg_copy['train_cfg'],
+ model_cfg_copy['test_cfg'],
+ model_cfg_copy['pretrained'], None,
+ model_cfg_copy['concat_tensors'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 64, 64)
+ num_frames = 2
+ mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+ # test different number of dilations
+ model_cfg_copy = copy.deepcopy(model_cfg)
+ model_cfg_copy['neck']['dilations'] = (3, 6, 12)
+
+ detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
+ model_cfg_copy['keypoint_head'],
+ model_cfg_copy['train_cfg'],
+ model_cfg_copy['test_cfg'],
+ model_cfg_copy['pretrained'], None,
+ model_cfg_copy['concat_tensors'])
+
+ detector.init_weights()
+
+ input_shape = (2, 3, 64, 64)
+ num_frames = 2
+ mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+ # test different backbone, change head accordingly
+ model_cfg_copy = copy.deepcopy(model_cfg)
+ model_cfg_copy['backbone'] = dict(type='ResNet', depth=18)
+ model_cfg_copy['neck']['in_channels'] = 512
+ model_cfg_copy['keypoint_head'] = dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=17,
+ out_channels=17,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
+ model_cfg_copy['keypoint_head'],
+ model_cfg_copy['train_cfg'],
+ model_cfg_copy['test_cfg'],
+ model_cfg_copy['pretrained'], None,
+ model_cfg_copy['concat_tensors'])
+
+ detector.init_weights()
+
+ input_shape = (1, 3, 64, 64)
+ num_frames = 2
+ mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)
+
+ imgs = mm_inputs.pop('imgs')
+ target = mm_inputs.pop('target')
+ target_weight = mm_inputs.pop('target_weight')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ losses = detector.forward(
+ imgs, target, target_weight, img_metas, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
+ _ = detector.forward_dummy(imgs)
+
+
+def _demo_mm_inputs(
+ input_shape=(1, 3, 256, 256), num_outputs=None, num_frames=1):
+ """Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+ num_frames (int):
+ number of frames for each sample, default: 1,
+ if larger than 1, return a list of tensors
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+ if num_outputs is not None:
+ target = np.zeros([N, num_outputs, 17, H // 4, W // 4],
+ dtype=np.float32)
+ target_weight = np.ones([N, num_outputs, 17, 1], dtype=np.float32)
+ else:
+ target = np.zeros([N, 17, H // 4, W // 4], dtype=np.float32)
+ target_weight = np.ones([N, 17, 1], dtype=np.float32)
+
+ img_metas = [{
+ 'img_shape': (H, W, C),
+ 'center': np.array([W / 2, H / 2]),
+ 'scale': np.array([0.5, 0.5]),
+ 'bbox_score': 1.0,
+ 'bbox_id': 0,
+ 'flip_pairs': [],
+ 'inference_channel': np.arange(17),
+ 'image_file': '.png',
+ 'frame_weight': np.random.uniform(0, 1, num_frames),
+ } for _ in range(N)]
+
+ mm_inputs = {
+ 'target': torch.FloatTensor(target),
+ 'target_weight': torch.FloatTensor(target_weight),
+ 'img_metas': img_metas
+ }
+
+ if num_frames == 1:
+ imgs = torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True)
+    else:
+        imgs = [
+ torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True)
+ for _ in range(num_frames)
+ ]
+
+ mm_inputs['imgs'] = imgs
+ return mm_inputs
diff --git a/vendor/ViTPose/tests/test_models/test_top_down_head.py b/vendor/ViTPose/tests/test_models/test_top_down_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..2558e33c5231f4972024c3183288c28bc486c1e6
--- /dev/null
+++ b/vendor/ViTPose/tests/test_models/test_top_down_head.py
@@ -0,0 +1,518 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models import (DeepposeRegressionHead, TopdownHeatmapMSMUHead,
+ TopdownHeatmapMultiStageHead,
+ TopdownHeatmapSimpleHead, ViPNASHeatmapSimpleHead)
+
+
+def test_vipnas_simple_head():
+ """Test simple head."""
+ with pytest.raises(TypeError):
+ # extra
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra=[],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(TypeError):
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3, in_channels=512, extra={'final_conv_kernel': 1})
+
+ # test num deconv layers
+ with pytest.raises(ValueError):
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=-1,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of layers should match
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of kernels should match
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(3, 2, 0),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, -1),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ # test final_conv_kernel
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ head.init_weights()
+ assert head.final_layer.padding == (1, 1)
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 1},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ assert head.final_layer.padding == (0, 0)
+ _ = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 0},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True),
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )))
+ assert len(head.final_layer) == 4
+
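+    # with the default three deconv layers, a 32x32 feature map is
+    # upsampled 8x to a 256x256 heatmap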
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 3, 256, 256])
+
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 3, 32, 32])
+
+ head = ViPNASHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert out.shape == torch.Size([1, 3, 32, 32])
+
+ head.init_weights()
+
+
+def test_top_down_simple_head():
+ """Test simple head."""
+ with pytest.raises(TypeError):
+ # extra
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra=[],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(TypeError):
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3, in_channels=512, extra={'final_conv_kernel': 1})
+
+ # test num deconv layers
+ with pytest.raises(ValueError):
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=-1,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of layers should match
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of kernels should match
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(3, 2, 0),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, -1),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ # test final_conv_kernel
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ head.init_weights()
+ assert head.final_layer.padding == (1, 1)
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 1},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ assert head.final_layer.padding == (0, 0)
+ _ = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 0},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True),
+ extra=dict(
+ final_conv_kernel=1, num_conv_layers=1, num_conv_kernels=(1, )))
+ assert len(head.final_layer) == 4
+
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 3, 256, 256])
+
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 3, 32, 32])
+
+ head = TopdownHeatmapSimpleHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert out.shape == torch.Size([1, 3, 32, 32])
+
+ head.init_weights()
+
+
+def test_top_down_multistage_head():
+ """Test multistage head."""
+ with pytest.raises(TypeError):
+        # extra should be a dict
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_stages=1,
+ extra=[],
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ # test num deconv layers
+ with pytest.raises(ValueError):
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=-1,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of layers should match
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the number of kernels should match
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_stages=1,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(3, 2, 0),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(ValueError):
+ # the deconv kernels should be 4, 3, 2
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, -1),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ with pytest.raises(AssertionError):
+ # inputs should be list
+ head = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+
+ # test final_conv_kernel
+ head = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 3},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ head.init_weights()
+ assert head.multi_final_layers[0].padding == (1, 1)
+ head = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 1},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ assert head.multi_final_layers[0].padding == (0, 0)
+ _ = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ extra={'final_conv_kernel': 0},
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+
+ head = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert len(out) == 1
+ assert out[0].shape == torch.Size([1, 3, 256, 256])
+
+ head = TopdownHeatmapMultiStageHead(
+ out_channels=3,
+ in_channels=512,
+ num_deconv_layers=0,
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
+ input_shape = (1, 512, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ out = head([inputs])
+ assert out[0].shape == torch.Size([1, 3, 32, 32])
+
+ head.init_weights()
+
+
+def test_top_down_msmu_head():
+ """Test multi-stage multi-unit head."""
+ with pytest.raises(AssertionError):
+ # inputs should be list
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 256, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ _ = head(inputs)
+
+ with pytest.raises(AssertionError):
+ # inputs should be list[list, ...]
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 256, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ inputs = [inputs] * 2
+ _ = head(inputs)
+
+ with pytest.raises(AssertionError):
+ # len(inputs) should equal to num_stages
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 256, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ inputs = [[inputs] * 2] * 3
+ _ = head(inputs)
+
+ with pytest.raises(AssertionError):
+ # len(inputs[0]) should equal to num_units
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 256, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ inputs = [[inputs] * 3] * 2
+ _ = head(inputs)
+
+ with pytest.raises(AssertionError):
+ # input channels should equal to param unit_channels
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 128, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ inputs = [[inputs] * 2] * 2
+ _ = head(inputs)
+
+ head = TopdownHeatmapMSMUHead(
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=17,
+ num_stages=2,
+ num_units=2,
+ loss_keypoint=(
+ [dict(type='JointsMSELoss', use_target_weight=True)] * 2 +
+ [dict(type='JointsOHKMMSELoss', use_target_weight=True)]) * 2)
+ input_shape = (1, 256, 32, 32)
+ inputs = _demo_inputs(input_shape)
+ inputs = [[inputs] * 2] * 2
+ out = head(inputs)
+ assert len(out) == 2 * 2
+ assert out[0].shape == torch.Size([1, 17, 64, 48])
+
+ head.init_weights()
+
+
+def test_fc_head():
+ """Test fc head."""
+ head = DeepposeRegressionHead(
+ in_channels=2048,
+ num_joints=17,
+ loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True))
+
+ head.init_weights()
+
+ input_shape = (1, 2048)
+ inputs = _demo_inputs(input_shape)
+ out = head(inputs)
+ assert out.shape == torch.Size([1, 17, 2])
+
+ loss = head.get_loss(out, out, torch.ones_like(out))
+ assert torch.allclose(loss['reg_loss'], torch.tensor(0.))
+
+ _ = head.inference_model(inputs)
+ _ = head.inference_model(inputs, [])
+
+ acc = head.get_accuracy(out, out, torch.ones_like(out))
+ assert acc['acc_pose'] == 1.
+
+
+def _demo_inputs(input_shape=(1, 3, 64, 64)):
+ """Create a superset of inputs needed to run backbone.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 3, 64, 64).
+ Returns:
+ Random input tensor with the size of input_shape.
+ """
+ inps = np.random.random(input_shape)
+ inps = torch.FloatTensor(inps)
+ return inps
diff --git a/vendor/ViTPose/tests/test_necks/test_gap_neck.py b/vendor/ViTPose/tests/test_necks/test_gap_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..57d26cb0bd610e0e4c62877d8122ffe3cd6a42d6
--- /dev/null
+++ b/vendor/ViTPose/tests/test_necks/test_gap_neck.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models.necks import GlobalAveragePooling
+
+
+def test_gap():
+ """Test GlobalAveragePooling neck."""
+ gap = GlobalAveragePooling()
+
+ with pytest.raises(TypeError):
+ gap(1)
+
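+    # the neck should accept a single tensor as well as lists/tuples of
+    # tensors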
+ x0_shape = (32, 1024, 4, 4)
+ x1_shape = (32, 2048, 2, 2)
+ x0 = _demo_inputs(x0_shape)
+ x1 = _demo_inputs(x1_shape)
+
+ y = gap(x0)
+ assert y.shape == torch.Size([32, 1024])
+
+ y = gap([x0, x1])
+ assert y[0].shape == torch.Size([32, 1024])
+ assert y[1].shape == torch.Size([32, 2048])
+
+ y = gap((x0, x1))
+ assert y[0].shape == torch.Size([32, 1024])
+ assert y[1].shape == torch.Size([32, 2048])
+
+
+def _demo_inputs(input_shape=(1, 3, 64, 64)):
+ """Create a superset of inputs needed to run backbone.
+
+ Args:
+ input_shape (tuple): input batch dimensions.
+ Default: (1, 3, 64, 64).
+ """
+ imgs = np.random.random(input_shape)
+ imgs = torch.FloatTensor(imgs)
+
+ return imgs
diff --git a/vendor/ViTPose/tests/test_necks/test_posewarper_neck.py b/vendor/ViTPose/tests/test_necks/test_posewarper_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..45faabfb5a41d586ff464d62627b29a128ae19b3
--- /dev/null
+++ b/vendor/ViTPose/tests/test_necks/test_posewarper_neck.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import pytest
+import torch
+
+from mmpose.models.necks import PoseWarperNeck
+
+
+def test_posewarper_neck():
+ """Test PoseWarperNeck."""
+ with pytest.raises(AssertionError):
+ # test value of trans_conv_kernel
+ _ = PoseWarperNeck(
+ out_channels=3,
+ in_channels=512,
+ inner_channels=128,
+ trans_conv_kernel=2)
+
+ with pytest.raises(TypeError):
+ # test type of res_blocks_cfg
+ _ = PoseWarperNeck(
+ out_channels=3,
+ in_channels=512,
+ inner_channels=128,
+ res_blocks_cfg=2)
+
+ with pytest.raises(AssertionError):
+ # test value of dilations
+ neck = PoseWarperNeck(
+ out_channels=3, in_channels=512, inner_channels=128, dilations=[])
+
+ in_channels = 48
+ out_channels = 17
+ inner_channels = 128
+
+ neck = PoseWarperNeck(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ inner_channels=inner_channels)
+
+ with pytest.raises(TypeError):
+ # the forward require two arguments: inputs and frame_weight
+ _ = neck(1)
+
+ with pytest.raises(AssertionError):
+ # the inputs to PoseWarperNeck must be list or tuple
+ _ = neck(1, [0.1])
+
+    # test the case when num_frames * batch_size is larger than
+    # the default value of 'im2col_step' but cannot be divided
+    # by it in mmcv.ops.deform_conv
+ b_0 = 8 # batch_size
+ b_1 = 16
+ h_0 = 4 # image height
+ h_1 = 2
+
+ num_frame_0 = 2
+ num_frame_1 = 5
+
+ # test input format
+ # B, C, H, W
+ x0_shape = (b_0, in_channels, h_0, h_0)
+ x1_shape = (b_1, in_channels, h_1, h_1)
+
+    # test concat_tensors case
+    # here the features output by a backbone like ResNet are a single Tensor
+ x0_shape = (b_0 * num_frame_0, in_channels, h_0, h_0)
+ x0 = _demo_inputs(x0_shape, length=1)
+ frame_weight_0 = np.random.uniform(0, 1, num_frame_0)
+
+ # test forward
+ y = neck(x0, frame_weight_0)
+ assert y.shape == torch.Size([b_0, out_channels, h_0, h_0])
+
+    # test concat_tensors case
+    # this time, the features output by a backbone like HRNet
+    # are a list of Tensors rather than a single Tensor
+ x0_shape = (b_0 * num_frame_0, in_channels, h_0, h_0)
+ x0 = _demo_inputs(x0_shape, length=2)
+ x0 = [x0]
+ frame_weight_0 = np.random.uniform(0, 1, num_frame_0)
+
+ # test forward
+ y = neck(x0, frame_weight_0)
+ assert y.shape == torch.Size([b_0, out_channels, h_0, h_0])
+
+    # test not concat_tensors case
+    # here the features output by a backbone like ResNet are a single Tensor
+ x1_shape = (b_1, in_channels, h_1, h_1)
+ x1 = _demo_inputs(x1_shape, length=num_frame_1)
+ frame_weight_1 = np.random.uniform(0, 1, num_frame_1)
+
+ # test forward
+ y = neck(x1, frame_weight_1)
+ assert y.shape == torch.Size([b_1, out_channels, h_1, h_1])
+
+    # test not concat_tensors case
+    # this time, the features output by a backbone like HRNet
+    # are a list of Tensors rather than a single Tensor
+ x1_shape = (b_1, in_channels, h_1, h_1)
+ x1 = _demo_inputs(x1_shape, length=2)
+ x1 = [x1 for _ in range(num_frame_1)]
+ frame_weight_1 = np.random.uniform(0, 1, num_frame_1)
+
+ # test forward
+ y = neck(x1, frame_weight_1)
+ assert y.shape == torch.Size([b_1, out_channels, h_1, h_1])
+
+    # test the special case where, in the concat_tensors case,
+    # batch_size * num_frames is larger than the default value of
+    # 'im2col_step' in mmcv.ops.deform_conv but cannot be divided by it
+ # see https://github.com/open-mmlab/mmcv/issues/1440
+ x1_shape = (b_1 * num_frame_1, in_channels, h_1, h_1)
+ x1 = _demo_inputs(x1_shape, length=2)
+ x1 = [x1]
+    frame_weight_1 = np.random.uniform(0, 1, num_frame_1)
+
+ y = neck(x1, frame_weight_1)
+ assert y.shape == torch.Size([b_1, out_channels, h_1, h_1])
+
+    # test an inappropriate value of `im2col_step`
+ neck = PoseWarperNeck(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ inner_channels=inner_channels,
+ im2col_step=32)
+ with pytest.raises(AssertionError):
+ _ = neck(x1, frame_weight_1)
+
+
+def _demo_inputs(input_shape=(80, 48, 4, 4), length=1):
+    """Create a superset of inputs needed to run backbone.
+
+    Args:
+        input_shape (tuple): input batch dimensions.
+            Default: (80, 48, 4, 4).
+        length (int): the length of the returned list of tensors.
+    """
+ imgs = [
+ torch.FloatTensor(np.random.random(input_shape)) for _ in range(length)
+ ]
+ return imgs
diff --git a/vendor/ViTPose/tests/test_onnx.py b/vendor/ViTPose/tests/test_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0179c2765af9dedc85dcf797500f3813432b5ab
--- /dev/null
+++ b/vendor/ViTPose/tests/test_onnx.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+
+import torch.nn as nn
+
+from tools.deployment.pytorch2onnx import _convert_batchnorm, pytorch2onnx
+
+
+class DummyModel(nn.Module):
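+    """Minimal Conv3d + SyncBatchNorm model used to exercise ONNX export."""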
+
+ def __init__(self):
+ super().__init__()
+ self.conv = nn.Conv3d(1, 2, 1)
+ self.bn = nn.SyncBatchNorm(2)
+
+ def forward(self, x):
+ return self.bn(self.conv(x))
+
+ def forward_dummy(self, x):
+ return (self.forward(x), )
+
+
+def test_onnx_exporting():
+ with tempfile.TemporaryDirectory() as tmpdir:
+ out_file = osp.join(tmpdir, 'tmp.onnx')
+ model = DummyModel()
+ model = _convert_batchnorm(model)
+ # test exporting
+ pytorch2onnx(model, (1, 1, 1, 1, 1), output_file=out_file)
diff --git a/vendor/ViTPose/tests/test_optimizer.py b/vendor/ViTPose/tests/test_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2379f615c7228fb5cf01d4231cc3752fea71a096
--- /dev/null
+++ b/vendor/ViTPose/tests/test_optimizer.py
@@ -0,0 +1,101 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+
+from mmpose.core import build_optimizers
+
+
+class ExampleModel(nn.Module):
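+    """Two independent conv layers so that one optimizer per sub-module can
+    be built and checked."""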
+
+ def __init__(self):
+ super().__init__()
+ self.model1 = nn.Conv2d(3, 8, kernel_size=3)
+ self.model2 = nn.Conv2d(3, 4, kernel_size=3)
+
+ def forward(self, x):
+ return x
+
+
+def test_build_optimizers():
+ base_lr = 0.0001
+ base_wd = 0.0002
+ momentum = 0.9
+
+ # basic config with ExampleModel
+ optimizer_cfg = dict(
+ model1=dict(
+ type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum),
+ model2=dict(
+ type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum))
+ model = ExampleModel()
+ optimizers = build_optimizers(model, optimizer_cfg)
+ param_dict = dict(model.named_parameters())
+ assert isinstance(optimizers, dict)
+ for i in range(2):
+ optimizer = optimizers[f'model{i+1}']
+ param_groups = optimizer.param_groups[0]
+ assert isinstance(optimizer, torch.optim.SGD)
+ assert optimizer.defaults['lr'] == base_lr
+ assert optimizer.defaults['momentum'] == momentum
+ assert optimizer.defaults['weight_decay'] == base_wd
+ assert len(param_groups['params']) == 2
+ assert torch.equal(param_groups['params'][0],
+ param_dict[f'model{i+1}.weight'])
+ assert torch.equal(param_groups['params'][1],
+ param_dict[f'model{i+1}.bias'])
+
+ # basic config with Parallel model
+ model = torch.nn.DataParallel(ExampleModel())
+ optimizers = build_optimizers(model, optimizer_cfg)
+ param_dict = dict(model.named_parameters())
+ assert isinstance(optimizers, dict)
+ for i in range(2):
+ optimizer = optimizers[f'model{i+1}']
+ param_groups = optimizer.param_groups[0]
+ assert isinstance(optimizer, torch.optim.SGD)
+ assert optimizer.defaults['lr'] == base_lr
+ assert optimizer.defaults['momentum'] == momentum
+ assert optimizer.defaults['weight_decay'] == base_wd
+ assert len(param_groups['params']) == 2
+ assert torch.equal(param_groups['params'][0],
+ param_dict[f'module.model{i+1}.weight'])
+ assert torch.equal(param_groups['params'][1],
+ param_dict[f'module.model{i+1}.bias'])
+
+ # basic config with ExampleModel (one optimizer)
+ optimizer_cfg = dict(
+ type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
+ model = ExampleModel()
+ optimizer = build_optimizers(model, optimizer_cfg)
+ param_dict = dict(model.named_parameters())
+    assert not isinstance(optimizer, dict)
+ param_groups = optimizer.param_groups[0]
+ assert isinstance(optimizer, torch.optim.SGD)
+ assert optimizer.defaults['lr'] == base_lr
+ assert optimizer.defaults['momentum'] == momentum
+ assert optimizer.defaults['weight_decay'] == base_wd
+ assert len(param_groups['params']) == 4
+ assert torch.equal(param_groups['params'][0], param_dict['model1.weight'])
+ assert torch.equal(param_groups['params'][1], param_dict['model1.bias'])
+ assert torch.equal(param_groups['params'][2], param_dict['model2.weight'])
+ assert torch.equal(param_groups['params'][3], param_dict['model2.bias'])
+
+ # basic config with Parallel model (one optimizer)
+ model = torch.nn.DataParallel(ExampleModel())
+ optimizer = build_optimizers(model, optimizer_cfg)
+ param_dict = dict(model.named_parameters())
+    assert not isinstance(optimizer, dict)
+ param_groups = optimizer.param_groups[0]
+ assert isinstance(optimizer, torch.optim.SGD)
+ assert optimizer.defaults['lr'] == base_lr
+ assert optimizer.defaults['momentum'] == momentum
+ assert optimizer.defaults['weight_decay'] == base_wd
+ assert len(param_groups['params']) == 4
+ assert torch.equal(param_groups['params'][0],
+ param_dict['module.model1.weight'])
+ assert torch.equal(param_groups['params'][1],
+ param_dict['module.model1.bias'])
+ assert torch.equal(param_groups['params'][2],
+ param_dict['module.model2.weight'])
+ assert torch.equal(param_groups['params'][3],
+ param_dict['module.model2.bias'])
diff --git a/vendor/ViTPose/tests/test_pipelines/test_bottom_up_pipelines.py b/vendor/ViTPose/tests/test_pipelines/test_bottom_up_pipelines.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d05c633bdc743172057eca125f1bfdabc77f41a
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_bottom_up_pipelines.py
@@ -0,0 +1,427 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os.path as osp
+
+import numpy as np
+import pytest
+import xtcocotools
+from xtcocotools.coco import COCO
+
+from mmpose.datasets.pipelines import (BottomUpGenerateHeatmapTarget,
+ BottomUpGeneratePAFTarget,
+ BottomUpGenerateTarget,
+ BottomUpGetImgSize,
+ BottomUpRandomAffine,
+ BottomUpRandomFlip, BottomUpResizeAlign,
+ LoadImageFromFile)
+
+
+def _get_mask(coco, anno, img_id):
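+    """Return a boolean mask that is False over crowd regions and
+    annotations without keypoints."""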
+ img_info = coco.loadImgs(img_id)[0]
+
+ m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
+
+ for obj in anno:
+ if obj['iscrowd']:
+ rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
+ img_info['height'],
+ img_info['width'])
+ m += xtcocotools.mask.decode(rle)
+ elif obj['num_keypoints'] == 0:
+ rles = xtcocotools.mask.frPyObjects(obj['segmentation'],
+ img_info['height'],
+ img_info['width'])
+ for rle in rles:
+ m += xtcocotools.mask.decode(rle)
+
+ return m < 0.5
+
+
+def _get_joints(anno, ann_info, int_sigma):
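+    """Stack per-person keypoints into an array; when ``scale_aware_sigma``
+    is enabled, append a per-person sigma derived from the bbox size
+    (rounded up to an integer if ``int_sigma`` is True)."""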
+ num_people = len(anno)
+
+ if ann_info['scale_aware_sigma']:
+ joints = np.zeros((num_people, ann_info['num_joints'], 4),
+ dtype=np.float32)
+ else:
+ joints = np.zeros((num_people, ann_info['num_joints'], 3),
+ dtype=np.float32)
+
+ for i, obj in enumerate(anno):
+ joints[i, :ann_info['num_joints'], :3] = \
+ np.array(obj['keypoints']).reshape([-1, 3])
+ if ann_info['scale_aware_sigma']:
+ # get person box
+ box = obj['bbox']
+ size = max(box[2], box[3])
+ sigma = size / 256 * 2
+ if int_sigma:
+ sigma = int(np.ceil(sigma))
+ assert sigma > 0, sigma
+ joints[i, :, 3] = sigma
+
+ return joints
+
+
+def _check_flip(origin_imgs, result_imgs):
+ """Check if the origin_imgs are flipped correctly."""
+ h, w, c = origin_imgs.shape
+ for i in range(h):
+ for j in range(w):
+ for k in range(c):
+ if result_imgs[i, j, k] != origin_imgs[i, w - 1 - j, k]:
+ return False
+ return True
+
+
+def test_bottomup_pipeline():
+
+ data_prefix = 'tests/data/coco/'
+ ann_file = osp.join(data_prefix, 'test_coco.json')
+ coco = COCO(ann_file)
+
+ ann_info = {}
+ ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
+ [11, 12], [13, 14], [15, 16]]
+ ann_info['flip_index'] = [
+ 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15
+ ]
+
+ ann_info['use_different_joint_weights'] = False
+ ann_info['joint_weights'] = np.array([
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ dtype=np.float32).reshape((17, 1))
+ ann_info['image_size'] = np.array([384, 512])
+ ann_info['heatmap_size'] = np.array([[96, 128], [192, 256]])
+ ann_info['num_joints'] = 17
+ ann_info['num_scales'] = 2
+ ann_info['scale_aware_sigma'] = False
+
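+    # image 785 in the COCO test assets is 640x425, i.e. W > H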
+ ann_ids = coco.getAnnIds(785)
+ anno = coco.loadAnns(ann_ids)
+ mask = _get_mask(coco, anno, 785)
+
+ anno = [
+ obj for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+ ]
+ joints = _get_joints(anno, ann_info, False)
+
+ mask_list = [mask.copy() for _ in range(ann_info['num_scales'])]
+ joints_list = [joints.copy() for _ in range(ann_info['num_scales'])]
+
+ results = {}
+ results['dataset'] = 'coco'
+ results['image_file'] = osp.join(data_prefix, '000000000785.jpg')
+ results['mask'] = mask_list
+ results['joints'] = joints_list
+ results['ann_info'] = ann_info
+
+ transform = LoadImageFromFile()
+ results = transform(copy.deepcopy(results))
+ assert results['img'].shape == (425, 640, 3)
+
+ # test HorizontalFlip
+ random_horizontal_flip = BottomUpRandomFlip(flip_prob=1.)
+ results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results))
+ assert _check_flip(results['img'], results_horizontal_flip['img'])
+
+ random_horizontal_flip = BottomUpRandomFlip(flip_prob=0.)
+ results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results))
+ assert (results['img'] == results_horizontal_flip['img']).all()
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+    # test BottomUpRandomAffine
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short', 0)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (512, 384, 3)
+
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short',
+ 40)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (512, 384, 3)
+
+ results_copy = copy.deepcopy(results)
+ results_copy['ann_info']['scale_aware_sigma'] = True
+ joints = _get_joints(anno, results_copy['ann_info'], False)
+ results_copy['joints'] = \
+ [joints.copy() for _ in range(results_copy['ann_info']['num_scales'])]
+ results_affine_transform = random_affine_transform(results_copy)
+ assert results_affine_transform['img'].shape == (512, 384, 3)
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'long', 40)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (512, 384, 3)
+
+ with pytest.raises(ValueError):
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5],
+ 'short-long', 40)
+ results_affine_transform = random_affine_transform(
+ copy.deepcopy(results))
+
+ # test BottomUpGenerateTarget
+ generate_multi_target = BottomUpGenerateTarget(2, 30)
+ results_generate_multi_target = generate_multi_target(
+ copy.deepcopy(results))
+ assert 'targets' in results_generate_multi_target
+ assert len(results_generate_multi_target['targets']
+ ) == results['ann_info']['num_scales']
+
+ # test BottomUpGetImgSize when W > H
+ get_multi_scale_size = BottomUpGetImgSize([1])
+ results_get_multi_scale_size = get_multi_scale_size(copy.deepcopy(results))
+ assert 'test_scale_factor' in results_get_multi_scale_size['ann_info']
+ assert 'base_size' in results_get_multi_scale_size['ann_info']
+ assert 'center' in results_get_multi_scale_size['ann_info']
+ assert 'scale' in results_get_multi_scale_size['ann_info']
+ assert results_get_multi_scale_size['ann_info']['base_size'][1] == 512
+
+ # test BottomUpResizeAlign
+ transforms = [
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ ]
+ resize_align_multi_scale = BottomUpResizeAlign(transforms=transforms)
+ results_copy = copy.deepcopy(results_get_multi_scale_size)
+ results_resize_align_multi_scale = resize_align_multi_scale(results_copy)
+ assert 'aug_data' in results_resize_align_multi_scale['ann_info']
+
+ # test when W < H
+ ann_info['image_size'] = np.array([512, 384])
+ ann_info['heatmap_size'] = np.array([[128, 96], [256, 192]])
+ results = {}
+ results['dataset'] = 'coco'
+ results['image_file'] = osp.join(data_prefix, '000000000785.jpg')
+ results['mask'] = mask_list
+ results['joints'] = joints_list
+ results['ann_info'] = ann_info
+ results['img'] = np.random.rand(640, 425, 3)
+
+ # test HorizontalFlip
+ random_horizontal_flip = BottomUpRandomFlip(flip_prob=1.)
+ results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results))
+ assert _check_flip(results['img'], results_horizontal_flip['img'])
+
+ random_horizontal_flip = BottomUpRandomFlip(flip_prob=0.)
+ results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results))
+ assert (results['img'] == results_horizontal_flip['img']).all()
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_horizontal_flip(
+ copy.deepcopy(results_copy))
+
+ # test BottomUpRandomAffine
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short', 0)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (384, 512, 3)
+
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short',
+ 40)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (384, 512, 3)
+
+ results_copy = copy.deepcopy(results)
+ results_copy['ann_info']['scale_aware_sigma'] = True
+ joints = _get_joints(anno, results_copy['ann_info'], False)
+ results_copy['joints'] = \
+ [joints.copy() for _ in range(results_copy['ann_info']['num_scales'])]
+ results_affine_transform = random_affine_transform(results_copy)
+ assert results_affine_transform['img'].shape == (384, 512, 3)
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[0]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['joints'] = joints_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ results_copy = copy.deepcopy(results)
+ results_copy['mask'] = mask_list[:1]
+ with pytest.raises(AssertionError):
+ results_horizontal_flip = random_affine_transform(
+ copy.deepcopy(results_copy))
+
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'long', 40)
+ results_affine_transform = random_affine_transform(copy.deepcopy(results))
+ assert results_affine_transform['img'].shape == (384, 512, 3)
+
+ with pytest.raises(ValueError):
+ random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5],
+ 'short-long', 40)
+ results_affine_transform = random_affine_transform(
+ copy.deepcopy(results))
+
+ # test BottomUpGenerateTarget
+ generate_multi_target = BottomUpGenerateTarget(2, 30)
+ results_generate_multi_target = generate_multi_target(
+ copy.deepcopy(results))
+ assert 'targets' in results_generate_multi_target
+ assert len(results_generate_multi_target['targets']
+ ) == results['ann_info']['num_scales']
+
+ # test BottomUpGetImgSize when W < H
+ get_multi_scale_size = BottomUpGetImgSize([1])
+ results_get_multi_scale_size = get_multi_scale_size(copy.deepcopy(results))
+ assert 'test_scale_factor' in results_get_multi_scale_size['ann_info']
+ assert 'base_size' in results_get_multi_scale_size['ann_info']
+ assert 'center' in results_get_multi_scale_size['ann_info']
+ assert 'scale' in results_get_multi_scale_size['ann_info']
+ assert results_get_multi_scale_size['ann_info']['base_size'][0] == 512
+
+
+def test_BottomUpGenerateHeatmapTarget():
+
+ data_prefix = 'tests/data/coco/'
+ ann_file = osp.join(data_prefix, 'test_coco.json')
+ coco = COCO(ann_file)
+
+ ann_info = {}
+ ann_info['heatmap_size'] = np.array([128, 256])
+ ann_info['num_joints'] = 17
+ ann_info['num_scales'] = 2
+ ann_info['scale_aware_sigma'] = False
+
+ ann_ids = coco.getAnnIds(785)
+ anno = coco.loadAnns(ann_ids)
+ mask = _get_mask(coco, anno, 785)
+
+ anno = [
+ obj for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+ ]
+ joints = _get_joints(anno, ann_info, False)
+
+ mask_list = [mask.copy() for _ in range(ann_info['num_scales'])]
+ joints_list = [joints.copy() for _ in range(ann_info['num_scales'])]
+
+ results = {}
+ results['dataset'] = 'coco'
+ results['image_file'] = osp.join(data_prefix, '000000000785.jpg')
+ results['mask'] = mask_list
+ results['joints'] = joints_list
+ results['ann_info'] = ann_info
+
+ generate_heatmap_target = BottomUpGenerateHeatmapTarget(2)
+ results_generate_heatmap_target = generate_heatmap_target(results)
+ assert 'target' in results_generate_heatmap_target
+ assert len(results_generate_heatmap_target['target']
+ ) == results['ann_info']['num_scales']
+
+
+def test_BottomUpGeneratePAFTarget():
+
+ ann_info = {}
+ ann_info['skeleton'] = [[0, 1], [2, 3]]
+ ann_info['heatmap_size'] = np.array([5])
+ ann_info['num_joints'] = 4
+ ann_info['num_scales'] = 1
+
+ mask = np.ones((5, 5), dtype=bool)
+ joints = np.array([[[1, 1, 2], [3, 3, 2], [0, 0, 0], [0, 0, 0]],
+ [[1, 3, 2], [3, 1, 2], [0, 0, 0], [0, 0, 0]]])
+
+ mask_list = [mask.copy() for _ in range(ann_info['num_scales'])]
+ joints_list = [joints.copy() for _ in range(ann_info['num_scales'])]
+
+ results = {}
+ results['dataset'] = 'coco'
+ results['mask'] = mask_list
+ results['joints'] = joints_list
+ results['ann_info'] = ann_info
+
+ generate_paf_target = BottomUpGeneratePAFTarget(1)
+ results_generate_paf_target = generate_paf_target(results)
+ sqrt = np.sqrt(2) / 2
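+ # Sanity note on the expected values: for skeleton limb [0, 1] the two sample
+ # poses run along the two diagonals, so the PAF stores unit direction
+ # components of +-sqrt(2)/2; where the limbs overlap, the y-components cancel
+ # to 0 while the x-components agree. Limb [2, 3] has no visible joints, so
+ # its maps are all zeros.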
+ assert (results_generate_paf_target['target'] == np.array(
+ [[[sqrt, sqrt, 0, sqrt, sqrt], [sqrt, sqrt, sqrt, sqrt, sqrt],
+ [0, sqrt, sqrt, sqrt, 0], [sqrt, sqrt, sqrt, sqrt, sqrt],
+ [sqrt, sqrt, 0, sqrt, sqrt]],
+ [[sqrt, sqrt, 0, -sqrt, -sqrt], [sqrt, sqrt, 0, -sqrt, -sqrt],
+ [0, 0, 0, 0, 0], [-sqrt, -sqrt, 0, sqrt, sqrt],
+ [-sqrt, -sqrt, 0, sqrt, sqrt]],
+ [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0]],
+ [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0]]],
+ dtype=np.float32)).all()
diff --git a/vendor/ViTPose/tests/test_pipelines/test_hand_transform.py b/vendor/ViTPose/tests/test_pipelines/test_hand_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..2225b87592a6c4711ab1d35e56167dc0ea8daacd
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_hand_transform.py
@@ -0,0 +1,68 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+import numpy as np
+
+from mmpose.datasets.pipelines import Compose
+
+
+def _check_flip(origin_imgs, result_imgs):
+ """Check if the origin_imgs are flipped correctly."""
+ h, w, c = origin_imgs.shape
+ for i in range(h):
+ for j in range(w):
+ for k in range(c):
+ if result_imgs[i, j, k] != origin_imgs[i, w - 1 - j, k]:
+ return False
+ return True
+
+
+def get_sample_data():
+ ann_info = {}
+ ann_info['image_size'] = np.array([256, 256])
+ ann_info['heatmap_size'] = np.array([64, 64, 64])
+ ann_info['heatmap3d_depth_bound'] = 400.0
+ ann_info['heatmap_size_root'] = 64
+ ann_info['root_depth_bound'] = 400.0
+ ann_info['num_joints'] = 42
+ ann_info['joint_weights'] = np.ones((ann_info['num_joints'], 1),
+ dtype=np.float32)
+ ann_info['use_different_joint_weights'] = False
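+ # The 42 keypoints cover two 21-joint hands (InterHand-style layout), so a
+ # horizontal flip swaps joint i of one hand with joint 21 + i of the other.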
+ ann_info['flip_pairs'] = [[i, 21 + i] for i in range(21)]
+ ann_info['inference_channel'] = list(range(42))
+ ann_info['num_output_channels'] = 42
+ ann_info['dataset_channel'] = list(range(42))
+
+ results = {
+ 'image_file': 'tests/data/interhand2.6m/image69148.jpg',
+ 'center': np.asarray([200, 200], dtype=np.float32),
+ 'scale': 1.0,
+ 'rotation': 0,
+ 'joints_3d': np.zeros([42, 3], dtype=np.float32),
+ 'joints_3d_visible': np.ones([42, 3], dtype=np.float32),
+ 'hand_type': np.asarray([1, 0], dtype=np.float32),
+ 'hand_type_valid': 1,
+ 'rel_root_depth': 50.0,
+ 'rel_root_valid': 1,
+ 'ann_info': ann_info
+ }
+ return results
+
+
+def test_hand_transforms():
+ results = get_sample_data()
+
+ # load image
+ pipeline = Compose([dict(type='LoadImageFromFile')])
+ results = pipeline(results)
+
+ # test random flip
+ pipeline = Compose([dict(type='HandRandomFlip', flip_prob=1)])
+ results_flip = pipeline(copy.deepcopy(results))
+ assert _check_flip(results['img'], results_flip['img'])
+
+ # test root depth target generation
+ pipeline = Compose([dict(type='HandGenerateRelDepthTarget')])
+ results_depth = pipeline(copy.deepcopy(results))
+ assert results_depth['target'].shape == (1, )
+ assert results_depth['target_weight'].shape == (1, )
diff --git a/vendor/ViTPose/tests/test_pipelines/test_mesh_pipelines.py b/vendor/ViTPose/tests/test_pipelines/test_mesh_pipelines.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c2c8d19bbfac23916b51c2da89bb39e106b874c
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_mesh_pipelines.py
@@ -0,0 +1,255 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os
+
+import numpy as np
+import torch
+from numpy.testing import assert_array_almost_equal
+
+from mmpose.datasets.pipelines import (Collect, IUVToTensor, LoadImageFromFile,
+ LoadIUVFromFile, MeshAffine,
+ MeshGetRandomScaleRotation,
+ MeshRandomChannelNoise, MeshRandomFlip,
+ NormalizeTensor, ToTensor)
+
+
+def _check_keys_contain(result_keys, target_keys):
+ """Check if all elements in target_keys is in result_keys."""
+ return set(target_keys).issubset(set(result_keys))
+
+
+def _check_flip(origin_imgs, result_imgs):
+ """Check if the origin_imgs are flipped correctly."""
+ h, w, c = origin_imgs.shape
+ for i in range(h):
+ for j in range(w):
+ for k in range(c):
+ if result_imgs[i, j, k] != origin_imgs[i, w - 1 - j, k]:
+ return False
+ return True
+
+
+def _check_rot90(origin_imgs, result_imgs):
+ if origin_imgs.shape[0] == result_imgs.shape[1] and \
+ origin_imgs.shape[1] == result_imgs.shape[0]:
+ return True
+ else:
+ return False
+
+
+def _check_normalize(origin_imgs, result_imgs, norm_cfg):
+ """Check if the origin_imgs are normalized correctly into result_imgs in a
+ given norm_cfg."""
+ target_imgs = result_imgs.copy()
+ for i in range(3):
+ target_imgs[i] *= norm_cfg['std'][i]
+ target_imgs[i] += norm_cfg['mean'][i]
+ assert_array_almost_equal(origin_imgs, target_imgs, decimal=4)
+
+
+def _box2cs(box, image_size):
+ x, y, w, h = box[:4]
+
+ aspect_ratio = 1. * image_size[0] / image_size[1]
+ center = np.zeros((2), dtype=np.float32)
+ center[0] = x + w * 0.5
+ center[1] = y + h * 0.5
+
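+ # The box is first expanded to match the target aspect ratio; the scale is
+ # then expressed in units of 200 px (the usual pixel_std convention for
+ # top-down pose) and enlarged by a factor of 1.25.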
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+ scale = np.array([w * 1.0 / 200.0, h * 1.0 / 200.0], dtype=np.float32)
+ scale = scale * 1.25
+ return center, scale
+
+
+def _load_test_data():
+ data_cfg = dict(
+ image_size=[256, 256],
+ iuv_size=[64, 64],
+ num_joints=24,
+ use_IUV=True,
+ uv_type='BF')
+ ann_file = 'tests/data/h36m/test_h36m.npz'
+ img_prefix = 'tests/data/h36m'
+ index = 0
+
+ ann_info = dict(image_size=np.array(data_cfg['image_size']))
+ ann_info['iuv_size'] = np.array(data_cfg['iuv_size'])
+ ann_info['num_joints'] = data_cfg['num_joints']
+ ann_info['flip_pairs'] = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9],
+ [20, 21], [22, 23]]
+ ann_info['use_different_joint_weights'] = False
+ ann_info['joint_weights'] = \
+ np.ones(ann_info['num_joints'], dtype=np.float32
+ ).reshape(ann_info['num_joints'], 1)
+ ann_info['uv_type'] = data_cfg['uv_type']
+ ann_info['use_IUV'] = data_cfg['use_IUV']
+ uv_type = ann_info['uv_type']
+ iuv_prefix = os.path.join(img_prefix, f'{uv_type}_IUV_gt')
+
+ ann_data = np.load(ann_file)
+
+ results = dict(ann_info=ann_info)
+ results['rotation'] = 0
+ results['image_file'] = os.path.join(img_prefix,
+ ann_data['imgname'][index])
+ scale = ann_data['scale'][index]
+ results['scale'] = np.array([scale, scale]).astype(np.float32)
+ results['center'] = ann_data['center'][index].astype(np.float32)
+
+ # Get gt 2D joints, if available
+ if 'part' in ann_data.keys():
+ keypoints = ann_data['part'][index].astype(np.float32)
+ results['joints_2d'] = keypoints[:, :2]
+ results['joints_2d_visible'] = keypoints[:, -1][:, np.newaxis]
+ else:
+ results['joints_2d'] = np.zeros((24, 2), dtype=np.float32)
+ results['joints_2d_visible'] = np.zeros((24, 1), dtype=np.float32)
+
+ # Get gt 3D joints, if available
+ if 'S' in ann_data.keys():
+ joints_3d = ann_data['S'][index].astype(np.float32)
+ results['joints_3d'] = joints_3d[:, :3]
+ results['joints_3d_visible'] = joints_3d[:, -1][:, np.newaxis]
+ else:
+ results['joints_3d'] = np.zeros((24, 3), dtype=np.float32)
+ results['joints_3d_visible'] = np.zeros((24, 1), dtype=np.float32)
+
+ # Get gt SMPL parameters, if available
+ if 'pose' in ann_data.keys() and 'shape' in ann_data.keys():
+ results['pose'] = ann_data['pose'][index].astype(np.float32)
+ results['beta'] = ann_data['shape'][index].astype(np.float32)
+ results['has_smpl'] = 1
+ else:
+ results['pose'] = np.zeros(72, dtype=np.float32)
+ results['beta'] = np.zeros(10, dtype=np.float32)
+ results['has_smpl'] = 0
+
+ # Get gender data, if available
+ if 'gender' in ann_data.keys():
+ gender = ann_data['gender'][index]
+ results['gender'] = 0 if str(gender) == 'm' else 1
+ else:
+ results['gender'] = -1
+
+ # Get IUV image, if available
+ if 'iuv_names' in ann_data.keys():
+ results['iuv_file'] = os.path.join(iuv_prefix,
+ ann_data['iuv_names'][index])
+ results['has_iuv'] = results['has_smpl']
+ else:
+ results['iuv_file'] = ''
+ results['has_iuv'] = 0
+
+ return copy.deepcopy(results)
+
+
+def test_mesh_pipeline():
+ # load data
+ results = _load_test_data()
+
+ # test loading image
+ transform = LoadImageFromFile()
+ results = transform(copy.deepcopy(results))
+ assert results['img'].shape == (1002, 1000, 3)
+
+ # test loading densepose IUV image without GT iuv image
+ transform = LoadIUVFromFile()
+ results_no_iuv = copy.deepcopy(results)
+ results_no_iuv['has_iuv'] = 0
+ results_no_iuv = transform(results_no_iuv)
+ assert results_no_iuv['iuv'] is None
+
+ # test loading densepose IUV image
+ results = transform(results)
+ assert results['iuv'].shape == (1002, 1000, 3)
+ assert results['iuv'][:, :, 0].max() <= 1
+
+ # test flip
+ random_flip = MeshRandomFlip(flip_prob=1.)
+ results_flip = random_flip(copy.deepcopy(results))
+ assert _check_flip(results['img'], results_flip['img'])
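+ # MeshRandomFlip also inverts the U channel of the IUV map; undo that
+ # inversion so the remaining difference is a plain horizontal flip.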
+ flip_iuv = results_flip['iuv']
+ flip_iuv[:, :, 1] = 255 - flip_iuv[:, :, 1]
+ assert _check_flip(results['iuv'], flip_iuv)
+ results = results_flip
+
+ # test flip without IUV image
+ results_no_iuv = random_flip(copy.deepcopy(results_no_iuv))
+ assert results_no_iuv['iuv'] is None
+
+ # test random scale and rotation
+ random_scale_rotation = MeshGetRandomScaleRotation()
+ results = random_scale_rotation(results)
+
+ # test affine
+ affine_transform = MeshAffine()
+ results_affine = affine_transform(copy.deepcopy(results))
+ assert results_affine['img'].shape == (256, 256, 3)
+ assert results_affine['iuv'].shape == (64, 64, 3)
+ results = results_affine
+
+ # test affine without IUV image
+ results_no_iuv['rotation'] = 30
+ results_no_iuv = affine_transform(copy.deepcopy(results_no_iuv))
+ assert results_no_iuv['iuv'] is None
+
+ # test channel noise
+ random_noise = MeshRandomChannelNoise()
+ results_noise = random_noise(copy.deepcopy(results))
+ results = results_noise
+
+ # transfer image to tensor
+ to_tensor = ToTensor()
+ results_tensor = to_tensor(copy.deepcopy(results))
+ assert isinstance(results_tensor['img'], torch.Tensor)
+ assert results_tensor['img'].shape == torch.Size([3, 256, 256])
+
+ # transfer IUV image to tensor
+ iuv_to_tensor = IUVToTensor()
+ results_tensor = iuv_to_tensor(results_tensor)
+ assert isinstance(results_tensor['part_index'], torch.LongTensor)
+ assert results_tensor['part_index'].shape == torch.Size([1, 64, 64])
+ max_I = results_tensor['part_index'].max().item()
+ assert (max_I == 0 or max_I == 1)
+ assert isinstance(results_tensor['uv_coordinates'], torch.FloatTensor)
+ assert results_tensor['uv_coordinates'].shape == torch.Size([2, 64, 64])
+
+ # transfer IUV image to tensor without GT IUV image
+ results_no_iuv = iuv_to_tensor(results_no_iuv)
+ assert isinstance(results_no_iuv['part_index'], torch.LongTensor)
+ assert results_no_iuv['part_index'].shape == torch.Size([1, 64, 64])
+ max_I = results_no_iuv['part_index'].max().item()
+ assert (max_I == 0)
+ assert isinstance(results_no_iuv['uv_coordinates'], torch.FloatTensor)
+ assert results_no_iuv['uv_coordinates'].shape == torch.Size([2, 64, 64])
+
+ # test norm
+ norm_cfg = {}
+ norm_cfg['mean'] = [0.485, 0.456, 0.406]
+ norm_cfg['std'] = [0.229, 0.224, 0.225]
+ normalize = NormalizeTensor(mean=norm_cfg['mean'], std=norm_cfg['std'])
+
+ results_normalize = normalize(copy.deepcopy(results_tensor))
+ _check_normalize(results_tensor['img'].data.numpy(),
+ results_normalize['img'].data.numpy(), norm_cfg)
+
+ # test collect
+ collect = Collect(
+ keys=[
+ 'img', 'joints_2d', 'joints_2d_visible', 'joints_3d',
+ 'joints_3d_visible', 'pose', 'beta', 'part_index', 'uv_coordinates'
+ ],
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'iuv_file'])
+ results_final = collect(results_normalize)
+
+ assert 'img_size' not in results_final['img_metas'].data
+ assert 'image_file' in results_final['img_metas'].data
diff --git a/vendor/ViTPose/tests/test_pipelines/test_pose3d_transform.py b/vendor/ViTPose/tests/test_pipelines/test_pose3d_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6a52d9d054f0d55811cf687267164a6f96f65af
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_pose3d_transform.py
@@ -0,0 +1,336 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os.path as osp
+import tempfile
+
+import mmcv
+import numpy as np
+import pytest
+from numpy.testing import assert_array_almost_equal
+
+from mmpose.core import SimpleCamera
+from mmpose.datasets.pipelines import Compose
+
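+ # Index map that reorders the raw annotation keypoints ('part' / 'S' arrays)
+ # into the 17-joint Human3.6M skeleton order used by the 3D pipelines below.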
+H36M_JOINT_IDX = [14, 2, 1, 0, 3, 4, 5, 16, 12, 17, 18, 9, 10, 11, 8, 7, 6]
+
+
+def get_data_sample():
+
+ def _parse_h36m_imgname(imgname):
+ """Parse imgname to get information of subject, action and camera.
+
+ A typical h36m image filename is like:
+ S1_Directions_1.54138969_000001.jpg
+ """
+ subj, rest = osp.basename(imgname).split('_', 1)
+ action, rest = rest.split('.', 1)
+ camera, rest = rest.split('_', 1)
+ return subj, action, camera
+
+ ann_file = 'tests/data/h36m/test_h36m.npz'
+ camera_param_file = 'tests/data/h36m/cameras.pkl'
+
+ data = np.load(ann_file)
+ cameras = mmcv.load(camera_param_file)
+
+ _imgnames = data['imgname']
+ _joints_2d = data['part'][:, H36M_JOINT_IDX].astype(np.float32)
+ _joints_3d = data['S'][:, H36M_JOINT_IDX].astype(np.float32)
+ _centers = data['center'].astype(np.float32)
+ _scales = data['scale'].astype(np.float32)
+
+ frame_ids = [0]
+ target_frame_id = 0
+
+ results = {
+ 'frame_ids': frame_ids,
+ 'target_frame_id': target_frame_id,
+ 'input_2d': _joints_2d[frame_ids, :, :2],
+ 'input_2d_visible': _joints_2d[frame_ids, :, -1:],
+ 'input_3d': _joints_3d[frame_ids, :, :3],
+ 'input_3d_visible': _joints_3d[frame_ids, :, -1:],
+ 'target': _joints_3d[target_frame_id, :, :3],
+ 'target_visible': _joints_3d[target_frame_id, :, -1:],
+ 'imgnames': _imgnames[frame_ids],
+ 'scales': _scales[frame_ids],
+ 'centers': _centers[frame_ids],
+ }
+
+ # add camera parameters
+ subj, _, camera = _parse_h36m_imgname(_imgnames[frame_ids[0]])
+ results['camera_param'] = cameras[(subj, camera)]
+
+ # add image size
+ results['image_width'] = results['camera_param']['w']
+ results['image_height'] = results['camera_param']['h']
+
+ # add ann_info
+ ann_info = {}
+ ann_info['num_joints'] = 17
+ ann_info['joint_weights'] = np.full(17, 1.0, dtype=np.float32)
+ ann_info['flip_pairs'] = [[1, 4], [2, 5], [3, 6], [11, 14], [12, 15],
+ [13, 16]]
+ ann_info['upper_body_ids'] = (0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ ann_info['lower_body_ids'] = (1, 2, 3, 4, 5, 6)
+ ann_info['use_different_joint_weights'] = False
+
+ results['ann_info'] = ann_info
+
+ return results
+
+
+def test_joint_transforms():
+ results = get_data_sample()
+
+ mean = np.random.rand(16, 3).astype(np.float32)
+ std = np.random.rand(16, 3).astype(np.float32) + 1e-6
+
+ pipeline = [
+ dict(
+ type='RelativeJointRandomFlip',
+ item='target',
+ flip_cfg=dict(center_mode='root', center_index=0),
+ visible_item='target_visible',
+ flip_prob=1.,
+ flip_camera=True),
+ dict(
+ type='GetRootCenteredPose',
+ item='target',
+ root_index=0,
+ root_name='global_position',
+ remove_root=True),
+ dict(
+ type='NormalizeJointCoordinate', item='target', mean=mean,
+ std=std),
+ dict(type='PoseSequenceToTensor', item='target'),
+ dict(
+ type='ImageCoordinateNormalization',
+ item='input_2d',
+ norm_camera=True),
+ dict(type='CollectCameraIntrinsics'),
+ dict(
+ type='Collect',
+ keys=[('input_2d', 'input'), ('target', 'output'), 'flip_pairs',
+ 'intrinsics'],
+ meta_name='metas',
+ meta_keys=['camera_param'])
+ ]
+
+ pipeline = Compose(pipeline)
+ output = pipeline(copy.deepcopy(results))
+
+ # test transformation of target
+ joints_0 = results['target']
+ joints_1 = output['output'].numpy()
+ # manually do transformations
+ flip_pairs = output['flip_pairs']
+ _joints_0_flipped = joints_0.copy()
+ for _l, _r in flip_pairs:
+ _joints_0_flipped[..., _l, :] = joints_0[..., _r, :]
+ _joints_0_flipped[..., _r, :] = joints_0[..., _l, :]
+ _joints_0_flipped[..., 0] = \
+ 2 * joints_0[..., 0:1, 0] - _joints_0_flipped[..., 0]
+ joints_0 = _joints_0_flipped
+ joints_0 = (joints_0[..., 1:, :] - joints_0[..., 0:1, :] - mean) / std
+ # convert to [K*C, T]
+ joints_0 = joints_0.reshape(-1)[..., None]
+ np.testing.assert_array_almost_equal(joints_0, joints_1)
+
+ # test transformation of input
+ joints_0 = results['input_2d']
+ joints_1 = output['input']
+ # manually do transformations
+ center = np.array(
+ [0.5 * results['image_width'], 0.5 * results['image_height']],
+ dtype=np.float32)
+ scale = np.array(0.5 * results['image_width'], dtype=np.float32)
+ joints_0 = (joints_0 - center) / scale
+ np.testing.assert_array_almost_equal(joints_0, joints_1)
+
+ # test transformation of camera parameters
+ camera_param_0 = results['camera_param']
+ camera_param_1 = output['metas'].data['camera_param']
+ # manually flip and normalization
+ camera_param_0['c'][0] *= -1
+ camera_param_0['p'][0] *= -1
+ camera_param_0['c'] = (camera_param_0['c'] -
+ np.array(center)[:, None]) / scale
+ camera_param_0['f'] = camera_param_0['f'] / scale
+ np.testing.assert_array_almost_equal(camera_param_0['c'],
+ camera_param_1['c'])
+ np.testing.assert_array_almost_equal(camera_param_0['f'],
+ camera_param_1['f'])
+
+ # test CollectCameraIntrinsics
+ intrinsics_0 = np.concatenate([
+ results['camera_param']['f'].reshape(2),
+ results['camera_param']['c'].reshape(2),
+ results['camera_param']['k'].reshape(3),
+ results['camera_param']['p'].reshape(2)
+ ])
+ intrinsics_1 = output['intrinsics']
+ np.testing.assert_array_almost_equal(intrinsics_0, intrinsics_1)
+
+ # test load mean/std from file
+ with tempfile.TemporaryDirectory() as tmpdir:
+ norm_param = {'mean': mean, 'std': std}
+ norm_param_file = osp.join(tmpdir, 'norm_param.pkl')
+ mmcv.dump(norm_param, norm_param_file)
+
+ pipeline = [
+ dict(
+ type='NormalizeJointCoordinate',
+ item='target',
+ norm_param_file=norm_param_file),
+ ]
+ pipeline = Compose(pipeline)
+
+
+def test_camera_projection():
+ results = get_data_sample()
+ pipeline_1 = [
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ output_name='input_3d_w',
+ camera_type='SimpleCamera',
+ mode='camera_to_world'),
+ dict(
+ type='CameraProjection',
+ item='input_3d_w',
+ output_name='input_3d_wp',
+ camera_type='SimpleCamera',
+ mode='world_to_pixel'),
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ output_name='input_3d_p',
+ camera_type='SimpleCamera',
+ mode='camera_to_pixel'),
+ dict(type='Collect', keys=['input_3d_wp', 'input_3d_p'], meta_keys=[])
+ ]
+ camera_param = results['camera_param'].copy()
+ camera_param['K'] = np.concatenate(
+ (np.diagflat(camera_param['f']), camera_param['c']), axis=-1)
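+ # pipeline_2 repeats the projections with camera parameters passed in
+ # explicitly; K is assembled as [diag(f) | c] from the focal lengths and
+ # principal point instead of being read from results['camera_param'].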
+ pipeline_2 = [
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ output_name='input_3d_w',
+ camera_type='SimpleCamera',
+ camera_param=camera_param,
+ mode='camera_to_world'),
+ dict(
+ type='CameraProjection',
+ item='input_3d_w',
+ output_name='input_3d_wp',
+ camera_type='SimpleCamera',
+ camera_param=camera_param,
+ mode='world_to_pixel'),
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ output_name='input_3d_p',
+ camera_type='SimpleCamera',
+ camera_param=camera_param,
+ mode='camera_to_pixel'),
+ dict(
+ type='CameraProjection',
+ item='input_3d_w',
+ output_name='input_3d_wc',
+ camera_type='SimpleCamera',
+ camera_param=camera_param,
+ mode='world_to_camera'),
+ dict(
+ type='Collect',
+ keys=['input_3d_wp', 'input_3d_p', 'input_2d'],
+ meta_keys=[])
+ ]
+
+ output1 = Compose(pipeline_1)(results)
+ output2 = Compose(pipeline_2)(results)
+
+ np.testing.assert_allclose(
+ output1['input_3d_wp'], output1['input_3d_p'], rtol=1e-6)
+
+ np.testing.assert_allclose(
+ output2['input_3d_wp'], output2['input_3d_p'], rtol=1e-6)
+
+ np.testing.assert_allclose(
+ output2['input_3d_p'], output2['input_2d'], rtol=1e-3, atol=1e-1)
+
+ # test invalid camera parameters
+ with pytest.raises(ValueError):
+ # missing intrinsic parameters
+ camera_param_wo_intrinsic = camera_param.copy()
+ camera_param_wo_intrinsic.pop('K')
+ camera_param_wo_intrinsic.pop('f')
+ camera_param_wo_intrinsic.pop('c')
+ _ = Compose([
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ camera_type='SimpleCamera',
+ camera_param=camera_param_wo_intrinsic,
+ mode='camera_to_pixel')
+ ])
+
+ with pytest.raises(ValueError):
+ # invalid mode
+ _ = Compose([
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ camera_type='SimpleCamera',
+ camera_param=camera_param,
+ mode='dummy')
+ ])
+
+ # test camera without undistortion
+ camera_param_wo_undistortion = camera_param.copy()
+ camera_param_wo_undistortion.pop('k')
+ camera_param_wo_undistortion.pop('p')
+ _ = Compose([
+ dict(
+ type='CameraProjection',
+ item='input_3d',
+ camera_type='SimpleCamera',
+ camera_param=camera_param_wo_undistortion,
+ mode='camera_to_pixel')
+ ])
+
+ # test pixel to camera transformation
+ camera = SimpleCamera(camera_param_wo_undistortion)
+ kpt_camera = np.random.rand(14, 3)
+ kpt_pixel = camera.camera_to_pixel(kpt_camera)
+ _kpt_camera = camera.pixel_to_camera(
+ np.concatenate([kpt_pixel, kpt_camera[:, [2]]], -1))
+ assert_array_almost_equal(_kpt_camera, kpt_camera, decimal=4)
+
+
+def test_3d_heatmap_generation():
+ ann_info = dict(
+ image_size=np.array([256, 256]),
+ heatmap_size=np.array([64, 64, 64]),
+ heatmap3d_depth_bound=400.0,
+ num_joints=17,
+ joint_weights=np.ones((17, 1), dtype=np.float32),
+ use_different_joint_weights=False)
+
+ results = dict(
+ joints_3d=np.zeros([17, 3]),
+ joints_3d_visible=np.ones([17, 3]),
+ ann_info=ann_info)
+
+ pipeline = Compose([dict(type='Generate3DHeatmapTarget')])
+ results_3d = pipeline(results)
+ assert results_3d['target'].shape == (17, 64, 64, 64)
+ assert results_3d['target_weight'].shape == (17, 1)
+
+ # test joint_indices
+ pipeline = Compose(
+ [dict(type='Generate3DHeatmapTarget', joint_indices=[0, 8, 16])])
+ results_3d = pipeline(results)
+ assert results_3d['target'].shape == (3, 64, 64, 64)
+ assert results_3d['target_weight'].shape == (3, 1)
diff --git a/vendor/ViTPose/tests/test_pipelines/test_shared_transform.py b/vendor/ViTPose/tests/test_pipelines/test_shared_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..684a1035f84df49ab0ae2a61a9f63400f6cf65da
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_shared_transform.py
@@ -0,0 +1,218 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+
+import numpy as np
+import pytest
+from mmcv import bgr2rgb, build_from_cfg
+
+from mmpose.datasets import PIPELINES
+from mmpose.datasets.pipelines import Compose
+
+
+def check_keys_equal(result_keys, target_keys):
+ """Check if all elements in target_keys is in result_keys."""
+ return set(target_keys) == set(result_keys)
+
+
+def check_keys_contain(result_keys, target_keys):
+ """Check if elements in target_keys is in result_keys."""
+ return set(target_keys).issubset(set(result_keys))
+
+
+def test_compose():
+ with pytest.raises(TypeError):
+ # transform must be callable or a dict
+ Compose('LoadImageFromFile')
+
+ target_keys = ['img', 'img_rename', 'img_metas']
+
+ # test Compose given a data pipeline
+ img = np.random.randn(256, 256, 3)
+ results = dict(img=img, img_file='test_image.png')
+ test_pipeline = [
+ dict(
+ type='Collect',
+ keys=['img', ('img', 'img_rename')],
+ meta_keys=['img_file'])
+ ]
+ compose = Compose(test_pipeline)
+ compose_results = compose(results)
+ assert check_keys_equal(compose_results.keys(), target_keys)
+ assert check_keys_equal(compose_results['img_metas'].data.keys(),
+ ['img_file'])
+
+ # test Compose when forward data is None
+ results = None
+
+ class ExamplePipeline:
+
+ def __call__(self, results):
+ return None
+
+ nonePipeline = ExamplePipeline()
+ test_pipeline = [nonePipeline]
+ compose = Compose(test_pipeline)
+ compose_results = compose(results)
+ assert compose_results is None
+
+ assert repr(compose) == compose.__class__.__name__ + \
+ f'(\n {nonePipeline}\n)'
+
+
+def test_load_image_from_file():
+ # Define simple pipeline
+ load = dict(type='LoadImageFromFile')
+ load = build_from_cfg(load, PIPELINES)
+
+ data_prefix = 'tests/data/coco/'
+ image_file = osp.join(data_prefix, '00000000078.jpg')
+ results = dict(image_file=image_file)
+
+ # load an image that doesn't exist
+ with pytest.raises(FileNotFoundError):
+ results = load(results)
+
+ # normal loading
+ image_file = osp.join(data_prefix, '000000000785.jpg')
+ results = dict(image_file=image_file)
+ results = load(results)
+ assert results['img'].shape == (425, 640, 3)
+
+ # load a single image from a list
+ image_file = [osp.join(data_prefix, '000000000785.jpg')]
+ results = dict(image_file=image_file)
+ results = load(results)
+ assert len(results['img']) == 1
+
+ # test loading multiple images from a list
+ image_file = [
+ osp.join(data_prefix, '000000000785.jpg'),
+ osp.join(data_prefix, '00000004008.jpg'),
+ ]
+ results = dict(image_file=image_file)
+
+ with pytest.raises(FileNotFoundError):
+ results = load(results)
+
+ image_file = [
+ osp.join(data_prefix, '000000000785.jpg'),
+ osp.join(data_prefix, '000000040083.jpg'),
+ ]
+ results = dict(image_file=image_file)
+
+ results = load(results)
+ assert len(results['img']) == 2
+
+ # manually set image outside the pipeline
+ img = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)
+ results = load(dict(img=img))
+ np.testing.assert_equal(results['img'], bgr2rgb(img))
+
+ imgs = np.random.randint(0, 255, (2, 32, 32, 3), dtype=np.uint8)
+ desired = np.concatenate([bgr2rgb(img) for img in imgs], axis=0)
+ results = load(dict(img=imgs))
+ np.testing.assert_equal(results['img'], desired)
+
+ # neither 'image_file' nor a valid 'img' is given
+ results = dict()
+ with pytest.raises(KeyError):
+ _ = load(results)
+
+ results = dict(img=np.random.randint(0, 255, (32, 32), dtype=np.uint8))
+ with pytest.raises(ValueError):
+ _ = load(results)
+
+
+def test_albu_transform():
+ data_prefix = 'tests/data/coco/'
+ results = dict(image_file=osp.join(data_prefix, '000000000785.jpg'))
+
+ # Define simple pipeline
+ load = dict(type='LoadImageFromFile')
+ load = build_from_cfg(load, PIPELINES)
+
+ albu_transform = dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='RandomBrightnessContrast', p=0.2),
+ dict(type='ToFloat')
+ ])
+ albu_transform = build_from_cfg(albu_transform, PIPELINES)
+
+ # Execute transforms
+ results = load(results)
+
+ results = albu_transform(results)
+
+ assert results['img'].dtype == np.float32
+
+
+def test_photometric_distortion_transform():
+ data_prefix = 'tests/data/coco/'
+ results = dict(image_file=osp.join(data_prefix, '000000000785.jpg'))
+
+ # Define simple pipeline
+ load = dict(type='LoadImageFromFile')
+ load = build_from_cfg(load, PIPELINES)
+
+ photo_transform = dict(type='PhotometricDistortion')
+ photo_transform = build_from_cfg(photo_transform, PIPELINES)
+
+ # Execute transforms
+ results = load(results)
+
+ results = photo_transform(results)
+
+ assert results['img'].dtype == np.uint8
+
+
+def test_multitask_gather():
+ ann_info = dict(
+ image_size=np.array([256, 256]),
+ heatmap_size=np.array([64, 64]),
+ num_joints=17,
+ joint_weights=np.ones((17, 1), dtype=np.float32),
+ use_different_joint_weights=False)
+
+ results = dict(
+ joints_3d=np.zeros([17, 3]),
+ joints_3d_visible=np.ones([17, 3]),
+ ann_info=ann_info)
+
+ pipeline_list = [[dict(type='TopDownGenerateTarget', sigma=2)],
+ [dict(type='TopDownGenerateTargetRegression')]]
+ pipeline = dict(
+ type='MultitaskGatherTarget',
+ pipeline_list=pipeline_list,
+ pipeline_indices=[0, 1, 0],
+ )
+ pipeline = build_from_cfg(pipeline, PIPELINES)
+
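+ # pipeline_indices=[0, 1, 0] gathers [heatmap, regression, heatmap] targets,
+ # so three target/target_weight entries are expected below.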
+ results = pipeline(results)
+ target = results['target']
+ target_weight = results['target_weight']
+ assert isinstance(target, list)
+ assert isinstance(target_weight, list)
+ assert target[0].shape == (17, 64, 64)
+ assert target_weight[0].shape == (17, 1)
+ assert target[1].shape == (17, 2)
+ assert target_weight[1].shape == (17, 2)
+ assert target[2].shape == (17, 64, 64)
+ assert target_weight[2].shape == (17, 1)
+
+
+def test_rename_keys():
+ results = dict(
+ joints_3d=np.ones([17, 3]), joints_3d_visible=np.ones([17, 3]))
+ pipeline = dict(
+ type='RenameKeys',
+ key_pairs=[('joints_3d', 'target'),
+ ('joints_3d_visible', 'target_weight')])
+ pipeline = build_from_cfg(pipeline, PIPELINES)
+ results = pipeline(results)
+ assert 'joints_3d' not in results
+ assert 'joints_3d_visible' not in results
+ assert 'target' in results
+ assert 'target_weight' in results
+ assert results['target'].shape == (17, 3)
+ assert results['target_weight'].shape == (17, 3)
diff --git a/vendor/ViTPose/tests/test_pipelines/test_top_down_pipelines.py b/vendor/ViTPose/tests/test_pipelines/test_top_down_pipelines.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4ca1fbf5d6fc201ddf53c59aaad45000a180a3b
--- /dev/null
+++ b/vendor/ViTPose/tests/test_pipelines/test_top_down_pipelines.py
@@ -0,0 +1,243 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os.path as osp
+
+import numpy as np
+import torch
+from numpy.testing import assert_array_almost_equal
+from xtcocotools.coco import COCO
+
+from mmpose.datasets.pipelines import (Collect, LoadImageFromFile,
+ NormalizeTensor, TopDownAffine,
+ TopDownGenerateTarget,
+ TopDownGetRandomScaleRotation,
+ TopDownHalfBodyTransform,
+ TopDownRandomFlip,
+ TopDownRandomTranslation, ToTensor)
+
+
+def _check_keys_contain(result_keys, target_keys):
+ """Check if all elements in target_keys is in result_keys."""
+ return set(target_keys).issubset(set(result_keys))
+
+
+def _check_flip(origin_imgs, result_imgs):
+ """Check if the origin_imgs are flipped correctly."""
+ h, w, c = origin_imgs.shape
+ for i in range(h):
+ for j in range(w):
+ for k in range(c):
+ if result_imgs[i, j, k] != origin_imgs[i, w - 1 - j, k]:
+ return False
+ return True
+
+
+def _check_rot90(origin_imgs, result_imgs):
+ if origin_imgs.shape[0] == result_imgs.shape[1] and \
+ origin_imgs.shape[1] == result_imgs.shape[0]:
+ return True
+ else:
+ return False
+
+
+def _check_normalize(origin_imgs, result_imgs, norm_cfg):
+ """Check if the origin_imgs are normalized correctly into result_imgs in a
+ given norm_cfg."""
+ target_imgs = result_imgs.copy()
+ for i in range(3):
+ target_imgs[i] *= norm_cfg['std'][i]
+ target_imgs[i] += norm_cfg['mean'][i]
+ assert_array_almost_equal(origin_imgs, target_imgs, decimal=4)
+
+
+def _box2cs(box, image_size):
+ x, y, w, h = box[:4]
+
+ aspect_ratio = 1. * image_size[0] / image_size[1]
+ center = np.zeros((2), dtype=np.float32)
+ center[0] = x + w * 0.5
+ center[1] = y + h * 0.5
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+ scale = np.array([w * 1.0 / 200.0, h * 1.0 / 200.0], dtype=np.float32)
+ scale = scale * 1.25
+ return center, scale
+
+
+def test_top_down_pipeline():
+ # test loading
+ data_prefix = 'tests/data/coco/'
+ ann_file = osp.join(data_prefix, 'test_coco.json')
+ coco = COCO(ann_file)
+
+ results = dict(image_file=osp.join(data_prefix, '000000000785.jpg'))
+ transform = LoadImageFromFile()
+ results = transform(copy.deepcopy(results))
+ assert results['image_file'] == osp.join(data_prefix, '000000000785.jpg')
+
+ assert results['img'].shape == (425, 640, 3)
+ image_size = (425, 640)
+
+ ann_ids = coco.getAnnIds(785)
+ ann = coco.anns[ann_ids[0]]
+
+ num_joints = 17
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
+ for ipt in range(num_joints):
+ joints_3d[ipt, 0] = ann['keypoints'][ipt * 3 + 0]
+ joints_3d[ipt, 1] = ann['keypoints'][ipt * 3 + 1]
+ joints_3d[ipt, 2] = 0
+ t_vis = ann['keypoints'][ipt * 3 + 2]
+ if t_vis > 1:
+ t_vis = 1
+ joints_3d_visible[ipt, 0] = t_vis
+ joints_3d_visible[ipt, 1] = t_vis
+ joints_3d_visible[ipt, 2] = 0
+
+ center, scale = _box2cs(ann['bbox'][:4], image_size)
+
+ results['joints_3d'] = joints_3d
+ results['joints_3d_visible'] = joints_3d_visible
+ results['center'] = center
+ results['scale'] = scale
+ results['bbox_score'] = 1
+ results['bbox_id'] = 0
+
+ results['ann_info'] = {}
+ results['ann_info']['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8],
+ [9, 10], [11, 12], [13, 14], [15, 16]]
+ results['ann_info']['num_joints'] = num_joints
+ results['ann_info']['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ results['ann_info']['lower_body_ids'] = (11, 12, 13, 14, 15, 16)
+ results['ann_info']['use_different_joint_weights'] = False
+ results['ann_info']['joint_weights'] = np.array([
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ dtype=np.float32).reshape(
+ (num_joints, 1))
+ results['ann_info']['image_size'] = np.array([192, 256])
+ results['ann_info']['heatmap_size'] = np.array([48, 64])
+
+ # test flip
+ random_flip = TopDownRandomFlip(flip_prob=1.)
+ results_flip = random_flip(copy.deepcopy(results))
+ assert _check_flip(results['img'], results_flip['img'])
+
+ # test random scale and rotate
+ random_scale_rotate = TopDownGetRandomScaleRotation(90, 0.3, 1.0)
+ results_scale_rotate = random_scale_rotate(copy.deepcopy(results))
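+ # With rot_factor=90 and scale_factor=0.3, the assertions below bound the
+ # rotation by +-2 * rot_factor (here 180 deg) and the scale ratio by
+ # 1 +- scale_factor.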
+ assert results_scale_rotate['rotation'] <= 180
+ assert results_scale_rotate['rotation'] >= -180
+ assert (results_scale_rotate['scale'] / results['scale'] <= 1.3).all()
+ assert (results_scale_rotate['scale'] / results['scale'] >= 0.7).all()
+
+ # test halfbody transform
+ halfbody_transform = TopDownHalfBodyTransform(
+ num_joints_half_body=8, prob_half_body=1.)
+ results_halfbody = halfbody_transform(copy.deepcopy(results))
+ assert (results_halfbody['scale'] <= results['scale']).all()
+
+ affine_transform = TopDownAffine()
+ results['rotation'] = 90
+ results_affine = affine_transform(copy.deepcopy(results))
+ assert results_affine['img'].shape == (256, 192, 3)
+
+ results = results_affine
+ to_tensor = ToTensor()
+ results_tensor = to_tensor(copy.deepcopy(results))
+ assert isinstance(results_tensor['img'], torch.Tensor)
+ assert results_tensor['img'].shape == torch.Size([3, 256, 192])
+
+ norm_cfg = {}
+ norm_cfg['mean'] = [0.485, 0.456, 0.406]
+ norm_cfg['std'] = [0.229, 0.224, 0.225]
+
+ normalize = NormalizeTensor(mean=norm_cfg['mean'], std=norm_cfg['std'])
+
+ results_normalize = normalize(copy.deepcopy(results_tensor))
+ _check_normalize(results_tensor['img'].data.numpy(),
+ results_normalize['img'].data.numpy(), norm_cfg)
+
+ generate_target = TopDownGenerateTarget(
+ sigma=2, target_type='GaussianHeatMap', unbiased_encoding=True)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (num_joints, 1)
+
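+ # Repeat with the alternative capitalization of target_type
+ # ('GaussianHeatmap' vs the legacy 'GaussianHeatMap'); both spellings are
+ # expected to yield targets of the same shape.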
+ generate_target = TopDownGenerateTarget(
+ sigma=2, target_type='GaussianHeatmap', unbiased_encoding=True)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (num_joints, 1)
+
+ generate_target = TopDownGenerateTarget(sigma=2, unbiased_encoding=False)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (num_joints, 1)
+
+ generate_target = TopDownGenerateTarget(
+ sigma=[2, 3], unbiased_encoding=False)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ 2, num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (2, num_joints, 1)
+
+ generate_target = TopDownGenerateTarget(
+ kernel=(11, 11), encoding='Megvii', unbiased_encoding=False)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (num_joints, 1)
+
+ generate_target = TopDownGenerateTarget(
+ kernel=[(11, 11), (7, 7)], encoding='Megvii', unbiased_encoding=False)
+ results_target = generate_target(copy.deepcopy(results_tensor))
+ assert 'target' in results_target
+ assert results_target['target'].shape == (
+ 2, num_joints, results['ann_info']['heatmap_size'][1],
+ results['ann_info']['heatmap_size'][0])
+ assert 'target_weight' in results_target
+ assert results_target['target_weight'].shape == (2, num_joints, 1)
+
+ collect = Collect(
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ])
+ results_final = collect(results_target)
+ assert 'img_size' not in results_final['img_metas'].data
+ assert 'image_file' in results_final['img_metas'].data
+
+
+def test_random_translation():
+ results = dict(
+ center=np.zeros([2]),
+ scale=1,
+ )
+ pipeline = TopDownRandomTranslation()
+ results = pipeline(results)
+ assert results['center'].shape == (2, )
diff --git a/vendor/ViTPose/tests/test_post_processing.py b/vendor/ViTPose/tests/test_post_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..79c8c2a773e941500b168f147519d7e0a7c1a495
--- /dev/null
+++ b/vendor/ViTPose/tests/test_post_processing.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+from mmpose.core import (affine_transform, flip_back, fliplr_joints,
+ fliplr_regression, get_affine_transform, rotate_point,
+ transform_preds)
+
+
+def test_affine_transform():
+ pt = np.array([0, 1])
+ trans = np.array([[1, 0, 1], [0, 1, 0]])
+ result = affine_transform(pt, trans)
+ assert_array_almost_equal(result, np.array([1, 1]), decimal=4)
+ assert isinstance(result, np.ndarray)
+
+
+def test_rotate_point():
+ src_point = np.array([0, 1])
+ rot_rad = np.pi / 2.
+ result = rotate_point(src_point, rot_rad)
+ assert_array_almost_equal(result, np.array([-1, 0]), decimal=4)
+ assert isinstance(result, list)
+
+
+def test_fliplr_joints():
+ joints = np.array([[0, 0, 0], [1, 1, 0]])
+ joints_vis = np.array([[1], [1]])
+ joints_flip, _ = fliplr_joints(joints, joints_vis, 5, [[0, 1]])
+ res = np.array([[3, 1, 0], [4, 0, 0]])
+ assert_array_almost_equal(joints_flip, res)
+
+
+def test_flip_back():
+ heatmaps = np.random.random([1, 2, 32, 32])
+ flipped_heatmaps = flip_back(heatmaps, [[0, 1]])
+ heatmaps_new = flip_back(flipped_heatmaps, [[0, 1]])
+ assert_array_almost_equal(heatmaps, heatmaps_new)
+
+ heatmaps = np.random.random([1, 2, 32, 32])
+ flipped_heatmaps = flip_back(heatmaps, [[0, 1]])
+ heatmaps_new = flipped_heatmaps[..., ::-1]
+ assert_array_almost_equal(heatmaps[:, 0], heatmaps_new[:, 1])
+ assert_array_almost_equal(heatmaps[:, 1], heatmaps_new[:, 0])
+
+ ori_heatmaps = heatmaps.copy()
+ # test in-place flip
+ heatmaps = heatmaps[:, :, :, ::-1]
+ assert_array_almost_equal(ori_heatmaps[:, :, :, ::-1], heatmaps)
+
+
+def test_transform_preds():
+ coords = np.random.random([2, 2])
+ center = np.array([50, 50])
+ scale = np.array([100 / 200.0, 100 / 200.0])
+ size = np.array([100, 100])
+ result = transform_preds(coords, center, scale, size)
+ assert_array_almost_equal(coords, result)
+
+ coords = np.random.random([2, 2])
+ center = np.array([50, 50])
+ scale = np.array([100 / 200.0, 100 / 200.0])
+ size = np.array([101, 101])
+ result = transform_preds(coords, center, scale, size, use_udp=True)
+ assert_array_almost_equal(coords, result)
+
+
+def test_get_affine_transform():
+ center = np.array([50, 50])
+ scale = np.array([100 / 200.0, 100 / 200.0])
+ size = np.array([100, 100])
+ result = get_affine_transform(center, scale, 0, size)
+ trans = np.array([[1, 0, 0], [0, 1, 0]])
+ assert_array_almost_equal(trans, result)
+
+
+def test_flip_regression():
+ coords = np.random.rand(3, 3)
+ flip_pairs = [[1, 2]]
+ root = coords[:1]
+ coords_flipped = coords.copy()
+ coords_flipped[1] = coords[2]
+ coords_flipped[2] = coords[1]
+ coords_flipped[..., 0] = 2 * root[..., 0] - coords_flipped[..., 0]
+
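+ # Both modes below should reproduce the manual flip above: 'static' mirrors
+ # x about a fixed center_x (here the root's x), while 'root' mirrors about
+ # the joint selected by center_index.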
+ # static mode
+ res_static = fliplr_regression(
+ coords, flip_pairs, center_mode='static', center_x=root[0, 0])
+ assert_array_almost_equal(res_static, coords_flipped)
+
+ # root mode
+ res_root = fliplr_regression(
+ coords, flip_pairs, center_mode='root', center_index=0)
+ assert_array_almost_equal(res_root, coords_flipped)
diff --git a/vendor/ViTPose/tests/test_post_processing/test_filter.py b/vendor/ViTPose/tests/test_post_processing/test_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..47016976f51b1f0a233a88861f1f882b961eee21
--- /dev/null
+++ b/vendor/ViTPose/tests/test_post_processing/test_filter.py
@@ -0,0 +1,36 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from mmpose.core.post_processing.one_euro_filter import OneEuroFilter
+
+
+def test_one_euro_filter():
+ np.random.seed(1)
+
+ kpts = []
+ frames = 100
+ for i in range(frames):
+ kpts.append({
+ 'keypoints': np.tile(np.array([10, 10, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': 0.9
+ })
+ kpts.append({
+ 'keypoints': np.tile(np.array([11, 11, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': 0.8
+ })
+
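+ # The filter is seeded with the first frame's keypoints; min_cutoff and beta
+ # trade smoothing against lag. It is run once with an explicit fps and once
+ # without, to cover both timing code paths.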
+ one_euro_filter = OneEuroFilter(
+ kpts[0]['keypoints'][:, :2], min_cutoff=1.7, beta=0.3, fps=30)
+
+ for i in range(1, len(kpts)):
+ kpts[i]['keypoints'][:, :2] = one_euro_filter(
+ kpts[i]['keypoints'][:, :2])
+
+ one_euro_filter = OneEuroFilter(
+ kpts[0]['keypoints'][:, :2], min_cutoff=1.7, beta=0.3)
+
+ for i in range(1, len(kpts)):
+ kpts[i]['keypoints'][:, :2] = one_euro_filter(
+ kpts[i]['keypoints'][:, :2])
diff --git a/vendor/ViTPose/tests/test_post_processing/test_group.py b/vendor/ViTPose/tests/test_post_processing/test_group.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ec66efc3a9d1c1705a0b890c4d83a0ebf9ea687
--- /dev/null
+++ b/vendor/ViTPose/tests/test_post_processing/test_group.py
@@ -0,0 +1,72 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmpose.core.post_processing.group import HeatmapParser
+
+
+def test_group():
+ cfg = {}
+ cfg['num_joints'] = 17
+ cfg['detection_threshold'] = 0.1
+ cfg['tag_threshold'] = 1
+ cfg['use_detection_val'] = True
+ cfg['ignore_too_much'] = False
+ cfg['nms_kernel'] = 5
+ cfg['nms_padding'] = 2
+ cfg['tag_per_joint'] = True
+ cfg['max_num_people'] = 1
+ parser = HeatmapParser(cfg)
+ fake_heatmap = torch.zeros(1, 1, 5, 5)
+ fake_heatmap[0, 0, 3, 3] = 1
+ fake_heatmap[0, 0, 3, 2] = 0.8
+ assert parser.nms(fake_heatmap)[0, 0, 3, 2] == 0
+ fake_heatmap = torch.zeros(1, 17, 32, 32)
+ fake_tag = torch.zeros(1, 17, 32, 32, 1)
+ fake_heatmap[0, 0, 10, 10] = 0.8
+ fake_heatmap[0, 1, 12, 12] = 0.9
+ fake_heatmap[0, 4, 8, 8] = 0.8
+ fake_heatmap[0, 8, 6, 6] = 0.9
+ fake_tag[0, 0, 10, 10] = 0.8
+ fake_tag[0, 1, 12, 12] = 0.9
+ fake_tag[0, 4, 8, 8] = 0.8
+ fake_tag[0, 8, 6, 6] = 0.9
+ grouped, scores = parser.parse(fake_heatmap, fake_tag, True, True)
+ assert grouped[0][0, 0, 0] == 10.25
+ assert abs(scores[0] - 0.2) < 0.001
+ cfg['tag_per_joint'] = False
+ parser = HeatmapParser(cfg)
+ grouped, scores = parser.parse(fake_heatmap, fake_tag, False, False)
+ assert grouped[0][0, 0, 0] == 10.
+ grouped, scores = parser.parse(fake_heatmap, fake_tag, False, True)
+ assert grouped[0][0, 0, 0] == 10.
+
+
+def test_group_score_per_joint():
+ cfg = {}
+ cfg['num_joints'] = 17
+ cfg['detection_threshold'] = 0.1
+ cfg['tag_threshold'] = 1
+ cfg['use_detection_val'] = True
+ cfg['ignore_too_much'] = False
+ cfg['nms_kernel'] = 5
+ cfg['nms_padding'] = 2
+ cfg['tag_per_joint'] = True
+ cfg['max_num_people'] = 1
+ cfg['score_per_joint'] = True
+ parser = HeatmapParser(cfg)
+ fake_heatmap = torch.zeros(1, 1, 5, 5)
+ fake_heatmap[0, 0, 3, 3] = 1
+ fake_heatmap[0, 0, 3, 2] = 0.8
+ assert parser.nms(fake_heatmap)[0, 0, 3, 2] == 0
+ fake_heatmap = torch.zeros(1, 17, 32, 32)
+ fake_tag = torch.zeros(1, 17, 32, 32, 1)
+ fake_heatmap[0, 0, 10, 10] = 0.8
+ fake_heatmap[0, 1, 12, 12] = 0.9
+ fake_heatmap[0, 4, 8, 8] = 0.8
+ fake_heatmap[0, 8, 6, 6] = 0.9
+ fake_tag[0, 0, 10, 10] = 0.8
+ fake_tag[0, 1, 12, 12] = 0.9
+ fake_tag[0, 4, 8, 8] = 0.8
+ fake_tag[0, 8, 6, 6] = 0.9
+ grouped, scores = parser.parse(fake_heatmap, fake_tag, True, True)
+ assert len(scores[0]) == 17
diff --git a/vendor/ViTPose/tests/test_post_processing/test_nms.py b/vendor/ViTPose/tests/test_post_processing/test_nms.py
new file mode 100644
index 0000000000000000000000000000000000000000..13d793d239bf45218ec4ff65ea2d562d8cbe07ac
--- /dev/null
+++ b/vendor/ViTPose/tests/test_post_processing/test_nms.py
@@ -0,0 +1,81 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from mmpose.core.post_processing.nms import nms, oks_iou, oks_nms, soft_oks_nms
+
+
+def test_soft_oks_nms():
+ oks_thr = 0.9
+ kpts = []
+ kpts.append({
+ 'keypoints': np.tile(np.array([10, 10, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': 0.9
+ })
+ kpts.append({
+ 'keypoints': np.tile(np.array([10, 10, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': 0.4
+ })
+ kpts.append({
+ 'keypoints': np.tile(np.array([100, 100, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': 0.7
+ })
+
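+ # soft-NMS keeps all candidates but decays the scores of overlapping ones,
+ # hence the reordering [0, 2, 1]; hard OKS-NMS drops the overlapping
+ # duplicate, hence [0, 2].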
+ keep = soft_oks_nms([kpts[i] for i in range(len(kpts))], oks_thr)
+ assert (keep == np.array([0, 2, 1])).all()
+
+ keep = oks_nms([kpts[i] for i in range(len(kpts))], oks_thr)
+ assert (keep == np.array([0, 2])).all()
+
+ kpts_with_score_joints = []
+ kpts_with_score_joints.append({
+ 'keypoints': np.tile(np.array([10, 10, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': np.tile(np.array([0.9]), 17)
+ })
+ kpts_with_score_joints.append({
+ 'keypoints': np.tile(np.array([10, 10, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': np.tile(np.array([0.4]), 17)
+ })
+ kpts_with_score_joints.append({
+ 'keypoints': np.tile(np.array([100, 100, 0.9]), [17, 1]),
+ 'area': 100,
+ 'score': np.tile(np.array([0.7]), 17)
+ })
+ keep = soft_oks_nms([
+ kpts_with_score_joints[i] for i in range(len(kpts_with_score_joints))
+ ],
+ oks_thr,
+ score_per_joint=True)
+ assert (keep == np.array([0, 2, 1])).all()
+
+ keep = oks_nms([
+ kpts_with_score_joints[i] for i in range(len(kpts_with_score_joints))
+ ],
+ oks_thr,
+ score_per_joint=True)
+ assert (keep == np.array([0, 2])).all()
+
+
+def test_func_nms():
+ result = nms(np.array([[0, 0, 10, 10, 0.9], [0, 0, 10, 8, 0.8]]), 0.5)
+ assert result == [0]
+
+
+def test_oks_iou():
+ result = oks_iou(np.ones([17 * 3]), np.ones([1, 17 * 3]), 1, [1])
+ assert result[0] == 1.
+ result = oks_iou(np.zeros([17 * 3]), np.ones([1, 17 * 3]), 1, [1])
+ assert result[0] < 0.01
diff --git a/vendor/ViTPose/tests/test_regularization.py b/vendor/ViTPose/tests/test_regularization.py
new file mode 100644
index 0000000000000000000000000000000000000000..a93cc63adf529fd449b7893e3a973766d4ddb69d
--- /dev/null
+++ b/vendor/ViTPose/tests/test_regularization.py
@@ -0,0 +1,19 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from mmpose.core import WeightNormClipHook
+
+
+def test_weight_norm_clip():
+ torch.manual_seed(0)
+
+ module = torch.nn.Linear(2, 2, bias=False)
+ module.weight.data.fill_(2)
+ WeightNormClipHook(max_norm=1.0).register(module)
+
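+ # The clip is applied during the forward pass, so a single forward call is
+ # enough to bring the weight norm down to max_norm.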
+ x = torch.rand(1, 2).requires_grad_()
+ _ = module(x)
+
+ weight_norm = module.weight.norm().item()
+ np.testing.assert_almost_equal(weight_norm, 1.0, decimal=6)
diff --git a/vendor/ViTPose/tests/test_utils.py b/vendor/ViTPose/tests/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b4d1c1fc952c9c9af7eaaf992300ad0416cd822
--- /dev/null
+++ b/vendor/ViTPose/tests/test_utils.py
@@ -0,0 +1,100 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import multiprocessing as mp
+import os
+import platform
+import time
+
+import cv2
+import mmcv
+import torch
+import torchvision
+from mmcv import Config
+
+import mmpose
+from mmpose.utils import StopWatch, collect_env, setup_multi_processes
+
+
+def test_collect_env():
+ env_info = collect_env()
+ assert env_info['PyTorch'] == torch.__version__
+ assert env_info['TorchVision'] == torchvision.__version__
+ assert env_info['OpenCV'] == cv2.__version__
+ assert env_info['MMCV'] == mmcv.__version__
+ assert '+' in env_info['MMPose']
+ assert mmpose.__version__ in env_info['MMPose']
+
+
+def test_stopwatch():
+ window_size = 5
+ test_loop = 10
+ outer_time = 100
+ inner_time = 100
+
+ stop_watch = StopWatch(window=window_size)
+ for _ in range(test_loop):
+ with stop_watch.timeit():
+ time.sleep(outer_time / 1000.)
+ with stop_watch.timeit('inner'):
+ time.sleep(inner_time / 1000.)
+
+ _ = stop_watch.report()
+ _ = stop_watch.report_strings()
+
+
+def test_setup_multi_processes():
+ # temp save system setting
+ sys_start_method = mp.get_start_method(allow_none=True)
+ sys_cv_threads = cv2.getNumThreads()
+ # pop and temp save system env vars
+ sys_omp_threads = os.environ.pop('OMP_NUM_THREADS', default=None)
+ sys_mkl_threads = os.environ.pop('MKL_NUM_THREADS', default=None)
+
+ # test config without setting env
+ config = dict(data=dict(workers_per_gpu=2))
+ cfg = Config(config)
+ setup_multi_processes(cfg)
+ assert os.getenv('OMP_NUM_THREADS') == '1'
+ assert os.getenv('MKL_NUM_THREADS') == '1'
+ # opencv_num_threads defaults to 0; with threading disabled, cv2 reports 1 thread
+ assert cv2.getNumThreads() == 1
+ if platform.system() != 'Windows':
+ assert mp.get_start_method() == 'fork'
+
+ # test num workers <= 1
+ os.environ.pop('OMP_NUM_THREADS')
+ os.environ.pop('MKL_NUM_THREADS')
+ config = dict(data=dict(workers_per_gpu=0))
+ cfg = Config(config)
+ setup_multi_processes(cfg)
+ assert 'OMP_NUM_THREADS' not in os.environ
+ assert 'MKL_NUM_THREADS' not in os.environ
+
+ # test manually set env var
+ os.environ['OMP_NUM_THREADS'] = '4'
+ config = dict(data=dict(workers_per_gpu=2))
+ cfg = Config(config)
+ setup_multi_processes(cfg)
+ assert os.getenv('OMP_NUM_THREADS') == '4'
+
+ # test manually set opencv threads and mp start method
+ config = dict(
+ data=dict(workers_per_gpu=2),
+ opencv_num_threads=4,
+ mp_start_method='spawn')
+ cfg = Config(config)
+ setup_multi_processes(cfg)
+ assert cv2.getNumThreads() == 4
+ assert mp.get_start_method() == 'spawn'
+
+ # revert setting to avoid affecting other programs
+ if sys_start_method:
+ mp.set_start_method(sys_start_method, force=True)
+ cv2.setNumThreads(sys_cv_threads)
+ if sys_omp_threads:
+ os.environ['OMP_NUM_THREADS'] = sys_omp_threads
+ else:
+ os.environ.pop('OMP_NUM_THREADS')
+ if sys_mkl_threads:
+ os.environ['MKL_NUM_THREADS'] = sys_mkl_threads
+ else:
+ os.environ.pop('MKL_NUM_THREADS')
diff --git a/vendor/ViTPose/tests/test_version.py b/vendor/ViTPose/tests/test_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..392ded43806a9ed95c81a8bf4018cf9c8f8b6018
--- /dev/null
+++ b/vendor/ViTPose/tests/test_version.py
@@ -0,0 +1,9 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmpose
+
+
+def test_version():
+ version = mmpose.__version__
+ assert isinstance(version, str)
+ assert isinstance(mmpose.short_version, str)
+ assert mmpose.short_version in version
diff --git a/vendor/ViTPose/tests/test_visualization.py b/vendor/ViTPose/tests/test_visualization.py
new file mode 100644
index 0000000000000000000000000000000000000000..f04dad24e6df2f064c19c1c9eac575e019701c7f
--- /dev/null
+++ b/vendor/ViTPose/tests/test_visualization.py
@@ -0,0 +1,99 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import tempfile
+
+import mmcv
+import numpy as np
+import pytest
+
+from mmpose.core import (apply_bugeye_effect, apply_sunglasses_effect,
+ imshow_bboxes, imshow_keypoints, imshow_keypoints_3d)
+
+
+def test_imshow_keypoints():
+ # 2D keypoint
+ img = np.zeros((100, 100, 3), dtype=np.uint8)
+ kpts = np.array([[1, 1, 1], [10, 10, 1]], dtype=np.float32)
+ pose_result = [kpts]
+ skeleton = [[0, 1]]
+ pose_kpt_color = [(127, 127, 127)] * len(kpts)
+ pose_link_color = [(127, 127, 127)] * len(skeleton)
+ img_vis_2d = imshow_keypoints(
+ img,
+ pose_result,
+ skeleton=skeleton,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ show_keypoint_weight=True)
+
+ # 3D keypoint
+ kpts_3d = np.array([[0, 0, 0, 1], [1, 1, 1, 1]], dtype=np.float32)
+ pose_result_3d = [{'keypoints_3d': kpts_3d, 'title': 'test'}]
+ _ = imshow_keypoints_3d(
+ pose_result_3d,
+ img=img_vis_2d,
+ skeleton=skeleton,
+ pose_kpt_color=pose_kpt_color,
+ pose_link_color=pose_link_color,
+ vis_height=400)
+
+
+def test_imshow_bbox():
+ img = np.zeros((100, 100, 3), dtype=np.uint8)
+ bboxes = np.array([[10, 10, 30, 30], [10, 50, 30, 80]], dtype=np.float32)
+ labels = ['label 1', 'label 2']
+ colors = ['red', 'green']
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ _ = imshow_bboxes(
+ img,
+ bboxes,
+ labels=labels,
+ colors=colors,
+ show=False,
+ out_file=f'{tmpdir}/out.png')
+
+ # test case of empty bboxes
+ _ = imshow_bboxes(
+ img,
+ np.zeros((0, 4), dtype=np.float32),
+ labels=None,
+ colors='red',
+ show=False)
+
+ # test unmatched bboxes and labels
+ with pytest.raises(AssertionError):
+ _ = imshow_bboxes(
+ img,
+ np.zeros((0, 4), dtype=np.float32),
+ labels=labels[:1],
+ colors='red',
+ show=False)
+
+
+def test_effects():
+ img = np.zeros((100, 100, 3), dtype=np.uint8)
+ kpts = np.array([[10., 10., 0.8], [20., 10., 0.8]], dtype=np.float32)
+ bbox = np.array([0, 0, 50, 50], dtype=np.float32)
+ pose_results = [dict(bbox=bbox, keypoints=kpts)]
+ # sunglasses
+ sunglasses_img = mmcv.imread('demo/resources/sunglasses.jpg')
+ _ = apply_sunglasses_effect(
+ img,
+ pose_results,
+ sunglasses_img,
+ left_eye_index=1,
+ right_eye_index=0,
+ kpt_thr=0.5)
+ _ = apply_sunglasses_effect(
+ img,
+ pose_results,
+ sunglasses_img,
+ left_eye_index=1,
+ right_eye_index=0,
+ kpt_thr=0.9)
+
+ # bug-eye
+ _ = apply_bugeye_effect(
+ img, pose_results, left_eye_index=1, right_eye_index=0, kpt_thr=0.5)
+ _ = apply_bugeye_effect(
+ img, pose_results, left_eye_index=1, right_eye_index=0, kpt_thr=0.9)
diff --git a/vendor/ViTPose/tests/utils/data_utils.py b/vendor/ViTPose/tests/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a04e2e6eb77b67cb321e18d8c159da10016f939f
--- /dev/null
+++ b/vendor/ViTPose/tests/utils/data_utils.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+
+def convert_db_to_output(db, batch_size=2, keys=None, is_3d=False):
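+    """Convert a ground-truth database into dummy model outputs.
+
+    Each item in ``db`` is expected to provide ``joints_3d``,
+    ``joints_3d_visible``, ``image_file``, ``center`` and ``scale``. The
+    returned list contains one dict per batch with ``preds``, ``boxes``,
+    ``image_paths``, ``output_heatmap`` and ``bbox_ids``, mirroring the
+    structure of model test outputs so it can be passed to dataset
+    ``evaluate()`` in the tests.
+    """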
+ outputs = []
+ len_db = len(db)
+ for i in range(0, len_db, batch_size):
+ keypoints_dim = 3 if is_3d else 2
+ keypoints = np.stack([
+ np.hstack([
+ db[j]['joints_3d'].reshape((-1, 3))[:, :keypoints_dim],
+ db[j]['joints_3d_visible'].reshape((-1, 3))[:, :1]
+ ]) for j in range(i, min(i + batch_size, len_db))
+ ])
+
+ image_paths = [
+ db[j]['image_file'] for j in range(i, min(i + batch_size, len_db))
+ ]
+ bbox_ids = [j for j in range(i, min(i + batch_size, len_db))]
+ box = np.stack([
+ np.array([
+ db[j]['center'][0], db[j]['center'][1], db[j]['scale'][0],
+ db[j]['scale'][1],
+ db[j]['scale'][0] * db[j]['scale'][1] * 200 * 200, 1.0
+ ],
+ dtype=np.float32)
+ for j in range(i, min(i + batch_size, len_db))
+ ])
+
+ output = {}
+ output['preds'] = keypoints
+ output['boxes'] = box
+ output['image_paths'] = image_paths
+ output['output_heatmap'] = None
+ output['bbox_ids'] = bbox_ids
+
+ if keys is not None:
+ keys = keys if isinstance(keys, list) else [keys]
+ for key in keys:
+ output[key] = [
+ db[j][key] for j in range(i, min(i + batch_size, len_db))
+ ]
+
+ outputs.append(output)
+
+ return outputs
diff --git a/vendor/ViTPose/tests/utils/mesh_utils.py b/vendor/ViTPose/tests/utils/mesh_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a03b5ab28ab525b31bfc89f7739b003a1413ca72
--- /dev/null
+++ b/vendor/ViTPose/tests/utils/mesh_utils.py
@@ -0,0 +1,35 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import pickle
+
+import numpy as np
+from scipy.sparse import csc_matrix
+
+
+def generate_smpl_weight_file(output_dir):
+ """Generate a SMPL model weight file to initialize SMPL model, and generate
+ a 3D joints regressor file."""
+
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ joint_regressor_file = os.path.join(output_dir, 'test_joint_regressor.npy')
+ np.save(joint_regressor_file, np.zeros([24, 6890]))
+
+ test_data = {}
+ test_data['f'] = np.zeros([1, 3], dtype=np.int32)
+ test_data['J_regressor'] = csc_matrix(np.zeros([24, 6890]))
+ test_data['kintree_table'] = np.zeros([2, 24], dtype=np.uint32)
+ test_data['J'] = np.zeros([24, 3])
+ test_data['weights'] = np.zeros([6890, 24])
+ test_data['posedirs'] = np.zeros([6890, 3, 207])
+ test_data['v_template'] = np.zeros([6890, 3])
+ test_data['shapedirs'] = np.zeros([6890, 3, 10])
+
+ with open(os.path.join(output_dir, 'SMPL_NEUTRAL.pkl'), 'wb') as out_file:
+ pickle.dump(test_data, out_file)
+ with open(os.path.join(output_dir, 'SMPL_MALE.pkl'), 'wb') as out_file:
+ pickle.dump(test_data, out_file)
+ with open(os.path.join(output_dir, 'SMPL_FEMALE.pkl'), 'wb') as out_file:
+ pickle.dump(test_data, out_file)
+ return
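+
+
+# A minimal usage sketch (the output directory below is illustrative):
+#   generate_smpl_weight_file('tests/data/smpl')
+# This writes zero-valued SMPL_NEUTRAL/MALE/FEMALE.pkl files and a
+# test_joint_regressor.npy that are sufficient to instantiate an SMPL model
+# in unit tests.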
diff --git a/vendor/ViTPose/tools/analysis/analyze_logs.py b/vendor/ViTPose/tools/analysis/analyze_logs.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0e1a0260850de685bcee4bc5d7eac43345698e8
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/analyze_logs.py
@@ -0,0 +1,167 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import json
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+
+
+def cal_train_time(log_dicts, args):
+ for i, log_dict in enumerate(log_dicts):
+ print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
+ all_times = []
+ for epoch in log_dict.keys():
+ if args.include_outliers:
+ all_times.append(log_dict[epoch]['time'])
+ else:
+ all_times.append(log_dict[epoch]['time'][1:])
+ all_times = np.array(all_times)
+ epoch_ave_time = all_times.mean(-1)
+ slowest_epoch = epoch_ave_time.argmax()
+ fastest_epoch = epoch_ave_time.argmin()
+ std_over_epoch = epoch_ave_time.std()
+ print(f'slowest epoch {slowest_epoch + 1}, '
+ f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
+ print(f'fastest epoch {fastest_epoch + 1}, '
+ f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
+ print(f'time std over epochs is {std_over_epoch:.4f}')
+ print(f'average iter time: {np.mean(all_times):.4f} s/iter')
+ print()
+
+
+def plot_curve(log_dicts, args):
+ if args.backend is not None:
+ plt.switch_backend(args.backend)
+ sns.set_style(args.style)
+ # if legend is None, use {filename}_{key} as legend
+ legend = args.legend
+ if legend is None:
+ legend = []
+ for json_log in args.json_logs:
+ for metric in args.keys:
+ legend.append(f'{json_log}_{metric}')
+ assert len(legend) == (len(args.json_logs) * len(args.keys))
+ metrics = args.keys
+
+ num_metrics = len(metrics)
+ for i, log_dict in enumerate(log_dicts):
+ epochs = list(log_dict.keys())
+ for j, metric in enumerate(metrics):
+ print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
+ if metric not in log_dict[epochs[0]]:
+ raise KeyError(
+ f'{args.json_logs[i]} does not contain metric {metric}')
+ xs = []
+ ys = []
+ num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1]
+ for epoch in epochs:
+ iters = log_dict[epoch]['iter']
+ if log_dict[epoch]['mode'][-1] == 'val':
+ iters = iters[:-1]
+ xs.append(np.array(iters) + (epoch - 1) * num_iters_per_epoch)
+ ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
+ xs = np.concatenate(xs)
+ ys = np.concatenate(ys)
+ plt.xlabel('iter')
+ plt.plot(xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
+ plt.legend()
+ if args.title is not None:
+ plt.title(args.title)
+ if args.out is None:
+ plt.show()
+ else:
+ print(f'save curve to: {args.out}')
+ plt.savefig(args.out)
+ plt.cla()
+
+
+def add_plot_parser(subparsers):
+ parser_plt = subparsers.add_parser(
+ 'plot_curve', help='parser for plotting curves')
+ parser_plt.add_argument(
+ 'json_logs',
+ type=str,
+ nargs='+',
+ help='path of train log in json format')
+ parser_plt.add_argument(
+ '--keys',
+ type=str,
+ nargs='+',
+ default=['top1_acc'],
+ help='the metric that you want to plot')
+ parser_plt.add_argument('--title', type=str, help='title of figure')
+ parser_plt.add_argument(
+ '--legend',
+ type=str,
+ nargs='+',
+ default=None,
+ help='legend of each plot')
+ parser_plt.add_argument(
+ '--backend', type=str, default=None, help='backend of plt')
+ parser_plt.add_argument(
+ '--style', type=str, default='dark', help='style of plt')
+ parser_plt.add_argument('--out', type=str, default=None)
+
+
+def add_time_parser(subparsers):
+ parser_time = subparsers.add_parser(
+ 'cal_train_time',
+ help='parser for computing the average time per training iteration')
+ parser_time.add_argument(
+ 'json_logs',
+ type=str,
+ nargs='+',
+ help='path of train log in json format')
+ parser_time.add_argument(
+ '--include-outliers',
+ action='store_true',
+ help='include the first value of every epoch when computing '
+ 'the average time')
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Analyze Json Log')
+ # currently only support plot curve and calculate average train time
+ subparsers = parser.add_subparsers(dest='task', help='task parser')
+ add_plot_parser(subparsers)
+ add_time_parser(subparsers)
+ args = parser.parse_args()
+ return args
+
+
+def load_json_logs(json_logs):
+ # load and convert json_logs to log_dict, key is epoch, value is a sub dict
+ # keys of sub dict is different metrics, e.g. memory, top1_acc
+ # value of sub dict is a list of corresponding values of all iterations
+ log_dicts = [dict() for _ in json_logs]
+ for json_log, log_dict in zip(json_logs, log_dicts):
+ with open(json_log, 'r') as log_file:
+ for line in log_file:
+ log = json.loads(line.strip())
+ # skip lines without `epoch` field
+ if 'epoch' not in log:
+ continue
+ epoch = log.pop('epoch')
+ if epoch not in log_dict:
+ log_dict[epoch] = defaultdict(list)
+ for k, v in log.items():
+ log_dict[epoch][k].append(v)
+ return log_dicts
+
+
+def main():
+ args = parse_args()
+
+ json_logs = args.json_logs
+ for json_log in json_logs:
+ assert json_log.endswith('.json')
+
+ log_dicts = load_json_logs(json_logs)
+
+ eval(args.task)(log_dicts, args)
+
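+# Example invocations (the log file name and metric keys are illustrative):
+#   python tools/analysis/analyze_logs.py plot_curve work_dir/xxx.log.json \
+#       --keys loss --out curve.png
+#   python tools/analysis/analyze_logs.py cal_train_time work_dir/xxx.log.json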
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/analysis/benchmark_inference.py b/vendor/ViTPose/tools/analysis/benchmark_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..14c0736d5d6c9f7ced255495b095247e9d82e0d6
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/benchmark_inference.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import time
+
+import torch
+from mmcv import Config
+from mmcv.cnn import fuse_conv_bn
+from mmcv.parallel import MMDataParallel
+from mmcv.runner.fp16_utils import wrap_fp16_model
+
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.models import build_posenet
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+        description='Benchmark the inference speed of an MMPose model')
+ parser.add_argument('config', help='test config file path')
+    parser.add_argument(
+        '--log-interval', type=int, default=10, help='interval of logging')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, which will slightly increase '
+        'the inference speed')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+
+ # build the dataloader
+ dataset = build_dataset(cfg.data.val)
+ data_loader = build_dataloader(
+ dataset,
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=False,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_posenet(cfg.model)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+ if args.fuse_conv_bn:
+ model = fuse_conv_bn(model)
+ model = MMDataParallel(model, device_ids=[0])
+
+ # the first several iterations may be very slow so skip them
+ num_warmup = 5
+ pure_inf_time = 0
+
+ # benchmark with total batch and take the average
+ for i, data in enumerate(data_loader):
+
+ torch.cuda.synchronize()
+ start_time = time.perf_counter()
+ with torch.no_grad():
+ model(return_loss=False, **data)
+
+ torch.cuda.synchronize()
+ elapsed = time.perf_counter() - start_time
+
+ if i >= num_warmup:
+ pure_inf_time += elapsed
+ if (i + 1) % args.log_interval == 0:
+ its = (i + 1 - num_warmup) / pure_inf_time
+ print(f'Done item [{i + 1:<3}], {its:.2f} items / s')
+ print(f'Overall average: {its:.2f} items / s')
+ print(f'Total time: {pure_inf_time:.2f} s')
+
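+# Example (the config path is a placeholder):
+#   python tools/analysis/benchmark_inference.py configs/<your_config>.py \
+#       --fuse-conv-bn --log-interval 10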
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/analysis/benchmark_processing.py b/vendor/ViTPose/tools/analysis/benchmark_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..d326f3defbf941fbae256709509e67751ba4da42
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/benchmark_processing.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+"""This file is for benchmark data loading process. It can also be used to
+refresh the memcached cache. The command line to run this file is:
+
+$ python -m cProfile -o program.prof tools/analysis/benchmark_processing.py
+configs/task/method/[config filename]
+
+Note: When debugging, the `workers_per_gpu` in the config should be set to 0
+during benchmark.
+
+It use cProfile to record cpu running time and output to program.prof
+To visualize cProfile output program.prof, use Snakeviz and run:
+$ snakeviz program.prof
+"""
+import argparse
+
+import mmcv
+from mmcv import Config
+
+from mmpose import __version__
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.utils import get_root_logger
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Benchmark data loading')
+ parser.add_argument('config', help='train config file path')
+ args = parser.parse_args()
+ cfg = Config.fromfile(args.config)
+
+ # init logger before other steps
+ logger = get_root_logger()
+ logger.info(f'MMPose Version: {__version__}')
+ logger.info(f'Config: {cfg.text}')
+
+ dataset = build_dataset(cfg.data.train)
+ data_loader = build_dataloader(
+ dataset,
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=False,
+ shuffle=False)
+
+ # Start progress bar after first 5 batches
+ prog_bar = mmcv.ProgressBar(
+ len(dataset) - 5 * cfg.data.samples_per_gpu, start=False)
+ for i, data in enumerate(data_loader):
+ if i == 5:
+ prog_bar.start()
+ for _ in data['img']:
+ if i < 5:
+ continue
+ prog_bar.update()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/analysis/get_flops.py b/vendor/ViTPose/tools/analysis/get_flops.py
new file mode 100644
index 0000000000000000000000000000000000000000..f492a877bce775dcad298e2ba727c6370d8d7706
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/get_flops.py
@@ -0,0 +1,103 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+from functools import partial
+
+import torch
+
+from mmpose.apis.inference import init_pose_model
+
+try:
+ from mmcv.cnn import get_model_complexity_info
+except ImportError:
+ raise ImportError('Please upgrade mmcv to >0.6.2')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Get the FLOPs of a model')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument(
+ '--shape',
+ type=int,
+ nargs='+',
+ default=[256, 192],
+ help='input image size')
+ parser.add_argument(
+ '--input-constructor',
+ '-c',
+ type=str,
+ choices=['none', 'batch'],
+ default='none',
+ help='If specified, it takes a callable method that generates '
+ 'input. Otherwise, it will generate a random tensor with '
+ 'input shape to calculate FLOPs.')
+ parser.add_argument(
+ '--batch-size', '-b', type=int, default=1, help='input batch size')
+ parser.add_argument(
+ '--not-print-per-layer-stat',
+ '-n',
+ action='store_true',
+        help='Whether to print complexity information '
+        'for each layer in a model')
+ args = parser.parse_args()
+ return args
+
+
+def batch_constructor(flops_model, batch_size, input_shape):
+ """Generate a batch of tensors to the model."""
+ batch = {}
+
+ img = torch.ones(()).new_empty(
+ (batch_size, *input_shape),
+ dtype=next(flops_model.parameters()).dtype,
+ device=next(flops_model.parameters()).device)
+
+ batch['img'] = img
+ return batch
+
+
+def main():
+
+ args = parse_args()
+
+ if len(args.shape) == 1:
+ input_shape = (3, args.shape[0], args.shape[0])
+ elif len(args.shape) == 2:
+ input_shape = (3, ) + tuple(args.shape)
+ else:
+ raise ValueError('invalid input shape')
+
+ model = init_pose_model(args.config)
+
+ if args.input_constructor == 'batch':
+ input_constructor = partial(batch_constructor, model, args.batch_size)
+ else:
+ input_constructor = None
+
+ if hasattr(model, 'forward_dummy'):
+ model.forward = model.forward_dummy
+ else:
+ raise NotImplementedError(
+            'FLOPs counter is currently not supported with {}'.
+ format(model.__class__.__name__))
+
+ flops, params = get_model_complexity_info(
+ model,
+ input_shape,
+ input_constructor=input_constructor,
+ print_per_layer_stat=(not args.not_print_per_layer_stat))
+ split_line = '=' * 30
+ input_shape = (args.batch_size, ) + input_shape
+ print(f'{split_line}\nInput shape: {input_shape}\n'
+ f'Flops: {flops}\nParams: {params}\n{split_line}')
+ print('!!!Please be cautious if you use the results in papers. '
+ 'You may need to check if all ops are supported and verify that the '
+ 'flops computation is correct.')
+
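+# Example (the config path is a placeholder):
+#   python tools/analysis/get_flops.py configs/<your_config>.py \
+#       --shape 256 192 --input-constructor batch --batch-size 8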
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/analysis/print_config.py b/vendor/ViTPose/tools/analysis/print_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3538ef56bdd07a841352c138ccf23ac3390561a
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/print_config.py
@@ -0,0 +1,27 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+
+from mmcv import Config, DictAction
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Print the whole config')
+ parser.add_argument('config', help='config file path')
+ parser.add_argument(
+ '--options', nargs='+', action=DictAction, help='arguments in dict')
+ args = parser.parse_args()
+
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ if args.options is not None:
+ cfg.merge_from_dict(args.options)
+ print(f'Config:\n{cfg.pretty_text}')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/analysis/speed_test.py b/vendor/ViTPose/tools/analysis/speed_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..fef9e2d205ebbff2bf228c75e7e95fc6ac06f399
--- /dev/null
+++ b/vendor/ViTPose/tools/analysis/speed_test.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import time
+
+import torch
+from mmcv import Config
+from mmcv.cnn import fuse_conv_bn
+from mmcv.parallel import MMDataParallel
+from mmcv.runner.fp16_utils import wrap_fp16_model
+
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.models import build_posenet
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+        description='Benchmark the inference speed of an MMPose model')
+ parser.add_argument('config', help='test config file path')
+    parser.add_argument('--bz', type=int, default=32, help='batch size')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+
+    # Since we only care about the forward speed of the network,
+    # disable test-time augmentation and extra post-processing.
+    cfg.model.pretrained = None
+    cfg.model.test_cfg.flip_test = False
+    cfg.model.test_cfg.use_udp = False
+    cfg.model.test_cfg.post_process = 'none'
+
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+
+ # build the dataloader
+ dataset = build_dataset(cfg.data.val)
+ data_loader = build_dataloader(
+ dataset,
+ samples_per_gpu=args.bz,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=False,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_posenet(cfg.model)
+ model = MMDataParallel(model, device_ids=[0])
+ model.eval()
+
+ # get the example data
+ for i, data in enumerate(data_loader):
+ break
+
+ # the first several iterations may be very slow so skip them
+ num_warmup = 100
+ inference_times = 100
+
+ with torch.no_grad():
+ start_time = time.perf_counter()
+
+ for i in range(num_warmup):
+ torch.cuda.synchronize()
+ model(return_loss=False, **data)
+ torch.cuda.synchronize()
+
+ elapsed = time.perf_counter() - start_time
+    print(f'warmup took {elapsed:.2f} s')
+
+ start_time = time.perf_counter()
+
+ for i in range(inference_times):
+ torch.cuda.synchronize()
+ model(return_loss=False, **data)
+ torch.cuda.synchronize()
+
+ elapsed = time.perf_counter() - start_time
+ fps = args.bz * inference_times / elapsed
+    print(f'fps: {fps:.2f}')
+
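+# Example (the config path is a placeholder):
+#   python tools/analysis/speed_test.py configs/<your_config>.py --bz 32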
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/dataset/h36m_to_coco.py b/vendor/ViTPose/tools/dataset/h36m_to_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..f306d409ee22c9667e1d4f9d4510b3816465ad00
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/h36m_to_coco.py
@@ -0,0 +1,165 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os.path as osp
+from functools import wraps
+
+import mmcv
+import numpy as np
+from PIL import Image
+
+from mmpose.core import SimpleCamera
+
+
+def _keypoint_camera_to_world(keypoints,
+ camera_params,
+ image_name=None,
+ dataset='Body3DH36MDataset'):
+ """Project 3D keypoints from the camera space to the world space.
+
+ Args:
+ keypoints (np.ndarray): 3D keypoints in shape [..., 3]
+ camera_params (dict): Parameters for all cameras.
+ image_name (str): The image name to specify the camera.
+ dataset (str): The dataset type, e.g., Body3DH36MDataset.
+ """
+ cam_key = None
+ if dataset == 'Body3DH36MDataset':
+ subj, rest = osp.basename(image_name).split('_', 1)
+ _, rest = rest.split('.', 1)
+ camera, rest = rest.split('_', 1)
+ cam_key = (subj, camera)
+ else:
+ raise NotImplementedError
+
+ camera = SimpleCamera(camera_params[cam_key])
+ keypoints_world = keypoints.copy()
+ keypoints_world[..., :3] = camera.camera_to_world(keypoints[..., :3])
+
+ return keypoints_world
+
+
+def _get_bbox_xywh(center, scale, w=200, h=200):
+ w = w * scale
+ h = h * scale
+ x = center[0] - w / 2
+ y = center[1] - h / 2
+ return [x, y, w, h]
+
+
+def mmcv_track_func(func):
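+    """Wrap ``func`` so it can be called with a single tuple of arguments.
+
+    ``mmcv.track_parallel_progress`` passes each task as one tuple; the
+    wrapper unpacks that tuple into positional arguments for ``func``.
+    """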
+
+ @wraps(func)
+ def wrapped_func(args):
+ return func(*args)
+
+ return wrapped_func
+
+
+@mmcv_track_func
+def _get_img_info(img_idx, img_name, img_root):
+ try:
+ im = Image.open(osp.join(img_root, img_name))
+ w, h = im.size
+ except: # noqa: E722
+ return None
+
+ img = {
+ 'file_name': img_name,
+ 'height': h,
+ 'width': w,
+ 'id': img_idx + 1,
+ }
+ return img
+
+
+@mmcv_track_func
+def _get_ann(idx, kpt_2d, kpt_3d, center, scale, imgname, camera_params):
+ bbox = _get_bbox_xywh(center, scale)
+ kpt_3d = _keypoint_camera_to_world(kpt_3d, camera_params, imgname)
+
+ ann = {
+ 'id': idx + 1,
+ 'category_id': 1,
+ 'image_id': idx + 1,
+ 'iscrowd': 0,
+ 'bbox': bbox,
+ 'area': bbox[2] * bbox[3],
+ 'num_keypoints': 17,
+ 'keypoints': kpt_2d.reshape(-1).tolist(),
+ 'keypoints_3d': kpt_3d.reshape(-1).tolist()
+ }
+
+ return ann
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--ann-file', type=str, default='tests/data/h36m/test_h36m_body3d.npz')
+ parser.add_argument(
+ '--camera-param-file', type=str, default='tests/data/h36m/cameras.pkl')
+ parser.add_argument('--img-root', type=str, default='tests/data/h36m')
+ parser.add_argument(
+ '--out-file', type=str, default='tests/data/h36m/h36m_coco.json')
+ parser.add_argument('--full-img-name', action='store_true')
+
+ args = parser.parse_args()
+
+ h36m_data = np.load(args.ann_file)
+ h36m_camera_params = mmcv.load(args.camera_param_file)
+ h36m_coco = {}
+
+ # categories
+ h36m_cats = [{
+ 'supercategory':
+ 'person',
+ 'id':
+ 1,
+ 'name':
+ 'person',
+ 'keypoints': [
+ 'root (pelvis)', 'left_hip', 'left_knee', 'left_foot', 'right_hip',
+ 'right_knee', 'right_foot', 'spine', 'thorax', 'neck_base', 'head',
+ 'left_shoulder', 'left_elbow', 'left_wrist', 'right_shoulder',
+ 'right_elbow', 'right_wrist'
+ ],
+ 'skeleton': [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7],
+ [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13],
+ [8, 14], [14, 15], [15, 16]],
+ }]
+
+ # images
+ imgnames = h36m_data['imgname']
+ if not args.full_img_name:
+ imgnames = [osp.basename(fn) for fn in imgnames]
+ tasks = [(idx, fn, args.img_root) for idx, fn in enumerate(imgnames)]
+
+ h36m_imgs = mmcv.track_parallel_progress(_get_img_info, tasks, nproc=12)
+
+ # annotations
+ kpts_2d = h36m_data['part']
+ kpts_3d = h36m_data['S']
+ centers = h36m_data['center']
+ scales = h36m_data['scale']
+ tasks = [(idx, ) + args + (h36m_camera_params, )
+ for idx, args in enumerate(
+ zip(kpts_2d, kpts_3d, centers, scales, imgnames))]
+
+ h36m_anns = mmcv.track_parallel_progress(_get_ann, tasks, nproc=12)
+
+ # remove invalid data
+ h36m_imgs = [img for img in h36m_imgs if img is not None]
+ h36m_img_ids = set([img['id'] for img in h36m_imgs])
+ h36m_anns = [ann for ann in h36m_anns if ann['image_id'] in h36m_img_ids]
+
+ h36m_coco = {
+ 'categories': h36m_cats,
+ 'images': h36m_imgs,
+ 'annotations': h36m_anns,
+ }
+
+ mmcv.dump(h36m_coco, args.out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/dataset/mat2json.py b/vendor/ViTPose/tools/dataset/mat2json.py
new file mode 100644
index 0000000000000000000000000000000000000000..caf7453e70891ae1707a0b2f33d622253904a6ac
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/mat2json.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import json
+import time
+
+from scipy.io import loadmat
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Converting the predicted .mat file to .json file.')
+ parser.add_argument('pred_mat_file', help='input prediction mat file.')
+ parser.add_argument(
+ 'gt_json_file',
+ help='input ground-truth json file to get the image name. '
+ 'Default: "data/mpii/mpii_val.json" ')
+ parser.add_argument('output_json_file', help='output converted json file.')
+ args = parser.parse_args()
+ return args
+
+
+def save_json(list_file, path):
+ with open(path, 'w') as f:
+ json.dump(list_file, f, indent=4)
+ return 0
+
+
+def convert_mat(pred_mat_file, gt_json_file, output_json_file):
+ res = loadmat(pred_mat_file)
+ preds = res['preds']
+ N = preds.shape[0]
+
+ with open(gt_json_file) as anno_file:
+ anno = json.load(anno_file)
+
+ assert len(anno) == N
+
+ instance = {}
+
+ for pred, ann in zip(preds, anno):
+ ann.pop('joints_vis')
+ ann['joints'] = pred.tolist()
+
+ instance['annotations'] = anno
+ instance['info'] = {}
+ instance['info']['description'] = 'Converted MPII prediction.'
+ instance['info']['year'] = time.strftime('%Y', time.localtime())
+ instance['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ save_json(instance, output_json_file)
+
+
+def main():
+ args = parse_args()
+ convert_mat(args.pred_mat_file, args.gt_json_file, args.output_json_file)
+
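+# Example (the .mat and output file names are placeholders):
+#   python tools/dataset/mat2json.py pred.mat data/mpii/mpii_val.json \
+#       pred_annotations.json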
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/dataset/parse_animalpose_dataset.py b/vendor/ViTPose/tools/dataset/parse_animalpose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..db37860164ea5ee00c3d2e2b354701ad24bb9f9e
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/parse_animalpose_dataset.py
@@ -0,0 +1,436 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os
+import re
+import time
+import warnings
+
+import cv2
+import numpy as np
+import xmltodict
+from xtcocotools.coco import COCO
+
+np.random.seed(0)
+
+
+def list_all_files(root_dir, ext='.xml'):
+ """List all files in the root directory and all its sub directories.
+
+ :param root_dir: root directory
+ :param ext: filename extension
+ :return: list of files
+ """
+ files = []
+ file_list = os.listdir(root_dir)
+ for i in range(0, len(file_list)):
+ path = os.path.join(root_dir, file_list[i])
+ if os.path.isdir(path):
+ files.extend(list_all_files(path))
+ if os.path.isfile(path):
+ if path.lower().endswith(ext):
+ files.append(path)
+ return files
+
+
+def get_anno_info():
+ keypoints_info = [
+ 'L_Eye',
+ 'R_Eye',
+ 'L_EarBase',
+ 'R_EarBase',
+ 'Nose',
+ 'Throat',
+ 'TailBase',
+ 'Withers',
+ 'L_F_Elbow',
+ 'R_F_Elbow',
+ 'L_B_Elbow',
+ 'R_B_Elbow',
+ 'L_F_Knee',
+ 'R_F_Knee',
+ 'L_B_Knee',
+ 'R_B_Knee',
+ 'L_F_Paw',
+ 'R_F_Paw',
+ 'L_B_Paw',
+ 'R_B_Paw',
+ ]
+ skeleton_info = [[1, 2], [1, 3], [2, 4], [1, 5], [2, 5], [5, 6], [6, 8],
+ [7, 8], [6, 9], [9, 13], [13, 17], [6, 10], [10, 14],
+ [14, 18], [7, 11], [11, 15], [15, 19], [7, 12], [12, 16],
+ [16, 20]]
+ category_info = [{
+ 'supercategory': 'animal',
+ 'id': 1,
+ 'name': 'animal',
+ 'keypoints': keypoints_info,
+ 'skeleton': skeleton_info
+ }]
+
+ return keypoints_info, skeleton_info, category_info
+
+
+def xml2coco_trainval(file_list, img_root, save_path, start_ann_id=0):
+ """Save annotations in coco-format.
+
+ :param file_list: list of data annotation files.
+ :param img_root: the root dir to load images.
+ :param save_path: the path to save transformed annotation file.
+ :param start_ann_id: the starting point to count the annotation id.
+ """
+ images = []
+ annotations = []
+ img_ids = []
+ ann_ids = []
+
+ ann_id = start_ann_id
+
+ name2id = {
+ 'L_Eye': 0,
+ 'R_Eye': 1,
+ 'L_EarBase': 2,
+ 'R_EarBase': 3,
+ 'Nose': 4,
+ 'Throat': 5,
+ 'TailBase': 6,
+ 'Withers': 7,
+ 'L_F_Elbow': 8,
+ 'R_F_Elbow': 9,
+ 'L_B_Elbow': 10,
+ 'R_B_Elbow': 11,
+ 'L_F_Knee': 12,
+ 'R_F_Knee': 13,
+ 'L_B_Knee': 14,
+ 'R_B_Knee': 15,
+ 'L_F_Paw': 16,
+ 'R_F_Paw': 17,
+ 'L_B_Paw': 18,
+ 'R_B_Paw': 19
+ }
+ for file in file_list:
+ data_anno = xmltodict.parse(open(file).read())['annotation']
+
+ img_id = int(data_anno['image'].split('_')[0] +
+ data_anno['image'].split('_')[1])
+
+ if img_id not in img_ids:
+ image_name = 'VOC2012/JPEGImages/' + data_anno['image'] + '.jpg'
+ img = cv2.imread(os.path.join(img_root, image_name))
+
+ image = {}
+ image['id'] = img_id
+ image['file_name'] = image_name
+ image['height'] = img.shape[0]
+ image['width'] = img.shape[1]
+
+ images.append(image)
+ img_ids.append(img_id)
+ else:
+ pass
+
+ keypoint_anno = data_anno['keypoints']['keypoint']
+ assert len(keypoint_anno) == 20
+
+ keypoints = np.zeros([20, 3], dtype=np.float32)
+
+ for kpt_anno in keypoint_anno:
+ keypoint_name = kpt_anno['@name']
+ keypoint_id = name2id[keypoint_name]
+
+ visibility = int(kpt_anno['@visible'])
+
+ if visibility == 0:
+ continue
+ else:
+ keypoints[keypoint_id, 0] = float(kpt_anno['@x'])
+ keypoints[keypoint_id, 1] = float(kpt_anno['@y'])
+ keypoints[keypoint_id, 2] = 2
+
+ anno = {}
+ anno['keypoints'] = keypoints.reshape(-1).tolist()
+ anno['image_id'] = img_id
+ anno['id'] = ann_id
+ anno['num_keypoints'] = int(sum(keypoints[:, 2] > 0))
+
+ visible_bounds = data_anno['visible_bounds']
+ anno['bbox'] = [
+ float(visible_bounds['@xmin']),
+ float(visible_bounds['@ymin']),
+ float(visible_bounds['@width']),
+ float(visible_bounds['@height'])
+ ]
+ anno['iscrowd'] = 0
+ anno['area'] = float(anno['bbox'][2] * anno['bbox'][3])
+ anno['category_id'] = 1
+
+ annotations.append(anno)
+ ann_ids.append(ann_id)
+ ann_id += 1
+
+ cocotype = {}
+
+ cocotype['info'] = {}
+ cocotype['info'][
+ 'description'] = 'AnimalPose dataset Generated by MMPose Team'
+ cocotype['info']['version'] = '1.0'
+ cocotype['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ cocotype['images'] = images
+ cocotype['annotations'] = annotations
+
+ keypoints_info, skeleton_info, category_info = get_anno_info()
+
+ cocotype['categories'] = category_info
+
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
+ json.dump(cocotype, open(save_path, 'w'), indent=4)
+ print('number of images:', len(img_ids))
+ print('number of annotations:', len(ann_ids))
+ print(f'done {save_path}')
+
+
+def xml2coco_test(file_list, img_root, save_path, start_ann_id=0):
+ """Save annotations in coco-format.
+
+ :param file_list: list of data annotation files.
+ :param img_root: the root dir to load images.
+ :param save_path: the path to save transformed annotation file.
+ :param start_ann_id: the starting point to count the annotation id.
+ """
+ images = []
+ annotations = []
+ img_ids = []
+ ann_ids = []
+
+ ann_id = start_ann_id
+
+ name2id = {
+ 'L_eye': 0,
+ 'R_eye': 1,
+ 'L_ear': 2,
+ 'R_ear': 3,
+ 'Nose': 4,
+ 'Throat': 5,
+ 'Tail': 6,
+ 'withers': 7,
+ 'L_F_elbow': 8,
+ 'R_F_elbow': 9,
+ 'L_B_elbow': 10,
+ 'R_B_elbow': 11,
+ 'L_F_knee': 12,
+ 'R_F_knee': 13,
+ 'L_B_knee': 14,
+ 'R_B_knee': 15,
+ 'L_F_paw': 16,
+ 'R_F_paw': 17,
+ 'L_B_paw': 18,
+ 'R_B_paw': 19
+ }
+
+ cat2id = {'cat': 1, 'cow': 2, 'dog': 3, 'horse': 4, 'sheep': 5}
+
+ for file in file_list:
+ data_anno = xmltodict.parse(open(file).read())['annotation']
+
+ category_id = cat2id[data_anno['category']]
+
+ img_id = category_id * 1000 + int(
+ re.findall(r'\d+', data_anno['image'])[0])
+
+ assert img_id not in img_ids
+
+ # prepare images
+ image_name = os.path.join('animalpose_image_part2',
+ data_anno['category'], data_anno['image'])
+ img = cv2.imread(os.path.join(img_root, image_name))
+
+ image = {}
+ image['id'] = img_id
+ image['file_name'] = image_name
+ image['height'] = img.shape[0]
+ image['width'] = img.shape[1]
+
+ images.append(image)
+ img_ids.append(img_id)
+
+ # prepare annotations
+ keypoint_anno = data_anno['keypoints']['keypoint']
+ keypoints = np.zeros([20, 3], dtype=np.float32)
+
+ for kpt_anno in keypoint_anno:
+ keypoint_name = kpt_anno['@name']
+ keypoint_id = name2id[keypoint_name]
+
+ visibility = int(kpt_anno['@visible'])
+
+ if visibility == 0:
+ continue
+ else:
+ keypoints[keypoint_id, 0] = float(kpt_anno['@x'])
+ keypoints[keypoint_id, 1] = float(kpt_anno['@y'])
+ keypoints[keypoint_id, 2] = 2
+
+ anno = {}
+ anno['keypoints'] = keypoints.reshape(-1).tolist()
+ anno['image_id'] = img_id
+ anno['id'] = ann_id
+ anno['num_keypoints'] = int(sum(keypoints[:, 2] > 0))
+
+ visible_bounds = data_anno['visible_bounds']
+ anno['bbox'] = [
+ float(visible_bounds['@xmin']),
+ float(visible_bounds['@xmax']
+ ), # typo in original xml: should be 'ymin'
+ float(visible_bounds['@width']),
+ float(visible_bounds['@height'])
+ ]
+ anno['iscrowd'] = 0
+ anno['area'] = float(anno['bbox'][2] * anno['bbox'][3])
+ anno['category_id'] = 1
+
+ annotations.append(anno)
+ ann_ids.append(ann_id)
+ ann_id += 1
+
+ cocotype = {}
+
+ cocotype['info'] = {}
+ cocotype['info'][
+ 'description'] = 'AnimalPose dataset Generated by MMPose Team'
+ cocotype['info']['version'] = '1.0'
+ cocotype['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ cocotype['images'] = images
+ cocotype['annotations'] = annotations
+
+ keypoints_info, skeleton_info, category_info = get_anno_info()
+
+ cocotype['categories'] = category_info
+
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
+ json.dump(cocotype, open(save_path, 'w'), indent=4)
+ print('=========================================================')
+ print('number of images:', len(img_ids))
+ print('number of annotations:', len(ann_ids))
+ print(f'done {save_path}')
+
+
+def split_train_val(work_dir, trainval_file, train_file, val_file,
+ val_ann_num):
+ """Split train-val json file into training and validation files.
+
+ :param work_dir: path to load train-val json file, and save split files.
+ :param trainval_file: The input json file combining both train and val.
+    :param train_file: The output json file for training.
+    :param val_file: The output json file for validation.
+ :param val_ann_num: the number of validation annotations.
+ """
+
+ coco = COCO(os.path.join(work_dir, trainval_file))
+
+ img_list = list(coco.imgs.keys())
+ np.random.shuffle(img_list)
+
+ count = 0
+
+ images_train = []
+ images_val = []
+ annotations_train = []
+ annotations_val = []
+
+ for img_id in img_list:
+ ann_ids = coco.getAnnIds(img_id)
+
+ if count + len(ann_ids) <= val_ann_num:
+ # for validation
+ count += len(ann_ids)
+ images_val.append(coco.imgs[img_id])
+ for ann_id in ann_ids:
+ annotations_val.append(coco.anns[ann_id])
+
+ else:
+ images_train.append(coco.imgs[img_id])
+ for ann_id in ann_ids:
+ annotations_train.append(coco.anns[ann_id])
+
+ if count == val_ann_num:
+ print(f'We have found {count} annotations for validation.')
+ else:
+ warnings.warn(
+ f'We only found {count} annotations, instead of {val_ann_num}.')
+
+ cocotype_train = {}
+ cocotype_val = {}
+
+ keypoints_info, skeleton_info, category_info = get_anno_info()
+
+ cocotype_train['info'] = {}
+ cocotype_train['info'][
+ 'description'] = 'AnimalPose dataset Generated by MMPose Team'
+ cocotype_train['info']['version'] = '1.0'
+ cocotype_train['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype_train['info']['date_created'] = time.strftime(
+ '%Y/%m/%d', time.localtime())
+ cocotype_train['images'] = images_train
+ cocotype_train['annotations'] = annotations_train
+ cocotype_train['categories'] = category_info
+
+ json.dump(
+ cocotype_train,
+ open(os.path.join(work_dir, train_file), 'w'),
+ indent=4)
+ print('=========================================================')
+ print('number of images:', len(images_train))
+ print('number of annotations:', len(annotations_train))
+ print(f'done {train_file}')
+
+ cocotype_val['info'] = {}
+ cocotype_val['info'][
+ 'description'] = 'AnimalPose dataset Generated by MMPose Team'
+ cocotype_val['info']['version'] = '1.0'
+ cocotype_val['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype_val['info']['date_created'] = time.strftime(
+ '%Y/%m/%d', time.localtime())
+ cocotype_val['images'] = images_val
+ cocotype_val['annotations'] = annotations_val
+ cocotype_val['categories'] = category_info
+
+ json.dump(
+ cocotype_val, open(os.path.join(work_dir, val_file), 'w'), indent=4)
+ print('=========================================================')
+ print('number of images:', len(images_val))
+ print('number of annotations:', len(annotations_val))
+ print(f'done {val_file}')
+
+
+dataset_dir = 'data/animalpose/'
+
+# We choose the images from PascalVOC for train + val
+# In total, train+val: 3608 images, 5117 annotations
+xml2coco_trainval(
+ list_all_files(os.path.join(dataset_dir, 'PASCAL2011_animal_annotation')),
+ dataset_dir,
+ os.path.join(dataset_dir, 'annotations', 'animalpose_trainval.json'),
+ start_ann_id=1000000)
+
+# train: 2798 images, 4000 annotations
+# val: 810 images, 1117 annotations
+split_train_val(
+ os.path.join(dataset_dir, 'annotations'),
+ 'animalpose_trainval.json',
+ 'animalpose_train.json',
+ 'animalpose_val.json',
+ val_ann_num=1117)
+
+# We choose the remaining 1000 images for test
+# 1000 images, 1000 annotations
+xml2coco_test(
+ list_all_files(os.path.join(dataset_dir, 'animalpose_anno2')),
+ dataset_dir,
+ os.path.join(dataset_dir, 'annotations', 'animalpose_test.json'),
+ start_ann_id=0)
diff --git a/vendor/ViTPose/tools/dataset/parse_cofw_dataset.py b/vendor/ViTPose/tools/dataset/parse_cofw_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..46b6affcb6ddcd9454856f96feca1faa1f010b44
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/parse_cofw_dataset.py
@@ -0,0 +1,97 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os
+import time
+
+import cv2
+import h5py
+import numpy as np
+
+mat_files = ['COFW_train_color.mat', 'COFW_test_color.mat']
+dataset_dir = 'data/cofw/'
+
+image_root = os.path.join(dataset_dir, 'images/')
+annotation_root = os.path.join(dataset_dir, 'annotations/')
+
+os.makedirs(image_root, exist_ok=True)
+os.makedirs(annotation_root, exist_ok=True)
+
+cnt = 0
+for mat_file in mat_files:
+ mat = h5py.File(os.path.join(dataset_dir, mat_file), 'r')
+
+ if 'train' in mat_file:
+ imgs = mat['IsTr']
+ pts = mat['phisTr']
+ bboxes = mat['bboxesTr']
+ is_train = True
+ json_file = 'cofw_train.json'
+ else:
+ imgs = mat['IsT']
+ pts = mat['phisT']
+ bboxes = mat['bboxesT']
+ is_train = False
+ json_file = 'cofw_test.json'
+
+ images = []
+ annotations = []
+
+ num = pts.shape[1]
+ for idx in range(0, num):
+ cnt += 1
+ img = np.array(mat[imgs[0, idx]]).transpose()
+ keypoints = pts[:, idx].reshape(3, -1).transpose()
+ # 2 for valid and 1 for occlusion
+ keypoints[:, 2] = 2 - keypoints[:, 2]
+ # matlab 1-index to python 0-index
+ keypoints[:, :2] -= 1
+ bbox = bboxes[:, idx]
+
+ # check nonnegativity
+ bbox[bbox < 0] = 0
+ keypoints[keypoints < 0] = 0
+
+ image = {}
+ image['id'] = cnt
+ image['file_name'] = f'{str(cnt).zfill(6)}.jpg'
+ image['height'] = img.shape[0]
+ image['width'] = img.shape[1]
+ cv2.imwrite(
+ os.path.join(image_root, image['file_name']),
+ cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
+ images.append(image)
+
+ anno = {}
+ anno['keypoints'] = keypoints.reshape(-1).tolist()
+ anno['image_id'] = cnt
+ anno['id'] = cnt
+ anno['num_keypoints'] = len(keypoints) # all keypoints are labelled
+ anno['bbox'] = bbox.tolist()
+ anno['iscrowd'] = 0
+ anno['area'] = anno['bbox'][2] * anno['bbox'][3]
+ anno['category_id'] = 1
+
+ annotations.append(anno)
+
+ cocotype = {}
+
+ cocotype['info'] = {}
+ cocotype['info']['description'] = 'COFW Generated by MMPose Team'
+ cocotype['info']['version'] = '1.0'
+ cocotype['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ cocotype['images'] = images
+ cocotype['annotations'] = annotations
+ cocotype['categories'] = [{
+ 'supercategory': 'person',
+ 'id': 1,
+ 'name': 'face',
+ 'keypoints': [],
+ 'skeleton': []
+ }]
+
+ ann_path = os.path.join(annotation_root, json_file)
+ json.dump(cocotype, open(ann_path, 'w'))
+ print(f'done {ann_path}')
diff --git a/vendor/ViTPose/tools/dataset/parse_deepposekit_dataset.py b/vendor/ViTPose/tools/dataset/parse_deepposekit_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fe7ae398f4f94a22e36cd76e377c5d5bcbf193d
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/parse_deepposekit_dataset.py
@@ -0,0 +1,180 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os
+import time
+
+import cv2
+import h5py
+import numpy as np
+
+np.random.seed(0)
+
+
+def save_coco_anno(keypoints_all,
+ annotated_all,
+ imgs_all,
+ keypoints_info,
+ skeleton_info,
+ dataset,
+ img_root,
+ save_path,
+ start_img_id=0,
+ start_ann_id=0):
+ """Save annotations in coco-format.
+
+ :param keypoints_all: keypoint annotations.
+ :param annotated_all: images annotated or not.
+ :param imgs_all: the array of images.
+ :param keypoints_info: information about keypoint name.
+ :param skeleton_info: information about skeleton connection.
+ :param dataset: information about dataset name.
+ :param img_root: the path to save images.
+ :param save_path: the path to save transformed annotation file.
+ :param start_img_id: the starting point to count the image id.
+ :param start_ann_id: the starting point to count the annotation id.
+ """
+ images = []
+ annotations = []
+
+ img_id = start_img_id
+ ann_id = start_ann_id
+
+ num_annotations, keypoints_num, _ = keypoints_all.shape
+
+ for i in range(num_annotations):
+ img = imgs_all[i]
+ keypoints = np.concatenate(
+ [keypoints_all[i], annotated_all[i][:, None] * 2], axis=1)
+
+ min_x, min_y = np.min(keypoints[keypoints[:, 2] > 0, :2], axis=0)
+ max_x, max_y = np.max(keypoints[keypoints[:, 2] > 0, :2], axis=0)
+
+ anno = {}
+ anno['keypoints'] = keypoints.reshape(-1).tolist()
+ anno['image_id'] = img_id
+ anno['id'] = ann_id
+ anno['num_keypoints'] = int(sum(keypoints[:, 2] > 0))
+ anno['bbox'] = [
+ float(min_x),
+ float(min_y),
+ float(max_x - min_x + 1),
+ float(max_y - min_y + 1)
+ ]
+ anno['iscrowd'] = 0
+ anno['area'] = anno['bbox'][2] * anno['bbox'][3]
+ anno['category_id'] = 1
+
+ annotations.append(anno)
+ ann_id += 1
+
+ image = {}
+ image['id'] = img_id
+ image['file_name'] = f'{img_id}.jpg'
+ image['height'] = img.shape[0]
+ image['width'] = img.shape[1]
+
+ images.append(image)
+ img_id += 1
+
+ cv2.imwrite(os.path.join(img_root, image['file_name']), img)
+
+ skeleton = np.concatenate(
+ [np.arange(keypoints_num)[:, None], skeleton_info[:, 0][:, None]],
+ axis=1) + 1
+ skeleton = skeleton[skeleton.min(axis=1) > 0]
+
+ cocotype = {}
+
+ cocotype['info'] = {}
+ cocotype['info'][
+ 'description'] = 'DeepPoseKit-Data Generated by MMPose Team'
+ cocotype['info']['version'] = '1.0'
+ cocotype['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ cocotype['images'] = images
+ cocotype['annotations'] = annotations
+ cocotype['categories'] = [{
+ 'supercategory': 'animal',
+ 'id': 1,
+ 'name': dataset,
+ 'keypoints': keypoints_info,
+ 'skeleton': skeleton.tolist()
+ }]
+
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
+ json.dump(cocotype, open(save_path, 'w'), indent=4)
+ print('number of images:', img_id)
+ print('number of annotations:', ann_id)
+ print(f'done {save_path}')
+
+
+for dataset in ['fly', 'locust', 'zebra']:
+ keypoints_info = []
+ if dataset == 'fly':
+ keypoints_info = [
+ 'head', 'eyeL', 'eyeR', 'neck', 'thorax', 'abdomen', 'forelegR1',
+ 'forelegR2', 'forelegR3', 'forelegR4', 'midlegR1', 'midlegR2',
+ 'midlegR3', 'midlegR4', 'hindlegR1', 'hindlegR2', 'hindlegR3',
+ 'hindlegR4', 'forelegL1', 'forelegL2', 'forelegL3', 'forelegL4',
+ 'midlegL1', 'midlegL2', 'midlegL3', 'midlegL4', 'hindlegL1',
+ 'hindlegL2', 'hindlegL3', 'hindlegL4', 'wingL', 'wingR'
+ ]
+ elif dataset == 'locust':
+ keypoints_info = [
+ 'head', 'neck', 'thorax', 'abdomen1', 'abdomen2', 'anttipL',
+ 'antbaseL', 'eyeL', 'forelegL1', 'forelegL2', 'forelegL3',
+ 'forelegL4', 'midlegL1', 'midlegL2', 'midlegL3', 'midlegL4',
+ 'hindlegL1', 'hindlegL2', 'hindlegL3', 'hindlegL4', 'anttipR',
+ 'antbaseR', 'eyeR', 'forelegR1', 'forelegR2', 'forelegR3',
+ 'forelegR4', 'midlegR1', 'midlegR2', 'midlegR3', 'midlegR4',
+ 'hindlegR1', 'hindlegR2', 'hindlegR3', 'hindlegR4'
+ ]
+ elif dataset == 'zebra':
+ keypoints_info = [
+ 'snout', 'head', 'neck', 'forelegL1', 'forelegR1', 'hindlegL1',
+ 'hindlegR1', 'tailbase', 'tailtip'
+ ]
+ else:
+        raise NotImplementedError()
+
+ dataset_dir = f'data/DeepPoseKit-Data/datasets/{dataset}'
+
+ with h5py.File(
+ os.path.join(dataset_dir, 'annotation_data_release.h5'), 'r') as f:
+ # List all groups
+ annotations = np.array(f['annotations'])
+ annotated = np.array(f['annotated'])
+ images = np.array(f['images'])
+ skeleton_info = np.array(f['skeleton'])
+
+ annotation_num, kpt_num, _ = annotations.shape
+
+ data_list = np.arange(0, annotation_num)
+ np.random.shuffle(data_list)
+
+ val_data_num = annotation_num // 10
+ train_data_num = annotation_num - val_data_num
+
+ train_list = data_list[0:train_data_num]
+ val_list = data_list[train_data_num:]
+
+ img_root = os.path.join(dataset_dir, 'images')
+ os.makedirs(img_root, exist_ok=True)
+
+ save_coco_anno(
+ annotations[train_list], annotated[train_list], images[train_list],
+ keypoints_info, skeleton_info, dataset, img_root,
+ os.path.join(dataset_dir, 'annotations', f'{dataset}_train.json'))
+ save_coco_anno(
+ annotations[val_list],
+ annotated[val_list],
+ images[val_list],
+ keypoints_info,
+ skeleton_info,
+ dataset,
+ img_root,
+ os.path.join(dataset_dir, 'annotations', f'{dataset}_test.json'),
+ start_img_id=train_data_num,
+ start_ann_id=train_data_num)
diff --git a/vendor/ViTPose/tools/dataset/parse_macaquepose_dataset.py b/vendor/ViTPose/tools/dataset/parse_macaquepose_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..85801a2225c0c08c6a1b67778b8241a14b79e49a
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/parse_macaquepose_dataset.py
@@ -0,0 +1,182 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import csv
+import json
+import os
+import time
+
+import cv2
+import numpy as np
+
+np.random.seed(0)
+
+
+def get_poly_area(x, y):
+ """Calculate area of polygon given (x,y) coordinates (Shoelace formula)
+
+ :param x: np.ndarray(N, )
+ :param y: np.ndarray(N, )
+ :return: area
+ """
+ return float(0.5 *
+ np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))))
+
+
+def get_seg_area(segmentations):
+ area = 0
+ for segmentation in segmentations:
+ area += get_poly_area(segmentation[:, 0], segmentation[:, 1])
+ return area
+
+
+def save_coco_anno(data_annotation,
+ img_root,
+ save_path,
+ start_img_id=0,
+ start_ann_id=0,
+ kpt_num=17):
+ """Save annotations in coco-format.
+
+ :param data_annotation: list of data annotation.
+ :param img_root: the root dir to load images.
+ :param save_path: the path to save transformed annotation file.
+ :param start_img_id: the starting point to count the image id.
+ :param start_ann_id: the starting point to count the annotation id.
+ :param kpt_num: the number of keypoint.
+ """
+ images = []
+ annotations = []
+
+ img_id = start_img_id
+ ann_id = start_ann_id
+
+ for i in range(0, len(data_annotation)):
+ data_anno = data_annotation[i]
+ image_name = data_anno[0]
+
+ img = cv2.imread(os.path.join(img_root, image_name))
+
+ kp_string = data_anno[1]
+ kps = json.loads(kp_string)
+
+ seg_string = data_anno[2]
+ segs = json.loads(seg_string)
+
+ for kp, seg in zip(kps, segs):
+ keypoints = np.zeros([kpt_num, 3])
+ for ind, p in enumerate(kp):
+ if p['position'] is None:
+ continue
+ else:
+ keypoints[ind, 0] = p['position'][0]
+ keypoints[ind, 1] = p['position'][1]
+ keypoints[ind, 2] = 2
+
+ segmentations = []
+
+ max_x = -1
+ max_y = -1
+ min_x = 999999
+ min_y = 999999
+ for segm in seg:
+ if len(segm['segment']) == 0:
+ continue
+
+ segmentation = np.array(segm['segment'])
+ segmentations.append(segmentation)
+
+ _max_x, _max_y = segmentation.max(0)
+ _min_x, _min_y = segmentation.min(0)
+
+ max_x = max(max_x, _max_x)
+ max_y = max(max_y, _max_y)
+ min_x = min(min_x, _min_x)
+ min_y = min(min_y, _min_y)
+
+ anno = {}
+ anno['keypoints'] = keypoints.reshape(-1).tolist()
+ anno['image_id'] = img_id
+ anno['id'] = ann_id
+ anno['num_keypoints'] = int(sum(keypoints[:, 2] > 0))
+ anno['bbox'] = [
+ float(min_x),
+ float(min_y),
+ float(max_x - min_x + 1),
+ float(max_y - min_y + 1)
+ ]
+ anno['iscrowd'] = 0
+ anno['area'] = get_seg_area(segmentations)
+ anno['category_id'] = 1
+ anno['segmentation'] = [
+ seg.reshape(-1).tolist() for seg in segmentations
+ ]
+
+ annotations.append(anno)
+ ann_id += 1
+
+ image = {}
+ image['id'] = img_id
+ image['file_name'] = image_name
+ image['height'] = img.shape[0]
+ image['width'] = img.shape[1]
+
+ images.append(image)
+ img_id += 1
+
+ cocotype = {}
+
+ cocotype['info'] = {}
+ cocotype['info']['description'] = 'MacaquePose Generated by MMPose Team'
+ cocotype['info']['version'] = '1.0'
+ cocotype['info']['year'] = time.strftime('%Y', time.localtime())
+ cocotype['info']['date_created'] = time.strftime('%Y/%m/%d',
+ time.localtime())
+
+ cocotype['images'] = images
+ cocotype['annotations'] = annotations
+ cocotype['categories'] = [{
+ 'supercategory':
+ 'animal',
+ 'id':
+ 1,
+ 'name':
+ 'macaque',
+ 'keypoints': [
+ 'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
+ 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
+ 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee',
+ 'right_knee', 'left_ankle', 'right_ankle'
+ ],
+ 'skeleton': [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12],
+ [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3],
+ [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
+ }]
+
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
+ json.dump(cocotype, open(save_path, 'w'), indent=4)
+ print('number of images:', img_id)
+ print('number of annotations:', ann_id)
+ print(f'done {save_path}')
+
+
+dataset_dir = '/data/macaque/'
+with open(os.path.join(dataset_dir, 'annotations.csv'), 'r') as fp:
+ data_annotation_all = list(csv.reader(fp, delimiter=','))[1:]
+
+np.random.shuffle(data_annotation_all)
+
+data_annotation_train = data_annotation_all[0:12500]
+data_annotation_val = data_annotation_all[12500:]
+
+img_root = os.path.join(dataset_dir, 'images')
+save_coco_anno(
+ data_annotation_train,
+ img_root,
+ os.path.join(dataset_dir, 'annotations', 'macaque_train.json'),
+ kpt_num=17)
+save_coco_anno(
+ data_annotation_val,
+ img_root,
+ os.path.join(dataset_dir, 'annotations', 'macaque_test.json'),
+ start_img_id=12500,
+ start_ann_id=15672,
+ kpt_num=17)
diff --git a/vendor/ViTPose/tools/dataset/preprocess_h36m.py b/vendor/ViTPose/tools/dataset/preprocess_h36m.py
new file mode 100644
index 0000000000000000000000000000000000000000..97f0edb50d61c5b405d3a6a1fa8c65cfb0c0a683
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/preprocess_h36m.py
@@ -0,0 +1,417 @@
+# -----------------------------------------------------------------------------
+# Adapted from https://github.com/anibali/h36m-fetch
+# Original license: Copyright (c) Aiden Nibali, under the Apache License.
+# -----------------------------------------------------------------------------
+
+import argparse
+import os
+import pickle
+import tarfile
+import xml.etree.ElementTree as ET
+from os.path import join
+
+import cv2
+import numpy as np
+from spacepy import pycdf
+
+
+class PreprocessH36m:
+ """Preprocess Human3.6M dataset.
+
+ Args:
+ metadata (str): Path to metadata.xml.
+ original_dir (str): Directory of the original dataset with all files
+ compressed. Specifically, .tgz files belonging to subject 1
+ should be placed under the subdirectory 's1'.
+ extracted_dir (str): Directory of the extracted files. If not given, it
+ will be placed under the same parent directory as original_dir.
+        processed_dir (str): Directory of the processed files. If not given,
+            it will be placed under the same parent directory as original_dir.
+        sample_rate (int): Keep one frame out of every `sample_rate` frames,
+            i.e. downsample the 50 FPS videos to `50 / sample_rate` FPS.
+            Default: 5.
+ """
+
+ def __init__(self,
+ metadata,
+ original_dir,
+ extracted_dir=None,
+ processed_dir=None,
+ sample_rate=5):
+ self.metadata = metadata
+ self.original_dir = original_dir
+ self.sample_rate = sample_rate
+
+ if extracted_dir is None:
+ self.extracted_dir = join(
+ os.path.dirname(os.path.abspath(self.original_dir)),
+ 'extracted')
+ else:
+ self.extracted_dir = extracted_dir
+
+ if processed_dir is None:
+ self.processed_dir = join(
+ os.path.dirname(os.path.abspath(self.original_dir)),
+ 'processed')
+ else:
+ self.processed_dir = processed_dir
+
+ self.subjects = []
+ self.sequence_mappings = {}
+ self.action_names = {}
+ self.camera_ids = []
+ self._load_metadata()
+
+ self.subjects_annot = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11']
+ self.subjects_splits = {
+ 'train': ['S1', 'S5', 'S6', 'S7', 'S8'],
+ 'test': ['S9', 'S11']
+ }
+ self.extract_files = ['Videos', 'D2_Positions', 'D3_Positions_mono']
+ self.movable_joints = [
+ 0, 1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27
+ ]
+ self.scale_factor = 1.2
+ self.image_sizes = {
+ '54138969': {
+ 'width': 1000,
+ 'height': 1002
+ },
+ '55011271': {
+ 'width': 1000,
+ 'height': 1000
+ },
+ '58860488': {
+ 'width': 1000,
+ 'height': 1000
+ },
+ '60457274': {
+ 'width': 1000,
+ 'height': 1002
+ }
+ }
+
+ def extract_tgz(self):
+ """Extract files from self.extrct_files."""
+ os.makedirs(self.extracted_dir, exist_ok=True)
+ for subject in self.subjects_annot:
+ cur_dir = join(self.original_dir, subject.lower())
+ for file in self.extract_files:
+ filename = join(cur_dir, file + '.tgz')
+ print(f'Extracting {filename} ...')
+ with tarfile.open(filename) as tar:
+ tar.extractall(self.extracted_dir)
+ print('Extraction done.\n')
+
+ def generate_cameras_file(self):
+ """Generate cameras.pkl which contains camera parameters for 11
+ subjects each with 4 cameras."""
+ cameras = {}
+ for subject in range(1, 12):
+ for camera in range(4):
+ key = (f'S{subject}', self.camera_ids[camera])
+ cameras[key] = self._get_camera_params(camera, subject)
+
+ out_file = join(self.processed_dir, 'annotation_body3d', 'cameras.pkl')
+ with open(out_file, 'wb') as fout:
+ pickle.dump(cameras, fout)
+ print(f'Camera parameters have been written to "{out_file}".\n')
+
+ def generate_annotations(self):
+ """Generate annotations for training and testing data."""
+ output_dir = join(self.processed_dir, 'annotation_body3d',
+ f'fps{50 // self.sample_rate}')
+ os.makedirs(output_dir, exist_ok=True)
+
+ for data_split in ('train', 'test'):
+ imgnames_all = []
+ centers_all = []
+ scales_all = []
+ kps2d_all = []
+ kps3d_all = []
+ for subject in self.subjects_splits[data_split]:
+ for action, subaction in self.sequence_mappings[subject].keys(
+ ):
+ if action == '1':
+ # exclude action "_ALL"
+ continue
+ for camera in self.camera_ids:
+ imgnames, centers, scales, kps2d, kps3d\
+ = self._load_annotations(
+ subject, action, subaction, camera)
+ imgnames_all.append(imgnames)
+ centers_all.append(centers)
+ scales_all.append(scales)
+ kps2d_all.append(kps2d)
+ kps3d_all.append(kps3d)
+
+ imgnames_all = np.concatenate(imgnames_all)
+ centers_all = np.concatenate(centers_all)
+ scales_all = np.concatenate(scales_all)
+ kps2d_all = np.concatenate(kps2d_all)
+ kps3d_all = np.concatenate(kps3d_all)
+
+ out_file = join(output_dir, f'h36m_{data_split}.npz')
+ np.savez(
+ out_file,
+ imgname=imgnames_all,
+ center=centers_all,
+ scale=scales_all,
+ part=kps2d_all,
+ S=kps3d_all)
+
+ print(
+ f'All annotations of {data_split}ing data have been written to'
+ f' "{out_file}". {len(imgnames_all)} samples in total.\n')
+
+ if data_split == 'train':
+ kps_3d_all = kps3d_all[..., :3] # remove visibility
+ mean_3d, std_3d = self._get_pose_stats(kps_3d_all)
+
+ kps_2d_all = kps2d_all[..., :2] # remove visibility
+ mean_2d, std_2d = self._get_pose_stats(kps_2d_all)
+
+ # centered around root
+ # the root keypoint is 0-index
+ kps_3d_rel = kps_3d_all[..., 1:, :] - kps_3d_all[..., :1, :]
+ mean_3d_rel, std_3d_rel = self._get_pose_stats(kps_3d_rel)
+
+ kps_2d_rel = kps_2d_all[..., 1:, :] - kps_2d_all[..., :1, :]
+ mean_2d_rel, std_2d_rel = self._get_pose_stats(kps_2d_rel)
+
+ stats = {
+ 'joint3d_stats': {
+ 'mean': mean_3d,
+ 'std': std_3d
+ },
+ 'joint2d_stats': {
+ 'mean': mean_2d,
+ 'std': std_2d
+ },
+ 'joint3d_rel_stats': {
+ 'mean': mean_3d_rel,
+ 'std': std_3d_rel
+ },
+ 'joint2d_rel_stats': {
+ 'mean': mean_2d_rel,
+ 'std': std_2d_rel
+ }
+ }
+ for name, stat_dict in stats.items():
+ out_file = join(output_dir, f'{name}.pkl')
+ with open(out_file, 'wb') as f:
+ pickle.dump(stat_dict, f)
+ print(f'Create statistic data file: {out_file}')
+
+ @staticmethod
+ def _get_pose_stats(kps):
+ """Get statistic information `mean` and `std` of pose data.
+
+ Args:
+ kps (ndarray): keypoints in shape [..., K, C], where K and C are
+ the keypoint number and dimension, respectively.
+ Returns:
+ mean (ndarray): [K, C]
+ std (ndarray): [K, C]
+ """
+ assert kps.ndim > 2
+ K, C = kps.shape[-2:]
+ kps = kps.reshape(-1, K, C)
+ mean = kps.mean(axis=0)
+ std = kps.std(axis=0)
+ return mean, std
+
+ def _load_metadata(self):
+ """Load meta data from metadata.xml."""
+
+ assert os.path.exists(self.metadata)
+
+ tree = ET.parse(self.metadata)
+ root = tree.getroot()
+
+ for i, tr in enumerate(root.find('mapping')):
+ if i == 0:
+ _, _, *self.subjects = [td.text for td in tr]
+ self.sequence_mappings \
+ = {subject: {} for subject in self.subjects}
+ elif i < 33:
+ action_id, subaction_id, *prefixes = [td.text for td in tr]
+ for subject, prefix in zip(self.subjects, prefixes):
+ self.sequence_mappings[subject][(action_id, subaction_id)]\
+ = prefix
+
+ for i, elem in enumerate(root.find('actionnames')):
+ action_id = str(i + 1)
+ self.action_names[action_id] = elem.text
+
+ self.camera_ids \
+ = [elem.text for elem in root.find('dbcameras/index2id')]
+
+ w0 = root.find('w0')
+ self.cameras_raw = [float(num) for num in w0.text[1:-1].split()]
+
+ def _get_base_filename(self, subject, action, subaction, camera):
+ """Get base filename given subject, action, subaction and camera."""
+ return f'{self.sequence_mappings[subject][(action, subaction)]}' + \
+ f'.{camera}'
+
+ def _get_camera_params(self, camera, subject):
+ """Get camera parameters given camera id and subject id."""
+ metadata_slice = np.zeros(15)
+ start = 6 * (camera * 11 + (subject - 1))
+
+ metadata_slice[:6] = self.cameras_raw[start:start + 6]
+ metadata_slice[6:] = self.cameras_raw[265 + camera * 9 - 1:265 +
+ (camera + 1) * 9 - 1]
+
+ # extrinsics
+ x, y, z = -metadata_slice[0], metadata_slice[1], -metadata_slice[2]
+
+ R_x = np.array([[1, 0, 0], [0, np.cos(x), np.sin(x)],
+ [0, -np.sin(x), np.cos(x)]])
+ R_y = np.array([[np.cos(y), 0, np.sin(y)], [0, 1, 0],
+ [-np.sin(y), 0, np.cos(y)]])
+ R_z = np.array([[np.cos(z), np.sin(z), 0], [-np.sin(z),
+ np.cos(z), 0], [0, 0, 1]])
+ R = (R_x @ R_y @ R_z).T
+ T = metadata_slice[3:6].reshape(-1, 1)
+ # convert unit from millimeter to meter
+ T *= 0.001
+
+ # intrinsics
+ c = metadata_slice[8:10, None]
+ f = metadata_slice[6:8, None]
+
+ # distortion
+ k = metadata_slice[10:13, None]
+ p = metadata_slice[13:15, None]
+
+ return {
+ 'R': R,
+ 'T': T,
+ 'c': c,
+ 'f': f,
+ 'k': k,
+ 'p': p,
+ 'w': self.image_sizes[self.camera_ids[camera]]['width'],
+ 'h': self.image_sizes[self.camera_ids[camera]]['height'],
+ 'name': f'camera{camera + 1}',
+ 'id': self.camera_ids[camera]
+ }
+
+ def _load_annotations(self, subject, action, subaction, camera):
+ """Load annotations for a sequence."""
+ subj_dir = join(self.extracted_dir, subject)
+ basename = self._get_base_filename(subject, action, subaction, camera)
+
+ # load 2D keypoints
+ with pycdf.CDF(
+ join(subj_dir, 'MyPoseFeatures', 'D2_Positions',
+ basename + '.cdf')) as cdf:
+ kps_2d = np.array(cdf['Pose'])
+
+ num_frames = kps_2d.shape[1]
+ kps_2d = kps_2d.reshape((num_frames, 32, 2))[::self.sample_rate,
+ self.movable_joints]
+ kps_2d = np.concatenate([kps_2d, np.ones((len(kps_2d), 17, 1))],
+ axis=2)
+
+ # load 3D keypoints
+ with pycdf.CDF(
+ join(subj_dir, 'MyPoseFeatures', 'D3_Positions_mono',
+ basename + '.cdf')) as cdf:
+ kps_3d = np.array(cdf['Pose'])
+
+ kps_3d = kps_3d.reshape(
+ (num_frames, 32, 3))[::self.sample_rate,
+ self.movable_joints] / 1000.
+ kps_3d = np.concatenate([kps_3d, np.ones((len(kps_3d), 17, 1))],
+ axis=2)
+
+ # calculate bounding boxes
+ bboxes = np.stack([
+ np.min(kps_2d[:, :, 0], axis=1),
+ np.min(kps_2d[:, :, 1], axis=1),
+ np.max(kps_2d[:, :, 0], axis=1),
+ np.max(kps_2d[:, :, 1], axis=1)
+ ],
+ axis=1)
+ centers = np.stack([(bboxes[:, 0] + bboxes[:, 2]) / 2,
+ (bboxes[:, 1] + bboxes[:, 3]) / 2],
+ axis=1)
+ scales = self.scale_factor * np.max(
+ bboxes[:, 2:] - bboxes[:, :2], axis=1) / 200
+
+ # extract frames and save imgnames
+ imgnames = []
+ video_path = join(subj_dir, 'Videos', basename + '.mp4')
+ sub_base = subject + '_' + basename.replace(' ', '_')
+ img_dir = join(self.processed_dir, 'images', subject, sub_base)
+ os.makedirs(img_dir, exist_ok=True)
+ prefix = join(subject, sub_base, sub_base)
+
+ cap = cv2.VideoCapture(video_path)
+ i = 0
+ while True:
+ success, img = cap.read()
+ if not success:
+ break
+ if i % self.sample_rate == 0:
+ imgname = f'{prefix}_{i + 1:06d}.jpg'
+ imgnames.append(imgname)
+ dest_path = join(self.processed_dir, 'images', imgname)
+ if not os.path.exists(dest_path):
+ cv2.imwrite(dest_path, img)
+ if len(imgnames) == len(centers):
+ break
+ i += 1
+ cap.release()
+ imgnames = np.array(imgnames)
+
+ print(f'Annotations for sequence "{subject} {basename}" are loaded. '
+ f'{len(imgnames)} samples in total.')
+
+ return imgnames, centers, scales, kps_2d, kps_3d
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--metadata', type=str, required=True, help='Path to metadata.xml')
+ parser.add_argument(
+ '--original',
+ type=str,
+ required=True,
+ help='Directory of the original dataset with all files compressed. '
+ 'Specifically, .tgz files belonging to subject 1 should be placed '
+ 'under the subdirectory \"s1\".')
+ parser.add_argument(
+ '--extracted',
+ type=str,
+ default=None,
+ help='Directory of the extracted files. If not given, it will be '
+ 'placed under the same parent directory as original_dir.')
+ parser.add_argument(
+ '--processed',
+ type=str,
+ default=None,
+ help='Directory of the processed files. If not given, it will be '
+ 'placed under the same parent directory as original_dir.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=5,
+ help='Keep one frame out of every `sample_rate` frames. Default: 5.')
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ args = parse_args()
+
+ h36m = PreprocessH36m(
+ metadata=args.metadata,
+ original_dir=args.original,
+ extracted_dir=args.extracted,
+ processed_dir=args.processed,
+ sample_rate=args.sample_rate)
+ h36m.extract_tgz()
+ h36m.generate_cameras_file()
+ h36m.generate_annotations()
diff --git a/vendor/ViTPose/tools/dataset/preprocess_mpi_inf_3dhp.py b/vendor/ViTPose/tools/dataset/preprocess_mpi_inf_3dhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bef25c9433e0894ffa03db72510204bd75b67f4
--- /dev/null
+++ b/vendor/ViTPose/tools/dataset/preprocess_mpi_inf_3dhp.py
@@ -0,0 +1,359 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import pickle
+import shutil
+from os.path import join
+
+import cv2
+import h5py
+import mmcv
+import numpy as np
+from scipy.io import loadmat
+
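+# Example invocation (paths are illustrative):
+#   python tools/dataset/preprocess_mpi_inf_3dhp.py /data/mpi_inf_3dhp /data/mpi_inf_3dhp_processed
+# The data root is expected to contain the original `train/S*/Seq*` and
+# `test/TS*` directories (see the load_trainset docstring below).
+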
+train_subjects = [i for i in range(1, 9)]
+test_subjects = [i for i in range(1, 7)]
+train_seqs = [1, 2]
+train_cams = [0, 1, 2, 4, 5, 6, 7, 8]
+train_frame_nums = {
+ (1, 1): 6416,
+ (1, 2): 12430,
+ (2, 1): 6502,
+ (2, 2): 6081,
+ (3, 1): 12488,
+ (3, 2): 12283,
+ (4, 1): 6171,
+ (4, 2): 6675,
+ (5, 1): 12820,
+ (5, 2): 12312,
+ (6, 1): 6188,
+ (6, 2): 6145,
+ (7, 1): 6239,
+ (7, 2): 6320,
+ (8, 1): 6468,
+ (8, 2): 6054
+}
+test_frame_nums = {1: 6151, 2: 6080, 3: 5838, 4: 6007, 5: 320, 6: 492}
+train_img_size = (2048, 2048)
+root_index = 14
+joints_17 = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
+
+
+def get_pose_stats(kps):
+ """Get statistic information `mean` and `std` of pose data.
+
+ Args:
+ kps (ndarray): keypoints in shape [..., K, C], where K and C are
+ the keypoint number and dimension, respectively.
+ Returns:
+ mean (ndarray): [K, C]
+ std (ndarray): [K, C]
+ """
+ assert kps.ndim > 2
+ K, C = kps.shape[-2:]
+ kps = kps.reshape(-1, K, C)
+ mean = kps.mean(axis=0)
+ std = kps.std(axis=0)
+ return mean, std
+
+
+def get_annotations(joints_2d, joints_3d, scale_factor=1.2):
+ """Get annotations, including centers, scales, joints_2d and joints_3d.
+
+ Args:
+ joints_2d: 2D joint coordinates in shape [N, K, 2], where N is the
+ frame number, K is the joint number.
+ joints_3d: 3D joint coordinates in shape [N, K, 3], where N is the
+ frame number, K is the joint number.
+ scale_factor: Scale factor of bounding box. Default: 1.2.
+ Returns:
+ centers (ndarray): [N, 2]
+ scales (ndarray): [N,]
+ joints_2d (ndarray): [N, K, 3]
+ joints_3d (ndarray): [N, K, 4]
+ """
+ # calculate joint visibility
+ visibility = (joints_2d[:, :, 0] >= 0) * \
+ (joints_2d[:, :, 0] < train_img_size[0]) * \
+ (joints_2d[:, :, 1] >= 0) * \
+ (joints_2d[:, :, 1] < train_img_size[1])
+ visibility = np.array(visibility, dtype=np.float32)[:, :, None]
+ joints_2d = np.concatenate([joints_2d, visibility], axis=-1)
+ joints_3d = np.concatenate([joints_3d, visibility], axis=-1)
+
+ # calculate bounding boxes
+ bboxes = np.stack([
+ np.min(joints_2d[:, :, 0], axis=1),
+ np.min(joints_2d[:, :, 1], axis=1),
+ np.max(joints_2d[:, :, 0], axis=1),
+ np.max(joints_2d[:, :, 1], axis=1)
+ ],
+ axis=1)
+ centers = np.stack([(bboxes[:, 0] + bboxes[:, 2]) / 2,
+ (bboxes[:, 1] + bboxes[:, 3]) / 2],
+ axis=1)
+ scales = scale_factor * np.max(bboxes[:, 2:] - bboxes[:, :2], axis=1) / 200
+
+ return centers, scales, joints_2d, joints_3d
+
+
+def load_trainset(data_root, out_dir):
+ """Load training data, create annotation file and camera file.
+ Args:
+ data_root: Directory of dataset, which is organized in the following
+ hierarchy:
+ data_root
+ |-- train
+ |-- S1
+ |-- Seq1
+ |-- Seq2
+ |-- S2
+ |-- ...
+ |-- test
+ |-- TS1
+ |-- TS2
+ |-- ...
+ out_dir: Directory to save annotation file.
+ """
+ _imgnames = []
+ _centers = []
+ _scales = []
+ _joints_2d = []
+ _joints_3d = []
+ cameras = {}
+
+ img_dir = join(out_dir, 'images')
+ os.makedirs(img_dir, exist_ok=True)
+ annot_dir = join(out_dir, 'annotations')
+ os.makedirs(annot_dir, exist_ok=True)
+
+ for subj in train_subjects:
+ for seq in train_seqs:
+ seq_path = join(data_root, 'train', f'S{subj}', f'Seq{seq}')
+ num_frames = train_frame_nums[(subj, seq)]
+
+ # load camera parameters
+ camera_file = join(seq_path, 'camera.calibration')
+ with open(camera_file, 'r') as fin:
+ lines = fin.readlines()
+ for cam in train_cams:
+ K = [float(s) for s in lines[cam * 7 + 5][11:-2].split()]
+ f = np.array([[K[0]], [K[5]]])
+ c = np.array([[K[2]], [K[6]]])
+ RT = np.array(
+ [float(s) for s in lines[cam * 7 + 6][11:-2].split()])
+ RT = np.reshape(RT, (4, 4))
+ R = RT[:3, :3]
+ # convert unit from millimeter to meter
+ T = RT[:3, 3:] * 0.001
+ size = [int(s) for s in lines[cam * 7 + 3][14:].split()]
+ w, h = size
+ cam_param = dict(
+ R=R, T=T, c=c, f=f, w=w, h=h, name=f'train_cam_{cam}')
+ cameras[f'S{subj}_Seq{seq}_Cam{cam}'] = cam_param
+
+ # load annotations
+ annot_file = os.path.join(seq_path, 'annot.mat')
+ annot2 = loadmat(annot_file)['annot2']
+ annot3 = loadmat(annot_file)['annot3']
+ for cam in train_cams:
+ # load 2D and 3D annotations
+ joints_2d = np.reshape(annot2[cam][0][:num_frames],
+ (num_frames, 28, 2))[:, joints_17]
+ joints_3d = np.reshape(annot3[cam][0][:num_frames],
+ (num_frames, 28, 3))[:, joints_17]
+ joints_3d = joints_3d * 0.001
+ centers, scales, joints_2d, joints_3d = get_annotations(
+ joints_2d, joints_3d)
+ _centers.append(centers)
+ _scales.append(scales)
+ _joints_2d.append(joints_2d)
+ _joints_3d.append(joints_3d)
+
+ # extract frames from video
+ video_path = join(seq_path, 'imageSequence',
+ f'video_{cam}.avi')
+ video = mmcv.VideoReader(video_path)
+ for i in mmcv.track_iter_progress(range(num_frames)):
+ img = video.read()
+ if img is None:
+ break
+ imgname = f'S{subj}_Seq{seq}_Cam{cam}_{i+1:06d}.jpg'
+ _imgnames.append(imgname)
+ cv2.imwrite(join(img_dir, imgname), img)
+
+ _imgnames = np.array(_imgnames)
+ _centers = np.concatenate(_centers)
+ _scales = np.concatenate(_scales)
+ _joints_2d = np.concatenate(_joints_2d)
+ _joints_3d = np.concatenate(_joints_3d)
+
+ out_file = join(annot_dir, 'mpi_inf_3dhp_train.npz')
+ np.savez(
+ out_file,
+ imgname=_imgnames,
+ center=_centers,
+ scale=_scales,
+ part=_joints_2d,
+ S=_joints_3d)
+ print(f'Create annotation file for trainset: {out_file}. '
+ f'{len(_imgnames)} samples in total.')
+
+ out_file = join(annot_dir, 'cameras_train.pkl')
+ with open(out_file, 'wb') as fout:
+ pickle.dump(cameras, fout)
+ print(f'Create camera file for trainset: {out_file}.')
+
+ # get `mean` and `std` of pose data
+ _joints_3d = _joints_3d[..., :3] # remove visibility
+ mean_3d, std_3d = get_pose_stats(_joints_3d)
+
+ _joints_2d = _joints_2d[..., :2] # remove visibility
+ mean_2d, std_2d = get_pose_stats(_joints_2d)
+
+ # centered around root
+ _joints_3d_rel = _joints_3d - _joints_3d[..., root_index:root_index + 1, :]
+ mean_3d_rel, std_3d_rel = get_pose_stats(_joints_3d_rel)
+ mean_3d_rel[root_index] = mean_3d[root_index]
+ std_3d_rel[root_index] = std_3d[root_index]
+
+ _joints_2d_rel = _joints_2d - _joints_2d[..., root_index:root_index + 1, :]
+ mean_2d_rel, std_2d_rel = get_pose_stats(_joints_2d_rel)
+ mean_2d_rel[root_index] = mean_2d[root_index]
+ std_2d_rel[root_index] = std_2d[root_index]
+
+ stats = {
+ 'joint3d_stats': {
+ 'mean': mean_3d,
+ 'std': std_3d
+ },
+ 'joint2d_stats': {
+ 'mean': mean_2d,
+ 'std': std_2d
+ },
+ 'joint3d_rel_stats': {
+ 'mean': mean_3d_rel,
+ 'std': std_3d_rel
+ },
+ 'joint2d_rel_stats': {
+ 'mean': mean_2d_rel,
+ 'std': std_2d_rel
+ }
+ }
+ for name, stat_dict in stats.items():
+ out_file = join(annot_dir, f'{name}.pkl')
+ with open(out_file, 'wb') as f:
+ pickle.dump(stat_dict, f)
+ print(f'Create statistic data file: {out_file}')
+
+
+def load_testset(data_root, out_dir, valid_only=True):
+ """Load testing data, create annotation file and camera file.
+
+ Args:
+ data_root: Directory of dataset.
+ out_dir: Directory to save annotation file.
+ valid_only: Only keep frames with valid_label == 1.
+ """
+ _imgnames = []
+ _centers = []
+ _scales = []
+ _joints_2d = []
+ _joints_3d = []
+ cameras = {}
+
+ img_dir = join(out_dir, 'images')
+ os.makedirs(img_dir, exist_ok=True)
+ annot_dir = join(out_dir, 'annotations')
+ os.makedirs(annot_dir, exist_ok=True)
+
+ for subj in test_subjects:
+ subj_path = join(data_root, 'test', f'TS{subj}')
+ num_frames = test_frame_nums[subj]
+
+ # load annotations
+ annot_file = os.path.join(subj_path, 'annot_data.mat')
+ with h5py.File(annot_file, 'r') as fin:
+ annot2 = np.array(fin['annot2']).reshape((-1, 17, 2))
+ annot3 = np.array(fin['annot3']).reshape((-1, 17, 3))
+ valid = np.array(fin['valid_frame']).reshape(-1)
+
+ # manually estimate camera intrinsics
+ fx, cx = np.linalg.lstsq(
+ annot3[:, :, [0, 2]].reshape((-1, 2)),
+ (annot2[:, :, 0] * annot3[:, :, 2]).reshape(-1, 1),
+ rcond=None)[0].flatten()
+ fy, cy = np.linalg.lstsq(
+ annot3[:, :, [1, 2]].reshape((-1, 2)),
+ (annot2[:, :, 1] * annot3[:, :, 2]).reshape(-1, 1),
+ rcond=None)[0].flatten()
+ if subj <= 4:
+ w, h = 2048, 2048
+ else:
+ w, h = 1920, 1080
+ cameras[f'TS{subj}'] = dict(
+ c=np.array([[cx], [cy]]),
+ f=np.array([[fx], [fy]]),
+ w=w,
+ h=h,
+ name=f'test_cam_{subj}')
+
+ # get annotations
+ if valid_only:
+ valid_frames = np.nonzero(valid)[0]
+ else:
+ valid_frames = np.arange(num_frames)
+ joints_2d = annot2[valid_frames, :, :]
+ joints_3d = annot3[valid_frames, :, :] * 0.001
+
+ centers, scales, joints_2d, joints_3d = get_annotations(
+ joints_2d, joints_3d)
+ _centers.append(centers)
+ _scales.append(scales)
+ _joints_2d.append(joints_2d)
+ _joints_3d.append(joints_3d)
+
+ # copy and rename images
+ for i in valid_frames:
+ imgname = f'TS{subj}_{i+1:06d}.jpg'
+ shutil.copyfile(
+ join(subj_path, 'imageSequence', f'img_{i+1:06d}.jpg'),
+ join(img_dir, imgname))
+ _imgnames.append(imgname)
+
+ _imgnames = np.array(_imgnames)
+ _centers = np.concatenate(_centers)
+ _scales = np.concatenate(_scales)
+ _joints_2d = np.concatenate(_joints_2d)
+ _joints_3d = np.concatenate(_joints_3d)
+
+ if valid_only:
+ out_file = join(annot_dir, 'mpi_inf_3dhp_test_valid.npz')
+ else:
+ out_file = join(annot_dir, 'mpi_inf_3dhp_test_all.npz')
+ np.savez(
+ out_file,
+ imgname=_imgnames,
+ center=_centers,
+ scale=_scales,
+ part=_joints_2d,
+ S=_joints_3d)
+ print(f'Create annotation file for testset: {out_file}. '
+ f'{len(_imgnames)} samples in total.')
+
+ out_file = join(annot_dir, 'cameras_test.pkl')
+ with open(out_file, 'wb') as fout:
+ pickle.dump(cameras, fout)
+ print(f'Create camera file for testset: {out_file}.')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('data_root', type=str, help='data root')
+ parser.add_argument(
+ 'out_dir', type=str, help='directory to save annotation files.')
+ args = parser.parse_args()
+ data_root = args.data_root
+ out_dir = args.out_dir
+
+ load_trainset(data_root, out_dir)
+ load_testset(data_root, out_dir, valid_only=True)
diff --git a/vendor/ViTPose/tools/deployment/mmpose2torchserve.py b/vendor/ViTPose/tools/deployment/mmpose2torchserve.py
new file mode 100644
index 0000000000000000000000000000000000000000..492a45b6b36935fadbae8578c1ffecc5b928b893
--- /dev/null
+++ b/vendor/ViTPose/tools/deployment/mmpose2torchserve.py
@@ -0,0 +1,135 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import warnings
+from argparse import ArgumentParser, Namespace
+from tempfile import TemporaryDirectory
+
+import mmcv
+import torch
+from mmcv.runner import CheckpointLoader
+
+try:
+ from model_archiver.model_packaging import package_model
+ from model_archiver.model_packaging_utils import ModelExportUtils
+except ImportError:
+ package_model = None
+
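+# Example (config/checkpoint names are illustrative):
+#   python tools/deployment/mmpose2torchserve.py \
+#       configs/path/to/pose_config.py pose_checkpoint.pth \
+#       --output-folder model_store --model-name my_pose_model
+# The resulting `model_store/my_pose_model.mar` can then be served with
+# something like:
+#   torchserve --start --model-store model_store --models my_pose_model.mar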
+
+def mmpose2torchserve(config_file: str,
+ checkpoint_file: str,
+ output_folder: str,
+ model_name: str,
+ model_version: str = '1.0',
+ force: bool = False):
+ """Converts MMPose model (config + checkpoint) to TorchServe `.mar`.
+
+ Args:
+ config_file:
+ In MMPose config format.
+ The contents vary for each task repository.
+ checkpoint_file:
+ In MMPose checkpoint format.
+ The contents vary for each task repository.
+ output_folder:
+ Folder where `{model_name}.mar` will be created.
+ The file created will be in TorchServe archive format.
+ model_name:
+ If not None, used for naming the `{model_name}.mar` file
+ that will be created under `output_folder`.
+ If None, `{Path(checkpoint_file).stem}` will be used.
+ model_version:
+ Model's version.
+ force:
+ If True, if there is an existing `{model_name}.mar`
+ file under `output_folder` it will be overwritten.
+ """
+
+ mmcv.mkdir_or_exist(output_folder)
+
+ config = mmcv.Config.fromfile(config_file)
+
+ with TemporaryDirectory() as tmpdir:
+ model_file = osp.join(tmpdir, 'config.py')
+ config.dump(model_file)
+ handler_path = osp.join(osp.dirname(__file__), 'mmpose_handler.py')
+ model_name = model_name or osp.splitext(
+ osp.basename(checkpoint_file))[0]
+
+ # use mmcv CheckpointLoader if checkpoint is not from a local file
+ if not osp.isfile(checkpoint_file):
+ ckpt = CheckpointLoader.load_checkpoint(checkpoint_file)
+ checkpoint_file = osp.join(tmpdir, 'checkpoint.pth')
+ with open(checkpoint_file, 'wb') as f:
+ torch.save(ckpt, f)
+
+ args = Namespace(
+ **{
+ 'model_file': model_file,
+ 'serialized_file': checkpoint_file,
+ 'handler': handler_path,
+ 'model_name': model_name,
+ 'version': model_version,
+ 'export_path': output_folder,
+ 'force': force,
+ 'requirements_file': None,
+ 'extra_files': None,
+ 'runtime': 'python',
+ 'archive_format': 'default'
+ })
+ manifest = ModelExportUtils.generate_manifest_json(args)
+ package_model(args, manifest)
+
+
+def parse_args():
+ parser = ArgumentParser(
+ description='Convert MMPose models to TorchServe `.mar` format.')
+ parser.add_argument('config', type=str, help='config file path')
+ parser.add_argument('checkpoint', type=str, help='checkpoint file path')
+ parser.add_argument(
+ '--output-folder',
+ type=str,
+ required=True,
+ help='Folder where `{model_name}.mar` will be created.')
+ parser.add_argument(
+ '--model-name',
+ type=str,
+ default=None,
+ help='If not None, used for naming the `{model_name}.mar` '
+ 'file that will be created under `output_folder`. '
+ 'If None, `{Path(checkpoint_file).stem}` will be used.')
+ parser.add_argument(
+ '--model-version',
+ type=str,
+ default='1.0',
+ help='Number used for versioning.')
+ parser.add_argument(
+ '-f',
+ '--force',
+ action='store_true',
+ help='overwrite the existing `{model_name}.mar`')
+ args = parser.parse_args()
+
+ return args
+
+
+if __name__ == '__main__':
+ args = parse_args()
+
+ # Following strings of text style are from colorama package
+ bright_style, reset_style = '\x1b[1m', '\x1b[0m'
+ red_text, blue_text = '\x1b[31m', '\x1b[34m'
+ white_background = '\x1b[107m'
+
+ msg = white_background + bright_style + red_text
+ msg += 'DeprecationWarning: This tool will be deprecated in future. '
+ msg += blue_text + 'Welcome to use the unified model deployment toolbox '
+ msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
+ msg += reset_style
+ warnings.warn(msg)
+
+ if package_model is None:
+ raise ImportError('`torch-model-archiver` is required. '
+ 'Try: pip install torch-model-archiver')
+
+ mmpose2torchserve(args.config, args.checkpoint, args.output_folder,
+ args.model_name, args.model_version, args.force)
diff --git a/vendor/ViTPose/tools/deployment/mmpose_handler.py b/vendor/ViTPose/tools/deployment/mmpose_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7da881cdc9dd26ab23242052668958b8172ce57
--- /dev/null
+++ b/vendor/ViTPose/tools/deployment/mmpose_handler.py
@@ -0,0 +1,80 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import base64
+import os
+
+import mmcv
+import torch
+
+from mmpose.apis import (inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model)
+from mmpose.models.detectors import AssociativeEmbedding, TopDown
+
+try:
+ from ts.torch_handler.base_handler import BaseHandler
+except ImportError:
+ raise ImportError('Please install torchserve.')
+
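+# This handler is bundled into the `.mar` archive by
+# tools/deployment/mmpose2torchserve.py. TorchServe invokes initialize() once
+# per worker and then preprocess() -> inference() -> postprocess() for each
+# request; images may arrive either as raw bytes or as base64-encoded strings
+# in the request body (see preprocess below).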
+
+class MMPoseHandler(BaseHandler):
+
+ def initialize(self, context):
+ properties = context.system_properties
+ self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
+ self.device = torch.device(self.map_location + ':' +
+ str(properties.get('gpu_id')) if torch.cuda.
+ is_available() else self.map_location)
+ self.manifest = context.manifest
+
+ model_dir = properties.get('model_dir')
+ serialized_file = self.manifest['model']['serializedFile']
+ checkpoint = os.path.join(model_dir, serialized_file)
+ self.config_file = os.path.join(model_dir, 'config.py')
+
+ self.model = init_pose_model(self.config_file, checkpoint, self.device)
+ self.initialized = True
+
+ def preprocess(self, data):
+ images = []
+
+ for row in data:
+ image = row.get('data') or row.get('body')
+ if isinstance(image, str):
+ image = base64.b64decode(image)
+ image = mmcv.imfrombytes(image)
+ images.append(image)
+
+ return images
+
+ def inference(self, data, *args, **kwargs):
+ if isinstance(self.model, TopDown):
+ results = self._inference_top_down_pose_model(data)
+ elif isinstance(self.model, (AssociativeEmbedding, )):
+ results = self._inference_bottom_up_pose_model(data)
+ else:
+ raise NotImplementedError(
+ f'Model type {type(self.model)} is not supported.')
+
+ return results
+
+ def _inference_top_down_pose_model(self, data):
+ results = []
+ for image in data:
+ # use dummy person bounding box
+ preds, _ = inference_top_down_pose_model(
+ self.model, image, person_results=None)
+ results.append(preds)
+ return results
+
+ def _inference_bottom_up_pose_model(self, data):
+ results = []
+ for image in data:
+ preds, _ = inference_bottom_up_pose_model(self.model, image)
+ results.append(preds)
+ return results
+
+ def postprocess(self, data):
+ output = [[{
+ 'keypoints': pred['keypoints'].tolist()
+ } for pred in preds] for preds in data]
+
+ return output
diff --git a/vendor/ViTPose/tools/deployment/pytorch2onnx.py b/vendor/ViTPose/tools/deployment/pytorch2onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..5caff6e070b5690a0dc8ba8e09caac0409c23047
--- /dev/null
+++ b/vendor/ViTPose/tools/deployment/pytorch2onnx.py
@@ -0,0 +1,165 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import warnings
+
+import numpy as np
+import torch
+
+from mmpose.apis import init_pose_model
+
+try:
+ import onnx
+ import onnxruntime as rt
+except ImportError as e:
+ raise ImportError(f'Please install onnx and onnxruntime first. {e}')
+
+try:
+ from mmcv.onnx.symbolic import register_extra_symbolics
+except ModuleNotFoundError:
+ raise NotImplementedError('please update mmcv to version>=1.0.4')
+
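+# Example (config/checkpoint names are illustrative):
+#   python tools/deployment/pytorch2onnx.py configs/path/to/pose_config.py \
+#       pose_checkpoint.pth --output-file pose_model.onnx \
+#       --shape 1 3 256 192 --verify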
+
+def _convert_batchnorm(module):
+ """Convert the syncBNs into normal BN3ds."""
+ module_output = module
+ if isinstance(module, torch.nn.SyncBatchNorm):
+ module_output = torch.nn.BatchNorm3d(module.num_features, module.eps,
+ module.momentum, module.affine,
+ module.track_running_stats)
+ if module.affine:
+ module_output.weight.data = module.weight.data.clone().detach()
+ module_output.bias.data = module.bias.data.clone().detach()
+ # keep requires_grad unchanged
+ module_output.weight.requires_grad = module.weight.requires_grad
+ module_output.bias.requires_grad = module.bias.requires_grad
+ module_output.running_mean = module.running_mean
+ module_output.running_var = module.running_var
+ module_output.num_batches_tracked = module.num_batches_tracked
+ for name, child in module.named_children():
+ module_output.add_module(name, _convert_batchnorm(child))
+ del module
+ return module_output
+
+
+def pytorch2onnx(model,
+ input_shape,
+ opset_version=11,
+ show=False,
+ output_file='tmp.onnx',
+ verify=False):
+ """Convert pytorch model to onnx model.
+
+ Args:
+ model (:obj:`nn.Module`): The pytorch model to be exported.
+ input_shape (tuple[int]): The input tensor shape of the model.
+ opset_version (int): Opset version of onnx used. Default: 11.
+ show (bool): Determines whether to print the onnx model architecture.
+ Default: False.
+ output_file (str): Output onnx model name. Default: 'tmp.onnx'.
+ verify (bool): Determines whether to verify the onnx model.
+ Default: False.
+ """
+ model.cpu().eval()
+
+ one_img = torch.randn(input_shape)
+
+ register_extra_symbolics(opset_version)
+ torch.onnx.export(
+ model,
+ one_img,
+ output_file,
+ export_params=True,
+ keep_initializers_as_inputs=True,
+ verbose=show,
+ opset_version=opset_version)
+
+ print(f'Successfully exported ONNX model: {output_file}')
+ if verify:
+ # check by onnx
+ onnx_model = onnx.load(output_file)
+ onnx.checker.check_model(onnx_model)
+
+ # check the numerical value
+ # get pytorch output
+ pytorch_results = model(one_img)
+ if not isinstance(pytorch_results, (list, tuple)):
+ assert isinstance(pytorch_results, torch.Tensor)
+ pytorch_results = [pytorch_results]
+
+ # get onnx output
+ input_all = [node.name for node in onnx_model.graph.input]
+ input_initializer = [
+ node.name for node in onnx_model.graph.initializer
+ ]
+ net_feed_input = list(set(input_all) - set(input_initializer))
+ assert len(net_feed_input) == 1
+ sess = rt.InferenceSession(output_file)
+ onnx_results = sess.run(None,
+ {net_feed_input[0]: one_img.detach().numpy()})
+
+ # compare results
+ assert len(pytorch_results) == len(onnx_results)
+ for pt_result, onnx_result in zip(pytorch_results, onnx_results):
+ assert np.allclose(
+ pt_result.detach().cpu(), onnx_result, atol=1.e-5
+ ), 'The outputs are different between PyTorch and ONNX'
+ print('The numerical values are the same between PyTorch and ONNX')
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Convert MMPose models to ONNX')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--show', action='store_true', help='show onnx graph')
+ parser.add_argument('--output-file', type=str, default='tmp.onnx')
+ parser.add_argument('--opset-version', type=int, default=11)
+ parser.add_argument(
+ '--verify',
+ action='store_true',
+ help='verify the onnx model output against pytorch output')
+ parser.add_argument(
+ '--shape',
+ type=int,
+ nargs='+',
+ default=[1, 3, 256, 192],
+ help='input size')
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ args = parse_args()
+
+ assert args.opset_version == 11, 'MMPose only supports opset 11 now'
+
+ # Following strings of text style are from colorama package
+ bright_style, reset_style = '\x1b[1m', '\x1b[0m'
+ red_text, blue_text = '\x1b[31m', '\x1b[34m'
+ white_background = '\x1b[107m'
+
+ msg = white_background + bright_style + red_text
+ msg += 'DeprecationWarning: This tool will be deprecated in future. '
+ msg += blue_text + 'Welcome to use the unified model deployment toolbox '
+ msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
+ msg += reset_style
+ warnings.warn(msg)
+
+ model = init_pose_model(args.config, args.checkpoint, device='cpu')
+ model = _convert_batchnorm(model)
+
+ # onnx.export does not support kwargs
+ if hasattr(model, 'forward_dummy'):
+ model.forward = model.forward_dummy
+ else:
+ raise NotImplementedError(
+ 'Please implement the forward method for exporting.')
+
+ # convert model to onnx file
+ pytorch2onnx(
+ model,
+ args.shape,
+ opset_version=args.opset_version,
+ show=args.show,
+ output_file=args.output_file,
+ verify=args.verify)
diff --git a/vendor/ViTPose/tools/deployment/test_torchserver.py b/vendor/ViTPose/tools/deployment/test_torchserver.py
new file mode 100644
index 0000000000000000000000000000000000000000..70e27c575be05fb4a72ce19063ceec5015fc6779
--- /dev/null
+++ b/vendor/ViTPose/tools/deployment/test_torchserver.py
@@ -0,0 +1,79 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import warnings
+from argparse import ArgumentParser
+
+import requests
+
+from mmpose.apis import (inference_bottom_up_pose_model,
+ inference_top_down_pose_model, init_pose_model,
+ vis_pose_result)
+from mmpose.models import AssociativeEmbedding, TopDown
+
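+# Example (assumes a model archived with mmpose2torchserve.py is already being
+# served; names and paths are illustrative):
+#   python tools/deployment/test_torchserver.py demo.jpg \
+#       configs/path/to/pose_config.py pose_checkpoint.pth my_pose_model \
+#       --inference-addr 127.0.0.1:8080 --out-dir vis_results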
+
+def parse_args():
+ parser = ArgumentParser()
+ parser.add_argument('img', help='Image file')
+ parser.add_argument('config', help='Config file')
+ parser.add_argument('checkpoint', help='Checkpoint file')
+ parser.add_argument('model_name', help='The model name in the server')
+ parser.add_argument(
+ '--inference-addr',
+ default='127.0.0.1:8080',
+ help='Address and port of the inference server')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--out-dir', default='vis_results', help='Visualization output path')
+ args = parser.parse_args()
+ return args
+
+
+def main(args):
+ os.makedirs(args.out_dir, exist_ok=True)
+
+ # Inference single image by native apis.
+ model = init_pose_model(args.config, args.checkpoint, device=args.device)
+ if isinstance(model, TopDown):
+ pytorch_result, _ = inference_top_down_pose_model(
+ model, args.img, person_results=None)
+ elif isinstance(model, (AssociativeEmbedding, )):
+ pytorch_result, _ = inference_bottom_up_pose_model(model, args.img)
+ else:
+ raise NotImplementedError()
+
+ vis_pose_result(
+ model,
+ args.img,
+ pytorch_result,
+ out_file=osp.join(args.out_dir, 'pytorch_result.png'))
+
+ # Inference single image by torchserve engine.
+ url = 'http://' + args.inference_addr + '/predictions/' + args.model_name
+ with open(args.img, 'rb') as image:
+ response = requests.post(url, image)
+ server_result = response.json()
+
+ vis_pose_result(
+ model,
+ args.img,
+ server_result,
+ out_file=osp.join(args.out_dir, 'torchserve_result.png'))
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ main(args)
+
+ # Following strings of text style are from colorama package
+ bright_style, reset_style = '\x1b[1m', '\x1b[0m'
+ red_text, blue_text = '\x1b[31m', '\x1b[34m'
+ white_background = '\x1b[107m'
+
+ msg = white_background + bright_style + red_text
+ msg += 'DeprecationWarning: This tool will be deprecated in future. '
+ msg += blue_text + 'Welcome to use the unified model deployment toolbox '
+ msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
+ msg += reset_style
+ warnings.warn(msg)
diff --git a/vendor/ViTPose/tools/dist_test.sh b/vendor/ViTPose/tools/dist_test.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9dcb8851c9b25f1c5ec081ab1a0a59178bbf81ca
--- /dev/null
+++ b/vendor/ViTPose/tools/dist_test.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
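+# Usage (illustrative):
+#   bash tools/dist_test.sh <config> <checkpoint> <num_gpus> [--eval mAP ...]
+# The master port can be overridden via the environment, e.g.
+#   PORT=29510 bash tools/dist_test.sh ...
+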
+CONFIG=$1
+CHECKPOINT=$2
+GPUS=$3
+PORT=${PORT:-29500}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+ $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
diff --git a/vendor/ViTPose/tools/dist_train.sh b/vendor/ViTPose/tools/dist_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9727f5310ae78bcd02c3b08a12f135fdb3b93437
--- /dev/null
+++ b/vendor/ViTPose/tools/dist_train.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
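+# Usage (illustrative):
+#   bash tools/dist_train.sh <config> <num_gpus> [--work-dir <dir> ...]
+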
+CONFIG=$1
+GPUS=$2
+PORT=${PORT:-29500}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+ $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
diff --git a/vendor/ViTPose/tools/misc/keypoints2coco_without_mmdet.py b/vendor/ViTPose/tools/misc/keypoints2coco_without_mmdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..63220fcb19cb5d80435e69874022741b33e84ef0
--- /dev/null
+++ b/vendor/ViTPose/tools/misc/keypoints2coco_without_mmdet.py
@@ -0,0 +1,146 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os
+from argparse import ArgumentParser
+
+from mmcv import track_iter_progress
+from PIL import Image
+from xtcocotools.coco import COCO
+
+from mmpose.apis import inference_top_down_pose_model, init_pose_model
+
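+# Example (names are illustrative): pseudo-label a COCO-style box file with a
+# trained pose model:
+#   python tools/misc/keypoints2coco_without_mmdet.py \
+#       configs/path/to/pose_config.py pose_checkpoint.pth \
+#       --img-root data/images --json-file person_boxes_coco.json \
+#       --out-json-file pseudo_keypoints_coco.json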
+
+def main():
+ """Visualize the demo images.
+
+ pose_keypoints require the json_file containing boxes.
+ """
+ parser = ArgumentParser()
+ parser.add_argument('pose_config', help='Config file for detection')
+ parser.add_argument('pose_checkpoint', help='Checkpoint file')
+ parser.add_argument('--img-root', type=str, default='', help='Image root')
+ parser.add_argument(
+ '--json-file',
+ type=str,
+ default='',
+ help='Json file containing image person bboxes in COCO format.')
+ parser.add_argument(
+ '--out-json-file',
+ type=str,
+ default='',
+ help='Output json contains pseudolabeled annotation')
+ parser.add_argument(
+ '--show',
+ action='store_true',
+ default=False,
+ help='whether to show img')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+
+ args = parser.parse_args()
+
+ coco = COCO(args.json_file)
+ # build the pose model from a config file and a checkpoint file
+ pose_model = init_pose_model(
+ args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+ dataset = pose_model.cfg.data['test']['type']
+
+ img_keys = list(coco.imgs.keys())
+
+ # optional
+ return_heatmap = False
+
+ # e.g. use ('backbone', ) to return backbone feature
+ output_layer_names = None
+
+ categories = [{'id': 1, 'name': 'person'}]
+ img_anno_dict = {'images': [], 'annotations': [], 'categories': categories}
+
+ # process each image
+ ann_uniq_id = int(0)
+ for i in track_iter_progress(range(len(img_keys))):
+ # get bounding box annotations
+ image_id = img_keys[i]
+ image = coco.loadImgs(image_id)[0]
+ image_name = os.path.join(args.img_root, image['file_name'])
+
+ width, height = Image.open(image_name).size
+ ann_ids = coco.getAnnIds(image_id)
+
+ # make person bounding boxes
+ person_results = []
+ for ann_id in ann_ids:
+ person = {}
+ ann = coco.anns[ann_id]
+ # bbox format is 'xywh'
+ person['bbox'] = ann['bbox']
+ person_results.append(person)
+
+ pose_results, returned_outputs = inference_top_down_pose_model(
+ pose_model,
+ image_name,
+ person_results,
+ bbox_thr=None,
+ format='xywh',
+ dataset=dataset,
+ return_heatmap=return_heatmap,
+ outputs=output_layer_names)
+
+ # add output of model and bboxes to dict
+ for indx, i in enumerate(pose_results):
+ pose_results[indx]['keypoints'][
+ pose_results[indx]['keypoints'][:, 2] < args.kpt_thr, :3] = 0
+ pose_results[indx]['keypoints'][
+ pose_results[indx]['keypoints'][:, 2] >= args.kpt_thr, 2] = 2
+ x = int(pose_results[indx]['bbox'][0])
+ y = int(pose_results[indx]['bbox'][1])
+ w = int(pose_results[indx]['bbox'][2] -
+ pose_results[indx]['bbox'][0])
+ h = int(pose_results[indx]['bbox'][3] -
+ pose_results[indx]['bbox'][1])
+ bbox = [x, y, w, h]
+ area = round((w * h), 0)
+
+ images = {
+ 'file_name': image_name.split('/')[-1],
+ 'height': height,
+ 'width': width,
+ 'id': int(image_id)
+ }
+
+ annotations = {
+ 'keypoints': [
+ int(i) for i in pose_results[indx]['keypoints'].reshape(
+ -1).tolist()
+ ],
+ 'num_keypoints':
+ len(pose_results[indx]['keypoints']),
+ 'area':
+ area,
+ 'iscrowd':
+ 0,
+ 'image_id':
+ int(image_id),
+ 'bbox':
+ bbox,
+ 'category_id':
+ 1,
+ 'id':
+ ann_uniq_id,
+ }
+
+ img_anno_dict['annotations'].append(annotations)
+ ann_uniq_id += 1
+
+ img_anno_dict['images'].append(images)
+
+ # create json
+ with open(args.out_json_file, 'w') as outfile:
+ json.dump(img_anno_dict, outfile, indent=2)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/misc/publish_model.py b/vendor/ViTPose/tools/misc/publish_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..393721ab06cde171f2b06afc8674c9f03046b65b
--- /dev/null
+++ b/vendor/ViTPose/tools/misc/publish_model.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import subprocess
+from datetime import date
+
+import torch
+
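+# Example (file names are illustrative):
+#   python tools/misc/publish_model.py work_dirs/exp/latest.pth my_model.pth
+# which produces a file named like `my_model-<sha256[:8]>_<yyyymmdd>.pth`.
+# Requires the `sha256sum` and `mv` commands, i.e. a Unix-like environment.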
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Process a checkpoint to be published')
+ parser.add_argument('in_file', help='input checkpoint filename')
+ parser.add_argument('out_file', help='output checkpoint filename')
+ args = parser.parse_args()
+ return args
+
+
+def process_checkpoint(in_file, out_file):
+ checkpoint = torch.load(in_file, map_location='cpu')
+ # remove optimizer for smaller file size
+ if 'optimizer' in checkpoint:
+ del checkpoint['optimizer']
+ # if it is necessary to remove some sensitive data in checkpoint['meta'],
+ # add the code here.
+ torch.save(checkpoint, out_file)
+ sha = subprocess.check_output(['sha256sum', out_file]).decode()
+ if out_file.endswith('.pth'):
+ out_file_name = out_file[:-4]
+ else:
+ out_file_name = out_file
+
+ date_now = date.today().strftime('%Y%m%d')
+ final_file = out_file_name + f'-{sha[:8]}_{date_now}.pth'
+ subprocess.Popen(['mv', out_file, final_file])
+
+
+def main():
+ args = parse_args()
+ process_checkpoint(args.in_file, args.out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/model_split.py b/vendor/ViTPose/tools/model_split.py
new file mode 100644
index 0000000000000000000000000000000000000000..928380a54e293579e43833264410fe7de4ee8954
--- /dev/null
+++ b/vendor/ViTPose/tools/model_split.py
@@ -0,0 +1,104 @@
+import torch
+import os
+import argparse
+import copy
+
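+# Splits a multi-dataset MoE checkpoint into single-dataset checkpoints: the
+# shared FFN fc2 weights are concatenated with the matching expert weights and
+# the corresponding keypoint head is kept, producing coco/aic/mpii/ap10k/
+# apt36k/wholebody .pth files under the target directory.
+# Example (paths are illustrative):
+#   python tools/model_split.py --source ckpts/multi_task.pth --target ckpts/split
+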
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--source', type=str)
+ parser.add_argument('--target', type=str, default=None)
+ args = parser.parse_args()
+ return args
+
+def main():
+
+ args = parse_args()
+
+ if args.target is None:
+ args.target = '/'.join(args.source.split('/')[:-1])
+
+ ckpt = torch.load(args.source, map_location='cpu')
+
+ experts = dict()
+
+ new_ckpt = copy.deepcopy(ckpt)
+
+ state_dict = new_ckpt['state_dict']
+
+ for key, value in state_dict.items():
+ if 'mlp.experts' in key:
+ experts[key] = value
+
+ keys = ckpt['state_dict'].keys()
+
+ target_expert = 0
+ new_ckpt = copy.deepcopy(ckpt)
+
+ for key in keys:
+ if 'mlp.fc2' in key:
+ value = new_ckpt['state_dict'][key]
+ value = torch.cat([value, experts[key.replace('fc2.', f'experts.{target_expert}.')]], dim=0)
+ new_ckpt['state_dict'][key] = value
+
+ torch.save(new_ckpt, os.path.join(args.target, 'coco.pth'))
+
+ names = ['aic', 'mpii', 'ap10k', 'apt36k', 'wholebody']
+ num_keypoints = [14, 16, 17, 17, 133]
+ weight_names = ['keypoint_head.deconv_layers.0.weight',
+ 'keypoint_head.deconv_layers.1.weight',
+ 'keypoint_head.deconv_layers.1.bias',
+ 'keypoint_head.deconv_layers.1.running_mean',
+ 'keypoint_head.deconv_layers.1.running_var',
+ 'keypoint_head.deconv_layers.1.num_batches_tracked',
+ 'keypoint_head.deconv_layers.3.weight',
+ 'keypoint_head.deconv_layers.4.weight',
+ 'keypoint_head.deconv_layers.4.bias',
+ 'keypoint_head.deconv_layers.4.running_mean',
+ 'keypoint_head.deconv_layers.4.running_var',
+ 'keypoint_head.deconv_layers.4.num_batches_tracked',
+ 'keypoint_head.final_layer.weight',
+ 'keypoint_head.final_layer.bias']
+
+ exist_range = True
+
+ for i in range(5):
+
+ new_ckpt = copy.deepcopy(ckpt)
+
+ target_expert = i + 1
+
+ for key in keys:
+ if 'mlp.fc2' in key:
+ expert_key = key.replace('fc2.', f'experts.{target_expert}.')
+ if expert_key in experts:
+ value = new_ckpt['state_dict'][key]
+ value = torch.cat([value, experts[expert_key]], dim=0)
+ else:
+ exist_range = False
+
+ new_ckpt['state_dict'][key] = value
+
+ if not exist_range:
+ break
+
+ for tensor_name in weight_names:
+ new_ckpt['state_dict'][tensor_name] = new_ckpt['state_dict'][tensor_name.replace('keypoint_head', f'associate_keypoint_heads.{i}')]
+
+ for tensor_name in ['keypoint_head.final_layer.weight', 'keypoint_head.final_layer.bias']:
+ new_ckpt['state_dict'][tensor_name] = new_ckpt['state_dict'][tensor_name][:num_keypoints[i]]
+
+ # remove unnecessary part in the state dict
+ for j in range(5):
+ # remove associate part
+ for tensor_name in weight_names:
+ new_ckpt['state_dict'].pop(tensor_name.replace('keypoint_head', f'associate_keypoint_heads.{j}'))
+ # remove expert part
+ keys = new_ckpt['state_dict'].keys()
+ for key in list(keys):
+ if 'expert' in key:
+ new_ckpt['state_dict'].pop(key)
+
+ torch.save(new_ckpt, os.path.join(args.target, f'{names[i]}.pth'))
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/slurm_test.sh b/vendor/ViTPose/tools/slurm_test.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c528dc9d4514539d86e18371129ceb2bfff54dea
--- /dev/null
+++ b/vendor/ViTPose/tools/slurm_test.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
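+# Usage (illustrative); GPUS, GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be
+# overridden via the environment:
+#   GPUS=8 sh tools/slurm_test.sh <partition> <job_name> <config> <checkpoint> [--eval mAP ...]
+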
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+CHECKPOINT=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+PY_ARGS=${@:5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
diff --git a/vendor/ViTPose/tools/slurm_train.sh b/vendor/ViTPose/tools/slurm_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c3b65490a5271b6e9967362a2a727685292e8a78
--- /dev/null
+++ b/vendor/ViTPose/tools/slurm_train.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Copyright (c) OpenMMLab. All rights reserved.
+
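+# Usage (illustrative); GPUS, GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be
+# overridden via the environment:
+#   GPUS=8 sh tools/slurm_train.sh <partition> <job_name> <config> <work_dir>
+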
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+WORK_DIR=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+PY_ARGS=${@:5}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
diff --git a/vendor/ViTPose/tools/test.py b/vendor/ViTPose/tools/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1539925f6b45a4c04a844b31521b0a202fcfbd0
--- /dev/null
+++ b/vendor/ViTPose/tools/test.py
@@ -0,0 +1,184 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import os.path as osp
+import warnings
+
+import mmcv
+import torch
+from mmcv import Config, DictAction
+from mmcv.cnn import fuse_conv_bn
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+
+from mmpose.apis import multi_gpu_test, single_gpu_test
+from mmpose.datasets import build_dataloader, build_dataset
+from mmpose.models import build_posenet
+from mmpose.utils import setup_multi_processes
+
+try:
+ from mmcv.runner import wrap_fp16_model
+except ImportError:
+ warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
+ 'Please install mmcv>=1.1.4')
+ from mmpose.core import wrap_fp16_model
+
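+# Example (config/checkpoint names are illustrative):
+#   python tools/test.py configs/path/to/pose_config.py pose_checkpoint.pth --eval mAP
+# For distributed testing use tools/dist_test.sh instead.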
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='mmpose test model')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--out', help='output result file')
+ parser.add_argument(
+ '--work-dir', help='the dir to save evaluation results')
+ parser.add_argument(
+ '--fuse-conv-bn',
+ action='store_true',
+ help='Whether to fuse conv and bn; this will slightly increase '
+ 'the inference speed')
+ parser.add_argument(
+ '--gpu-id',
+ type=int,
+ default=0,
+ help='id of gpu to use '
+ '(only applicable to non-distributed testing)')
+ parser.add_argument(
+ '--eval',
+ default=None,
+ nargs='+',
+ help='evaluation metric, which depends on the dataset,'
+ ' e.g., "mAP" for MSCOCO')
+ parser.add_argument(
+ '--gpu_collect',
+ action='store_true',
+ help='whether to use gpu to collect results')
+ parser.add_argument('--tmpdir', help='tmp dir for writing some results')
+ parser.add_argument(
+ '--cfg-options',
+ nargs='+',
+ action=DictAction,
+ default={},
+ help='override some settings in the used config, the key-value pair '
+ 'in xxx=yyy format will be merged into config file. For example, '
+ "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def merge_configs(cfg1, cfg2):
+ # Merge cfg2 into cfg1
+ # Overwrite cfg1 on duplicate keys; skip keys whose value is falsy (e.g. None).
+ cfg1 = {} if cfg1 is None else cfg1.copy()
+ cfg2 = {} if cfg2 is None else cfg2
+ for k, v in cfg2.items():
+ if v:
+ cfg1[k] = v
+ return cfg1
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+
+ if args.cfg_options is not None:
+ cfg.merge_from_dict(args.cfg_options)
+
+ # set multi-process settings
+ setup_multi_processes(cfg)
+
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # work_dir is determined in this priority: CLI > segment in file > filename
+ if args.work_dir is not None:
+ # update configs according to CLI args if args.work_dir is not None
+ cfg.work_dir = args.work_dir
+ elif cfg.get('work_dir', None) is None:
+ # use config filename as default work_dir if cfg.work_dir is None
+ cfg.work_dir = osp.join('./work_dirs',
+ osp.splitext(osp.basename(args.config))[0])
+
+ mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # build the dataloader
+ dataset = build_dataset(cfg.data.test, dict(test_mode=True))
+ # step 1: give default values and override (if exist) from cfg.data
+ loader_cfg = {
+ **dict(seed=cfg.get('seed'), drop_last=False, dist=distributed),
+ **({} if torch.__version__ != 'parrots' else dict(
+ prefetch_num=2,
+ pin_memory=False,
+ )),
+ **dict((k, cfg.data[k]) for k in [
+ 'seed',
+ 'prefetch_num',
+ 'pin_memory',
+ 'persistent_workers',
+ ] if k in cfg.data)
+ }
+ # step2: cfg.data.test_dataloader has higher priority
+ test_loader_cfg = {
+ **loader_cfg,
+ **dict(shuffle=False, drop_last=False),
+ **dict(workers_per_gpu=cfg.data.get('workers_per_gpu', 1)),
+ **dict(samples_per_gpu=cfg.data.get('samples_per_gpu', 1)),
+ **cfg.data.get('test_dataloader', {})
+ }
+ data_loader = build_dataloader(dataset, **test_loader_cfg)
+
+ # build the model and load checkpoint
+ model = build_posenet(cfg.model)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+ load_checkpoint(model, args.checkpoint, map_location='cpu')
+
+ if args.fuse_conv_bn:
+ model = fuse_conv_bn(model)
+
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[args.gpu_id])
+ outputs = single_gpu_test(model, data_loader)
+ else:
+ model = MMDistributedDataParallel(
+ model.cuda(),
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False)
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+ args.gpu_collect)
+
+ rank, _ = get_dist_info()
+ eval_config = cfg.get('evaluation', {})
+ eval_config = merge_configs(eval_config, dict(metric=args.eval))
+
+ if rank == 0:
+ if args.out:
+ print(f'\nwriting results to {args.out}')
+ mmcv.dump(outputs, args.out)
+
+ results = dataset.evaluate(outputs, cfg.work_dir, **eval_config)
+ for k, v in sorted(results.items()):
+ print(f'{k}: {v}')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/train.py b/vendor/ViTPose/tools/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e1f7074b9cf77739f9d786c6589a2c8f1352aba
--- /dev/null
+++ b/vendor/ViTPose/tools/train.py
@@ -0,0 +1,195 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import copy
+import os
+import os.path as osp
+import time
+import warnings
+
+import mmcv
+import torch
+from mmcv import Config, DictAction
+from mmcv.runner import get_dist_info, init_dist, set_random_seed
+from mmcv.utils import get_git_hash
+
+from mmpose import __version__
+from mmpose.apis import init_random_seed, train_model
+from mmpose.datasets import build_dataset
+from mmpose.models import build_posenet
+from mmpose.utils import collect_env, get_root_logger, setup_multi_processes
+import mmcv_custom
+
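+# Example (config name is illustrative):
+#   python tools/train.py configs/path/to/pose_config.py --work-dir work_dirs/my_exp
+# For distributed training use tools/dist_train.sh instead.
+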
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a pose model')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument('--work-dir', help='the dir to save logs and models')
+ parser.add_argument(
+ '--resume-from', help='the checkpoint file to resume from')
+ parser.add_argument(
+ '--no-validate',
+ action='store_true',
+ help='whether not to evaluate the checkpoint during training')
+ group_gpus = parser.add_mutually_exclusive_group()
+ group_gpus.add_argument(
+ '--gpus',
+ type=int,
+ help='(Deprecated, please use --gpu-id) number of gpus to use '
+ '(only applicable to non-distributed training)')
+ group_gpus.add_argument(
+ '--gpu-ids',
+ type=int,
+ nargs='+',
+ help='(Deprecated, please use --gpu-id) ids of gpus to use '
+ '(only applicable to non-distributed training)')
+ group_gpus.add_argument(
+ '--gpu-id',
+ type=int,
+ default=0,
+ help='id of gpu to use '
+ '(only applicable to non-distributed training)')
+ parser.add_argument('--seed', type=int, default=None, help='random seed')
+ parser.add_argument(
+ '--deterministic',
+ action='store_true',
+ help='whether to set deterministic options for CUDNN backend.')
+ parser.add_argument(
+ '--cfg-options',
+ nargs='+',
+ action=DictAction,
+ default={},
+ help='override some settings in the used config, the key-value pair '
+ 'in xxx=yyy format will be merged into config file. For example, '
+ "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ parser.add_argument(
+ '--autoscale-lr',
+ action='store_true',
+ help='automatically scale lr with the number of gpus')
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+
+ if args.cfg_options is not None:
+ cfg.merge_from_dict(args.cfg_options)
+
+ # set multi-process settings
+ setup_multi_processes(cfg)
+
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+
+ # work_dir is determined in this priority: CLI > segment in file > filename
+ if args.work_dir is not None:
+ # update configs according to CLI args if args.work_dir is not None
+ cfg.work_dir = args.work_dir
+ elif cfg.get('work_dir', None) is None:
+ # use config filename as default work_dir if cfg.work_dir is None
+ cfg.work_dir = osp.join('./work_dirs',
+ osp.splitext(osp.basename(args.config))[0])
+ if args.resume_from is not None:
+ cfg.resume_from = args.resume_from
+ if args.gpus is not None:
+ cfg.gpu_ids = range(1)
+ warnings.warn('`--gpus` is deprecated because we only support '
+ 'single GPU mode in non-distributed training. '
+ 'Use `gpus=1` now.')
+ if args.gpu_ids is not None:
+ cfg.gpu_ids = args.gpu_ids[0:1]
+ warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+ 'Because we only support single GPU mode in '
+ 'non-distributed training. Use the first GPU '
+ 'in `gpu_ids` now.')
+ if args.gpus is None and args.gpu_ids is None:
+ cfg.gpu_ids = [args.gpu_id]
+
+ if args.autoscale_lr:
+ # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
+ cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ if len(cfg.gpu_ids) > 1:
+ warnings.warn(
+ f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
+ f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
+ 'non-distribute training time.')
+ cfg.gpu_ids = cfg.gpu_ids[0:1]
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+ # re-set gpu_ids with distributed training mode
+ _, world_size = get_dist_info()
+ cfg.gpu_ids = range(world_size)
+
+ # create work_dir
+ mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+ # init the logger before other steps
+ timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+ log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
+ logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
+
+ # init the meta dict to record some important information such as
+ # environment info and seed, which will be logged
+ meta = dict()
+ # log env info
+ env_info_dict = collect_env()
+ env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
+ dash_line = '-' * 60 + '\n'
+ logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+ dash_line)
+ meta['env_info'] = env_info
+
+ # log some basic info
+ logger.info(f'Distributed training: {distributed}')
+ logger.info(f'Config:\n{cfg.pretty_text}')
+
+ # set random seeds
+ seed = init_random_seed(args.seed)
+ logger.info(f'Set random seed to {seed}, '
+ f'deterministic: {args.deterministic}')
+ set_random_seed(seed, deterministic=args.deterministic)
+ cfg.seed = seed
+ meta['seed'] = seed
+
+ model = build_posenet(cfg.model)
+ datasets = [build_dataset(cfg.data.train)]
+
+ if len(cfg.workflow) == 2:
+ val_dataset = copy.deepcopy(cfg.data.val)
+ val_dataset.pipeline = cfg.data.train.pipeline
+ datasets.append(build_dataset(val_dataset))
+
+ if cfg.checkpoint_config is not None:
+ # save mmpose version, config file content
+ # checkpoints as meta data
+ cfg.checkpoint_config.meta = dict(
+ mmpose_version=__version__ + get_git_hash(digits=7),
+ config=cfg.pretty_text,
+ )
+ train_model(
+ model,
+ datasets,
+ cfg,
+ distributed=distributed,
+ validate=(not args.no_validate),
+ timestamp=timestamp,
+ meta=meta)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vendor/ViTPose/tools/webcam/README.md b/vendor/ViTPose/tools/webcam/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..30960fd4aeec6698f2f99d41bbb3c97e8f0b29ad
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/README.md
@@ -0,0 +1,28 @@
+# MMPose Webcam API
+
+MMPose Webcam API is a handy tool to develop interactive webcam applications with MMPose functions.
+
+*(Figure: MMPose Webcam API Overview)*
+
+## Requirements
+
+* Python >= 3.7.0
+* MMPose >= 0.23.0
+* MMDetection >= 2.21.0
+
+## Tutorials
+
+* [Get started with MMPose Webcam API (Chinese)](/tools/webcam/docs/get_started_cn.md)
+* [Build a Webcam App: A Step-by-step Instruction (Chinese)](/tools/webcam/docs/example_cn.md)
+
+## Examples
+
+* [Pose Estimation](/tools/webcam/configs/examples/): A simple example to estimate and visualize human/animal pose.
+* [Eye Effects](/tools/webcam/configs/eyes/): Apply sunglasses and bug-eye effects.
+* [Face Swap](/tools/webcam/configs/face_swap/): Everybody gets someone else's face.
+* [Meow Dwen Dwen](/tools/webcam/configs/meow_dwen_dwen/): Dress up your cat in Bing Dwen Dwen costume.
+* [Super Saiyan](/tools/webcam/configs/supersaiyan/): Super Saiyan transformation!
+* [New Year](/tools/webcam/configs/newyear/): Set off some firecrackers to celebrate Chinese New Year.
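+
+For reference, each example above is launched from the mmpose root directory by passing its config file to the webcam runner script; for instance, the pose estimation demo uses the command below (the same command given in its own README):
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/pose_estimation.py
+```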
diff --git a/vendor/ViTPose/tools/webcam/configs/background/README.md b/vendor/ViTPose/tools/webcam/configs/background/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7be8782e38717c6d537648e313921fb8c48b124e
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/background/README.md
@@ -0,0 +1,73 @@
+# Matting Effects
+
+We can apply background matting to the video.
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/background/background.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| b | Toggle the background matting effect on/off. |
+| h | Show help information. |
+| m | Show the monitoring information. |
+| q | Exit. |
+
+Note that the demo will automatically save the output video into a file `record.mp4`.
+
+### Configuration
+
+- **Choose a detection model**
+
+Users can choose detection models from the [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/v2.20.0/model_zoo.html). Just set the `model_config` and `model_checkpoint` in the detector node accordingly, and the model will be automatically downloaded and loaded.
+Note that in order to perform background matting, the model should be able to produce segmentation masks.
+
+```python
+# 'DetectorNode':
+# This node performs object detection from the frame image using an
+# MMDetection model.
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+```
+
+- **Run the demo without GPU**
+
+If you don't have GPU and CUDA in your device, the demo can run with only CPU by setting `device='cpu'` in all model nodes. For example:
+
+```python
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ device='cpu',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+```
+
+- **Debug webcam and display**
+
+You can launch the webcam runner with a debug config:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/test_camera.py
+```
diff --git a/vendor/ViTPose/tools/webcam/configs/background/background.py b/vendor/ViTPose/tools/webcam/configs/background/background.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb9f4d616e929cbe7f3c789a729ce2c07d40b9a1
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/background/background.py
@@ -0,0 +1,93 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Matting Effects',
+ camera_id=0,
+ camera_fps=10,
+ synchronous=False,
+ # Define nodes.
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='human_pose',
+ output_buffer='frame'),
+        # 'BackgroundNode':
+        # This node draws the matting visualization result in the frame image.
+        # Mask results are needed.
+ dict(
+ type='BackgroundNode',
+ name='Visualizer',
+ enable_key='b',
+ enable=True,
+ frame_buffer='frame',
+ output_buffer='vis_bg',
+ cls_names=['person']),
+ # 'NoticeBoardNode':
+        # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ frame_buffer='vis_bg',
+ output_buffer='vis',
+ content_lines=[
+ 'This is a demo for background changing effects. Have fun!',
+ '', 'Hot-keys:', '"b": Change background',
+ '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+        # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis',
+            output_buffer='_display_') # `_display_` is a runner-reserved buffer
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/examples/README.md b/vendor/ViTPose/tools/webcam/configs/examples/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ec9b961d284631478b3c326872d75942437a7f0e
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/examples/README.md
@@ -0,0 +1,110 @@
+# Pose Estimation Demo
+
+This demo performs human bounding box and keypoint detection, and visualizes results.
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/pose_estimation.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| v | Toggle the pose visualization on/off. |
+| h | Show help information. |
+| m | Show the monitoring information. |
+| q | Exit. |
+
+Note that the demo will automatically save the output video into a file `record.mp4`.
+
+### Configuration
+
+- **Choose a detection model**
+
+Users can choose detection models from the [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/v2.20.0/model_zoo.html). Just set the `model_config` and `model_checkpoint` in the detector node accordingly, and the model will be automatically downloaded and loaded.
+
+```python
+# 'DetectorNode':
+# This node performs object detection from the frame image using an
+# MMDetection model.
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_',
+ output_buffer='det_result')
+```
+
+- **Choose one or more pose models**
+
+In this demo we use two [top-down](https://github.com/open-mmlab/mmpose/tree/master/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap) pose estimation models for humans and animals respectively. Users can choose models from the [MMPose Model Zoo](https://mmpose.readthedocs.io/en/latest/modelzoo.html). To apply different pose models on different instance types, you can add multiple pose estimator nodes with `cls_names` set accordingly.
+
+```python
+# 'TopDownPoseEstimatorNode':
+# This node performs keypoint detection from the frame image using an
+# MMPose top-down model. Detection results are needed.
+dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+dict(
+ type='TopDownPoseEstimatorNode',
+ name='Animal Pose Estimator',
+ model_config='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap'
+ '/animalpose/hrnet_w32_animalpose_256x256.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
+ 'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
+ cls_names=['cat', 'dog', 'horse', 'sheep', 'cow'],
+ input_buffer='human_pose',
+ output_buffer='animal_pose')
+```
+
+- **Run the demo without GPU**
+
+If you don't have GPU and CUDA in your device, the demo can run with only CPU by setting `device='cpu'` in all model nodes. For example:
+
+```python
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ device='cpu',
+ input_buffer='_input_',
+ output_buffer='det_result')
+```
+
+- **Debug webcam and display**
+
+You can launch the webcam runner with a debug config:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/test_camera.py
+```
diff --git a/vendor/ViTPose/tools/webcam/configs/examples/pose_estimation.py b/vendor/ViTPose/tools/webcam/configs/examples/pose_estimation.py
new file mode 100644
index 0000000000000000000000000000000000000000..471333a448530c5b99f9016729b269953099f466
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/examples/pose_estimation.py
@@ -0,0 +1,115 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Pose Estimation',
+ camera_id=0,
+ camera_fps=20,
+ synchronous=False,
+ # Define nodes.
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/top_down/'
+ 'vipnas/vipnas_mbv3_coco_wholebody_256x192_dark'
+ '-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Animal Pose Estimator',
+ model_config='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap'
+ '/animalpose/hrnet_w32_animalpose_256x256.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
+ 'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
+ cls_names=['cat', 'dog', 'horse', 'sheep', 'cow'],
+ input_buffer='human_pose',
+ output_buffer='animal_pose'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='animal_pose',
+ output_buffer='frame'),
+ # 'PoseVisualizerNode':
+        # This node draws the pose visualization result in the frame image.
+        # Pose results are needed.
+ dict(
+ type='PoseVisualizerNode',
+ name='Visualizer',
+ enable_key='v',
+ frame_buffer='frame',
+ output_buffer='vis'),
+ # 'NoticeBoardNode':
+        # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ enable=True,
+ frame_buffer='vis',
+ output_buffer='vis_notice',
+ content_lines=[
+ 'This is a demo for pose visualization and simple image '
+ 'effects. Have fun!', '', 'Hot-keys:',
+ '"v": Pose estimation result visualization',
+ '"s": Sunglasses effect B-)', '"b": Bug-eye effect 0_0',
+ '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+        # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis_notice',
+ output_buffer='display'),
+ # 'RecorderNode':
+        # This node saves the output video into a file.
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='record.mp4',
+ frame_buffer='display',
+ output_buffer='_display_'
+ # `_display_` is a runner-reserved buffer
+ )
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/examples/test_camera.py b/vendor/ViTPose/tools/webcam/configs/examples/test_camera.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0c1677f4f1cbe8fe3dad081c7b9889602a39956
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/examples/test_camera.py
@@ -0,0 +1,19 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ name='Debug CamRunner',
+ camera_id=0,
+ camera_fps=20,
+ nodes=[
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ frame_buffer='_frame_',
+ output_buffer='display'),
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='webcam_output.mp4',
+ frame_buffer='display',
+ output_buffer='_display_')
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/eyes/README.md b/vendor/ViTPose/tools/webcam/configs/eyes/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f9c37695eecb18a0e4becdbcc1aa59bde4e75247
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/eyes/README.md
@@ -0,0 +1,31 @@
+# Sunglasses and Bug-eye Effects
+
+We can apply fun effects to videos based on pose estimation results, such as adding sunglasses to the face or making the eyes look bigger.
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/eyes/eyes.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| s | Toggle the sunglasses effect on/off. |
+| b | Toggle the bug-eye effect on/off. |
+| h | Show help information. |
+| m | Show the monitoring information. |
+| q | Exit. |
+
+### Configuration
+
+See the [README](/tools/webcam/configs/examples/README.md#configuration) of pose estimation demo for model configurations.
diff --git a/vendor/ViTPose/tools/webcam/configs/eyes/eyes.py b/vendor/ViTPose/tools/webcam/configs/eyes/eyes.py
new file mode 100644
index 0000000000000000000000000000000000000000..91bbfba9d9f89f7c7071375bedcc73a1e18d1783
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/eyes/eyes.py
@@ -0,0 +1,114 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Eye Effects',
+ camera_id=0,
+ camera_fps=20,
+ synchronous=False,
+ # Define nodes.
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Animal Pose Estimator',
+ model_config='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap'
+ '/animalpose/hrnet_w32_animalpose_256x256.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
+ 'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
+ cls_names=['cat', 'dog', 'horse', 'sheep', 'cow'],
+ input_buffer='human_pose',
+ output_buffer='animal_pose'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='animal_pose',
+ output_buffer='frame'),
+ # 'SunglassesNode':
+        # This node draws the sunglasses effect in the frame image.
+        # Pose results are needed.
+ dict(
+ type='SunglassesNode',
+ name='Visualizer',
+ enable_key='s',
+ enable=True,
+ frame_buffer='frame',
+ output_buffer='vis_sunglasses'),
+ # 'BugEyeNode':
+        # This node draws the bug-eye effect in the frame image.
+        # Pose results are needed.
+ dict(
+ type='BugEyeNode',
+ name='Visualizer',
+ enable_key='b',
+ enable=False,
+ frame_buffer='vis_sunglasses',
+ output_buffer='vis_bugeye'),
+ # 'NoticeBoardNode':
+        # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ frame_buffer='vis_bugeye',
+ output_buffer='vis',
+ content_lines=[
+ 'This is a demo for pose visualization and simple image '
+ 'effects. Have fun!', '', 'Hot-keys:',
+ '"s": Sunglasses effect B-)', '"b": Bug-eye effect 0_0',
+ '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+        # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis',
+            output_buffer='_display_') # `_display_` is a runner-reserved buffer
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/face_swap/README.md b/vendor/ViTPose/tools/webcam/configs/face_swap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..02f4c8aa855702bf6a668970f8e7e071611caf8e
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/face_swap/README.md
@@ -0,0 +1,31 @@
+# Face Swap
+
+Look! Where is my face?:eyes: And whose face is it?:laughing:
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/face_swap/face_swap.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| s | Switch between modes. Shuffle: randomly shuffle all faces. Clone: choose one face and clone it for everyone. None: nothing happens and everyone is safe :) |
+| v | Toggle the pose visualization on/off. |
+| h | Show help information. |
+| m | Show diagnostic information. |
+| q | Exit. |
+
+### Configuration
+
+See the [README](/tools/webcam/configs/examples/README.md#configuration) of pose estimation demo for model configurations.
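+
+The mode-switching hotkey in the table above is configured through the `FaceSwapNode` entry of the demo config. The sketch below is excerpted from `face_swap.py` in this directory; the comment on `mode_key` reflects the hotkey behaviour described above:
+
+```python
+dict(
+    type='FaceSwapNode',
+    name='FaceSwapper',
+    mode_key='s',  # hotkey that switches between Shuffle / Clone / None
+    frame_buffer='frame',
+    output_buffer='face_swap'),
+```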
diff --git a/vendor/ViTPose/tools/webcam/configs/face_swap/face_swap.py b/vendor/ViTPose/tools/webcam/configs/face_swap/face_swap.py
new file mode 100644
index 0000000000000000000000000000000000000000..403eaae4ace483d72a4baedbaf61072c24e3a1ec
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/face_swap/face_swap.py
@@ -0,0 +1,79 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ name='FaceSwap',
+ camera_id=0,
+ camera_fps=20,
+ synchronous=False,
+ nodes=[
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ device='cpu',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='TopDown Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_res50_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangzhou'
+ '.aliyuncs.com/mmpose/top_down/vipnas/'
+ 'vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth',
+ device='cpu',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='pose_result'),
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='pose_result',
+ output_buffer='frame'),
+ dict(
+ type='FaceSwapNode',
+ name='FaceSwapper',
+ mode_key='s',
+ frame_buffer='frame',
+ output_buffer='face_swap'),
+ dict(
+ type='PoseVisualizerNode',
+ name='Visualizer',
+ enable_key='v',
+ frame_buffer='face_swap',
+ output_buffer='vis_pose'),
+ dict(
+ type='NoticeBoardNode',
+ name='Help Information',
+ enable_key='h',
+ content_lines=[
+ 'Swap your faces! ',
+ 'Hot-keys:',
+ '"v": Toggle the pose visualization on/off.',
+ '"s": Switch between modes: Shuffle, Clone and None',
+ '"h": Show help information',
+ '"m": Show diagnostic information',
+ '"q": Exit',
+ ],
+ frame_buffer='vis_pose',
+ output_buffer='vis_notice'),
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis_notice',
+ output_buffer='display'),
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='faceswap_output.mp4',
+ frame_buffer='display',
+ output_buffer='_display_')
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/README.md b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..997ffc174bd70c2de6a22edee53f5b52275ae187
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/README.md
@@ -0,0 +1,44 @@
+# Meow Dwen Dwen
+
+Do you know [Bing DwenDwen (冰墩墩)](https://en.wikipedia.org/wiki/Bing_Dwen_Dwen_and_Shuey_Rhon_Rhon), the mascot of the 2022 Beijing Winter Olympic Games?
+
+
+
+
+
+Now you can dress your cat up in this costume and TA-DA! Be prepared for super cute **Meow Dwen Dwen**.
+
+
+
+
+
+Are you a dog fan? Hold on, here comes Woof Dwen Dwen.
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/meow_dwen_dwen/meow_dwen_dwen.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| s | Change the background. |
+| h | Show help information. |
+| m | Show diagnostic information. |
+| q | Exit. |
+
+### Configuration
+
+- **Use video input**
+
+As you can see in the config, we set `camera_id` to the path of the input image. You can also set it to a video file path (or URL), or to a webcam ID number (e.g. `camera_id=0`), to capture faces from a live or recorded video input, as sketched below.
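+
+A minimal sketch of the relevant part of the runner config is shown below. The image URL is the sample cat picture referenced in `meow_dwen_dwen.py`; the video path is a hypothetical placeholder, and `camera_id=0` would switch to the default webcam:
+
+```python
+runner = dict(
+    name='Little fans of 2022 Beijing Winter Olympics',
+    # Option 1: a static image (the sample cat picture used in this demo)
+    camera_id='https://user-images.githubusercontent.com/'
+    '15977946/152932036-b5554cf8-24cf-40d6-a358-35a106013f11.jpeg',
+    # Option 2: a video file path or stream URL (placeholder path)
+    # camera_id='path/to/your_video.mp4',
+    # Option 3: a webcam ID number
+    # camera_id=0,
+    camera_fps=20,
+    nodes=[],  # node definitions omitted in this sketch
+)
+```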
diff --git a/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/meow_dwen_dwen.py b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/meow_dwen_dwen.py
new file mode 100644
index 0000000000000000000000000000000000000000..399d01cf7c8df103772913294f1c0612979330e6
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/meow_dwen_dwen.py
@@ -0,0 +1,92 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Little fans of 2022 Beijing Winter Olympics',
+ # Cat image
+ camera_id='https://user-images.githubusercontent.com/'
+ '15977946/152932036-b5554cf8-24cf-40d6-a358-35a106013f11.jpeg',
+ # Dog image
+ # camera_id='https://user-images.githubusercontent.com/'
+ # '15977946/152932051-cd280b35-8066-45a0-8f52-657c8631aaba.jpg',
+ camera_fps=20,
+ nodes=[
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Animal Pose Estimator',
+ model_config='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap'
+ '/ap10k/hrnet_w32_ap10k_256x256.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
+ 'hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth',
+ cls_names=['cat', 'dog'],
+ input_buffer='det_result',
+ output_buffer='animal_pose'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='TopDown Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_res50_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangzhou'
+ '.aliyuncs.com/mmpose/top_down/vipnas/'
+ 'vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth',
+ device='cpu',
+ cls_names=['person'],
+ input_buffer='animal_pose',
+ output_buffer='human_pose'),
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='human_pose',
+ output_buffer='frame'),
+ dict(
+ type='XDwenDwenNode',
+ name='XDwenDwen',
+ mode_key='s',
+ resource_file='tools/webcam/configs/meow_dwen_dwen/'
+ 'resource-info.json',
+ out_shape=(480, 480),
+ frame_buffer='frame',
+ output_buffer='vis'),
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ enable=False,
+ frame_buffer='vis',
+ output_buffer='vis_notice',
+ content_lines=[
+ 'Let your pet put on a costume of Bing-Dwen-Dwen, '
+ 'the mascot of 2022 Beijing Winter Olympics. Have fun!', '',
+ 'Hot-keys:', '"s": Change the background',
+ '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis_notice',
+ output_buffer='display'),
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='record.mp4',
+ frame_buffer='display',
+ output_buffer='_display_'
+ # `_display_` is a runner-reserved buffer
+ )
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/resource-info.json b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/resource-info.json
new file mode 100644
index 0000000000000000000000000000000000000000..adb811cc7f3eafea56ff4d3f577ec28e33e80f0a
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/meow_dwen_dwen/resource-info.json
@@ -0,0 +1,26 @@
+[
+ {
+ "id": 1,
+ "result": "{\"width\":690,\"height\":713,\"valid\":true,\"rotate\":0,\"step_1\":{\"toolName\":\"pointTool\",\"result\":[{\"x\":374.86387434554973,\"y\":262.8020942408377,\"attribute\":\"\",\"valid\":true,\"id\":\"8SK9cVyu\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":2},{\"x\":492.8261780104712,\"y\":285.2,\"attribute\":\"\",\"valid\":true,\"id\":\"qDk54WsI\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":1},{\"x\":430.11204188481673,\"y\":318.0502617801047,\"attribute\":\"\",\"valid\":true,\"id\":\"4H80L7lL\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":3}]},\"step_2\":{\"dataSourceStep\":0,\"toolName\":\"polygonTool\",\"result\":[{\"id\":\"pwUsrf9u\",\"sourceID\":\"\",\"valid\":true,\"textAttribute\":\"\",\"pointList\":[{\"x\":423.3926701570681,\"y\":191.87539267015708},{\"x\":488.3465968586388,\"y\":209.04712041884818},{\"x\":535.3821989528797,\"y\":248.6167539267016},{\"x\":549.5675392670157,\"y\":306.8513089005236},{\"x\":537.6219895287959,\"y\":349.407329842932},{\"x\":510.74450261780106,\"y\":381.51099476439794},{\"x\":480.1340314136126,\"y\":394.9497382198953},{\"x\":411.4471204188482,\"y\":390.47015706806286},{\"x\":355.45235602094243,\"y\":373.29842931937173},{\"x\":306.17696335078534,\"y\":327.00942408376966},{\"x\":294.97801047120424,\"y\":284.45340314136126},{\"x\":306.9235602094241,\"y\":245.6303664921466},{\"x\":333.8010471204189,\"y\":217.25968586387435},{\"x\":370.3842931937173,\"y\":196.35497382198955}],\"attribute\":\"\",\"order\":1}]}}",
+ "url": "https://user-images.githubusercontent.com/15977946/152742677-35fe8a01-bd06-4a12-a02e-949e7d71f28a.jpg",
+ "fileName": "bing_dwen_dwen1.jpg"
+ },
+ {
+ "id": 2,
+ "result": "{\"width\":690,\"height\":659,\"valid\":true,\"rotate\":0,\"step_1\":{\"dataSourceStep\":0,\"toolName\":\"pointTool\",\"result\":[{\"x\":293.2460732984293,\"y\":242.89842931937173,\"attribute\":\"\",\"valid\":true,\"id\":\"KgPs39bY\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":1},{\"x\":170.41675392670155,\"y\":270.50052356020944,\"attribute\":\"\",\"valid\":true,\"id\":\"XwHyoBFU\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":2},{\"x\":224.24083769633506,\"y\":308.45340314136126,\"attribute\":\"\",\"valid\":true,\"id\":\"Qfs4YfuB\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":3}]},\"step_2\":{\"dataSourceStep\":0,\"toolName\":\"polygonTool\",\"result\":[{\"id\":\"ts5jlJxb\",\"sourceID\":\"\",\"valid\":true,\"textAttribute\":\"\",\"pointList\":[{\"x\":178.69738219895285,\"y\":184.93403141361256},{\"x\":204.91937172774865,\"y\":172.5130890052356},{\"x\":252.5329842931937,\"y\":169.0628272251309},{\"x\":295.3162303664921,\"y\":175.27329842931937},{\"x\":333.95916230366487,\"y\":195.2848167539267},{\"x\":360.18115183246067,\"y\":220.1267015706806},{\"x\":376.0523560209424,\"y\":262.909947643979},{\"x\":373.98219895287957,\"y\":296.0324607329843},{\"x\":344.99999999999994,\"y\":335.365445026178},{\"x\":322.22827225130885,\"y\":355.37696335078533},{\"x\":272.544502617801,\"y\":378.1486910994764},{\"x\":221.48062827225127,\"y\":386.42931937172773},{\"x\":187.6680628272251,\"y\":385.7392670157068},{\"x\":158.68586387434553,\"y\":369.1780104712042},{\"x\":137.98429319371724,\"y\":337.43560209424083},{\"x\":127.63350785340312,\"y\":295.34240837696336},{\"x\":131.0837696335078,\"y\":242.89842931937173},{\"x\":147.64502617801045,\"y\":208.3958115183246}],\"attribute\":\"\",\"order\":1}]}}",
+ "url": "https://user-images.githubusercontent.com/15977946/152742707-c0c51844-e1d0-42d0-9a12-e369002e082f.jpg",
+ "fileName": "bing_dwen_dwen2.jpg"
+ },
+ {
+ "id": 3,
+ "result": "{\"width\":690,\"height\":811,\"valid\":true,\"rotate\":0,\"step_1\":{\"dataSourceStep\":0,\"toolName\":\"pointTool\",\"result\":[{\"x\":361.13507853403144,\"y\":300.62198952879584,\"attribute\":\"\",\"valid\":true,\"id\":\"uAtbXtf2\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":1},{\"x\":242.24502617801048,\"y\":317.60628272251313,\"attribute\":\"\",\"valid\":true,\"id\":\"iLtceHMA\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":2},{\"x\":302.5392670157068,\"y\":356.67015706806285,\"attribute\":\"\",\"valid\":true,\"id\":\"n9MTlJ6A\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":3}]},\"step_2\":{\"dataSourceStep\":0,\"toolName\":\"polygonTool\",\"result\":[{\"id\":\"5sTLU5wF\",\"sourceID\":\"\",\"valid\":true,\"textAttribute\":\"\",\"pointList\":[{\"x\":227.80837696335078,\"y\":247.12146596858642},{\"x\":248.18952879581153,\"y\":235.23246073298432},{\"x\":291.4994764397906,\"y\":225.04188481675394},{\"x\":351.7937172774869,\"y\":229.28795811518327},{\"x\":393.40523560209425,\"y\":245.42303664921468},{\"x\":424.8261780104712,\"y\":272.59790575916236},{\"x\":443.5089005235602,\"y\":298.07434554973827},{\"x\":436.7151832460733,\"y\":345.6303664921466},{\"x\":406.1434554973822,\"y\":382.9958115183247},{\"x\":355.1905759162304,\"y\":408.4722513089006},{\"x\":313.57905759162304,\"y\":419.5120418848168},{\"x\":262.6261780104712,\"y\":417.81361256544506},{\"x\":224.41151832460733,\"y\":399.9801047120419},{\"x\":201.48272251308902,\"y\":364.3130890052356},{\"x\":194.68900523560208,\"y\":315.0586387434555},{\"x\":202.33193717277487,\"y\":272.59790575916236}],\"attribute\":\"\",\"order\":1}]}}",
+ "url": "https://user-images.githubusercontent.com/15977946/152742728-99392ecf-8f5c-46cf-b5c4-fe7fb6b39976.jpg",
+ "fileName": "bing_dwen_dwen3.jpg"
+ },
+ {
+ "id": 4,
+ "result": "{\"width\":690,\"height\":690,\"valid\":true,\"rotate\":0,\"step_1\":{\"dataSourceStep\":0,\"toolName\":\"pointTool\",\"result\":[{\"x\":365.9528795811519,\"y\":464.5759162303665,\"attribute\":\"\",\"valid\":true,\"id\":\"IKprTuHS\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":1},{\"x\":470.71727748691103,\"y\":445.06806282722516,\"attribute\":\"\",\"valid\":true,\"id\":\"Z90CWkEI\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":2},{\"x\":410.74869109947645,\"y\":395.2146596858639,\"attribute\":\"\",\"valid\":true,\"id\":\"UWRstKZk\",\"sourceID\":\"\",\"textAttribute\":\"\",\"order\":3}]},\"step_2\":{\"dataSourceStep\":0,\"toolName\":\"polygonTool\",\"result\":[{\"id\":\"C30Pc9Ww\",\"sourceID\":\"\",\"valid\":true,\"textAttribute\":\"\",\"pointList\":[{\"x\":412.91623036649213,\"y\":325.85340314136124},{\"x\":468.5497382198953,\"y\":335.9685863874345},{\"x\":501.78534031413614,\"y\":369.2041884816754},{\"x\":514.0680628272252,\"y\":415.44502617801044},{\"x\":504.67539267015707,\"y\":472.5235602094241},{\"x\":484.44502617801044,\"y\":497.0890052356021},{\"x\":443.26178010471205,\"y\":512.9842931937172},{\"x\":389.7958115183246,\"y\":518.7643979057591},{\"x\":336.32984293193715,\"y\":504.31413612565444},{\"x\":302.3717277486911,\"y\":462.40837696335075},{\"x\":298.0366492146597,\"y\":416.89005235602093},{\"x\":318.26701570680626,\"y\":372.0942408376963},{\"x\":363.0628272251309,\"y\":341.0261780104712}],\"attribute\":\"\",\"order\":1}]}}",
+ "url": "https://user-images.githubusercontent.com/15977946/152742755-9dc75f89-4156-4103-9c6d-f35f1f409d11.jpg",
+ "fileName": "bing_dwen_dwen4.jpg"
+ }
+]
diff --git a/vendor/ViTPose/tools/webcam/configs/newyear/README.md b/vendor/ViTPose/tools/webcam/configs/newyear/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8c655c121e236146a00a378b5bf495dbf24e6888
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/newyear/README.md
@@ -0,0 +1,31 @@
+# New Year Hat and Firecracker Effects
+
+This demo provides New Year effects based on pose estimation results, like adding a hat on the head and a firecracker in the hands.
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/newyear/new_year.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| t | Toggle the hat effect on/off. |
+| f | Toggle the firecracker effect on/off. |
+| h | Show help information. |
+| m | Show the monitoring information. |
+| q | Exit. |
+
+### Configuration
+
+See the [README](/tools/webcam/configs/examples/README.md#configuration) of pose estimation demo for model configurations.
diff --git a/vendor/ViTPose/tools/webcam/configs/newyear/new_year.py b/vendor/ViTPose/tools/webcam/configs/newyear/new_year.py
new file mode 100644
index 0000000000000000000000000000000000000000..3551184053312da288ccac95ae9f37e7f116dd1b
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/newyear/new_year.py
@@ -0,0 +1,122 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Pose Estimation',
+ camera_id=0,
+ camera_fps=20,
+ synchronous=False,
+ # Define nodes.
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Animal Pose Estimator',
+ model_config='configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap'
+ '/animalpose/hrnet_w32_animalpose_256x256.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
+ 'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
+ cls_names=['cat', 'dog', 'horse', 'sheep', 'cow'],
+ input_buffer='human_pose',
+ output_buffer='animal_pose'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='animal_pose',
+ output_buffer='frame'),
+ # 'HatNode':
+        # This node draws the hat effect in the frame image.
+        # Pose results are needed.
+ dict(
+ type='HatNode',
+ name='Visualizer',
+ enable_key='t',
+ frame_buffer='frame',
+ output_buffer='vis_hat'),
+ # 'FirecrackerNode':
+        # This node draws the firecracker effect in the frame image.
+        # Pose results are needed.
+ dict(
+ type='FirecrackerNode',
+ name='Visualizer',
+ enable_key='f',
+ frame_buffer='vis_hat',
+ output_buffer='vis_firecracker'),
+ # 'NoticeBoardNode':
+        # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ enable=True,
+ frame_buffer='vis_firecracker',
+ output_buffer='vis_notice',
+ content_lines=[
+ 'This is a demo for pose visualization and simple image '
+ 'effects. Have fun!', '', 'Hot-keys:', '"t": Hat effect',
+ '"f": Firecracker effect', '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+        # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis_notice',
+ output_buffer='display'),
+ # 'RecorderNode':
+        # This node saves the output video into a file.
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='record.mp4',
+ frame_buffer='display',
+ output_buffer='_display_'
+ # `_display_` is a runner-reserved buffer
+ )
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/supersaiyan/README.md b/vendor/ViTPose/tools/webcam/configs/supersaiyan/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9e9aef1bbaa7c62277a039cfad995a01e0491a10
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/supersaiyan/README.md
@@ -0,0 +1,96 @@
+# Super Saiyan Effects
+
+We can apply fun effects to videos based on pose estimation results, like the Super Saiyan transformation.
+
+https://user-images.githubusercontent.com/11788150/150138076-2192079f-068a-4d43-bf27-2f1fd708cabc.mp4
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/supersaiyan/saiyan.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| s | Toggle the Super Saiyan effect on/off. |
+| h | Show help information. |
+| m | Show the monitoring information. |
+| q | Exit. |
+
+Note that the demo will automatically save the output video into a file `record.mp4`.
+
+### Configuration
+
+- **Choose a detection model**
+
+Users can choose detection models from the [MMDetection Model Zoo](https://mmdetection.readthedocs.io/en/v2.20.0/model_zoo.html). Just set the `model_config` and `model_checkpoint` in the detector node accordingly, and the model will be automatically downloaded and loaded.
+
+```python
+# 'DetectorNode':
+# This node performs object detection from the frame image using an
+# MMDetection model.
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+```
+
+- **Choose one or more pose models**
+
+In this demo we use a [top-down](https://github.com/open-mmlab/mmpose/tree/master/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap) pose estimation model for humans. Users can choose models from the [MMPose Model Zoo](https://mmpose.readthedocs.io/en/latest/modelzoo.html). To apply different pose models to different instance types, you can add multiple pose estimator nodes with `cls_names` set accordingly.
+
+```python
+# 'TopDownPoseEstimatorNode':
+# This node performs keypoint detection from the frame image using an
+# MMPose top-down model. Detection results are needed.
+dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose')
+```
+
+- **Run the demo without GPU**
+
+If you don't have GPU and CUDA in your device, the demo can run with only CPU by setting `device='cpu'` in all model nodes. For example:
+
+```python
+dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ device='cpu',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+```
+
+- **Debug webcam and display**
+
+You can launch the webcam runner with a debug config:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/test_camera.py
+```
diff --git a/vendor/ViTPose/tools/webcam/configs/supersaiyan/saiyan.py b/vendor/ViTPose/tools/webcam/configs/supersaiyan/saiyan.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a8e7bc82c7ca53fb6a0350ce8b0bd3e3ac6e737
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/supersaiyan/saiyan.py
@@ -0,0 +1,93 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Super Saiyan Effects',
+ camera_id=0,
+ camera_fps=30,
+ synchronous=False,
+ # Define nodes.
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/mask_rcnn_r50_fpn_2x_coco.py',
+ model_checkpoint='https://download.openmmlab.com/'
+ 'mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/'
+ 'mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392'
+ '__segm_mAP-0.354_20200505_003907-3e542a40.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://openmmlab-share.oss-cn-hangz'
+ 'hou.aliyuncs.com/mmpose/top_down/vipnas/vipnas_mbv3_co'
+ 'co_wholebody_256x192_dark-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='human_pose'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='human_pose',
+ output_buffer='frame'),
+ # 'SaiyanNode':
+        # This node draws the Super Saiyan effect in the frame image.
+        # Pose results are needed.
+ dict(
+ type='SaiyanNode',
+ name='Visualizer',
+ enable_key='s',
+ cls_names=['person'],
+ enable=True,
+ frame_buffer='frame',
+ output_buffer='vis_saiyan'),
+ # 'NoticeBoardNode':
+        # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ frame_buffer='vis_saiyan',
+ output_buffer='vis',
+ content_lines=[
+ 'This is a demo for super saiyan effects. Have fun!', '',
+ 'Hot-keys:', '"s": Saiyan effect',
+ '"h": Show help information',
+ '"m": Show diagnostic information', '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+        # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis',
+            output_buffer='_display_') # `_display_` is a runner-reserved buffer
+ ])
diff --git a/vendor/ViTPose/tools/webcam/configs/valentinemagic/README.md b/vendor/ViTPose/tools/webcam/configs/valentinemagic/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8063d2e18640a4312167ed1c022fce3cf613937e
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/valentinemagic/README.md
@@ -0,0 +1,35 @@
+# Valentine Magic
+
+Do you want to show your **love** to your beloved one, especially on Valentine's Day? Express it with your pose using MMPose right away and see the Valentine Magic!
+
+Try to pose a hand heart gesture and see what happens!
+
+Prefer a blow kiss? Here comes your flying heart~
+
+
+
+
+
+## Instruction
+
+### Get started
+
+Launch the demo from the mmpose root directory:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/valentinemagic/valentinemagic.py
+```
+
+### Hotkeys
+
+| Hotkey | Function |
+| -- | -- |
+| l | Toggle the Valentine Magic effect on/off. |
+| v | Toggle the pose visualization on/off. |
+| h | Show help information. |
+| m | Show diagnostic information. |
+| q | Exit. |
+
+### Configuration
+
+See the [README](/tools/webcam/configs/examples/README.md#configuration) of pose estimation demo for model configurations.
diff --git a/vendor/ViTPose/tools/webcam/configs/valentinemagic/valentinemagic.py b/vendor/ViTPose/tools/webcam/configs/valentinemagic/valentinemagic.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f921b07901805b490be264c28e12c7de3648f8b
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/configs/valentinemagic/valentinemagic.py
@@ -0,0 +1,118 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+runner = dict(
+ # Basic configurations of the runner
+ name='Human Pose and Effects',
+ camera_id=0,
+ camera_fps=30,
+
+ # Define nodes.
+ #
+ # The configuration of a node usually includes:
+ # 1. 'type': Node class name
+ # 2. 'name': Node name
+ # 3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
+ # input and output buffer names. This may depend on the node class.
+ # 4. 'enable_key': assign a hot-key to toggle enable/disable this node.
+ # This may depend on the node class.
+ # 5. Other class-specific arguments
+ nodes=[
+ # 'DetectorNode':
+ # This node performs object detection from the frame image using an
+ # MMDetection model.
+ dict(
+ type='DetectorNode',
+ name='Detector',
+ model_config='demo/mmdetection_cfg/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco.py',
+ model_checkpoint='https://download.openmmlab.com'
+ '/mmdetection/v2.0/ssd/'
+ 'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
+ 'scratch_600e_coco_20210629_110627-974d9307.pth',
+ input_buffer='_input_', # `_input_` is a runner-reserved buffer
+ output_buffer='det_result'),
+ # 'TopDownPoseEstimatorNode':
+ # This node performs keypoint detection from the frame image using an
+        # MMPose top-down model. Detection results are needed.
+ dict(
+ type='TopDownPoseEstimatorNode',
+ name='Human Pose Estimator',
+ model_config='configs/wholebody/2d_kpt_sview_rgb_img/'
+ 'topdown_heatmap/coco-wholebody/'
+ 'vipnas_mbv3_coco_wholebody_256x192_dark.py',
+ model_checkpoint='https://download.openmmlab.com/mmpose/top_down/'
+ 'vipnas/vipnas_mbv3_coco_wholebody_256x192_dark'
+ '-e2158108_20211205.pth',
+ cls_names=['person'],
+ input_buffer='det_result',
+ output_buffer='pose_result'),
+ # 'ModelResultBindingNode':
+ # This node binds the latest model inference result with the current
+ # frame. (This means the frame image and inference result may be
+ # asynchronous).
+ dict(
+ type='ModelResultBindingNode',
+ name='ResultBinder',
+ frame_buffer='_frame_', # `_frame_` is a runner-reserved buffer
+ result_buffer='pose_result',
+ output_buffer='frame'),
+ # 'PoseVisualizerNode':
+ # This node draws the pose visualization result on the frame image.
+ # Pose results are needed.
+ dict(
+ type='PoseVisualizerNode',
+ name='Visualizer',
+ enable_key='v',
+ enable=False,
+ frame_buffer='frame',
+ output_buffer='vis'),
+ # 'ValentineMagicNode':
+ # This node draws hearts in the image.
+ # It launches a dynamically expanding heart from the middle of the
+ # hands when a person poses a "hand heart" gesture or blows a kiss.
+ # The effect is only triggered when there are two persons in the image.
+ # Pose results are needed.
+ dict(
+ type='ValentineMagicNode',
+ name='ValentineMagic',
+ enable_key='l',
+ frame_buffer='vis',
+ output_buffer='vis_heart',
+ ),
+ # 'NoticeBoardNode':
+ # This node shows a notice board with given content, e.g. help
+ # information.
+ dict(
+ type='NoticeBoardNode',
+ name='Helper',
+ enable_key='h',
+ enable=False,
+ frame_buffer='vis_heart',
+ output_buffer='vis_notice',
+ content_lines=[
+ 'This is a demo for pose visualization and simple image '
+ 'effects. Have fun!', '', 'Hot-keys:',
+ '"h": Show help information', '"l": LoveHeart Effect',
+ '"v": PoseVisualizer', '"m": Show diagnostic information',
+ '"q": Exit'
+ ],
+ ),
+ # 'MonitorNode':
+ # This node shows diagnostic information in the frame image. It can
+ # be used for debugging or monitoring system resource status.
+ dict(
+ type='MonitorNode',
+ name='Monitor',
+ enable_key='m',
+ enable=False,
+ frame_buffer='vis_notice',
+ output_buffer='display'),
+ # 'RecorderNode':
+ # This node records the frames into a local video file so the
+ # visualization results can be saved. Comment it out to disable recording.
+ dict(
+ type='RecorderNode',
+ name='Recorder',
+ out_video_file='record.mp4',
+ frame_buffer='display',
+ output_buffer='_display_')
+ ])
diff --git a/vendor/ViTPose/tools/webcam/docs/example_cn.md b/vendor/ViTPose/tools/webcam/docs/example_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..69b9898c3237ab6c81b6af28dfcb50224ac424df
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/docs/example_cn.md
@@ -0,0 +1,171 @@
+# Development Example: Put Sunglasses on a Cat
+
+## Design
+
+Before writing any code, let's think about how to implement this feature:
+
+- First, run object detection to find the cat in the image
+- Next, estimate the cat's keypoints, e.g. the positions of the left and right eyes
+- Finally, paste the sunglasses sticker image at the right place. TA-DA!
+
+Following this plan, let's see how to implement it step by step.
+
+## Step 1: Start from an Existing Config
+
+The WebcamAPI already ships a number of Nodes for common features, together with example configs, which greatly reduces the amount of code you need to write. For example, we can start from the pose estimation demo above, whose config is located at `tools/webcam/configs/examples/pose_estimation.py`. To make it more intuitive, the functional nodes in this config are shown in the following diagram:
+
+*(Figure: diagram of the pose estimation config)*
+
+As you can see, this config already covers steps 1 (object detection) and 2 (keypoint detection) of our design. What remains is step 3 (pasting the sticker image), for which we need to define a new Node.
+
+## Step 2: Implement a New Node
+
+The WebcamAPI defines the following 2 Node base classes:
+
+1. Node: the base class of all nodes. It implements basic functionality such as initialization, binding the runner, starting the thread, and data input/output. Subclasses define the concrete node behavior by overriding the abstract method `process()`.
+2. FrameDrawingNode: the base class for nodes that draw on images. FrameDrawingNode inherits from Node and further wraps `process()`, exposing the abstract method `draw()` for subclasses to implement the actual drawing.
+
+Pasting a sticker image is clearly a drawing operation, so we only need to inherit from the FrameDrawingNode class. The implementation looks like this:
+
+```python
+# Suppose this file is located at
+# /tools/webcam/webcam_apis/nodes/sunglasses_node.py
+from typing import List, Optional, Union
+
+from mmpose.core import apply_sunglasses_effect
+from ..utils import (load_image_from_disk_or_url,
+ get_eye_keypoint_ids)
+from .frame_drawing_node import FrameDrawingNode
+from .builder import NODES
+
+@NODES.register_module() # Register SunglassesNode in NODES (a Registry)
+class SunglassesNode(FrameDrawingNode):
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ # Load the sticker image
+ if src_img_path is None:
+ # The image attributes to:
+ # https://www.vecteezy.com/free-vector/glass
+ # Glass Vectors by Vecteezy
+ src_img_path = ('https://raw.githubusercontent.com/open-mmlab/'
+ 'mmpose/master/demo/resources/sunglasses.jpg')
+ self.src_img = load_image_from_disk_or_url(src_img_path)
+
+ def draw(self, frame_msg):
+ # Get the current frame image
+ canvas = frame_msg.get_image()
+ # Get the pose estimation results
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+
+ # Apply the sunglasses effect to every target
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ # Get the indices of the left and right eye keypoints
+ left_eye_idx, right_eye_idx = get_eye_keypoint_ids(model_cfg)
+ # Draw the sunglasses according to the eye positions
+ canvas = apply_sunglasses_effect(canvas, preds, self.src_img,
+ left_eye_idx, right_eye_idx)
+ return canvas
+```
+
+A few notes on the functions and classes used in this implementation:
+
+1. `NODES`: an mmcv.Registry instance. Anyone who has used OpenMMLab projects should be familiar with Registry. NODES is used to register and manage all node classes, so that a node can be referred to in the config by its class name (e.g. "DetectorNode", "SunglassesNode").
+2. `load_image_from_disk_or_url`: loads an image from a local path or a URL
+3. `get_eye_keypoint_ids`: returns the indices of the two eye keypoints according to the dataset information recorded in the model config (model_cfg). For example, the left/right eye indices in the COCO format are $(1, 2)$
+4. `apply_sunglasses_effect`: draws the sunglasses at the proper position in the image (see the sketch after this list). The steps are:
+ - Define a group of source anchor points $(s_1, s_2, s_3, s_4)$ on the sticker image
+ - Compute the target anchor points $(t_1, t_2, t_3, t_4)$ from the positions of the target's left/right eye keypoints $(k_1, k_2)$
+ - Estimate the geometric transform (translation, scaling, rotation) from the source anchors to the target anchors, warp the sticker image accordingly, and paste it into the original image, so the sunglasses land in the right place
+
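+As a quick illustration, the following is a minimal, self-contained sketch of this anchor-based pasting idea. It is not the actual `apply_sunglasses_effect` implementation in mmpose; the white-background threshold and the use of `cv2.estimateAffinePartial2D` are assumptions made for this sketch.
+
+```python
+import cv2
+import numpy as np
+
+
+def paste_sticker(img, sticker, src_pts, dst_pts):
+    """Warp `sticker` so that `src_pts` map onto `dst_pts`, then paste it on `img`."""
+    # Similarity transform (translation + scale + rotation) from the anchor pairs
+    mat, _ = cv2.estimateAffinePartial2D(
+        src_pts.astype(np.float32), dst_pts.astype(np.float32))
+    patch = cv2.warpAffine(
+        sticker, mat, dsize=(img.shape[1], img.shape[0]),
+        borderValue=(255, 255, 255))
+    # Treat near-white pixels as background so that only the sticker is kept
+    mask = (cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY) < 200).astype(np.uint8)
+    return cv2.copyTo(patch, mask, img)
+```
+
+The effect nodes in this repo follow the same pattern, building the target anchors from the eye keypoints (the eye-to-eye vector and its orthogonal vector) and warping with `cv2.findHomography` / `cv2.warpPerspective`.
+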
+*(Figure: illustration of how the sunglasses effect works)*
+
+### Get Advanced: About Node and FrameDrawingNode
+
+[The Node class](/tools/webcam/webcam_apis/nodes/node.py) inherits from Thread. As mentioned in the Data Flow section, all nodes run asynchronously, each in its own thread. `Node.run()` defines the basic execution logic of a node:
+
+1. A run is triggered whenever data is available in the input buffer(s)
+2. `process()` is called to perform the node's actual function. `process()` is an abstract interface implemented by subclasses
+ - In particular, if a node supports being toggled on/off, it also needs to implement `bypass()`, which defines the node's behavior when it is "off". `bypass()` has exactly the same input/output interface as `process()`. Depending on the state of `Node.enable`, `run()` calls either `process()` or `bypass()`
+3. The result is sent to the output buffer(s)
+
+When subclassing Node to implement a concrete node, you typically need to do the following (see the sketch after this section):
+
+1. Register the input and output buffers in `__init__()`, and call the base class `__init__()`
+2. Implement `process()` and, if needed, `bypass()`
+
+[The FrameDrawingNode class](tools/webcam/webcam_apis/nodes/frame_drawing_node.py) inherits from Node and further wraps `process()` and `bypass()`:
+
+- process(): extracts the frame image from the input and passes it to `draw()` for drawing. `draw()` is an abstract interface implemented by subclasses
+- bypass(): returns the node input unchanged
+
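+To make these steps concrete, here is a minimal sketch of a custom Node. It relies only on the interfaces used elsewhere in this directory (`register_input_buffer` / `register_output_buffer`, and `process()` / `bypass()` taking a dict of input messages); the `PassThroughNode` class itself is hypothetical.
+
+```python
+from typing import Dict, List, Optional, Union
+
+from ..utils import Message
+from .builder import NODES
+from .node import Node
+
+
+@NODES.register_module()
+class PassThroughNode(Node):
+    """A do-nothing node that forwards the input frame to the output buffer."""
+
+    def __init__(self,
+                 name: str,
+                 frame_buffer: str,
+                 output_buffer: Union[str, List[str]],
+                 enable_key: Optional[Union[str, int]] = None):
+        super().__init__(name=name, enable_key=enable_key, enable=True)
+        # 1. Register the I/O buffers
+        self.register_input_buffer(frame_buffer, 'frame', essential=True)
+        self.register_output_buffer(output_buffer)
+
+    def process(self, input_msgs: Dict[str, Message]) -> Message:
+        # 2. Do the actual work here; this sketch simply forwards the frame
+        return input_msgs['frame']
+
+    def bypass(self, input_msgs: Dict[str, Message]) -> Message:
+        # Behavior when the node is toggled off
+        return input_msgs['frame']
+```
+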
+### Get Advanced: The Input/Output Format of Nodes
+
+The [FrameMessage class](tools/webcam/webcam_apis/utils/message.py) is defined as the data structure for communication between nodes. That is, the inputs and outputs of a node, as well as the elements stored in a buffer, are normally instances of FrameMessage. A FrameMessage typically holds the information of one video frame, and provides simple interfaces for reading and writing data (a small usage sketch follows this list):
+
+- `get_image()`: returns the frame image
+- `set_image()`: sets the frame image
+- `add_detection_result()`: adds the result of an object detection model
+- `get_detection_results()`: returns all object detection results
+- `add_pose_result()`: adds the result of a pose estimation model
+- `get_pose_results()`: returns all pose estimation results
+
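+A small usage sketch of these accessors is shown below. The layout of the result dicts (`'preds'`, `'label'`) follows the nodes in this directory; treat the field names as assumptions if you target a different version.
+
+```python
+import cv2
+
+
+def annotate_person_count(frame_msg):
+    """Overlay the number of detected persons on the frame image."""
+    img = frame_msg.get_image()
+    det_results = frame_msg.get_detection_results() or []
+    num_persons = sum(
+        len([p for p in det['preds'] if p.get('label') == 'person'])
+        for det in det_results)
+    cv2.putText(img, f'persons: {num_persons}', (10, 30),
+                cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 126, 0), 1)
+    frame_msg.set_image(img)
+    return frame_msg
+```
+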
+## Step 3: Adjust the Config
+
+With the SunglassesNode implemented in Step 2, we only need to add it to the config to use it. For example, we can place it right after the "Visualizer" node:
+
+*(Figure: the modified config, with the SunglassesNode added)*
+
+The config entry looks like this:
+
+```python
+runner = dict(
+ # Basic parameters of the runner
+ name='Everybody Wears Sunglasses',
+ camera_id=0,
+ camera_fps=20,
+ # Define the nodes
+ nodes=[
+ ...,
+ dict(
+ type='SunglassesNode', # class name of the node
+ name='Sunglasses', # node name, chosen by the user
+ frame_buffer='vis', # input buffer
+ output_buffer='sunglasses', # output buffer
+ enable_key='s', # hot-key to toggle the effect
+ enable=True), # default on/off state at startup
+ ...] # more nodes
+)
+```
+
+In addition, you can adjust other parameters in the config as needed. Some common settings include:
+
+1. Choosing the camera: set the camera_id parameter to select the camera to use. The default built-in camera usually has id 0, and additional cameras get increasing ids. You can also set camera_id to the path of a local video file to use that video as the input of the application
+2. Choosing the models: configure the model_config and model_checkpoint parameters of the model inference nodes (e.g. DetectorNode, TopDownPoseEstimatorNode). Pick models that suit your needs (object categories, keypoint types, etc.) and your hardware
+3. Setting hot-keys: some nodes can be toggled with a hot-key; set the corresponding enable_key (the hot-key) and enable (the default on/off state) parameters
+4. Showing hints: set the content_lines parameter of the NoticeBoardNode to display hints on screen at runtime, helping users quickly understand what the application does and how to operate it
+
+Finally, save the modified config to `tools/webcam/configs/sunglasses.py`, and run it:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/sunglasses.py
+```
diff --git a/vendor/ViTPose/tools/webcam/docs/get_started_cn.md b/vendor/ViTPose/tools/webcam/docs/get_started_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..561ac10cd4d3f1eeeb0b808bf7526271deaa18c9
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/docs/get_started_cn.md
@@ -0,0 +1,123 @@
+# Getting Started with the MMPose Webcam API
+
+## What is the MMPose Webcam API
+
+The MMPose WebcamAPI is a simple set of application development interfaces that makes it easy to call algorithms from MMPose and other OpenMMLab codebases and build interactive applications on top of webcam video input.
+
+*(Figure: overview of the MMPose Webcam API framework)*
+
+## Run a Demo
+
+We will start from a simple demo to introduce the functionality and features of the MMPose WebcamAPI, and show in detail how to build your own application on top of it. Using the MMPose WebcamAPI requires only a little preparation:
+
+1. A computer (preferably with a GPU and a CUDA environment, but this is not required)
+1. A camera. Either a built-in camera or an external USB camera works
+1. An MMPose installation
+ - Fork MMPose from the OpenMMLab [official repository](https://github.com/open-mmlab/mmpose) to your own GitHub account and clone it locally
+ - Install MMPose by following the steps in the [installation guide](https://mmpose.readthedocs.io/zh_CN/latest/install.html)
+
+Once the preparation is done, open a terminal in the MMPose root directory and run the following command to launch the demo:
+
+```shell
+python tools/webcam/run_webcam.py --config tools/webcam/configs/examples/pose_estimation.py
+```
+
+This demo performs object detection, pose estimation and visualization. It looks like this:
+
+*(Figure: the pose estimation demo in action)*
+
+## What Is Inside the Demo?
+
+### Start from the Config
+
+Now that the demo runs, let's look at how it works. As the launch script `tools/webcam/run_webcam.py` shows, the procedure is very simple: read a config file, build a runner from the config, and finally call the runner's `run()` method to start the demo.
+
+```python
+# tools/webcam/run_webcam.py
+
+def launch():
+ # Read the config file
+ args = parse_args()
+ cfg = mmcv.Config.fromfile(args.config)
+ # Build the runner (an instance of WebcamRunner)
+ runner = WebcamRunner(**cfg.runner)
+ # Call run() to start the program
+ runner.run()
+
+
+if __name__ == '__main__':
+ launch()
+```
+
+Let's not dig into what the runner is just yet, and instead look at the content of the config file. Leaving out details and comments, the config roughly consists of two parts (as shown below):
+
+1. Basic parameters of the runner, such as camera_id and camera_fps. These are easy to understand: they are the necessary settings for reading the video
+2. A list of "nodes" (Node). Each node has a specific type and its own set of parameters
+
+```python
+runner = dict(
+ # Basic parameters of the runner
+ name='Pose Estimation',
+ camera_id=0,
+ camera_fps=20,
+ # Define the nodes
+ nodes=[
+ dict(
+ type='DetectorNode', # type of node 1
+ name='Detector', # name of node 1
+ input_buffer='_input_', # input of node 1
+ output_buffer='det_result', # output of node 1
+ ...), # other parameters of node 1
+ dict(
+ type='TopDownPoseEstimatorNode', # type of node 2
+ name='Human Pose Estimator', # name of node 2
+ input_buffer='det_result', # input of node 2
+ output_buffer='pose_result', # output of node 2
+ ...), # parameters of node 2
+ ...] # more nodes
+)
+```
+
+### Core Concepts: Runner and Node
+
+We have now met the 2 most important concepts of the MMPose WebcamAPI: the runner and the Node. Formally:
+
+- Runner: the Runner class is the backbone of the program. It provides the entry point runner.run() and is responsible for reading the video, displaying the output, and so on. A runner contains a number of Nodes, each responsible for a different step of frame processing.
+- Node: the Node class defines a functional module. Model inference, visualization, visual effects and the like can each be implemented as a Node. In the config example above, the 2 nodes perform object detection (Detector) and pose estimation (TopDownPoseEstimator) respectively
+
+The relationship between the Runner and the Nodes can be pictured roughly as follows:
+
+*(Figure: how the Runner and the Nodes relate to each other)*
+
+### Data Flow
+
+An important question is: after a video frame is read by the runner, in what order does it pass through the Nodes before it is finally output (displayed)?
+The answer lies in the input/output configuration of each Node. As the example config shows, every Node has parameters such as `input_buffer` and `output_buffer` that define its inputs and outputs. Through these connections, the Nodes form a directed acyclic graph, as illustrated below:
+
+*(Figure: the data flow between nodes via buffers)*
+
+Each Data Buffer in the figure is simply a container that holds data. You do not need to care about the implementation details of a buffer; just think of it as the name of a Node's input or output. These names can be chosen freely in the config, except for a few special reserved names:
+
+- _input_: holds the video frames read by the runner, used for model inference
+- _frame_: holds the video frames read by the runner, used for visualization
+- _display_: holds the result after all Nodes have run, used for display on the screen
+
+After a video frame is read by the runner, it is put into both the _input_ and _frame_ buffers, then passes through the Nodes following the connections defined in the config, finally reaches _display_, and is read out by the runner and shown on the screen. A minimal node chain illustrating these connections is sketched below.
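+
+For illustration, a minimal, hypothetical `nodes` list that wires the reserved buffers together might look like this (the model arguments are omitted and the intermediate buffer names are arbitrary):
+
+```python
+nodes = [
+    # _input_ -> det_result (model inference branch)
+    dict(type='DetectorNode', name='Detector',
+         input_buffer='_input_', output_buffer='det_result'),
+    dict(type='TopDownPoseEstimatorNode', name='Pose Estimator',
+         input_buffer='det_result', output_buffer='pose_result'),
+    # bind the latest result with the visualization frame from _frame_
+    dict(type='ModelResultBindingNode', name='ResultBinder',
+         frame_buffer='_frame_', result_buffer='pose_result',
+         output_buffer='vis'),
+    # draw and send the final image to _display_
+    dict(type='PoseVisualizerNode', name='Visualizer',
+         frame_buffer='vis', output_buffer='_display_'),
+]
+```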
+
+#### Get Advanced: About Buffers
+
+- A buffer is essentially a bounded queue. The runner holds a BufferManager instance (see `mmpose/tools/webcam/webcam_apis/buffer.py`) that creates and manages all the buffers. Each Node reads from and writes to its buffers according to the config.
+- When a buffer is full (i.e. it has reached its maximum length), writing to it usually does not block; instead, the oldest element already in the buffer is pushed out (see the small sketch below).
+- Why are there two input buffers, _input_ and _frame_? Some Nodes are time-consuming (e.g. object detection and pose estimation, which require model inference). To keep the display smooth, the expensive operations take their input from _input_, while _frame_ is used to draw the visualization in real time. Since the nodes run asynchronously, the visualization stays smooth and responsive.
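+
+The eviction behavior described above can be pictured with a few lines of Python. This is only an illustrative stand-in, not the actual Buffer/BufferManager implementation:
+
+```python
+from collections import deque
+
+
+class TinyBuffer:
+    """A bounded FIFO: putting into a full buffer evicts the oldest item."""
+
+    def __init__(self, maxlen=1):
+        self._queue = deque(maxlen=maxlen)
+
+    def put(self, item):
+        # deque with maxlen drops the oldest element automatically
+        self._queue.append(item)
+
+    def get(self):
+        return self._queue.popleft() if self._queue else None
+
+
+buf = TinyBuffer(maxlen=2)
+for frame_id in range(5):
+    buf.put(frame_id)
+print(buf.get(), buf.get())  # -> 3 4 (frames 0-2 were evicted)
+```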
diff --git a/vendor/ViTPose/tools/webcam/run_webcam.py b/vendor/ViTPose/tools/webcam/run_webcam.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce8d92e78e385d5bfaf2782cfc5b9d627531d20b
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/run_webcam.py
@@ -0,0 +1,38 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from argparse import ArgumentParser
+
+from mmcv import Config, DictAction
+from webcam_apis import WebcamRunner
+
+
+def parse_args():
+ parser = ArgumentParser('Launch webcam runner')
+ parser.add_argument(
+ '--config',
+ type=str,
+ default='tools/webcam/configs/meow_dwen_dwen/meow_dwen_dwen.py')
+
+ parser.add_argument(
+ '--cfg-options',
+ nargs='+',
+ action=DictAction,
+ default={},
+ help='override some settings in the used config, the key-value pair '
+ 'in xxx=yyy format will be merged into config file. For example, '
+ "'--cfg-options runner.camera_id=1 runner.synchronous=True'")
+
+ return parser.parse_args()
+
+
+def launch():
+ args = parse_args()
+ cfg = Config.fromfile(args.config)
+ cfg.merge_from_dict(args.cfg_options)
+
+ runner = WebcamRunner(**cfg.runner)
+ runner.run()
+
+
+if __name__ == '__main__':
+ launch()
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/__init__.py b/vendor/ViTPose/tools/webcam/webcam_apis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c8a2f5e0f6bf8d3c1b3d766dbe7a7d2c69cfaa4
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .webcam_runner import WebcamRunner
+
+__all__ = ['WebcamRunner']
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/__init__.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a882030b4a1b5aac87206e84fe69041bcd83035f
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import NODES
+from .faceswap_node import FaceSwapNode
+from .frame_effect_node import (BackgroundNode, BugEyeNode, MoustacheNode,
+ NoticeBoardNode, PoseVisualizerNode,
+ SaiyanNode, SunglassesNode)
+from .helper_node import ModelResultBindingNode, MonitorNode, RecorderNode
+from .mmdet_node import DetectorNode
+from .mmpose_node import TopDownPoseEstimatorNode
+from .valentinemagic_node import ValentineMagicNode
+from .xdwendwen_node import XDwenDwenNode
+
+__all__ = [
+ 'NODES', 'PoseVisualizerNode', 'DetectorNode', 'TopDownPoseEstimatorNode',
+ 'MonitorNode', 'BugEyeNode', 'SunglassesNode', 'ModelResultBindingNode',
+ 'NoticeBoardNode', 'RecorderNode', 'FaceSwapNode', 'MoustacheNode',
+ 'SaiyanNode', 'BackgroundNode', 'XDwenDwenNode', 'ValentineMagicNode'
+]
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/builder.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..44900b7efdc9822e693ce572cca16dafda388640
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/builder.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.utils import Registry
+
+NODES = Registry('node')
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/faceswap_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/faceswap_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ac44207fc363680aef49cfa1ea2b77707682484
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/faceswap_node.py
@@ -0,0 +1,254 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from enum import IntEnum
+from typing import List, Union
+
+import cv2
+import numpy as np
+
+from mmpose.datasets import DatasetInfo
+from .builder import NODES
+from .frame_drawing_node import FrameDrawingNode
+
+
+class Mode(IntEnum):
+ NONE = 0
+ SHUFFLE = 1
+ CLONE = 2
+
+
+@NODES.register_module()
+class FaceSwapNode(FrameDrawingNode):
+
+ def __init__(
+ self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ mode_key: Union[str, int],
+ ):
+ super().__init__(name, frame_buffer, output_buffer, enable=True)
+
+ self.mode_key = mode_key
+ self.mode_index = 0
+ self.register_event(
+ self.mode_key, is_keyboard=True, handler_func=self.switch_mode)
+ self.history = dict(mode=None)
+ self._mode = Mode.SHUFFLE
+
+ @property
+ def mode(self):
+ return self._mode
+
+ def switch_mode(self):
+ """Switch modes by updating mode index."""
+ self._mode = Mode((self._mode + 1) % len(Mode))
+
+ def draw(self, frame_msg):
+
+ if self.mode == Mode.NONE:
+ self.history = {'mode': Mode.NONE}
+ return frame_msg.get_image()
+
+ # Init history
+ if self.history['mode'] != self.mode:
+ self.history = {'mode': self.mode, 'target_map': {}}
+
+ # Merge pose results
+ pose_preds = self._merge_pose_results(frame_msg.get_pose_results())
+ num_target = len(pose_preds)
+
+ # Show mode
+ img = frame_msg.get_image()
+ canvas = img.copy()
+ if self.mode == Mode.SHUFFLE:
+ mode_txt = 'Shuffle'
+ else:
+ mode_txt = 'Clone'
+
+ cv2.putText(canvas, mode_txt, (10, 50), cv2.FONT_HERSHEY_DUPLEX, 0.8,
+ (255, 126, 0), 1)
+
+ # Skip if target number is less than 2
+ if num_target >= 2:
+ # Generate new mapping if target number changes
+ if num_target != len(self.history['target_map']):
+ if self.mode == Mode.SHUFFLE:
+ self.history['target_map'] = self._get_swap_map(num_target)
+ else:
+ self.history['target_map'] = np.repeat(
+ np.random.choice(num_target), num_target)
+
+ # Draw on canvas
+ for tar_idx, src_idx in enumerate(self.history['target_map']):
+ face_src = self._get_face_info(pose_preds[src_idx])
+ face_tar = self._get_face_info(pose_preds[tar_idx])
+ canvas = self._swap_face(img, canvas, face_src, face_tar)
+
+ return canvas
+
+ def _crop_face_by_contour(self, img, contour):
+ mask = np.zeros(img.shape[:2], dtype=np.uint8)
+ cv2.fillPoly(mask, [contour.astype(np.int32)], 1)
+ mask = cv2.dilate(
+ mask, kernel=np.ones((9, 9), dtype=np.uint8), anchor=(4, 0))
+ x1, y1, w, h = cv2.boundingRect(mask)
+ x2 = x1 + w
+ y2 = y1 + h
+ bbox = np.array([x1, y1, x2, y2], dtype=np.int64)
+ patch = img[y1:y2, x1:x2]
+ mask = mask[y1:y2, x1:x2]
+
+ return bbox, patch, mask
+
+ def _swap_face(self, img_src, img_tar, face_src, face_tar):
+
+ if face_src['dataset'] == face_tar['dataset']:
+ # Use full keypoints for face alignment
+ kpts_src = face_src['contour']
+ kpts_tar = face_tar['contour']
+ else:
+ # Use only common landmarks (eyes and nose) for face alignment if
+ # source and target have different data types
+ # (e.g. human vs animal)
+ kpts_src = face_src['landmarks']
+ kpts_tar = face_tar['landmarks']
+
+ # Get everything local
+ bbox_src, patch_src, mask_src = self._crop_face_by_contour(
+ img_src, face_src['contour'])
+
+ bbox_tar, _, mask_tar = self._crop_face_by_contour(
+ img_tar, face_tar['contour'])
+
+ kpts_src = kpts_src - bbox_src[:2]
+ kpts_tar = kpts_tar - bbox_tar[:2]
+
+ # Compute affine transformation matrix
+ trans_mat, _ = cv2.estimateAffine2D(
+ kpts_src.astype(np.float32), kpts_tar.astype(np.float32))
+ patch_warp = cv2.warpAffine(
+ patch_src,
+ trans_mat,
+ dsize=tuple(bbox_tar[2:] - bbox_tar[:2]),
+ borderValue=(0, 0, 0))
+ mask_warp = cv2.warpAffine(
+ mask_src,
+ trans_mat,
+ dsize=tuple(bbox_tar[2:] - bbox_tar[:2]),
+ borderValue=(0, 0, 0))
+
+ # Target mask
+ mask_tar = mask_tar & mask_warp
+ mask_tar_soft = cv2.GaussianBlur(mask_tar * 255, (3, 3), 3)
+
+ # Blending
+ center = tuple((0.5 * (bbox_tar[:2] + bbox_tar[2:])).astype(np.int64))
+ img_tar = cv2.seamlessClone(patch_warp, img_tar, mask_tar_soft, center,
+ cv2.NORMAL_CLONE)
+ return img_tar
+
+ @staticmethod
+ def _get_face_info(pose_pred):
+ keypoints = pose_pred['keypoints'][:, :2]
+ model_cfg = pose_pred['model_cfg']
+ dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+
+ face_info = {
+ 'dataset': dataset_info.dataset_name,
+ 'landmarks': None, # For alignment
+ 'contour': None, # For mask generation
+ 'bbox': None # For image warping
+ }
+
+ # Fall back to hard coded keypoint id
+
+ if face_info['dataset'] == 'coco':
+ face_info['landmarks'] = np.stack([
+ keypoints[1], # left eye
+ keypoints[2], # right eye
+ keypoints[0], # nose
+ 0.5 * (keypoints[5] + keypoints[6]), # neck (shoulder center)
+ ])
+ elif face_info['dataset'] == 'coco_wholebody':
+ face_info['landmarks'] = np.stack([
+ keypoints[1], # left eye
+ keypoints[2], # right eye
+ keypoints[0], # nose
+ keypoints[32], # chin
+ ])
+ contour_ids = list(range(23, 40)) + list(range(40, 50))[::-1]
+ face_info['contour'] = keypoints[contour_ids]
+ elif face_info['dataset'] == 'ap10k':
+ face_info['landmarks'] = np.stack([
+ keypoints[0], # left eye
+ keypoints[1], # right eye
+ keypoints[2], # nose
+ keypoints[3], # neck
+ ])
+ elif face_info['dataset'] == 'animalpose':
+ face_info['landmarks'] = np.stack([
+ keypoints[0], # left eye
+ keypoints[1], # right eye
+ keypoints[4], # nose
+ keypoints[5], # throat
+ ])
+ elif face_info['dataset'] == 'wflw':
+ face_info['landmarks'] = np.stack([
+ keypoints[97], # left eye
+ keypoints[96], # right eye
+ keypoints[54], # nose
+ keypoints[16], # chin
+ ])
+ contour_ids = list(range(33))[::-1] + list(range(33, 38)) + list(
+ range(42, 47))
+ face_info['contour'] = keypoints[contour_ids]
+ else:
+ raise ValueError('Cannot obtain face landmark information '
+ f'from dataset: {face_info["dataset"]}')
+
+ # Face region
+ if face_info['contour'] is None:
+ # Manually defined contour of the face region
+ left_eye, right_eye, nose = face_info['landmarks'][:3]
+ eye_center = 0.5 * (left_eye + right_eye)
+ w_vec = right_eye - left_eye
+ eye_dist = np.linalg.norm(w_vec) + 1e-6
+ w_vec = w_vec / eye_dist
+ h_vec = np.array([w_vec[1], -w_vec[0]], dtype=w_vec.dtype)
+ w = max(0.5 * eye_dist, np.abs(np.dot(nose - eye_center, w_vec)))
+ h = np.abs(np.dot(nose - eye_center, h_vec))
+
+ left_top = eye_center + 1.5 * w * w_vec - 0.5 * h * h_vec
+ right_top = eye_center - 1.5 * w * w_vec - 0.5 * h * h_vec
+ left_bottom = eye_center + 1.5 * w * w_vec + 4 * h * h_vec
+ right_bottom = eye_center - 1.5 * w * w_vec + 4 * h * h_vec
+
+ face_info['contour'] = np.stack(
+ [left_top, right_top, right_bottom, left_bottom])
+
+ # Get tight bbox of face region
+ face_info['bbox'] = np.array([
+ face_info['contour'][:, 0].min(), face_info['contour'][:, 1].min(),
+ face_info['contour'][:, 0].max(), face_info['contour'][:, 1].max()
+ ]).astype(np.int64)
+
+ return face_info
+
+ @staticmethod
+ def _merge_pose_results(pose_results):
+ preds = []
+ if pose_results is not None:
+ for prefix, pose_result in enumerate(pose_results):
+ model_cfg = pose_result['model_cfg']
+ for idx, _pred in enumerate(pose_result['preds']):
+ pred = _pred.copy()
+ pred['id'] = f'{prefix}.{_pred.get("track_id", str(idx))}'
+ pred['model_cfg'] = model_cfg
+ preds.append(pred)
+ return preds
+
+ @staticmethod
+ def _get_swap_map(num_target):
+ ids = np.random.choice(num_target, num_target, replace=False)
+ target_map = ids[(ids + 1) % num_target]
+ return target_map
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_drawing_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_drawing_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfc3511cadc2e8db0fb393ba1f821ee8091fcada
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_drawing_node.py
@@ -0,0 +1,65 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import abstractmethod
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+
+from ..utils import FrameMessage, Message
+from .node import Node
+
+
+class FrameDrawingNode(Node):
+ """Base class for Node that draw on single frame images.
+
+ Args:
+ name (str): The node name (also used as the thread name).
+ frame_buffer (str): The name of the input buffer.
+ output_buffer (str | list): The name(s) of the output buffer(s).
+ enable_key (str | int, optional): Set a hot-key to toggle
+ enable/disable of the node. If an int value is given, it will be
+ treated as an ascii code of a key. Please note:
+ 1. If enable_key is set, the bypass method needs to be
+ overridden to define the node behavior when disabled
+ 2. Some hot-keys are reserved for particular uses. For example:
+ 'q', 'Q' and 27 are used to quit
+ Default: None
+ enable (bool): Default enable/disable status. Default: True.
+ """
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True):
+
+ super().__init__(name=name, enable_key=enable_key)
+
+ # Register buffers
+ self.register_input_buffer(frame_buffer, 'frame', essential=True)
+ self.register_output_buffer(output_buffer)
+
+ self._enabled = enable
+
+ def process(self, input_msgs: Dict[str, Message]) -> Union[Message, None]:
+ frame_msg = input_msgs['frame']
+
+ img = self.draw(frame_msg)
+ frame_msg.set_image(img)
+
+ return frame_msg
+
+ def bypass(self, input_msgs: Dict[str, Message]) -> Union[Message, None]:
+ return input_msgs['frame']
+
+ @abstractmethod
+ def draw(self, frame_msg: FrameMessage) -> np.ndarray:
+ """Draw on the frame image with information from the single frame.
+
+ Args:
+ frame_msg (FrameMessage): The frame to get information from and
+ draw on.
+
+ Returns:
+ array: The output image
+ """
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_effect_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_effect_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..c248c3820a944e6b5e7f0613794d6290fcda7bcc
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/frame_effect_node.py
@@ -0,0 +1,917 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional, Tuple, Union
+
+import cv2
+import numpy as np
+from mmcv import color_val
+
+from mmpose.core import (apply_bugeye_effect, apply_sunglasses_effect,
+ imshow_bboxes, imshow_keypoints)
+from mmpose.datasets import DatasetInfo
+from ..utils import (FrameMessage, copy_and_paste, expand_and_clamp,
+ get_cached_file_path, get_eye_keypoint_ids,
+ get_face_keypoint_ids, get_wrist_keypoint_ids,
+ load_image_from_disk_or_url, screen_matting)
+from .builder import NODES
+from .frame_drawing_node import FrameDrawingNode
+
+try:
+ import psutil
+ psutil_proc = psutil.Process()
+except (ImportError, ModuleNotFoundError):
+ psutil_proc = None
+
+
+@NODES.register_module()
+class PoseVisualizerNode(FrameDrawingNode):
+ """Draw the bbox and keypoint detection results.
+
+ Args:
+ name (str): The node name (also used as the thread name).
+ frame_buffer (str): The name of the input buffer.
+ output_buffer (str|list): The name(s) of the output buffer(s).
+ enable_key (str|int, optional): Set a hot-key to toggle enable/disable
+ of the node. If an int value is given, it will be treated as an
+ ascii code of a key. Please note:
+ 1. If enable_key is set, the bypass method needs to be
+ overridden to define the node behavior when disabled
+ 2. Some hot-keys are reserved for particular uses. For example:
+ 'q', 'Q' and 27 are used to quit
+ Default: None
+ enable (bool): Default enable/disable status. Default: True.
+ kpt_thr (float): The threshold of keypoint score. Default: 0.3.
+ radius (int): The radius of keypoint. Default: 4.
+ thickness (int): The thickness of skeleton. Default: 2.
+ bbox_color (str|tuple|dict): If a single color (a str like 'green' or
+ a tuple like (0, 255, 0)), it will be used to draw the bbox.
+ Optionally, a dict can be given as a map from class labels to
+ colors.
+ """
+
+ default_bbox_color = {
+ 'person': (148, 139, 255),
+ 'cat': (255, 255, 0),
+ 'dog': (255, 255, 0),
+ }
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ kpt_thr: float = 0.3,
+ radius: int = 4,
+ thickness: int = 2,
+ bbox_color: Optional[Union[str, Tuple, Dict]] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ self.kpt_thr = kpt_thr
+ self.radius = radius
+ self.thickness = thickness
+ if bbox_color is None:
+ self.bbox_color = self.default_bbox_color
+ elif isinstance(bbox_color, dict):
+ self.bbox_color = {k: color_val(v) for k, v in bbox_color.items()}
+ else:
+ self.bbox_color = color_val(bbox_color)
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+
+ if not pose_results:
+ return canvas
+
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ dataset_info = DatasetInfo(model_cfg.dataset_info)
+
+ # Extract bboxes and poses
+ bbox_preds = []
+ bbox_labels = []
+ pose_preds = []
+ for pred in pose_result['preds']:
+ if 'bbox' in pred:
+ bbox_preds.append(pred['bbox'])
+ bbox_labels.append(pred.get('label', None))
+ pose_preds.append(pred['keypoints'])
+
+ # Get bbox colors
+ if isinstance(self.bbox_color, dict):
+ bbox_colors = [
+ self.bbox_color.get(label, (0, 255, 0))
+ for label in bbox_labels
+ ]
+ else:
+ bbox_colors = self.bbox_color
+
+ # Draw bboxes
+ if bbox_preds:
+ bboxes = np.vstack(bbox_preds)
+
+ imshow_bboxes(
+ canvas,
+ bboxes,
+ labels=bbox_labels,
+ colors=bbox_colors,
+ text_color='white',
+ font_scale=0.5,
+ show=False)
+
+ # Draw poses
+ if pose_preds:
+ imshow_keypoints(
+ canvas,
+ pose_preds,
+ skeleton=dataset_info.skeleton,
+ kpt_score_thr=self.kpt_thr,
+ pose_kpt_color=dataset_info.pose_kpt_color,
+ pose_link_color=dataset_info.pose_link_color,
+ radius=self.radius,
+ thickness=self.thickness)
+
+ return canvas
+
+
+@NODES.register_module()
+class SunglassesNode(FrameDrawingNode):
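+ """Paste a sunglasses image on each target, anchored at the eye keypoints."""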
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ if src_img_path is None:
+ # The image attributes to:
+ # https://www.vecteezy.com/free-vector/glass
+ # Glass Vectors by Vecteezy
+ src_img_path = 'demo/resources/sunglasses.jpg'
+ self.src_img = load_image_from_disk_or_url(src_img_path)
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ left_eye_idx, right_eye_idx = get_eye_keypoint_ids(model_cfg)
+
+ canvas = apply_sunglasses_effect(canvas, preds, self.src_img,
+ left_eye_idx, right_eye_idx)
+ return canvas
+
+
+@NODES.register_module()
+class SpriteNode(FrameDrawingNode):
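+ """Overlay a small animated sprite that wanders toward the first target's tracked keypoints."""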
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ if src_img_path is None:
+ # Sprites of Touhou characters :)
+ # Come from https://www.deviantart.com/shadowbendy/art/Touhou-rpg-maker-vx-Sprite-1-812746920 # noqa: E501
+ src_img_path = (
+ 'https://user-images.githubusercontent.com/'
+ '26739999/151532276-33f968d9-917f-45e3-8a99-ebde60be83bb.png')
+ self.src_img = load_image_from_disk_or_url(
+ src_img_path, cv2.IMREAD_UNCHANGED)[:144, :108]
+ tmp = np.array(np.split(self.src_img, range(36, 144, 36), axis=0))
+ tmp = np.array(np.split(tmp, range(36, 108, 36), axis=2))
+ self.sprites = tmp
+ self.pos = None
+ self.anime_frame = 0
+
+ def apply_sprite_effect(self,
+ img,
+ pose_results,
+ left_hand_index,
+ right_hand_index,
+ kpt_thr=0.5):
+ """Apply sprite effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): detection result in [x, y, score]
+ left_hand_index (int): Keypoint index of left hand
+ right_hand_index (int): Keypoint index of right hand
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ hm, wm = self.sprites.shape[2:4]
+ # Initialize the sprite position at the image center
+ if self.pos is None:
+ self.pos = [img.shape[0] // 2, img.shape[1] // 2]
+
+ if len(pose_results) == 0:
+ return img
+
+ kpts = pose_results[0]['keypoints']
+
+ if kpts[left_hand_index, 2] < kpt_thr and kpts[right_hand_index,
+ 2] < kpt_thr:
+ aim = self.pos
+ else:
+ kpt_lhand = kpts[left_hand_index, :2][::-1]
+ kpt_rhand = kpts[right_hand_index, :2][::-1]
+
+ def distance(a, b):
+ return (a[0] - b[0])**2 + (a[1] - b[1])**2
+
+ # Go to the nearest hand
+ if distance(kpt_lhand, self.pos) < distance(kpt_rhand, self.pos):
+ aim = kpt_lhand
+ else:
+ aim = kpt_rhand
+
+ pos_thr = 15
+ if aim[0] < self.pos[0] - pos_thr:
+ # Go down
+ sprite = self.sprites[self.anime_frame][3]
+ self.pos[0] -= 1
+ elif aim[0] > self.pos[0] + pos_thr:
+ # Go up
+ sprite = self.sprites[self.anime_frame][0]
+ self.pos[0] += 1
+ elif aim[1] < self.pos[1] - pos_thr:
+ # Go right
+ sprite = self.sprites[self.anime_frame][1]
+ self.pos[1] -= 1
+ elif aim[1] > self.pos[1] + pos_thr:
+ # Go left
+ sprite = self.sprites[self.anime_frame][2]
+ self.pos[1] += 1
+ else:
+ # Stay
+ self.anime_frame = 0
+ sprite = self.sprites[self.anime_frame][0]
+
+ if self.anime_frame < 2:
+ self.anime_frame += 1
+ else:
+ self.anime_frame = 0
+
+ x = self.pos[0] - hm // 2
+ y = self.pos[1] - wm // 2
+ x = max(0, min(x, img.shape[0] - hm))
+ y = max(0, min(y, img.shape[1] - wm))
+
+ # Overlay image with transparent
+ img[x:x + hm, y:y +
+ wm] = (img[x:x + hm, y:y + wm] * (1 - sprite[:, :, 3:] / 255) +
+ sprite[:, :, :3] * (sprite[:, :, 3:] / 255)).astype('uint8')
+
+ return img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ # left_hand_idx, right_hand_idx = get_wrist_keypoint_ids(model_cfg) # noqa: E501
+ left_hand_idx, right_hand_idx = get_eye_keypoint_ids(model_cfg)
+
+ canvas = self.apply_sprite_effect(canvas, preds, left_hand_idx,
+ right_hand_idx)
+ return canvas
+
+
+@NODES.register_module()
+class BackgroundNode(FrameDrawingNode):
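+ """Replace the image background with a given picture, keeping the detected foreground instances (requires instance masks from the detector)."""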
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ src_img_path: Optional[str] = None,
+ cls_ids: Optional[List] = None,
+ cls_names: Optional[List] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ self.cls_ids = cls_ids
+ self.cls_names = cls_names
+
+ if src_img_path is None:
+ src_img_path = 'https://user-images.githubusercontent.com/'\
+ '11788150/149731957-abd5c908-9c7f-45b2-b7bf-'\
+ '821ab30c6a3e.jpg'
+ self.src_img = load_image_from_disk_or_url(src_img_path)
+
+ def apply_background_effect(self,
+ img,
+ det_results,
+ background_img,
+ effect_region=(0.2, 0.2, 0.8, 0.8)):
+ """Change background.
+
+ Args:
+ img (np.ndarray): Image data.
+ det_results (list[dict]): The detection results containing:
+
+ - "cls_id" (int): Class index.
+ - "label" (str): Class label (e.g. 'person').
+ - "bbox" (ndarray:(5, )): bounding box result
+ [x, y, w, h, score].
+ - "mask" (ndarray:(w, h)): instance segmentation result.
+ background_img (np.ndarray): Background image.
+ effect_region (tuple(4, )): The region to apply mask,
+ the coordinates are normalized (x1, y1, x2, y2).
+ """
+ if len(det_results) > 0:
+ # Choose the one with the highest score.
+ det_result = det_results[0]
+ bbox = det_result['bbox']
+ mask = det_result['mask'].astype(np.uint8)
+ img = copy_and_paste(img, background_img, mask, bbox,
+ effect_region)
+ return img
+ else:
+ return background_img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ if canvas.shape != self.src_img.shape:
+ self.src_img = cv2.resize(self.src_img, (canvas.shape[1], canvas.shape[0]))
+ det_results = frame_msg.get_detection_results()
+ if not det_results:
+ return canvas
+
+ full_preds = []
+ for det_result in det_results:
+ preds = det_result['preds']
+ if self.cls_ids:
+ # Filter results by class ID
+ filtered_preds = [
+ p for p in preds if p['cls_id'] in self.cls_ids
+ ]
+ elif self.cls_names:
+ # Filter results by class name
+ filtered_preds = [
+ p for p in preds if p['label'] in self.cls_names
+ ]
+ else:
+ filtered_preds = preds
+ full_preds.extend(filtered_preds)
+
+ canvas = self.apply_background_effect(canvas, full_preds, self.src_img)
+
+ return canvas
+
+
+@NODES.register_module()
+class SaiyanNode(FrameDrawingNode):
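+ """Apply a "Super Saiyan" effect: warp a hair image onto each face and surround the person with a lightning aura (requires instance masks and face keypoints)."""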
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ hair_img_path: Optional[str] = None,
+ light_video_path: Optional[str] = None,
+ cls_ids: Optional[List] = None,
+ cls_names: Optional[List] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ self.cls_ids = cls_ids
+ self.cls_names = cls_names
+
+ if hair_img_path is None:
+ hair_img_path = 'https://user-images.githubusercontent.com/'\
+ '11788150/149732117-fcd2d804-dc2c-426c-bee7-'\
+ '94be6146e05c.png'
+ self.hair_img = load_image_from_disk_or_url(hair_img_path)
+
+ if light_video_path is None:
+ light_video_path = get_cached_file_path(
+ 'https://'
+ 'user-images.githubusercontent.com/11788150/149732080'
+ '-ea6cfeda-0dc5-4bbb-892a-3831e5580520.mp4')
+ self.light_video_path = light_video_path
+ self.light_video = cv2.VideoCapture(self.light_video_path)
+
+ def apply_saiyan_effect(self,
+ img,
+ pose_results,
+ saiyan_img,
+ light_frame,
+ face_indices,
+ bbox_thr=0.3,
+ kpt_thr=0.5):
+ """Apply saiyan hair effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result
+ in [x, y, score]
+ saiyan_img (np.ndarray): Saiyan image with transparent background.
+ light_frame (np.ndarray): Light image with green screen.
+ face_indices (list[int]): Keypoint indices of the face
+ bbox_thr (float): The score threshold of the target bbox.
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+ img = img.copy()
+ im_shape = img.shape
+ # Apply lightning effects.
+ light_mask = screen_matting(light_frame, color='green')
+
+ # anchor points in the mask
+ pts_src = np.array(
+ [
+ [84, 398], # face kpt 0
+ [331, 393], # face kpt 16
+ [84, 145],
+ [331, 140]
+ ],
+ dtype=np.float32)
+
+ for pose in pose_results:
+ bbox = pose['bbox']
+
+ if bbox[-1] < bbox_thr:
+ continue
+
+ mask_inst = pose['mask']
+ # cache
+ fg = img[np.where(mask_inst)]
+
+ bbox = expand_and_clamp(bbox[:4], im_shape, s=3.0)
+ # Apply light effects between fg and bg
+ img = copy_and_paste(
+ light_frame,
+ img,
+ light_mask,
+ effect_region=(bbox[0] / im_shape[1], bbox[1] / im_shape[0],
+ bbox[2] / im_shape[1], bbox[3] / im_shape[0]))
+ # pop
+ img[np.where(mask_inst)] = fg
+
+ # Apply Saiyan hair effects
+ kpts = pose['keypoints']
+ if kpts[face_indices[0], 2] < kpt_thr or kpts[face_indices[16],
+ 2] < kpt_thr:
+ continue
+
+ kpt_0 = kpts[face_indices[0], :2]
+ kpt_16 = kpts[face_indices[16], :2]
+ # orthogonal vector
+ vo = (kpt_0 - kpt_16)[::-1] * [-1, 1]
+
+ # anchor points in the image by eye positions
+ pts_tar = np.vstack([kpt_0, kpt_16, kpt_0 + vo, kpt_16 + vo])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ saiyan_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(0, 0, 0))
+ mask_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask_patch = (mask_patch > 1).astype(np.uint8)
+ img = cv2.copyTo(patch, mask_patch, img)
+
+ return img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+
+ det_results = frame_msg.get_detection_results()
+ if not det_results:
+ return canvas
+
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ face_indices = get_face_keypoint_ids(model_cfg)
+
+ ret, frame = self.light_video.read()
+ if not ret:
+ self.light_video = cv2.VideoCapture(self.light_video_path)
+ ret, frame = self.light_video.read()
+
+ canvas = self.apply_saiyan_effect(canvas, preds, self.hair_img,
+ frame, face_indices)
+
+ return canvas
+
+
+@NODES.register_module()
+class MoustacheNode(FrameDrawingNode):
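+ """Paste a moustache image under the nose, anchored at face keypoints."""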
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ if src_img_path is None:
+ src_img_path = 'https://user-images.githubusercontent.com/'\
+ '11788150/149732141-3afbab55-252a-428c-b6d8'\
+ '-0e352f432651.jpeg'
+ self.src_img = load_image_from_disk_or_url(src_img_path)
+
+ def apply_moustache_effect(self,
+ img,
+ pose_results,
+ moustache_img,
+ face_indices,
+ kpt_thr=0.5):
+ """Apply moustache effect.
+
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result
+ in [x, y, score]
+ moustache_img (np.ndarray): Moustache image with white background.
+ face_indices (list[int]): Keypoint indices of the face
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ hm, wm = moustache_img.shape[:2]
+ # anchor points in the moustache mask
+ pts_src = np.array([[1164, 741], [1729, 741], [1164, 1244],
+ [1729, 1244]],
+ dtype=np.float32)
+
+ for pose in pose_results:
+ kpts = pose['keypoints']
+ if kpts[face_indices[32], 2] < kpt_thr \
+ or kpts[face_indices[34], 2] < kpt_thr \
+ or kpts[face_indices[61], 2] < kpt_thr \
+ or kpts[face_indices[63], 2] < kpt_thr:
+ continue
+
+ kpt_32 = kpts[face_indices[32], :2]
+ kpt_34 = kpts[face_indices[34], :2]
+ kpt_61 = kpts[face_indices[61], :2]
+ kpt_63 = kpts[face_indices[63], :2]
+ # anchor points in the image by eye positions
+ pts_tar = np.vstack([kpt_32, kpt_34, kpt_61, kpt_63])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ moustache_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # mask the white background area in the patch with a threshold 200
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 200).astype(np.uint8)
+ img = cv2.copyTo(patch, mask, img)
+
+ return img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ face_indices = get_face_keypoint_ids(model_cfg)
+ canvas = self.apply_moustache_effect(canvas, preds, self.src_img,
+ face_indices)
+ return canvas
+
+
+@NODES.register_module()
+class BugEyeNode(FrameDrawingNode):
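+ """Apply a bug-eye effect that magnifies the regions around the eye keypoints."""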
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ left_eye_idx, right_eye_idx = get_eye_keypoint_ids(model_cfg)
+
+ canvas = apply_bugeye_effect(canvas, preds, left_eye_idx,
+ right_eye_idx)
+ return canvas
+
+
+@NODES.register_module()
+class NoticeBoardNode(FrameDrawingNode):
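+ """Show a semi-transparent notice board with the given text lines."""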
+
+ default_content_lines = ['This is a notice board!']
+
+ def __init__(
+ self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ content_lines: Optional[List[str]] = None,
+ x_offset: int = 20,
+ y_offset: int = 20,
+ y_delta: int = 15,
+ text_color: Union[str, Tuple[int, int, int]] = 'black',
+ background_color: Union[str, Tuple[int, int, int]] = (255, 183, 0),
+ text_scale: float = 0.4,
+ ):
+ super().__init__(name, frame_buffer, output_buffer, enable_key, enable)
+
+ self.x_offset = x_offset
+ self.y_offset = y_offset
+ self.y_delta = y_delta
+ self.text_color = color_val(text_color)
+ self.background_color = color_val(background_color)
+ self.text_scale = text_scale
+
+ if content_lines:
+ self.content_lines = content_lines
+ else:
+ self.content_lines = self.default_content_lines
+
+ def draw(self, frame_msg: FrameMessage) -> np.ndarray:
+ img = frame_msg.get_image()
+ canvas = np.full(img.shape, self.background_color, dtype=img.dtype)
+
+ x = self.x_offset
+ y = self.y_offset
+
+ max_len = max([len(line) for line in self.content_lines])
+
+ def _put_line(line=''):
+ nonlocal y
+ cv2.putText(canvas, line, (x, y), cv2.FONT_HERSHEY_DUPLEX,
+ self.text_scale, self.text_color, 1)
+ y += self.y_delta
+
+ for line in self.content_lines:
+ _put_line(line)
+
+ x1 = max(0, self.x_offset)
+ x2 = min(img.shape[1], int(x + max_len * self.text_scale * 20))
+ y1 = max(0, self.y_offset - self.y_delta)
+ y2 = min(img.shape[0], y)
+
+ src1 = canvas[y1:y2, x1:x2]
+ src2 = img[y1:y2, x1:x2]
+ img[y1:y2, x1:x2] = cv2.addWeighted(src1, 0.5, src2, 0.5, 0)
+
+ return img
+
+
+@NODES.register_module()
+class HatNode(FrameDrawingNode):
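+ """Paste a hat image on top of the head, anchored at the eye keypoints."""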
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key)
+
+ if src_img_path is None:
+ # The image attributes to:
+ # http://616pic.com/sucai/1m9i70p52.html
+ src_img_path = 'https://user-images.githubusercontent.' \
+ 'com/28900607/149766271-2f591c19-9b67-4' \
+ 'd92-8f94-c272396ca141.png'
+ self.src_img = load_image_from_disk_or_url(src_img_path,
+ cv2.IMREAD_UNCHANGED)
+
+ @staticmethod
+ def apply_hat_effect(img,
+ pose_results,
+ hat_img,
+ left_eye_index,
+ right_eye_index,
+ kpt_thr=0.5):
+ """Apply hat effect.
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result in
+ [x, y, score]
+ hat_img (np.ndarray): Hat image with an alpha channel.
+ left_eye_index (int): Keypoint index of left eye
+ right_eye_index (int): Keypoint index of right eye
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+ img = img.copy()
+ hm, wm = hat_img.shape[:2]
+ # anchor points in the hat image
+ a = 0.3
+ b = 0.7
+ pts_src = np.array([[a * wm, a * hm], [a * wm, b * hm],
+ [b * wm, a * hm], [b * wm, b * hm]],
+ dtype=np.float32)
+
+ for pose in pose_results:
+ kpts = pose['keypoints']
+
+ if kpts[left_eye_index, 2] < kpt_thr or \
+ kpts[right_eye_index, 2] < kpt_thr:
+ continue
+
+ kpt_leye = kpts[left_eye_index, :2]
+ kpt_reye = kpts[right_eye_index, :2]
+ # orthogonal vector to the left-to-right eyes
+ vo = 0.5 * (kpt_reye - kpt_leye)[::-1] * [-1, 1]
+ veye = 0.5 * (kpt_reye - kpt_leye)
+
+ # anchor points in the image by eye positions
+ pts_tar = np.vstack([
+ kpt_reye + 1 * veye + 5 * vo, kpt_reye + 1 * veye + 1 * vo,
+ kpt_leye - 1 * veye + 5 * vo, kpt_leye - 1 * veye + 1 * vo
+ ])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ hat_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # build the mask from the alpha channel and drop near-black pixels
+ mask = (patch[:, :, -1] > 128)
+ patch = patch[:, :, :-1]
+ mask = mask * (cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY) > 30)
+ mask = mask.astype(np.uint8)
+
+ img = cv2.copyTo(patch, mask, img)
+ return img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ left_eye_idx, right_eye_idx = get_eye_keypoint_ids(model_cfg)
+
+ canvas = self.apply_hat_effect(canvas, preds, self.src_img,
+ left_eye_idx, right_eye_idx)
+ return canvas
+
+
+@NODES.register_module()
+class FirecrackerNode(FrameDrawingNode):
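+ """Attach an animated firecracker (GIF frames) to each detected wrist."""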
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ src_img_path: Optional[str] = None):
+
+ super().__init__(name, frame_buffer, output_buffer, enable_key)
+
+ if src_img_path is None:
+ src_img_path = 'https://user-images.githubusercontent' \
+ '.com/28900607/149766281-6376055c-ed8b' \
+ '-472b-991f-60e6ae6ee1da.gif'
+ self.src_img_path = src_img_path
+ src_img = cv2.VideoCapture(self.src_img_path)
+
+ self.frame_list = []
+ ret, frame = src_img.read()
+ while frame is not None:
+ self.frame_list.append(frame)
+ ret, frame = src_img.read()
+ self.num_frames = len(self.frame_list)
+ self.frame_idx = 0
+ self.frame_period = 4 # each frame in gif lasts for 4 frames in video
+
+ @staticmethod
+ def apply_firecracker_effect(img,
+ pose_results,
+ firecracker_img,
+ left_wrist_idx,
+ right_wrist_idx,
+ kpt_thr=0.5):
+ """Apply firecracker effect.
+ Args:
+ img (np.ndarray): Image data.
+ pose_results (list[dict]): The pose estimation results containing:
+ - "keypoints" ([K,3]): keypoint detection result in
+ [x, y, score]
+ firecracker_img (np.ndarray): Firecracker image with white
+ background.
+ left_wrist_idx (int): Keypoint index of left wrist
+ right_wrist_idx (int): Keypoint index of right wrist
+ kpt_thr (float): The score threshold of required keypoints.
+ """
+
+ hm, wm = firecracker_img.shape[:2]
+ # anchor points in the firecracker mask
+ pts_src = np.array([[0. * wm, 0. * hm], [0. * wm, 1. * hm],
+ [1. * wm, 0. * hm], [1. * wm, 1. * hm]],
+ dtype=np.float32)
+
+ h, w = img.shape[:2]
+ h_tar = h / 3
+ w_tar = h_tar / hm * wm
+
+ for pose in pose_results:
+ kpts = pose['keypoints']
+
+ if kpts[left_wrist_idx, 2] > kpt_thr:
+ kpt_lwrist = kpts[left_wrist_idx, :2]
+ # anchor points in the image by the wrist position
+ pts_tar = np.vstack([
+ kpt_lwrist - [w_tar / 2, 0],
+ kpt_lwrist - [w_tar / 2, -h_tar],
+ kpt_lwrist + [w_tar / 2, 0],
+ kpt_lwrist + [w_tar / 2, h_tar]
+ ])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ firecracker_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # mask the white background area in the patch with
+ # a threshold of 240
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 240).astype(np.uint8)
+ img = cv2.copyTo(patch, mask, img)
+
+ if kpts[right_wrist_idx, 2] > kpt_thr:
+ kpt_rwrist = kpts[right_wrist_idx, :2]
+
+ # anchor points in the image by the wrist position
+ pts_tar = np.vstack([
+ kpt_rwrist - [w_tar / 2, 0],
+ kpt_rwrist - [w_tar / 2, -h_tar],
+ kpt_rwrist + [w_tar / 2, 0],
+ kpt_rwrist + [w_tar / 2, h_tar]
+ ])
+
+ h_mat, _ = cv2.findHomography(pts_src, pts_tar)
+ patch = cv2.warpPerspective(
+ firecracker_img,
+ h_mat,
+ dsize=(img.shape[1], img.shape[0]),
+ borderValue=(255, 255, 255))
+ # mask the white background area in the patch with
+ # a threshold of 240
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 240).astype(np.uint8)
+ img = cv2.copyTo(patch, mask, img)
+
+ return img
+
+ def draw(self, frame_msg):
+ canvas = frame_msg.get_image()
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+
+ frame = self.frame_list[self.frame_idx // self.frame_period]
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+ preds = pose_result['preds']
+ left_wrist_idx, right_wrist_idx = get_wrist_keypoint_ids(model_cfg)
+
+ canvas = self.apply_firecracker_effect(canvas, preds, frame,
+ left_wrist_idx,
+ right_wrist_idx)
+ self.frame_idx = (self.frame_idx + 1) % (
+ self.num_frames * self.frame_period)
+
+ return canvas
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/helper_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/helper_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..349c4f423456781a092d83fc6382d7f9f3376fd8
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/helper_node.py
@@ -0,0 +1,296 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+import time
+from queue import Full, Queue
+from threading import Thread
+from typing import List, Optional, Union
+
+import cv2
+import numpy as np
+from mmcv import color_val
+
+from mmpose.utils.timer import RunningAverage
+from .builder import NODES
+from .node import Node
+
+try:
+ import psutil
+ psutil_proc = psutil.Process()
+except (ImportError, ModuleNotFoundError):
+ psutil_proc = None
+
+
+@NODES.register_module()
+class ModelResultBindingNode(Node):
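+ """Bind the latest model inference result to the current frame and keep FPS/latency statistics."""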
+
+ def __init__(self, name: str, frame_buffer: str, result_buffer: str,
+ output_buffer: Union[str, List[str]]):
+ super().__init__(name=name, enable=True)
+ self.synchronous = None
+
+ # Cache the latest model result
+ self.last_result_msg = None
+ self.last_output_msg = None
+
+ # Inference speed analysis
+ self.frame_fps = RunningAverage(window=10)
+ self.frame_lag = RunningAverage(window=10)
+ self.result_fps = RunningAverage(window=10)
+ self.result_lag = RunningAverage(window=10)
+
+ # Register buffers
+ # Note that essential buffers will be set in set_runner() because
+ # it depends on the runner.synchronous attribute.
+ self.register_input_buffer(result_buffer, 'result', essential=False)
+ self.register_input_buffer(frame_buffer, 'frame', essential=False)
+ self.register_output_buffer(output_buffer)
+
+ def set_runner(self, runner):
+ super().set_runner(runner)
+
+ # Set synchronous according to the runner
+ if runner.synchronous:
+ self.synchronous = True
+ essential_input = 'result'
+ else:
+ self.synchronous = False
+ essential_input = 'frame'
+
+ # Set essential input buffer according to the synchronous setting
+ for buffer_info in self._input_buffers:
+ if buffer_info.input_name == essential_input:
+ buffer_info.essential = True
+
+ def process(self, input_msgs):
+ result_msg = input_msgs['result']
+
+ # Update last result
+ if result_msg is not None:
+ # Update result FPS
+ if self.last_result_msg is not None:
+ self.result_fps.update(
+ 1.0 /
+ (result_msg.timestamp - self.last_result_msg.timestamp))
+ # Update inference latency
+ self.result_lag.update(time.time() - result_msg.timestamp)
+ # Update last inference result
+ self.last_result_msg = result_msg
+
+ if not self.synchronous:
+ # Asynchronous mode: Bind the latest result with the current frame.
+ frame_msg = input_msgs['frame']
+
+ self.frame_lag.update(time.time() - frame_msg.timestamp)
+
+ # Bind result to frame
+ if self.last_result_msg is not None:
+ frame_msg.set_full_results(
+ self.last_result_msg.get_full_results())
+ frame_msg.merge_route_info(
+ self.last_result_msg.get_route_info())
+
+ output_msg = frame_msg
+
+ else:
+ # Synchronous mode: Directly output the frame that the model result
+ # was obtained from.
+ self.frame_lag.update(time.time() - result_msg.timestamp)
+ output_msg = result_msg
+
+ # Update frame fps and lag
+ if self.last_output_msg is not None:
+ self.frame_lag.update(time.time() - output_msg.timestamp)
+ self.frame_fps.update(
+ 1.0 / (output_msg.timestamp - self.last_output_msg.timestamp))
+ self.last_output_msg = output_msg
+
+ return output_msg
+
+ def _get_node_info(self):
+ info = super()._get_node_info()
+ info['result_fps'] = self.result_fps.average()
+ info['result_lag (ms)'] = self.result_lag.average() * 1000
+ info['frame_fps'] = self.frame_fps.average()
+ info['frame_lag (ms)'] = self.frame_lag.average() * 1000
+ return info
+
+
+@NODES.register_module()
+class MonitorNode(Node):
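+ """Overlay diagnostic information (per-node route info and system resource usage) on the frame image."""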
+
+ _default_ignore_items = ['timestamp']
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = False,
+ x_offset=20,
+ y_offset=20,
+ y_delta=15,
+ text_color='black',
+ background_color=(255, 183, 0),
+ text_scale=0.4,
+ ignore_items: Optional[List[str]] = None):
+ super().__init__(name=name, enable_key=enable_key, enable=enable)
+
+ self.x_offset = x_offset
+ self.y_offset = y_offset
+ self.y_delta = y_delta
+ self.text_color = color_val(text_color)
+ self.background_color = color_val(background_color)
+ self.text_scale = text_scale
+ if ignore_items is None:
+ self.ignore_items = self._default_ignore_items
+ else:
+ self.ignore_items = ignore_items
+
+ self.register_input_buffer(frame_buffer, 'frame', essential=True)
+ self.register_output_buffer(output_buffer)
+
+ def process(self, input_msgs):
+ frame_msg = input_msgs['frame']
+
+ frame_msg.update_route_info(
+ node_name='System Info',
+ node_type='dummy',
+ info=self._get_system_info())
+
+ img = frame_msg.get_image()
+ route_info = frame_msg.get_route_info()
+ img = self._show_route_info(img, route_info)
+
+ frame_msg.set_image(img)
+ return frame_msg
+
+ def _get_system_info(self):
+ sys_info = {}
+ if psutil_proc is not None:
+ sys_info['CPU(%)'] = psutil_proc.cpu_percent()
+ sys_info['Memory(%)'] = psutil_proc.memory_percent()
+ return sys_info
+
+ def _show_route_info(self, img, route_info):
+ canvas = np.full(img.shape, self.background_color, dtype=img.dtype)
+
+ x = self.x_offset
+ y = self.y_offset
+
+ max_len = 0
+
+ def _put_line(line=''):
+ nonlocal y, max_len
+ cv2.putText(canvas, line, (x, y), cv2.FONT_HERSHEY_DUPLEX,
+ self.text_scale, self.text_color, 1)
+ y += self.y_delta
+ max_len = max(max_len, len(line))
+
+ for node_info in route_info:
+ title = f'{node_info["node"]}({node_info["node_type"]})'
+ _put_line(title)
+ for k, v in node_info['info'].items():
+ if k in self.ignore_items:
+ continue
+ if isinstance(v, float):
+ v = f'{v:.1f}'
+ _put_line(f' {k}: {v}')
+
+ x1 = max(0, self.x_offset)
+ x2 = min(img.shape[1], int(x + max_len * self.text_scale * 20))
+ y1 = max(0, self.y_offset - self.y_delta)
+ y2 = min(img.shape[0], y)
+
+ src1 = canvas[y1:y2, x1:x2]
+ src2 = img[y1:y2, x1:x2]
+ img[y1:y2, x1:x2] = cv2.addWeighted(src1, 0.5, src2, 0.5, 0)
+
+ return img
+
+ def bypass(self, input_msgs):
+ return input_msgs['frame']
+
+
+@NODES.register_module()
+class RecorderNode(Node):
+ """Record the frames into a local file."""
+
+ def __init__(
+ self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ out_video_file: str,
+ out_video_fps: int = 30,
+ out_video_codec: str = 'mp4v',
+ buffer_size: int = 30,
+ ):
+ super().__init__(name=name, enable_key=None, enable=True)
+
+ self.queue = Queue(maxsize=buffer_size)
+ self.out_video_file = out_video_file
+ self.out_video_fps = out_video_fps
+ self.out_video_codec = out_video_codec
+ self.vwriter = None
+
+ # Register buffers
+ self.register_input_buffer(frame_buffer, 'frame', essential=True)
+ self.register_output_buffer(output_buffer)
+
+ # Start a new thread to write frame
+ self.t_record = Thread(target=self._record, args=(), daemon=True)
+ self.t_record.start()
+
+ def process(self, input_msgs):
+
+ frame_msg = input_msgs['frame']
+ img = frame_msg.get_image() if frame_msg is not None else None
+ img_queued = False
+
+ while not img_queued:
+ try:
+ self.queue.put(img, timeout=1)
+ img_queued = True
+ logging.info(f'{self.name}: recorder received one frame!')
+ except Full:
+ logging.info(f'{self.name}: recorder jammed!')
+
+ return frame_msg
+
+ def _record(self):
+
+ while True:
+
+ img = self.queue.get()
+
+ if img is None:
+ break
+
+ if self.vwriter is None:
+ fourcc = cv2.VideoWriter_fourcc(*self.out_video_codec)
+ fps = self.out_video_fps
+ frame_size = (img.shape[1], img.shape[0])
+ self.vwriter = cv2.VideoWriter(self.out_video_file, fourcc,
+ fps, frame_size)
+ assert self.vwriter.isOpened()
+
+ self.vwriter.write(img)
+
+ logging.info('Video recorder released!')
+ if self.vwriter is not None:
+ self.vwriter.release()
+
+ def on_exit(self):
+ try:
+ # Try putting a None into the output queue so that self.vwriter will
+ # be released after all queued frames have been written to the file.
+ self.queue.put(None, timeout=1)
+ self.t_record.join(timeout=1)
+ except Full:
+ pass
+
+ if self.t_record.is_alive():
+ # Force to release self.vwriter
+ logging.info('Video recorder forced release!')
+ if self.vwriter is not None:
+ self.vwriter.release()
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmdet_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmdet_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..4207647c927dfbd34af225454ed5c2ef7466a012
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmdet_node.py
@@ -0,0 +1,84 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Union
+
+from .builder import NODES
+from .node import Node
+
+try:
+ from mmdet.apis import inference_detector, init_detector
+ has_mmdet = True
+except (ImportError, ModuleNotFoundError):
+ has_mmdet = False
+
+
+@NODES.register_module()
+class DetectorNode(Node):
+
+ def __init__(self,
+ name: str,
+ model_config: str,
+ model_checkpoint: str,
+ input_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ device: str = 'cuda:0'):
+ # Check mmdetection is installed
+ assert has_mmdet, 'Please install mmdet to run the demo.'
+ super().__init__(name=name, enable_key=enable_key, enable=True)
+
+ self.model_config = model_config
+ self.model_checkpoint = model_checkpoint
+ self.device = device.lower()
+
+ # Init model
+ self.model = init_detector(
+ self.model_config,
+ self.model_checkpoint,
+ device=self.device.lower())
+
+ # Register buffers
+ self.register_input_buffer(input_buffer, 'input', essential=True)
+ self.register_output_buffer(output_buffer)
+
+ def bypass(self, input_msgs):
+ return input_msgs['input']
+
+ def process(self, input_msgs):
+ input_msg = input_msgs['input']
+
+ img = input_msg.get_image()
+
+ preds = inference_detector(self.model, img)
+ det_result = self._post_process(preds)
+
+ input_msg.add_detection_result(det_result, tag=self.name)
+ return input_msg
+
+ def _post_process(self, preds):
+ if isinstance(preds, tuple):
+ dets = preds[0]
+ segms = preds[1]
+ else:
+ dets = preds
+ segms = [None] * len(dets)
+
+ assert len(dets) == len(self.model.CLASSES)
+ assert len(segms) == len(self.model.CLASSES)
+ result = {'preds': [], 'model_cfg': self.model.cfg.copy()}
+
+ for i, (cls_name, bboxes,
+ masks) in enumerate(zip(self.model.CLASSES, dets, segms)):
+ if masks is None:
+ masks = [None] * len(bboxes)
+ else:
+ assert len(masks) == len(bboxes)
+
+ preds_i = [{
+ 'cls_id': i,
+ 'label': cls_name,
+ 'bbox': bbox,
+ 'mask': mask,
+ } for (bbox, mask) in zip(bboxes, masks)]
+ result['preds'].extend(preds_i)
+
+ return result
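+
+
+ # Shape sketch (illustrative): `_post_process` flattens the per-class
+ # detections into a single list, roughly
+ #
+ # {
+ # 'preds': [
+ # {'cls_id': 0, 'label': 'person',
+ # 'bbox': [x1, y1, x2, y2, score], 'mask': None},
+ # ...
+ # ],
+ # 'model_cfg': <the detector's mmcv Config>,
+ # }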
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmpose_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmpose_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..167d7413ea48943b9373525bf5f392b5f1aa248b
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/mmpose_node.py
@@ -0,0 +1,122 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import time
+from typing import Dict, List, Optional, Union
+
+from mmpose.apis import (get_track_id, inference_top_down_pose_model,
+ init_pose_model)
+from ..utils import Message
+from .builder import NODES
+from .node import Node
+
+
+@NODES.register_module()
+class TopDownPoseEstimatorNode(Node):
+
+ def __init__(self,
+ name: str,
+ model_config: str,
+ model_checkpoint: str,
+ input_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ enable: bool = True,
+ device: str = 'cuda:0',
+ cls_ids: Optional[List] = None,
+ cls_names: Optional[List] = None,
+ bbox_thr: float = 0.5):
+ super().__init__(name=name, enable_key=enable_key, enable=enable)
+
+ # Init model
+ self.model_config = model_config
+ self.model_checkpoint = model_checkpoint
+ self.device = device.lower()
+
+ self.cls_ids = cls_ids
+ self.cls_names = cls_names
+ self.bbox_thr = bbox_thr
+
+ # Init model
+ self.model = init_pose_model(
+ self.model_config,
+ self.model_checkpoint,
+ device=self.device.lower())
+
+ # Store history for pose tracking
+ self.track_info = {
+ 'next_id': 0,
+ 'last_pose_preds': [],
+ 'last_time': None
+ }
+
+ # Register buffers
+ self.register_input_buffer(input_buffer, 'input', essential=True)
+ self.register_output_buffer(output_buffer)
+
+ def bypass(self, input_msgs):
+ return input_msgs['input']
+
+ def process(self, input_msgs: Dict[str, Message]) -> Message:
+
+ input_msg = input_msgs['input']
+ img = input_msg.get_image()
+ det_results = input_msg.get_detection_results()
+
+ if det_results is None:
+ raise ValueError(
+ 'No detection results are found in the frame message. '
+ f'{self.__class__.__name__} should be used after a '
+ 'detector node.')
+
+ full_det_preds = []
+ for det_result in det_results:
+ det_preds = det_result['preds']
+ if self.cls_ids:
+ # Filter detection results by class ID
+ det_preds = [
+ p for p in det_preds if p['cls_id'] in self.cls_ids
+ ]
+ elif self.cls_names:
+ # Filter detection results by class name
+ det_preds = [
+ p for p in det_preds if p['label'] in self.cls_names
+ ]
+ full_det_preds.extend(det_preds)
+
+ # Inference pose
+ pose_preds, _ = inference_top_down_pose_model(
+ self.model,
+ img,
+ full_det_preds,
+ bbox_thr=self.bbox_thr,
+ format='xyxy')
+
+ # Pose tracking
+ current_time = time.time()
+ if self.track_info['last_time'] is None:
+ fps = None
+ elif self.track_info['last_time'] >= current_time:
+ fps = None
+ else:
+ fps = 1.0 / (current_time - self.track_info['last_time'])
+
+ pose_preds, next_id = get_track_id(
+ pose_preds,
+ self.track_info['last_pose_preds'],
+ self.track_info['next_id'],
+ use_oks=False,
+ tracking_thr=0.3,
+ use_one_euro=True,
+ fps=fps)
+
+ self.track_info['next_id'] = next_id
+ self.track_info['last_pose_preds'] = pose_preds.copy()
+ self.track_info['last_time'] = current_time
+
+ pose_result = {
+ 'preds': pose_preds,
+ 'model_cfg': self.model.cfg.copy(),
+ }
+
+ input_msg.add_pose_result(pose_result, tag=self.name)
+
+ return input_msg
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/node.py
new file mode 100644
index 0000000000000000000000000000000000000000..31e48d089dd18f8845125f50676cc175dbc2d24d
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/node.py
@@ -0,0 +1,372 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+import time
+from abc import ABCMeta, abstractmethod
+from dataclasses import dataclass
+from queue import Empty
+from threading import Thread
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
+from mmcv.utils.misc import is_method_overridden
+
+from mmpose.utils import StopWatch
+from ..utils import Message, VideoEndingMessage, limit_max_fps
+
+
+@dataclass
+class BufferInfo():
+ """Dataclass for buffer information."""
+ buffer_name: str
+ input_name: Optional[str] = None
+ essential: bool = False
+
+
+@dataclass
+class EventInfo():
+ """Dataclass for event handler information."""
+ event_name: str
+ is_keyboard: bool = False
+ handler_func: Optional[Callable] = None
+
+
+class Node(Thread, metaclass=ABCMeta):
+ """Base interface of functional module.
+
+ Parameters:
+ name (str, optional): The node name (also thread name).
+ enable_key (str|int, optional): Set a hot-key to toggle enable/disable
+ of the node. If an int value is given, it will be treated as the
+ ASCII code of a key. Please note:
+ 1. If enable_key is set, the bypass method needs to be
+ overridden to define the node behavior when disabled
+ 2. Some hot-keys are reserved for particular uses. For example:
+ 'q', 'Q' and 27 are used to quit
+ Default: None
+ max_fps (int): Maximum FPS of the node. This is to avoid the node
+ running unrestrictedly and consuming excessive resources.
+ Default: 30
+ input_check_interval (float): Minimum interval (in seconds) between
+ checks of whether the input is ready. Default: 0.01
+ enable (bool): Default enable/disable status. Default: True.
+ daemon (bool): Whether node is a daemon. Default: True.
+ """
+
+ def __init__(self,
+ name: Optional[str] = None,
+ enable_key: Optional[Union[str, int]] = None,
+ max_fps: int = 30,
+ input_check_interval: float = 0.01,
+ enable: bool = True,
+ daemon=False):
+ super().__init__(name=name, daemon=daemon)
+ self._runner = None
+ self._enabled = enable
+ self.enable_key = enable_key
+ self.max_fps = max_fps
+ self.input_check_interval = input_check_interval
+
+ # A partitioned view of the runner's buffer manager that only
+ # accesses the buffers related to this node
+ self._buffer_manager = None
+
+ # Input/output buffers are a list of registered buffers' information
+ self._input_buffers = []
+ self._output_buffers = []
+
+ # Event manager is a copy of assigned runner's event manager
+ self._event_manager = None
+
+ # A list of registered event information
+ # See register_event() for more information
+ # Note that we recommend handling events in nodes by registering
+ # handlers, but one can still access raw events via _event_manager
+ self._registered_events = []
+
+ # A list of (listener_threads, event_info)
+ # See set_runner() for more information
+ self._event_listener_threads = []
+
+ # A timer to calculate node FPS
+ self._timer = StopWatch(window=10)
+
+ # Register enable toggle key
+ if self.enable_key:
+ # If the node allows toggling enable, it should override the
+ # `bypass` method to define the node behavior when disabled.
+ if not is_method_overridden('bypass', Node, self.__class__):
+ raise NotImplementedError(
+ f'The node {self.__class__} does not support toggling '
+ 'enable but got argument `enable_key`. To support toggling '
+ 'enable, please override the `bypass` method of the node.')
+
+ self.register_event(
+ event_name=self.enable_key,
+ is_keyboard=True,
+ handler_func=self._toggle_enable,
+ )
+
+ @property
+ def registered_buffers(self):
+ return self._input_buffers + self._output_buffers
+
+ @property
+ def registered_events(self):
+ return self._registered_events.copy()
+
+ def _toggle_enable(self):
+ self._enabled = not self._enabled
+
+ def register_input_buffer(self,
+ buffer_name: str,
+ input_name: str,
+ essential: bool = False):
+ """Register an input buffer, so that Node can automatically check if
+ data is ready, fetch data from the buffers and format the inputs to
+ feed into `process` method.
+
+ This method can be invoked multiple times to register multiple input
+ buffers.
+
+ The subclass of Node should invoke `register_input_buffer` in its
+ `__init__` method.
+
+ Args:
+ buffer_name (str): The name of the buffer
+ input_name (str): The name of the fetched message from the
+ corresponding buffer
+ essential (bool): An essential input means the node will wait
+ until the input is ready before processing. Otherwise, an
+ inessential input will not block the processing; instead,
+ a None will be fetched if the buffer is not ready.
+ """
+ buffer_info = BufferInfo(buffer_name, input_name, essential)
+ self._input_buffers.append(buffer_info)
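+
+ # Illustrative sketch (not part of the original file): a subclass would
+ # typically register its buffers in `__init__`, e.g.
+ #
+ # self.register_input_buffer('detection_result', 'result', essential=True)
+ # self.register_input_buffer('raw_frame', 'frame')
+ # self.register_output_buffer('display')
+ #
+ # where 'detection_result', 'raw_frame' and 'display' are hypothetical
+ # buffer names used only for illustration.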
+
+ def register_output_buffer(self, buffer_name: Union[str, List[str]]):
+ """Register one or multiple output buffers, so that the Node can
+ automatically send the output of the `process` method to these buffers.
+
+ The subclass of Node should invoke `register_output_buffer` in its
+ `__init__` method.
+
+ Args:
+ buffer_name (str|list): The name(s) of the output buffer(s).
+ """
+
+ if not isinstance(buffer_name, list):
+ buffer_name = [buffer_name]
+
+ for name in buffer_name:
+ buffer_info = BufferInfo(name)
+ self._output_buffers.append(buffer_info)
+
+ def register_event(self,
+ event_name: str,
+ is_keyboard: bool = False,
+ handler_func: Optional[Callable] = None):
+ """Register an event. All events used in the node need to be registered
+ in __init__(). If a callable handler is given, a thread will be create
+ to listen and handle the event when the node starts.
+
+ Args:
+ Args:
+ event_name (str|int): The event name. If is_keyboard==True,
+ event_name should be a str (as char) or an int (as ascii)
+ is_keyboard (bool): Indicate whether it is an keyboard
+ event. If True, the argument event_name will be regarded as a
+ key indicator.
+ handler_func (callable, optional): The event handler function,
+ which should be a collable object with no arguments or
+ return values. Default: None.
+ """
+ event_info = EventInfo(event_name, is_keyboard, handler_func)
+ self._registered_events.append(event_info)
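+
+ # Illustrative sketch (not part of the original file): registering a
+ # keyboard event with a handler, similar to how `enable_key` is wired up
+ # in `__init__` above; the key 'v' is a hypothetical example.
+ #
+ # self.register_event(
+ # event_name='v',
+ # is_keyboard=True,
+ # handler_func=self._toggle_enable)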
+
+ def set_runner(self, runner):
+ # Get partitioned buffer manager
+ buffer_names = [
+ buffer.buffer_name
+ for buffer in self._input_buffers + self._output_buffers
+ ]
+ self._buffer_manager = runner.buffer_manager.get_sub_manager(
+ buffer_names)
+
+ # Get event manager
+ self._event_manager = runner.event_manager
+
+ def _get_input_from_buffer(self) -> Tuple[bool, Optional[Dict]]:
+ """Get and pack input data if it's ready. The function returns a tuple
+ of a status flag and a packed data dictionary. If input_buffer is
+ ready, the status flag will be True, and the packed data is a dict
+ whose items are buffer names and corresponding messages (unready
+ non-essential buffers will give a `None`). Otherwise, the status flag is
+ False and the packed data is None.
+
+ Returns:
+ bool: status flag
+ dict[str, Message]: the packed inputs where the key is the buffer
+ name and the value is the Message got from the corresponding
+ buffer.
+ """
+ buffer_manager = self._buffer_manager
+
+ if buffer_manager is None:
+ raise ValueError(f'{self.name}: Runner not set!')
+
+ # Check that essential buffers are ready
+ for buffer_info in self._input_buffers:
+ if buffer_info.essential and buffer_manager.is_empty(
+ buffer_info.buffer_name):
+ return False, None
+
+ # Default input
+ result = {
+ buffer_info.input_name: None
+ for buffer_info in self._input_buffers
+ }
+
+ for buffer_info in self._input_buffers:
+ try:
+ result[buffer_info.input_name] = buffer_manager.get(
+ buffer_info.buffer_name, block=False)
+ except Empty:
+ if buffer_info.essential:
+ # Return unsuccessful flag if any
+ # essential input is unready
+ return False, None
+
+ return True, result
+
+ def _send_output_to_buffers(self, output_msg):
+ """Send output of the process method to registered output buffers.
+
+ Args:
+ output_msg (Message): The output message to send.
+ """
+ for buffer_info in self._output_buffers:
+ buffer_name = buffer_info.buffer_name
+ self._buffer_manager.put_force(buffer_name, output_msg)
+
+ @abstractmethod
+ def process(self, input_msgs: Dict[str, Message]) -> Union[Message, None]:
+ """The core method that implement the function of the node. This method
+ will be invoked when the node is enabled and the input data is ready.
+
+ All subclasses of Node should override this method.
+
+ Args:
+ input_msgs (dict): The input data collected from the buffers. For
+ each item, the key is the `input_name` of the registered input
+ buffer, while the value is a Message instance fetched from the
+ buffer (or None if the buffer is unessential and not ready).
+
+ Returns:
+ Message: The output message of the node. It will be sent to all
+ registered output buffers.
+ """
+
+ def bypass(self, input_msgs: Dict[str, Message]) -> Union[Message, None]:
+ """The method that defines the node behavior when disabled. Note that
+ if the node has an `enable_key`, this method should be override.
+
+ The method input/output is same as it of `process` method.
+
+ Args:
+ input_msgs (dict): The input data collected from the buffers. For
+ each item, the key is the `input_name` of the registered input
+ buffer, while the value is a Message instance fetched from the
+ buffer (or None if the buffer is unessential and not ready).
+
+ Returns:
+ Message: The output message of the node. It will be sent to all
+ registered output buffers.
+ """
+ raise NotImplementedError
+
+ def _get_node_info(self):
+ """Get route information of the node."""
+ info = {'fps': self._timer.report('_FPS_'), 'timestamp': time.time()}
+ return info
+
+ def on_exit(self):
+ """This method will be invoked on event `_exit_`.
+
+ Subclasses should override this method to specify the exiting
+ behavior.
+ """
+
+ def run(self):
+ """Method representing the Node's activity.
+
+ This method overrides the standard run() method of Thread. Users should
+ not override this method in subclasses.
+ """
+
+ logging.info(f'Node {self.name} starts')
+
+ # Create event listener threads
+ for event_info in self._registered_events:
+
+ if event_info.handler_func is None:
+ continue
+
+ def event_listener():
+ while True:
+ with self._event_manager.wait_and_handle(
+ event_info.event_name, event_info.is_keyboard):
+ event_info.handler_func()
+
+ t_listener = Thread(target=event_listener, args=(), daemon=True)
+ t_listener.start()
+ self._event_listener_threads.append(t_listener)
+
+ # Loop
+ while True:
+ # Exit
+ if self._event_manager.is_set('_exit_'):
+ self.on_exit()
+ break
+
+ # Check if input is ready
+ input_status, input_msgs = self._get_input_from_buffer()
+
+ # Input is not ready
+ if not input_status:
+ time.sleep(self.input_check_interval)
+ continue
+
+ # If a VideoEndingMessage is received, broadcast the signal
+ # without invoking process() or bypass()
+ video_ending = False
+ for _, msg in input_msgs.items():
+ if isinstance(msg, VideoEndingMessage):
+ self._send_output_to_buffers(msg)
+ video_ending = True
+ break
+
+ if video_ending:
+ self.on_exit()
+ break
+
+ # Check if enabled
+ if not self._enabled:
+ # Override bypass method to define node behavior when disabled
+ output_msg = self.bypass(input_msgs)
+ else:
+ with self._timer.timeit():
+ with limit_max_fps(self.max_fps):
+ # Process
+ output_msg = self.process(input_msgs)
+
+ if output_msg:
+ # Update route information
+ node_info = self._get_node_info()
+ output_msg.update_route_info(node=self, info=node_info)
+
+ # Send output message
+ if output_msg is not None:
+ self._send_output_to_buffers(output_msg)
+
+ logging.info(f'{self.name}: process ending.')
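+
+
+ # Illustrative sketch (not part of the original file): a minimal Node
+ # subclass only needs to register its buffers and implement `process()`.
+ # The class name 'PassThroughNode' and the buffer names are hypothetical,
+ # and `NODES` would be imported from `.builder` as in the node modules
+ # of this package.
+ #
+ # @NODES.register_module()
+ # class PassThroughNode(Node):
+ #
+ # def __init__(self, name, input_buffer, output_buffer):
+ # super().__init__(name=name, enable=True)
+ # self.register_input_buffer(input_buffer, 'input', essential=True)
+ # self.register_output_buffer(output_buffer)
+ #
+ # def process(self, input_msgs):
+ # return input_msgs['input']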
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/valentinemagic_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/valentinemagic_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b1c6a585065416b50f1c889272d7e869942354e
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/valentinemagic_node.py
@@ -0,0 +1,340 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import time
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple, Union
+
+import cv2
+import numpy as np
+
+from ..utils import (FrameMessage, get_eye_keypoint_ids, get_hand_keypoint_ids,
+ get_mouth_keypoint_ids, load_image_from_disk_or_url)
+from .builder import NODES
+from .frame_drawing_node import FrameDrawingNode
+
+
+@dataclass
+class HeartInfo():
+ """Dataclass for heart information."""
+ heart_type: int
+ start_time: float
+ start_pos: Tuple[int, int]
+ end_pos: Tuple[int, int]
+
+
+@NODES.register_module()
+class ValentineMagicNode(FrameDrawingNode):
+
+ def __init__(self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ enable_key: Optional[Union[str, int]] = None,
+ kpt_vis_thr: float = 0.3,
+ hand_heart_angle_thr: float = 90.0,
+ longest_duration: float = 2.0,
+ largest_ratio: float = 0.25,
+ hand_heart_img_path: Optional[str] = None,
+ flying_heart_img_path: Optional[str] = None,
+ hand_heart_dis_ratio_thr: float = 1.0,
+ flying_heart_dis_ratio_thr: float = 3.5,
+ num_persons: int = 2):
+
+ super().__init__(
+ name, frame_buffer, output_buffer, enable_key=enable_key)
+
+ if hand_heart_img_path is None:
+ hand_heart_img_path = 'https://user-images.githubusercontent.com/'\
+ '87690686/149731850-ea946766-a4e8-4efa-82f5'\
+ '-e2f0515db8ae.png'
+ if flying_heart_img_path is None:
+ flying_heart_img_path = 'https://user-images.githubusercontent.'\
+ 'com/87690686/153554948-937ce496-33dd-4'\
+ '9ab-9829-0433fd7c13c4.png'
+
+ self.hand_heart = load_image_from_disk_or_url(hand_heart_img_path)
+ self.flying_heart = load_image_from_disk_or_url(flying_heart_img_path)
+
+ self.kpt_vis_thr = kpt_vis_thr
+ self.hand_heart_angle_thr = hand_heart_angle_thr
+ self.hand_heart_dis_ratio_thr = hand_heart_dis_ratio_thr
+ self.flying_heart_dis_ratio_thr = flying_heart_dis_ratio_thr
+ self.longest_duration = longest_duration
+ self.largest_ratio = largest_ratio
+ self.num_persons = num_persons
+
+ # record the heart infos for each person
+ self.heart_infos = {}
+
+ def _cal_distance(self, p1: np.ndarray, p2: np.ndarray) -> np.float64:
+ """calculate the distance of points p1 and p2."""
+ return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)
+
+ def _cal_angle(self, p1: np.ndarray, p2: np.ndarray, p3: np.ndarray,
+ p4: np.ndarray) -> np.float64:
+ """calculate the angle of vectors v1(constructed by points p2 and p1)
+ and v2(constructed by points p4 and p3)"""
+ v1 = p2 - p1
+ v2 = p4 - p3
+
+ vector_prod = v1[0] * v2[0] + v1[1] * v2[1]
+ length_prod = np.sqrt(pow(v1[0], 2) + pow(v1[1], 2)) * np.sqrt(
+ pow(v2[0], 2) + pow(v2[1], 2))
+ cos = vector_prod * 1.0 / (length_prod * 1.0 + 1e-6)
+
+ return (np.arccos(cos) / np.pi) * 180
+
+ def _check_heart(self, pred: Dict[str,
+ np.ndarray], hand_indices: List[int],
+ mouth_index: int, eye_indices: List[int]) -> int:
+ """Check the type of Valentine Magic based on the pose results and
+ keypoint indices of hand, mouth, and eye.
+
+ Args:
+ pred(dict): The pose estimation results containing:
+ - "keypoints" (np.ndarray[K,3]): keypoint detection result
+ in [x, y, score]
+ hand_indices(list[int]): keypoint indices of hand
+ mouth_index(int): keypoint index of mouth
+ eye_indices(list[int]): keypoint indices of eyes
+
+ Returns:
+ int: a number representing the type of heart pose,
+ 0: None, 1: hand heart, 2: left hand blow kiss,
+ 3: right hand blow kiss
+ """
+ kpts = pred['keypoints']
+
+ left_eye_idx, right_eye_idx = eye_indices
+ left_eye_pos = kpts[left_eye_idx][:2]
+ right_eye_pos = kpts[right_eye_idx][:2]
+ eye_dis = self._cal_distance(left_eye_pos, right_eye_pos)
+
+ # these indices correspond to the following keypoints:
+ # left_hand_root, left_pinky_finger1,
+ # left_pinky_finger3, left_pinky_finger4,
+ # right_hand_root, right_pinky_finger1
+ # right_pinky_finger3, right_pinky_finger4
+
+ both_hands_vis = True
+ for i in [0, 17, 19, 20, 21, 38, 40, 41]:
+ if kpts[hand_indices[i]][2] < self.kpt_vis_thr:
+ both_hands_vis = False
+
+ if both_hands_vis:
+ p1 = kpts[hand_indices[20]][:2]
+ p2 = kpts[hand_indices[19]][:2]
+ p3 = kpts[hand_indices[17]][:2]
+ p4 = kpts[hand_indices[0]][:2]
+ left_angle = self._cal_angle(p1, p2, p3, p4)
+
+ p1 = kpts[hand_indices[41]][:2]
+ p2 = kpts[hand_indices[40]][:2]
+ p3 = kpts[hand_indices[38]][:2]
+ p4 = kpts[hand_indices[21]][:2]
+ right_angle = self._cal_angle(p1, p2, p3, p4)
+
+ hand_dis = self._cal_distance(kpts[hand_indices[20]][:2],
+ kpts[hand_indices[41]][:2])
+
+ if (left_angle < self.hand_heart_angle_thr
+ and right_angle < self.hand_heart_angle_thr
+ and hand_dis / eye_dis < self.hand_heart_dis_ratio_thr):
+ return 1
+
+ # these indices correspond to the following keypoints:
+ # left_middle_finger1, left_middle_finger4,
+ left_hand_vis = True
+ for i in [9, 12]:
+ if kpts[hand_indices[i]][2] < self.kpt_vis_thr:
+ left_hand_vis = False
+ break
+ # right_middle_finger1, right_middle_finger4
+
+ right_hand_vis = True
+ for i in [30, 33]:
+ if kpts[hand_indices[i]][2] < self.kpt_vis_thr:
+ right_hand_vis = False
+ break
+
+ mouth_vis = True
+ if kpts[mouth_index][2] < self.kpt_vis_thr:
+ mouth_vis = False
+
+ if (not left_hand_vis and not right_hand_vis) or not mouth_vis:
+ return 0
+
+ mouth_pos = kpts[mouth_index]
+
+ left_mid_hand_pos = (kpts[hand_indices[9]][:2] +
+ kpts[hand_indices[12]][:2]) / 2
+ lefthand_mouth_dis = self._cal_distance(left_mid_hand_pos, mouth_pos)
+
+ if lefthand_mouth_dis / eye_dis < self.flying_heart_dis_ratio_thr:
+ return 2
+
+ right_mid_hand_pos = (kpts[hand_indices[30]][:2] +
+ kpts[hand_indices[33]][:2]) / 2
+ righthand_mouth_dis = self._cal_distance(right_mid_hand_pos, mouth_pos)
+
+ if righthand_mouth_dis / eye_dis < self.flying_heart_dis_ratio_thr:
+ return 3
+
+ return 0
+
+ def _get_heart_route(self, heart_type: int, cur_pred: Dict[str,
+ np.ndarray],
+ tar_pred: Dict[str,
+ np.ndarray], hand_indices: List[int],
+ mouth_index: int) -> Tuple[int, int]:
+ """get the start and end position of the heart, based on two keypoint
+ results and keypoint indices of hand and mouth.
+
+ Args:
+ cur_pred(dict): The pose estimation results of current person,
+ containing the following keys:
+ - "keypoints" (np.ndarray[K,3]): keypoint detection result
+ in [x, y, score]
+ tar_pred(dict): The pose estimation results of target person,
+ containing the following keys:
+ - "keypoints" (np.ndarray[K,3]): keypoint detection result
+ in [x, y, score]
+ hand_indices(list[int]): keypoint indices of hand
+ mouth_index(int): keypoint index of mouth
+
+ Returns:
+ tuple(int): the start position of the heart
+ tuple(int): the end position of the heart
+ """
+ cur_kpts = cur_pred['keypoints']
+
+ assert heart_type in [1, 2,
+ 3], 'Can not determine the type of heart effect'
+
+ if heart_type == 1:
+ p1 = cur_kpts[hand_indices[20]][:2]
+ p2 = cur_kpts[hand_indices[41]][:2]
+ elif heart_type == 2:
+ p1 = cur_kpts[hand_indices[9]][:2]
+ p2 = cur_kpts[hand_indices[12]][:2]
+ elif heart_type == 3:
+ p1 = cur_kpts[hand_indices[30]][:2]
+ p2 = cur_kpts[hand_indices[33]][:2]
+
+ cur_x, cur_y = (p1 + p2) / 2
+ # the mid point of two fingers
+ start_pos = (int(cur_x), int(cur_y))
+
+ tar_kpts = tar_pred['keypoints']
+ end_pos = tar_kpts[mouth_index][:2]
+
+ return start_pos, end_pos
+
+ def _draw_heart(self, canvas: np.ndarray, heart_info: HeartInfo,
+ t_pass: float) -> np.ndarray:
+ """draw the heart according to heart info and time."""
+ start_x, start_y = heart_info.start_pos
+ end_x, end_y = heart_info.end_pos
+
+ scale = t_pass / self.longest_duration
+
+ max_h, max_w = canvas.shape[:2]
+ hm, wm = self.largest_ratio * max_h, self.largest_ratio * max_h
+ new_h, new_w = int(hm * scale), int(wm * scale)
+
+ x = int(start_x + scale * (end_x - start_x))
+ y = int(start_y + scale * (end_y - start_y))
+
+ y1 = max(0, y - int(new_h / 2))
+ y2 = min(max_h - 1, y + int(new_h / 2))
+
+ x1 = max(0, x - int(new_w / 2))
+ x2 = min(max_w - 1, x + int(new_w / 2))
+
+ target = canvas[y1:y2 + 1, x1:x2 + 1].copy()
+ new_h, new_w = target.shape[:2]
+
+ if new_h == 0 or new_w == 0:
+ return canvas
+
+ assert heart_info.heart_type in [
+ 1, 2, 3
+ ], 'Can not determine the type of heart effect'
+ if heart_info.heart_type == 1: # hand heart
+ patch = self.hand_heart.copy()
+ elif heart_info.heart_type >= 2: # hand blow kiss
+ patch = self.flying_heart.copy()
+ if heart_info.start_pos[0] > heart_info.end_pos[0]:
+ patch = patch[:, ::-1]
+
+ patch = cv2.resize(patch, (new_w, new_h))
+ mask = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
+ mask = (mask < 100)[..., None].astype(np.float32) * 0.8
+
+ canvas[y1:y2 + 1, x1:x2 + 1] = patch * mask + target * (1 - mask)
+
+ return canvas
+
+ def draw(self, frame_msg: FrameMessage) -> np.ndarray:
+ canvas = frame_msg.get_image()
+
+ pose_results = frame_msg.get_pose_results()
+ if not pose_results:
+ return canvas
+
+ for pose_result in pose_results:
+ model_cfg = pose_result['model_cfg']
+
+ preds = [pred.copy() for pred in pose_result['preds']]
+ # if the number of persons in the image is less than num_persons,
+ # no heart effect will be triggered
+ if len(preds) < self.num_persons:
+ continue
+
+ # if the number of persons in the image is more than num_persons,
+ # only use the first num_persons pose results
+ preds = preds[:self.num_persons]
+ ids = [preds[i]['track_id'] for i in range(self.num_persons)]
+
+ for id in self.heart_infos.copy():
+ if id not in ids:
+ # if a previously recorded id is not among the current
+ # person ids, delete the corresponding record
+ del self.heart_infos[id]
+
+ for i in range(self.num_persons):
+ id = preds[i]['track_id']
+
+ # if the predicted person is already in heart_infos,
+ # draw the heart
+ if id in self.heart_infos.copy():
+ t_pass = time.time() - self.heart_infos[id].start_time
+
+ # if the time passed since the last heart pose is less than
+ # longest_duration, continue to draw the heart
+ if t_pass < self.longest_duration:
+ canvas = self._draw_heart(canvas, self.heart_infos[id],
+ t_pass)
+ # reset corresponding heart info
+ else:
+ del self.heart_infos[id]
+ else:
+ hand_indices = get_hand_keypoint_ids(model_cfg)
+ mouth_index = get_mouth_keypoint_ids(model_cfg)
+ eye_indices = get_eye_keypoint_ids(model_cfg)
+
+ # check the type of Valentine Magic based on pose results
+ # and keypoint indices of hand and mouth
+ heart_type = self._check_heart(preds[i], hand_indices,
+ mouth_index, eye_indices)
+ # trigger a Valentine Magic effect
+ if heart_type:
+ # get the route of heart
+ start_pos, end_pos = self._get_heart_route(
+ heart_type, preds[i],
+ preds[self.num_persons - 1 - i], hand_indices,
+ mouth_index)
+ start_time = time.time()
+ self.heart_infos[id] = HeartInfo(
+ heart_type, start_time, start_pos, end_pos)
+
+ return canvas
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/nodes/xdwendwen_node.py b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/xdwendwen_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a0914d3bf473f278023ed1569ae18d6d1b5fcf3
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/nodes/xdwendwen_node.py
@@ -0,0 +1,240 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+from dataclasses import dataclass
+from typing import List, Tuple, Union
+
+import cv2
+import numpy as np
+
+from mmpose.datasets.dataset_info import DatasetInfo
+from ..utils import load_image_from_disk_or_url
+from .builder import NODES
+from .frame_drawing_node import FrameDrawingNode
+
+
+@dataclass
+class DynamicInfo:
+ pos_curr: Tuple[int, int] = (0, 0)
+ pos_step: Tuple[int, int] = (0, 0)
+ step_curr: int = 0
+
+
+@NODES.register_module()
+class XDwenDwenNode(FrameDrawingNode):
+ """An effect drawing node that captures the face of a cat or dog and blend
+ it into a Bing-Dwen-Dwen (the mascot of 2022 Beijing Winter Olympics).
+
+ Parameters:
+ name (str, optional): The node name (also thread name).
+ frame_buffer (str): The name of the input buffer.
+ output_buffer (str | list): The name(s) of the output buffer(s).
+ mode_key (str | int): A hot key to switch the background image.
+ resource_file (str): The annotation file of resource images, which
+ should be in Labelbee format and contain both facial keypoint and
+ region annotations.
+ out_shape (tuple): The shape of output frame in (width, height).
+ """
+
+ dynamic_scale = 0.15
+ dynamic_max_step = 15
+
+ def __init__(
+ self,
+ name: str,
+ frame_buffer: str,
+ output_buffer: Union[str, List[str]],
+ mode_key: Union[str, int],
+ resource_file: str,
+ out_shape: Tuple[int, int] = (480, 480),
+ rigid_transform: bool = True,
+ ):
+ super().__init__(name, frame_buffer, output_buffer, enable=True)
+
+ self.mode_key = mode_key
+ self.mode_index = 0
+ self.out_shape = out_shape
+ self.rigid = rigid_transform
+
+ self.latest_pred = None
+
+ self.dynamic_info = DynamicInfo()
+
+ self.register_event(
+ self.mode_key, is_keyboard=True, handler_func=self.switch_mode)
+
+ self._init_resource(resource_file)
+
+ def _init_resource(self, resource_file):
+
+ # The resource_file is a JSON file that contains the facial
+ # keypoint and mask annotation information of the resource files.
+ # The annotations should follow the label-bee standard format.
+ # See https://github.com/open-mmlab/labelbee-client for details.
+ with open(resource_file) as f:
+ anns = json.load(f)
+ resource_infos = []
+
+ for ann in anns:
+ # Load image
+ img = load_image_from_disk_or_url(ann['url'])
+ # Load result
+ rst = json.loads(ann['result'])
+
+ # Check facial keypoint information
+ assert rst['step_1']['toolName'] == 'pointTool'
+ assert len(rst['step_1']['result']) == 3
+
+ keypoints = sorted(
+ rst['step_1']['result'], key=lambda x: x['order'])
+ keypoints = np.array([[pt['x'], pt['y']] for pt in keypoints])
+
+ # Check facial mask
+ assert rst['step_2']['toolName'] == 'polygonTool'
+ assert len(rst['step_2']['result']) == 1
+ assert len(rst['step_2']['result'][0]['pointList']) > 2
+
+ mask_pts = np.array(
+ [[pt['x'], pt['y']]
+ for pt in rst['step_2']['result'][0]['pointList']])
+
+ mul = 1.0 + self.dynamic_scale
+
+ w_scale = self.out_shape[0] / img.shape[1] * mul
+ h_scale = self.out_shape[1] / img.shape[0] * mul
+
+ img = cv2.resize(
+ img,
+ dsize=None,
+ fx=w_scale,
+ fy=h_scale,
+ interpolation=cv2.INTER_CUBIC)
+
+ keypoints *= [w_scale, h_scale]
+ mask_pts *= [w_scale, h_scale]
+
+ mask = cv2.fillPoly(
+ np.zeros(img.shape[:2], dtype=np.uint8),
+ [mask_pts.astype(np.int32)],
+ color=1)
+
+ res = {
+ 'img': img,
+ 'keypoints': keypoints,
+ 'mask': mask,
+ }
+ resource_infos.append(res)
+
+ self.resource_infos = resource_infos
+
+ self._reset_dynamic()
+
+ def switch_mode(self):
+ self.mode_index = (self.mode_index + 1) % len(self.resource_infos)
+
+ def _reset_dynamic(self):
+ x_tar = np.random.randint(int(self.out_shape[0] * self.dynamic_scale))
+ y_tar = np.random.randint(int(self.out_shape[1] * self.dynamic_scale))
+
+ x_step = (x_tar -
+ self.dynamic_info.pos_curr[0]) / self.dynamic_max_step
+ y_step = (y_tar -
+ self.dynamic_info.pos_curr[1]) / self.dynamic_max_step
+
+ self.dynamic_info.pos_step = (x_step, y_step)
+ self.dynamic_info.step_curr = 0
+
+ def draw(self, frame_msg):
+
+ full_pose_results = frame_msg.get_pose_results()
+
+ pred = None
+ if full_pose_results:
+ for pose_results in full_pose_results:
+ if not pose_results['preds']:
+ continue
+
+ pred = pose_results['preds'][0].copy()
+ pred['dataset'] = DatasetInfo(pose_results['model_cfg'].data.
+ test.dataset_info).dataset_name
+
+ self.latest_pred = pred
+ break
+
+ # Use the latest pose result if there is none available in
+ # the current frame.
+ if pred is None:
+ pred = self.latest_pred
+
+ # Get the background image and facial annotations
+ res = self.resource_infos[self.mode_index]
+ img = frame_msg.get_image()
+ canvas = res['img'].copy()
+ mask = res['mask']
+ kpts_tar = res['keypoints']
+
+ if pred is not None:
+ if pred['dataset'] == 'ap10k':
+ # left eye: 0, right eye: 1, nose: 2
+ kpts_src = pred['keypoints'][[0, 1, 2], :2]
+ elif pred['dataset'] == 'coco_wholebody':
+ # left eye: 1, right eye 2, nose: 0
+ kpts_src = pred['keypoints'][[1, 2, 0], :2]
+ else:
+ raise ValueError('Cannot obtain face landmark information '
+ f'from dataset: {pred["dataset"]}')
+
+ trans_mat = self._get_transform(kpts_src, kpts_tar)
+
+ warp = cv2.warpAffine(img, trans_mat, dsize=canvas.shape[:2])
+ cv2.copyTo(warp, mask, canvas)
+
+ # Add random movement to the background
+ xc, yc = self.dynamic_info.pos_curr
+ xs, ys = self.dynamic_info.pos_step
+ w, h = self.out_shape
+
+ x = min(max(int(xc), 0), canvas.shape[1] - w + 1)
+ y = min(max(int(yc), 0), canvas.shape[0] - h + 1)
+
+ canvas = canvas[y:y + h, x:x + w]
+
+ self.dynamic_info.pos_curr = (xc + xs, yc + ys)
+ self.dynamic_info.step_curr += 1
+
+ if self.dynamic_info.step_curr == self.dynamic_max_step:
+ self._reset_dynamic()
+
+ return canvas
+
+ def _get_transform(self, kpts_src, kpts_tar):
+ if self.rigid:
+ # rigid transform
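+ # The similarity transform [[a, b, tx], [-b, a, ty]] maps a source
+ # point (x, y) to (a*x + b*y + tx, -b*x + a*y + ty). Stacking the x-
+ # and y-equations of all keypoints gives the linear system
+ # X @ [a, b, tx, ty]^T = U, which is solved below in the least-squares
+ # sense via the pseudo-inverse.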
+ n = kpts_src.shape[0]
+ X = np.zeros((n * 2, 4), dtype=np.float32)
+ U = np.zeros((n * 2, 1), dtype=np.float32)
+ X[:n, :2] = kpts_src
+ X[:n, 2] = 1
+ X[n:, 0] = kpts_src[:, 1]
+ X[n:, 1] = -kpts_src[:, 0]
+ X[n:, 3] = 1
+
+ U[:n, 0] = kpts_tar[:, 0]
+ U[n:, 0] = kpts_tar[:, 1]
+
+ M = np.linalg.pinv(X).dot(U).flatten()
+
+ trans_mat = np.array([[M[0], M[1], M[2]], [-M[1], M[0], M[3]]],
+ dtype=np.float32)
+
+ else:
+ # normal affine transform
+ # adaptive horizontal flipping
+ if (np.linalg.norm(kpts_tar[0] - kpts_tar[2]) -
+ np.linalg.norm(kpts_tar[1] - kpts_tar[2])) * (
+ np.linalg.norm(kpts_src[0] - kpts_src[2]) -
+ np.linalg.norm(kpts_src[1] - kpts_src[2])) < 0:
+ kpts_src = kpts_src[[1, 0, 2], :]
+ trans_mat, _ = cv2.estimateAffine2D(
+ kpts_src.astype(np.float32), kpts_tar.astype(np.float32))
+
+ return trans_mat
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/__init__.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d906df0748cd6e5f87642ea6fdc9511e833e22ff
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .buffer import BufferManager
+from .event import EventManager
+from .message import FrameMessage, Message, VideoEndingMessage
+from .misc import (ImageCapture, copy_and_paste, expand_and_clamp,
+ get_cached_file_path, is_image_file, limit_max_fps,
+ load_image_from_disk_or_url, screen_matting)
+from .pose import (get_eye_keypoint_ids, get_face_keypoint_ids,
+ get_hand_keypoint_ids, get_mouth_keypoint_ids,
+ get_wrist_keypoint_ids)
+
+__all__ = [
+ 'BufferManager',
+ 'EventManager',
+ 'FrameMessage',
+ 'Message',
+ 'limit_max_fps',
+ 'VideoEndingMessage',
+ 'load_image_from_disk_or_url',
+ 'get_cached_file_path',
+ 'screen_matting',
+ 'expand_and_clamp',
+ 'copy_and_paste',
+ 'is_image_file',
+ 'ImageCapture',
+ 'get_eye_keypoint_ids',
+ 'get_face_keypoint_ids',
+ 'get_wrist_keypoint_ids',
+ 'get_mouth_keypoint_ids',
+ 'get_hand_keypoint_ids',
+]
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/buffer.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/buffer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9fca4c392703bccb710a9659db21f56ea92e282
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/buffer.py
@@ -0,0 +1,106 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from functools import wraps
+from queue import Queue
+from typing import Dict, List, Optional
+
+from mmcv import is_seq_of
+
+__all__ = ['BufferManager']
+
+
+def check_buffer_registered(exist=True):
+
+ def wrapper(func):
+
+ @wraps(func)
+ def wrapped(manager, name, *args, **kwargs):
+ if exist:
+ # Assert buffer exist
+ if name not in manager:
+ raise ValueError(f'Failed to call {func.__name__}: '
+ f'buffer "{name}" is not registered.')
+ else:
+ # Assert buffer not exist
+ if name in manager:
+ raise ValueError(f'Failed to call {func.__name__}: '
+ f'buffer "{name}" is already registered.')
+ return func(manager, name, *args, **kwargs)
+
+ return wrapped
+
+ return wrapper
+
+
+class Buffer(Queue):
+
+ def put_force(self, item):
+ """Force to put an item into the buffer.
+
+ If the buffer is already full, the earliest item in the buffer will be
+ removed to make room for the incoming item.
+ """
+ with self.mutex:
+ if self.maxsize > 0:
+ while self._qsize() >= self.maxsize:
+ _ = self._get()
+ self.unfinished_tasks -= 1
+
+ self._put(item)
+ self.unfinished_tasks += 1
+ self.not_empty.notify()
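+
+ # Behaviour sketch (illustrative): with maxsize=1, force-putting a second
+ # item evicts the first one instead of blocking.
+ #
+ # >>> buf = Buffer(maxsize=1)
+ # >>> buf.put_force('old')
+ # >>> buf.put_force('new')
+ # >>> buf.get()
+ # 'new'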
+
+
+class BufferManager():
+
+ def __init__(self,
+ buffer_type: type = Buffer,
+ buffers: Optional[Dict] = None):
+ self.buffer_type = buffer_type
+ if buffers is None:
+ self._buffers = {}
+ else:
+ if is_seq_of(list(buffers.values()), buffer_type):
+ self._buffers = buffers.copy()
+ else:
+ raise ValueError('The values of buffers should be instance '
+ f'of {buffer_type}')
+
+ def __contains__(self, name):
+ return name in self._buffers
+
+ @check_buffer_registered(False)
+ def register_buffer(self, name, maxsize=0):
+ self._buffers[name] = self.buffer_type(maxsize)
+
+ @check_buffer_registered()
+ def put(self, name, item, block=True, timeout=None):
+ self._buffers[name].put(item, block, timeout)
+
+ @check_buffer_registered()
+ def put_force(self, name, item):
+ self._buffers[name].put_force(item)
+
+ @check_buffer_registered()
+ def get(self, name, block=True, timeout=None):
+ return self._buffers[name].get(block, timeout)
+
+ @check_buffer_registered()
+ def is_empty(self, name):
+ return self._buffers[name].empty()
+
+ @check_buffer_registered()
+ def is_full(self, name):
+ return self._buffers[name].full()
+
+ def get_sub_manager(self, buffer_names: List[str]):
+ buffers = {name: self._buffers[name] for name in buffer_names}
+ return BufferManager(self.buffer_type, buffers)
+
+ def get_info(self):
+ buffer_info = {}
+ for name, buffer in self._buffers.items():
+ buffer_info[name] = {
+ 'size': buffer.qsize(),
+ 'maxsize': buffer.maxsize
+ }
+ return buffer_info
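+
+
+ # Minimal usage sketch (illustrative; the buffer name 'frames' is
+ # hypothetical):
+ #
+ # >>> manager = BufferManager()
+ # >>> manager.register_buffer('frames', maxsize=2)
+ # >>> manager.put('frames', 'img_0')
+ # >>> manager.get('frames')
+ # 'img_0'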
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/event.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/event.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceab26f72b63d03bc574cda3a713fed67f20f0c0
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/event.py
@@ -0,0 +1,59 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import defaultdict
+from contextlib import contextmanager
+from threading import Event
+from typing import Optional
+
+
+class EventManager():
+
+ def __init__(self):
+ self._events = defaultdict(Event)
+
+ def register_event(self,
+ event_name: str = None,
+ is_keyboard: bool = False):
+ if is_keyboard:
+ event_name = self._get_keyboard_event_name(event_name)
+ self._events[event_name] = Event()
+
+ def set(self, event_name: str = None, is_keyboard: bool = False):
+ if is_keyboard:
+ event_name = self._get_keyboard_event_name(event_name)
+ return self._events[event_name].set()
+
+ def wait(self,
+ event_name: str = None,
+ is_keyboard: Optional[bool] = False,
+ timeout: Optional[float] = None):
+ if is_keyboard:
+ event_name = self._get_keyboard_event_name(event_name)
+ return self._events[event_name].wait(timeout)
+
+ def is_set(self,
+ event_name: str = None,
+ is_keyboard: Optional[bool] = False):
+ if is_keyboard:
+ event_name = self._get_keyboard_event_name(event_name)
+ return self._events[event_name].is_set()
+
+ def clear(self,
+ event_name: str = None,
+ is_keyboard: Optional[bool] = False):
+ if is_keyboard:
+ event_name = self._get_keyboard_event_name(event_name)
+ return self._events[event_name].clear()
+
+ @staticmethod
+ def _get_keyboard_event_name(key):
+ return f'_keyboard_{chr(key) if isinstance(key, int) else key}'
+
+ @contextmanager
+ def wait_and_handle(self,
+ event_name: str = None,
+ is_keyboard: Optional[bool] = False):
+ self.wait(event_name, is_keyboard)
+ try:
+ yield
+ finally:
+ self.clear(event_name, is_keyboard)
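+
+
+ # Minimal usage sketch (illustrative): a producer sets an event and a
+ # consumer handles it exactly once via the context manager, which clears
+ # the event on exit.
+ #
+ # >>> manager = EventManager()
+ # >>> manager.register_event('_exit_')
+ # >>> manager.set('_exit_')
+ # >>> with manager.wait_and_handle('_exit_'):
+ # ... print('handling exit')
+ # handling exit
+ # >>> manager.is_set('_exit_')
+ # False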
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/message.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/message.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7b1529c5ece3970dfae189d910720786f32612d
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/message.py
@@ -0,0 +1,204 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import time
+import uuid
+import warnings
+from typing import Dict, List, Optional
+
+import numpy as np
+
+
+class Message():
+ """Message base class.
+
+ All message classes should inherit from this class. The basic use of a
+ Message instance is to carry a piece of text message (self.msg) and a dict
+ that stores structured data (self.data), e.g. the frame image, model
+ predictions, etc.
+
+ A message may also hold route information, which is composed of
+ information of all nodes the message has passed through.
+
+ Parameters:
+ msg (str): The text message.
+ data (dict, optional): The structured data.
+ """
+
+ def __init__(self, msg: str = '', data: Optional[Dict] = None):
+ self.msg = msg
+ self.data = data if data else {}
+ self.route_info = []
+ self.timestamp = time.time()
+ self.id = uuid.uuid4()
+
+ def update_route_info(self,
+ node=None,
+ node_name: Optional[str] = None,
+ node_type: Optional[str] = None,
+ info: Optional[Dict] = None):
+ """Append new node information to the route information.
+
+ Args:
+ node (Node, optional): An instance of Node that provides basic
+ information like the node name and type. Default: None.
+ node_name (str, optional): The node name. If node is given,
+ node_name will be ignored. Default: None.
+ node_type (str, optional): The class name of the node. If node
+ is given, node_type will be ignored. Default: None.
+ info (dict, optional): The node information, which is usually
+ given by node.get_node_info(). Default: None.
+ """
+ if node is not None:
+ if node_name is not None or node_type is not None:
+ warnings.warn(
+ '`node_name` and `node_type` will be overridden if node '
+ 'is provided.')
+ node_name = node.name
+ node_type = node.__class__.__name__
+
+ node_info = {'node': node_name, 'node_type': node_type, 'info': info}
+ self.route_info.append(node_info)
+
+ def set_route_info(self, route_info: List):
+ """Directly set the entire route information.
+
+ Args:
+ route_info (list): route information to set to the message.
+ """
+ self.route_info = route_info
+
+ def merge_route_info(self, route_info: List):
+ """Merge the given route information into the original one of the
+ message. This is used for combining route information from multiple
+ messages. The node information in the route will be reordered according
+ to their timestamps.
+
+ Args:
+ route_info (list): route information to merge.
+ """
+ self.route_info += route_info
+ self.route_info.sort(key=lambda x: x.get('timestamp', np.inf))
+
+ def get_route_info(self) -> List:
+ return self.route_info.copy()
+
+
+class VideoEndingMessage(Message):
+ """A special message to indicate the input video is ending."""
+
+
+class FrameMessage(Message):
+ """The message to store information of a video frame.
+
+ A FrameMessage instance usually holds the following data in self.data:
+ - image (array): The frame image
+ - detection_results (list): A list to hold detection results of
+ multiple detectors. Each element is a tuple (tag, result)
+ - pose_results (list): A list to hold pose estimation results of
+ multiple pose estimators. Each element is a tuple (tag, result)
+ """
+
+ def __init__(self, img):
+ super().__init__(data=dict(image=img))
+
+ def get_image(self):
+ """Get the frame image.
+
+ Returns:
+ array: The frame image.
+ """
+ return self.data.get('image', None)
+
+ def set_image(self, img):
+ """Set the frame image to the message."""
+ self.data['image'] = img
+
+ def add_detection_result(self, result, tag: str = None):
+ """Add the detection result from one model into the message's
+ detection_results.
+
+ Args:
+ tag (str, optional): Give a tag to the result, which can be used
+ to retrieve specific results.
+ """
+ if 'detection_results' not in self.data:
+ self.data['detection_results'] = []
+ self.data['detection_results'].append((tag, result))
+
+ def get_detection_results(self, tag: str = None):
+ """Get detection results of the message.
+
+ Args:
+ tag (str, optional): If given, only the results with the tag
+ will be retrieved. Otherwise all results will be retrieved.
+ Default: None.
+
+ Returns:
+ list[dict]: The retrieved detection results
+ """
+ if 'detection_results' not in self.data:
+ return None
+ if tag is None:
+ results = [res for _, res in self.data['detection_results']]
+ else:
+ results = [
+ res for _tag, res in self.data['detection_results']
+ if _tag == tag
+ ]
+ return results
+
+ def add_pose_result(self, result, tag=None):
+ """Add the pose estimation result from one model into the message's
+ pose_results.
+
+ Args:
+ tag (str, optional): Give a tag to the result, which can be used
+ to retrieve specific results.
+ """
+ if 'pose_results' not in self.data:
+ self.data['pose_results'] = []
+ self.data['pose_results'].append((tag, result))
+
+ def get_pose_results(self, tag=None):
+ """Get pose estimation results of the message.
+
+ Args:
+ tag (str, optional): If given, only the results with the tag
+ will be retrieved. Otherwise all results will be retrieved.
+ Default: None.
+
+ Returns:
+ list[dict]: The retrieved pose results
+ """
+ if 'pose_results' not in self.data:
+ return None
+ if tag is None:
+ results = [res for _, res in self.data['pose_results']]
+ else:
+ results = [
+ res for _tag, res in self.data['pose_results'] if _tag == tag
+ ]
+ return results
+
+ def get_full_results(self):
+ """Get all model predictions of the message.
+
+ See also set_full_results().
+
+ Returns:
+ dict: All model predictions, including:
+ - detection_results
+ - pose_results
+ """
+ result_keys = ['detection_results', 'pose_results']
+ results = {k: self.data[k] for k in result_keys}
+ return results
+
+ def set_full_results(self, results):
+ """Set full model results directly.
+
+ Args:
+ results (dict): All model predictions including:
+ - detection_results (list): see also add_detection_result()
+ - pose_results (list): see also add_pose_result()
+ """
+ self.data.update(results)
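+
+
+ # Minimal usage sketch (illustrative; the tag 'detector' is hypothetical):
+ #
+ # >>> import numpy as np
+ # >>> msg = FrameMessage(np.zeros((480, 640, 3), dtype=np.uint8))
+ # >>> msg.add_detection_result({'preds': []}, tag='detector')
+ # >>> len(msg.get_detection_results(tag='detector'))
+ # 1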
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/misc.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c64f4179db8a3618b38e3d6933992e9b3294af55
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/misc.py
@@ -0,0 +1,343 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+import sys
+import time
+from contextlib import contextmanager
+from typing import Optional
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import cv2
+import numpy as np
+from torch.hub import HASH_REGEX, download_url_to_file
+
+
+@contextmanager
+def limit_max_fps(fps: Optional[float]):
+ t_start = time.time()
+ try:
+ yield
+ finally:
+ t_end = time.time()
+ if fps is not None:
+ t_sleep = 1.0 / fps - t_end + t_start
+ if t_sleep > 0:
+ time.sleep(t_sleep)
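+
+
+ # Usage sketch (illustrative): cap a processing loop at roughly 30 FPS, as
+ # done in `Node.run()`; `heavy_processing` is a hypothetical placeholder.
+ # The context manager sleeps on exit so one iteration takes >= 1/30 s.
+ #
+ # with limit_max_fps(30):
+ # heavy_processing()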
+
+
+def _is_url(filename):
+ """Check if the file is a url link.
+
+ Args:
+ filename (str): the file name or url link.
+
+ Returns:
+ bool: is url or not.
+ """
+ prefixes = ['http://', 'https://']
+ for p in prefixes:
+ if filename.startswith(p):
+ return True
+ return False
+
+
+def load_image_from_disk_or_url(filename, readFlag=cv2.IMREAD_COLOR):
+ """Load an image file, from disk or url.
+
+ Args:
+ filename (str): file name on the disk or url link.
+ readFlag (int): readFlag for imdecode.
+
+ Returns:
+ np.ndarray: A loaded image
+ """
+ if _is_url(filename):
+ # download the image, convert it to a NumPy array, and then read
+ # it into OpenCV format
+ resp = urlopen(filename)
+ image = np.asarray(bytearray(resp.read()), dtype='uint8')
+ image = cv2.imdecode(image, readFlag)
+ return image
+ else:
+ image = cv2.imread(filename, readFlag)
+ return image
+
+
+def mkdir_or_exist(dir_name, mode=0o777):
+ if dir_name == '':
+ return
+ dir_name = osp.expanduser(dir_name)
+ os.makedirs(dir_name, mode=mode, exist_ok=True)
+
+
+def get_cached_file_path(url,
+ save_dir=None,
+ progress=True,
+ check_hash=False,
+ file_name=None):
+ r"""Loads the Torch serialized object at the given URL.
+
+ If downloaded file is a zip file, it will be automatically decompressed
+
+ If the object is already present in `model_dir`, it's deserialized and
+ returned.
+ The default value of ``model_dir`` is ``/checkpoints`` where
+ ``hub_dir`` is the directory returned by :func:`~torch.hub.get_dir`.
+
+ Args:
+ url (str): URL of the object to download
+ save_dir (str, optional): directory in which to save the object
+ progress (bool, optional): whether or not to display a progress bar
+ to stderr. Default: True
+ check_hash(bool, optional): If True, the filename part of the URL
+ should follow the naming convention ``filename-.ext``
+ where ```` is the first eight or more digits of the
+ SHA256 hash of the contents of the file. The hash is used to
+ ensure unique names and to verify the contents of the file.
+ Default: False
+ file_name (str, optional): name for the downloaded file. Filename
+ from ``url`` will be used if not set. Default: None.
+ """
+ if save_dir is None:
+ save_dir = os.path.join('webcam_resources')
+
+ mkdir_or_exist(save_dir)
+
+ parts = urlparse(url)
+ filename = os.path.basename(parts.path)
+ if file_name is not None:
+ filename = file_name
+ cached_file = os.path.join(save_dir, filename)
+ if not os.path.exists(cached_file):
+ sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
+ hash_prefix = None
+ if check_hash:
+ r = HASH_REGEX.search(filename) # r is Optional[Match[str]]
+ hash_prefix = r.group(1) if r else None
+ download_url_to_file(url, cached_file, hash_prefix, progress=progress)
+ return cached_file
+
+
+def screen_matting(img, color_low=None, color_high=None, color=None):
+ """Screen Matting.
+
+ Args:
+ img (np.ndarray): Image data.
+ color_low (tuple): Lower limit (b, g, r).
+ color_high (tuple): Higher limit (b, g, r).
+ color (str): Support colors include:
+
+ - 'green' or 'g'
+ - 'blue' or 'b'
+ - 'black' or 'k'
+ - 'white' or 'w'
+ """
+
+ if color_high is None or color_low is None:
+ if color is not None:
+ if color.lower() == 'g' or color.lower() == 'green':
+ color_low = (0, 200, 0)
+ color_high = (60, 255, 60)
+ elif color.lower() == 'b' or color.lower() == 'blue':
+ color_low = (230, 0, 0)
+ color_high = (255, 40, 40)
+ elif color.lower() == 'k' or color.lower() == 'black':
+ color_low = (0, 0, 0)
+ color_high = (40, 40, 40)
+ elif color.lower() == 'w' or color.lower() == 'white':
+ color_low = (230, 230, 230)
+ color_high = (255, 255, 255)
+ else:
+ raise NotImplementedError(f'Not supported color: {color}.')
+ else:
+ raise ValueError('color or color_high | color_low should be given.')
+
+ mask = cv2.inRange(img, np.array(color_low), np.array(color_high)) == 0
+
+ return mask.astype(np.uint8)
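+
+
+ # Usage sketch (illustrative): build a foreground mask for a green-screen
+ # frame, assuming `frame` is a BGR image.
+ #
+ # mask = screen_matting(frame, color='green') # 1 = foreground, 0 = green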
+
+
+def expand_and_clamp(box, im_shape, s=1.25):
+ """Expand the bbox and clip it to fit the image shape.
+
+ Args:
+ box (list): x1, y1, x2, y2
+ im_shape (ndarray): image shape (h, w, c)
+ s (float): expand ratio
+
+ Returns:
+ list: x1, y1, x2, y2
+ """
+
+ x1, y1, x2, y2 = box[:4]
+ w = x2 - x1
+ h = y2 - y1
+ deta_w = w * (s - 1) / 2
+ deta_h = h * (s - 1) / 2
+
+ x1, y1, x2, y2 = x1 - deta_w, y1 - deta_h, x2 + deta_w, y2 + deta_h
+
+ img_h, img_w = im_shape[:2]
+
+ x1 = min(max(0, int(x1)), img_w - 1)
+ y1 = min(max(0, int(y1)), img_h - 1)
+ x2 = min(max(0, int(x2)), img_w - 1)
+ y2 = min(max(0, int(y2)), img_h - 1)
+
+ return [x1, y1, x2, y2]
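+
+
+def _demo_expand_and_clamp():
+    """Illustrative usage sketch of ``expand_and_clamp`` (added for
+    documentation purposes; not part of the original module).
+
+    Expands a detection box by a factor of 1.25 and clips it to the image
+    boundary, e.g. before cropping a region around a detected object.
+    """
+    img = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image
+    bbox = [600, 400, 700, 500]  # partially outside the image
+    x1, y1, x2, y2 = expand_and_clamp(bbox, img.shape, s=1.25)
+    return img[y1:y2, x1:x2]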
+
+
+def _find_connected_components(mask):
+    """Find connected components and sort them by area.
+
+    Args:
+        mask (ndarray): Instance segmentation result.
+
+    Returns:
+        ndarray: Array of shape (N, 5) where each row contains
+            (x, y, w, h, area), sorted by area in ascending order.
+    """
+ num, labels, stats, centroids = cv2.connectedComponentsWithStats(mask)
+ stats = stats[stats[:, 4].argsort()]
+ return stats
+
+
+def _find_bbox(mask):
+ """Find the bounding box for the mask.
+
+ Args:
+ mask (ndarray): Mask.
+
+ Returns:
+ list(4, ): Returned box (x1, y1, x2, y2).
+ """
+ mask_shape = mask.shape
+ if len(mask_shape) == 3:
+ assert mask_shape[-1] == 1, 'the channel of the mask should be 1.'
+ elif len(mask_shape) == 2:
+ pass
+ else:
+        raise NotImplementedError(
+            'mask should be a 2D array or a 3D array with a single channel.')
+
+ h, w = mask_shape[:2]
+ mask_w = mask.sum(0)
+ mask_h = mask.sum(1)
+
+ left = 0
+ right = w - 1
+ up = 0
+ down = h - 1
+
+ for i in range(w):
+ if mask_w[i] > 0:
+ break
+ left += 1
+
+ for i in range(w - 1, left, -1):
+ if mask_w[i] > 0:
+ break
+ right -= 1
+
+ for i in range(h):
+ if mask_h[i] > 0:
+ break
+ up += 1
+
+ for i in range(h - 1, up, -1):
+ if mask_h[i] > 0:
+ break
+ down -= 1
+
+ return [left, up, right, down]
+
+
+def copy_and_paste(img,
+ background_img,
+ mask,
+ bbox=None,
+ effect_region=(0.2, 0.2, 0.8, 0.8),
+ min_size=(20, 20)):
+ """Copy the image region and paste to the background.
+
+ Args:
+ img (np.ndarray): Image data.
+ background_img (np.ndarray): Background image data.
+ mask (ndarray): instance segmentation result.
+ bbox (ndarray): instance bbox, (x1, y1, x2, y2).
+        effect_region (tuple(4, )): The region of the background to paste
+            into, given as normalized coordinates (x1, y1, x2, y2).
+        min_size (tuple(2, )): Minimum instance size (w, h) in pixels;
+            smaller instances are not pasted.
+
+    Returns:
+        np.ndarray: The background image with the masked region pasted in.
+    """
+ background_img = background_img.copy()
+ background_h, background_w = background_img.shape[:2]
+ region_h = (effect_region[3] - effect_region[1]) * background_h
+ region_w = (effect_region[2] - effect_region[0]) * background_w
+ region_aspect_ratio = region_w / region_h
+
+ if bbox is None:
+ bbox = _find_bbox(mask)
+ instance_w = bbox[2] - bbox[0]
+ instance_h = bbox[3] - bbox[1]
+
+ if instance_w > min_size[0] and instance_h > min_size[1]:
+ aspect_ratio = instance_w / instance_h
+ if region_aspect_ratio > aspect_ratio:
+ resize_rate = region_h / instance_h
+ else:
+ resize_rate = region_w / instance_w
+
+ mask_inst = mask[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
+ img_inst = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
+ img_inst = cv2.resize(img_inst, (int(
+ resize_rate * instance_w), int(resize_rate * instance_h)))
+ mask_inst = cv2.resize(
+ mask_inst,
+ (int(resize_rate * instance_w), int(resize_rate * instance_h)),
+ interpolation=cv2.INTER_NEAREST)
+
+ mask_ids = list(np.where(mask_inst == 1))
+ mask_ids[1] += int(effect_region[0] * background_w)
+ mask_ids[0] += int(effect_region[1] * background_h)
+
+ background_img[tuple(mask_ids)] = img_inst[np.where(mask_inst == 1)]
+
+ return background_img
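+
+
+def _demo_copy_and_paste():
+    """Illustrative usage sketch of ``copy_and_paste`` (added for
+    documentation purposes; not part of the original module).
+
+    Mattes the subject out of a green-screen frame and pastes it into the
+    central region of a background image. Both file paths are placeholders.
+    """
+    frame = cv2.imread('green_screen_frame.jpg')
+    background = cv2.imread('background.jpg')
+    mask = screen_matting(frame, color='green')
+    bbox = _find_bbox(mask)
+    return copy_and_paste(
+        frame,
+        background,
+        mask,
+        bbox=bbox,
+        effect_region=(0.25, 0.25, 0.75, 0.75))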
+
+
+def is_image_file(path):
+ if isinstance(path, str):
+ if path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
+ return True
+ return False
+
+
+class ImageCapture:
+    """A mock-up version of cv2.VideoCapture that always returns a constant
+    image.
+
+ Args:
+ image (str | ndarray): The image or image path
+ """
+
+ def __init__(self, image):
+ if isinstance(image, str):
+ self.image = load_image_from_disk_or_url(image)
+ else:
+ self.image = image
+
+ def isOpened(self):
+ return (self.image is not None)
+
+ def read(self):
+ return True, self.image.copy()
+
+ def release(self):
+ pass
+
+ def get(self, propId):
+ if propId == cv2.CAP_PROP_FRAME_WIDTH:
+ return self.image.shape[1]
+ elif propId == cv2.CAP_PROP_FRAME_HEIGHT:
+ return self.image.shape[0]
+ elif propId == cv2.CAP_PROP_FPS:
+ return np.nan
+ else:
+ raise NotImplementedError()
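+
+
+def _demo_image_capture():
+    """Illustrative usage sketch of ``ImageCapture`` (added for
+    documentation purposes; not part of the original module).
+
+    ``ImageCapture`` mimics the ``cv2.VideoCapture`` interface for a single
+    still image, so the same frame-reading loop works for images, videos
+    and webcams. The image path below is a placeholder.
+    """
+    cap = ImageCapture('test_image.jpg')
+    if not cap.isOpened():
+        return None
+    ret_val, frame = cap.read()  # always returns a copy of the same image
+    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    cap.release()
+    return ret_val, frame, (width, height)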
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/utils/pose.py b/vendor/ViTPose/tools/webcam/webcam_apis/utils/pose.py
new file mode 100644
index 0000000000000000000000000000000000000000..196b40ef53d78173742d4d6f953176cf76238308
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/utils/pose.py
@@ -0,0 +1,226 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Tuple
+
+from mmcv import Config
+
+from mmpose.datasets.dataset_info import DatasetInfo
+
+
+def get_eye_keypoint_ids(model_cfg: Config) -> Tuple[int, int]:
+    """A helper function to get the keypoint indices of left and right eyes
+ from the model config.
+
+ Args:
+ model_cfg (Config): pose model config.
+
+ Returns:
+ int: left eye keypoint index.
+ int: right eye keypoint index.
+ """
+ left_eye_idx = None
+ right_eye_idx = None
+
+ # try obtaining eye point ids from dataset_info
+ try:
+ dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+ left_eye_idx = dataset_info.keypoint_name2id.get('left_eye', None)
+ right_eye_idx = dataset_info.keypoint_name2id.get('right_eye', None)
+ except AttributeError:
+ left_eye_idx = None
+ right_eye_idx = None
+
+ if left_eye_idx is None or right_eye_idx is None:
+ # Fall back to hard coded keypoint id
+ dataset_name = model_cfg.data.test.type
+ if dataset_name in {
+ 'TopDownCocoDataset', 'TopDownCocoWholeBodyDataset'
+ }:
+ left_eye_idx = 1
+ right_eye_idx = 2
+ elif dataset_name in {'AnimalPoseDataset', 'AnimalAP10KDataset'}:
+ left_eye_idx = 0
+ right_eye_idx = 1
+ else:
+            raise ValueError('Cannot determine the eye keypoint id of '
+                             f'{dataset_name}')
+
+ return left_eye_idx, right_eye_idx
+
+
+def get_face_keypoint_ids(model_cfg: Config) -> List[int]:
+    """A helper function to get the keypoint indices of the face from the
+    model config.
+
+    Args:
+        model_cfg (Config): pose model config.
+
+    Returns:
+        list[int]: face keypoint indices.
+    """
+    face_indices = None
+
+    # try obtaining face keypoint ids from dataset_info
+    try:
+        dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+        face_indices = []
+        for id in range(68):
+            face_indices.append(
+                dataset_info.keypoint_name2id.get(f'face_{id}', None))
+    except AttributeError:
+        face_indices = None
+
+ if face_indices is None:
+ # Fall back to hard coded keypoint id
+ dataset_name = model_cfg.data.test.type
+ if dataset_name in {'TopDownCocoWholeBodyDataset'}:
+ face_indices = list(range(23, 91))
+ else:
+            raise ValueError('Cannot determine the face keypoint ids of '
+                             f'{dataset_name}')
+
+ return face_indices
+
+
+def get_wrist_keypoint_ids(model_cfg: Config) -> Tuple[int, int]:
+    """A helper function to get the keypoint indices of left and right wrists
+ from the model config.
+
+ Args:
+ model_cfg (Config): pose model config.
+ Returns:
+ int: left wrist keypoint index.
+ int: right wrist keypoint index.
+ """
+
+    # try obtaining wrist keypoint ids from dataset_info
+ try:
+ dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+ left_wrist_idx = dataset_info.keypoint_name2id.get('left_wrist', None)
+ right_wrist_idx = dataset_info.keypoint_name2id.get(
+ 'right_wrist', None)
+ except AttributeError:
+ left_wrist_idx = None
+ right_wrist_idx = None
+
+ if left_wrist_idx is None or right_wrist_idx is None:
+ # Fall back to hard coded keypoint id
+ dataset_name = model_cfg.data.test.type
+ if dataset_name in {
+ 'TopDownCocoDataset', 'TopDownCocoWholeBodyDataset'
+ }:
+ left_wrist_idx = 9
+ right_wrist_idx = 10
+ elif dataset_name == 'AnimalPoseDataset':
+ left_wrist_idx = 16
+ right_wrist_idx = 17
+ elif dataset_name == 'AnimalAP10KDataset':
+ left_wrist_idx = 7
+ right_wrist_idx = 10
+ else:
+            raise ValueError('Cannot determine the wrist keypoint id of '
+                             f'{dataset_name}')
+
+ return left_wrist_idx, right_wrist_idx
+
+
+def get_mouth_keypoint_ids(model_cfg: Config) -> int:
+    """A helper function to get the mouth keypoint index from the model
+    config.
+
+    Args:
+        model_cfg (Config): pose model config.
+    Returns:
+        int: mouth keypoint index.
+    """
+ # try obtaining mouth point ids from dataset_info
+ try:
+ dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+ mouth_index = dataset_info.keypoint_name2id.get('face-62', None)
+ except AttributeError:
+ mouth_index = None
+
+ if mouth_index is None:
+ # Fall back to hard coded keypoint id
+ dataset_name = model_cfg.data.test.type
+ if dataset_name == 'TopDownCocoWholeBodyDataset':
+ mouth_index = 85
+ else:
+            raise ValueError('Cannot determine the mouth keypoint id of '
+                             f'{dataset_name}')
+
+ return mouth_index
+
+
+def get_hand_keypoint_ids(model_cfg: Config) -> List[int]:
+    """A helper function to get the keypoint indices of left and right hands
+ from the model config.
+
+ Args:
+ model_cfg (Config): pose model config.
+ Returns:
+ list[int]: hand keypoint indices.
+ """
+ # try obtaining hand keypoint ids from dataset_info
+ try:
+ hand_indices = []
+ dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
+
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get('left_hand_root', None))
+
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'left_thumb{id}', None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'left_forefinger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'left_middle_finger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'left_ring_finger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'left_pinky_finger{id}',
+ None))
+
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get('right_hand_root', None))
+
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'right_thumb{id}', None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'right_forefinger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'right_middle_finger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'right_ring_finger{id}',
+ None))
+ for id in range(1, 5):
+ hand_indices.append(
+ dataset_info.keypoint_name2id.get(f'right_pinky_finger{id}',
+ None))
+
+ except AttributeError:
+ hand_indices = None
+
+ if hand_indices is None:
+ # Fall back to hard coded keypoint id
+ dataset_name = model_cfg.data.test.type
+ if dataset_name in {'TopDownCocoWholeBodyDataset'}:
+ hand_indices = list(range(91, 133))
+ else:
+            raise ValueError('Cannot determine the hand keypoint ids of '
+                             f'{dataset_name}')
+
+ return hand_indices
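+
+
+def _demo_keypoint_id_lookup():
+    """Illustrative usage sketch of the helpers above (added for
+    documentation purposes; not part of the original module).
+
+    The config path is a placeholder; any mmpose top-down config with a
+    ``data.test`` section is expected to work the same way.
+    """
+    model_cfg = Config.fromfile('configs/pose_config.py')  # hypothetical path
+    left_eye_idx, right_eye_idx = get_eye_keypoint_ids(model_cfg)
+    left_wrist_idx, right_wrist_idx = get_wrist_keypoint_ids(model_cfg)
+    hand_indices = get_hand_keypoint_ids(model_cfg)
+    return (left_eye_idx, right_eye_idx, left_wrist_idx, right_wrist_idx,
+            hand_indices)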
diff --git a/vendor/ViTPose/tools/webcam/webcam_apis/webcam_runner.py b/vendor/ViTPose/tools/webcam/webcam_apis/webcam_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..7843b392cfd367d778109794a345f1c361395407
--- /dev/null
+++ b/vendor/ViTPose/tools/webcam/webcam_apis/webcam_runner.py
@@ -0,0 +1,272 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+import sys
+import time
+import warnings
+from contextlib import nullcontext
+from threading import Thread
+from typing import Dict, List, Optional, Tuple, Union
+
+import cv2
+
+from .nodes import NODES
+from .utils import (BufferManager, EventManager, FrameMessage, ImageCapture,
+ VideoEndingMessage, is_image_file, limit_max_fps)
+
+DEFAULT_FRAME_BUFFER_SIZE = 1
+DEFAULT_INPUT_BUFFER_SIZE = 1
+DEFAULT_DISPLAY_BUFFER_SIZE = 0
+DEFAULT_USER_BUFFER_SIZE = 1
+
+
+class WebcamRunner():
+    """An interface for building a webcam application from a config.
+
+    Args:
+        name (str): Runner name.
+        camera_id (int | str): The camera ID (usually the ID of the default
+            camera is 0). Alternatively a file path or a URL can be given
+            to load from a video or image file.
+        camera_fps (int): Maximum FPS for reading video frames. Default: 30
+        camera_frame_shape (tuple, optional): Set the frame shape of the
+            camera in (width, height). If not given, the default frame shape
+            will be used. This argument is only valid when using a camera
+            as the input source. Default: None
+        synchronous (bool): If True, a new frame is read only after the
+            previous one has been processed and displayed. Default: False
+        buffer_sizes (dict, optional): A dict to specify buffer sizes. The
+            key is the buffer name and the value is the buffer size.
+            Default: None
+        nodes (list): Node configs.
+    """
+
+ def __init__(self,
+ name: str = 'Default Webcam Runner',
+ camera_id: Union[int, str] = 0,
+ camera_fps: int = 30,
+ camera_frame_shape: Optional[Tuple[int, int]] = None,
+ synchronous: bool = False,
+ buffer_sizes: Optional[Dict[str, int]] = None,
+ nodes: Optional[List[Dict]] = None):
+
+ # Basic parameters
+ self.name = name
+ self.camera_id = camera_id
+ self.camera_fps = camera_fps
+ self.camera_frame_shape = camera_frame_shape
+ self.synchronous = synchronous
+
+ # self.buffer_manager manages data flow between runner and nodes
+ self.buffer_manager = BufferManager()
+ # self.event_manager manages event-based asynchronous communication
+ self.event_manager = EventManager()
+        # self.node_list holds all node instances
+ self.node_list = []
+ # self.vcap is used to read camera frames. It will be built when the
+ # runner starts running
+ self.vcap = None
+
+ # Register runner events
+ self.event_manager.register_event('_exit_', is_keyboard=False)
+ if self.synchronous:
+ self.event_manager.register_event('_idle_', is_keyboard=False)
+
+ # Register nodes
+ if not nodes:
+ raise ValueError('No node is registered to the runner.')
+
+ # Register default buffers
+ if buffer_sizes is None:
+ buffer_sizes = {}
+ # _frame_ buffer
+ frame_buffer_size = buffer_sizes.get('_frame_',
+ DEFAULT_FRAME_BUFFER_SIZE)
+ self.buffer_manager.register_buffer('_frame_', frame_buffer_size)
+ # _input_ buffer
+ input_buffer_size = buffer_sizes.get('_input_',
+ DEFAULT_INPUT_BUFFER_SIZE)
+ self.buffer_manager.register_buffer('_input_', input_buffer_size)
+ # _display_ buffer
+ display_buffer_size = buffer_sizes.get('_display_',
+ DEFAULT_DISPLAY_BUFFER_SIZE)
+ self.buffer_manager.register_buffer('_display_', display_buffer_size)
+
+ # Build all nodes:
+ for node_cfg in nodes:
+ logging.info(f'Create node: {node_cfg.name}({node_cfg.type})')
+ node = NODES.build(node_cfg)
+
+ # Register node
+ self.node_list.append(node)
+
+ # Register buffers
+ for buffer_info in node.registered_buffers:
+ buffer_name = buffer_info.buffer_name
+ if buffer_name in self.buffer_manager:
+ continue
+ buffer_size = buffer_sizes.get(buffer_name,
+ DEFAULT_USER_BUFFER_SIZE)
+ self.buffer_manager.register_buffer(buffer_name, buffer_size)
+ logging.info(
+ f'Register user buffer: {buffer_name}({buffer_size})')
+
+ # Register events
+ for event_info in node.registered_events:
+ self.event_manager.register_event(
+ event_name=event_info.event_name,
+ is_keyboard=event_info.is_keyboard)
+ logging.info(f'Register event: {event_info.event_name}')
+
+        # Set runner for nodes
+        # This step is performed after building all nodes, so that the
+        # runner has created the full buffer/event managers for the nodes
+        # to access.
+ for node in self.node_list:
+            logging.info(f'Set runner for node: {node.name}')
+ node.set_runner(self)
+
+ def _read_camera(self):
+ """Continually read video frames and put them into buffers."""
+
+ camera_id = self.camera_id
+ fps = self.camera_fps
+
+ # Build video capture
+ if is_image_file(camera_id):
+ self.vcap = ImageCapture(camera_id)
+ else:
+ self.vcap = cv2.VideoCapture(camera_id)
+ if self.camera_frame_shape is not None:
+ width, height = self.camera_frame_shape
+ self.vcap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
+ self.vcap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
+
+ if not self.vcap.isOpened():
+ warnings.warn(f'Cannot open camera (ID={camera_id})')
+ sys.exit()
+
+ # Read video frames in a loop
+ first_frame = True
+ while not self.event_manager.is_set('_exit_'):
+ if self.synchronous:
+ if first_frame:
+ cm = nullcontext()
+ else:
+                    # Do not read a new frame until the last one has been
+                    # processed
+ cm = self.event_manager.wait_and_handle('_idle_')
+ else:
+ # Read frames with a maximum FPS
+ cm = limit_max_fps(fps)
+
+ first_frame = False
+
+ with cm:
+ # Read a frame
+ ret_val, frame = self.vcap.read()
+ if ret_val:
+ # Put frame message (for display) into buffer `_frame_`
+ frame_msg = FrameMessage(frame)
+ self.buffer_manager.put('_frame_', frame_msg)
+
+ # Put input message (for model inference or other use)
+ # into buffer `_input_`
+ input_msg = FrameMessage(frame.copy())
+ input_msg.update_route_info(
+ node_name='Camera Info',
+ node_type='dummy',
+ info=self._get_camera_info())
+ self.buffer_manager.put_force('_input_', input_msg)
+
+ else:
+ # Put a video ending signal
+ self.buffer_manager.put('_frame_', VideoEndingMessage())
+
+ self.vcap.release()
+
+ def _display(self):
+ """Continually obtain and display output frames."""
+
+ output_msg = None
+
+ while not self.event_manager.is_set('_exit_'):
+ while self.buffer_manager.is_empty('_display_'):
+ time.sleep(0.001)
+
+ # Set _idle_ to allow reading next frame
+ if self.synchronous:
+ self.event_manager.set('_idle_')
+
+ # acquire output from buffer
+ output_msg = self.buffer_manager.get('_display_')
+
+            # A VideoEndingMessage indicates that the input stream has ended
+ if isinstance(output_msg, VideoEndingMessage):
+ self.event_manager.set('_exit_')
+ break
+
+ img = output_msg.get_image()
+
+ # show in a window
+ cv2.imshow(self.name, img)
+
+ # handle keyboard input
+ key = cv2.waitKey(1)
+ if key != -1:
+ self._on_keyboard_input(key)
+
+ cv2.destroyAllWindows()
+
+ def _on_keyboard_input(self, key):
+ """Handle the keyboard input."""
+
+ if key in (27, ord('q'), ord('Q')):
+ logging.info(f'Exit event captured: {key}')
+ self.event_manager.set('_exit_')
+ else:
+ logging.info(f'Keyboard event captured: {key}')
+ self.event_manager.set(key, is_keyboard=True)
+
+ def _get_camera_info(self):
+ """Return the camera information in a dict."""
+
+ frame_width = self.vcap.get(cv2.CAP_PROP_FRAME_WIDTH)
+ frame_height = self.vcap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+ frame_rate = self.vcap.get(cv2.CAP_PROP_FPS)
+
+ cam_info = {
+ 'Camera ID': self.camera_id,
+ 'Source resolution': f'{frame_width}x{frame_height}',
+ 'Source FPS': frame_rate,
+ }
+
+ return cam_info
+
+ def run(self):
+ """Program entry.
+
+ This method starts all nodes as well as video I/O in separate threads.
+ """
+
+ try:
+ # Start node threads
+ non_daemon_nodes = []
+ for node in self.node_list:
+ node.start()
+ if not node.daemon:
+ non_daemon_nodes.append(node)
+
+ # Create a thread to read video frames
+ t_read = Thread(target=self._read_camera, args=())
+ t_read.start()
+
+ # Run display in the main thread
+ self._display()
+ logging.info('Display shut down')
+
+            # Join non-daemon nodes and the camera-reading thread
+ logging.info('Camera reading about to join')
+ t_read.join()
+
+ for node in non_daemon_nodes:
+ logging.info(f'Node {node.name} about to join')
+ node.join()
+
+ except KeyboardInterrupt:
+ pass
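+
+
+def _demo_webcam_runner():
+    """Illustrative usage sketch of ``WebcamRunner`` (added for
+    documentation purposes; not part of the original module).
+
+    The config file path is a placeholder. The config is assumed to define
+    a ``runner`` dict with the constructor arguments (``name``,
+    ``camera_id``, ``nodes``, ...), where each node ``type`` is registered
+    in ``NODES``.
+    """
+    from mmcv import Config
+    cfg = Config.fromfile('webcam_demo_config.py')  # hypothetical config
+    runner = WebcamRunner(**cfg.runner)
+    runner.run()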
diff --git a/vendor/detectron2/.circleci/config.yml b/vendor/detectron2/.circleci/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9a2148c3c8df3efadc7b0e3f1e755078fcade3d5
--- /dev/null
+++ b/vendor/detectron2/.circleci/config.yml
@@ -0,0 +1,271 @@
+version: 2.1
+
+# -------------------------------------------------------------------------------------
+# Environments to run the jobs in
+# -------------------------------------------------------------------------------------
+cpu: &cpu
+ machine:
+ image: ubuntu-2004:202107-02
+ resource_class: medium
+
+gpu: &gpu
+ machine:
+ # NOTE: use a cuda version that's supported by all our pytorch versions
+ image: ubuntu-1604-cuda-11.1:202012-01
+ resource_class: gpu.nvidia.small
+
+windows-cpu: &windows_cpu
+ machine:
+ resource_class: windows.medium
+ image: windows-server-2019-vs2019:stable
+ shell: powershell.exe
+
+# windows-gpu: &windows_gpu
+# machine:
+# resource_class: windows.gpu.nvidia.medium
+# image: windows-server-2019-nvidia:stable
+
+version_parameters: &version_parameters
+ parameters:
+ pytorch_version:
+ type: string
+ torchvision_version:
+ type: string
+ pytorch_index:
+ type: string
+ # use test wheels index to have access to RC wheels
+ # https://download.pytorch.org/whl/test/torch_test.html
+ default: "https://download.pytorch.org/whl/torch_stable.html"
+ python_version: # NOTE: only affect linux
+ type: string
+ default: '3.8.6'
+
+ environment:
+ PYTORCH_VERSION: << parameters.pytorch_version >>
+ TORCHVISION_VERSION: << parameters.torchvision_version >>
+ PYTORCH_INDEX: << parameters.pytorch_index >>
+ PYTHON_VERSION: << parameters.python_version>>
+ # point datasets to ~/.torch so it's cached in CI
+ DETECTRON2_DATASETS: ~/.torch/datasets
+
+# -------------------------------------------------------------------------------------
+# Re-usable commands
+# -------------------------------------------------------------------------------------
+# install_nvidia_driver: &install_nvidia_driver
+# - run:
+# name: Install nvidia driver
+# working_directory: ~/
+# command: |
+# wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run'
+# sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm
+# nvidia-smi
+
+add_ssh_keys: &add_ssh_keys
+ # https://circleci.com/docs/2.0/add-ssh-key/
+ - add_ssh_keys:
+ fingerprints:
+ - "e4:13:f2:22:d4:49:e8:e4:57:5a:ac:20:2f:3f:1f:ca"
+
+install_python: &install_python
+ - run:
+ name: Install Python
+ working_directory: ~/
+ command: |
+ # upgrade pyenv
+ cd /opt/circleci/.pyenv/plugins/python-build/../.. && git pull && cd -
+ pyenv install -s $PYTHON_VERSION
+ pyenv global $PYTHON_VERSION
+ python --version
+ which python
+ pip install --upgrade pip
+
+setup_venv: &setup_venv
+ - run:
+ name: Setup Virtual Env
+ working_directory: ~/
+ command: |
+ python -m venv ~/venv
+ echo ". ~/venv/bin/activate" >> $BASH_ENV
+ . ~/venv/bin/activate
+ python --version
+ which python
+ which pip
+ pip install --upgrade pip
+
+setup_venv_win: &setup_venv_win
+ - run:
+ name: Setup Virtual Env for Windows
+ command: |
+ pip install virtualenv
+ python -m virtualenv env
+ .\env\Scripts\activate
+ python --version
+ which python
+ which pip
+
+install_linux_dep: &install_linux_dep
+ - run:
+ name: Install Dependencies
+ command: |
+ # disable crash coredump, so unittests fail fast
+ sudo systemctl stop apport.service
+ # install from github to get latest; install iopath first since fvcore depends on it
+ pip install --progress-bar off -U 'git+https://github.com/facebookresearch/iopath'
+ pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore'
+ # Don't use pytest-xdist: cuda tests are unstable under multi-process workers.
+ # Don't use opencv 4.7.0.68: https://github.com/opencv/opencv-python/issues/765
+ pip install --progress-bar off ninja opencv-python-headless!=4.7.0.68 pytest tensorboard pycocotools onnx
+ pip install --progress-bar off torch==$PYTORCH_VERSION -f $PYTORCH_INDEX
+ if [[ "$TORCHVISION_VERSION" == "master" ]]; then
+ pip install git+https://github.com/pytorch/vision.git
+ else
+ pip install --progress-bar off torchvision==$TORCHVISION_VERSION -f $PYTORCH_INDEX
+ fi
+
+ python -c 'import torch; print("CUDA:", torch.cuda.is_available())'
+ gcc --version
+
+install_detectron2: &install_detectron2
+ - run:
+ name: Install Detectron2
+ command: |
+ # Remove first, in case it's in the CI cache
+ pip uninstall -y detectron2
+
+ pip install --progress-bar off -e .[all]
+ python -m detectron2.utils.collect_env
+ ./datasets/prepare_for_tests.sh
+
+run_unittests: &run_unittests
+ - run:
+ name: Run Unit Tests
+ command: |
+ pytest -sv --durations=15 tests # parallel causes some random failures
+
+uninstall_tests: &uninstall_tests
+ - run:
+ name: Run Tests After Uninstalling
+ command: |
+ pip uninstall -y detectron2
+ # Remove built binaries
+ rm -rf build/ detectron2/*.so
+ # Tests that code is importable without installation
+ PYTHONPATH=. ./.circleci/import-tests.sh
+
+
+# -------------------------------------------------------------------------------------
+# Jobs to run
+# -------------------------------------------------------------------------------------
+jobs:
+ linux_cpu_tests:
+ <<: *cpu
+ <<: *version_parameters
+
+ working_directory: ~/detectron2
+
+ steps:
+ - checkout
+
+ # Cache the venv directory that contains python, dependencies, and checkpoints
+ # Refresh the key when dependencies should be updated (e.g. when pytorch releases)
+ - restore_cache:
+ keys:
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
+
+ - <<: *install_python
+ - <<: *install_linux_dep
+ - <<: *install_detectron2
+ - <<: *run_unittests
+ - <<: *uninstall_tests
+
+ - save_cache:
+ paths:
+ - /opt/circleci/.pyenv
+ - ~/.torch
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
+
+
+ linux_gpu_tests:
+ <<: *gpu
+ <<: *version_parameters
+
+ working_directory: ~/detectron2
+
+ steps:
+ - checkout
+
+ - restore_cache:
+ keys:
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
+
+ - <<: *install_python
+ - <<: *install_linux_dep
+ - <<: *install_detectron2
+ - <<: *run_unittests
+ - <<: *uninstall_tests
+
+ - save_cache:
+ paths:
+ - /opt/circleci/.pyenv
+ - ~/.torch
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
+
+ windows_cpu_build:
+ <<: *windows_cpu
+ <<: *version_parameters
+ steps:
+ - <<: *add_ssh_keys
+ - checkout
+ - <<: *setup_venv_win
+
+ # Cache the env directory that contains dependencies
+ - restore_cache:
+ keys:
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404
+
+ - run:
+ name: Install Dependencies
+ command: |
+ pip install certifi --ignore-installed # required on windows to workaround some cert issue
+ pip install numpy cython # required on windows before pycocotools
+ pip install opencv-python-headless pytest-xdist pycocotools tensorboard onnx
+ pip install -U git+https://github.com/facebookresearch/iopath
+ pip install -U git+https://github.com/facebookresearch/fvcore
+ pip install torch==$env:PYTORCH_VERSION torchvision==$env:TORCHVISION_VERSION -f $env:PYTORCH_INDEX
+
+ - save_cache:
+ paths:
+ - env
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404
+
+ - <<: *install_detectron2
+ # TODO: unittest fails for now
+
+workflows:
+ version: 2
+ regular_test:
+ jobs:
+ - linux_cpu_tests:
+ name: linux_cpu_tests_pytorch1.10
+ pytorch_version: '1.10.0+cpu'
+ torchvision_version: '0.11.1+cpu'
+ - linux_gpu_tests:
+ name: linux_gpu_tests_pytorch1.8
+ pytorch_version: '1.8.1+cu111'
+ torchvision_version: '0.9.1+cu111'
+ - linux_gpu_tests:
+ name: linux_gpu_tests_pytorch1.9
+ pytorch_version: '1.9+cu111'
+ torchvision_version: '0.10+cu111'
+ - linux_gpu_tests:
+ name: linux_gpu_tests_pytorch1.10
+ pytorch_version: '1.10+cu111'
+ torchvision_version: '0.11.1+cu111'
+ - linux_gpu_tests:
+ name: linux_gpu_tests_pytorch1.10_python39
+ pytorch_version: '1.10+cu111'
+ torchvision_version: '0.11.1+cu111'
+ python_version: '3.9.6'
+ - windows_cpu_build:
+ pytorch_version: '1.10+cpu'
+ torchvision_version: '0.11.1+cpu'
diff --git a/vendor/detectron2/.circleci/import-tests.sh b/vendor/detectron2/.circleci/import-tests.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8e8deb6ad699fd673fea0f66b91aa3ec6e3c7c7c
--- /dev/null
+++ b/vendor/detectron2/.circleci/import-tests.sh
@@ -0,0 +1,16 @@
+#!/bin/bash -e
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# Test that import works without building detectron2.
+
+# Check that _C is not importable
+python -c "from detectron2 import _C" > /dev/null 2>&1 && {
+ echo "This test should be run without building detectron2."
+ exit 1
+}
+
+# Check that other modules are still importable, even when _C is not importable
+python -c "from detectron2 import modeling"
+python -c "from detectron2 import modeling, data"
+python -c "from detectron2 import evaluation, export, checkpoint"
+python -c "from detectron2 import utils, engine"
diff --git a/vendor/detectron2/.clang-format b/vendor/detectron2/.clang-format
new file mode 100644
index 0000000000000000000000000000000000000000..39b1b3d603ed0cf6b7f94c9c08067f148f35613f
--- /dev/null
+++ b/vendor/detectron2/.clang-format
@@ -0,0 +1,85 @@
+AccessModifierOffset: -1
+AlignAfterOpenBracket: AlwaysBreak
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands: false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ]
+IncludeCategories:
+ - Regex: '^<.*\.h(pp)?>'
+ Priority: 1
+ - Regex: '^<.*'
+ Priority: 2
+ - Regex: '.*'
+ Priority: 3
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 8
+UseTab: Never
diff --git a/vendor/detectron2/.flake8 b/vendor/detectron2/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..28881e488263c5693835063be9455f2fb1fdc849
--- /dev/null
+++ b/vendor/detectron2/.flake8
@@ -0,0 +1,15 @@
+# This is an example .flake8 config, used when developing *Black* itself.
+# Keep in sync with setup.cfg which is used for source packages.
+
+[flake8]
+ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811, C101, EXE001, EXE002
+max-line-length = 100
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
+exclude = build
+per-file-ignores =
+ **/__init__.py:F401,F403,E402
+ **/configs/**.py:F401,E402
+ configs/**.py:F401,E402
+ **/tests/config/**.py:F401,E402
+ tests/config/**.py:F401,E402
diff --git a/vendor/detectron2/.github/CODE_OF_CONDUCT.md b/vendor/detectron2/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..0f7ad8bfc173eac554f0b6ef7c684861e8014bbe
--- /dev/null
+++ b/vendor/detectron2/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,5 @@
+# Code of Conduct
+
+Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
+Please read the [full text](https://code.fb.com/codeofconduct/)
+so that you can understand what actions will and will not be tolerated.
diff --git a/vendor/detectron2/.github/CONTRIBUTING.md b/vendor/detectron2/.github/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..9bab709cae689ba3b92dd52f7fbcc0c6926f4a38
--- /dev/null
+++ b/vendor/detectron2/.github/CONTRIBUTING.md
@@ -0,0 +1,68 @@
+# Contributing to detectron2
+
+## Issues
+We use GitHub issues to track public bugs and questions.
+Please make sure to follow one of the
+[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose)
+when reporting any issues.
+
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+
+## Pull Requests
+We actively welcome pull requests.
+
+However, if you're adding any significant features (e.g. > 50 lines), please
+make sure to discuss with maintainers about your motivation and proposals in an issue
+before sending a PR. This is to save your time so you don't spend time on a PR that we'll not accept.
+
+We do not always accept new features, and we take the following
+factors into consideration:
+
+1. Whether the same feature can be achieved without modifying detectron2.
+ Detectron2 is designed so that you can implement many extensions from the outside, e.g.
+ those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects).
+ * If some part of detectron2 is not extensible enough, you can also bring up a more general issue to
+ improve it. Such feature request may be useful to more users.
+2. Whether the feature is potentially useful to a large audience (e.g. an impactful detection paper, a popular dataset,
+ a significant speedup, a widely useful utility),
+ or only to a small portion of users (e.g., a less-known paper, an improvement not in the object
+ detection field, a trick that's not very popular in the community, code to handle a non-standard type of data)
+   * New models, datasets, and tasks are by default not added to detectron2 before they
+     receive significant popularity in the community.
+ We sometimes accept such features in `projects/`, or as a link in `projects/README.md`.
+3. Whether the proposed solution has a good design / interface. This can be discussed in the issue prior to PRs, or
+ in the form of a draft PR.
+4. Whether the proposed solution adds extra mental/practical overhead to users who don't
+ need such feature.
+5. Whether the proposed solution breaks existing APIs.
+
+To add a feature to an existing function/class `Func`, there are always two approaches:
+(1) add new arguments to `Func`; (2) write a new `Func_with_new_feature`.
+To meet the above criteria, we often prefer approach (2), because:
+
+1. It does not involve modifying or potentially breaking existing code.
+2. It does not add overhead to users who do not need the new feature.
+3. Adding new arguments to a function/class is not scalable w.r.t. all the possible new research ideas in the future.
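+
+For illustration, here is a hypothetical sketch of approach (2); the function
+names below are made up and are not part of detectron2:
+
+```python
+def resize_boxes(boxes, scale):
+    """Existing function: left unchanged."""
+    return [[coord * scale for coord in box] for box in boxes]
+
+
+def resize_boxes_with_clipping(boxes, scale, image_size):
+    """New behavior lives in a new function, so existing callers are unaffected."""
+    width, height = image_size
+    resized = resize_boxes(boxes, scale)
+    return [[min(max(x1, 0), width), min(max(y1, 0), height),
+             min(max(x2, 0), width), min(max(y2, 0), height)]
+            for x1, y1, x2, y2 in resized]
+```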
+
+When sending a PR, please do:
+
+1. If a PR contains multiple orthogonal changes, split it to several PRs.
+2. If you've added code that should be tested, add tests.
+3. For PRs that need experiments (e.g. adding a new model or new methods),
+ you don't need to update model zoo, but do provide experiment results in the description of the PR.
+4. If APIs are changed, update the documentation.
+5. We use the [Google style docstrings](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) in python.
+6. Make sure your code lints with `./dev/linter.sh`.
+
+
+## Contributor License Agreement ("CLA")
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+
+Complete your CLA here:
+
+## License
+By contributing to detectron2, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.
diff --git a/vendor/detectron2/.github/Detectron2-Logo-Horz.svg b/vendor/detectron2/.github/Detectron2-Logo-Horz.svg
new file mode 100644
index 0000000000000000000000000000000000000000..eb2d643ddd940cd8bdb5eaad093029969ff2364c
--- /dev/null
+++ b/vendor/detectron2/.github/Detectron2-Logo-Horz.svg
@@ -0,0 +1 @@
+Detectron2-Logo-Horz
\ No newline at end of file
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE.md b/vendor/detectron2/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e8aaa2d3722e7e73a3d94b2b7dfc4f751d7a240
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,5 @@
+
+Please select an issue template from
+https://github.com/facebookresearch/detectron2/issues/new/choose .
+
+Otherwise your issue will be closed.
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE/bugs.md b/vendor/detectron2/.github/ISSUE_TEMPLATE/bugs.md
new file mode 100644
index 0000000000000000000000000000000000000000..d0235c708ab6b0cdadb5865110e9e8c22ca313aa
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE/bugs.md
@@ -0,0 +1,38 @@
+---
+name: "🐛 Bugs"
+about: Report bugs in detectron2
+title: Please read & provide the following
+
+---
+
+## Instructions To Reproduce the 🐛 Bug:
+1. Full runnable code or full changes you made:
+```
+If making changes to the project itself, please use output of the following command:
+git rev-parse HEAD; git diff
+
+
+```
+2. What exact command you run:
+3. __Full logs__ or other relevant observations:
+```
+
+```
+4. please simplify the steps as much as possible so they do not require additional resources to
+ run, such as a private dataset.
+
+## Expected behavior:
+
+If there is no obvious error in the "full logs" provided above,
+please tell us the expected behavior.
+
+## Environment:
+
+Provide your environment information using the following command:
+```
+wget -nc -q https://github.com/facebookresearch/detectron2/raw/main/detectron2/utils/collect_env.py && python collect_env.py
+```
+
+If your issue looks like an installation issue / environment issue,
+please first try to solve it yourself with the instructions in
+https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE/config.yml b/vendor/detectron2/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c60c2e14309be9a93293a64e7481f2a91385f76a
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,17 @@
+# require an issue template to be chosen
+blank_issues_enabled: false
+
+contact_links:
+ - name: How-To / All Other Questions
+ url: https://github.com/facebookresearch/detectron2/discussions
+ about: Use "github discussions" for community support on general questions that don't belong to the above issue categories
+ - name: Detectron2 Documentation
+ url: https://detectron2.readthedocs.io/index.html
+ about: Check if your question is answered in tutorials or API docs
+
+# Unexpected behaviors & bugs are split to two templates.
+# When they are one template, users think "it's not a bug" and don't choose the template.
+#
+# But the file name is still "unexpected-problems-bugs.md" so that old references
+# to this issue template still work.
+# It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs)
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE/documentation.md b/vendor/detectron2/.github/ISSUE_TEMPLATE/documentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..88214d62e5228639491e019c78bb4171d535cdd1
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE/documentation.md
@@ -0,0 +1,14 @@
+---
+name: "\U0001F4DA Documentation Issue"
+about: Report a problem about existing documentation, comments, website or tutorials.
+labels: documentation
+
+---
+
+## 📚 Documentation Issue
+
+This issue category is for problems about existing documentation, not for asking how-to questions.
+
+* Provide a link to an existing documentation/comment/tutorial:
+
+* How should the above documentation/comment/tutorial improve:
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE/feature-request.md b/vendor/detectron2/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 0000000000000000000000000000000000000000..03a1e93d7293948042120b875af8be0c6964e59c
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,31 @@
+---
+name: "\U0001F680Feature Request"
+about: Suggest an improvement or new feature
+labels: enhancement
+
+---
+
+## 🚀 Feature
+A clear and concise description of the feature proposal.
+
+## Motivation & Examples
+
+Tell us why the feature is useful.
+
+Describe what the feature would look like, if it is implemented.
+Best demonstrated using **code examples** in addition to words.
+
+## Note
+
+We only consider adding new features if they are relevant to many users.
+
+If you request implementation of research papers -- we only consider papers that have enough significance and prevalence in the object detection field.
+
+We do not take requests for most projects in the `projects/` directory, because they are research code releases that are mainly for other researchers to reproduce results.
+
+"Make X faster/accurate" is not a valid feature request. "Implement a concrete feature that can make X faster/accurate" can be a valid feature request.
+
+Instead of adding features inside detectron2,
+you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html).
+The [projects/](https://github.com/facebookresearch/detectron2/tree/main/projects/) directory contains many of such examples.
+
diff --git a/vendor/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md b/vendor/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md
new file mode 100644
index 0000000000000000000000000000000000000000..5db8f22415ff5c857ce83fb0d3de68211f775080
--- /dev/null
+++ b/vendor/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md
@@ -0,0 +1,44 @@
+---
+name: "😩 Unexpected behaviors"
+about: Report unexpected behaviors when using detectron2
+title: Please read & provide the following
+
+---
+
+If you do not know the root cause of the problem, please post according to this template:
+
+## Instructions To Reproduce the Issue:
+
+Check https://stackoverflow.com/help/minimal-reproducible-example for how to ask good questions.
+Simplify the steps to reproduce the issue using suggestions from the above link, and provide them below:
+
+1. Full runnable code or full changes you made:
+```
+If making changes to the project itself, please use output of the following command:
+git rev-parse HEAD; git diff
+
+
+```
+2. What exact command you run:
+3. __Full logs__ or other relevant observations:
+```
+
+```
+
+## Expected behavior:
+
+If there is no obvious crash in the "full logs" provided above,
+please tell us the expected behavior.
+
+If you expect a model to converge / work better, we do not help with such issues, unless
+a model fails to reproduce the results in detectron2 model zoo, or proves existence of bugs.
+
+## Environment:
+
+Paste the output of the following command:
+```
+wget -nc -nv https://github.com/facebookresearch/detectron2/raw/main/detectron2/utils/collect_env.py && python collect_env.py
+```
+
+If your issue looks like an installation issue / environment issue,
+please first check common issues in https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
diff --git a/vendor/detectron2/.github/pull_request_template.md b/vendor/detectron2/.github/pull_request_template.md
new file mode 100644
index 0000000000000000000000000000000000000000..d71729baee1ec324ab9db6e7562965cf9e2a091b
--- /dev/null
+++ b/vendor/detectron2/.github/pull_request_template.md
@@ -0,0 +1,10 @@
+Thanks for your contribution!
+
+If you're sending a large PR (e.g., >100 lines),
+please open an issue first about the feature / bug, and indicate how you want to contribute.
+
+We do not always accept features.
+See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests about how we handle PRs.
+
+Before submitting a PR, please run `dev/linter.sh` to lint the code.
+
diff --git a/vendor/detectron2/.github/workflows/check-template.yml b/vendor/detectron2/.github/workflows/check-template.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3caed9df3caa50c0d3b606e4a56a1959c463b710
--- /dev/null
+++ b/vendor/detectron2/.github/workflows/check-template.yml
@@ -0,0 +1,86 @@
+name: Check issue template
+
+on:
+ issues:
+ types: [opened]
+
+jobs:
+ check-template:
+ runs-on: ubuntu-latest
+ # comment this out when testing with https://github.com/nektos/act
+ if: ${{ github.repository_owner == 'facebookresearch' }}
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/github-script@v3
+ with:
+ github-token: ${{secrets.GITHUB_TOKEN}}
+ script: |
+ // Arguments available:
+ // - github: A pre-authenticated octokit/rest.js client
+ // - context: An object containing the context of the workflow run
+ // - core: A reference to the @actions/core package
+ // - io: A reference to the @actions/io package
+ const fs = require('fs');
+ const editDistance = require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/levenshtein.js`).getEditDistance
+ issue = await github.issues.get({
+ owner: context.issue.owner,
+ repo: context.issue.repo,
+ issue_number: context.issue.number,
+ });
+ const hasLabel = issue.data.labels.length > 0;
+ if (hasLabel || issue.state === "closed") {
+ // don't require template on them
+ core.debug("Issue " + issue.data.title + " was skipped.");
+ return;
+ }
+
+ sameAsTemplate = function(filename, body) {
+ let tmpl = fs.readFileSync(`.github/ISSUE_TEMPLATE/${filename}`, 'utf8');
+ tmpl = tmpl.toLowerCase().split("---").slice(2).join("").trim();
+ tmpl = tmpl.replace(/(\r\n|\n|\r)/gm, "");
+ let bodyr = body.replace(/(\r\n|\n|\r)/gm, "");
+ let dist = editDistance(tmpl, bodyr);
+ return dist < 8;
+ };
+
+ checkFail = async function(msg) {
+ core.info("Processing '" + issue.data.title + "' with message: " + msg);
+ await github.issues.addLabels({
+ owner: context.issue.owner,
+ repo: context.issue.repo,
+ issue_number: context.issue.number,
+ labels: ["needs-more-info"],
+ });
+ await github.issues.createComment({
+ owner: context.issue.owner,
+ repo: context.issue.repo,
+ issue_number: context.issue.number,
+ body: msg,
+ });
+ };
+
+ const body = issue.data.body.toLowerCase().trim();
+
+ if (sameAsTemplate("bugs.md", body) || sameAsTemplate("unexpected-problems-bugs.md", body)) {
+ await checkFail(`
+ We found that not enough information is provided about this issue.
+ Please provide details following the [issue template](https://github.com/facebookresearch/detectron2/issues/new/choose).`)
+ return;
+ }
+
+ const hasInstructions = body.indexOf("reproduce") != -1;
+ const hasEnvironment = (body.indexOf("environment") != -1) || (body.indexOf("colab") != -1) || (body.indexOf("docker") != -1);
+ if (hasInstructions && hasEnvironment) {
+ core.debug("Issue " + issue.data.title + " follows template.");
+ return;
+ }
+
+ let message = "You've chosen to report an unexpected problem or bug. Unless you already know the root cause of it, please include details about it by filling the [issue template](https://github.com/facebookresearch/detectron2/issues/new/choose).\n";
+ message += "The following information is missing: ";
+ if (!hasInstructions) {
+ message += "\"Instructions To Reproduce the Issue and __Full__ Logs\"; ";
+ }
+ if (!hasEnvironment) {
+ message += "\"Your Environment\"; ";
+ }
+ await checkFail(message);
diff --git a/vendor/detectron2/.github/workflows/levenshtein.js b/vendor/detectron2/.github/workflows/levenshtein.js
new file mode 100644
index 0000000000000000000000000000000000000000..67a5e3613c0072d124035ee8933a23de2105cfe3
--- /dev/null
+++ b/vendor/detectron2/.github/workflows/levenshtein.js
@@ -0,0 +1,44 @@
+/*
+Copyright (c) 2011 Andrei Mackenzie
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+// Compute the edit distance between the two given strings
+exports.getEditDistance = function(a, b){
+ if(a.length == 0) return b.length;
+ if(b.length == 0) return a.length;
+
+ var matrix = [];
+
+ // increment along the first column of each row
+ var i;
+ for(i = 0; i <= b.length; i++){
+ matrix[i] = [i];
+ }
+
+ // increment each column in the first row
+ var j;
+ for(j = 0; j <= a.length; j++){
+ matrix[0][j] = j;
+ }
+
+ // Fill in the rest of the matrix
+ for(i = 1; i <= b.length; i++){
+ for(j = 1; j <= a.length; j++){
+ if(b.charAt(i-1) == a.charAt(j-1)){
+ matrix[i][j] = matrix[i-1][j-1];
+ } else {
+ matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution
+ Math.min(matrix[i][j-1] + 1, // insertion
+ matrix[i-1][j] + 1)); // deletion
+ }
+ }
+ }
+
+ return matrix[b.length][a.length];
+};
diff --git a/vendor/detectron2/.github/workflows/needs-reply.yml b/vendor/detectron2/.github/workflows/needs-reply.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4affabd3498290a752fab6d848fc667758bedaf2
--- /dev/null
+++ b/vendor/detectron2/.github/workflows/needs-reply.yml
@@ -0,0 +1,98 @@
+name: Close/Lock issues after inactivity
+
+on:
+ schedule:
+ - cron: "0 0 * * *"
+
+jobs:
+ close-issues-needs-more-info:
+ runs-on: ubuntu-latest
+ if: ${{ github.repository_owner == 'facebookresearch' }}
+ steps:
+ - name: Close old issues that need reply
+ uses: actions/github-script@v3
+ with:
+ github-token: ${{secrets.GITHUB_TOKEN}}
+ # Modified from https://github.com/dwieeb/needs-reply
+ script: |
+ // Arguments available:
+ // - github: A pre-authenticated octokit/rest.js client
+ // - context: An object containing the context of the workflow run
+ // - core: A reference to the @actions/core package
+ // - io: A reference to the @actions/io package
+ const kLabelToCheck = "needs-more-info";
+ const kInvalidLabel = "invalid/unrelated";
+ const kDaysBeforeClose = 7;
+ const kMessage = "Requested information was not provided in 7 days, so we're closing this issue.\n\nPlease open new issue if information becomes available. Otherwise, use [github discussions](https://github.com/facebookresearch/detectron2/discussions) for free-form discussions."
+
+ issues = await github.issues.listForRepo({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ state: 'open',
+ labels: kLabelToCheck,
+ sort: 'updated',
+ direction: 'asc',
+ per_page: 30,
+ page: 1,
+ });
+ issues = issues.data;
+ if (issues.length === 0) {
+ core.info('No more issues found to process. Exiting.');
+ return;
+ }
+ for (const issue of issues) {
+ if (!!issue.pull_request)
+ continue;
+ core.info(`Processing issue #${issue.number}`);
+
+ let updatedAt = new Date(issue.updated_at).getTime();
+ const numComments = issue.comments;
+ const comments = await github.issues.listComments({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issue.number,
+ per_page: 30,
+ page: Math.floor((numComments - 1) / 30) + 1, // the last page
+ });
+ const lastComments = comments.data
+ .map(l => new Date(l.created_at).getTime())
+ .sort();
+ if (lastComments.length > 0) {
+ updatedAt = lastComments[lastComments.length - 1];
+ }
+
+ const now = new Date().getTime();
+ const daysSinceUpdated = (now - updatedAt) / 1000 / 60 / 60 / 24;
+
+ if (daysSinceUpdated < kDaysBeforeClose) {
+ core.info(`Skipping #${issue.number} because it has been updated in the last ${daysSinceUpdated} days`);
+ continue;
+ }
+ core.info(`Closing #${issue.number} because it has not been updated in the last ${daysSinceUpdated} days`);
+ await github.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issue.number,
+ body: kMessage,
+ });
+ const newLabels = numComments <= 2 ? [kInvalidLabel, kLabelToCheck] : issue.labels;
+ await github.issues.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issue.number,
+ labels: newLabels,
+ state: 'closed',
+ });
+ }
+
+ lock-issues-after-closed:
+ runs-on: ubuntu-latest
+ if: ${{ github.repository_owner == 'facebookresearch' }}
+ steps:
+ - name: Lock closed issues that have no activity for a while
+ uses: dessant/lock-threads@v2
+ with:
+ github-token: ${{ github.token }}
+ issue-lock-inactive-days: '300'
+ process-only: 'issues'
+ issue-exclude-labels: 'enhancement,bug,documentation'
diff --git a/vendor/detectron2/.github/workflows/remove-needs-reply.yml b/vendor/detectron2/.github/workflows/remove-needs-reply.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1f000b28ca27ef9c219d197f95251be1cb8c0979
--- /dev/null
+++ b/vendor/detectron2/.github/workflows/remove-needs-reply.yml
@@ -0,0 +1,25 @@
+name: Remove needs-more-info label
+
+on:
+ issue_comment:
+ types: [created]
+ issues:
+ types: [edited]
+
+jobs:
+ remove-needs-more-info-label:
+ runs-on: ubuntu-latest
+ # 1. issue_comment events could include PR comment, filter them out
+ # 2. Only trigger action if event was produced by the original author
+ if: ${{ !github.event.issue.pull_request && github.event.sender.login == github.event.issue.user.login }}
+ steps:
+ - name: Remove needs-more-info label
+ uses: octokit/request-action@v2.x
+ continue-on-error: true
+ with:
+ route: DELETE /repos/:repository/issues/:issue/labels/:label
+ repository: ${{ github.repository }}
+ issue: ${{ github.event.issue.number }}
+ label: needs-more-info
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/vendor/detectron2/.github/workflows/workflow.yml b/vendor/detectron2/.github/workflows/workflow.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3de246c9a04850ecec7f52f5264ba2e6102e6881
--- /dev/null
+++ b/vendor/detectron2/.github/workflows/workflow.yml
@@ -0,0 +1,81 @@
+name: CI
+on: [push, pull_request]
+
+# Run linter with github actions for quick feedback.
+# Run macos tests with github actions. Linux (CPU & GPU) tests currently run on CircleCI
+jobs:
+ linter:
+ runs-on: ubuntu-latest
+ # run on PRs, or commits to facebookresearch (not internal)
+ if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }}
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python 3.9
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.9
+ - name: Install dependencies
+ # flake8-bugbear flake8-comprehensions are useful but not available internally
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8==3.8.1 isort==4.3.21
+ python -m pip install black==22.3.0
+ flake8 --version
+ - name: Lint
+ run: |
+ echo "Running isort"
+ isort -c -sp .
+ echo "Running black"
+ black -l 100 --check .
+ echo "Running flake8"
+ flake8 .
+
+ macos_tests:
+ runs-on: macos-latest
+ # run on PRs, or commits to facebookresearch (not internal)
+ if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }}
+ strategy:
+ fail-fast: false
+ matrix:
+ torch: ["1.8", "1.9", "1.10"]
+ include:
+ - torch: "1.8"
+ torchvision: 0.9
+ - torch: "1.9"
+ torchvision: "0.10"
+ - torch: "1.10"
+ torchvision: "0.11.1"
+ env:
+ # point datasets to ~/.torch so it's cached by CI
+ DETECTRON2_DATASETS: ~/.torch/datasets
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ - name: Set up Python 3.8
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Cache dependencies
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ env.pythonLocation }}/lib/python3.8/site-packages
+ ~/.torch
+ key: ${{ runner.os }}-torch${{ matrix.torch }}-${{ hashFiles('setup.py') }}-20220119
+
+ - name: Install dependencies
+ run: |
+ python -m pip install -U pip
+ python -m pip install ninja opencv-python-headless onnx pytest-xdist
+ python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
+ # install from github to get latest; install iopath first since fvcore depends on it
+ python -m pip install -U 'git+https://github.com/facebookresearch/iopath'
+ python -m pip install -U 'git+https://github.com/facebookresearch/fvcore'
+
+ - name: Build and install
+ run: |
+ CC=clang CXX=clang++ python -m pip install -e .[all]
+ python -m detectron2.utils.collect_env
+ ./datasets/prepare_for_tests.sh
+ - name: Run unittests
+ run: python -m pytest -n 4 --durations=15 -sv tests/
diff --git a/vendor/detectron2/.gitignore b/vendor/detectron2/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..9953d9b49bd150ffb251886f755b7a4150c4e35d
--- /dev/null
+++ b/vendor/detectron2/.gitignore
@@ -0,0 +1,53 @@
+# output dir
+output
+instant_test_output
+inference_test_output
+
+
+*.png
+*.json
+*.diff
+*.jpg
+!/projects/DensePose/doc/images/*.jpg
+
+# compilation and distribution
+__pycache__
+_ext
+*.pyc
+*.pyd
+*.so
+*.dll
+*.egg-info/
+build/
+dist/
+wheels/
+
+# pytorch/python/numpy formats
+*.pth
+*.pkl
+*.npy
+*.ts
+model_ts*.txt
+
+# ipython/jupyter notebooks
+*.ipynb
+**/.ipynb_checkpoints/
+
+# Editor temporaries
+*.swn
+*.swo
+*.swp
+*~
+
+# editor settings
+.idea
+.vscode
+_darcs
+
+# project dirs
+/detectron2/model_zoo/configs
+/datasets/*
+!/datasets/*.*
+/projects/*/datasets
+/models
+/snippet
diff --git a/vendor/detectron2/GETTING_STARTED.md b/vendor/detectron2/GETTING_STARTED.md
new file mode 100644
index 0000000000000000000000000000000000000000..404b0c8f467264d1adf61e8274e5f864e24018e8
--- /dev/null
+++ b/vendor/detectron2/GETTING_STARTED.md
@@ -0,0 +1,79 @@
+## Getting Started with Detectron2
+
+This document provides a brief intro of the usage of builtin command-line tools in detectron2.
+
+For a tutorial that involves actual coding with the API,
+see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
+which covers how to run inference with an
+existing model, and how to train a builtin model on a custom dataset.
+
+
+### Inference Demo with Pre-trained Models
+
+1. Pick a model and its config file from
+ [model zoo](MODEL_ZOO.md),
+ for example, `mask_rcnn_R_50_FPN_3x.yaml`.
+2. We provide `demo.py`, which can run inference with the builtin configs. Run it with:
+```
+cd demo/
+python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+ --input input1.jpg input2.jpg \
+ [--other-options]
+ --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
+```
+The configs are made for training, so for evaluation you need to point `MODEL.WEIGHTS` to a model from the model zoo.
+This command will run the inference and show visualizations in an OpenCV window.
+
+For details of the command line arguments, see `demo.py -h` or look at its source code
+to understand its behavior. Some common arguments are:
+* To run __on your webcam__, replace `--input files` with `--webcam`.
+* To run __on a video__, replace `--input files` with `--video-input video.mp4`.
+* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
+* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
+
+
+### Training & Evaluation in Command Line
+
+We provide two scripts, "tools/plain_train_net.py" and "tools/train_net.py",
+that can train all the configs provided in detectron2. You may want to
+use them as a reference to write your own training script.
+
+Compared to "train_net.py", "plain_train_net.py" supports fewer default
+features. It also contains fewer abstractions, which makes it easier to add custom
+logic.
+
+To train a model with "train_net.py", first
+setup the corresponding datasets following
+[datasets/README.md](./datasets/README.md),
+then run:
+```
+cd tools/
+./train_net.py --num-gpus 8 \
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+```
+
+The configs are made for 8-GPU training.
+To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
+```
+./train_net.py \
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
+ --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
+```
+
+To evaluate a model's performance, use
+```
+./train_net.py \
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
+ --eval-only MODEL.WEIGHTS /path/to/checkpoint_file
+```
+For more options, see `./train_net.py -h`.
+
+### Use Detectron2 APIs in Your Code
+
+See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
+to learn how to use detectron2 APIs to:
+1. run inference with an existing model
+2. train a builtin model on a custom dataset
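+
+A minimal sketch of (1), assuming detectron2 is installed and using one of the builtin model zoo configs (the image path is a placeholder):
+```
+import cv2
+from detectron2 import model_zoo
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+
+# Build a config from a builtin model zoo config and point it at the pre-trained weights.
+cfg = get_cfg()
+cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
+cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
+
+# Run inference on a single BGR image read by OpenCV.
+predictor = DefaultPredictor(cfg)
+outputs = predictor(cv2.imread("input1.jpg"))
+print(outputs["instances"].pred_classes)
+```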
+
+See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/main/projects)
+for more ways to build your project on detectron2.
diff --git a/vendor/detectron2/INSTALL.md b/vendor/detectron2/INSTALL.md
new file mode 100644
index 0000000000000000000000000000000000000000..f522e6f624372f39ee5366f5b032c0cd1ebcf5c8
--- /dev/null
+++ b/vendor/detectron2/INSTALL.md
@@ -0,0 +1,261 @@
+## Installation
+
+### Requirements
+- Linux or macOS with Python ≥ 3.7
+- PyTorch ≥ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
+ Install them together at [pytorch.org](https://pytorch.org) to make sure of this
+- OpenCV is optional but needed by the demo and visualization
+
+
+### Build Detectron2 from Source
+
+gcc & g++ ≥ 5.4 are required. [ninja](https://ninja-build.org/) is optional but recommended for a faster build.
+After having them, run:
+```
+python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+# (add --user if you don't have permission)
+
+# Or, to install it from a local clone:
+git clone https://github.com/facebookresearch/detectron2.git
+python -m pip install -e detectron2
+
+# On macOS, you may need to prepend the above commands with a few environment variables:
+CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ...
+```
+
+To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
+old build first. You often need to rebuild detectron2 after reinstalling PyTorch.
+
+### Install Pre-Built Detectron2 (Linux only)
+
+Choose from this table to install [v0.6 (Oct 2021)](https://github.com/facebookresearch/detectron2/releases):
+
+Every entry is installed with `python -m pip install detectron2 -f <wheel index URL>`; pick the wheel index that matches your CUDA and torch versions:
+
+| CUDA | torch 1.10 | torch 1.9 | torch 1.8 |
+| --- | --- | --- | --- |
+| 11.3 | [cu113/torch1.10](https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html) | | |
+| 11.1 | [cu111/torch1.10](https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html) | [cu111/torch1.9](https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html) | [cu111/torch1.8](https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.8/index.html) |
+| 10.2 | [cu102/torch1.10](https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.10/index.html) | [cu102/torch1.9](https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html) | [cu102/torch1.8](https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.8/index.html) |
+| 10.1 | | | [cu101/torch1.8](https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html) |
+| cpu | [cpu/torch1.10](https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html) | [cpu/torch1.9](https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html) | [cpu/torch1.8](https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.8/index.html) |
+
+
+Note that:
+1. The pre-built packages have to be used with the corresponding version of CUDA and the official PyTorch package.
+ Otherwise, please build detectron2 from source.
+2. New packages are released every few months. Therefore, packages may not contain the latest features in the main
+ branch and may not be compatible with the main branch of a research project that uses detectron2
+ (e.g. those in [projects](projects)).
+
+### Common Installation Issues
+
+Click each issue for its solutions:
+
+
+
+Undefined symbols that look like "TH..", "at::Tensor...", "torch..."
+
+
+
+This usually happens when detectron2 or torchvision is not
+compiled with the version of PyTorch you're running.
+
+If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
+following [pytorch.org](http://pytorch.org) so that the versions match.
+
+If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases),
+uninstall and reinstall the correct pre-built detectron2 that matches your PyTorch version.
+
+If the error comes from detectron2 or torchvision that you built manually from source,
+remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment.
+
+If the above instructions do not resolve this problem, please provide an environment (e.g. a dockerfile) that can reproduce the issue.
+
+
+
+
+Missing torch dynamic libraries, OR segmentation fault immediately when using detectron2.
+
+This usually happens when detectron2 or torchvision is not
+compiled with the version of PyTorch you're running. See the previous common issue for the solution.
+
+
+
+
+Undefined C++ symbols (e.g. "GLIBCXX..") or C++ symbols not found.
+
+
+Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
+
+This often happens with old anaconda.
+It may help to run `conda update libgcc` to upgrade its runtime.
+
+The fundamental solution is to avoid the mismatch, either by compiling with an older version of the C++
+compiler, or by running the code with the proper C++ runtime.
+To run the code with a specific C++ runtime, you can use environment variable `LD_PRELOAD=/path/to/libstdc++.so`.
+
+
+
+
+
+"nvcc not found" or "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
+
+
+CUDA is not found when building detectron2.
+You should make sure
+
+```
+python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
+```
+
+prints `(True, a directory with cuda)` at the time you build detectron2.
+
+Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
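+
+For example, a minimal sketch of forcing CPU inference on a yacs config (the rest of the config setup is omitted here):
+```
+from detectron2.config import get_cfg
+
+cfg = get_cfg()
+# ... merge your model config and weights as usual ...
+cfg.MODEL.DEVICE = "cpu"  # run inference on CPU, no CUDA build required
+```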
+
+
+
+
+"invalid device function" or "no kernel image is available for execution".
+
+
+Two possibilities:
+
+* You built detectron2 with one version of CUDA but are running it with a different version.
+
+ To check whether it is the case,
+ use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
+ In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
+ to contain cuda libraries of the same version.
+
+ When they are inconsistent,
+ you need to either install a different build of PyTorch (or build by yourself)
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
+
+* PyTorch/torchvision/Detectron2 is not built for the correct GPU SM architecture (aka. compute capability).
+
+ The architecture included by PyTorch/detectron2/torchvision is available in the "architecture flags" in
+ `python -m detectron2.utils.collect_env`. It must include
+ the architecture of your GPU, which can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).
+
+ If you're using pre-built PyTorch/detectron2/torchvision, they have included support for most popular GPUs already.
+ If not supported, you need to build them from source.
+
+ When building detectron2/torchvision from source, they detect the GPU available at build time and build only for that device.
+ This means the compiled code may not work on a different GPU device.
+ To recompile them for the correct architecture, remove all installed/compiled files,
+ and rebuild them with the `TORCH_CUDA_ARCH_LIST` environment variable set properly.
+ For example, `export TORCH_CUDA_ARCH_LIST="6.0;7.0"` makes it compile for both P100s and V100s.
+
+
+
+
+Undefined CUDA symbols; Cannot open libcudart.so
+
+
+The version of NVCC you use to build detectron2 or torchvision does
+not match the version of CUDA you are running with.
+This often happens when using anaconda's CUDA runtime.
+
+Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
+In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
+to contain cuda libraries of the same version.
+
+When they are inconsistent,
+you need to either install a different build of PyTorch (or build by yourself)
+to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
+
+
+
+
+
+C++ compilation errors from NVCC / NVRTC, or "Unsupported gpu architecture"
+
+
+A few possibilities:
+
+1. Local CUDA/NVCC version has to match the CUDA version of your PyTorch. Both can be found in `python collect_env.py`
+ (download from [here](./detectron2/utils/collect_env.py)).
+ When they are inconsistent, you need to either install a different build of PyTorch (or build by yourself)
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
+
+2. Local CUDA/NVCC version shall support the SM architecture (a.k.a. compute capability) of your GPU.
+ The capability of your GPU can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).
+ The capability supported by NVCC is listed at [here](https://gist.github.com/ax3l/9489132).
+ If your NVCC version is too old, this can be worked around by setting the environment variable
+ `TORCH_CUDA_ARCH_LIST` to a lower, supported capability.
+
+3. The combination of NVCC and GCC you use is incompatible. You need to change one of their versions.
+ See [here](https://gist.github.com/ax3l/9489132) for some valid combinations.
+ Notably, CUDA<=10.1.105 doesn't support GCC>7.3.
+
+ The CUDA/GCC version used by PyTorch can be found by `print(torch.__config__.show())`.
+
+
+
+
+
+
+"ImportError: cannot import name '_C'".
+
+
+Please build and install detectron2 following the instructions above.
+
+Or, if you are running code from detectron2's root directory, `cd` to a different one.
+Otherwise you may not import the code that you installed.
+
+
+
+
+
+Any issue on windows.
+
+
+
+Detectron2 is continuously built on Windows with [CircleCI](https://app.circleci.com/pipelines/github/facebookresearch/detectron2?branch=main).
+However, we do not provide official support for it.
+PRs that improve code compatibility on Windows are welcome.
+
+
+
+
+ONNX conversion segfault after some "TraceWarning".
+
+
+The ONNX package was compiled with too old a compiler.
+
+Please build and install ONNX from its source code using a compiler
+whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
+
+
+
+
+
+"library not found for -lstdc++" on older version of MacOS
+
+
+
+See [this stackoverflow answer](https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package).
+
+
+
+
+### Installation inside specific environments:
+
+* __Colab__: see our [Colab Tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
+ which has step-by-step instructions.
+
+* __Docker__: The official [Dockerfile](docker) installs detectron2 with a few simple commands.
diff --git a/vendor/detectron2/LICENSE b/vendor/detectron2/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..cd1b070674331757508398d99c830664dce6eaec
--- /dev/null
+++ b/vendor/detectron2/LICENSE
@@ -0,0 +1,202 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/vendor/detectron2/MODEL_ZOO.md b/vendor/detectron2/MODEL_ZOO.md
new file mode 100644
index 0000000000000000000000000000000000000000..69db2728563c680e89a0d5d3e6ba272b8d78bdbd
--- /dev/null
+++ b/vendor/detectron2/MODEL_ZOO.md
@@ -0,0 +1,1052 @@
+# Detectron2 Model Zoo and Baselines
+
+## Introduction
+
+This file documents a large collection of baselines trained
+with detectron2 in Sep-Oct, 2019.
+All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/)
+servers with 8 NVIDIA V100 GPUs & NVLink. The speed numbers are periodically updated with the latest PyTorch/CUDA/cuDNN versions.
+You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs.
+
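+For example, a minimal sketch of using the model zoo API (the config path is just one entry from the tables below):
+```
+from detectron2 import model_zoo
+
+config_path = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
+print(model_zoo.get_config_file(config_path))     # local path of the yaml config
+print(model_zoo.get_checkpoint_url(config_path))  # download URL of the trained weights
+model = model_zoo.get(config_path, trained=True)  # build the model with trained weights loaded
+```
+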
+In addition to these official baseline models, you can find more models in [projects/](projects/).
+
+#### How to Read the Tables
+* The "Name" column contains a link to the config file. Models can be reproduced using `tools/train_net.py` with the corresponding yaml config file,
+ or `tools/lazyconfig_train_net.py` for python config files.
+* Training speed is averaged across the entire training.
+ We keep updating the speed with the latest version of detectron2/pytorch/etc.,
+ so the numbers here might differ from those in the `metrics` file.
+ Training speed for multi-machine jobs is not provided.
+* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset),
+ with batch size 1 in detectron2 directly.
+ Measuring it with custom code may introduce other overhead.
+ Actual deployment in production should in general be faster than the given inference
+ speed due to more optimizations.
+* The *model id* column is provided for ease of reference.
+ To check downloaded file integrity, every model on this page includes its md5 prefix in its file name.
+* Training curves and other statistics can be found in `metrics` for each model.
+
+#### Common Settings for COCO Models
+* All COCO models were trained on `train2017` and evaluated on `val2017`.
+* The default settings are __not directly comparable__ with Detectron's standard settings.
+ For example, our default training data augmentation uses scale jittering in addition to horizontal flipping.
+
+ To make fair comparisons with Detectron's settings, see
+ [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison,
+ and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html)
+ for speed comparison.
+* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__:
+ * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction,
+ respectively. It obtains the best
+ speed/accuracy tradeoff, but the other two are still useful for research.
+ * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper.
+ * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads
+ for mask and box prediction, respectively.
+ This is used by the Deformable ConvNet paper.
+* Most models are trained with the 3x schedule (~37 COCO epochs).
+ Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs)
+ training schedule for comparison when doing quick research iteration.
+
+#### ImageNet Pretrained Models
+
+It's common to initialize from backbone models pre-trained on ImageNet classification tasks. The following backbone models are available:
+
+* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model.
+* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model.
+* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB.
+* [R-50.pkl (torchvision)](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/torchvision/R-50.pkl): converted copy of [torchvision's ResNet-50](https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.resnet50) model.
+ More details can be found in [the conversion script](tools/convert-torchvision-to-d2.py).
+
+Note that the above models have a __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer.
+Pretrained models in Detectron's format can still be used. For example:
+* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl):
+ ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k).
+* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl):
+ ResNet-50 with Group Normalization.
+* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl):
+ ResNet-101 with Group Normalization.
+
+These models require slightly different settings regarding normalization and architecture. See the model zoo configs for reference.
+
+#### License
+
+All models available for download through this document are licensed under the
+[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).
+
+### COCO Object Detection Baselines
+
+#### Faster R-CNN:
+
+
+
+
+
+#### RetinaNet:
+
+
+
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| R50 | 1x | 0.205 | 0.041 | 4.1 | 37.4 | 190397773 | model \| metrics |
+| R50 | 3x | 0.205 | 0.041 | 4.1 | 38.7 | 190397829 | model \| metrics |
+| R101 | 3x | 0.291 | 0.054 | 5.2 | 40.4 | 190397697 | model \| metrics |
+
+
+
+
+#### RPN & Fast R-CNN:
+
+
+
+
+### COCO Instance Segmentation Baselines with Mask R-CNN
+
+
+
+
+
+
+
+
+#### New baselines using Large-Scale Jitter and Longer Training Schedule
+
+The following baselines of COCO Instance Segmentation with Mask R-CNN are generated
+using a longer training schedule and large-scale jitter as described in Google's
+[Simple Copy-Paste Data Augmentation](https://arxiv.org/pdf/2012.07177.pdf) paper. These
+models are trained from scratch using random initialization. These baselines exceed the
+previous Mask R-CNN baselines.
+
+In the following table, one epoch consists of training on 118000 COCO images.
+
+
+
+### COCO Person Keypoint Detection Baselines with Keypoint R-CNN
+
+
+
+
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R50-FPN | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | model \| metrics |
+| R50-FPN | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | model \| metrics |
+| R101-FPN | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | model \| metrics |
+| X101-FPN | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | model \| metrics |
+
+
+
+### COCO Panoptic Segmentation Baselines with Panoptic FPN
+
+
+
+
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R50-FPN | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | model \| metrics |
+| R50-FPN | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | model \| metrics |
+| R101-FPN | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | model \| metrics |
+
+
+
+
+### LVIS Instance Segmentation Baselines with Mask R-CNN
+
+Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
+These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).
+
+NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
+They are roughly 24 epochs of LVISv0.5 data.
+The final results of these configs have large variance across different runs.
+
+
+
+
+
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R50-FPN | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | model \| metrics |
+| R101-FPN | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | model \| metrics |
+| X101-FPN | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | model \| metrics |
+
+
+
+
+
+### Cityscapes & Pascal VOC Baselines
+
+Simple baselines for
+* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
+* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)
+
+
+
+
+
+
+
+| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R50-FPN, Cityscapes | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | model \| metrics |
+| R50-C4, VOC | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | model \| metrics |
+
+
+
+
+
+### Other Settings
+
+Ablations for Deformable Conv and Cascade R-CNN:
+
+
+
+
+
+
+
+Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
+(Note: The baseline uses `2fc` head while the others use [`4conv1fc` head](https://arxiv.org/abs/1803.08494))
+
+
+
+
+
+
+A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:
+
+
+
+
+
+
+
+| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| Panoptic FPN R101 | 0.098 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | model \| metrics |
+| Mask R-CNN X152 | 0.234 | 15.1 | 50.2 | 44.0 | | 18131413 | model \| metrics |
+| above + test-time aug. | | | 51.9 | 45.9 | | | |
+
+
diff --git a/vendor/detectron2/README.md b/vendor/detectron2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..75db3c52f216dbcff9a4730ff0fa139853fc4670
--- /dev/null
+++ b/vendor/detectron2/README.md
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+Detectron2 is Facebook AI Research's next generation library
+that provides state-of-the-art detection and segmentation algorithms.
+It is the successor of
+[Detectron](https://github.com/facebookresearch/Detectron/)
+and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/).
+It supports a number of computer vision research projects and production applications at Facebook.
+
+
+
+
+
+
+## Learn More about Detectron2
+
+Explain Like I’m 5: Detectron2 | Using Machine Learning with Detectron2
+:-------------------------:|:-------------------------:
+[![Explain Like I’m 5: Detectron2](https://img.youtube.com/vi/1oq1Ye7dFqc/0.jpg)](https://www.youtube.com/watch?v=1oq1Ye7dFqc) | [![Using Machine Learning with Detectron2](https://img.youtube.com/vi/eUSgtfK4ivk/0.jpg)](https://www.youtube.com/watch?v=eUSgtfK4ivk)
+
+## What's New
+* Includes new capabilities such as panoptic segmentation, Densepose, Cascade R-CNN, rotated bounding boxes, PointRend,
+ DeepLab, ViTDet, MViTv2 etc.
+* Used as a library to support building [research projects](projects/) on top of it.
+* Models can be exported to TorchScript format or Caffe2 format for deployment.
+* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html).
+
+Read our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/)
+to see more demos and learn about detectron2.
+
+## Installation
+
+See [installation instructions](https://detectron2.readthedocs.io/tutorials/install.html).
+
+## Getting Started
+
+See [Getting Started with Detectron2](https://detectron2.readthedocs.io/tutorials/getting_started.html),
+and the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
+to learn about basic usage.
+
+Learn more at our [documentation](https://detectron2.readthedocs.org).
+And see [projects/](projects/) for some projects that are built on top of detectron2.
+
+## Model Zoo and Baselines
+
+We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md).
+
+## License
+
+Detectron2 is released under the [Apache 2.0 license](LICENSE).
+
+## Citing Detectron2
+
+If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.
+
+```BibTeX
+@misc{wu2019detectron2,
+ author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and
+ Wan-Yen Lo and Ross Girshick},
+ title = {Detectron2},
+ howpublished = {\url{https://github.com/facebookresearch/detectron2}},
+ year = {2019}
+}
+```
diff --git a/vendor/detectron2/configs/Base-RCNN-C4.yaml b/vendor/detectron2/configs/Base-RCNN-C4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad
--- /dev/null
+++ b/vendor/detectron2/configs/Base-RCNN-C4.yaml
@@ -0,0 +1,18 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ RPN:
+ PRE_NMS_TOPK_TEST: 6000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "Res5ROIHeads"
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/vendor/detectron2/configs/Base-RCNN-DilatedC5.yaml b/vendor/detectron2/configs/Base-RCNN-DilatedC5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27
--- /dev/null
+++ b/vendor/detectron2/configs/Base-RCNN-DilatedC5.yaml
@@ -0,0 +1,31 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ RESNETS:
+ OUT_FEATURES: ["res5"]
+ RES5_DILATION: 2
+ RPN:
+ IN_FEATURES: ["res5"]
+ PRE_NMS_TOPK_TEST: 6000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["res5"]
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/vendor/detectron2/configs/Base-RCNN-FPN.yaml b/vendor/detectron2/configs/Base-RCNN-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e020f2e7b2f26765be317f907126a1556621abf
--- /dev/null
+++ b/vendor/detectron2/configs/Base-RCNN-FPN.yaml
@@ -0,0 +1,42 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/vendor/detectron2/configs/Base-RetinaNet.yaml b/vendor/detectron2/configs/Base-RetinaNet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8
--- /dev/null
+++ b/vendor/detectron2/configs/Base-RetinaNet.yaml
@@ -0,0 +1,25 @@
+MODEL:
+ META_ARCHITECTURE: "RetinaNet"
+ BACKBONE:
+ NAME: "build_retinanet_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+ FPN:
+ IN_FEATURES: ["res3", "res4", "res5"]
+ RETINANET:
+ IOU_THRESHOLDS: [0.4, 0.5]
+ IOU_LABELS: [0, -1, 1]
+ SMOOTH_L1_LOSS_BETA: 0.0
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/vendor/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ LOAD_PROPOSALS: True
+ RESNETS:
+ DEPTH: 50
+ PROPOSAL_GENERATOR:
+ NAME: "PrecomputedProposals"
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
+ TEST: ("coco_2017_val",)
+ PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+DATALOADER:
+ # proposals are part of the dataset_dicts, and take a lot of RAM
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57a098f53ee8c54ecfa354cc96efefd890dc1b72
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: False
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py b/vendor/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..86f83c68786f5995c462ade5f3067072d69f047e
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py
@@ -0,0 +1,11 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.fcos import model
+from ..common.train import train
+
+dataloader.train.mapper.use_instance_mask = False
+optimizer.lr = 0.01
+
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..43057a8eeed38c78183e26d21b74261eb4dbc1b9
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py
@@ -0,0 +1,11 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.retinanet import model
+from ..common.train import train
+
+dataloader.train.mapper.use_instance_mask = False
+model.backbone.bottom_up.freeze_at = 2
+optimizer.lr = 0.01
+
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/vendor/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ META_ARCHITECTURE: "ProposalNetwork"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ PRE_NMS_TOPK_TEST: 12000
+ POST_NMS_TOPK_TEST: 2000
diff --git a/vendor/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "ProposalNetwork"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ POST_NMS_TOPK_TEST: 2000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..22016be150df4abbe912700d7ca29f8b7b72554a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py
@@ -0,0 +1,8 @@
+from ..common.train import train
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_c4 import model
+
+model.backbone.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..40844ddeb8d47ff58a6af49ab35bad84e14f5721
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,8 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d50fb866ca7811a87b42555c7213f88e00bf6df1
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ BBOX_REG_LOSS_TYPE: "giou"
+ BBOX_REG_LOSS_WEIGHT: 2.0
+ ROI_BOX_HEAD:
+ BBOX_REG_LOSS_TYPE: "giou"
+ BBOX_REG_LOSS_WEIGHT: 10.0
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: True
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7bbdd7d00505f1e51154379c99ab621cb648a6d
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
@@ -0,0 +1,34 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+
+# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=23,
+ w_a=38.65,
+ w_0=96,
+ w_m=2.43,
+ group_width=40,
+ freeze_at=2,
+ norm="FrozenBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+optimizer.weight_decay = 5e-5
+train.init_checkpoint = (
+ "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..72c6b7a5c8939970bd0e1e4a3c1155695943b19a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
@@ -0,0 +1,35 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+
+# Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=22,
+ w_a=31.41,
+ w_0=96,
+ w_m=2.24,
+ group_width=64,
+ se_ratio=0.25,
+ freeze_at=2,
+ norm="FrozenBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+optimizer.weight_decay = 5e-5
+train.init_checkpoint = (
+ "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/vendor/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/vendor/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ KEYPOINT_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 1
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+ RPN:
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_train",)
+ TEST: ("keypoints_coco_2017_val",)
diff --git a/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aad53bfef62fb584d5022585d567e346f671a55
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,8 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_keypoint import dataloader
+from ..common.models.keypoint_rcnn_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,12 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/vendor/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ MASK_ON: True
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_train_panoptic_separated",)
+ TEST: ("coco_2017_val_panoptic_separated",)
+DATALOADER:
+ FILTER_EMPTY_ANNOTATIONS: False
diff --git a/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..40cf18131810307157a9a7d1f6d5922b00fd73d5
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
@@ -0,0 +1,8 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_panoptic_separated import dataloader
+from ..common.models.panoptic_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
diff --git a/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794
--- /dev/null
+++ b/vendor/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/vendor/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e
--- /dev/null
+++ b/vendor/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance, initialize from COCO
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+ MASK_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+ MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+ MIN_SIZE_TRAIN_SAMPLING: "choice"
+ MIN_SIZE_TEST: 1024
+ MAX_SIZE_TRAIN: 2048
+ MAX_SIZE_TEST: 2048
+DATASETS:
+ TRAIN: ("cityscapes_fine_instance_seg_train",)
+ TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+ BASE_LR: 0.01
+ STEPS: (18000,)
+ MAX_ITER: 24000
+ IMS_PER_BATCH: 8
+TEST:
+ EVAL_PERIOD: 8000
diff --git a/vendor/detectron2/configs/Detectron1-Comparisons/README.md b/vendor/detectron2/configs/Detectron1-Comparisons/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..924fd00af642ddf1a4ff4c4f5947f676134eb7de
--- /dev/null
+++ b/vendor/detectron2/configs/Detectron1-Comparisons/README.md
@@ -0,0 +1,84 @@
+
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+ affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as closely as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
+| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | --- |
+| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
+| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
+| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP, which can be
+  compensated for with some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation.
+  See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
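+
+As a minimal sketch (assuming detectron2 is installed), these comparison configs load like any other
+model-zoo entry, so the Detectron1-style settings can be inspected directly:
+
+```python
+from detectron2.model_zoo import get_config
+
+# Load the no-augmentation Faster R-CNN comparison config from this directory.
+cfg = get_config("Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml")
+print(cfg.MODEL.RPN.SMOOTH_L1_BETA)        # 0.1111 -> smooth L1 loss, as in Detectron1
+print(cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE)  # "ROIAlign" instead of ROIAlignV2
+print(cfg.INPUT.MIN_SIZE_TRAIN)            # (800,) -> no scale augmentation
+```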
diff --git a/vendor/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/vendor/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa
--- /dev/null
+++ b/vendor/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/vendor/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808
--- /dev/null
+++ b/vendor/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_train",)
+ TEST: ("keypoints_coco_2017_val",)
diff --git a/vendor/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/vendor/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6
--- /dev/null
+++ b/vendor/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ ROI_MASK_HEAD:
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64b4caa4ef2b284782367ea702e1ae6653472630
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,23 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898
--- /dev/null
+++ b/vendor/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,26 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
diff --git a/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
@@ -0,0 +1,36 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: True
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 152
+ DEFORM_ON_PER_STAGE: [False, True, True, True]
+ ROI_HEADS:
+ NAME: "CascadeROIHeads"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "GN"
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ NUM_CONV: 8
+ NORM: "GN"
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ IMS_PER_BATCH: 128
+ STEPS: (35000, 45000)
+ MAX_ITER: 50000
+ BASE_LR: 0.16
+INPUT:
+ MIN_SIZE_TRAIN: (640, 864)
+ MIN_SIZE_TRAIN_SAMPLING: "range"
+ MAX_SIZE_TRAIN: 1440
+ CROP:
+ ENABLED: True
+TEST:
+ EVAL_PERIOD: 2500
diff --git a/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ CLS_AGNOSTIC_MASK: True
diff --git a/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
diff --git a/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ NORM: "GN"
+ STRIDE_IN_1X1: False
+ FPN:
+ NORM: "GN"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "GN"
+ ROI_MASK_HEAD:
+ NORM: "GN"
+SOLVER:
+ # 3x schedule
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
@@ -0,0 +1,24 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ NORM: "SyncBN"
+ STRIDE_IN_1X1: True
+ FPN:
+ NORM: "SyncBN"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "SyncBN"
+ ROI_MASK_HEAD:
+ NORM: "SyncBN"
+SOLVER:
+ # 3x schedule
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
+TEST:
+ PRECISE_BN:
+ ENABLED: True
diff --git a/vendor/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/vendor/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdd49a4566d1d0c79d0613c34a8cffd616f74fd2
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,152 @@
+# An example config to train a mmdetection model using detectron2.
+
+from ..common.data.coco import dataloader
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+from ..common.data.constants import constants
+
+from detectron2.modeling.mmdet_wrapper import MMDetDetector
+from detectron2.config import LazyCall as L
+
+model = L(MMDetDetector)(
+ detector=dict(
+ type="MaskRCNN",
+ pretrained="torchvision://resnet50",
+ backbone=dict(
+ type="ResNet",
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type="BN", requires_grad=True),
+ norm_eval=True,
+ style="pytorch",
+ ),
+ neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+ rpn_head=dict(
+ type="RPNHead",
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type="AnchorGenerator",
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64],
+ ),
+ bbox_coder=dict(
+ type="DeltaXYWHBBoxCoder",
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0],
+ ),
+ loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+ ),
+ roi_head=dict(
+ type="StandardRoIHead",
+ bbox_roi_extractor=dict(
+ type="SingleRoIExtractor",
+ roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32],
+ ),
+ bbox_head=dict(
+ type="Shared2FCBBoxHead",
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type="DeltaXYWHBBoxCoder",
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2],
+ ),
+ reg_class_agnostic=False,
+ loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+ ),
+ mask_roi_extractor=dict(
+ type="SingleRoIExtractor",
+ roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32],
+ ),
+ mask_head=dict(
+ type="FCNMaskHead",
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=80,
+ loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+ ),
+ ),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type="MaxIoUAssigner",
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1,
+ ),
+ sampler=dict(
+ type="RandomSampler",
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False,
+ ),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False,
+ ),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type="nms", iou_threshold=0.7),
+ min_bbox_size=0,
+ ),
+ rcnn=dict(
+ assigner=dict(
+ type="MaxIoUAssigner",
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=True,
+ ignore_iof_thr=-1,
+ ),
+ sampler=dict(
+ type="RandomSampler",
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True,
+ ),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False,
+ ),
+ ),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type="nms", iou_threshold=0.7),
+ min_bbox_size=0,
+ ),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type="nms", iou_threshold=0.5),
+ max_per_img=100,
+ mask_thr_binary=0.5,
+ ),
+ ),
+ ),
+ pixel_mean=constants.imagenet_rgb256_mean,
+ pixel_std=constants.imagenet_rgb256_std,
+)
+
+dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model
+train.init_checkpoint = None # pretrained model is loaded inside backbone
diff --git a/vendor/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/vendor/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
@@ -0,0 +1,26 @@
+# A large PanopticFPN for demo purposes.
+# Use GN on backbone to support semantic seg.
+# Use Cascade + Deform Conv to improve localization.
+_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+ RESNETS:
+ DEPTH: 101
+ NORM: "GN"
+ DEFORM_ON_PER_STAGE: [False, True, True, True]
+ STRIDE_IN_1X1: False
+ FPN:
+ NORM: "GN"
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ NORM: "GN"
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ STEPS: (105000, 125000)
+ MAX_ITER: 135000
+ IMS_PER_BATCH: 32
+ BASE_LR: 0.04
diff --git a/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,13 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+ # Train from random initialization.
+ WEIGHTS: ""
+ # It makes sense to divide by STD when training from scratch
+ # But it seems to make no difference on the results and C2's models didn't do this.
+ # So we keep things consistent with C2.
+ # PIXEL_STD: [57.375, 57.12, 58.395]
+ MASK_ON: True
+ BACKBONE:
+ FREEZE_AT: 0
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+ PIXEL_STD: [57.375, 57.12, 58.395]
+ WEIGHTS: ""
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False
+ BACKBONE:
+ FREEZE_AT: 0
+SOLVER:
+ # 9x schedule
+ IMS_PER_BATCH: 64 # 4x the standard
+ STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
+ MAX_ITER: 202500 # 90k * 9 / 4
+ BASE_LR: 0.08
+TEST:
+ EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+MODEL:
+ PIXEL_STD: [57.375, 57.12, 58.395]
+ WEIGHTS: ""
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False
+ BACKBONE:
+ FREEZE_AT: 0
+SOLVER:
+ # 9x schedule
+ IMS_PER_BATCH: 64 # 4x the standard
+ STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
+ MAX_ITER: 202500 # 90k * 9 / 4
+ BASE_LR: 0.08
+TEST:
+ EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/vendor/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml b/vendor/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_panoptic_stuffonly",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
diff --git a/vendor/detectron2/configs/Misc/torchvision_imagenet_R_50.py b/vendor/detectron2/configs/Misc/torchvision_imagenet_R_50.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d75305bcf7445b98db84b3d489a1505d2fce5af
--- /dev/null
+++ b/vendor/detectron2/configs/Misc/torchvision_imagenet_R_50.py
@@ -0,0 +1,150 @@
+"""
+An example config file to train an ImageNet classifier with detectron2.
+Model and dataloader both come from torchvision.
+This shows how to use detectron2 as a general engine for any new models and tasks.
+
+To run, use the following command:
+
+python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+ --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+"""
+
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from omegaconf import OmegaConf
+import torchvision
+from torchvision.transforms import transforms as T
+from torchvision.models.resnet import ResNet, Bottleneck
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.solver import WarmupParamScheduler
+from detectron2.solver.build import get_default_optimizer_params
+from detectron2.config import LazyCall as L
+from detectron2.model_zoo import get_config
+from detectron2.data.samplers import TrainingSampler, InferenceSampler
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.utils import comm
+
+
+"""
+Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
+Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+In practice, you might want to put the code in your project and import it instead.
+"""
+
+
+def build_data_loader(dataset, batch_size, num_workers, training=True):
+ return torch.utils.data.DataLoader(
+ dataset,
+ sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
+ batch_size=batch_size,
+ num_workers=num_workers,
+ pin_memory=True,
+ )
+
+
+class ClassificationNet(nn.Module):
+ def __init__(self, model: nn.Module):
+ super().__init__()
+ self.model = model
+
+ @property
+ def device(self):
+ return list(self.model.parameters())[0].device
+
+ def forward(self, inputs):
+ image, label = inputs
+ pred = self.model(image.to(self.device))
+ if self.training:
+ label = label.to(self.device)
+ return F.cross_entropy(pred, label)
+ else:
+ return pred
+
+
+class ClassificationAcc(DatasetEvaluator):
+ def reset(self):
+ self.corr = self.total = 0
+
+ def process(self, inputs, outputs):
+ image, label = inputs
+ self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+ self.total += len(label)
+
+ def evaluate(self):
+ all_corr_total = comm.all_gather([self.corr, self.total])
+ corr = sum(x[0] for x in all_corr_total)
+ total = sum(x[1] for x in all_corr_total)
+ return {"accuracy": corr / total}
+
+
+# --- End of code that could be in a project and be imported
+
+
+dataloader = OmegaConf.create()
+dataloader.train = L(build_data_loader)(
+ dataset=L(torchvision.datasets.ImageNet)(
+ root="/path/to/imagenet",
+ split="train",
+ transform=L(T.Compose)(
+ transforms=[
+ L(T.RandomResizedCrop)(size=224),
+ L(T.RandomHorizontalFlip)(),
+ T.ToTensor(),
+ L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+ ]
+ ),
+ ),
+ batch_size=256 // 8,
+ num_workers=4,
+ training=True,
+)
+
+dataloader.test = L(build_data_loader)(
+ dataset=L(torchvision.datasets.ImageNet)(
+ root="${...train.dataset.root}",
+ split="val",
+ transform=L(T.Compose)(
+ transforms=[
+ L(T.Resize)(size=256),
+ L(T.CenterCrop)(size=224),
+ T.ToTensor(),
+ L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+ ]
+ ),
+ ),
+ batch_size=256 // 8,
+ num_workers=4,
+ training=False,
+)
+
+dataloader.evaluator = L(ClassificationAcc)()
+
+model = L(ClassificationNet)(
+ model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+)
+
+
+optimizer = L(torch.optim.SGD)(
+ params=L(get_default_optimizer_params)(),
+ lr=0.1,
+ momentum=0.9,
+ weight_decay=1e-4,
+)
+
+lr_multiplier = L(WarmupParamScheduler)(
+ scheduler=L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+ ),
+ warmup_length=1 / 100,
+ warmup_factor=0.1,
+)
+
+
+train = get_config("common/train.py").train
+train.init_checkpoint = None
+train.max_iter = 100 * 1281167 // 256
diff --git a/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e
--- /dev/null
+++ b/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 20
+INPUT:
+ MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+ MIN_SIZE_TEST: 800
+DATASETS:
+ TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+ TEST: ('voc_2007_test',)
+SOLVER:
+ STEPS: (12000, 16000)
+ MAX_ITER: 18000 # 17.4 epochs
+ WARMUP_ITERS: 100
diff --git a/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514
--- /dev/null
+++ b/vendor/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 20
+INPUT:
+ MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+ MIN_SIZE_TEST: 800
+DATASETS:
+ TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+ TEST: ('voc_2007_test',)
+SOLVER:
+ STEPS: (12000, 16000)
+ MAX_ITER: 18000 # 17.4 epochs
+ WARMUP_ITERS: 100
diff --git a/vendor/detectron2/configs/common/README.md b/vendor/detectron2/configs/common/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..912cc29927542bfe4258d3208cf52d73cb0ea477
--- /dev/null
+++ b/vendor/detectron2/configs/common/README.md
@@ -0,0 +1,6 @@
+This directory provides definitions for a few common models, dataloaders, schedulers,
+and optimizers that are often used in training.
+The definitions of these objects are provided in the form of lazy instantiation:
+their arguments can be edited by users before constructing the objects.
+
+They can be imported, or loaded by `model_zoo.get_config` API in users' own configs.
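+
+A minimal sketch of that workflow (assuming detectron2 is installed; the file and attribute names
+below mirror the configs in this directory):
+
+```python
+import torch.nn as nn
+
+from detectron2.config import instantiate
+from detectron2.model_zoo import get_config
+
+# Load the lazily-defined SGD config from common/optim.py and edit arguments
+# before anything is constructed.
+optim = get_config("common/optim.py").SGD
+optim.lr = 0.1                        # override the default learning rate
+optim.params.model = nn.Linear(8, 2)  # params is lazy too and needs a model to pull parameters from
+optimizer = instantiate(optim)        # now build a regular torch.optim.SGD
+```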
diff --git a/vendor/detectron2/configs/common/coco_schedule.py b/vendor/detectron2/configs/common/coco_schedule.py
new file mode 100644
index 0000000000000000000000000000000000000000..355e66a1d213cb599a7ffe55089d854089c8ead2
--- /dev/null
+++ b/vendor/detectron2/configs/common/coco_schedule.py
@@ -0,0 +1,47 @@
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+
+
+def default_X_scheduler(num_X):
+ """
+ Returns the config for a default multi-step LR scheduler such as "1x", "3x",
+ commonly referred to in papers, where every 1x has the total length of 1440k
+ training images (~12 COCO epochs). LR is decayed twice at the end of training
+ following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+
+ Args:
+ num_X: a positive real number
+
+ Returns:
+ DictConfig: configs that define the multiplier for LR during training
+ """
+ # total number of iterations assuming 16 batch size, using 1440000/16=90000
+ total_steps_16bs = num_X * 90000
+
+ if num_X <= 2:
+ scheduler = L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01],
+ # note that scheduler is scale-invariant. This is equivalent to
+ # milestones=[6, 8, 9]
+ milestones=[60000, 80000, 90000],
+ )
+ else:
+ scheduler = L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01],
+ milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
+ )
+ return L(WarmupParamScheduler)(
+ scheduler=scheduler,
+ warmup_length=1000 / total_steps_16bs,
+ warmup_method="linear",
+ warmup_factor=0.001,
+ )
+
+
+lr_multiplier_1x = default_X_scheduler(1)
+lr_multiplier_2x = default_X_scheduler(2)
+lr_multiplier_3x = default_X_scheduler(3)
+lr_multiplier_6x = default_X_scheduler(6)
+lr_multiplier_9x = default_X_scheduler(9)
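+
+# Worked example: default_X_scheduler(3) gives total_steps_16bs = 3 * 90000 = 270000,
+# so milestones = [270000 - 60000, 270000 - 20000, 270000] = [210000, 250000, 270000],
+# i.e. the STEPS: (210000, 250000) / MAX_ITER: 270000 pair used by the 3x yaml configs.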
diff --git a/vendor/detectron2/configs/common/data/coco.py b/vendor/detectron2/configs/common/data/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..703c4385c7ddc7eb0759c98d102ab2384d6a9e3e
--- /dev/null
+++ b/vendor/detectron2/configs/common/data/coco.py
@@ -0,0 +1,48 @@
+from omegaconf import OmegaConf
+
+import detectron2.data.transforms as T
+from detectron2.config import LazyCall as L
+from detectron2.data import (
+ DatasetMapper,
+ build_detection_test_loader,
+ build_detection_train_loader,
+ get_detection_dataset_dicts,
+)
+from detectron2.evaluation import COCOEvaluator
+
+dataloader = OmegaConf.create()
+
+dataloader.train = L(build_detection_train_loader)(
+ dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+ mapper=L(DatasetMapper)(
+ is_train=True,
+ augmentations=[
+ L(T.ResizeShortestEdge)(
+ short_edge_length=(640, 672, 704, 736, 768, 800),
+ sample_style="choice",
+ max_size=1333,
+ ),
+ L(T.RandomFlip)(horizontal=True),
+ ],
+ image_format="BGR",
+ use_instance_mask=True,
+ ),
+ total_batch_size=16,
+ num_workers=4,
+)
+
+dataloader.test = L(build_detection_test_loader)(
+ dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+ mapper=L(DatasetMapper)(
+ is_train=False,
+ augmentations=[
+ L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+ ],
+ image_format="${...train.mapper.image_format}",
+ ),
+ num_workers=4,
+)
+
+dataloader.evaluator = L(COCOEvaluator)(
+ dataset_name="${..test.dataset.names}",
+)
diff --git a/vendor/detectron2/configs/common/data/coco_keypoint.py b/vendor/detectron2/configs/common/data/coco_keypoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4ceb066faf696954244205dc75376b767071217
--- /dev/null
+++ b/vendor/detectron2/configs/common/data/coco_keypoint.py
@@ -0,0 +1,13 @@
+from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+from .coco import dataloader
+
+dataloader.train.dataset.min_keypoints = 1
+dataloader.train.dataset.names = "keypoints_coco_2017_train"
+dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+dataloader.train.mapper.update(
+ use_instance_mask=False,
+ use_keypoint=True,
+ keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+)
diff --git a/vendor/detectron2/configs/common/data/coco_panoptic_separated.py b/vendor/detectron2/configs/common/data/coco_panoptic_separated.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d
--- /dev/null
+++ b/vendor/detectron2/configs/common/data/coco_panoptic_separated.py
@@ -0,0 +1,26 @@
+from detectron2.config import LazyCall as L
+from detectron2.evaluation import (
+ COCOEvaluator,
+ COCOPanopticEvaluator,
+ DatasetEvaluators,
+ SemSegEvaluator,
+)
+
+from .coco import dataloader
+
+dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+dataloader.train.dataset.filter_empty = False
+dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+dataloader.evaluator = [
+ L(COCOEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+ L(SemSegEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+ L(COCOPanopticEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+]
diff --git a/vendor/detectron2/configs/common/data/constants.py b/vendor/detectron2/configs/common/data/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..be11cb5ac7c32a260af96ed27c32ed767b2f2bcd
--- /dev/null
+++ b/vendor/detectron2/configs/common/data/constants.py
@@ -0,0 +1,9 @@
+constants = dict(
+ imagenet_rgb256_mean=[123.675, 116.28, 103.53],
+ imagenet_rgb256_std=[58.395, 57.12, 57.375],
+ imagenet_bgr256_mean=[103.530, 116.280, 123.675],
+ # When using pre-trained models in Detectron1 or any MSRA models,
+    # std has been absorbed into its conv1 weights, so the std needs to be set to 1.
+ # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
+ imagenet_bgr256_std=[1.0, 1.0, 1.0],
+)
diff --git a/vendor/detectron2/configs/common/models/cascade_rcnn.py b/vendor/detectron2/configs/common/models/cascade_rcnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7372a801dc00d7fec4db8cda8c2612ce281d48a
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/cascade_rcnn.py
@@ -0,0 +1,36 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
+
+from .mask_rcnn_fpn import model
+
+# arguments that don't exist for Cascade R-CNN
+[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+model.roi_heads.update(
+ _target_=CascadeROIHeads,
+ box_heads=[
+ L(FastRCNNConvFCHead)(
+ input_shape=ShapeSpec(channels=256, height=7, width=7),
+ conv_dims=[],
+ fc_dims=[1024, 1024],
+ )
+ for k in range(3)
+ ],
+ box_predictors=[
+ L(FastRCNNOutputLayers)(
+ input_shape=ShapeSpec(channels=1024),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+ cls_agnostic_bbox_reg=True,
+ num_classes="${...num_classes}",
+ )
+ for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+ ],
+ proposal_matchers=[
+ L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+ for th in [0.5, 0.6, 0.7]
+ ],
+)
diff --git a/vendor/detectron2/configs/common/models/fcos.py b/vendor/detectron2/configs/common/models/fcos.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c752029b7fc64ec375a55182e5342c9eb48bb33
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/fcos.py
@@ -0,0 +1,23 @@
+from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead
+
+from .retinanet import model
+
+model._target_ = FCOS
+
+del model.anchor_generator
+del model.box2box_transform
+del model.anchor_matcher
+del model.input_format
+
+# Use P5 instead of C5 to compute P6/P7
+# (Sec 2.2 of https://arxiv.org/abs/2006.09214)
+model.backbone.top_block.in_feature = "p5"
+model.backbone.top_block.in_channels = 256
+
+# New score threshold determined based on sqrt(cls_score * centerness)
+model.test_score_thresh = 0.2
+model.test_nms_thresh = 0.6
+
+model.head._target_ = FCOSHead
+del model.head.num_anchors
+model.head.norm = "GN"
diff --git a/vendor/detectron2/configs/common/models/keypoint_rcnn_fpn.py b/vendor/detectron2/configs/common/models/keypoint_rcnn_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..56b3994df249884d4816fc9a5c7f553a9ab6f400
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/keypoint_rcnn_fpn.py
@@ -0,0 +1,33 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
+
+from .mask_rcnn_fpn import model
+
+[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
+
+model.roi_heads.update(
+ num_classes=1,
+ keypoint_in_features=["p2", "p3", "p4", "p5"],
+ keypoint_pooler=L(ROIPooler)(
+ output_size=14,
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
+ input_shape=ShapeSpec(channels=256, width=14, height=14),
+ num_keypoints=17,
+ conv_dims=[512] * 8,
+ loss_normalizer="visible",
+ ),
+)
+
+# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+# 1000 proposals per-image is found to hurt box AP.
+# Therefore we increase it to 1500 per-image.
+model.proposal_generator.post_nms_topk = (1500, 1000)
+
+# Keypoint AP degrades (though box AP improves) when using plain L1 loss
+model.roi_heads.box_predictor.smooth_l1_beta = 0.5
diff --git a/vendor/detectron2/configs/common/models/mask_rcnn_c4.py b/vendor/detectron2/configs/common/models/mask_rcnn_c4.py
new file mode 100644
index 0000000000000000000000000000000000000000..902d5b195f66881c67a37ec0fe606101a6812260
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/mask_rcnn_c4.py
@@ -0,0 +1,90 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+ FastRCNNOutputLayers,
+ MaskRCNNConvUpsampleHead,
+ Res5ROIHeads,
+)
+
+from ..data.constants import constants
+
+model = L(GeneralizedRCNN)(
+ backbone=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res4"],
+ ),
+ proposal_generator=L(RPN)(
+ in_features=["res4"],
+ head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[32, 64, 128, 256, 512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[16],
+ offset=0.0,
+ ),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ batch_size_per_image=256,
+ positive_fraction=0.5,
+ pre_nms_topk=(12000, 6000),
+ post_nms_topk=(2000, 1000),
+ nms_thresh=0.7,
+ ),
+ roi_heads=L(Res5ROIHeads)(
+ num_classes=80,
+ batch_size_per_image=512,
+ positive_fraction=0.25,
+ proposal_matcher=L(Matcher)(
+ thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+ ),
+ in_features=["res4"],
+ pooler=L(ROIPooler)(
+ output_size=14,
+ scales=(1.0 / 16,),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ res5=L(ResNet.make_stage)(
+ block_class=BottleneckBlock,
+ num_blocks=3,
+ stride_per_block=[2, 1, 1],
+ in_channels=1024,
+ bottleneck_channels=512,
+ out_channels=2048,
+ norm="FrozenBN",
+ stride_in_1x1=True,
+ ),
+ box_predictor=L(FastRCNNOutputLayers)(
+ input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+ num_classes="${..num_classes}",
+ ),
+ mask_head=L(MaskRCNNConvUpsampleHead)(
+ input_shape=L(ShapeSpec)(
+ channels="${...res5.out_channels}",
+ width="${...pooler.output_size}",
+ height="${...pooler.output_size}",
+ ),
+ num_classes="${..num_classes}",
+ conv_dims=[256],
+ ),
+ ),
+ pixel_mean=constants.imagenet_bgr256_mean,
+ pixel_std=constants.imagenet_bgr256_std,
+ input_format="BGR",
+)
diff --git a/vendor/detectron2/configs/common/models/mask_rcnn_fpn.py b/vendor/detectron2/configs/common/models/mask_rcnn_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e5c501cd1da6cece55210efefc4ec712075ca8a
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/mask_rcnn_fpn.py
@@ -0,0 +1,95 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+ StandardROIHeads,
+ FastRCNNOutputLayers,
+ MaskRCNNConvUpsampleHead,
+ FastRCNNConvFCHead,
+)
+
+from ..data.constants import constants
+
+model = L(GeneralizedRCNN)(
+ backbone=L(FPN)(
+ bottom_up=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res2", "res3", "res4", "res5"],
+ ),
+ in_features="${.bottom_up.out_features}",
+ out_channels=256,
+ top_block=L(LastLevelMaxPool)(),
+ ),
+ proposal_generator=L(RPN)(
+ in_features=["p2", "p3", "p4", "p5", "p6"],
+ head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[32], [64], [128], [256], [512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64],
+ offset=0.0,
+ ),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ batch_size_per_image=256,
+ positive_fraction=0.5,
+ pre_nms_topk=(2000, 1000),
+ post_nms_topk=(1000, 1000),
+ nms_thresh=0.7,
+ ),
+ roi_heads=L(StandardROIHeads)(
+ num_classes=80,
+ batch_size_per_image=512,
+ positive_fraction=0.25,
+ proposal_matcher=L(Matcher)(
+ thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+ ),
+ box_in_features=["p2", "p3", "p4", "p5"],
+ box_pooler=L(ROIPooler)(
+ output_size=7,
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ box_head=L(FastRCNNConvFCHead)(
+ input_shape=ShapeSpec(channels=256, height=7, width=7),
+ conv_dims=[],
+ fc_dims=[1024, 1024],
+ ),
+ box_predictor=L(FastRCNNOutputLayers)(
+ input_shape=ShapeSpec(channels=1024),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+ num_classes="${..num_classes}",
+ ),
+ mask_in_features=["p2", "p3", "p4", "p5"],
+ mask_pooler=L(ROIPooler)(
+ output_size=14,
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ mask_head=L(MaskRCNNConvUpsampleHead)(
+ input_shape=ShapeSpec(channels=256, width=14, height=14),
+ num_classes="${..num_classes}",
+ conv_dims=[256, 256, 256, 256, 256],
+ ),
+ ),
+ pixel_mean=constants.imagenet_bgr256_mean,
+ pixel_std=constants.imagenet_bgr256_std,
+ input_format="BGR",
+)
diff --git a/vendor/detectron2/configs/common/models/mask_rcnn_vitdet.py b/vendor/detectron2/configs/common/models/mask_rcnn_vitdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6f5244402734a3f9f675c5c4e42439ea708d24d
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/mask_rcnn_vitdet.py
@@ -0,0 +1,59 @@
+from functools import partial
+import torch.nn as nn
+from detectron2.config import LazyCall as L
+from detectron2.modeling import ViT, SimpleFeaturePyramid
+from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+
+from .mask_rcnn_fpn import model
+from ..data.constants import constants
+
+model.pixel_mean = constants.imagenet_rgb256_mean
+model.pixel_std = constants.imagenet_rgb256_std
+model.input_format = "RGB"
+
+# Base
+embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1
+# Creates Simple Feature Pyramid from ViT backbone
+model.backbone = L(SimpleFeaturePyramid)(
+ net=L(ViT)( # Single-scale ViT backbone
+ img_size=1024,
+ patch_size=16,
+ embed_dim=embed_dim,
+ depth=depth,
+ num_heads=num_heads,
+ drop_path_rate=dp,
+ window_size=14,
+ mlp_ratio=4,
+ qkv_bias=True,
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
+ window_block_indexes=[
+ # 2, 5, 8, 11 for global attention
+ 0,
+ 1,
+ 3,
+ 4,
+ 6,
+ 7,
+ 9,
+ 10,
+ ],
+ residual_block_indexes=[],
+ use_rel_pos=True,
+ out_feature="last_feat",
+ ),
+ in_feature="${.net.out_feature}",
+ out_channels=256,
+ scale_factors=(4.0, 2.0, 1.0, 0.5),
+ top_block=L(LastLevelMaxPool)(),
+ norm="LN",
+ square_pad=1024,
+)
+
+model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN"
+
+# 2conv in RPN:
+model.proposal_generator.head.conv_dims = [-1, -1]
+
+# 4conv1fc box head
+model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
+model.roi_heads.box_head.fc_dims = [1024]
diff --git a/vendor/detectron2/configs/common/models/panoptic_fpn.py b/vendor/detectron2/configs/common/models/panoptic_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..88f55d2ce9db62e61445d6a3700067d9d864ecae
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/panoptic_fpn.py
@@ -0,0 +1,20 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling import PanopticFPN
+from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
+
+from .mask_rcnn_fpn import model
+
+model._target_ = PanopticFPN
+model.sem_seg_head = L(SemSegFPNHead)(
+ input_shape={
+ f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
+ for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
+ },
+ ignore_value=255,
+ num_classes=54, # COCO stuff + 1
+ conv_dims=128,
+ common_stride=4,
+ loss_weight=0.5,
+ norm="GN",
+)
diff --git a/vendor/detectron2/configs/common/models/retinanet.py b/vendor/detectron2/configs/common/models/retinanet.py
new file mode 100644
index 0000000000000000000000000000000000000000..784e5317f594db966dac02792e9c9db1774623d6
--- /dev/null
+++ b/vendor/detectron2/configs/common/models/retinanet.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import RetinaNet
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelP6P7
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
+
+from ..data.constants import constants
+
+model = L(RetinaNet)(
+ backbone=L(FPN)(
+ bottom_up=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res3", "res4", "res5"],
+ ),
+ in_features=["res3", "res4", "res5"],
+ out_channels=256,
+ top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
+ ),
+ head=L(RetinaNetHead)(
+ # Shape for each input feature map
+ input_shape=[ShapeSpec(channels=256)] * 5,
+ num_classes="${..num_classes}",
+ conv_dims=[256, 256, 256, 256],
+ prior_prob=0.01,
+ num_anchors=9,
+ ),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128],
+ offset=0.0,
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ num_classes=80,
+ head_in_features=["p3", "p4", "p5", "p6", "p7"],
+ focal_loss_alpha=0.25,
+ focal_loss_gamma=2.0,
+ pixel_mean=constants.imagenet_bgr256_mean,
+ pixel_std=constants.imagenet_bgr256_std,
+ input_format="BGR",
+)
diff --git a/vendor/detectron2/configs/common/optim.py b/vendor/detectron2/configs/common/optim.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cf43e835f55739fbb80102b870efab950a0486d
--- /dev/null
+++ b/vendor/detectron2/configs/common/optim.py
@@ -0,0 +1,28 @@
+import torch
+
+from detectron2.config import LazyCall as L
+from detectron2.solver.build import get_default_optimizer_params
+
+SGD = L(torch.optim.SGD)(
+ params=L(get_default_optimizer_params)(
+ # params.model is meant to be set to the model object, before instantiating
+ # the optimizer.
+ weight_decay_norm=0.0
+ ),
+ lr=0.02,
+ momentum=0.9,
+ weight_decay=1e-4,
+)
+
+
+AdamW = L(torch.optim.AdamW)(
+ params=L(get_default_optimizer_params)(
+ # params.model is meant to be set to the model object, before instantiating
+ # the optimizer.
+ base_lr="${..lr}",
+ weight_decay_norm=0.0,
+ ),
+ lr=1e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.1,
+)
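+
+# Usage sketch (an assumption based on the lazy-config training flow, e.g.
+# "tools/lazyconfig_train_net.py"): `params.model` is filled in with the built
+# model before the optimizer object is created:
+#
+#   from detectron2.config import instantiate
+#   optimizer.params.model = model
+#   opt = instantiate(optimizer)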
diff --git a/vendor/detectron2/configs/common/train.py b/vendor/detectron2/configs/common/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6ed02bd59f540ca58df20bf72d462f195210a32
--- /dev/null
+++ b/vendor/detectron2/configs/common/train.py
@@ -0,0 +1,18 @@
+# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
+# You can use your own instead, together with your own train_net.py
+train = dict(
+ output_dir="./output",
+ init_checkpoint="",
+ max_iter=90000,
+ amp=dict(enabled=False), # options for Automatic Mixed Precision
+ ddp=dict( # options for DistributedDataParallel
+ broadcast_buffers=False,
+ find_unused_parameters=False,
+ fp16_compression=False,
+ ),
+ checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer
+ eval_period=5000,
+ log_period=20,
+ device="cuda"
+ # ...
+)
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..3740e9bb08c5f168a9ab3a6d94561678bad1775c
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
@@ -0,0 +1,9 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+model.backbone.bottom_up.stages.depth = 101
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..18e5f0720c568db4ef0c97b59688b5e7866df606
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..63c54ee9a5ce2368494b775cc90fada1439feaa5
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7a2aedf480ed8dc4aa3645e37420e9b893fae4
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
@@ -0,0 +1,72 @@
+import detectron2.data.transforms as T
+from detectron2.config.lazy import LazyCall as L
+from detectron2.layers.batch_norm import NaiveSyncBatchNorm
+from detectron2.solver import WarmupParamScheduler
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+# train from scratch
+train.init_checkpoint = ""
+train.amp.enabled = True
+train.ddp.fp16_compression = True
+model.backbone.bottom_up.freeze_at = 0
+
+# SyncBN
+# fmt: off
+model.backbone.bottom_up.stem.norm = \
+ model.backbone.bottom_up.stages.norm = \
+ model.backbone.norm = "SyncBN"
+
+# Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by
+# torch.nn.SyncBatchNorm. We can remove this after
+# https://github.com/pytorch/pytorch/issues/36530 is fixed.
+model.roi_heads.box_head.conv_norm = \
+ model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
+ stats_mode="N")
+# fmt: on
+
+# 2conv in RPN:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950
+model.proposal_generator.head.conv_dims = [-1, -1]
+
+# 4conv1fc box head
+model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
+model.roi_heads.box_head.fc_dims = [1024]
+
+# resize_and_crop_image in:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950
+image_size = 1024
+dataloader.train.mapper.augmentations = [
+ L(T.ResizeScale)(
+ min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
+ ),
+ L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
+ L(T.RandomFlip)(horizontal=True),
+]
+
+# recompute boxes due to cropping
+dataloader.train.mapper.recompute_boxes = True
+
+# larger batch-size.
+dataloader.train.total_batch_size = 64
+
+# Equivalent to 100 epochs.
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
+train.max_iter = 184375
+
+lr_multiplier = L(WarmupParamScheduler)(
+ scheduler=L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01],
+ milestones=[163889, 177546],
+ num_updates=train.max_iter,
+ ),
+ warmup_length=500 / train.max_iter,
+ warmup_factor=0.067,
+)
+
+optimizer.lr = 0.1
+optimizer.weight_decay = 4e-5
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a7c376da5f9269197c44079f3e0f3b09cdc63fa
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..97586b8f5330a9d995a0bffd1f5e7bd5b5656462
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ca1ede262cf5c37a3a54778458c74aff1479411
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter //= 2 # 100ep -> 50ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone // 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef0b6d16d4403fb5d16a3aeb71a22621a0be5e21
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,29 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=23,
+ w_a=38.65,
+ w_0=96,
+ w_m=2.43,
+ group_width=40,
+ norm="SyncBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..731320e74ebed4d8ceec58c07cb906542b8b021b
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba2c3274a493d5136507364558c8289eb6ee6259
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,30 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=22,
+ w_a=31.41,
+ w_0=96,
+ w_m=2.24,
+ group_width=64,
+ se_ratio=0.25,
+ norm="SyncBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..b867cc865e5ac4d7b70221da141894efd7cbd75c
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34
--- /dev/null
+++ b/vendor/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/vendor/detectron2/configs/quick_schedules/README.md b/vendor/detectron2/configs/quick_schedules/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/README.md
@@ -0,0 +1,8 @@
+These are quick configs for performance or accuracy regression tracking purposes; a minimal loading example follows the list below.
+
+* `*instant_test.yaml`: can train on 2 GPUs. They are used to test whether the training can
+ successfully finish. They are not expected to produce reasonable training results.
+* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify
+ the results are as expected.
+* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy
+ is within the normal range.
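+
+As a minimal sketch (the config path is an assumption; adjust it to wherever the
+configs live in your checkout), any of these schedules can be loaded and
+inspected with the standard config API:
+
+```
+from detectron2.config import get_cfg
+
+cfg = get_cfg()
+cfg.merge_from_file("configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml")
+print(cfg.SOLVER.MAX_ITER)   # 40 for the instant tests
+print(cfg.DATASETS.TEST)     # ('coco_2017_val_100',)
+```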
diff --git a/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+ TEST: ("coco_2017_val_100",)
+ PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
+DATASETS:
+ TEST: ("keypoints_coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,16 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 1
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val_100",)
+ TEST: ("keypoints_coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
@@ -0,0 +1,30 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
+ LOSS_WEIGHT: 4.0
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss
+ RPN:
+ SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val",)
+ TEST: ("keypoints_coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ WARMUP_FACTOR: 0.33333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
diff --git a/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,28 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss
+ RPN:
+ SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val",)
+ TEST: ("keypoints_coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ WARMUP_FACTOR: 0.33333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.001
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "value"
+ CLIP_VALUE: 1.0
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.001
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val",)
+ TEST: ("coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (600,)
+ MAX_SIZE_TRAIN: 1000
+ MIN_SIZE_TEST: 800
+ MAX_SIZE_TEST: 1000
+SOLVER:
+ IMS_PER_BATCH: 8 # base uses 16
+ WARMUP_FACTOR: 0.33333
+ WARMUP_ITERS: 100
+ STEPS: (11000, 11600)
+ MAX_ITER: 12000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
+ AUG:
+ ENABLED: True
+ MIN_SIZES: (700, 800) # to save some time
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
@@ -0,0 +1,6 @@
+_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
+MODEL:
+ ROI_BOX_HEAD:
+ TRAIN_ON_PRED_BOXES: True
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val",)
+ TEST: ("coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (600,)
+ MAX_SIZE_TRAIN: 1000
+ MIN_SIZE_TEST: 800
+ MAX_SIZE_TEST: 1000
+SOLVER:
+ WARMUP_FACTOR: 0.3333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100_panoptic_separated",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_val_100_panoptic_separated",)
+ TEST: ("coco_2017_val_100_panoptic_separated",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 1
diff --git a/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_val_panoptic_separated",)
+ TEST: ("coco_2017_val_panoptic_separated",)
+SOLVER:
+ BASE_LR: 0.01
+ WARMUP_FACTOR: 0.001
+ WARMUP_ITERS: 500
+ STEPS: (5500,)
+ MAX_ITER: 7000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
diff --git a/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ STEPS: (30,)
+ MAX_ITER: 40
+ BASE_LR: 0.005
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+TEST:
+ EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
diff --git a/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61
--- /dev/null
+++ b/vendor/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_val_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_panoptic_stuffonly",)
+SOLVER:
+ BASE_LR: 0.01
+ WARMUP_FACTOR: 0.001
+ WARMUP_ITERS: 300
+ STEPS: (5500,)
+ MAX_ITER: 7000
+TEST:
+ EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/vendor/detectron2/datasets/README.md b/vendor/detectron2/datasets/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0eb44cc3b23beeb1755ab8d12002d26f13434235
--- /dev/null
+++ b/vendor/detectron2/datasets/README.md
@@ -0,0 +1,140 @@
+# Use Builtin Datasets
+
+A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog)
+for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc.).
+This document explains how to set up the builtin datasets so they can be used by the above APIs.
+[Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`,
+and how to add new datasets to them.
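+
+For example, both catalogs can be queried directly once a builtin dataset is in
+place (a minimal sketch; it assumes the COCO 2017 validation set has been set up
+as described below):
+
+```
+from detectron2.data import DatasetCatalog, MetadataCatalog
+
+# per-image annotations in detectron2's standard dataset-dict format
+dataset_dicts = DatasetCatalog.get("coco_2017_val")
+print(len(dataset_dicts), "images")
+
+# shared metadata for the same dataset, e.g. class names
+metadata = MetadataCatalog.get("coco_2017_val")
+print(metadata.thing_classes[:5])
+```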
+
+Detectron2 has builtin support for a few datasets.
+The datasets are assumed to exist in a directory specified by the environment variable
+`DETECTRON2_DATASETS`.
+Under this directory, detectron2 will look for datasets in the structure described below, if needed.
+```
+$DETECTRON2_DATASETS/
+ coco/
+ lvis/
+ cityscapes/
+ VOC20{07,12}/
+```
+
+You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`.
+If left unset, the default is `./datasets` relative to your current working directory.
+
+The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md)
+contains configs and models that use these builtin datasets.
+
+## Expected dataset structure for [COCO instance/keypoint detection](https://cocodataset.org/#download):
+
+```
+coco/
+ annotations/
+ instances_{train,val}2017.json
+ person_keypoints_{train,val}2017.json
+ {train,val}2017/
+ # image files that are mentioned in the corresponding json
+```
+
+You can use the 2014 version of the dataset as well.
+
+Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset,
+which you can download with `./datasets/prepare_for_tests.sh`.
+
+## Expected dataset structure for PanopticFPN:
+
+Extract panoptic annotations from [COCO website](https://cocodataset.org/#download)
+into the following structure:
+```
+coco/
+ annotations/
+ panoptic_{train,val}2017.json
+ panoptic_{train,val}2017/ # png annotations
+ panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
+```
+
+Install panopticapi by running:
+```
+pip install git+https://github.com/cocodataset/panopticapi.git
+```
+Then run `python datasets/prepare_panoptic_fpn.py` to extract semantic annotations from the panoptic annotations.
+
+## Expected dataset structure for [LVIS instance segmentation](https://www.lvisdataset.org/dataset):
+```
+coco/
+ {train,val,test}2017/
+lvis/
+ lvis_v0.5_{train,val}.json
+ lvis_v0.5_image_info_test.json
+ lvis_v1_{train,val}.json
+ lvis_v1_image_info_test{,_challenge}.json
+```
+
+Install lvis-api by running:
+```
+pip install git+https://github.com/lvis-dataset/lvis-api.git
+```
+
+To evaluate models trained on the COCO dataset using LVIS annotations,
+run `python datasets/prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations.
+
+## Expected dataset structure for [cityscapes](https://www.cityscapes-dataset.com/downloads/):
+```
+cityscapes/
+ gtFine/
+ train/
+ aachen/
+ color.png, instanceIds.png, labelIds.png, polygons.json,
+ labelTrainIds.png
+ ...
+ val/
+ test/
+ # below are generated Cityscapes panoptic annotations
+ cityscapes_panoptic_train.json
+ cityscapes_panoptic_train/
+ cityscapes_panoptic_val.json
+ cityscapes_panoptic_val/
+ cityscapes_panoptic_test.json
+ cityscapes_panoptic_test/
+ leftImg8bit/
+ train/
+ val/
+ test/
+```
+Install the cityscapes scripts by running:
+```
+pip install git+https://github.com/mcordts/cityscapesScripts.git
+```
+
+Note: to create labelTrainIds.png, first prepare the above structure, then run the cityscapes script with:
+```
+CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py
+```
+These files are not needed for instance segmentation.
+
+Note: to generate the Cityscapes panoptic dataset, run the cityscapes script with:
+```
+CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createPanopticImgs.py
+```
+These files are not needed for semantic and instance segmentation.
+
+## Expected dataset structure for [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/index.html):
+```
+VOC20{07,12}/
+ Annotations/
+ ImageSets/
+ Main/
+ trainval.txt
+ test.txt
+ # train.txt or val.txt, if you use these splits
+ JPEGImages/
+```
+
+## Expected dataset structure for [ADE20k Scene Parsing](http://sceneparsing.csail.mit.edu/):
+```
+ADEChallengeData2016/
+ annotations/
+ annotations_detectron2/
+ images/
+ objectInfo150.txt
+```
+The directory `annotations_detectron2` is generated by running `python datasets/prepare_ade20k_sem_seg.py`.
diff --git a/vendor/detectron2/datasets/prepare_ade20k_sem_seg.py b/vendor/detectron2/datasets/prepare_ade20k_sem_seg.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b4a58d8f2877544498e328b6d269f23aa1eb59f
--- /dev/null
+++ b/vendor/detectron2/datasets/prepare_ade20k_sem_seg.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+import numpy as np
+import os
+from pathlib import Path
+import tqdm
+from PIL import Image
+
+
+def convert(input, output):
+ img = np.asarray(Image.open(input))
+ assert img.dtype == np.uint8
+ img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1
+ Image.fromarray(img).save(output)
+
+
+if __name__ == "__main__":
+ dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016"
+ for name in ["training", "validation"]:
+ annotation_dir = dataset_dir / "annotations" / name
+ output_dir = dataset_dir / "annotations_detectron2" / name
+ output_dir.mkdir(parents=True, exist_ok=True)
+ for file in tqdm.tqdm(list(annotation_dir.iterdir())):
+ output_file = output_dir / file.name
+ convert(file, output_file)
diff --git a/vendor/detectron2/datasets/prepare_cocofied_lvis.py b/vendor/detectron2/datasets/prepare_cocofied_lvis.py
new file mode 100644
index 0000000000000000000000000000000000000000..245c88482a9e2405e5a912b5c560aed78a614a13
--- /dev/null
+++ b/vendor/detectron2/datasets/prepare_cocofied_lvis.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import copy
+import json
+import os
+from collections import defaultdict
+
+# This mapping is extracted from the official LVIS mapping:
+# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json
+COCO_SYNSET_CATEGORIES = [
+ {"synset": "person.n.01", "coco_cat_id": 1},
+ {"synset": "bicycle.n.01", "coco_cat_id": 2},
+ {"synset": "car.n.01", "coco_cat_id": 3},
+ {"synset": "motorcycle.n.01", "coco_cat_id": 4},
+ {"synset": "airplane.n.01", "coco_cat_id": 5},
+ {"synset": "bus.n.01", "coco_cat_id": 6},
+ {"synset": "train.n.01", "coco_cat_id": 7},
+ {"synset": "truck.n.01", "coco_cat_id": 8},
+ {"synset": "boat.n.01", "coco_cat_id": 9},
+ {"synset": "traffic_light.n.01", "coco_cat_id": 10},
+ {"synset": "fireplug.n.01", "coco_cat_id": 11},
+ {"synset": "stop_sign.n.01", "coco_cat_id": 13},
+ {"synset": "parking_meter.n.01", "coco_cat_id": 14},
+ {"synset": "bench.n.01", "coco_cat_id": 15},
+ {"synset": "bird.n.01", "coco_cat_id": 16},
+ {"synset": "cat.n.01", "coco_cat_id": 17},
+ {"synset": "dog.n.01", "coco_cat_id": 18},
+ {"synset": "horse.n.01", "coco_cat_id": 19},
+ {"synset": "sheep.n.01", "coco_cat_id": 20},
+ {"synset": "beef.n.01", "coco_cat_id": 21},
+ {"synset": "elephant.n.01", "coco_cat_id": 22},
+ {"synset": "bear.n.01", "coco_cat_id": 23},
+ {"synset": "zebra.n.01", "coco_cat_id": 24},
+ {"synset": "giraffe.n.01", "coco_cat_id": 25},
+ {"synset": "backpack.n.01", "coco_cat_id": 27},
+ {"synset": "umbrella.n.01", "coco_cat_id": 28},
+ {"synset": "bag.n.04", "coco_cat_id": 31},
+ {"synset": "necktie.n.01", "coco_cat_id": 32},
+ {"synset": "bag.n.06", "coco_cat_id": 33},
+ {"synset": "frisbee.n.01", "coco_cat_id": 34},
+ {"synset": "ski.n.01", "coco_cat_id": 35},
+ {"synset": "snowboard.n.01", "coco_cat_id": 36},
+ {"synset": "ball.n.06", "coco_cat_id": 37},
+ {"synset": "kite.n.03", "coco_cat_id": 38},
+ {"synset": "baseball_bat.n.01", "coco_cat_id": 39},
+ {"synset": "baseball_glove.n.01", "coco_cat_id": 40},
+ {"synset": "skateboard.n.01", "coco_cat_id": 41},
+ {"synset": "surfboard.n.01", "coco_cat_id": 42},
+ {"synset": "tennis_racket.n.01", "coco_cat_id": 43},
+ {"synset": "bottle.n.01", "coco_cat_id": 44},
+ {"synset": "wineglass.n.01", "coco_cat_id": 46},
+ {"synset": "cup.n.01", "coco_cat_id": 47},
+ {"synset": "fork.n.01", "coco_cat_id": 48},
+ {"synset": "knife.n.01", "coco_cat_id": 49},
+ {"synset": "spoon.n.01", "coco_cat_id": 50},
+ {"synset": "bowl.n.03", "coco_cat_id": 51},
+ {"synset": "banana.n.02", "coco_cat_id": 52},
+ {"synset": "apple.n.01", "coco_cat_id": 53},
+ {"synset": "sandwich.n.01", "coco_cat_id": 54},
+ {"synset": "orange.n.01", "coco_cat_id": 55},
+ {"synset": "broccoli.n.01", "coco_cat_id": 56},
+ {"synset": "carrot.n.01", "coco_cat_id": 57},
+ {"synset": "frank.n.02", "coco_cat_id": 58},
+ {"synset": "pizza.n.01", "coco_cat_id": 59},
+ {"synset": "doughnut.n.02", "coco_cat_id": 60},
+ {"synset": "cake.n.03", "coco_cat_id": 61},
+ {"synset": "chair.n.01", "coco_cat_id": 62},
+ {"synset": "sofa.n.01", "coco_cat_id": 63},
+ {"synset": "pot.n.04", "coco_cat_id": 64},
+ {"synset": "bed.n.01", "coco_cat_id": 65},
+ {"synset": "dining_table.n.01", "coco_cat_id": 67},
+ {"synset": "toilet.n.02", "coco_cat_id": 70},
+ {"synset": "television_receiver.n.01", "coco_cat_id": 72},
+ {"synset": "laptop.n.01", "coco_cat_id": 73},
+ {"synset": "mouse.n.04", "coco_cat_id": 74},
+ {"synset": "remote_control.n.01", "coco_cat_id": 75},
+ {"synset": "computer_keyboard.n.01", "coco_cat_id": 76},
+ {"synset": "cellular_telephone.n.01", "coco_cat_id": 77},
+ {"synset": "microwave.n.02", "coco_cat_id": 78},
+ {"synset": "oven.n.01", "coco_cat_id": 79},
+ {"synset": "toaster.n.02", "coco_cat_id": 80},
+ {"synset": "sink.n.01", "coco_cat_id": 81},
+ {"synset": "electric_refrigerator.n.01", "coco_cat_id": 82},
+ {"synset": "book.n.01", "coco_cat_id": 84},
+ {"synset": "clock.n.01", "coco_cat_id": 85},
+ {"synset": "vase.n.01", "coco_cat_id": 86},
+ {"synset": "scissors.n.01", "coco_cat_id": 87},
+ {"synset": "teddy.n.01", "coco_cat_id": 88},
+ {"synset": "hand_blower.n.01", "coco_cat_id": 89},
+ {"synset": "toothbrush.n.01", "coco_cat_id": 90},
+]
+
+
+def cocofy_lvis(input_filename, output_filename):
+ """
+ Filter LVIS instance segmentation annotations to remove all categories that are not included in
+ COCO. The new json files can be used to evaluate COCO AP using `lvis-api`. The category ids in
+ the output json are the non-contiguous COCO dataset ids.
+
+ Args:
+ input_filename (str): path to the LVIS json file.
+ output_filename (str): path to the COCOfied json file.
+ """
+
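+ # For example, annotations whose LVIS synset is "person.n.01" are kept and
+ # relabeled to COCO category id 1 (see COCO_SYNSET_CATEGORIES above).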
+ with open(input_filename, "r") as f:
+ lvis_json = json.load(f)
+
+ lvis_annos = lvis_json.pop("annotations")
+ cocofied_lvis = copy.deepcopy(lvis_json)
+ lvis_json["annotations"] = lvis_annos
+
+ # Mapping from lvis cat id to coco cat id via synset
+ lvis_cat_id_to_synset = {cat["id"]: cat["synset"] for cat in lvis_json["categories"]}
+ synset_to_coco_cat_id = {x["synset"]: x["coco_cat_id"] for x in COCO_SYNSET_CATEGORIES}
+ # Synsets that we will keep in the dataset
+ synsets_to_keep = set(synset_to_coco_cat_id.keys())
+ coco_cat_id_with_instances = defaultdict(int)
+
+ new_annos = []
+ ann_id = 1
+ for ann in lvis_annos:
+ lvis_cat_id = ann["category_id"]
+ synset = lvis_cat_id_to_synset[lvis_cat_id]
+ if synset not in synsets_to_keep:
+ continue
+ coco_cat_id = synset_to_coco_cat_id[synset]
+ new_ann = copy.deepcopy(ann)
+ new_ann["category_id"] = coco_cat_id
+ new_ann["id"] = ann_id
+ ann_id += 1
+ new_annos.append(new_ann)
+ coco_cat_id_with_instances[coco_cat_id] += 1
+ cocofied_lvis["annotations"] = new_annos
+
+ for image in cocofied_lvis["images"]:
+ for key in ["not_exhaustive_category_ids", "neg_category_ids"]:
+ new_category_list = []
+ for lvis_cat_id in image[key]:
+ synset = lvis_cat_id_to_synset[lvis_cat_id]
+ if synset not in synsets_to_keep:
+ continue
+ coco_cat_id = synset_to_coco_cat_id[synset]
+ new_category_list.append(coco_cat_id)
+ coco_cat_id_with_instances[coco_cat_id] += 1
+ image[key] = new_category_list
+
+ coco_cat_id_with_instances = set(coco_cat_id_with_instances.keys())
+
+ new_categories = []
+ for cat in lvis_json["categories"]:
+ synset = cat["synset"]
+ if synset not in synsets_to_keep:
+ continue
+ coco_cat_id = synset_to_coco_cat_id[synset]
+ if coco_cat_id not in coco_cat_id_with_instances:
+ continue
+ new_cat = copy.deepcopy(cat)
+ new_cat["id"] = coco_cat_id
+ new_categories.append(new_cat)
+ cocofied_lvis["categories"] = new_categories
+
+ with open(output_filename, "w") as f:
+ json.dump(cocofied_lvis, f)
+ print("{} is COCOfied and stored in {}.".format(input_filename, output_filename))
+
+
+if __name__ == "__main__":
+ dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "lvis")
+ for s in ["lvis_v0.5_train", "lvis_v0.5_val"]:
+ print("Start COCOfing {}.".format(s))
+ cocofy_lvis(
+ os.path.join(dataset_dir, "{}.json".format(s)),
+ os.path.join(dataset_dir, "{}_cocofied.json".format(s)),
+ )
diff --git a/vendor/detectron2/datasets/prepare_for_tests.sh b/vendor/detectron2/datasets/prepare_for_tests.sh
new file mode 100644
index 0000000000000000000000000000000000000000..67e875a41da652b2fcae6631b76d94584935ddb9
--- /dev/null
+++ b/vendor/detectron2/datasets/prepare_for_tests.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -e
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# Download the mini dataset (coco val2017_100, with only 100 images)
+# to be used in unittests & integration tests.
+
+cd "${0%/*}"
+
+BASE=https://dl.fbaipublicfiles.com/detectron2
+ROOT=${DETECTRON2_DATASETS:-./}
+ROOT=${ROOT/#\~/$HOME} # expand ~ to HOME
+mkdir -p $ROOT/coco/annotations
+
+for anno in instances_val2017_100 \
+ person_keypoints_val2017_100 ; do
+
+ dest=$ROOT/coco/annotations/$anno.json
+ [[ -s $dest ]] && {
+ echo "$dest exists. Skipping ..."
+ } || {
+ wget $BASE/annotations/coco/$anno.json -O $dest
+ }
+done
+
+dest=$ROOT/coco/val2017_100.tgz
+[[ -d $ROOT/coco/val2017 ]] && {
+ echo "$ROOT/coco/val2017 exists. Skipping ..."
+} || {
+ wget $BASE/annotations/coco/val2017_100.tgz -O $dest
+ tar xzf $dest -C $ROOT/coco/ && rm -f $dest
+}
diff --git a/vendor/detectron2/datasets/prepare_panoptic_fpn.py b/vendor/detectron2/datasets/prepare_panoptic_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..597d791afab1bcc0013203a66c7fba225065eebe
--- /dev/null
+++ b/vendor/detectron2/datasets/prepare_panoptic_fpn.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import functools
+import json
+import multiprocessing as mp
+import numpy as np
+import os
+import time
+from fvcore.common.download import download
+from panopticapi.utils import rgb2id
+from PIL import Image
+
+from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
+
+
+def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map):
+ panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
+ panoptic = rgb2id(panoptic)
+ output = np.zeros_like(panoptic, dtype=np.uint8) + 255
+ for seg in segments:
+ cat_id = seg["category_id"]
+ new_cat_id = id_map[cat_id]
+ output[panoptic == seg["id"]] = new_cat_id
+ Image.fromarray(output).save(output_semantic)
+
+
+def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories):
+ """
+ Create semantic segmentation annotations from panoptic segmentation
+ annotations, to be used by PanopticFPN.
+
+ It maps all thing categories to class 0, and maps all unlabeled pixels to class 255.
+ It maps all stuff categories to contiguous ids starting from 1.
+
+ Args:
+ panoptic_json (str): path to the panoptic json file, in COCO's format.
+ panoptic_root (str): a directory with panoptic annotation files, in COCO's format.
+ sem_seg_root (str): a directory to output semantic annotation files
+ categories (list[dict]): category metadata. Each dict needs to have:
+ "id": corresponds to the "category_id" in the json annotations
+ "isthing": 0 or 1
+ """
+ os.makedirs(sem_seg_root, exist_ok=True)
+
+ stuff_ids = [k["id"] for k in categories if k["isthing"] == 0]
+ thing_ids = [k["id"] for k in categories if k["isthing"] == 1]
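+ # For example (hypothetical category list): categories
+ # [{"id": 1, "isthing": 1}, {"id": 92, "isthing": 0}, {"id": 93, "isthing": 0}]
+ # produce id_map == {92: 1, 93: 2, 1: 0, 0: 255} below.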
+ id_map = {} # map from category id to id in the output semantic annotation
+ assert len(stuff_ids) <= 254
+ for i, stuff_id in enumerate(stuff_ids):
+ id_map[stuff_id] = i + 1
+ for thing_id in thing_ids:
+ id_map[thing_id] = 0
+ id_map[0] = 255
+
+ with open(panoptic_json) as f:
+ obj = json.load(f)
+
+ pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
+
+ def iter_annotations():
+ for anno in obj["annotations"]:
+ file_name = anno["file_name"]
+ segments = anno["segments_info"]
+ input = os.path.join(panoptic_root, file_name)
+ output = os.path.join(sem_seg_root, file_name)
+ yield input, output, segments
+
+ print("Start writing to {} ...".format(sem_seg_root))
+ start = time.time()
+ pool.starmap(
+ functools.partial(_process_panoptic_to_semantic, id_map=id_map),
+ iter_annotations(),
+ chunksize=100,
+ )
+ print("Finished. time: {:.2f}s".format(time.time() - start))
+
+
+if __name__ == "__main__":
+ dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco")
+ for s in ["val2017", "train2017"]:
+ separate_coco_semantic_from_panoptic(
+ os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
+ os.path.join(dataset_dir, "panoptic_{}".format(s)),
+ os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)),
+ COCO_CATEGORIES,
+ )
+
+ # Prepare val2017_100 for quick testing:
+
+ dest_dir = os.path.join(dataset_dir, "annotations/")
+ URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
+ download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir)
+ with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f:
+ obj = json.load(f)
+
+ def link_val100(dir_full, dir_100):
+ print("Creating " + dir_100 + " ...")
+ os.makedirs(dir_100, exist_ok=True)
+ for img in obj["images"]:
+ basename = os.path.splitext(img["file_name"])[0]
+ src = os.path.join(dir_full, basename + ".png")
+ dst = os.path.join(dir_100, basename + ".png")
+ src = os.path.relpath(src, start=dir_100)
+ os.symlink(src, dst)
+
+ link_val100(
+ os.path.join(dataset_dir, "panoptic_val2017"),
+ os.path.join(dataset_dir, "panoptic_val2017_100"),
+ )
+
+ link_val100(
+ os.path.join(dataset_dir, "panoptic_stuff_val2017"),
+ os.path.join(dataset_dir, "panoptic_stuff_val2017_100"),
+ )
diff --git a/vendor/detectron2/demo/README.md b/vendor/detectron2/demo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..133d8d38e5e9f5f44aca92c59f73309e166d7132
--- /dev/null
+++ b/vendor/detectron2/demo/README.md
@@ -0,0 +1,8 @@
+
+## Detectron2 Demo
+
+We provide a command line tool to run a simple demo of builtin configs.
+The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md).
+
+See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-)
+for a high-quality demo generated with this tool.
diff --git a/vendor/detectron2/demo/demo.py b/vendor/detectron2/demo/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..4baa8767f7b299f18253aadb15a9bac5b9cc07fc
--- /dev/null
+++ b/vendor/detectron2/demo/demo.py
@@ -0,0 +1,188 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import argparse
+import glob
+import multiprocessing as mp
+import numpy as np
+import os
+import tempfile
+import time
+import warnings
+import cv2
+import tqdm
+
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.utils.logger import setup_logger
+
+from predictor import VisualizationDemo
+
+# constants
+WINDOW_NAME = "COCO detections"
+
+
+def setup_cfg(args):
+ # load config from file and command-line arguments
+ cfg = get_cfg()
+ # To use demo for Panoptic-DeepLab, please uncomment the following two lines.
+ # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa
+ # add_panoptic_deeplab_config(cfg)
+ cfg.merge_from_file(args.config_file)
+ cfg.merge_from_list(args.opts)
+ # Set score_threshold for builtin models
+ cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
+ cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
+ cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
+ cfg.freeze()
+ return cfg
+
+
+def get_parser():
+ parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
+ parser.add_argument(
+ "--config-file",
+ default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
+ metavar="FILE",
+ help="path to config file",
+ )
+ parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
+ parser.add_argument("--video-input", help="Path to video file.")
+ parser.add_argument(
+ "--input",
+ nargs="+",
+ help="A list of space separated input images; "
+ "or a single glob pattern such as 'directory/*.jpg'",
+ )
+ parser.add_argument(
+ "--output",
+ help="A file or directory to save output visualizations. "
+ "If not given, will show output in an OpenCV window.",
+ )
+
+ parser.add_argument(
+ "--confidence-threshold",
+ type=float,
+ default=0.5,
+ help="Minimum score for instance predictions to be shown",
+ )
+ parser.add_argument(
+ "--opts",
+ help="Modify config options using the command-line 'KEY VALUE' pairs",
+ default=[],
+ nargs=argparse.REMAINDER,
+ )
+ return parser
+
+
+def test_opencv_video_format(codec, file_ext):
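+ # Write a short dummy clip with the requested codec/extension and report whether
+ # OpenCV actually produced a file, i.e. whether the codec is available.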
+ with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
+ filename = os.path.join(dir, "test_file" + file_ext)
+ writer = cv2.VideoWriter(
+ filename=filename,
+ fourcc=cv2.VideoWriter_fourcc(*codec),
+ fps=float(30),
+ frameSize=(10, 10),
+ isColor=True,
+ )
+ [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
+ writer.release()
+ if os.path.isfile(filename):
+ return True
+ return False
+
+
+if __name__ == "__main__":
+ mp.set_start_method("spawn", force=True)
+ args = get_parser().parse_args()
+ setup_logger(name="fvcore")
+ logger = setup_logger()
+ logger.info("Arguments: " + str(args))
+
+ cfg = setup_cfg(args)
+
+ demo = VisualizationDemo(cfg)
+
+ if args.input:
+ if len(args.input) == 1:
+ args.input = glob.glob(os.path.expanduser(args.input[0]))
+ assert args.input, "The input path(s) was not found"
+ for path in tqdm.tqdm(args.input, disable=not args.output):
+ # use PIL, to be consistent with evaluation
+ img = read_image(path, format="BGR")
+ start_time = time.time()
+ predictions, visualized_output = demo.run_on_image(img)
+ logger.info(
+ "{}: {} in {:.2f}s".format(
+ path,
+ "detected {} instances".format(len(predictions["instances"]))
+ if "instances" in predictions
+ else "finished",
+ time.time() - start_time,
+ )
+ )
+
+ if args.output:
+ if os.path.isdir(args.output):
+ assert os.path.isdir(args.output), args.output
+ out_filename = os.path.join(args.output, os.path.basename(path))
+ else:
+ assert len(args.input) == 1, "Please specify a directory with args.output"
+ out_filename = args.output
+ visualized_output.save(out_filename)
+ else:
+ cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+ cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
+ if cv2.waitKey(0) == 27:
+ break # esc to quit
+ elif args.webcam:
+ assert args.input is None, "Cannot have both --input and --webcam!"
+ assert args.output is None, "output not yet supported with --webcam!"
+ cam = cv2.VideoCapture(0)
+ for vis in tqdm.tqdm(demo.run_on_video(cam)):
+ cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+ cv2.imshow(WINDOW_NAME, vis)
+ if cv2.waitKey(1) == 27:
+ break # esc to quit
+ cam.release()
+ cv2.destroyAllWindows()
+ elif args.video_input:
+ video = cv2.VideoCapture(args.video_input)
+ width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ frames_per_second = video.get(cv2.CAP_PROP_FPS)
+ num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+ basename = os.path.basename(args.video_input)
+ codec, file_ext = (
+ ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
+ )
+ if codec == ".mp4v":
+ warnings.warn("x264 codec not available, switching to mp4v")
+ if args.output:
+ if os.path.isdir(args.output):
+ output_fname = os.path.join(args.output, basename)
+ output_fname = os.path.splitext(output_fname)[0] + file_ext
+ else:
+ output_fname = args.output
+ assert not os.path.isfile(output_fname), output_fname
+ output_file = cv2.VideoWriter(
+ filename=output_fname,
+ # some installation of opencv may not support x264 (due to its license),
+ # you can try other format (e.g. MPEG)
+ fourcc=cv2.VideoWriter_fourcc(*codec),
+ fps=float(frames_per_second),
+ frameSize=(width, height),
+ isColor=True,
+ )
+ assert os.path.isfile(args.video_input)
+ for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
+ if args.output:
+ output_file.write(vis_frame)
+ else:
+ cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
+ cv2.imshow(basename, vis_frame)
+ if cv2.waitKey(1) == 27:
+ break # esc to quit
+ video.release()
+ if args.output:
+ output_file.release()
+ else:
+ cv2.destroyAllWindows()
diff --git a/vendor/detectron2/demo/predictor.py b/vendor/detectron2/demo/predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b7ebd3f846850172c1f560f8492d51e5667f76d
--- /dev/null
+++ b/vendor/detectron2/demo/predictor.py
@@ -0,0 +1,220 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import atexit
+import bisect
+import multiprocessing as mp
+from collections import deque
+import cv2
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.utils.video_visualizer import VideoVisualizer
+from detectron2.utils.visualizer import ColorMode, Visualizer
+
+
+class VisualizationDemo(object):
+ def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
+ """
+ Args:
+ cfg (CfgNode):
+ instance_mode (ColorMode):
+ parallel (bool): whether to run the model in different processes from visualization.
+ Useful since the visualization logic can be slow.
+ """
+ self.metadata = MetadataCatalog.get(
+ cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
+ )
+ self.cpu_device = torch.device("cpu")
+ self.instance_mode = instance_mode
+
+ self.parallel = parallel
+ if parallel:
+ num_gpu = torch.cuda.device_count()
+ self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
+ else:
+ self.predictor = DefaultPredictor(cfg)
+
+ def run_on_image(self, image):
+ """
+ Args:
+ image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+ This is the format used by OpenCV.
+
+ Returns:
+ predictions (dict): the output of the model.
+ vis_output (VisImage): the visualized image output.
+ """
+ vis_output = None
+ predictions = self.predictor(image)
+ # Convert image from OpenCV BGR format to Matplotlib RGB format.
+ image = image[:, :, ::-1]
+ visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
+ if "panoptic_seg" in predictions:
+ panoptic_seg, segments_info = predictions["panoptic_seg"]
+ vis_output = visualizer.draw_panoptic_seg_predictions(
+ panoptic_seg.to(self.cpu_device), segments_info
+ )
+ else:
+ if "sem_seg" in predictions:
+ vis_output = visualizer.draw_sem_seg(
+ predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+ )
+ if "instances" in predictions:
+ instances = predictions["instances"].to(self.cpu_device)
+ vis_output = visualizer.draw_instance_predictions(predictions=instances)
+
+ return predictions, vis_output
+
+ def _frame_from_video(self, video):
+ while video.isOpened():
+ success, frame = video.read()
+ if success:
+ yield frame
+ else:
+ break
+
+ def run_on_video(self, video):
+ """
+ Visualizes predictions on frames of the input video.
+
+ Args:
+ video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
+ either a webcam or a video file.
+
+ Yields:
+ ndarray: BGR visualizations of each video frame.
+ """
+ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
+
+ def process_predictions(frame, predictions):
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ if "panoptic_seg" in predictions:
+ panoptic_seg, segments_info = predictions["panoptic_seg"]
+ vis_frame = video_visualizer.draw_panoptic_seg_predictions(
+ frame, panoptic_seg.to(self.cpu_device), segments_info
+ )
+ elif "instances" in predictions:
+ predictions = predictions["instances"].to(self.cpu_device)
+ vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
+ elif "sem_seg" in predictions:
+ vis_frame = video_visualizer.draw_sem_seg(
+ frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+ )
+
+ # Converts Matplotlib RGB format to OpenCV BGR format
+ vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
+ return vis_frame
+
+ frame_gen = self._frame_from_video(video)
+ if self.parallel:
+ buffer_size = self.predictor.default_buffer_size
+
+ frame_data = deque()
+
+ for cnt, frame in enumerate(frame_gen):
+ frame_data.append(frame)
+ self.predictor.put(frame)
+
+ if cnt >= buffer_size:
+ frame = frame_data.popleft()
+ predictions = self.predictor.get()
+ yield process_predictions(frame, predictions)
+
+ while len(frame_data):
+ frame = frame_data.popleft()
+ predictions = self.predictor.get()
+ yield process_predictions(frame, predictions)
+ else:
+ for frame in frame_gen:
+ yield process_predictions(frame, self.predictor(frame))
+
+
+class AsyncPredictor:
+ """
+ A predictor that runs the model asynchronously, possibly on >1 GPUs.
+ Because rendering the visualization takes a considerable amount of time,
+ this helps improve throughput a little bit when rendering videos.
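+
+ A minimal usage sketch (assuming a prepared CfgNode `cfg` and a BGR image `frame`):
+
+ predictor = AsyncPredictor(cfg, num_gpus=1)
+ predictor.put(frame)
+ predictions = predictor.get() # results come back in submission order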
+ """
+
+ class _StopToken:
+ pass
+
+ class _PredictWorker(mp.Process):
+ def __init__(self, cfg, task_queue, result_queue):
+ self.cfg = cfg
+ self.task_queue = task_queue
+ self.result_queue = result_queue
+ super().__init__()
+
+ def run(self):
+ predictor = DefaultPredictor(self.cfg)
+
+ while True:
+ task = self.task_queue.get()
+ if isinstance(task, AsyncPredictor._StopToken):
+ break
+ idx, data = task
+ result = predictor(data)
+ self.result_queue.put((idx, result))
+
+ def __init__(self, cfg, num_gpus: int = 1):
+ """
+ Args:
+ cfg (CfgNode):
+ num_gpus (int): if 0, will run on CPU
+ """
+ num_workers = max(num_gpus, 1)
+ self.task_queue = mp.Queue(maxsize=num_workers * 3)
+ self.result_queue = mp.Queue(maxsize=num_workers * 3)
+ self.procs = []
+ for gpuid in range(max(num_gpus, 1)):
+ cfg = cfg.clone()
+ cfg.defrost()
+ cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
+ self.procs.append(
+ AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
+ )
+
+ self.put_idx = 0
+ self.get_idx = 0
+ self.result_rank = []
+ self.result_data = []
+
+ for p in self.procs:
+ p.start()
+ atexit.register(self.shutdown)
+
+ def put(self, image):
+ self.put_idx += 1
+ self.task_queue.put((self.put_idx, image))
+
+ def get(self):
+ self.get_idx += 1 # the index needed for this request
+ if len(self.result_rank) and self.result_rank[0] == self.get_idx:
+ res = self.result_data[0]
+ del self.result_data[0], self.result_rank[0]
+ return res
+
+ while True:
+ # make sure the results are returned in the correct order
+ idx, res = self.result_queue.get()
+ if idx == self.get_idx:
+ return res
+ insert = bisect.bisect(self.result_rank, idx)
+ self.result_rank.insert(insert, idx)
+ self.result_data.insert(insert, res)
+
+ def __len__(self):
+ return self.put_idx - self.get_idx
+
+ def __call__(self, image):
+ self.put(image)
+ return self.get()
+
+ def shutdown(self):
+ for _ in self.procs:
+ self.task_queue.put(AsyncPredictor._StopToken())
+
+ @property
+ def default_buffer_size(self):
+ return len(self.procs) * 5
diff --git a/vendor/detectron2/detectron2/__init__.py b/vendor/detectron2/detectron2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdd994b49294485c27610772f97f177741f5518f
--- /dev/null
+++ b/vendor/detectron2/detectron2/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from .utils.env import setup_environment
+
+setup_environment()
+
+
+# This line will be programmatically read/written by setup.py.
+# Leave it at the bottom of this file and don't touch it.
+__version__ = "0.6"
diff --git a/vendor/detectron2/detectron2/checkpoint/__init__.py b/vendor/detectron2/detectron2/checkpoint/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..99da0469ae7e169d8970e4b642fed3f870076860
--- /dev/null
+++ b/vendor/detectron2/detectron2/checkpoint/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+# File:
+
+
+from . import catalog as _UNUSED # register the handler
+from .detection_checkpoint import DetectionCheckpointer
+from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
+
+__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
diff --git a/vendor/detectron2/detectron2/checkpoint/c2_model_loading.py b/vendor/detectron2/detectron2/checkpoint/c2_model_loading.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6de2a3c830089aa7a0d27df96bb4a45fc5a7b0d
--- /dev/null
+++ b/vendor/detectron2/detectron2/checkpoint/c2_model_loading.py
@@ -0,0 +1,412 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import re
+from typing import Dict, List
+import torch
+from tabulate import tabulate
+
+
+def convert_basic_c2_names(original_keys):
+ """
+ Apply some basic name conversion to names in C2 weights.
+ It only deals with typical backbone models.
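+ For example, "res2_0_branch2a_bn_s" is mapped to "res2.0.conv1.norm.weight".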
+
+ Args:
+ original_keys (list[str]):
+ Returns:
+ list[str]: The same number of strings matching those in original_keys.
+ """
+ layer_keys = copy.deepcopy(original_keys)
+ layer_keys = [
+ {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
+ ] # some hard-coded mappings
+
+ layer_keys = [k.replace("_", ".") for k in layer_keys]
+ layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
+ layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
+ # Uniform both bn and gn names to "norm"
+ layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
+ layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
+ layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
+ layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]
+
+ # stem
+ layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
+ # to avoid mis-matching with "conv1" in other components (e.g. detection head)
+ layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]
+
+ # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5)
+ # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
+ # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
+ # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
+ # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]
+
+ # blocks
+ layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
+ layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
+ layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
+ layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]
+
+ # DensePose substitutions
+ layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
+ layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
+ layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
+ layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
+ layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
+ return layer_keys
+
+
+def convert_c2_detectron_names(weights):
+ """
+ Map Caffe2 Detectron weight names to Detectron2 names.
+
+ Args:
+ weights (dict): name -> tensor
+
+ Returns:
+ dict: detectron2 names -> tensor
+ dict: detectron2 names -> C2 names
+ """
+ logger = logging.getLogger(__name__)
+ logger.info("Renaming Caffe2 weights ......")
+ original_keys = sorted(weights.keys())
+ layer_keys = copy.deepcopy(original_keys)
+
+ layer_keys = convert_basic_c2_names(layer_keys)
+
+ # --------------------------------------------------------------------------
+ # RPN hidden representation conv
+ # --------------------------------------------------------------------------
+ # FPN case
+ # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
+ # shared for all other levels, hence the appearance of "fpn2"
+ layer_keys = [
+ k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
+ ]
+ # Non-FPN case
+ layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]
+
+ # --------------------------------------------------------------------------
+ # RPN box transformation conv
+ # --------------------------------------------------------------------------
+ # FPN case (see note above about "fpn2")
+ layer_keys = [
+ k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
+ for k in layer_keys
+ ]
+ layer_keys = [
+ k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
+ for k in layer_keys
+ ]
+ # Non-FPN case
+ layer_keys = [
+ k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
+ ]
+ layer_keys = [
+ k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
+ for k in layer_keys
+ ]
+
+ # --------------------------------------------------------------------------
+ # Fast R-CNN box head
+ # --------------------------------------------------------------------------
+ layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
+ layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
+ layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
+ layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
+ # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
+ layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]
+
+ # --------------------------------------------------------------------------
+ # FPN lateral and output convolutions
+ # --------------------------------------------------------------------------
+ def fpn_map(name):
+ """
+ Look for keys with the following patterns:
+ 1) Starts with "fpn.inner."
+ Example: "fpn.inner.res2.2.sum.lateral.weight"
+ Meaning: These are lateral pathway convolutions
+ 2) Starts with "fpn.res"
+ Example: "fpn.res2.2.sum.weight"
+ Meaning: These are FPN output convolutions
+ """
+ splits = name.split(".")
+ norm = ".norm" if "norm" in splits else ""
+ if name.startswith("fpn.inner."):
+ # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
+ stage = int(splits[2][len("res") :])
+ return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
+ elif name.startswith("fpn.res"):
+ # splits example: ['fpn', 'res2', '2', 'sum', 'weight']
+ stage = int(splits[1][len("res") :])
+ return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
+ return name
+
+ layer_keys = [fpn_map(k) for k in layer_keys]
+
+ # --------------------------------------------------------------------------
+ # Mask R-CNN mask head
+ # --------------------------------------------------------------------------
+ # roi_heads.StandardROIHeads case
+ layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
+ layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
+ layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
+ # roi_heads.Res5ROIHeads case
+ layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]
+
+ # --------------------------------------------------------------------------
+ # Keypoint R-CNN head
+ # --------------------------------------------------------------------------
+ # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
+ layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
+ layer_keys = [
+ k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
+ ]
+ layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]
+
+ # --------------------------------------------------------------------------
+ # Done with replacements
+ # --------------------------------------------------------------------------
+ assert len(set(layer_keys)) == len(layer_keys)
+ assert len(original_keys) == len(layer_keys)
+
+ new_weights = {}
+ new_keys_to_original_keys = {}
+ for orig, renamed in zip(original_keys, layer_keys):
+ new_keys_to_original_keys[renamed] = orig
+ if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
+ # remove the meaningless prediction weight for background class
+ new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
+ new_weights[renamed] = weights[orig][new_start_idx:]
+ logger.info(
+ "Remove prediction weight for background class in {}. The shape changes from "
+ "{} to {}.".format(
+ renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
+ )
+ )
+ elif renamed.startswith("cls_score."):
+ # move weights of bg class from original index 0 to last index
+ logger.info(
+ "Move classification weights for background class in {} from index 0 to "
+ "index {}.".format(renamed, weights[orig].shape[0] - 1)
+ )
+ new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
+ else:
+ new_weights[renamed] = weights[orig]
+
+ return new_weights, new_keys_to_original_keys
+
+
+# Note the current matching is not symmetric.
+# it assumes model_state_dict will have longer names.
+def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
+ """
+ Match names between the two state dicts, and return a new ckpt_state_dict with names
+ converted to match model_state_dict with heuristics. The returned dict can be later
+ loaded with fvcore checkpointer.
+ If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
+ model and will be renamed at first.
+
+ Strategy: suppose that the models that we will create will have prefixes appended
+ to each of its keys, for example due to an extra level of nesting that the original
+ pre-trained weights from ImageNet won't contain. For example, model.state_dict()
+ might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
+ res2.conv1.weight. We thus want to match both parameters together.
+ For that, we look for each model weight, look among all loaded keys if there is one
+ that is a suffix of the current weight name, and use it if that's the case.
+ If multiple matches exist, take the one with longest size
+ of the corresponding name. For example, for the same model as before, the pretrained
+ weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
+ we want to match backbone[0].body.conv1.weight to conv1.weight, and
+ backbone[0].body.res2.conv1.weight to res2.conv1.weight.
+ """
+ model_keys = sorted(model_state_dict.keys())
+ if c2_conversion:
+ ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
+ # original_keys: the name in the original dict (before renaming)
+ else:
+ original_keys = {x: x for x in ckpt_state_dict.keys()}
+ ckpt_keys = sorted(ckpt_state_dict.keys())
+
+ def match(a, b):
+ # Matched ckpt_key should be a complete (starts with '.') suffix.
+ # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
+ # but matches whatever_conv1 or mesh_head.whatever_conv1.
+ return a == b or a.endswith("." + b)
+
+ # get a matrix of string matches, where each (i, j) entry correspond to the size of the
+ # ckpt_key string, if it matches
+ match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
+ match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
+ # use the matched one with longest size in case of multiple matches
+ max_match_size, idxs = match_matrix.max(1)
+ # remove indices that correspond to no-match
+ idxs[max_match_size == 0] = -1
+
+ logger = logging.getLogger(__name__)
+ # matched_pairs (matched checkpoint key --> matched model key)
+ matched_keys = {}
+ result_state_dict = {}
+ for idx_model, idx_ckpt in enumerate(idxs.tolist()):
+ if idx_ckpt == -1:
+ continue
+ key_model = model_keys[idx_model]
+ key_ckpt = ckpt_keys[idx_ckpt]
+ value_ckpt = ckpt_state_dict[key_ckpt]
+ shape_in_model = model_state_dict[key_model].shape
+
+ if shape_in_model != value_ckpt.shape:
+ logger.warning(
+ "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
+ key_ckpt, value_ckpt.shape, key_model, shape_in_model
+ )
+ )
+ logger.warning(
+ "{} will not be loaded. Please double check and see if this is desired.".format(
+ key_ckpt
+ )
+ )
+ continue
+
+ assert key_model not in result_state_dict
+ result_state_dict[key_model] = value_ckpt
+ if key_ckpt in matched_keys: # already added to matched_keys
+ logger.error(
+ "Ambiguity found for {} in checkpoint!"
+ "It matches at least two keys in the model ({} and {}).".format(
+ key_ckpt, key_model, matched_keys[key_ckpt]
+ )
+ )
+ raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
+
+ matched_keys[key_ckpt] = key_model
+
+ # logging:
+ matched_model_keys = sorted(matched_keys.values())
+ if len(matched_model_keys) == 0:
+ logger.warning("No weights in checkpoint matched with model.")
+ return ckpt_state_dict
+ common_prefix = _longest_common_prefix(matched_model_keys)
+ rev_matched_keys = {v: k for k, v in matched_keys.items()}
+ original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}
+
+ model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
+ table = []
+ memo = set()
+ for key_model in matched_model_keys:
+ if key_model in memo:
+ continue
+ if key_model in model_key_groups:
+ group = model_key_groups[key_model]
+ memo |= set(group)
+ shapes = [tuple(model_state_dict[k].shape) for k in group]
+ table.append(
+ (
+ _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
+ _group_str([original_keys[k] for k in group]),
+ " ".join([str(x).replace(" ", "") for x in shapes]),
+ )
+ )
+ else:
+ key_checkpoint = original_keys[key_model]
+ shape = str(tuple(model_state_dict[key_model].shape))
+ table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
+ table_str = tabulate(
+ table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"]
+ )
+ logger.info(
+ "Following weights matched with "
+ + (f"submodule {common_prefix[:-1]}" if common_prefix else "model")
+ + ":\n"
+ + table_str
+ )
+
+ unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())]
+ for k in unmatched_ckpt_keys:
+ result_state_dict[k] = ckpt_state_dict[k]
+ return result_state_dict
+
+
+def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
+ """
+ Params in the same submodule are grouped together.
+
+ Args:
+ keys: names of all parameters
+ original_names: mapping from parameter name to their name in the checkpoint
+
+ Returns:
+ dict[name -> all other names in the same group]
+ """
+
+ def _submodule_name(key):
+ pos = key.rfind(".")
+ if pos < 0:
+ return None
+ prefix = key[: pos + 1]
+ return prefix
+
+ all_submodules = [_submodule_name(k) for k in keys]
+ all_submodules = [x for x in all_submodules if x]
+ all_submodules = sorted(all_submodules, key=len)
+
+ ret = {}
+ for prefix in all_submodules:
+ group = [k for k in keys if k.startswith(prefix)]
+ if len(group) <= 1:
+ continue
+ original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group])
+ if len(original_name_lcp) == 0:
+ # don't group weights if original names don't share prefix
+ continue
+
+ for k in group:
+ if k in ret:
+ continue
+ ret[k] = group
+ return ret
+
+
+def _longest_common_prefix(names: List[str]) -> str:
+ """
+ ["abc.zfg", "abc.zef"] -> "abc."
+ """
+ names = [n.split(".") for n in names]
+ m1, m2 = min(names), max(names)
+ ret = [a for a, b in zip(m1, m2) if a == b]
+ ret = ".".join(ret) + "." if len(ret) else ""
+ return ret
+
+
+def _longest_common_prefix_str(names: List[str]) -> str:
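+ # Character-level variant of _longest_common_prefix, e.g. ["abcd", "abce"] -> "abc".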
+ m1, m2 = min(names), max(names)
+ lcp = []
+ for a, b in zip(m1, m2):
+ if a == b:
+ lcp.append(a)
+ else:
+ break
+ lcp = "".join(lcp)
+ return lcp
+
+
+def _group_str(names: List[str]) -> str:
+ """
+ Turn "common1", "common2", "common3" into "common{1,2,3}"
+ """
+ lcp = _longest_common_prefix_str(names)
+ rest = [x[len(lcp) :] for x in names]
+ rest = "{" + ",".join(rest) + "}"
+ ret = lcp + rest
+
+ # add some simplification for BN specifically
+ ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*")
+ ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*")
+ return ret
diff --git a/vendor/detectron2/detectron2/checkpoint/catalog.py b/vendor/detectron2/detectron2/checkpoint/catalog.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a85736754a0de4550df96c22f38fc515bd02d71
--- /dev/null
+++ b/vendor/detectron2/detectron2/checkpoint/catalog.py
@@ -0,0 +1,115 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+
+from detectron2.utils.file_io import PathHandler, PathManager
+
+
+class ModelCatalog(object):
+ """
+ Store mappings from names to third-party models.
+ """
+
+ S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
+
+ # MSRA models have STRIDE_IN_1X1=True. False otherwise.
+ # NOTE: all BN models here have fused BN into an affine layer.
+ # As a result, you should only load them to a model with "FrozenBN".
+ # Loading them to a model with regular BN or SyncBN is wrong.
+ # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
+ # which should be negligible for training.
+ # NOTE: all models here uses PIXEL_STD=[1,1,1]
+ # NOTE: Most of the BN models here are no longer used. We use the
+ # re-converted pre-trained models under detectron2 model zoo instead.
+ C2_IMAGENET_MODELS = {
+ "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
+ "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
+ "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
+ "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
+ "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
+ "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
+ "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
+ }
+
+ C2_DETECTRON_PATH_FORMAT = (
+ "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950
+ )
+
+ C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
+ C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
+
+ # format: {model_name} -> part of the url
+ C2_DETECTRON_MODELS = {
+ "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950
+ "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950
+ "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950
+ "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950
+ "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950
+ "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950
+ "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950
+ "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950
+ "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950
+ "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950
+ "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950
+ "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950
+ "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950
+ }
+
+ @staticmethod
+ def get(name):
+ if name.startswith("Caffe2Detectron/COCO"):
+ return ModelCatalog._get_c2_detectron_baseline(name)
+ if name.startswith("ImageNetPretrained/"):
+ return ModelCatalog._get_c2_imagenet_pretrained(name)
+ raise RuntimeError("model not present in the catalog: {}".format(name))
+
+ @staticmethod
+ def _get_c2_imagenet_pretrained(name):
+ prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
+ name = name[len("ImageNetPretrained/") :]
+ name = ModelCatalog.C2_IMAGENET_MODELS[name]
+ url = "/".join([prefix, name])
+ return url
+
+ @staticmethod
+ def _get_c2_detectron_baseline(name):
+ name = name[len("Caffe2Detectron/COCO/") :]
+ url = ModelCatalog.C2_DETECTRON_MODELS[name]
+ if "keypoint_rcnn" in name:
+ dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
+ else:
+ dataset = ModelCatalog.C2_DATASET_COCO
+
+ if "35998355/rpn_R-50-C4_1x" in name:
+ # this one model is somehow different from others ..
+ type = "rpn"
+ else:
+ type = "generalized_rcnn"
+
+ # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
+ url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
+ prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
+ )
+ return url
+
+
+class ModelCatalogHandler(PathHandler):
+ """
+ Resolve URL like catalog://.
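+
+ For example, "catalog://ImageNetPretrained/MSRA/R-50" is resolved through
+ ModelCatalog.get() to the matching .pkl URL under S3_C2_DETECTRON_PREFIX.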
+ """
+
+ PREFIX = "catalog://"
+
+ def _get_supported_prefixes(self):
+ return [self.PREFIX]
+
+ def _get_local_path(self, path, **kwargs):
+ logger = logging.getLogger(__name__)
+ catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
+ logger.info("Catalog entry {} points to {}".format(path, catalog_path))
+ return PathManager.get_local_path(catalog_path, **kwargs)
+
+ def _open(self, path, mode="r", **kwargs):
+ return PathManager.open(self._get_local_path(path), mode, **kwargs)
+
+
+PathManager.register_handler(ModelCatalogHandler())
diff --git a/vendor/detectron2/detectron2/checkpoint/detection_checkpoint.py b/vendor/detectron2/detectron2/checkpoint/detection_checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..cecb1fc2cfe46283b47096bcbcb2be3181431bf2
--- /dev/null
+++ b/vendor/detectron2/detectron2/checkpoint/detection_checkpoint.py
@@ -0,0 +1,143 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import os
+import pickle
+from urllib.parse import parse_qs, urlparse
+import torch
+from fvcore.common.checkpoint import Checkpointer
+from torch.nn.parallel import DistributedDataParallel
+
+import detectron2.utils.comm as comm
+from detectron2.utils.file_io import PathManager
+
+from .c2_model_loading import align_and_update_state_dicts
+
+
+class DetectionCheckpointer(Checkpointer):
+ """
+ Same as :class:`Checkpointer`, but is able to:
+ 1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models.
+ 2. correctly load checkpoints that are only available on the master worker
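+
+ A minimal usage sketch (paths are placeholders):
+
+ checkpointer = DetectionCheckpointer(model, save_dir="output")
+ checkpointer.load("output/model_final.pth") # .pkl model-zoo files are also handled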
+ """
+
+ def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
+ is_main_process = comm.is_main_process()
+ super().__init__(
+ model,
+ save_dir,
+ save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
+ **checkpointables,
+ )
+ self.path_manager = PathManager
+ self._parsed_url_during_load = None
+
+ def load(self, path, *args, **kwargs):
+ assert self._parsed_url_during_load is None
+ need_sync = False
+ logger = logging.getLogger(__name__)
+ logger.info("[DetectionCheckpointer] Loading from {} ...".format(path))
+
+ if path and isinstance(self.model, DistributedDataParallel):
+ path = self.path_manager.get_local_path(path)
+ has_file = os.path.isfile(path)
+ all_has_file = comm.all_gather(has_file)
+ if not all_has_file[0]:
+ raise OSError(f"File {path} not found on main worker.")
+ if not all(all_has_file):
+ logger.warning(
+ f"Not all workers can read checkpoint {path}. "
+ "Training may fail to fully resume."
+ )
+ # TODO: broadcast the checkpoint file contents from main
+ # worker, and load from it instead.
+ need_sync = True
+ if not has_file:
+ path = None # don't load if not readable
+
+ if path:
+ parsed_url = urlparse(path)
+ self._parsed_url_during_load = parsed_url
+ path = parsed_url._replace(query="").geturl() # remove query from filename
+ path = self.path_manager.get_local_path(path)
+ ret = super().load(path, *args, **kwargs)
+
+ if need_sync:
+ logger.info("Broadcasting model states from main worker ...")
+ self.model._sync_params_and_buffers()
+ self._parsed_url_during_load = None # reset to None
+ return ret
+
+ def _load_file(self, filename):
+ if filename.endswith(".pkl"):
+ with PathManager.open(filename, "rb") as f:
+ data = pickle.load(f, encoding="latin1")
+ if "model" in data and "__author__" in data:
+ # file is in Detectron2 model zoo format
+ self.logger.info("Reading a file from '{}'".format(data["__author__"]))
+ return data
+ else:
+ # assume file is from Caffe2 / Detectron1 model zoo
+ if "blobs" in data:
+ # Detection models have "blobs", but ImageNet models don't
+ data = data["blobs"]
+ data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
+ return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
+ elif filename.endswith(".pyth"):
+ # assume file is from pycls; no one else seems to use the ".pyth" extension
+ with PathManager.open(filename, "rb") as f:
+ data = torch.load(f)
+ assert (
+ "model_state" in data
+ ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'."
+ model_state = {
+ k: v
+ for k, v in data["model_state"].items()
+ if not k.endswith("num_batches_tracked")
+ }
+ return {"model": model_state, "__author__": "pycls", "matching_heuristics": True}
+
+ loaded = self._torch_load(filename)
+ if "model" not in loaded:
+ loaded = {"model": loaded}
+ assert self._parsed_url_during_load is not None, "`_load_file` must be called inside `load`"
+ parsed_url = self._parsed_url_during_load
+ queries = parse_qs(parsed_url.query)
+ if queries.pop("matching_heuristics", "False") == ["True"]:
+ loaded["matching_heuristics"] = True
+ if len(queries) > 0:
+ raise ValueError(
+ f"Unsupported query remaining: f{queries}, orginal filename: {parsed_url.geturl()}"
+ )
+ return loaded
+
+ def _torch_load(self, f):
+ return super()._load_file(f)
+
+ def _load_model(self, checkpoint):
+ if checkpoint.get("matching_heuristics", False):
+ self._convert_ndarray_to_tensor(checkpoint["model"])
+ # convert weights by name-matching heuristics
+ checkpoint["model"] = align_and_update_state_dicts(
+ self.model.state_dict(),
+ checkpoint["model"],
+ c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
+ )
+ # for non-caffe2 models, use standard ways to load it
+ incompatible = super()._load_model(checkpoint)
+
+ model_buffers = dict(self.model.named_buffers(recurse=False))
+ for k in ["pixel_mean", "pixel_std"]:
+ # Ignore missing key message about pixel_mean/std.
+ # Though they may be missing in old checkpoints, they will be correctly
+ # initialized from config anyway.
+ if k in model_buffers:
+ try:
+ incompatible.missing_keys.remove(k)
+ except ValueError:
+ pass
+ for k in incompatible.unexpected_keys[:]:
+ # Ignore unexpected keys about cell anchors. They exist in old checkpoints
+ # but now they are non-persistent buffers and will not be in new checkpoints.
+ if "anchor_generator.cell_anchors" in k:
+ incompatible.unexpected_keys.remove(k)
+ return incompatible
diff --git a/vendor/detectron2/detectron2/config/__init__.py b/vendor/detectron2/detectron2/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e648e632d55c70f160d49630378d202fbde4e45
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/__init__.py
@@ -0,0 +1,24 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .compat import downgrade_config, upgrade_config
+from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
+from .instantiate import instantiate
+from .lazy import LazyCall, LazyConfig
+
+__all__ = [
+ "CfgNode",
+ "get_cfg",
+ "global_cfg",
+ "set_global_cfg",
+ "downgrade_config",
+ "upgrade_config",
+ "configurable",
+ "instantiate",
+ "LazyCall",
+ "LazyConfig",
+]
+
+
+from detectron2.utils.env import fixup_module_metadata
+
+fixup_module_metadata(__name__, globals(), __all__)
+del fixup_module_metadata
diff --git a/vendor/detectron2/detectron2/config/compat.py b/vendor/detectron2/detectron2/config/compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..11a08c439bf14defd880e37a938fab8a08e68eeb
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/compat.py
@@ -0,0 +1,229 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+Backward compatibility of configs.
+
+Instructions to bump version:
++ There is no need to bump the version if new keys are added.
+ It's only needed when backward-incompatible changes happen
+ (i.e., some existing keys disappear, or the meaning of a key changes)
++ To bump version, do the following:
+ 1. Increment _C.VERSION in defaults.py
+ 2. Add a converter in this file.
+
+ Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
+ and a function "downgrade" which in-place downgrades config from X to X-1
+
+ In each function, VERSION is left unchanged.
+
+ Each converter assumes that its input has the relevant keys
+ (i.e., the input is not a partial config).
+ 3. Run the tests (test_config.py) to make sure the upgrade & downgrade
+ functions are consistent.
+"""
+
+import logging
+from typing import List, Optional, Tuple
+
+from .config import CfgNode as CN
+from .defaults import _C
+
+__all__ = ["upgrade_config", "downgrade_config"]
+
+
+def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
+ """
+ Upgrade a config from its current version to a newer version.
+
+ Args:
+ cfg (CfgNode):
+ to_version (int): defaults to the latest version.
+ """
+ cfg = cfg.clone()
+ if to_version is None:
+ to_version = _C.VERSION
+
+ assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
+ cfg.VERSION, to_version
+ )
+ for k in range(cfg.VERSION, to_version):
+ converter = globals()["ConverterV" + str(k + 1)]
+ converter.upgrade(cfg)
+ cfg.VERSION = k + 1
+ return cfg
+
+
+def downgrade_config(cfg: CN, to_version: int) -> CN:
+ """
+ Downgrade a config from its current version to an older version.
+
+ Args:
+ cfg (CfgNode):
+ to_version (int):
+
+ Note:
+ A general downgrade of arbitrary configs is not always possible due to the
+ different functionalities in different versions.
+ The purpose of downgrade is only to recover the defaults in old versions,
+ allowing it to load an old partial yaml config.
+ Therefore, the implementation only needs to fill in the default values
+ in the old version when a general downgrade is not possible.
+ """
+ cfg = cfg.clone()
+ assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
+ cfg.VERSION, to_version
+ )
+ for k in range(cfg.VERSION, to_version, -1):
+ converter = globals()["ConverterV" + str(k)]
+ converter.downgrade(cfg)
+ cfg.VERSION = k - 1
+ return cfg
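+
+# Illustrative usage sketch (comment only; assumes `get_cfg` is imported from
+# detectron2.config alongside these helpers):
+#
+#   cfg = get_cfg()                             # full config at the latest VERSION
+#   old = downgrade_config(cfg, to_version=1)   # recover the v1 defaults
+#   new = upgrade_config(old)                   # convert back to the latest version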
+
+
+def guess_version(cfg: CN, filename: str) -> int:
+ """
+ Guess the version of a partial config where the VERSION field is not specified.
+ Returns the version, or the latest if cannot make a guess.
+
+ This makes it easier for users to migrate.
+ """
+ logger = logging.getLogger(__name__)
+
+ def _has(name: str) -> bool:
+ cur = cfg
+ for n in name.split("."):
+ if n not in cur:
+ return False
+ cur = cur[n]
+ return True
+
+ # Most users' partial configs have "MODEL.WEIGHT", so guess on it
+ ret = None
+ if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"):
+ ret = 1
+
+ if ret is not None:
+ logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
+ else:
+ ret = _C.VERSION
+ logger.warning(
+ "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
+ filename, ret
+ )
+ )
+ return ret
+
+
+def _rename(cfg: CN, old: str, new: str) -> None:
+ old_keys = old.split(".")
+ new_keys = new.split(".")
+
+ def _set(key_seq: List[str], val: str) -> None:
+ cur = cfg
+ for k in key_seq[:-1]:
+ if k not in cur:
+ cur[k] = CN()
+ cur = cur[k]
+ cur[key_seq[-1]] = val
+
+ def _get(key_seq: List[str]) -> CN:
+ cur = cfg
+ for k in key_seq:
+ cur = cur[k]
+ return cur
+
+ def _del(key_seq: List[str]) -> None:
+ cur = cfg
+ for k in key_seq[:-1]:
+ cur = cur[k]
+ del cur[key_seq[-1]]
+ if len(cur) == 0 and len(key_seq) > 1:
+ _del(key_seq[:-1])
+
+ _set(new_keys, _get(old_keys))
+ _del(old_keys)
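+    # e.g. _rename(cfg, "MODEL.WEIGHT", "MODEL.WEIGHTS") moves the value to the new
+    # key and deletes the old key, pruning any parent nodes that become empty.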
+
+
+class _RenameConverter:
+ """
+ A converter that handles simple rename.
+ """
+
+ RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name)
+
+ @classmethod
+ def upgrade(cls, cfg: CN) -> None:
+ for old, new in cls.RENAME:
+ _rename(cfg, old, new)
+
+ @classmethod
+ def downgrade(cls, cfg: CN) -> None:
+ for old, new in cls.RENAME[::-1]:
+ _rename(cfg, new, old)
+
+
+class ConverterV1(_RenameConverter):
+ RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]
+
+
+class ConverterV2(_RenameConverter):
+ """
+ A large bulk of rename, before public release.
+ """
+
+ RENAME = [
+ ("MODEL.WEIGHT", "MODEL.WEIGHTS"),
+ ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
+ ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
+ ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
+ ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
+ (
+ "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
+ "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
+ ),
+ (
+ "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
+ "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
+ ),
+ (
+ "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
+ "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
+ ),
+ ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
+ ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
+ ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
+ ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
+ ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
+ ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
+ ("TEST.AUG_ON", "TEST.AUG.ENABLED"),
+ ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
+ ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
+ ("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
+ ]
+
+ @classmethod
+ def upgrade(cls, cfg: CN) -> None:
+ super().upgrade(cfg)
+
+ if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
+ _rename(
+ cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
+ )
+ _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
+ del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
+ del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
+ else:
+ _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
+ _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]
+
+ @classmethod
+ def downgrade(cls, cfg: CN) -> None:
+ super().downgrade(cfg)
+
+ _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
+ _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
+ cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
+ cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
+ cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version
diff --git a/vendor/detectron2/detectron2/config/config.py b/vendor/detectron2/detectron2/config/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..49a55b1bc87509e2bb24b902ae12c21d5aaeda81
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/config.py
@@ -0,0 +1,265 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import functools
+import inspect
+import logging
+from fvcore.common.config import CfgNode as _CfgNode
+
+from detectron2.utils.file_io import PathManager
+
+
+class CfgNode(_CfgNode):
+ """
+ The same as `fvcore.common.config.CfgNode`, but different in:
+
+ 1. Use unsafe yaml loading by default.
+ Note that this may lead to arbitrary code execution: you must not
+ load a config file from untrusted sources before manually inspecting
+ the content of the file.
+ 2. Support config versioning.
+ When attempting to merge an old config, it will convert the old config automatically.
+
+ .. automethod:: clone
+ .. automethod:: freeze
+ .. automethod:: defrost
+ .. automethod:: is_frozen
+ .. automethod:: load_yaml_with_base
+ .. automethod:: merge_from_list
+ .. automethod:: merge_from_other_cfg
+ """
+
+ @classmethod
+ def _open_cfg(cls, filename):
+ return PathManager.open(filename, "r")
+
+ # Note that the default value of allow_unsafe is changed to True
+ def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
+ """
+ Load content from the given config file and merge it into self.
+
+ Args:
+ cfg_filename: config filename
+ allow_unsafe: allow unsafe yaml syntax
+ """
+ assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
+ loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
+ loaded_cfg = type(self)(loaded_cfg)
+
+ # defaults.py needs to import CfgNode
+ from .defaults import _C
+
+ latest_ver = _C.VERSION
+ assert (
+ latest_ver == self.VERSION
+ ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"
+
+ logger = logging.getLogger(__name__)
+
+ loaded_ver = loaded_cfg.get("VERSION", None)
+ if loaded_ver is None:
+ from .compat import guess_version
+
+ loaded_ver = guess_version(loaded_cfg, cfg_filename)
+ assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
+ loaded_ver, self.VERSION
+ )
+
+ if loaded_ver == self.VERSION:
+ self.merge_from_other_cfg(loaded_cfg)
+ else:
+ # compat.py needs to import CfgNode
+ from .compat import upgrade_config, downgrade_config
+
+ logger.warning(
+ "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
+ "See docs/CHANGELOG.md for instructions to update your files.".format(
+ loaded_ver, cfg_filename, self.VERSION
+ )
+ )
+ # To convert, first obtain a full config at an old version
+ old_self = downgrade_config(self, to_version=loaded_ver)
+ old_self.merge_from_other_cfg(loaded_cfg)
+ new_config = upgrade_config(old_self)
+ self.clear()
+ self.update(new_config)
+
+ def dump(self, *args, **kwargs):
+ """
+ Returns:
+ str: a yaml string representation of the config
+ """
+ # to make it show up in docs
+ return super().dump(*args, **kwargs)
+
+
+global_cfg = CfgNode()
+
+
+def get_cfg() -> CfgNode:
+ """
+ Get a copy of the default config.
+
+ Returns:
+ a detectron2 CfgNode instance.
+ """
+ from .defaults import _C
+
+ return _C.clone()
+
+
+def set_global_cfg(cfg: CfgNode) -> None:
+ """
+ Let the global config point to the given cfg.
+
+    Assume that the given "cfg" has the key "KEY". After calling
+ `set_global_cfg(cfg)`, the key can be accessed by:
+ ::
+ from detectron2.config import global_cfg
+ print(global_cfg.KEY)
+
+ By using a hacky global config, you can access these configs anywhere,
+ without having to pass the config object or the values deep into the code.
+ This is a hacky feature introduced for quick prototyping / research exploration.
+ """
+ global global_cfg
+ global_cfg.clear()
+ global_cfg.update(cfg)
+
+
+def configurable(init_func=None, *, from_config=None):
+ """
+ Decorate a function or a class's __init__ method so that it can be called
+ with a :class:`CfgNode` object using a :func:`from_config` function that translates
+ :class:`CfgNode` to arguments.
+
+ Examples:
+ ::
+ # Usage 1: Decorator on __init__:
+ class A:
+ @configurable
+ def __init__(self, a, b=2, c=3):
+ pass
+
+ @classmethod
+ def from_config(cls, cfg): # 'cfg' must be the first argument
+ # Returns kwargs to be passed to __init__
+ return {"a": cfg.A, "b": cfg.B}
+
+ a1 = A(a=1, b=2) # regular construction
+ a2 = A(cfg) # construct with a cfg
+ a3 = A(cfg, b=3, c=4) # construct with extra overwrite
+
+ # Usage 2: Decorator on any function. Needs an extra from_config argument:
+        @configurable(from_config=lambda cfg: {"a": cfg.A, "b": cfg.B})
+ def a_func(a, b=2, c=3):
+ pass
+
+ a1 = a_func(a=1, b=2) # regular call
+ a2 = a_func(cfg) # call with a cfg
+ a3 = a_func(cfg, b=3, c=4) # call with extra overwrite
+
+ Args:
+ init_func (callable): a class's ``__init__`` method in usage 1. The
+ class must have a ``from_config`` classmethod which takes `cfg` as
+ the first argument.
+ from_config (callable): the from_config function in usage 2. It must take `cfg`
+ as its first argument.
+ """
+
+ if init_func is not None:
+ assert (
+ inspect.isfunction(init_func)
+ and from_config is None
+ and init_func.__name__ == "__init__"
+ ), "Incorrect use of @configurable. Check API documentation for examples."
+
+ @functools.wraps(init_func)
+ def wrapped(self, *args, **kwargs):
+ try:
+ from_config_func = type(self).from_config
+ except AttributeError as e:
+ raise AttributeError(
+ "Class with @configurable must have a 'from_config' classmethod."
+ ) from e
+ if not inspect.ismethod(from_config_func):
+ raise TypeError("Class with @configurable must have a 'from_config' classmethod.")
+
+ if _called_with_cfg(*args, **kwargs):
+ explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
+ init_func(self, **explicit_args)
+ else:
+ init_func(self, *args, **kwargs)
+
+ return wrapped
+
+ else:
+ if from_config is None:
+ return configurable # @configurable() is made equivalent to @configurable
+ assert inspect.isfunction(
+ from_config
+ ), "from_config argument of configurable must be a function!"
+
+ def wrapper(orig_func):
+ @functools.wraps(orig_func)
+ def wrapped(*args, **kwargs):
+ if _called_with_cfg(*args, **kwargs):
+ explicit_args = _get_args_from_config(from_config, *args, **kwargs)
+ return orig_func(**explicit_args)
+ else:
+ return orig_func(*args, **kwargs)
+
+ wrapped.from_config = from_config
+ return wrapped
+
+ return wrapper
+
+
+def _get_args_from_config(from_config_func, *args, **kwargs):
+ """
+ Use `from_config` to obtain explicit arguments.
+
+ Returns:
+ dict: arguments to be used for cls.__init__
+ """
+ signature = inspect.signature(from_config_func)
+ if list(signature.parameters.keys())[0] != "cfg":
+ if inspect.isfunction(from_config_func):
+ name = from_config_func.__name__
+ else:
+ name = f"{from_config_func.__self__}.from_config"
+ raise TypeError(f"{name} must take 'cfg' as the first argument!")
+ support_var_arg = any(
+ param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
+ for param in signature.parameters.values()
+ )
+ if support_var_arg: # forward all arguments to from_config, if from_config accepts them
+ ret = from_config_func(*args, **kwargs)
+ else:
+ # forward supported arguments to from_config
+ supported_arg_names = set(signature.parameters.keys())
+ extra_kwargs = {}
+ for name in list(kwargs.keys()):
+ if name not in supported_arg_names:
+ extra_kwargs[name] = kwargs.pop(name)
+ ret = from_config_func(*args, **kwargs)
+ # forward the other arguments to __init__
+ ret.update(extra_kwargs)
+ return ret
+
+
+def _called_with_cfg(*args, **kwargs):
+ """
+ Returns:
+ bool: whether the arguments contain CfgNode and should be considered
+ forwarded to from_config.
+ """
+ from omegaconf import DictConfig
+
+ if len(args) and isinstance(args[0], (_CfgNode, DictConfig)):
+ return True
+ if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)):
+ return True
+ # `from_config`'s first argument is forced to be "cfg".
+ # So the above check covers all cases.
+ return False
diff --git a/vendor/detectron2/detectron2/config/defaults.py b/vendor/detectron2/detectron2/config/defaults.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd2a5f6b2de4af2caa1f65c64ab93a5e3ac21780
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/defaults.py
@@ -0,0 +1,650 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .config import CfgNode as CN
+
+# NOTE: given the new config system
+# (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html),
+# we will stop adding new functionalities to default CfgNode.
+
+# -----------------------------------------------------------------------------
+# Convention about Training / Test specific parameters
+# -----------------------------------------------------------------------------
+# Whenever an argument can be either used for training or for testing, the
+# corresponding name will be post-fixed by a _TRAIN for a training parameter,
+# or _TEST for a test-specific parameter.
+# For example, the number of images during training will be
+# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
+# IMAGES_PER_BATCH_TEST
+
+# -----------------------------------------------------------------------------
+# Config definition
+# -----------------------------------------------------------------------------
+
+_C = CN()
+
+# The version number, to upgrade from old configs to new ones if any
+# changes happen. It's recommended to keep a VERSION in your config file.
+_C.VERSION = 2
+
+_C.MODEL = CN()
+_C.MODEL.LOAD_PROPOSALS = False
+_C.MODEL.MASK_ON = False
+_C.MODEL.KEYPOINT_ON = False
+_C.MODEL.DEVICE = "cuda"
+_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
+
+# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file
+# to be loaded to the model. You can find available models in the model zoo.
+_C.MODEL.WEIGHTS = ""
+
+# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
+# To train on images of different number of channels, just set different mean & std.
+# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
+_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
+# When using pre-trained models in Detectron1 or any MSRA models,
+# std has been absorbed into its conv1 weights, so the std needs to be set 1.
+# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
+_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]
+
+
+# -----------------------------------------------------------------------------
+# INPUT
+# -----------------------------------------------------------------------------
+_C.INPUT = CN()
+# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
+# Please refer to ResizeShortestEdge for detailed definition.
+# Size of the smallest side of the image during training
+_C.INPUT.MIN_SIZE_TRAIN = (800,)
+# Sample size of smallest side by choice or random selection from range given by
+# INPUT.MIN_SIZE_TRAIN
+_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
+# Maximum size of the side of the image during training
+_C.INPUT.MAX_SIZE_TRAIN = 1333
+# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
+_C.INPUT.MIN_SIZE_TEST = 800
+# Maximum size of the side of the image during testing
+_C.INPUT.MAX_SIZE_TEST = 1333
+# Mode for flipping images used in data augmentation during training
+# choose one of ["horizontal", "vertical", "none"]
+_C.INPUT.RANDOM_FLIP = "horizontal"
+
+# `True` if cropping is used for data augmentation during training
+_C.INPUT.CROP = CN({"ENABLED": False})
+# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation.
+_C.INPUT.CROP.TYPE = "relative_range"
+# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
+# pixels if CROP.TYPE is "absolute"
+_C.INPUT.CROP.SIZE = [0.9, 0.9]
+
+
+# Whether the model needs RGB, YUV, HSV etc.
+# Should be one of the modes defined here, as we use PIL to read the image:
+# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
+# with BGR being the one exception. One can set the image format to BGR; we will
+# internally use RGB for conversion and flip the channels over
+_C.INPUT.FORMAT = "BGR"
+# The ground truth mask format that the model will use.
+# Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
+_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask"
+
+
+# -----------------------------------------------------------------------------
+# Dataset
+# -----------------------------------------------------------------------------
+_C.DATASETS = CN()
+# List of the dataset names for training. Must be registered in DatasetCatalog
+# Samples from these datasets will be merged and used as one dataset.
+_C.DATASETS.TRAIN = ()
+# List of the pre-computed proposal files for training, which must be consistent
+# with datasets listed in DATASETS.TRAIN.
+_C.DATASETS.PROPOSAL_FILES_TRAIN = ()
+# Number of top scoring precomputed proposals to keep for training
+_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
+# List of the dataset names for testing. Must be registered in DatasetCatalog
+_C.DATASETS.TEST = ()
+# List of the pre-computed proposal files for test, which must be consistent
+# with datasets listed in DATASETS.TEST.
+_C.DATASETS.PROPOSAL_FILES_TEST = ()
+# Number of top scoring precomputed proposals to keep for test
+_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000
+
+# -----------------------------------------------------------------------------
+# DataLoader
+# -----------------------------------------------------------------------------
+_C.DATALOADER = CN()
+# Number of data loading threads
+_C.DATALOADER.NUM_WORKERS = 4
+# If True, each batch should contain only images for which the aspect ratio
+# is compatible. This groups portrait images together, and landscape images
+# are not batched with portrait images.
+_C.DATALOADER.ASPECT_RATIO_GROUPING = True
+# Options: TrainingSampler, RepeatFactorTrainingSampler
+_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
+# Repeat threshold for RepeatFactorTrainingSampler
+_C.DATALOADER.REPEAT_THRESHOLD = 0.0
+# If True, when working on datasets that have instance annotations, the
+# training dataloader will filter out images without associated annotations
+_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
+
+# ---------------------------------------------------------------------------- #
+# Backbone options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.BACKBONE = CN()
+
+_C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
+# Freeze the first several stages so they are not trained.
+# There are 5 stages in ResNet. The first is a convolution, and the following
+# stages are each group of residual blocks.
+_C.MODEL.BACKBONE.FREEZE_AT = 2
+
+
+# ---------------------------------------------------------------------------- #
+# FPN options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.FPN = CN()
+# Names of the input feature maps to be used by FPN
+# They must have contiguous power of 2 strides
+# e.g., ["res2", "res3", "res4", "res5"]
+_C.MODEL.FPN.IN_FEATURES = []
+_C.MODEL.FPN.OUT_CHANNELS = 256
+
+# Options: "" (no norm), "GN"
+_C.MODEL.FPN.NORM = ""
+
+# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
+_C.MODEL.FPN.FUSE_TYPE = "sum"
+
+
+# ---------------------------------------------------------------------------- #
+# Proposal generator options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.PROPOSAL_GENERATOR = CN()
+# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
+_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
+# Proposal height and width both need to be greater than MIN_SIZE
+# (at the scale used during training or inference)
+_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0
+
+
+# ---------------------------------------------------------------------------- #
+# Anchor generator options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ANCHOR_GENERATOR = CN()
+# The generator can be any name in the ANCHOR_GENERATOR registry
+_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
+# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
+# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
+# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1.
+# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES.
+_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
+# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
+# ratios are generated by an anchor generator.
+# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
+# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
+# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
+# for all IN_FEATURES.
+_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
+# Anchor angles.
+# list[list[float]], the angle in degrees, for each input feature map.
+# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
+_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
+# Relative offset between the center of the first anchor and the top-left corner of the image
+# Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
+# The value is not expected to affect model accuracy.
+_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0
+
+# ---------------------------------------------------------------------------- #
+# RPN options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.RPN = CN()
+_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY
+
+# Names of the input feature maps to be used by RPN
+# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
+_C.MODEL.RPN.IN_FEATURES = ["res4"]
+# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
+# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
+_C.MODEL.RPN.BOUNDARY_THRESH = -1
+# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
+# Minimum overlap required between an anchor and ground-truth box for the
+# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
+# ==> positive RPN example: 1)
+# Maximum overlap allowed between an anchor and ground-truth box for the
+# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
+# ==> negative RPN example: 0)
+# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
+# are ignored (-1)
+_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
+_C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
+# Number of regions per image used to train RPN
+_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
+# Target fraction of foreground (positive) examples per RPN minibatch
+_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
+# Options are: "smooth_l1", "giou", "diou", "ciou"
+_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
+_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
+# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
+_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
+# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
+_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
+_C.MODEL.RPN.LOSS_WEIGHT = 1.0
+# Number of top scoring RPN proposals to keep before applying NMS
+# When FPN is used, this is *per FPN level* (not total)
+_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
+_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
+# Number of top scoring RPN proposals to keep after applying NMS
+# When FPN is used, this limit is applied per level and then again to the union
+# of proposals from all levels
+# NOTE: When FPN is used, the meaning of this config is different from Detectron1.
+# It means per-batch topk in Detectron1, but per-image topk here.
+# See the "find_top_rpn_proposals" function for details.
+_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
+_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
+# NMS threshold used on RPN proposals
+_C.MODEL.RPN.NMS_THRESH = 0.7
+# Set this to -1 to use the same number of output channels as input channels.
+_C.MODEL.RPN.CONV_DIMS = [-1]
+
+# ---------------------------------------------------------------------------- #
+# ROI HEADS options
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ROI_HEADS = CN()
+_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
+# Number of foreground classes
+_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
+# Names of the input feature maps to be used by ROI heads
+# Currently all heads (box, mask, ...) use the same input feature map list
+# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
+_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
+# IOU overlap ratios [IOU_THRESHOLD]
+# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
+# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
+_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
+_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
+# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
+# Total number of RoIs per training minibatch =
+# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
+# E.g., a common configuration is: 512 * 16 = 8192
+_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
+# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
+_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
+
+# Only used in test mode
+
+# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
+# balance obtaining high recall with not having too many low precision
+# detections that will slow down inference post processing steps (like NMS)
+# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
+# inference.
+_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
+# Overlap threshold used for non-maximum suppression (suppress boxes with
+# IoU >= this threshold)
+_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
+# If True, augment proposals with ground-truth boxes before sampling proposals to
+# train ROI heads.
+_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True
+
+# ---------------------------------------------------------------------------- #
+# Box Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ROI_BOX_HEAD = CN()
+# C4 doesn't use the head name option
+# Options for non-C4 models: FastRCNNConvFCHead,
+_C.MODEL.ROI_BOX_HEAD.NAME = ""
+# Options are: "smooth_l1", "giou", "diou", "ciou"
+_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
+# The final scaling coefficient on the box regression loss, used to balance the magnitude of its
+# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
+_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
+# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
+# These are empirically chosen to approximately lead to unit variance targets
+_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
+# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
+_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
+_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
+_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
+# Type of pooling operation applied to the incoming feature map for each RoI
+_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
+
+_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
+# Hidden layer dimension for FC layers in the RoI box head
+_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
+_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
+# Channel dimension for Conv layers in the RoI box head
+_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
+# Normalization method for the convolution layers.
+# Options: "" (no norm), "GN", "SyncBN".
+_C.MODEL.ROI_BOX_HEAD.NORM = ""
+# Whether to use class agnostic for bbox regression
+_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
+# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
+_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False
+
+# Federated loss can be used to improve the training of LVIS
+_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
+# Sigmoid cross entropy is used with federated loss
+_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
+# The power value applied to image_count when calculating frequency weight
+_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5
+# Number of classes to keep in total
+_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50
+
+# ---------------------------------------------------------------------------- #
+# Cascaded Box Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
+# The number of cascade stages is implicitly defined by the length of the following two configs.
+_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
+ (10.0, 10.0, 5.0, 5.0),
+ (20.0, 20.0, 10.0, 10.0),
+ (30.0, 30.0, 15.0, 15.0),
+)
+_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)
+
+
+# ---------------------------------------------------------------------------- #
+# Mask Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ROI_MASK_HEAD = CN()
+_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
+_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
+_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
+_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head
+_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
+# Normalization method for the convolution layers.
+# Options: "" (no norm), "GN", "SyncBN".
+_C.MODEL.ROI_MASK_HEAD.NORM = ""
+# Whether to use class agnostic for mask prediction
+_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
+# Type of pooling operation applied to the incoming feature map for each RoI
+_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"
+
+
+# ---------------------------------------------------------------------------- #
+# Keypoint Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.ROI_KEYPOINT_HEAD = CN()
+_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
+_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
+_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
+_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
+_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO.
+
+# Images with too few (or no) keypoints are excluded from training.
+_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
+# Normalize by the total number of visible keypoints in the minibatch if True.
+# Otherwise, normalize by the total number of keypoints that could ever exist
+# in the minibatch.
+# The keypoint softmax loss is only calculated on visible keypoints.
+# Since the number of visible keypoints can vary significantly between
+# minibatches, this has the effect of up-weighting the importance of
+# minibatches with few visible keypoints. (Imagine the extreme case of
+# only one visible keypoint versus N: in the case of N, each one
+# contributes 1/N to the gradient compared to the single keypoint
+# determining the gradient direction). Instead, we can normalize the
+# loss by the total number of keypoints, if it were the case that all
+# keypoints were visible in a full minibatch. (Returning to the example,
+# this means that the one visible keypoint contributes as much as each
+# of the N keypoints.)
+_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
+# Multi-task loss weight to use for keypoints
+# Recommended values:
+# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
+# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
+_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
+# Type of pooling operation applied to the incoming feature map for each RoI
+_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"
+
+# ---------------------------------------------------------------------------- #
+# Semantic Segmentation Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.SEM_SEG_HEAD = CN()
+_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
+_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
+# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
+# the corresponding pixel.
+_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
+# Number of classes in the semantic segmentation head
+_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
+# Number of channels in the 3x3 convs inside semantic-FPN heads.
+_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
+# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
+_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
+# Normalization method for the convolution layers. Options: "" (no norm), "GN".
+_C.MODEL.SEM_SEG_HEAD.NORM = "GN"
+_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0
+
+_C.MODEL.PANOPTIC_FPN = CN()
+# Scaling of all losses from instance detection / segmentation head.
+_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0
+
+# options when combining instance & semantic segmentation outputs
+_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used
+_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
+_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
+_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
+
+
+# ---------------------------------------------------------------------------- #
+# RetinaNet Head
+# ---------------------------------------------------------------------------- #
+_C.MODEL.RETINANET = CN()
+
+# This is the number of foreground classes.
+_C.MODEL.RETINANET.NUM_CLASSES = 80
+
+_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
+
+# Convolutions to use in the cls and bbox tower
+# NOTE: this doesn't include the last conv for logits
+_C.MODEL.RETINANET.NUM_CONVS = 4
+
+# IoU overlap ratio [bg, fg] for labeling anchors.
+# Anchors with < bg are labeled negative (0)
+# Anchors with >= bg and < fg are ignored (-1)
+# Anchors with >= fg are labeled positive (1)
+_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
+_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
+
+# Prior prob for rare case (i.e. foreground) at the beginning of training.
+# This is used to set the bias for the logits layer of the classifier subnet.
+# This improves training stability in the case of heavy class imbalance.
+_C.MODEL.RETINANET.PRIOR_PROB = 0.01
+
+# Inference cls score threshold, only anchors with score > INFERENCE_TH are
+# considered for inference (to improve speed)
+_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
+# Select topk candidates before NMS
+_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
+_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
+
+# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
+_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
+
+# Loss parameters
+_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
+_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
+_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
+# Options are: "smooth_l1", "giou", "diou", "ciou"
+_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
+
+# One of BN, SyncBN, FrozenBN, GN
+# Only supports GN until unshared norm is implemented
+_C.MODEL.RETINANET.NORM = ""
+
+
+# ---------------------------------------------------------------------------- #
+# ResNe[X]t options (ResNets = {ResNet, ResNeXt})
+# Note that parts of a resnet may be used for both the backbone and the head
+# These options apply to both
+# ---------------------------------------------------------------------------- #
+_C.MODEL.RESNETS = CN()
+
+_C.MODEL.RESNETS.DEPTH = 50
+_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone
+
+# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
+_C.MODEL.RESNETS.NUM_GROUPS = 1
+
+# Options: FrozenBN, GN, "SyncBN", "BN"
+_C.MODEL.RESNETS.NORM = "FrozenBN"
+
+# Baseline width of each group.
+# Scaling this parameter will scale the width of all bottleneck layers.
+_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
+
+# Place the stride 2 conv on the 1x1 filter
+# Use True only for the original MSRA ResNet; use False for C2 and Torch models
+_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
+
+# Apply dilation in stage "res5"
+_C.MODEL.RESNETS.RES5_DILATION = 1
+
+# Output width of res2. Scaling this parameter will scale the width of all 1x1 convs in ResNet
+# For R18 and R34, this needs to be set to 64
+_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
+_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
+
+# Apply Deformable Convolution in stages
+# Specify if apply deform_conv on Res2, Res3, Res4, Res5
+_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
+# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
+# Use False for DeformableV1.
+_C.MODEL.RESNETS.DEFORM_MODULATED = False
+# Number of groups in deformable conv.
+_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1
+
+
+# ---------------------------------------------------------------------------- #
+# Solver
+# ---------------------------------------------------------------------------- #
+_C.SOLVER = CN()
+
+# Options: WarmupMultiStepLR, WarmupCosineLR.
+# See detectron2/solver/build.py for definition.
+_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
+
+_C.SOLVER.MAX_ITER = 40000
+
+_C.SOLVER.BASE_LR = 0.001
+# The end lr, only used by WarmupCosineLR
+_C.SOLVER.BASE_LR_END = 0.0
+
+_C.SOLVER.MOMENTUM = 0.9
+
+_C.SOLVER.NESTEROV = False
+
+_C.SOLVER.WEIGHT_DECAY = 0.0001
+# The weight decay that's applied to parameters of normalization layers
+# (typically the affine transformation)
+_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
+
+_C.SOLVER.GAMMA = 0.1
+# The iteration number to decrease learning rate by GAMMA.
+_C.SOLVER.STEPS = (30000,)
+# Number of decays in WarmupStepWithFixedGammaLR schedule
+_C.SOLVER.NUM_DECAYS = 3
+
+_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
+_C.SOLVER.WARMUP_ITERS = 1000
+_C.SOLVER.WARMUP_METHOD = "linear"
+# Whether to rescale the interval for the learning schedule after warmup
+_C.SOLVER.RESCALE_INTERVAL = False
+
+# Save a checkpoint after every this number of iterations
+_C.SOLVER.CHECKPOINT_PERIOD = 5000
+
+# Number of images per batch across all machines. This is also the number
+# of training images per step (i.e. per iteration). If we use 16 GPUs
+# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch.
+# May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
+_C.SOLVER.IMS_PER_BATCH = 16
+
+# The reference number of workers (GPUs) this config is meant to train with.
+# It takes no effect when set to 0.
+# With a non-zero value, it will be used by DefaultTrainer to compute a desired
+# per-worker batch size, and then scale the other related configs (total batch size,
+# learning rate, etc) to match the per-worker batch size.
+# See documentation of `DefaultTrainer.auto_scale_workers` for details:
+_C.SOLVER.REFERENCE_WORLD_SIZE = 0
+
+# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
+# biases. This is not useful (at least for recent models). You should avoid
+# changing these and they exist only to reproduce Detectron v1 training if
+# desired.
+_C.SOLVER.BIAS_LR_FACTOR = 1.0
+_C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY
+
+# Gradient clipping
+_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
+# Type of gradient clipping, currently 2 values are supported:
+# - "value": the absolute values of elements of each gradients are clipped
+# - "norm": the norm of the gradient for each parameter is clipped thus
+# affecting all elements in the parameter
+_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
+# Maximum absolute value used for clipping gradients
+_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
+# Floating point number p for L-p norm to be used with the "norm"
+# gradient clipping type; for L-inf, please specify .inf
+_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
+
+# Enable automatic mixed precision for training
+# Note that this does not change model's inference behavior.
+# To use AMP in inference, run inference under autocast()
+_C.SOLVER.AMP = CN({"ENABLED": False})
+
+# ---------------------------------------------------------------------------- #
+# Specific test options
+# ---------------------------------------------------------------------------- #
+_C.TEST = CN()
+# For end-to-end tests to verify the expected accuracy.
+# Each item is [task, metric, value, tolerance]
+# e.g.: [['bbox', 'AP', 38.5, 0.2]]
+_C.TEST.EXPECTED_RESULTS = []
+# The period (in terms of steps) to evaluate the model during training.
+# Set to 0 to disable.
+_C.TEST.EVAL_PERIOD = 0
+# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
+# When empty, it will use the defaults in COCO.
+# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
+_C.TEST.KEYPOINT_OKS_SIGMAS = []
+# Maximum number of detections to return per image during inference (100 is
+# based on the limit established for the COCO dataset).
+_C.TEST.DETECTIONS_PER_IMAGE = 100
+
+_C.TEST.AUG = CN({"ENABLED": False})
+_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
+_C.TEST.AUG.MAX_SIZE = 4000
+_C.TEST.AUG.FLIP = True
+
+_C.TEST.PRECISE_BN = CN({"ENABLED": False})
+_C.TEST.PRECISE_BN.NUM_ITER = 200
+
+# ---------------------------------------------------------------------------- #
+# Misc options
+# ---------------------------------------------------------------------------- #
+# Directory where output files are written
+_C.OUTPUT_DIR = "./output"
+# Set seed to negative to fully randomize everything.
+# Set seed to positive to use a fixed seed. Note that a fixed seed increases
+# reproducibility but does not guarantee fully deterministic behavior.
+# Disabling all parallelism further increases reproducibility.
+_C.SEED = -1
+# Benchmark different cudnn algorithms.
+# If input images have very different sizes, this option will have large overhead
+# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
+# If input images have the same or similar sizes, benchmark is often helpful.
+_C.CUDNN_BENCHMARK = False
+# The period (in terms of steps) for minibatch visualization at train time.
+# Set to 0 to disable.
+_C.VIS_PERIOD = 0
+
+# global config is for quick hack purposes.
+# You can set them in command line or config files,
+# and access it with:
+#
+# from detectron2.config import global_cfg
+# print(global_cfg.HACK)
+#
+# Do not commit any configs into it.
+_C.GLOBAL = CN()
+_C.GLOBAL.HACK = 1.0
diff --git a/vendor/detectron2/detectron2/config/instantiate.py b/vendor/detectron2/detectron2/config/instantiate.py
new file mode 100644
index 0000000000000000000000000000000000000000..05ee2c7d21c9bf3e56a0a8e98447d2587b4b8fed
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/instantiate.py
@@ -0,0 +1,88 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import collections.abc as abc
+import dataclasses
+import logging
+from typing import Any
+
+from detectron2.utils.registry import _convert_target_to_string, locate
+
+__all__ = ["dump_dataclass", "instantiate"]
+
+
+def dump_dataclass(obj: Any):
+ """
+ Dump a dataclass recursively into a dict that can be later instantiated.
+
+ Args:
+ obj: a dataclass object
+
+ Returns:
+ dict
+ """
+ assert dataclasses.is_dataclass(obj) and not isinstance(
+ obj, type
+ ), "dump_dataclass() requires an instance of a dataclass."
+ ret = {"_target_": _convert_target_to_string(type(obj))}
+ for f in dataclasses.fields(obj):
+ v = getattr(obj, f.name)
+ if dataclasses.is_dataclass(v):
+ v = dump_dataclass(v)
+ if isinstance(v, (list, tuple)):
+ v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v]
+ ret[f.name] = v
+ return ret
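+    # Hedged example: for a dataclass instance Point(x=1, y=2) defined in module "m",
+    # dump_dataclass(Point(1, 2)) returns {"_target_": "m.Point", "x": 1, "y": 2};
+    # the exact "_target_" string depends on where the dataclass is defined.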
+
+
+def instantiate(cfg):
+ """
+ Recursively instantiate objects defined in dictionaries by
+ "_target_" and arguments.
+
+ Args:
+ cfg: a dict-like object with "_target_" that defines the caller, and
+ other keys that define the arguments
+
+ Returns:
+ object instantiated by cfg
+ """
+ from omegaconf import ListConfig, DictConfig, OmegaConf
+
+ if isinstance(cfg, ListConfig):
+ lst = [instantiate(x) for x in cfg]
+ return ListConfig(lst, flags={"allow_objects": True})
+ if isinstance(cfg, list):
+ # Specialize for list, because many classes take
+ # list[objects] as arguments, such as ResNet, DatasetMapper
+ return [instantiate(x) for x in cfg]
+
+ # If input is a DictConfig backed by dataclasses (i.e. omegaconf's structured config),
+ # instantiate it to the actual dataclass.
+ if isinstance(cfg, DictConfig) and dataclasses.is_dataclass(cfg._metadata.object_type):
+ return OmegaConf.to_object(cfg)
+
+ if isinstance(cfg, abc.Mapping) and "_target_" in cfg:
+ # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all,
+ # but faster: https://github.com/facebookresearch/hydra/issues/1200
+ cfg = {k: instantiate(v) for k, v in cfg.items()}
+ cls = cfg.pop("_target_")
+ cls = instantiate(cls)
+
+ if isinstance(cls, str):
+ cls_name = cls
+ cls = locate(cls_name)
+ assert cls is not None, cls_name
+ else:
+ try:
+ cls_name = cls.__module__ + "." + cls.__qualname__
+ except Exception:
+ # target could be anything, so the above could fail
+ cls_name = str(cls)
+ assert callable(cls), f"_target_ {cls} does not define a callable object"
+ try:
+ return cls(**cfg)
+ except TypeError:
+ logger = logging.getLogger(__name__)
+ logger.error(f"Error when instantiating {cls_name}!")
+ raise
+    return cfg  # return as-is if we don't know what to do
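+
+
+# Minimal usage sketch (comment only; assumes torch is installed and that LazyCall
+# is imported from detectron2.config):
+#
+#   from torch import nn
+#   from detectron2.config import LazyCall as L
+#   conv = instantiate(L(nn.Conv2d)(in_channels=3, out_channels=16, kernel_size=3))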
diff --git a/vendor/detectron2/detectron2/config/lazy.py b/vendor/detectron2/detectron2/config/lazy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea93e865acce31de07af476f95454d62128a9d1c
--- /dev/null
+++ b/vendor/detectron2/detectron2/config/lazy.py
@@ -0,0 +1,436 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import ast
+import builtins
+import collections.abc as abc
+import importlib
+import inspect
+import logging
+import os
+import uuid
+from contextlib import contextmanager
+from copy import deepcopy
+from dataclasses import is_dataclass
+from typing import List, Tuple, Union
+import cloudpickle
+import yaml
+from omegaconf import DictConfig, ListConfig, OmegaConf, SCMode
+
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.registry import _convert_target_to_string
+
+__all__ = ["LazyCall", "LazyConfig"]
+
+
+class LazyCall:
+ """
+ Wrap a callable so that when it's called, the call will not be executed,
+ but returns a dict that describes the call.
+
+ LazyCall object has to be called with only keyword arguments. Positional
+ arguments are not yet supported.
+
+ Examples:
+ ::
+ from detectron2.config import instantiate, LazyCall
+
+ layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32)
+ layer_cfg.out_channels = 64 # can edit it afterwards
+ layer = instantiate(layer_cfg)
+ """
+
+ def __init__(self, target):
+ if not (callable(target) or isinstance(target, (str, abc.Mapping))):
+ raise TypeError(
+                f"target of LazyCall must be a callable or define a callable! Got {target}"
+ )
+ self._target = target
+
+ def __call__(self, **kwargs):
+ if is_dataclass(self._target):
+ # omegaconf object cannot hold dataclass type
+ # https://github.com/omry/omegaconf/issues/784
+ target = _convert_target_to_string(self._target)
+ else:
+ target = self._target
+ kwargs["_target_"] = target
+
+ return DictConfig(content=kwargs, flags={"allow_objects": True})
+
+
+def _visit_dict_config(cfg, func):
+ """
+ Apply func recursively to all DictConfig in cfg.
+ """
+ if isinstance(cfg, DictConfig):
+ func(cfg)
+ for v in cfg.values():
+ _visit_dict_config(v, func)
+ elif isinstance(cfg, ListConfig):
+ for v in cfg:
+ _visit_dict_config(v, func)
+
+
+def _validate_py_syntax(filename):
+ # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
+ with PathManager.open(filename, "r") as f:
+ content = f.read()
+ try:
+ ast.parse(content)
+ except SyntaxError as e:
+ raise SyntaxError(f"Config file {filename} has syntax error!") from e
+
+
+def _cast_to_config(obj):
+ # if given a dict, return DictConfig instead
+ if isinstance(obj, dict):
+ return DictConfig(obj, flags={"allow_objects": True})
+ return obj
+
+
+_CFG_PACKAGE_NAME = "detectron2._cfg_loader"
+"""
+A namespace to put all imported config into.
+"""
+
+
+def _random_package_name(filename):
+ # generate a random package name when loading config files
+ return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename)
+
+
+@contextmanager
+def _patch_import():
+ """
+ Enhance relative import statements in config files, so that they:
+ 1. locate files purely based on relative location, regardless of packages.
+ e.g. you can import file without having __init__
+    2. do not cache modules globally; modifications of module state have no side effect
+    3. support other storage systems through PathManager, so config files can be in the cloud
+    4. imported dicts are turned into omegaconf.DictConfig automatically
+ """
+ old_import = builtins.__import__
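+
+    # Example (illustrative module/variable names): inside a loaded config file,
+    # `from .base_lr import lr` is resolved by find_relative_file() below to
+    # <config_dir>/base_lr.py, and if `lr` is a plain dict it is wrapped into a
+    # DictConfig by _cast_to_config().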
+
+ def find_relative_file(original_file, relative_import_path, level):
+ # NOTE: "from . import x" is not handled. Because then it's unclear
+ # if such import should produce `x` as a python module or DictConfig.
+ # This can be discussed further if needed.
+ relative_import_err = """
+Relative import of directories is not allowed within config files.
+Within a config file, relative import can only import other config files.
+""".replace(
+ "\n", " "
+ )
+ if not len(relative_import_path):
+ raise ImportError(relative_import_err)
+
+ cur_file = os.path.dirname(original_file)
+ for _ in range(level - 1):
+ cur_file = os.path.dirname(cur_file)
+ cur_name = relative_import_path.lstrip(".")
+ for part in cur_name.split("."):
+ cur_file = os.path.join(cur_file, part)
+ if not cur_file.endswith(".py"):
+ cur_file += ".py"
+ if not PathManager.isfile(cur_file):
+ cur_file_no_suffix = cur_file[: -len(".py")]
+ if PathManager.isdir(cur_file_no_suffix):
+ raise ImportError(f"Cannot import from {cur_file_no_suffix}." + relative_import_err)
+ else:
+ raise ImportError(
+ f"Cannot import name {relative_import_path} from "
+ f"{original_file}: {cur_file} does not exist."
+ )
+ return cur_file
+
+ def new_import(name, globals=None, locals=None, fromlist=(), level=0):
+ if (
+ # Only deal with relative imports inside config files
+ level != 0
+ and globals is not None
+ and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
+ ):
+ cur_file = find_relative_file(globals["__file__"], name, level)
+ _validate_py_syntax(cur_file)
+ spec = importlib.machinery.ModuleSpec(
+ _random_package_name(cur_file), None, origin=cur_file
+ )
+ module = importlib.util.module_from_spec(spec)
+ module.__file__ = cur_file
+ with PathManager.open(cur_file) as f:
+ content = f.read()
+ exec(compile(content, cur_file, "exec"), module.__dict__)
+ for name in fromlist: # turn imported dict into DictConfig automatically
+ val = _cast_to_config(module.__dict__[name])
+ module.__dict__[name] = val
+ return module
+ return old_import(name, globals, locals, fromlist=fromlist, level=level)
+
+ builtins.__import__ = new_import
+ yield new_import
+ builtins.__import__ = old_import
+
+
+class LazyConfig:
+ """
+    Provide methods to save, load, and override an omegaconf config object
+ which may contain definition of lazily-constructed objects.
+ """
+
+ @staticmethod
+ def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
+ """
+ Similar to :meth:`load()`, but load path relative to the caller's
+ source file.
+
+ This has the same functionality as a relative import, except that this method
+ accepts filename as a string, so more characters are allowed in the filename.
+ """
+ caller_frame = inspect.stack()[1]
+ caller_fname = caller_frame[0].f_code.co_filename
+        assert caller_fname != "", "load_rel: unable to find the caller"
+ caller_dir = os.path.dirname(caller_fname)
+ filename = os.path.join(caller_dir, filename)
+ return LazyConfig.load(filename, keys)
+
+ @staticmethod
+ def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
+ """
+ Load a config file.
+
+ Args:
+ filename: absolute path or relative path w.r.t. the current working directory
+ keys: keys to load and return. If not given, return all keys
+ (whose values are config objects) in a dict.
+ """
+ has_keys = keys is not None
+ filename = filename.replace("/./", "/") # redundant
+ if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
+ raise ValueError(f"Config file {filename} has to be a python or yaml file.")
+ if filename.endswith(".py"):
+ _validate_py_syntax(filename)
+
+ with _patch_import():
+ # Record the filename
+ module_namespace = {
+ "__file__": filename,
+ "__package__": _random_package_name(filename),
+ }
+ with PathManager.open(filename) as f:
+ content = f.read()
+ # Compile first with filename to:
+            # 1. make the filename appear in stack traces
+ # 2. make load_rel able to find its parent's (possibly remote) location
+ exec(compile(content, filename, "exec"), module_namespace)
+
+ ret = module_namespace
+ else:
+ with PathManager.open(filename) as f:
+ obj = yaml.unsafe_load(f)
+ ret = OmegaConf.create(obj, flags={"allow_objects": True})
+
+ if has_keys:
+ if isinstance(keys, str):
+ return _cast_to_config(ret[keys])
+ else:
+ return tuple(_cast_to_config(ret[a]) for a in keys)
+ else:
+ if filename.endswith(".py"):
+ # when not specified, only load those that are config objects
+ ret = DictConfig(
+ {
+ name: _cast_to_config(value)
+ for name, value in ret.items()
+ if isinstance(value, (DictConfig, ListConfig, dict))
+ and not name.startswith("_")
+ },
+ flags={"allow_objects": True},
+ )
+ return ret
+
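+    # Usage sketch (the file path and keys below are hypothetical):
+    #   all_cfgs = LazyConfig.load("configs/my_experiment.py")
+    #   model, train = LazyConfig.load("configs/my_experiment.py", ("model", "train"))
+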
+ @staticmethod
+ def save(cfg, filename: str):
+ """
+ Save a config object to a yaml file.
+ Note that when the config dictionary contains complex objects (e.g. lambda),
+ it can't be saved to yaml. In that case we will print an error and
+ attempt to save to a pkl file instead.
+
+ Args:
+ cfg: an omegaconf config object
+ filename: yaml file name to save the config file
+ """
+ logger = logging.getLogger(__name__)
+ try:
+ cfg = deepcopy(cfg)
+ except Exception:
+ pass
+ else:
+ # if it's deep-copyable, then...
+ def _replace_type_by_name(x):
+ if "_target_" in x and callable(x._target_):
+ try:
+ x._target_ = _convert_target_to_string(x._target_)
+ except AttributeError:
+ pass
+
+ # not necessary, but makes the yaml look nicer
+ _visit_dict_config(cfg, _replace_type_by_name)
+
+ save_pkl = False
+ try:
+ cfg_dict = OmegaConf.to_container(
+ cfg,
+ # Do not resolve interpolation when saving, i.e. do not turn ${a} into
+ # actual values when saving.
+ resolve=False,
+ # Save structures (dataclasses) in a format that can be instantiated later.
+ # Without this option, the type information of the dataclass will be erased.
+ structured_config_mode=SCMode.INSTANTIATE,
+ )
+ dumped = yaml.dump(cfg_dict, default_flow_style=None, allow_unicode=True, width=9999)
+ with PathManager.open(filename, "w") as f:
+ f.write(dumped)
+
+ try:
+ _ = yaml.unsafe_load(dumped) # test that it is loadable
+ except Exception:
+ logger.warning(
+ "The config contains objects that cannot serialize to a valid yaml. "
+ f"{filename} is human-readable but cannot be loaded."
+ )
+ save_pkl = True
+ except Exception:
+ logger.exception("Unable to serialize the config to yaml. Error:")
+ save_pkl = True
+
+ if save_pkl:
+ new_filename = filename + ".pkl"
+ try:
+ # retry by pickle
+ with PathManager.open(new_filename, "wb") as f:
+ cloudpickle.dump(cfg, f)
+ logger.warning(f"Config is saved using cloudpickle at {new_filename}.")
+ except Exception:
+ pass
+
+ @staticmethod
+ def apply_overrides(cfg, overrides: List[str]):
+ """
+ In-place override contents of cfg.
+
+ Args:
+ cfg: an omegaconf config object
+ overrides: list of strings in the format of "a=b" to override configs.
+ See https://hydra.cc/docs/next/advanced/override_grammar/basic/
+ for syntax.
+
+ Returns:
+ the cfg object
+ """
+
+ def safe_update(cfg, key, value):
+ parts = key.split(".")
+ for idx in range(1, len(parts)):
+ prefix = ".".join(parts[:idx])
+ v = OmegaConf.select(cfg, prefix, default=None)
+ if v is None:
+ break
+ if not OmegaConf.is_config(v):
+ raise KeyError(
+ f"Trying to update key {key}, but {prefix} "
+ f"is not a config, but has type {type(v)}."
+ )
+ OmegaConf.update(cfg, key, value, merge=True)
+
+ try:
+ from hydra.core.override_parser.overrides_parser import OverridesParser
+
+ has_hydra = True
+ except ImportError:
+ has_hydra = False
+
+ if has_hydra:
+ parser = OverridesParser.create()
+ overrides = parser.parse_overrides(overrides)
+ for o in overrides:
+ key = o.key_or_group
+ value = o.value()
+ if o.is_delete():
+ # TODO support this
+ raise NotImplementedError("deletion is not yet a supported override")
+ safe_update(cfg, key, value)
+ else:
+ # Fallback. Does not support all the features and error checking like hydra.
+ for o in overrides:
+ key, value = o.split("=")
+ try:
+ value = eval(value, {})
+ except NameError:
+ pass
+ safe_update(cfg, key, value)
+ return cfg
+
+ @staticmethod
+ def to_py(cfg, prefix: str = "cfg."):
+ """
+ Try to convert a config object into Python-like pseudo code.
+
+ Note that perfect conversion is not always possible. So the returned
+ results are mainly meant to be human-readable, and not meant to be executed.
+
+ Args:
+ cfg: an omegaconf config object
+ prefix: root name for the resulting code (default: "cfg.")
+
+
+ Returns:
+ str of formatted Python code
+ """
+ import black
+
+ cfg = OmegaConf.to_container(cfg, resolve=True)
+
+ def _to_str(obj, prefix=None, inside_call=False):
+ if prefix is None:
+ prefix = []
+ if isinstance(obj, abc.Mapping) and "_target_" in obj:
+ # Dict representing a function call
+ target = _convert_target_to_string(obj.pop("_target_"))
+ args = []
+ for k, v in sorted(obj.items()):
+ args.append(f"{k}={_to_str(v, inside_call=True)}")
+ args = ", ".join(args)
+ call = f"{target}({args})"
+ return "".join(prefix) + call
+ elif isinstance(obj, abc.Mapping) and not inside_call:
+ # Dict that is not inside a call is a list of top-level config objects that we
+ # render as one object per line with dot separated prefixes
+ key_list = []
+ for k, v in sorted(obj.items()):
+ if isinstance(v, abc.Mapping) and "_target_" not in v:
+ key_list.append(_to_str(v, prefix=prefix + [k + "."]))
+ else:
+ key = "".join(prefix) + k
+ key_list.append(f"{key}={_to_str(v)}")
+ return "\n".join(key_list)
+ elif isinstance(obj, abc.Mapping):
+ # Dict that is inside a call is rendered as a regular dict
+ return (
+ "{"
+ + ",".join(
+ f"{repr(k)}: {_to_str(v, inside_call=inside_call)}"
+ for k, v in sorted(obj.items())
+ )
+ + "}"
+ )
+ elif isinstance(obj, list):
+ return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]"
+ else:
+ return repr(obj)
+
+ py_str = _to_str(cfg, prefix=[prefix])
+ try:
+ return black.format_str(py_str, mode=black.Mode())
+ except black.InvalidInput:
+ return py_str
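+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): a minimal example of
+# the LazyConfig API defined above. The config contents are made up for
+# illustration only, and `LazyConfig.to_py()` additionally requires `black`.
+if __name__ == "__main__":
+    from omegaconf import OmegaConf
+
+    demo_cfg = OmegaConf.create(
+        {"train": {"max_iter": 90000, "device": "cuda"}}, flags={"allow_objects": True}
+    )
+    # Override values using the same "a.b=c" grammar accepted by apply_overrides().
+    demo_cfg = LazyConfig.apply_overrides(demo_cfg, ["train.max_iter=1000", "train.device=cpu"])
+    # Render the config as human-readable, Python-like pseudo code.
+    print(LazyConfig.to_py(demo_cfg))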
diff --git a/vendor/detectron2/detectron2/data/__init__.py b/vendor/detectron2/detectron2/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..259f669b78bd05815cb8d3351fd6c5fc9a1b85a1
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from . import transforms # isort:skip
+
+from .build import (
+ build_batch_data_loader,
+ build_detection_test_loader,
+ build_detection_train_loader,
+ get_detection_dataset_dicts,
+ load_proposals_into_dataset,
+ print_instances_class_histogram,
+)
+from .catalog import DatasetCatalog, MetadataCatalog, Metadata
+from .common import DatasetFromList, MapDataset, ToIterableDataset
+from .dataset_mapper import DatasetMapper
+
+# ensure the builtin datasets are registered
+from . import datasets, samplers # isort:skip
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/vendor/detectron2/detectron2/data/benchmark.py b/vendor/detectron2/detectron2/data/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac2f372a4b111ad40b8e720adea208608271bab6
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/benchmark.py
@@ -0,0 +1,225 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import numpy as np
+from itertools import count
+from typing import List, Tuple
+import torch
+import tqdm
+from fvcore.common.timer import Timer
+
+from detectron2.utils import comm
+
+from .build import build_batch_data_loader
+from .common import DatasetFromList, MapDataset
+from .samplers import TrainingSampler
+
+logger = logging.getLogger(__name__)
+
+
+class _EmptyMapDataset(torch.utils.data.Dataset):
+ """
+ Map anything to emptiness.
+ """
+
+ def __init__(self, dataset):
+ self.ds = dataset
+
+ def __len__(self):
+ return len(self.ds)
+
+ def __getitem__(self, idx):
+ _ = self.ds[idx]
+ return [0]
+
+
+def iter_benchmark(
+ iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
+) -> Tuple[float, List[float]]:
+ """
+ Benchmark an iterator/iterable for `num_iter` iterations, preceded by
+ `warmup` warmup iterations.
+ End early if more than `max_time_seconds` is spent on iterations.
+
+ Returns:
+ float: average time (seconds) per iteration
+ list[float]: time spent on each iteration. Sometimes useful for further analysis.
+ """
+ num_iter, warmup = int(num_iter), int(warmup)
+
+ iterator = iter(iterator)
+ for _ in range(warmup):
+ next(iterator)
+ timer = Timer()
+ all_times = []
+ for curr_iter in tqdm.trange(num_iter):
+ start = timer.seconds()
+ if start > max_time_seconds:
+ num_iter = curr_iter
+ break
+ next(iterator)
+ all_times.append(timer.seconds() - start)
+ avg = timer.seconds() / num_iter
+ return avg, all_times
+
+
+class DataLoaderBenchmark:
+ """
+ Some common benchmarks that help understand the performance bottlenecks of a
+ standard dataloader made of a dataset, a mapper and a sampler.
+ """
+
+ def __init__(
+ self,
+ dataset,
+ *,
+ mapper,
+ sampler=None,
+ total_batch_size,
+ num_workers=0,
+ max_time_seconds: int = 90,
+ ):
+ """
+ Args:
+ max_time_seconds (int): maximum time to spend on each benchmark
+ other args: same as in `build.py:build_detection_train_loader`
+ """
+ if isinstance(dataset, list):
+ dataset = DatasetFromList(dataset, copy=False, serialize=True)
+ if sampler is None:
+ sampler = TrainingSampler(len(dataset))
+
+ self.dataset = dataset
+ self.mapper = mapper
+ self.sampler = sampler
+ self.total_batch_size = total_batch_size
+ self.num_workers = num_workers
+ self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size()
+
+ self.max_time_seconds = max_time_seconds
+
+ def _benchmark(self, iterator, num_iter, warmup, msg=None):
+ avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds)
+ if msg is not None:
+ self._log_time(msg, avg, all_times)
+ return avg, all_times
+
+ def _log_time(self, msg, avg, all_times, distributed=False):
+ percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]]
+ if not distributed:
+ logger.info(
+ f"{msg}: avg={1.0/avg:.1f} it/s, "
+ f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
+ f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
+ )
+ return
+ avg_per_gpu = comm.all_gather(avg)
+ percentiles_per_gpu = comm.all_gather(percentiles)
+ if comm.get_rank() > 0:
+ return
+ for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
+ logger.info(
+ f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, "
+ f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
+ f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
+ )
+
+ def benchmark_dataset(self, num_iter, warmup=5):
+ """
+ Benchmark the speed of taking raw samples from the dataset.
+ """
+
+ def loader():
+ while True:
+ for k in self.sampler:
+ yield self.dataset[k]
+
+ self._benchmark(loader(), num_iter, warmup, "Dataset Alone")
+
+ def benchmark_mapper(self, num_iter, warmup=5):
+ """
+ Benchmark the speed of taking raw samples from the dataset and mapping
+ them in a single process.
+ """
+
+ def loader():
+ while True:
+ for k in self.sampler:
+ yield self.mapper(self.dataset[k])
+
+ self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)")
+
+ def benchmark_workers(self, num_iter, warmup=10):
+ """
+ Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers].
+ """
+ candidates = [0, 1]
+ if self.num_workers not in candidates:
+ candidates.append(self.num_workers)
+
+ dataset = MapDataset(self.dataset, self.mapper)
+ for n in candidates:
+ loader = build_batch_data_loader(
+ dataset,
+ self.sampler,
+ self.total_batch_size,
+ num_workers=n,
+ )
+ self._benchmark(
+ iter(loader),
+ num_iter * max(n, 1),
+ warmup * max(n, 1),
+ f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})",
+ )
+ del loader
+
+ def benchmark_IPC(self, num_iter, warmup=10):
+ """
+ Benchmark the dataloader where each worker outputs nothing. This
+ eliminates the IPC overhead compared to the regular dataloader.
+
+ PyTorch multiprocessing's IPC only optimizes for torch tensors.
+ Large numpy arrays or other data structures may incur large IPC overhead.
+ """
+ n = self.num_workers
+ dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper))
+ loader = build_batch_data_loader(
+ dataset, self.sampler, self.total_batch_size, num_workers=n
+ )
+ self._benchmark(
+ iter(loader),
+ num_iter * max(n, 1),
+ warmup * max(n, 1),
+ f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm",
+ )
+
+ def benchmark_distributed(self, num_iter, warmup=10):
+ """
+ Benchmark the dataloader in each distributed worker, and log results of
+ all workers. This helps understand the final performance as well as
+ the variances among workers.
+
+ It also prints startup time (first iter) of the dataloader.
+ """
+ gpu = comm.get_world_size()
+ dataset = MapDataset(self.dataset, self.mapper)
+ n = self.num_workers
+ loader = build_batch_data_loader(
+ dataset, self.sampler, self.total_batch_size, num_workers=n
+ )
+
+ timer = Timer()
+ loader = iter(loader)
+ next(loader)
+ startup_time = timer.seconds()
+ logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))
+
+ comm.synchronize()
+
+ avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
+ del loader
+ self._log_time(
+ f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
+ avg,
+ all_times,
+ True,
+ )
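+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): time a toy iterator
+# with iter_benchmark() defined above; the workload stands in for a mapped sample.
+if __name__ == "__main__":
+
+    def _toy_samples():
+        while True:
+            yield np.zeros((64, 64, 3), dtype=np.uint8)
+
+    avg_sec, per_iter = iter_benchmark(_toy_samples(), num_iter=100, warmup=5)
+    print(f"toy iterator: {avg_sec:.3g} s/iter over {len(per_iter)} timed iterations")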
diff --git a/vendor/detectron2/detectron2/data/build.py b/vendor/detectron2/detectron2/data/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fa2c6b1a5850f7b9771ff79861d008251ec8564
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/build.py
@@ -0,0 +1,556 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import logging
+import numpy as np
+import operator
+import pickle
+from typing import Any, Callable, Dict, List, Optional, Union
+import torch
+import torch.utils.data as torchdata
+from tabulate import tabulate
+from termcolor import colored
+
+from detectron2.config import configurable
+from detectron2.structures import BoxMode
+from detectron2.utils.comm import get_world_size
+from detectron2.utils.env import seed_all_rng
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import _log_api_usage, log_first_n
+
+from .catalog import DatasetCatalog, MetadataCatalog
+from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset
+from .dataset_mapper import DatasetMapper
+from .detection_utils import check_metadata_consistency
+from .samplers import (
+ InferenceSampler,
+ RandomSubsetTrainingSampler,
+ RepeatFactorTrainingSampler,
+ TrainingSampler,
+)
+
+"""
+This file contains the default logic to build a dataloader for training or testing.
+"""
+
+__all__ = [
+ "build_batch_data_loader",
+ "build_detection_train_loader",
+ "build_detection_test_loader",
+ "get_detection_dataset_dicts",
+ "load_proposals_into_dataset",
+ "print_instances_class_histogram",
+]
+
+
+def filter_images_with_only_crowd_annotations(dataset_dicts):
+ """
+ Filter out images with no annotations or with only crowd annotations
+ (i.e., images without non-crowd annotations).
+ A common training-time preprocessing on COCO dataset.
+
+ Args:
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
+
+ Returns:
+ list[dict]: the same format, but filtered.
+ """
+ num_before = len(dataset_dicts)
+
+ def valid(anns):
+ for ann in anns:
+ if ann.get("iscrowd", 0) == 0:
+ return True
+ return False
+
+ dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
+ num_after = len(dataset_dicts)
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Removed {} images with no usable annotations. {} images left.".format(
+ num_before - num_after, num_after
+ )
+ )
+ return dataset_dicts
+
+
+def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
+ """
+ Filter out images with too few keypoints.
+
+ Args:
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
+
+ Returns:
+ list[dict]: the same format as dataset_dicts, but filtered.
+ """
+ num_before = len(dataset_dicts)
+
+ def visible_keypoints_in_image(dic):
+ # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility
+ annotations = dic["annotations"]
+ return sum(
+ (np.array(ann["keypoints"][2::3]) > 0).sum()
+ for ann in annotations
+ if "keypoints" in ann
+ )
+
+ dataset_dicts = [
+ x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image
+ ]
+ num_after = len(dataset_dicts)
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Removed {} images with fewer than {} keypoints.".format(
+ num_before - num_after, min_keypoints_per_image
+ )
+ )
+ return dataset_dicts
+
+
+def load_proposals_into_dataset(dataset_dicts, proposal_file):
+ """
+ Load precomputed object proposals into the dataset.
+
+ The proposal file should be a pickled dict with the following keys:
+
+ - "ids": list[int] or list[str], the image ids
+ - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
+ - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
+ corresponding to the boxes.
+ - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.
+
+ Args:
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
+ proposal_file (str): file path of pre-computed proposals, in pkl format.
+
+ Returns:
+ list[dict]: the same format as dataset_dicts, but with an added proposal field.
+ """
+ logger = logging.getLogger(__name__)
+ logger.info("Loading proposals from: {}".format(proposal_file))
+
+ with PathManager.open(proposal_file, "rb") as f:
+ proposals = pickle.load(f, encoding="latin1")
+
+ # Rename the key names in D1 proposal files
+ rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
+ for key in rename_keys:
+ if key in proposals:
+ proposals[rename_keys[key]] = proposals.pop(key)
+
+ # Fetch the indexes of all proposals that are in the dataset
+ # Convert image_id to str since they could be int.
+ img_ids = set({str(record["image_id"]) for record in dataset_dicts})
+ id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids}
+
+ # Assume the default bbox_mode of precomputed proposals is 'XYXY_ABS'
+ bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS
+
+ for record in dataset_dicts:
+ # Get the index of the proposal
+ i = id_to_index[str(record["image_id"])]
+
+ boxes = proposals["boxes"][i]
+ objectness_logits = proposals["objectness_logits"][i]
+ # Sort the proposals in descending order of the scores
+ inds = objectness_logits.argsort()[::-1]
+ record["proposal_boxes"] = boxes[inds]
+ record["proposal_objectness_logits"] = objectness_logits[inds]
+ record["proposal_bbox_mode"] = bbox_mode
+
+ return dataset_dicts
+
+
+def print_instances_class_histogram(dataset_dicts, class_names):
+ """
+ Args:
+ dataset_dicts (list[dict]): list of dataset dicts.
+ class_names (list[str]): list of class names (zero-indexed).
+ """
+ num_classes = len(class_names)
+ hist_bins = np.arange(num_classes + 1)
+ histogram = np.zeros((num_classes,), dtype=int)  # np.int was removed in NumPy 1.24
+ for entry in dataset_dicts:
+ annos = entry["annotations"]
+ classes = np.asarray(
+ [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=int
+ )
+ if len(classes):
+ assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}"
+ assert (
+ classes.max() < num_classes
+ ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
+ histogram += np.histogram(classes, bins=hist_bins)[0]
+
+ N_COLS = min(6, len(class_names) * 2)
+
+ def short_name(x):
+ # make long class names shorter. useful for lvis
+ if len(x) > 13:
+ return x[:11] + ".."
+ return x
+
+ data = list(
+ itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
+ )
+ total_num_instances = sum(data[1::2])
+ data.extend([None] * (N_COLS - (len(data) % N_COLS)))
+ if num_classes > 1:
+ data.extend(["total", total_num_instances])
+ data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
+ table = tabulate(
+ data,
+ headers=["category", "#instances"] * (N_COLS // 2),
+ tablefmt="pipe",
+ numalign="left",
+ stralign="center",
+ )
+ log_first_n(
+ logging.INFO,
+ "Distribution of instances among all {} categories:\n".format(num_classes)
+ + colored(table, "cyan"),
+ key="message",
+ )
+
+
+def get_detection_dataset_dicts(
+ names,
+ filter_empty=True,
+ min_keypoints=0,
+ proposal_files=None,
+ check_consistency=True,
+):
+ """
+ Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
+
+ Args:
+ names (str or list[str]): a dataset name or a list of dataset names
+ filter_empty (bool): whether to filter out images without instance annotations
+ min_keypoints (int): filter out images with fewer keypoints than
+ `min_keypoints`. Set to 0 to do nothing.
+ proposal_files (list[str]): if given, a list of object proposal files
+ that match each dataset in `names`.
+ check_consistency (bool): whether to check if datasets have consistent metadata.
+
+ Returns:
+ list[dict]: a list of dicts following the standard dataset dict format.
+ """
+ if isinstance(names, str):
+ names = [names]
+ assert len(names), names
+ dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names]
+
+ if isinstance(dataset_dicts[0], torchdata.Dataset):
+ if len(dataset_dicts) > 1:
+ # ConcatDataset does not work for iterable style dataset.
+ # We could support concat for iterable as well, but it's often
+ # not a good idea to concat iterables anyway.
+ return torchdata.ConcatDataset(dataset_dicts)
+ return dataset_dicts[0]
+
+ for dataset_name, dicts in zip(names, dataset_dicts):
+ assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
+
+ if proposal_files is not None:
+ assert len(names) == len(proposal_files)
+ # load precomputed proposals from proposal files
+ dataset_dicts = [
+ load_proposals_into_dataset(dataset_i_dicts, proposal_file)
+ for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
+ ]
+
+ dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
+
+ has_instances = "annotations" in dataset_dicts[0]
+ if filter_empty and has_instances:
+ dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
+ if min_keypoints > 0 and has_instances:
+ dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)
+
+ if check_consistency and has_instances:
+ try:
+ class_names = MetadataCatalog.get(names[0]).thing_classes
+ check_metadata_consistency("thing_classes", names)
+ print_instances_class_histogram(dataset_dicts, class_names)
+ except AttributeError: # class names are not available for this dataset
+ pass
+
+ assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names))
+ return dataset_dicts
+
+
+def build_batch_data_loader(
+ dataset,
+ sampler,
+ total_batch_size,
+ *,
+ aspect_ratio_grouping=False,
+ num_workers=0,
+ collate_fn=None,
+):
+ """
+ Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
+ 1. support aspect ratio grouping options
+ 2. use no "batch collation", because this is common for detection training
+
+ Args:
+ dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
+ sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
+ Must be provided iff. ``dataset`` is a map-style dataset.
+ total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see
+ :func:`build_detection_train_loader`.
+
+ Returns:
+ iterable[list]. Length of each list is the batch size of the current
+ GPU. Each element in the list comes from the dataset.
+ """
+ world_size = get_world_size()
+ assert (
+ total_batch_size > 0 and total_batch_size % world_size == 0
+ ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
+ total_batch_size, world_size
+ )
+ batch_size = total_batch_size // world_size
+
+ if isinstance(dataset, torchdata.IterableDataset):
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
+ else:
+ dataset = ToIterableDataset(dataset, sampler)
+
+ if aspect_ratio_grouping:
+ data_loader = torchdata.DataLoader(
+ dataset,
+ num_workers=num_workers,
+ collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
+ worker_init_fn=worker_init_reset_seed,
+ ) # yield individual mapped dict
+ data_loader = AspectRatioGroupedDataset(data_loader, batch_size)
+ if collate_fn is None:
+ return data_loader
+ return MapDataset(data_loader, collate_fn)
+ else:
+ return torchdata.DataLoader(
+ dataset,
+ batch_size=batch_size,
+ drop_last=True,
+ num_workers=num_workers,
+ collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
+ worker_init_fn=worker_init_reset_seed,
+ )
+
+
+def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
+ if dataset is None:
+ dataset = get_detection_dataset_dicts(
+ cfg.DATASETS.TRAIN,
+ filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
+ min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
+ if cfg.MODEL.KEYPOINT_ON
+ else 0,
+ proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
+ )
+ _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])
+
+ if mapper is None:
+ mapper = DatasetMapper(cfg, True)
+
+ if sampler is None:
+ sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
+ logger = logging.getLogger(__name__)
+ if isinstance(dataset, torchdata.IterableDataset):
+ logger.info("Not using any sampler since the dataset is IterableDataset.")
+ sampler = None
+ else:
+ logger.info("Using training sampler {}".format(sampler_name))
+ if sampler_name == "TrainingSampler":
+ sampler = TrainingSampler(len(dataset))
+ elif sampler_name == "RepeatFactorTrainingSampler":
+ repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
+ dataset, cfg.DATALOADER.REPEAT_THRESHOLD
+ )
+ sampler = RepeatFactorTrainingSampler(repeat_factors)
+ elif sampler_name == "RandomSubsetTrainingSampler":
+ sampler = RandomSubsetTrainingSampler(
+ len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO
+ )
+ else:
+ raise ValueError("Unknown training sampler: {}".format(sampler_name))
+
+ return {
+ "dataset": dataset,
+ "sampler": sampler,
+ "mapper": mapper,
+ "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
+ "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
+ "num_workers": cfg.DATALOADER.NUM_WORKERS,
+ }
+
+
+@configurable(from_config=_train_loader_from_config)
+def build_detection_train_loader(
+ dataset,
+ *,
+ mapper,
+ sampler=None,
+ total_batch_size,
+ aspect_ratio_grouping=True,
+ num_workers=0,
+ collate_fn=None,
+):
+ """
+ Build a dataloader for object detection with some default features.
+
+ Args:
+ dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
+ or a pytorch dataset (either map-style or iterable). It can be obtained
+ by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
+ mapper (callable): a callable which takes a sample (dict) from dataset and
+ returns the format to be consumed by the model.
+ When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
+ sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
+ indices to be applied on ``dataset``.
+ If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`,
+ which coordinates an infinite random shuffle sequence across all workers.
+ Sampler must be None if ``dataset`` is iterable.
+ total_batch_size (int): total batch size across all workers.
+ aspect_ratio_grouping (bool): whether to group images with similar
+ aspect ratio for efficiency. When enabled, it requires each
+ element in dataset be a dict with keys "width" and "height".
+ num_workers (int): number of parallel data loading workers
+ collate_fn: a function that determines how to do batching, same as the argument of
+ `torch.utils.data.DataLoader`. Defaults to do no collation and return a list of
+ data. No collation is OK for small batch size and simple data structures.
+ If your batch size is large and each sample contains too many small tensors,
+ it's more efficient to collate them in data loader.
+
+ Returns:
+ torch.utils.data.DataLoader:
+ a dataloader. Each output from it is a ``list[mapped_element]`` of length
+ ``total_batch_size / num_gpus`` (the per-GPU batch size), where ``mapped_element`` is produced
+ by the ``mapper``.
+ """
+ if isinstance(dataset, list):
+ dataset = DatasetFromList(dataset, copy=False)
+ if mapper is not None:
+ dataset = MapDataset(dataset, mapper)
+
+ if isinstance(dataset, torchdata.IterableDataset):
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
+ else:
+ if sampler is None:
+ sampler = TrainingSampler(len(dataset))
+ assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
+ return build_batch_data_loader(
+ dataset,
+ sampler,
+ total_batch_size,
+ aspect_ratio_grouping=aspect_ratio_grouping,
+ num_workers=num_workers,
+ collate_fn=collate_fn,
+ )
+
+
+def _test_loader_from_config(cfg, dataset_name, mapper=None):
+ """
+ Uses the given `dataset_name` argument (instead of the names in cfg), because the
+ standard practice is to evaluate each test set individually (not combining them).
+ """
+ if isinstance(dataset_name, str):
+ dataset_name = [dataset_name]
+
+ dataset = get_detection_dataset_dicts(
+ dataset_name,
+ filter_empty=False,
+ proposal_files=[
+ cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] for x in dataset_name
+ ]
+ if cfg.MODEL.LOAD_PROPOSALS
+ else None,
+ )
+ if mapper is None:
+ mapper = DatasetMapper(cfg, False)
+ return {
+ "dataset": dataset,
+ "mapper": mapper,
+ "num_workers": cfg.DATALOADER.NUM_WORKERS,
+ "sampler": InferenceSampler(len(dataset))
+ if not isinstance(dataset, torchdata.IterableDataset)
+ else None,
+ }
+
+
+@configurable(from_config=_test_loader_from_config)
+def build_detection_test_loader(
+ dataset: Union[List[Any], torchdata.Dataset],
+ *,
+ mapper: Callable[[Dict[str, Any]], Any],
+ sampler: Optional[torchdata.Sampler] = None,
+ batch_size: int = 1,
+ num_workers: int = 0,
+ collate_fn: Optional[Callable[[List[Any]], Any]] = None,
+) -> torchdata.DataLoader:
+ """
+ Similar to `build_detection_train_loader`, with default batch size = 1,
+ and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
+ to produce the exact set of all samples.
+
+ Args:
+ dataset: a list of dataset dicts,
+ or a pytorch dataset (either map-style or iterable). They can be obtained
+ by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
+ mapper: a callable which takes a sample (dict) from dataset
+ and returns the format to be consumed by the model.
+ When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
+ sampler: a sampler that produces
+ indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
+ which splits the dataset across all workers. Sampler must be None
+ if `dataset` is iterable.
+ batch_size: the batch size of the data loader to be created.
+ Default to 1 image per worker since this is the standard when reporting
+ inference time in papers.
+ num_workers: number of parallel data loading workers
+ collate_fn: same as the argument of `torch.utils.data.DataLoader`.
+ Defaults to do no collation and return a list of data.
+
+ Returns:
+ DataLoader: a torch DataLoader, that loads the given detection
+ dataset, with test-time transformation and batching.
+
+ Examples:
+ ::
+ data_loader = build_detection_test_loader(
+ DatasetRegistry.get("my_test"),
+ mapper=DatasetMapper(...))
+
+ # or, instantiate with a CfgNode:
+ data_loader = build_detection_test_loader(cfg, "my_test")
+ """
+ if isinstance(dataset, list):
+ dataset = DatasetFromList(dataset, copy=False)
+ if mapper is not None:
+ dataset = MapDataset(dataset, mapper)
+ if isinstance(dataset, torchdata.IterableDataset):
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
+ else:
+ if sampler is None:
+ sampler = InferenceSampler(len(dataset))
+ return torchdata.DataLoader(
+ dataset,
+ batch_size=batch_size,
+ sampler=sampler,
+ drop_last=False,
+ num_workers=num_workers,
+ collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
+ )
+
+
+def trivial_batch_collator(batch):
+ """
+ A batch collator that does nothing.
+ """
+ return batch
+
+
+def worker_init_reset_seed(worker_id):
+ initial_seed = torch.initial_seed() % 2**31
+ seed_all_rng(initial_seed + worker_id)
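+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): build a test loader
+# from a tiny in-memory list of dataset dicts. The identity mapper stands in
+# for a real `DatasetMapper(cfg, is_train=False)`.
+if __name__ == "__main__":
+    toy_dicts = [{"image_id": i, "width": 640, "height": 480} for i in range(4)]
+    loader = build_detection_test_loader(
+        dataset=toy_dicts,
+        mapper=lambda d: d,  # identity mapper; a real one would read and transform the image
+        num_workers=0,
+    )
+    for batch in loader:
+        print(batch)  # each batch is a list with a single dict (batch_size defaults to 1)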
diff --git a/vendor/detectron2/detectron2/data/catalog.py b/vendor/detectron2/detectron2/data/catalog.py
new file mode 100644
index 0000000000000000000000000000000000000000..45c110c19508f23921b9033cdaf0aa8056f0c125
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/catalog.py
@@ -0,0 +1,236 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import types
+from collections import UserDict
+from typing import List
+
+from detectron2.utils.logger import log_first_n
+
+__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"]
+
+
+class _DatasetCatalog(UserDict):
+ """
+ A global dictionary that stores information about the datasets and how to obtain them.
+
+ It contains a mapping from strings
+ (which are names that identify a dataset, e.g. "coco_2014_train")
+ to a function which parses the dataset and returns the samples in the
+ format of `list[dict]`.
+
+ The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details)
+ if used with the data loader functionalities in `data/build.py`, `data/detection_transform.py`.
+
+ The purpose of having this catalog is to make it easy to choose
+ different datasets, by just using the strings in the config.
+ """
+
+ def register(self, name, func):
+ """
+ Args:
+ name (str): the name that identifies a dataset, e.g. "coco_2014_train".
+ func (callable): a callable which takes no arguments and returns a list of dicts.
+ It must return the same results if called multiple times.
+ """
+ assert callable(func), "You must register a function with `DatasetCatalog.register`!"
+ assert name not in self, "Dataset '{}' is already registered!".format(name)
+ self[name] = func
+
+ def get(self, name):
+ """
+ Call the registered function and return its results.
+
+ Args:
+ name (str): the name that identifies a dataset, e.g. "coco_2014_train".
+
+ Returns:
+ list[dict]: dataset annotations.
+ """
+ try:
+ f = self[name]
+ except KeyError as e:
+ raise KeyError(
+ "Dataset '{}' is not registered! Available datasets are: {}".format(
+ name, ", ".join(list(self.keys()))
+ )
+ ) from e
+ return f()
+
+ def list(self) -> List[str]:
+ """
+ List all registered datasets.
+
+ Returns:
+ list[str]
+ """
+ return list(self.keys())
+
+ def remove(self, name):
+ """
+ Alias of ``pop``.
+ """
+ self.pop(name)
+
+ def __str__(self):
+ return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys()))
+
+ __repr__ = __str__
+
+
+DatasetCatalog = _DatasetCatalog()
+DatasetCatalog.__doc__ = (
+ _DatasetCatalog.__doc__
+ + """
+ .. automethod:: detectron2.data.catalog.DatasetCatalog.register
+ .. automethod:: detectron2.data.catalog.DatasetCatalog.get
+"""
+)
+
+
+class Metadata(types.SimpleNamespace):
+ """
+ A class that supports simple attribute setter/getter.
+ It is intended for storing metadata of a dataset and making it accessible globally.
+
+ Examples:
+ ::
+ # somewhere when you load the data:
+ MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]
+
+ # somewhere when you print statistics or visualize:
+ classes = MetadataCatalog.get("mydataset").thing_classes
+ """
+
+ # the name of the dataset
+ # set default to N/A so that `self.name` in the errors will not trigger getattr again
+ name: str = "N/A"
+
+ _RENAMED = {
+ "class_names": "thing_classes",
+ "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
+ "stuff_class_names": "stuff_classes",
+ }
+
+ def __getattr__(self, key):
+ if key in self._RENAMED:
+ log_first_n(
+ logging.WARNING,
+ "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
+ n=10,
+ )
+ return getattr(self, self._RENAMED[key])
+
+ # "name" exists in every metadata
+ if len(self.__dict__) > 1:
+ raise AttributeError(
+ "Attribute '{}' does not exist in the metadata of dataset '{}'. Available "
+ "keys are {}.".format(key, self.name, str(self.__dict__.keys()))
+ )
+ else:
+ raise AttributeError(
+ f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': "
+ "metadata is empty."
+ )
+
+ def __setattr__(self, key, val):
+ if key in self._RENAMED:
+ log_first_n(
+ logging.WARNING,
+ "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
+ n=10,
+ )
+ setattr(self, self._RENAMED[key], val)
+
+ # Ensure that metadata of the same name stays consistent
+ try:
+ oldval = getattr(self, key)
+ assert oldval == val, (
+ "Attribute '{}' in the metadata of '{}' cannot be set "
+ "to a different value!\n{} != {}".format(key, self.name, oldval, val)
+ )
+ except AttributeError:
+ super().__setattr__(key, val)
+
+ def as_dict(self):
+ """
+ Returns all the metadata as a dict.
+ Note that modifications to the returned dict will not be reflected in the Metadata object.
+ """
+ return copy.copy(self.__dict__)
+
+ def set(self, **kwargs):
+ """
+ Set multiple metadata with kwargs.
+ """
+ for k, v in kwargs.items():
+ setattr(self, k, v)
+ return self
+
+ def get(self, key, default=None):
+ """
+ Access an attribute and return its value if it exists.
+ Otherwise return default.
+ """
+ try:
+ return getattr(self, key)
+ except AttributeError:
+ return default
+
+
+class _MetadataCatalog(UserDict):
+ """
+ MetadataCatalog is a global dictionary that provides access to
+ :class:`Metadata` of a given dataset.
+
+ The metadata associated with a certain name is a singleton: once created, the
+ metadata will stay alive and will be returned by future calls to ``get(name)``.
+
+ It's like global variables, so don't abuse it.
+ It's meant for storing knowledge that's constant and shared across the execution
+ of the program, e.g.: the class names in COCO.
+ """
+
+ def get(self, name):
+ """
+ Args:
+ name (str): name of a dataset (e.g. coco_2014_train).
+
+ Returns:
+ Metadata: The :class:`Metadata` instance associated with this name;
+ an empty one is created if none is available.
+ """
+ assert len(name)
+ r = super().get(name, None)
+ if r is None:
+ r = self[name] = Metadata(name=name)
+ return r
+
+ def list(self):
+ """
+ List all registered metadata.
+
+ Returns:
+ list[str]: keys (names of datasets) of all registered metadata
+ """
+ return list(self.keys())
+
+ def remove(self, name):
+ """
+ Alias of ``pop``.
+ """
+ self.pop(name)
+
+ def __str__(self):
+ return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys()))
+
+ __repr__ = __str__
+
+
+MetadataCatalog = _MetadataCatalog()
+MetadataCatalog.__doc__ = (
+ _MetadataCatalog.__doc__
+ + """
+ .. automethod:: detectron2.data.catalog.MetadataCatalog.get
+"""
+)
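+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): register a toy
+# dataset, attach metadata to it, then query both catalogs.
+if __name__ == "__main__":
+    DatasetCatalog.register("toy_train", lambda: [{"image_id": 0, "annotations": []}])
+    MetadataCatalog.get("toy_train").thing_classes = ["hand"]
+
+    dicts = DatasetCatalog.get("toy_train")  # calls the registered function
+    print(len(dicts), MetadataCatalog.get("toy_train").thing_classes)
+    DatasetCatalog.remove("toy_train")  # clean up the global registry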
diff --git a/vendor/detectron2/detectron2/data/common.py b/vendor/detectron2/detectron2/data/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf24b1d968e01737d76a672546535e57400df262
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/common.py
@@ -0,0 +1,301 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import copy
+import itertools
+import logging
+import numpy as np
+import pickle
+import random
+from typing import Callable, Union
+import torch
+import torch.utils.data as data
+from torch.utils.data.sampler import Sampler
+
+from detectron2.utils.serialize import PicklableWrapper
+
+__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"]
+
+logger = logging.getLogger(__name__)
+
+
+def _shard_iterator_dataloader_worker(iterable):
+ # Shard the iterable if we're currently inside pytorch dataloader worker.
+ worker_info = data.get_worker_info()
+ if worker_info is None or worker_info.num_workers == 1:
+ # do nothing
+ yield from iterable
+ else:
+ yield from itertools.islice(iterable, worker_info.id, None, worker_info.num_workers)
+
+
+class _MapIterableDataset(data.IterableDataset):
+ """
+ Map a function over elements in an IterableDataset.
+
+ Similar to pytorch's MapIterDataPipe, but supports filtering when map_func
+ returns None.
+
+ This class is not public-facing. Will be called by `MapDataset`.
+ """
+
+ def __init__(self, dataset, map_func):
+ self._dataset = dataset
+ self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work
+
+ def __len__(self):
+ return len(self._dataset)
+
+ def __iter__(self):
+ for x in map(self._map_func, self._dataset):
+ if x is not None:
+ yield x
+
+
+class MapDataset(data.Dataset):
+ """
+ Map a function over the elements in a dataset.
+ """
+
+ def __init__(self, dataset, map_func):
+ """
+ Args:
+ dataset: a dataset where map function is applied. Can be either
+ map-style or iterable dataset. When given an iterable dataset,
+ the returned object will also be an iterable dataset.
+ map_func: a callable which maps the element in dataset. map_func can
+ return None to skip the data (e.g. in case of errors).
+ How None is handled depends on the style of `dataset`.
+ If `dataset` is map-style, it randomly tries other elements.
+ If `dataset` is iterable, it skips the data and tries the next.
+ """
+ self._dataset = dataset
+ self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work
+
+ self._rng = random.Random(42)
+ self._fallback_candidates = set(range(len(dataset)))
+
+ def __new__(cls, dataset, map_func):
+ is_iterable = isinstance(dataset, data.IterableDataset)
+ if is_iterable:
+ return _MapIterableDataset(dataset, map_func)
+ else:
+ return super().__new__(cls)
+
+ def __getnewargs__(self):
+ return self._dataset, self._map_func
+
+ def __len__(self):
+ return len(self._dataset)
+
+ def __getitem__(self, idx):
+ retry_count = 0
+ cur_idx = int(idx)
+
+ while True:
+ data = self._map_func(self._dataset[cur_idx])
+ if data is not None:
+ self._fallback_candidates.add(cur_idx)
+ return data
+
+ # _map_func fails for this idx, use a random new index from the pool
+ retry_count += 1
+ self._fallback_candidates.discard(cur_idx)
+ # random.sample() requires a sequence on Python 3.11+, so convert the set
+ cur_idx = self._rng.sample(list(self._fallback_candidates), k=1)[0]
+
+ if retry_count >= 3:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ "Failed to apply `_map_func` for idx: {}, retry count: {}".format(
+ idx, retry_count
+ )
+ )
+
+
+class _TorchSerializedList(object):
+ """
+ A list-like object whose items are serialized and stored in a torch tensor. When
+ launching a process that uses TorchSerializedList with "fork" start method,
+ the subprocess can read the same buffer without triggering copy-on-access. When
+ launching a process that uses TorchSerializedList with "spawn/forkserver" start
+ method, the list will be pickled by a special ForkingPickler registered by PyTorch
+ that moves data to shared memory. In both cases, this allows parent and child
+ processes to share RAM for the list data, hence avoids the issue in
+ https://github.com/pytorch/pytorch/issues/13246.
+
+ See also https://ppwwyyxx.com/blog/2022/Demystify-RAM-Usage-in-Multiprocess-DataLoader/
+ on how it works.
+ """
+
+ def __init__(self, lst: list):
+ self._lst = lst
+
+ def _serialize(data):
+ buffer = pickle.dumps(data, protocol=-1)
+ return np.frombuffer(buffer, dtype=np.uint8)
+
+ logger.info(
+ "Serializing {} elements to byte tensors and concatenating them all ...".format(
+ len(self._lst)
+ )
+ )
+ self._lst = [_serialize(x) for x in self._lst]
+ self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
+ self._addr = torch.from_numpy(np.cumsum(self._addr))
+ self._lst = torch.from_numpy(np.concatenate(self._lst))
+ logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2))
+
+ def __len__(self):
+ return len(self._addr)
+
+ def __getitem__(self, idx):
+ start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
+ end_addr = self._addr[idx].item()
+ bytes = memoryview(self._lst[start_addr:end_addr].numpy())
+
+ # @lint-ignore PYTHONPICKLEISBAD
+ return pickle.loads(bytes)
+
+
+_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = _TorchSerializedList
+
+
+@contextlib.contextmanager
+def set_default_dataset_from_list_serialize_method(new):
+ """
+ Context manager for using custom serialize function when creating DatasetFromList
+ """
+
+ global _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
+ orig = _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
+ _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = new
+ yield
+ _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig
+
+
+class DatasetFromList(data.Dataset):
+ """
+ Wrap a list to a torch Dataset. It produces elements of the list as data.
+ """
+
+ def __init__(
+ self,
+ lst: list,
+ copy: bool = True,
+ serialize: Union[bool, Callable] = True,
+ ):
+ """
+ Args:
+ lst (list): a list which contains elements to produce.
+ copy (bool): whether to deepcopy the element when producing it,
+ so that the result can be modified in place without affecting the
+ source in the list.
+ serialize (bool or callable): whether to serialize the storage to another
+ backend. If `True`, the default serialize method will be used; if given
+ a callable, the callable will be used as the serialize method.
+ """
+ self._lst = lst
+ self._copy = copy
+ if not isinstance(serialize, (bool, Callable)):
+ raise TypeError(f"Unsupported type for argument `serialize`: {serialize}")
+ self._serialize = serialize is not False
+
+ if self._serialize:
+ serialize_method = (
+ serialize
+ if isinstance(serialize, Callable)
+ else _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
+ )
+ logger.info(f"Serializing the dataset using: {serialize_method}")
+ self._lst = serialize_method(self._lst)
+
+ def __len__(self):
+ return len(self._lst)
+
+ def __getitem__(self, idx):
+ if self._copy and not self._serialize:
+ return copy.deepcopy(self._lst[idx])
+ else:
+ return self._lst[idx]
+
+
+class ToIterableDataset(data.IterableDataset):
+ """
+ Convert an old indices-based (also called map-style) dataset
+ to an iterable-style dataset.
+ """
+
+ def __init__(self, dataset: data.Dataset, sampler: Sampler, shard_sampler: bool = True):
+ """
+ Args:
+ dataset: an old-style dataset with ``__getitem__``
+ sampler: a cheap iterable that produces indices to be applied on ``dataset``.
+ shard_sampler: whether to shard the sampler based on the current pytorch data loader
+ worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple
+ workers, it is responsible for sharding its data based on worker id so that workers
+ don't produce identical data.
+
+ Most samplers (like our TrainingSampler) do not shard based on dataloader worker id
+ and this argument should be set to True. But certain samplers may be already
+ sharded, in that case this argument should be set to False.
+ """
+ assert not isinstance(dataset, data.IterableDataset), dataset
+ assert isinstance(sampler, Sampler), sampler
+ self.dataset = dataset
+ self.sampler = sampler
+ self.shard_sampler = shard_sampler
+
+ def __iter__(self):
+ if not self.shard_sampler:
+ sampler = self.sampler
+ else:
+ # With map-style dataset, `DataLoader(dataset, sampler)` runs the
+ # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))`
+ # will run the sampler in every one of the N workers. So we should only keep 1/N of the ids on
+ # each worker. The assumption is that sampler is cheap to iterate so it's fine to
+ # discard ids in workers.
+ sampler = _shard_iterator_dataloader_worker(self.sampler)
+ for idx in sampler:
+ yield self.dataset[idx]
+
+ def __len__(self):
+ return len(self.sampler)
+
+
+class AspectRatioGroupedDataset(data.IterableDataset):
+ """
+ Batch data that have similar aspect ratio together.
+ In this implementation, images whose aspect ratio < (or >) 1 will
+ be batched together.
+ This improves training speed because the images then need less padding
+ to form a batch.
+
+ It assumes the underlying dataset produces dicts with "width" and "height" keys.
+ It will then produce a list of original dicts with length = batch_size,
+ all with similar aspect ratios.
+ """
+
+ def __init__(self, dataset, batch_size):
+ """
+ Args:
+ dataset: an iterable. Each element must be a dict with keys
+ "width" and "height", which will be used to batch data.
+ batch_size (int):
+ """
+ self.dataset = dataset
+ self.batch_size = batch_size
+ self._buckets = [[] for _ in range(2)]
+ # Hard-coded two aspect ratio groups: w > h and w < h.
+ # Can add support for more aspect ratio groups, but doesn't seem useful
+
+ def __iter__(self):
+ for d in self.dataset:
+ w, h = d["width"], d["height"]
+ bucket_id = 0 if w > h else 1
+ bucket = self._buckets[bucket_id]
+ bucket.append(d)
+ if len(bucket) == self.batch_size:
+ data = bucket[:]
+ # Clear bucket first, because code after yield is not
+ # guaranteed to execute
+ del bucket[:]
+ yield data
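+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): wrap a plain Python
+# list into a dataset and map a function over its elements.
+if __name__ == "__main__":
+    base = DatasetFromList(list(range(10)), copy=False, serialize=False)
+    squared = MapDataset(base, lambda x: x * x)
+    print(len(squared), [squared[i] for i in range(3)])  # 10 [0, 1, 4]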
diff --git a/vendor/detectron2/detectron2/data/dataset_mapper.py b/vendor/detectron2/detectron2/data/dataset_mapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8714f7990f11e146a01e03d108518e0356b50c4
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/dataset_mapper.py
@@ -0,0 +1,191 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import numpy as np
+from typing import List, Optional, Union
+import torch
+
+from detectron2.config import configurable
+
+from . import detection_utils as utils
+from . import transforms as T
+
+"""
+This file contains the default mapping that's applied to "dataset dicts".
+"""
+
+__all__ = ["DatasetMapper"]
+
+
+class DatasetMapper:
+ """
+ A callable which takes a dataset dict in Detectron2 Dataset format,
+ and maps it into a format used by the model.
+
+ This is the default callable to be used to map your dataset dict into training data.
+ You may need to follow it to implement your own one for customized logic,
+ such as a different way to read or transform images.
+ See :doc:`/tutorials/data_loading` for details.
+
+ The callable currently does the following:
+
+ 1. Read the image from "file_name"
+ 2. Apply cropping/geometric transforms to the image and annotations
+ 3. Convert the data and annotations into Tensors and :class:`Instances`
+ """
+
+ @configurable
+ def __init__(
+ self,
+ is_train: bool,
+ *,
+ augmentations: List[Union[T.Augmentation, T.Transform]],
+ image_format: str,
+ use_instance_mask: bool = False,
+ use_keypoint: bool = False,
+ instance_mask_format: str = "polygon",
+ keypoint_hflip_indices: Optional[np.ndarray] = None,
+ precomputed_proposal_topk: Optional[int] = None,
+ recompute_boxes: bool = False,
+ ):
+ """
+ NOTE: this interface is experimental.
+
+ Args:
+ is_train: whether it's used in training or inference
+ augmentations: a list of augmentations or deterministic transforms to apply
+ image_format: an image format supported by :func:`detection_utils.read_image`.
+ use_instance_mask: whether to process instance segmentation annotations, if available
+ use_keypoint: whether to process keypoint annotations if available
+ instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
+ masks into this format.
+ keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
+ precomputed_proposal_topk: if given, will load pre-computed
+ proposals from dataset_dict and keep the top k proposals for each image.
+ recompute_boxes: whether to overwrite bounding box annotations
+ by computing tight bounding boxes from instance mask annotations.
+ """
+ if recompute_boxes:
+ assert use_instance_mask, "recompute_boxes requires instance masks"
+ # fmt: off
+ self.is_train = is_train
+ self.augmentations = T.AugmentationList(augmentations)
+ self.image_format = image_format
+ self.use_instance_mask = use_instance_mask
+ self.instance_mask_format = instance_mask_format
+ self.use_keypoint = use_keypoint
+ self.keypoint_hflip_indices = keypoint_hflip_indices
+ self.proposal_topk = precomputed_proposal_topk
+ self.recompute_boxes = recompute_boxes
+ # fmt: on
+ logger = logging.getLogger(__name__)
+ mode = "training" if is_train else "inference"
+ logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
+
+ @classmethod
+ def from_config(cls, cfg, is_train: bool = True):
+ augs = utils.build_augmentation(cfg, is_train)
+ if cfg.INPUT.CROP.ENABLED and is_train:
+ augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
+ recompute_boxes = cfg.MODEL.MASK_ON
+ else:
+ recompute_boxes = False
+
+ ret = {
+ "is_train": is_train,
+ "augmentations": augs,
+ "image_format": cfg.INPUT.FORMAT,
+ "use_instance_mask": cfg.MODEL.MASK_ON,
+ "instance_mask_format": cfg.INPUT.MASK_FORMAT,
+ "use_keypoint": cfg.MODEL.KEYPOINT_ON,
+ "recompute_boxes": recompute_boxes,
+ }
+
+ if cfg.MODEL.KEYPOINT_ON:
+ ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
+
+ if cfg.MODEL.LOAD_PROPOSALS:
+ ret["precomputed_proposal_topk"] = (
+ cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
+ if is_train
+ else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
+ )
+ return ret
+
+ def _transform_annotations(self, dataset_dict, transforms, image_shape):
+ # USER: Modify this if you want to keep them for some reason.
+ for anno in dataset_dict["annotations"]:
+ if not self.use_instance_mask:
+ anno.pop("segmentation", None)
+ if not self.use_keypoint:
+ anno.pop("keypoints", None)
+
+ # USER: Implement additional transformations if you have other types of data
+ annos = [
+ utils.transform_instance_annotations(
+ obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
+ )
+ for obj in dataset_dict.pop("annotations")
+ if obj.get("iscrowd", 0) == 0
+ ]
+ instances = utils.annotations_to_instances(
+ annos, image_shape, mask_format=self.instance_mask_format
+ )
+
+ # After transforms such as cropping are applied, the bounding box may no longer
+ # tightly bound the object. As an example, imagine a triangle object
+ # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
+ # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
+ # the intersection of original bounding box and the cropping box.
+ if self.recompute_boxes:
+ instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
+ dataset_dict["instances"] = utils.filter_empty_instances(instances)
+
+ def __call__(self, dataset_dict):
+ """
+ Args:
+ dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+ Returns:
+ dict: a format that builtin models in detectron2 accept
+ """
+ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
+ # USER: Write your own image loading if it's not from a file
+ image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
+ utils.check_image_size(dataset_dict, image)
+
+ # USER: Remove if you don't do semantic/panoptic segmentation.
+ if "sem_seg_file_name" in dataset_dict:
+ sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
+ else:
+ sem_seg_gt = None
+
+ aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
+ transforms = self.augmentations(aug_input)
+ image, sem_seg_gt = aug_input.image, aug_input.sem_seg
+
+ image_shape = image.shape[:2] # h, w
+ # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
+ # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+ # Therefore it's important to use torch.Tensor.
+ dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+ if sem_seg_gt is not None:
+ dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))
+
+ # USER: Remove if you don't use pre-computed proposals.
+ # Most users would not need this feature.
+ if self.proposal_topk is not None:
+ utils.transform_proposals(
+ dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
+ )
+
+ if not self.is_train:
+ # USER: Modify this if you want to keep them for some reason.
+ dataset_dict.pop("annotations", None)
+ dataset_dict.pop("sem_seg_file_name", None)
+ return dataset_dict
+
+ if "annotations" in dataset_dict:
+ self._transform_annotations(dataset_dict, transforms, image_shape)
+
+ return dataset_dict
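+
+
+# --------------------------------------------------------------------------- #
+# Hedged usage sketch (not part of upstream detectron2): construct a mapper
+# explicitly, without a cfg. ResizeShortestEdge and the "BGR" format are common
+# choices assumed here for illustration, not requirements of this file.
+if __name__ == "__main__":
+    demo_mapper = DatasetMapper(
+        is_train=False,
+        augmentations=[T.ResizeShortestEdge(short_edge_length=800, max_size=1333)],
+        image_format="BGR",
+    )
+    # demo_mapper({"file_name": "/path/to/image.jpg", "image_id": 0, ...}) would
+    # return a dict with an "image" tensor ready to be consumed by the model.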
diff --git a/vendor/detectron2/detectron2/data/datasets/README.md b/vendor/detectron2/detectron2/data/datasets/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9fb3e4f7afec17137c95c78be6ef06d520ec8032
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/README.md
@@ -0,0 +1,9 @@
+
+
+### Common Datasets
+
+The datasets implemented here do not need to load the data into the final format.
+They should provide the minimal data structure needed to use the dataset, so they can be very efficient.
+
+For example, for an image dataset, just provide the file names and labels, but don't read the images.
+Let the downstream decide how to read.
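+
+As a hedged illustration (the function and file names below are made up), a
+dataset loader in this spirit can be as small as:
+
+```python
+def load_my_dataset():
+    # Return lightweight dicts; a downstream mapper reads the actual images.
+    return [
+        {"file_name": "images/0001.jpg", "image_id": 1, "height": 480, "width": 640, "annotations": []},
+    ]
+```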
diff --git a/vendor/detectron2/detectron2/data/datasets/__init__.py b/vendor/detectron2/detectron2/data/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a44bedc15e5f0e762fc4d77efd6f1b07c6ff77d0
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json
+from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
+from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
+from .pascal_voc import load_voc_instances, register_pascal_voc
+from . import builtin as _builtin # ensure the builtin datasets are registered
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/vendor/detectron2/detectron2/data/datasets/builtin.py b/vendor/detectron2/detectron2/data/datasets/builtin.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3a68aa833f12f0fa324a269c36190f21b8a75bd
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/builtin.py
@@ -0,0 +1,259 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+
+"""
+This file registers pre-defined datasets at hard-coded paths, and their metadata.
+
+We hard-code metadata for common datasets. This enables:
+1. Consistency checks when loading the datasets
+2. Using models on these standard datasets directly and running demos,
+   without having to download the dataset annotations
+
+We hard-code some paths to the datasets, which are assumed to
+exist in "./datasets/".
+
+Users SHOULD NOT use this file to create new datasets / metadata for new datasets.
+To add a new dataset, refer to the tutorial "docs/DATASETS.md".
+"""
+
+import os
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+
+from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata
+from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic
+from .cityscapes_panoptic import register_all_cityscapes_panoptic
+from .coco import load_sem_seg, register_coco_instances
+from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
+from .lvis import get_lvis_instances_meta, register_lvis_instances
+from .pascal_voc import register_pascal_voc
+
+# ==== Predefined datasets and splits for COCO ==========
+
+_PREDEFINED_SPLITS_COCO = {}
+_PREDEFINED_SPLITS_COCO["coco"] = {
+ "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
+ "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
+ "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"),
+ "coco_2014_valminusminival": (
+ "coco/val2014",
+ "coco/annotations/instances_valminusminival2014.json",
+ ),
+ "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"),
+ "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"),
+ "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"),
+ "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"),
+ "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"),
+}
+
+_PREDEFINED_SPLITS_COCO["coco_person"] = {
+ "keypoints_coco_2014_train": (
+ "coco/train2014",
+ "coco/annotations/person_keypoints_train2014.json",
+ ),
+ "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"),
+ "keypoints_coco_2014_minival": (
+ "coco/val2014",
+ "coco/annotations/person_keypoints_minival2014.json",
+ ),
+ "keypoints_coco_2014_valminusminival": (
+ "coco/val2014",
+ "coco/annotations/person_keypoints_valminusminival2014.json",
+ ),
+ "keypoints_coco_2017_train": (
+ "coco/train2017",
+ "coco/annotations/person_keypoints_train2017.json",
+ ),
+ "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"),
+ "keypoints_coco_2017_val_100": (
+ "coco/val2017",
+ "coco/annotations/person_keypoints_val2017_100.json",
+ ),
+}
+
+
+_PREDEFINED_SPLITS_COCO_PANOPTIC = {
+ "coco_2017_train_panoptic": (
+ # This is the original panoptic annotation directory
+ "coco/panoptic_train2017",
+ "coco/annotations/panoptic_train2017.json",
+ # This directory contains semantic annotations that are
+ # converted from panoptic annotations.
+ # It is used by PanopticFPN.
+ # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py
+ # to create these directories.
+ "coco/panoptic_stuff_train2017",
+ ),
+ "coco_2017_val_panoptic": (
+ "coco/panoptic_val2017",
+ "coco/annotations/panoptic_val2017.json",
+ "coco/panoptic_stuff_val2017",
+ ),
+ "coco_2017_val_100_panoptic": (
+ "coco/panoptic_val2017_100",
+ "coco/annotations/panoptic_val2017_100.json",
+ "coco/panoptic_stuff_val2017_100",
+ ),
+}
+
+
+def register_all_coco(root):
+ for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items():
+ for key, (image_root, json_file) in splits_per_dataset.items():
+ # Assume pre-defined datasets live in `./datasets`.
+ register_coco_instances(
+ key,
+ _get_builtin_metadata(dataset_name),
+ os.path.join(root, json_file) if "://" not in json_file else json_file,
+ os.path.join(root, image_root),
+ )
+
+ for (
+ prefix,
+ (panoptic_root, panoptic_json, semantic_root),
+ ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
+ prefix_instances = prefix[: -len("_panoptic")]
+ instances_meta = MetadataCatalog.get(prefix_instances)
+ image_root, instances_json = instances_meta.image_root, instances_meta.json_file
+ # The "separated" version of COCO panoptic segmentation dataset,
+ # e.g. used by Panoptic FPN
+ register_coco_panoptic_separated(
+ prefix,
+ _get_builtin_metadata("coco_panoptic_separated"),
+ image_root,
+ os.path.join(root, panoptic_root),
+ os.path.join(root, panoptic_json),
+ os.path.join(root, semantic_root),
+ instances_json,
+ )
+ # The "standard" version of COCO panoptic segmentation dataset,
+ # e.g. used by Panoptic-DeepLab
+ register_coco_panoptic(
+ prefix,
+ _get_builtin_metadata("coco_panoptic_standard"),
+ image_root,
+ os.path.join(root, panoptic_root),
+ os.path.join(root, panoptic_json),
+ instances_json,
+ )
+
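+# Illustrative usage (not part of the upstream registration flow): a user's own
+# COCO-format dataset would typically be registered with the same helper, e.g.
+#   register_coco_instances("my_coco_train", {}, "path/to/annotations.json", "path/to/images")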
+
+# ==== Predefined datasets and splits for LVIS ==========
+
+
+_PREDEFINED_SPLITS_LVIS = {
+ "lvis_v1": {
+ "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"),
+ "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"),
+ "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"),
+ "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"),
+ },
+ "lvis_v0.5": {
+ "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"),
+ "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"),
+ "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"),
+ "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"),
+ },
+ "lvis_v0.5_cocofied": {
+ "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"),
+ "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"),
+ },
+}
+
+
+def register_all_lvis(root):
+ for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items():
+ for key, (image_root, json_file) in splits_per_dataset.items():
+ register_lvis_instances(
+ key,
+ get_lvis_instances_meta(dataset_name),
+ os.path.join(root, json_file) if "://" not in json_file else json_file,
+ os.path.join(root, image_root),
+ )
+
+
+# ==== Predefined splits for raw cityscapes images ===========
+_RAW_CITYSCAPES_SPLITS = {
+ "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"),
+ "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"),
+ "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"),
+}
+
+
+def register_all_cityscapes(root):
+ for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
+ meta = _get_builtin_metadata("cityscapes")
+ image_dir = os.path.join(root, image_dir)
+ gt_dir = os.path.join(root, gt_dir)
+
+ inst_key = key.format(task="instance_seg")
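+        # Bind image_dir/gt_dir as lambda default arguments so each registered loader
+        # captures this iteration's paths; a plain closure would late-bind and every
+        # split would end up loading the last one.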
+ DatasetCatalog.register(
+ inst_key,
+ lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
+ x, y, from_json=True, to_polygons=True
+ ),
+ )
+ MetadataCatalog.get(inst_key).set(
+ image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta
+ )
+
+ sem_key = key.format(task="sem_seg")
+ DatasetCatalog.register(
+ sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)
+ )
+ MetadataCatalog.get(sem_key).set(
+ image_dir=image_dir,
+ gt_dir=gt_dir,
+ evaluator_type="cityscapes_sem_seg",
+ ignore_label=255,
+ **meta,
+ )
+
+
+# ==== Predefined splits for PASCAL VOC ===========
+def register_all_pascal_voc(root):
+ SPLITS = [
+ ("voc_2007_trainval", "VOC2007", "trainval"),
+ ("voc_2007_train", "VOC2007", "train"),
+ ("voc_2007_val", "VOC2007", "val"),
+ ("voc_2007_test", "VOC2007", "test"),
+ ("voc_2012_trainval", "VOC2012", "trainval"),
+ ("voc_2012_train", "VOC2012", "train"),
+ ("voc_2012_val", "VOC2012", "val"),
+ ]
+ for name, dirname, split in SPLITS:
+ year = 2007 if "2007" in name else 2012
+ register_pascal_voc(name, os.path.join(root, dirname), split, year)
+ MetadataCatalog.get(name).evaluator_type = "pascal_voc"
+
+
+def register_all_ade20k(root):
+ root = os.path.join(root, "ADEChallengeData2016")
+ for name, dirname in [("train", "training"), ("val", "validation")]:
+ image_dir = os.path.join(root, "images", dirname)
+ gt_dir = os.path.join(root, "annotations_detectron2", dirname)
+ name = f"ade20k_sem_seg_{name}"
+ DatasetCatalog.register(
+ name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg")
+ )
+ MetadataCatalog.get(name).set(
+ stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:],
+ image_root=image_dir,
+ sem_seg_root=gt_dir,
+ evaluator_type="sem_seg",
+ ignore_label=255,
+ )
+
+
+# True for open source;
+# Internally at fb, we register them elsewhere
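+# (the ".builtin" suffix check also prevents registration from running when this file
+# is executed directly, since __name__ would then be "__main__")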
+if __name__.endswith(".builtin"):
+ # Assume pre-defined datasets live in `./datasets`.
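+    # The root can be overridden through the DETECTRON2_DATASETS environment variable,
+    # e.g. `export DETECTRON2_DATASETS=/data/datasets` (path shown is illustrative).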
+ _root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets"))
+ register_all_coco(_root)
+ register_all_lvis(_root)
+ register_all_cityscapes(_root)
+ register_all_cityscapes_panoptic(_root)
+ register_all_pascal_voc(_root)
+ register_all_ade20k(_root)
diff --git a/vendor/detectron2/detectron2/data/datasets/builtin_meta.py b/vendor/detectron2/detectron2/data/datasets/builtin_meta.py
new file mode 100644
index 0000000000000000000000000000000000000000..63c7a1a31b31dd89b82011effee26471faccacf5
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/builtin_meta.py
@@ -0,0 +1,350 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+"""
+Note:
+For your custom dataset, there is no need to hard-code metadata anywhere in the code.
+For example, for a COCO-format dataset, metadata will be obtained automatically
+when calling `load_coco_json`. For other datasets, metadata may also be obtained in other ways
+during loading.
+
+However, we hard-code metadata for a few common datasets here.
+The only goal is to allow users who don't have these datasets to use pre-trained models.
+Users don't have to download a COCO json (which contains metadata) in order to visualize a
+COCO model (with correct class names and colors).
+"""
+
+
+# All coco categories, together with their nice-looking visualization colors
+# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
+COCO_CATEGORIES = [
+ {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"},
+ {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"},
+ {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"},
+ {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"},
+ {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"},
+ {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"},
+ {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"},
+ {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"},
+ {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"},
+ {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"},
+ {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"},
+ {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"},
+ {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"},
+ {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"},
+ {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"},
+ {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"},
+ {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"},
+ {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"},
+ {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"},
+ {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"},
+ {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"},
+ {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"},
+ {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"},
+ {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"},
+ {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"},
+ {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"},
+ {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"},
+ {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"},
+ {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"},
+ {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"},
+ {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"},
+ {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"},
+ {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"},
+ {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"},
+ {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"},
+ {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"},
+ {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"},
+ {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"},
+ {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"},
+ {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"},
+ {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"},
+ {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"},
+ {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"},
+ {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"},
+ {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"},
+ {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"},
+ {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"},
+ {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"},
+ {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"},
+ {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"},
+ {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"},
+ {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"},
+ {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"},
+ {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"},
+ {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"},
+ {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"},
+ {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"},
+ {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"},
+ {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"},
+ {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"},
+ {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"},
+ {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"},
+ {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"},
+ {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"},
+ {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"},
+ {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"},
+ {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"},
+ {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"},
+ {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"},
+ {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"},
+ {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"},
+ {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"},
+ {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"},
+ {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"},
+ {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"},
+ {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"},
+ {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"},
+ {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"},
+ {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"},
+ {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"},
+ {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"},
+ {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"},
+ {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"},
+ {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"},
+ {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"},
+ {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"},
+ {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"},
+ {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"},
+ {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"},
+ {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"},
+ {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"},
+ {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"},
+ {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"},
+ {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"},
+ {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"},
+ {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"},
+ {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"},
+ {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"},
+ {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"},
+ {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"},
+ {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"},
+ {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"},
+ {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"},
+ {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"},
+ {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"},
+ {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"},
+ {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"},
+ {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"},
+ {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"},
+ {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"},
+ {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"},
+ {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"},
+ {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"},
+ {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"},
+ {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"},
+ {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"},
+ {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"},
+ {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"},
+ {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"},
+ {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"},
+ {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"},
+ {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"},
+ {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"},
+ {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"},
+ {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"},
+ {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"},
+ {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"},
+ {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"},
+ {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"},
+ {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"},
+ {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"},
+ {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"},
+ {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
+]
+
+# fmt: off
+COCO_PERSON_KEYPOINT_NAMES = (
+ "nose",
+ "left_eye", "right_eye",
+ "left_ear", "right_ear",
+ "left_shoulder", "right_shoulder",
+ "left_elbow", "right_elbow",
+ "left_wrist", "right_wrist",
+ "left_hip", "right_hip",
+ "left_knee", "right_knee",
+ "left_ankle", "right_ankle",
+)
+# fmt: on
+
+# Pairs of keypoints that should be exchanged under horizontal flipping
+COCO_PERSON_KEYPOINT_FLIP_MAP = (
+ ("left_eye", "right_eye"),
+ ("left_ear", "right_ear"),
+ ("left_shoulder", "right_shoulder"),
+ ("left_elbow", "right_elbow"),
+ ("left_wrist", "right_wrist"),
+ ("left_hip", "right_hip"),
+ ("left_knee", "right_knee"),
+ ("left_ankle", "right_ankle"),
+)
+
+# rules for pairs of keypoints to draw a line between, and the line color to use.
+KEYPOINT_CONNECTION_RULES = [
+ # face
+ ("left_ear", "left_eye", (102, 204, 255)),
+ ("right_ear", "right_eye", (51, 153, 255)),
+ ("left_eye", "nose", (102, 0, 204)),
+ ("nose", "right_eye", (51, 102, 255)),
+ # upper-body
+ ("left_shoulder", "right_shoulder", (255, 128, 0)),
+ ("left_shoulder", "left_elbow", (153, 255, 204)),
+ ("right_shoulder", "right_elbow", (128, 229, 255)),
+ ("left_elbow", "left_wrist", (153, 255, 153)),
+ ("right_elbow", "right_wrist", (102, 255, 224)),
+ # lower-body
+ ("left_hip", "right_hip", (255, 102, 0)),
+ ("left_hip", "left_knee", (255, 255, 77)),
+ ("right_hip", "right_knee", (153, 255, 204)),
+ ("left_knee", "left_ankle", (191, 255, 128)),
+ ("right_knee", "right_ankle", (255, 195, 77)),
+]
+
+# All Cityscapes categories, together with their nice-looking visualization colors
+# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa
+CITYSCAPES_CATEGORIES = [
+ {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"},
+ {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"},
+ {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"},
+ {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"},
+ {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"},
+ {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"},
+ {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"},
+ {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"},
+ {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"},
+ {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"},
+ {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"},
+ {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"},
+ {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"},
+ {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"},
+ {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"},
+ {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"},
+ {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"},
+ {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"},
+ {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"},
+]
+
+# fmt: off
+ADE20K_SEM_SEG_CATEGORIES = [
+ "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", "case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa
+]
+# After being processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore
+# fmt: on
+
+
+def _get_coco_instances_meta():
+ thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
+ thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1]
+ assert len(thing_ids) == 80, len(thing_ids)
+ # Mapping from the incontiguous COCO category id to an id in [0, 79]
+ thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
+ thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1]
+ ret = {
+ "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
+ "thing_classes": thing_classes,
+ "thing_colors": thing_colors,
+ }
+ return ret
+
+
+def _get_coco_panoptic_separated_meta():
+ """
+ Returns metadata for "separated" version of the panoptic segmentation dataset.
+ """
+ stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0]
+ assert len(stuff_ids) == 53, len(stuff_ids)
+
+ # For semantic segmentation, this mapping maps from contiguous stuff id
+ # (in [0, 53], used in models) to ids in the dataset (used for processing results)
+ # The id 0 is mapped to an extra category "thing".
+ stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)}
+ # When converting COCO panoptic annotations to semantic annotations
+ # We label the "thing" category to 0
+ stuff_dataset_id_to_contiguous_id[0] = 0
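+    # e.g. the first stuff category above, "banner" (dataset id 92), maps to contiguous id 1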
+
+ # 54 names for COCO stuff categories (including "things")
+ stuff_classes = ["things"] + [
+ k["name"].replace("-other", "").replace("-merged", "")
+ for k in COCO_CATEGORIES
+ if k["isthing"] == 0
+ ]
+
+ # NOTE: I randomly picked a color for things
+ stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0]
+ ret = {
+ "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id,
+ "stuff_classes": stuff_classes,
+ "stuff_colors": stuff_colors,
+ }
+ ret.update(_get_coco_instances_meta())
+ return ret
+
+
+def _get_builtin_metadata(dataset_name):
+ if dataset_name == "coco":
+ return _get_coco_instances_meta()
+ if dataset_name == "coco_panoptic_separated":
+ return _get_coco_panoptic_separated_meta()
+ elif dataset_name == "coco_panoptic_standard":
+ meta = {}
+ # The following metadata maps contiguous id from [0, #thing categories +
+        # #stuff categories) to their names and colors. We have to replicate the
+        # same name and color under "thing_*" and "stuff_*" because the current
+        # visualization function in D2 handles thing and stuff classes differently
+ # due to some heuristic used in Panoptic FPN. We keep the same naming to
+ # enable reusing existing visualization functions.
+ thing_classes = [k["name"] for k in COCO_CATEGORIES]
+ thing_colors = [k["color"] for k in COCO_CATEGORIES]
+ stuff_classes = [k["name"] for k in COCO_CATEGORIES]
+ stuff_colors = [k["color"] for k in COCO_CATEGORIES]
+
+ meta["thing_classes"] = thing_classes
+ meta["thing_colors"] = thing_colors
+ meta["stuff_classes"] = stuff_classes
+ meta["stuff_colors"] = stuff_colors
+
+ # Convert category id for training:
+ # category id: like semantic segmentation, it is the class id for each
+ # pixel. Since there are some classes not used in evaluation, the category
+        # id is not always contiguous and thus we have two sets of category ids:
+ # - original category id: category id in the original dataset, mainly
+ # used for evaluation.
+ # - contiguous category id: [0, #classes), in order to train the linear
+ # softmax classifier.
+ thing_dataset_id_to_contiguous_id = {}
+ stuff_dataset_id_to_contiguous_id = {}
+
+ for i, cat in enumerate(COCO_CATEGORIES):
+ if cat["isthing"]:
+ thing_dataset_id_to_contiguous_id[cat["id"]] = i
+ else:
+ stuff_dataset_id_to_contiguous_id[cat["id"]] = i
+
+ meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
+ meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
+
+ return meta
+ elif dataset_name == "coco_person":
+ return {
+ "thing_classes": ["person"],
+ "keypoint_names": COCO_PERSON_KEYPOINT_NAMES,
+ "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP,
+ "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES,
+ }
+ elif dataset_name == "cityscapes":
+ # fmt: off
+ CITYSCAPES_THING_CLASSES = [
+ "person", "rider", "car", "truck",
+ "bus", "train", "motorcycle", "bicycle",
+ ]
+ CITYSCAPES_STUFF_CLASSES = [
+ "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light",
+ "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car",
+ "truck", "bus", "train", "motorcycle", "bicycle",
+ ]
+ # fmt: on
+ return {
+ "thing_classes": CITYSCAPES_THING_CLASSES,
+ "stuff_classes": CITYSCAPES_STUFF_CLASSES,
+ }
+ raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
diff --git a/vendor/detectron2/detectron2/data/datasets/cityscapes.py b/vendor/detectron2/detectron2/data/datasets/cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e84a5bdb3d4e410d8eef4b80a5d4c099a180104
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/cityscapes.py
@@ -0,0 +1,329 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import functools
+import json
+import logging
+import multiprocessing as mp
+import numpy as np
+import os
+from itertools import chain
+import pycocotools.mask as mask_util
+from PIL import Image
+
+from detectron2.structures import BoxMode
+from detectron2.utils.comm import get_world_size
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import setup_logger
+
+try:
+ import cv2 # noqa
+except ImportError:
+ # OpenCV is an optional dependency at the moment
+ pass
+
+
+logger = logging.getLogger(__name__)
+
+
+def _get_cityscapes_files(image_dir, gt_dir):
+ files = []
+ # scan through the directory
+ cities = PathManager.ls(image_dir)
+ logger.info(f"{len(cities)} cities found in '{image_dir}'.")
+ for city in cities:
+ city_img_dir = os.path.join(image_dir, city)
+ city_gt_dir = os.path.join(gt_dir, city)
+ for basename in PathManager.ls(city_img_dir):
+ image_file = os.path.join(city_img_dir, basename)
+
+ suffix = "leftImg8bit.png"
+ assert basename.endswith(suffix), basename
+ basename = basename[: -len(suffix)]
+
+ instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png")
+ label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
+ json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")
+
+ files.append((image_file, instance_file, label_file, json_file))
+ assert len(files), "No images found in {}".format(image_dir)
+ for f in files[0]:
+ assert PathManager.isfile(f), f
+ return files
+
+
+def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
+ """
+ Args:
+ image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
+ gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
+ from_json (bool): whether to read annotations from the raw json file or the png files.
+ to_polygons (bool): whether to represent the segmentation as polygons
+ (COCO's format) instead of masks (cityscapes's format).
+
+ Returns:
+ list[dict]: a list of dicts in Detectron2 standard format. (See
+        `Using Custom Datasets <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>`_ )
+ """
+ if from_json:
+ assert to_polygons, (
+ "Cityscapes's json annotations are in polygon format. "
+ "Converting to mask format is not supported now."
+ )
+ files = _get_cityscapes_files(image_dir, gt_dir)
+
+ logger.info("Preprocessing cityscapes annotations ...")
+    # This is still not fast: all workers will execute duplicate work and will
+    # take up to 10 minutes on an 8-GPU server.
+ pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))
+
+ ret = pool.map(
+ functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons),
+ files,
+ )
+ logger.info("Loaded {} images from {}".format(len(ret), image_dir))
+
+ # Map cityscape ids to contiguous ids
+ from cityscapesscripts.helpers.labels import labels
+
+ labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
+ dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)}
+ for dict_per_image in ret:
+ for anno in dict_per_image["annotations"]:
+ anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
+ return ret
+
+
+def load_cityscapes_semantic(image_dir, gt_dir):
+ """
+ Args:
+ image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
+ gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
+
+ Returns:
+ list[dict]: a list of dict, each has "file_name" and
+ "sem_seg_file_name".
+ """
+ ret = []
+    # gt_dir is small and contains many small files, so it makes sense to fetch it to a local path first
+ gt_dir = PathManager.get_local_path(gt_dir)
+ for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir):
+ label_file = label_file.replace("labelIds", "labelTrainIds")
+
+ with PathManager.open(json_file, "r") as f:
+ jsonobj = json.load(f)
+ ret.append(
+ {
+ "file_name": image_file,
+ "sem_seg_file_name": label_file,
+ "height": jsonobj["imgHeight"],
+ "width": jsonobj["imgWidth"],
+ }
+ )
+ assert len(ret), f"No images found in {image_dir}!"
+ assert PathManager.isfile(
+ ret[0]["sem_seg_file_name"]
+ ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa
+ return ret
+
+
+def _cityscapes_files_to_dict(files, from_json, to_polygons):
+ """
+    Parse Cityscapes annotation files into an instance segmentation dataset dict.
+
+ Args:
+ files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
+ from_json (bool): whether to read annotations from the raw json file or the png files.
+ to_polygons (bool): whether to represent the segmentation as polygons
+ (COCO's format) instead of masks (cityscapes's format).
+
+ Returns:
+ A dict in Detectron2 Dataset format.
+ """
+ from cityscapesscripts.helpers.labels import id2label, name2label
+
+ image_file, instance_id_file, _, json_file = files
+
+ annos = []
+
+ if from_json:
+ from shapely.geometry import MultiPolygon, Polygon
+
+ with PathManager.open(json_file, "r") as f:
+ jsonobj = json.load(f)
+ ret = {
+ "file_name": image_file,
+ "image_id": os.path.basename(image_file),
+ "height": jsonobj["imgHeight"],
+ "width": jsonobj["imgWidth"],
+ }
+
+ # `polygons_union` contains the union of all valid polygons.
+ polygons_union = Polygon()
+
+ # CityscapesScripts draw the polygons in sequential order
+ # and each polygon *overwrites* existing ones. See
+ # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
+ # We use reverse order, and each polygon *avoids* early ones.
+        # This will resolve the polygon overlaps in the same way as CityscapesScripts.
+ for obj in jsonobj["objects"][::-1]:
+ if "deleted" in obj: # cityscapes data format specific
+ continue
+ label_name = obj["label"]
+
+ try:
+ label = name2label[label_name]
+ except KeyError:
+ if label_name.endswith("group"): # crowd area
+ label = name2label[label_name[: -len("group")]]
+ else:
+ raise
+ if label.id < 0: # cityscapes data format
+ continue
+
+            # Cityscapes's raw annotations use integer coordinates,
+            # therefore +0.5 here
+ poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
+ # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
+ # polygons for evaluation. This function operates in integer space
+ # and draws each pixel whose center falls into the polygon.
+ # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
+ # We therefore dilate the input polygon by 0.5 as our input.
+ poly = Polygon(poly_coord).buffer(0.5, resolution=4)
+
+ if not label.hasInstances or label.ignoreInEval:
+                # even if we won't store the polygon, it still contributes to overlap resolution
+ polygons_union = polygons_union.union(poly)
+ continue
+
+ # Take non-overlapping part of the polygon
+ poly_wo_overlaps = poly.difference(polygons_union)
+ if poly_wo_overlaps.is_empty:
+ continue
+ polygons_union = polygons_union.union(poly)
+
+ anno = {}
+ anno["iscrowd"] = label_name.endswith("group")
+ anno["category_id"] = label.id
+
+ if isinstance(poly_wo_overlaps, Polygon):
+ poly_list = [poly_wo_overlaps]
+ elif isinstance(poly_wo_overlaps, MultiPolygon):
+ poly_list = poly_wo_overlaps.geoms
+ else:
+ raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps))
+
+ poly_coord = []
+ for poly_el in poly_list:
+ # COCO API can work only with exterior boundaries now, hence we store only them.
+ # TODO: store both exterior and interior boundaries once other parts of the
+ # codebase support holes in polygons.
+ poly_coord.append(list(chain(*poly_el.exterior.coords)))
+ anno["segmentation"] = poly_coord
+ (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds
+
+ anno["bbox"] = (xmin, ymin, xmax, ymax)
+ anno["bbox_mode"] = BoxMode.XYXY_ABS
+
+ annos.append(anno)
+ else:
+ # See also the official annotation parsing scripts at
+ # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa
+ with PathManager.open(instance_id_file, "rb") as f:
+ inst_image = np.asarray(Image.open(f), order="F")
+ # ids < 24 are stuff labels (filtering them first is about 5% faster)
+ flattened_ids = np.unique(inst_image[inst_image >= 24])
+
+ ret = {
+ "file_name": image_file,
+ "image_id": os.path.basename(image_file),
+ "height": inst_image.shape[0],
+ "width": inst_image.shape[1],
+ }
+
+ for instance_id in flattened_ids:
+ # For non-crowd annotations, instance_id // 1000 is the label_id
+ # Crowd annotations have <1000 instance ids
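+            # e.g. instance_id 26003 -> label_id 26 ("car"), while a crowd "car" region
+            # is stored directly as instance_id 26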
+ label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
+ label = id2label[label_id]
+ if not label.hasInstances or label.ignoreInEval:
+ continue
+
+ anno = {}
+ anno["iscrowd"] = instance_id < 1000
+ anno["category_id"] = label.id
+
+ mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")
+
+ inds = np.nonzero(mask)
+ ymin, ymax = inds[0].min(), inds[0].max()
+ xmin, xmax = inds[1].min(), inds[1].max()
+ anno["bbox"] = (xmin, ymin, xmax, ymax)
+ if xmax <= xmin or ymax <= ymin:
+ continue
+ anno["bbox_mode"] = BoxMode.XYXY_ABS
+ if to_polygons:
+ # This conversion comes from D4809743 and D5171122,
+ # when Mask-RCNN was first developed.
+ contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[
+ -2
+ ]
+ polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
+                # OpenCV's findContours can produce invalid polygons
+ if len(polygons) == 0:
+ continue
+ anno["segmentation"] = polygons
+ else:
+ anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
+ annos.append(anno)
+ ret["annotations"] = annos
+ return ret
+
+
+if __name__ == "__main__":
+ """
+ Test the cityscapes dataset loader.
+
+ Usage:
+ python -m detectron2.data.datasets.cityscapes \
+ cityscapes/leftImg8bit/train cityscapes/gtFine/train
+ """
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("image_dir")
+ parser.add_argument("gt_dir")
+ parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
+ args = parser.parse_args()
+ from detectron2.data.catalog import Metadata
+ from detectron2.utils.visualizer import Visualizer
+ from cityscapesscripts.helpers.labels import labels
+
+ logger = setup_logger(name=__name__)
+
+ dirname = "cityscapes-data-vis"
+ os.makedirs(dirname, exist_ok=True)
+
+ if args.type == "instance":
+ dicts = load_cityscapes_instances(
+ args.image_dir, args.gt_dir, from_json=True, to_polygons=True
+ )
+ logger.info("Done loading {} samples.".format(len(dicts)))
+
+ thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
+ meta = Metadata().set(thing_classes=thing_classes)
+
+ else:
+ dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
+ logger.info("Done loading {} samples.".format(len(dicts)))
+
+ stuff_classes = [k.name for k in labels if k.trainId != 255]
+ stuff_colors = [k.color for k in labels if k.trainId != 255]
+ meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors)
+
+ for d in dicts:
+ img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
+ visualizer = Visualizer(img, metadata=meta)
+ vis = visualizer.draw_dataset_dict(d)
+ # cv2.imshow("a", vis.get_image()[:, :, ::-1])
+ # cv2.waitKey()
+ fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
+ vis.save(fpath)
diff --git a/vendor/detectron2/detectron2/data/datasets/cityscapes_panoptic.py b/vendor/detectron2/detectron2/data/datasets/cityscapes_panoptic.py
new file mode 100644
index 0000000000000000000000000000000000000000..48c136f1623261b079591065fec7c7fc38165076
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/cityscapes_panoptic.py
@@ -0,0 +1,187 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import json
+import logging
+import os
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES
+from detectron2.utils.file_io import PathManager
+
+"""
+This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog.
+"""
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info):
+ files = []
+ # scan through the directory
+ cities = PathManager.ls(image_dir)
+ logger.info(f"{len(cities)} cities found in '{image_dir}'.")
+ image_dict = {}
+ for city in cities:
+ city_img_dir = os.path.join(image_dir, city)
+ for basename in PathManager.ls(city_img_dir):
+ image_file = os.path.join(city_img_dir, basename)
+
+ suffix = "_leftImg8bit.png"
+ assert basename.endswith(suffix), basename
+ basename = os.path.basename(basename)[: -len(suffix)]
+
+ image_dict[basename] = image_file
+
+ for ann in json_info["annotations"]:
+ image_file = image_dict.get(ann["image_id"], None)
+ assert image_file is not None, "No image {} found for annotation {}".format(
+ ann["image_id"], ann["file_name"]
+ )
+ label_file = os.path.join(gt_dir, ann["file_name"])
+ segments_info = ann["segments_info"]
+
+ files.append((image_file, label_file, segments_info))
+
+ assert len(files), "No images found in {}".format(image_dir)
+ assert PathManager.isfile(files[0][0]), files[0][0]
+ assert PathManager.isfile(files[0][1]), files[0][1]
+ return files
+
+
+def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta):
+ """
+ Args:
+ image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
+ gt_dir (str): path to the raw annotations. e.g.,
+ "~/cityscapes/gtFine/cityscapes_panoptic_train".
+ gt_json (str): path to the json file. e.g.,
+ "~/cityscapes/gtFine/cityscapes_panoptic_train.json".
+ meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id"
+ and "stuff_dataset_id_to_contiguous_id" to map category ids to
+ contiguous ids for training.
+
+ Returns:
+ list[dict]: a list of dicts in Detectron2 standard format. (See
+        `Using Custom Datasets <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>`_ )
+ """
+
+ def _convert_category_id(segment_info, meta):
+ if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
+ segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
+ segment_info["category_id"]
+ ]
+ else:
+ segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
+ segment_info["category_id"]
+ ]
+ return segment_info
+
+ assert os.path.exists(
+ gt_json
+ ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa
+ with open(gt_json) as f:
+ json_info = json.load(f)
+ files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info)
+ ret = []
+ for image_file, label_file, segments_info in files:
+ sem_label_file = (
+ image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png"
+ )
+ segments_info = [_convert_category_id(x, meta) for x in segments_info]
+ ret.append(
+ {
+ "file_name": image_file,
+ "image_id": "_".join(
+ os.path.splitext(os.path.basename(image_file))[0].split("_")[:3]
+ ),
+ "sem_seg_file_name": sem_label_file,
+ "pan_seg_file_name": label_file,
+ "segments_info": segments_info,
+ }
+ )
+ assert len(ret), f"No images found in {image_dir}!"
+ assert PathManager.isfile(
+ ret[0]["sem_seg_file_name"]
+ ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa
+ assert PathManager.isfile(
+ ret[0]["pan_seg_file_name"]
+ ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa
+ return ret
+
+
+_RAW_CITYSCAPES_PANOPTIC_SPLITS = {
+ "cityscapes_fine_panoptic_train": (
+ "cityscapes/leftImg8bit/train",
+ "cityscapes/gtFine/cityscapes_panoptic_train",
+ "cityscapes/gtFine/cityscapes_panoptic_train.json",
+ ),
+ "cityscapes_fine_panoptic_val": (
+ "cityscapes/leftImg8bit/val",
+ "cityscapes/gtFine/cityscapes_panoptic_val",
+ "cityscapes/gtFine/cityscapes_panoptic_val.json",
+ ),
+ # "cityscapes_fine_panoptic_test": not supported yet
+}
+
+
+def register_all_cityscapes_panoptic(root):
+ meta = {}
+ # The following metadata maps contiguous id from [0, #thing categories +
+    # #stuff categories) to their names and colors. We have to replicate the
+    # same name and color under "thing_*" and "stuff_*" because the current
+    # visualization function in D2 handles thing and stuff classes differently
+ # due to some heuristic used in Panoptic FPN. We keep the same naming to
+ # enable reusing existing visualization functions.
+ thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES]
+ thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES]
+ stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES]
+ stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES]
+
+ meta["thing_classes"] = thing_classes
+ meta["thing_colors"] = thing_colors
+ meta["stuff_classes"] = stuff_classes
+ meta["stuff_colors"] = stuff_colors
+
+ # There are three types of ids in cityscapes panoptic segmentation:
+ # (1) category id: like semantic segmentation, it is the class id for each
+ # pixel. Since there are some classes not used in evaluation, the category
+    # id is not always contiguous and thus we have two sets of category ids:
+ # - original category id: category id in the original dataset, mainly
+ # used for evaluation.
+ # - contiguous category id: [0, #classes), in order to train the classifier
+ # (2) instance id: this id is used to differentiate different instances from
+ # the same category. For "stuff" classes, the instance id is always 0; for
+ # "thing" classes, the instance id starts from 1 and 0 is reserved for
+ # ignored instances (e.g. crowd annotation).
+    # (3) panoptic id: this is the compact id that encodes both category and
+ # instance id by: category_id * 1000 + instance_id.
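+    #     e.g. the 6th "car" instance (category id 26) gets panoptic id 26 * 1000 + 6 = 26006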
+ thing_dataset_id_to_contiguous_id = {}
+ stuff_dataset_id_to_contiguous_id = {}
+
+ for k in CITYSCAPES_CATEGORIES:
+ if k["isthing"] == 1:
+ thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"]
+ else:
+ stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"]
+
+ meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
+ meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
+
+ for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items():
+ image_dir = os.path.join(root, image_dir)
+ gt_dir = os.path.join(root, gt_dir)
+ gt_json = os.path.join(root, gt_json)
+
+ DatasetCatalog.register(
+ key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta)
+ )
+ MetadataCatalog.get(key).set(
+ panoptic_root=gt_dir,
+ image_root=image_dir,
+ panoptic_json=gt_json,
+ gt_dir=gt_dir.replace("cityscapes_panoptic_", ""),
+ evaluator_type="cityscapes_panoptic_seg",
+ ignore_label=255,
+ label_divisor=1000,
+ **meta,
+ )
diff --git a/vendor/detectron2/detectron2/data/datasets/coco.py b/vendor/detectron2/detectron2/data/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed4f7ccb20efa3b54c719783e279c381ca5d8587
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/coco.py
@@ -0,0 +1,539 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import datetime
+import io
+import json
+import logging
+import numpy as np
+import os
+import shutil
+import pycocotools.mask as mask_util
+from fvcore.common.timer import Timer
+from iopath.common.file_io import file_lock
+from PIL import Image
+
+from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes
+from detectron2.utils.file_io import PathManager
+
+from .. import DatasetCatalog, MetadataCatalog
+
+"""
+This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format".
+"""
+
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json", "register_coco_instances"]
+
+
+def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
+ """
+ Load a json file with COCO's instances annotation format.
+ Currently supports instance detection, instance segmentation,
+ and person keypoints annotations.
+
+ Args:
+ json_file (str): full path to the json file in COCO instances annotation format.
+ image_root (str or path-like): the directory where the images in this json file exists.
+ dataset_name (str or None): the name of the dataset (e.g., coco_2017_train).
+ When provided, this function will also do the following:
+
+ * Put "thing_classes" into the metadata associated with this dataset.
+ * Map the category ids into a contiguous range (needed by standard dataset format),
+ and add "thing_dataset_id_to_contiguous_id" to the metadata associated
+ with this dataset.
+
+ This option should usually be provided, unless users need to load
+ the original json content and apply more processing manually.
+ extra_annotation_keys (list[str]): list of per-annotation keys that should also be
+ loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
+ "category_id", "segmentation"). The values for these keys will be returned as-is.
+ For example, the densepose annotations are loaded in this way.
+
+ Returns:
+ list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See
+        `Using Custom Datasets <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>`_ ) when `dataset_name` is not None.
+ If `dataset_name` is None, the returned `category_ids` may be
+ incontiguous and may not conform to the Detectron2 standard format.
+
+ Notes:
+ 1. This function does not read the image files.
+ The results do not have the "image" field.
+ """
+ from pycocotools.coco import COCO
+
+ timer = Timer()
+ json_file = PathManager.get_local_path(json_file)
+ with contextlib.redirect_stdout(io.StringIO()):
+ coco_api = COCO(json_file)
+ if timer.seconds() > 1:
+ logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
+
+ id_map = None
+ if dataset_name is not None:
+ meta = MetadataCatalog.get(dataset_name)
+ cat_ids = sorted(coco_api.getCatIds())
+ cats = coco_api.loadCats(cat_ids)
+ # The categories in a custom json file may not be sorted.
+ thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
+ meta.thing_classes = thing_classes
+
+ # In COCO, certain category ids are artificially removed,
+ # and by convention they are always ignored.
+ # We deal with COCO's id issue and translate
+ # the category ids to contiguous ids in [0, 80).
+
+ # It works by looking at the "categories" field in the json, therefore
+        # if users' own json also has incontiguous ids, we'll
+ # apply this mapping as well but print a warning.
+ if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
+ if "coco" not in dataset_name:
+ logger.warning(
+ """
+Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
+"""
+ )
+ id_map = {v: i for i, v in enumerate(cat_ids)}
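+        # e.g. for COCO, whose category ids skip 12, dataset id 13 ("stop sign") maps to contiguous id 11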
+ meta.thing_dataset_id_to_contiguous_id = id_map
+
+ # sort indices for reproducible results
+ img_ids = sorted(coco_api.imgs.keys())
+ # imgs is a list of dicts, each looks something like:
+ # {'license': 4,
+ # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+ # 'file_name': 'COCO_val2014_000000001268.jpg',
+ # 'height': 427,
+ # 'width': 640,
+ # 'date_captured': '2013-11-17 05:57:24',
+ # 'id': 1268}
+ imgs = coco_api.loadImgs(img_ids)
+ # anns is a list[list[dict]], where each dict is an annotation
+ # record for an object. The inner list enumerates the objects in an image
+ # and the outer list enumerates over images. Example of anns[0]:
+ # [{'segmentation': [[192.81,
+ # 247.09,
+ # ...
+ # 219.03,
+ # 249.06]],
+ # 'area': 1035.749,
+ # 'iscrowd': 0,
+ # 'image_id': 1268,
+ # 'bbox': [192.81, 224.8, 74.73, 33.43],
+ # 'category_id': 16,
+ # 'id': 42986},
+ # ...]
+ anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
+ total_num_valid_anns = sum([len(x) for x in anns])
+ total_num_anns = len(coco_api.anns)
+ if total_num_valid_anns < total_num_anns:
+ logger.warning(
+ f"{json_file} contains {total_num_anns} annotations, but only "
+ f"{total_num_valid_anns} of them match to images in the file."
+ )
+
+ if "minival" not in json_file:
+ # The popular valminusminival & minival annotations for COCO2014 contain this bug.
+ # However the ratio of buggy annotations there is tiny and does not affect accuracy.
+ # Therefore we explicitly white-list them.
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
+ assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
+ json_file
+ )
+
+ imgs_anns = list(zip(imgs, anns))
+ logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))
+
+ dataset_dicts = []
+
+ ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])
+
+ num_instances_without_valid_segmentation = 0
+
+ for (img_dict, anno_dict_list) in imgs_anns:
+ record = {}
+ record["file_name"] = os.path.join(image_root, img_dict["file_name"])
+ record["height"] = img_dict["height"]
+ record["width"] = img_dict["width"]
+ image_id = record["image_id"] = img_dict["id"]
+
+ objs = []
+ for anno in anno_dict_list:
+ # Check that the image_id in this annotation is the same as
+ # the image_id we're looking at.
+ # This fails only when the data parsing logic or the annotation file is buggy.
+
+ # The original COCO valminusminival2014 & minival2014 annotation files
+ # actually contains bugs that, together with certain ways of using COCO API,
+ # can trigger this assertion.
+ assert anno["image_id"] == image_id
+
+ assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'
+
+ obj = {key: anno[key] for key in ann_keys if key in anno}
+ if "bbox" in obj and len(obj["bbox"]) == 0:
+ raise ValueError(
+ f"One annotation of image {image_id} contains empty 'bbox' value! "
+ "This json does not have valid COCO format."
+ )
+
+ segm = anno.get("segmentation", None)
+ if segm: # either list[list[float]] or dict(RLE)
+ if isinstance(segm, dict):
+ if isinstance(segm["counts"], list):
+ # convert to compressed RLE
+ segm = mask_util.frPyObjects(segm, *segm["size"])
+ else:
+ # filter out invalid polygons (< 3 points)
+ segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
+ if len(segm) == 0:
+ num_instances_without_valid_segmentation += 1
+ continue # ignore this instance
+ obj["segmentation"] = segm
+
+ keypts = anno.get("keypoints", None)
+ if keypts: # list[int]
+ for idx, v in enumerate(keypts):
+ if idx % 3 != 2:
+ # COCO's segmentation coordinates are floating points in [0, H or W],
+ # but keypoint coordinates are integers in [0, H-1 or W-1]
+ # Therefore we assume the coordinates are "pixel indices" and
+ # add 0.5 to convert to floating point coordinates.
+ keypts[idx] = v + 0.5
+ obj["keypoints"] = keypts
+
+ obj["bbox_mode"] = BoxMode.XYWH_ABS
+ if id_map:
+ annotation_category_id = obj["category_id"]
+ try:
+ obj["category_id"] = id_map[annotation_category_id]
+ except KeyError as e:
+ raise KeyError(
+ f"Encountered category_id={annotation_category_id} "
+ "but this id does not exist in 'categories' of the json file."
+ ) from e
+ objs.append(obj)
+ record["annotations"] = objs
+ dataset_dicts.append(record)
+
+ if num_instances_without_valid_segmentation > 0:
+ logger.warning(
+ "Filtered out {} instances without valid segmentation. ".format(
+ num_instances_without_valid_segmentation
+ )
+ + "There might be issues in your dataset generation process. Please "
+ "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully"
+ )
+ return dataset_dicts
+
+
+def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
+ """
+ Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
+ treated as ground truth annotations and all files under "image_root" with "image_ext" extension
+ as input images. Ground truth and input images are matched using file paths relative to
+ "gt_root" and "image_root" respectively without taking into account file extensions.
+ This works for COCO as well as some other datasets.
+
+ Args:
+ gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
+ annotations are stored as images with integer values in pixels that represent
+ corresponding semantic labels.
+ image_root (str): the directory where the input images are.
+ gt_ext (str): file extension for ground truth annotations.
+ image_ext (str): file extension for input images.
+
+ Returns:
+ list[dict]:
+ a list of dicts in detectron2 standard format without instance-level
+ annotation.
+
+ Notes:
+ 1. This function does not read the image and ground truth files.
+ The results do not have the "image" and "sem_seg" fields.
+ """
+
+ # We match input images with ground truth based on their relative filepaths (without file
+ # extensions) starting from 'image_root' and 'gt_root' respectively.
+ def file2id(folder_path, file_path):
+ # extract relative path starting from `folder_path`
+ image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
+ # remove file extension
+ image_id = os.path.splitext(image_id)[0]
+ return image_id
+
+ input_files = sorted(
+ (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
+ key=lambda file_path: file2id(image_root, file_path),
+ )
+ gt_files = sorted(
+ (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
+ key=lambda file_path: file2id(gt_root, file_path),
+ )
+
+ assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)
+
+ # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
+ if len(input_files) != len(gt_files):
+ logger.warning(
+ "Directories {} and {} have {} and {} files, respectively.".format(
+ image_root, gt_root, len(input_files), len(gt_files)
+ )
+ )
+ input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
+ gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
+ intersect = list(set(input_basenames) & set(gt_basenames))
+ # sort, otherwise each worker may obtain a list[dict] in different order
+ intersect = sorted(intersect)
+ logger.warn("Will use their intersection of {} files.".format(len(intersect)))
+ input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
+ gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]
+
+ logger.info(
+ "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
+ )
+
+ dataset_dicts = []
+ for (img_path, gt_path) in zip(input_files, gt_files):
+ record = {}
+ record["file_name"] = img_path
+ record["sem_seg_file_name"] = gt_path
+ dataset_dicts.append(record)
+
+ return dataset_dicts
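+# Usage sketch (editor's note, not part of upstream detectron2): the directories
+# below are hypothetical placeholders; ground-truth PNGs and input JPGs are matched
+# by their relative paths without extensions, as described in the docstring above.
+#
+#   dicts = load_sem_seg("datasets/my_sem_seg/gt", "datasets/my_sem_seg/images",
+#                        gt_ext="png", image_ext="jpg")
+#   # each dict only carries "file_name" and "sem_seg_file_name"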
+
+
+def convert_to_coco_dict(dataset_name):
+ """
+ Convert an instance detection/segmentation or keypoint detection dataset
+ in detectron2's standard format into COCO json format.
+
+ Generic dataset description can be found here:
+ https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset
+
+ COCO data format description can be found here:
+ http://cocodataset.org/#format-data
+
+ Args:
+ dataset_name (str):
+ name of the source dataset
+ Must be registered in DatasetCatalog and in detectron2's standard format.
+ Must have corresponding metadata "thing_classes"
+ Returns:
+ coco_dict: serializable dict in COCO json format
+ """
+
+ dataset_dicts = DatasetCatalog.get(dataset_name)
+ metadata = MetadataCatalog.get(dataset_name)
+
+ # unmap the category mapping ids for COCO
+ if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
+ reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
+ reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa
+ else:
+ reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa
+
+ categories = [
+ {"id": reverse_id_mapper(id), "name": name}
+ for id, name in enumerate(metadata.thing_classes)
+ ]
+
+ logger.info("Converting dataset dicts into COCO format")
+ coco_images = []
+ coco_annotations = []
+
+ for image_id, image_dict in enumerate(dataset_dicts):
+ coco_image = {
+ "id": image_dict.get("image_id", image_id),
+ "width": int(image_dict["width"]),
+ "height": int(image_dict["height"]),
+ "file_name": str(image_dict["file_name"]),
+ }
+ coco_images.append(coco_image)
+
+ anns_per_image = image_dict.get("annotations", [])
+ for annotation in anns_per_image:
+ # create a new dict with only COCO fields
+ coco_annotation = {}
+
+ # COCO requirement: XYWH box format for axis-aligned boxes and XYWHA for rotated ones
+ bbox = annotation["bbox"]
+ if isinstance(bbox, np.ndarray):
+ if bbox.ndim != 1:
+ raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.")
+ bbox = bbox.tolist()
+ if len(bbox) not in [4, 5]:
+ raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.")
+ from_bbox_mode = annotation["bbox_mode"]
+ to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS
+ bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode)
+
+ # COCO requirement: instance area
+ if "segmentation" in annotation:
+ # Computing areas for instances by counting the pixels
+ segmentation = annotation["segmentation"]
+ # TODO: check segmentation type: RLE, BinaryMask or Polygon
+ if isinstance(segmentation, list):
+ polygons = PolygonMasks([segmentation])
+ area = polygons.area()[0].item()
+ elif isinstance(segmentation, dict): # RLE
+ area = mask_util.area(segmentation).item()
+ else:
+ raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
+ else:
+ # Computing areas using bounding boxes
+ if to_bbox_mode == BoxMode.XYWH_ABS:
+ bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS)
+ area = Boxes([bbox_xy]).area()[0].item()
+ else:
+ area = RotatedBoxes([bbox]).area()[0].item()
+
+ if "keypoints" in annotation:
+ keypoints = annotation["keypoints"] # list[int]
+ for idx, v in enumerate(keypoints):
+ if idx % 3 != 2:
+ # COCO's segmentation coordinates are floating points in [0, H or W],
+ # but keypoint coordinates are integers in [0, H-1 or W-1]
+ # For COCO format consistency we subtract 0.5
+ # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
+ keypoints[idx] = v - 0.5
+ if "num_keypoints" in annotation:
+ num_keypoints = annotation["num_keypoints"]
+ else:
+ num_keypoints = sum(kp > 0 for kp in keypoints[2::3])
+
+ # COCO requirement:
+ # linking annotations to images
+ # "id" field must start with 1
+ coco_annotation["id"] = len(coco_annotations) + 1
+ coco_annotation["image_id"] = coco_image["id"]
+ coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
+ coco_annotation["area"] = float(area)
+ coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0))
+ coco_annotation["category_id"] = int(reverse_id_mapper(annotation["category_id"]))
+
+ # Add optional fields
+ if "keypoints" in annotation:
+ coco_annotation["keypoints"] = keypoints
+ coco_annotation["num_keypoints"] = num_keypoints
+
+ if "segmentation" in annotation:
+ seg = coco_annotation["segmentation"] = annotation["segmentation"]
+ if isinstance(seg, dict): # RLE
+ counts = seg["counts"]
+ if not isinstance(counts, str):
+ # make it json-serializable
+ seg["counts"] = counts.decode("ascii")
+
+ coco_annotations.append(coco_annotation)
+
+ logger.info(
+ "Conversion finished, "
+ f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
+ )
+
+ info = {
+ "date_created": str(datetime.datetime.now()),
+ "description": "Automatically generated COCO json file for Detectron2.",
+ }
+ coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None}
+ if len(coco_annotations) > 0:
+ coco_dict["annotations"] = coco_annotations
+ return coco_dict
+
+
+def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
+ """
+ Converts dataset into COCO format and saves it to a json file.
+ dataset_name must be registered in DatasetCatalog and in detectron2's standard format.
+
+ Args:
+ dataset_name:
+ reference from the config file to the catalogs
+ must be registered in DatasetCatalog and in detectron2's standard format
+ output_file: path to the json file that will be saved
+ allow_cached: if the json file is already present then skip conversion
+ """
+
+ # TODO: The dataset or the conversion script *may* change,
+ # a checksum would be useful for validating the cached data
+
+ PathManager.mkdirs(os.path.dirname(output_file))
+ with file_lock(output_file):
+ if PathManager.exists(output_file) and allow_cached:
+ logger.warning(
+ f"Using previously cached COCO format annotations at '{output_file}'. "
+ "You need to clear the cache file if your dataset has been modified."
+ )
+ else:
+ logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)")
+ coco_dict = convert_to_coco_dict(dataset_name)
+
+ logger.info(f"Caching COCO format annotations at '{output_file}' ...")
+ tmp_file = output_file + ".tmp"
+ with PathManager.open(tmp_file, "w") as f:
+ json.dump(coco_dict, f)
+ shutil.move(tmp_file, output_file)
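+# Usage sketch (editor's note, not part of upstream detectron2): the dataset name
+# and output path are hypothetical; the dataset must already be registered in
+# DatasetCatalog with "thing_classes" metadata (see convert_to_coco_dict above).
+#
+#   convert_to_coco_json("my_dataset_train", "output/my_dataset_train_coco.json",
+#                        allow_cached=True)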
+
+
+def register_coco_instances(name, metadata, json_file, image_root):
+ """
+ Register a dataset in COCO's json annotation format for
+ instance detection, instance segmentation and keypoint detection.
+ (i.e., Type 1 and 2 in http://cocodataset.org/#format-data.
+ `instances*.json` and `person_keypoints*.json` in the dataset).
+
+ This is an example of how to register a new dataset.
+ You can do something similar to this function, to register new datasets.
+
+ Args:
+ name (str): the name that identifies a dataset, e.g. "coco_2014_train".
+ metadata (dict): extra metadata associated with this dataset. You can
+ leave it as an empty dict.
+ json_file (str): path to the json instance annotation file.
+ image_root (str or path-like): directory which contains all the images.
+ """
+ assert isinstance(name, str), name
+ assert isinstance(json_file, (str, os.PathLike)), json_file
+ assert isinstance(image_root, (str, os.PathLike)), image_root
+ # 1. register a function which returns dicts
+ DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))
+
+ # 2. Optionally, add metadata about this dataset,
+ # since they might be useful in evaluation, visualization or logging
+ MetadataCatalog.get(name).set(
+ json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata
+ )
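+# Usage sketch (editor's note, not part of upstream detectron2): the dataset name,
+# json path and image root are hypothetical placeholders.
+#
+#   from detectron2.data import DatasetCatalog
+#
+#   register_coco_instances("my_coco_train", {},
+#                           "datasets/my_coco/annotations.json", "datasets/my_coco/images")
+#   dicts = DatasetCatalog.get("my_coco_train")  # lazily invokes load_coco_json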
+
+
+if __name__ == "__main__":
+ """
+ Test the COCO json dataset loader.
+
+ Usage:
+ python -m detectron2.data.datasets.coco \
+ path/to/json path/to/image_root dataset_name
+
+ "dataset_name" can be "coco_2014_minival_100", or other
+ pre-registered ones
+ """
+ from detectron2.utils.logger import setup_logger
+ from detectron2.utils.visualizer import Visualizer
+ import detectron2.data.datasets # noqa # add pre-defined metadata
+ import sys
+
+ logger = setup_logger(name=__name__)
+ assert sys.argv[3] in DatasetCatalog.list()
+ meta = MetadataCatalog.get(sys.argv[3])
+
+ dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3])
+ logger.info("Done loading {} samples.".format(len(dicts)))
+
+ dirname = "coco-data-vis"
+ os.makedirs(dirname, exist_ok=True)
+ for d in dicts:
+ img = np.array(Image.open(d["file_name"]))
+ visualizer = Visualizer(img, metadata=meta)
+ vis = visualizer.draw_dataset_dict(d)
+ fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
+ vis.save(fpath)
diff --git a/vendor/detectron2/detectron2/data/datasets/coco_panoptic.py b/vendor/detectron2/detectron2/data/datasets/coco_panoptic.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8dae44317b556610d7fed39017e082d7e855956
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/coco_panoptic.py
@@ -0,0 +1,228 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import json
+import os
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.utils.file_io import PathManager
+
+from .coco import load_coco_json, load_sem_seg
+
+__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"]
+
+
+def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta):
+ """
+ Args:
+ image_dir (str): path to the raw dataset. e.g., "~/coco/train2017".
+ gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017".
+ json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json".
+
+ Returns:
+ list[dict]: a list of dicts in Detectron2 standard format. (See
+ `Using Custom Datasets <https://detectron2.readthedocs.io/tutorials/datasets.html>`_ )
+ """
+
+ def _convert_category_id(segment_info, meta):
+ if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
+ segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
+ segment_info["category_id"]
+ ]
+ segment_info["isthing"] = True
+ else:
+ segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
+ segment_info["category_id"]
+ ]
+ segment_info["isthing"] = False
+ return segment_info
+
+ with PathManager.open(json_file) as f:
+ json_info = json.load(f)
+
+ ret = []
+ for ann in json_info["annotations"]:
+ image_id = int(ann["image_id"])
+ # TODO: currently we assume image and label have the same filename but
+ # different extension, and images have extension ".jpg" for COCO. Need
+ # to make image extension a user-provided argument if we extend this
+ # function to support other COCO-like datasets.
+ image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg")
+ label_file = os.path.join(gt_dir, ann["file_name"])
+ segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]]
+ ret.append(
+ {
+ "file_name": image_file,
+ "image_id": image_id,
+ "pan_seg_file_name": label_file,
+ "segments_info": segments_info,
+ }
+ )
+ assert len(ret), f"No images found in {image_dir}!"
+ assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"]
+ assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"]
+ return ret
+
+
+def register_coco_panoptic(
+ name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None
+):
+ """
+ Register a "standard" version of COCO panoptic segmentation dataset named `name`.
+ The dictionaries in this registered dataset follow detectron2's standard format.
+ Hence it's called "standard".
+
+ Args:
+ name (str): the name that identifies a dataset,
+ e.g. "coco_2017_train_panoptic"
+ metadata (dict): extra metadata associated with this dataset.
+ image_root (str): directory which contains all the images
+ panoptic_root (str): directory which contains panoptic annotation images in COCO format
+ panoptic_json (str): path to the json panoptic annotation file in COCO format
+ sem_seg_root (none): not used, to be consistent with
+ `register_coco_panoptic_separated`.
+ instances_json (str): path to the json instance annotation file
+ """
+ panoptic_name = name
+ DatasetCatalog.register(
+ panoptic_name,
+ lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata),
+ )
+ MetadataCatalog.get(panoptic_name).set(
+ panoptic_root=panoptic_root,
+ image_root=image_root,
+ panoptic_json=panoptic_json,
+ json_file=instances_json,
+ evaluator_type="coco_panoptic_seg",
+ ignore_label=255,
+ label_divisor=1000,
+ **metadata,
+ )
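+# Usage sketch (editor's note, not part of upstream detectron2): paths and the
+# metadata dict are hypothetical; for COCO itself the metadata (thing/stuff id
+# mappings, class names) normally comes from builtin_meta.
+#
+#   register_coco_panoptic("my_panoptic_train", metadata,
+#                          "datasets/coco/train2017",
+#                          "datasets/coco/panoptic_train2017",
+#                          "datasets/coco/annotations/panoptic_train2017.json",
+#                          instances_json="datasets/coco/annotations/instances_train2017.json")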
+
+
+def register_coco_panoptic_separated(
+ name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json
+):
+ """
+ Register a "separated" version of COCO panoptic segmentation dataset named `name`.
+ The annotations in this registered dataset will contain both instance annotations and
+ semantic annotations, each with its own contiguous ids. Hence it's called "separated".
+
+ It follows the setting used by the PanopticFPN paper:
+
+ 1. The instance annotations directly come from polygons in the COCO
+ instances annotation task, rather than from the masks in the COCO panoptic annotations.
+
+ The two formats have small differences:
+ Polygons in the instance annotations may have overlaps.
+ The mask annotations are produced by labeling the overlapped polygons
+ with depth ordering.
+
+ 2. The semantic annotations are converted from panoptic annotations, where
+ all "things" are assigned a semantic id of 0.
+ All semantic categories will therefore have ids in contiguous
+ range [1, #stuff_categories].
+
+ This function will also register a pure semantic segmentation dataset
+ named ``name + '_stuffonly'``.
+
+ Args:
+ name (str): the name that identifies a dataset,
+ e.g. "coco_2017_train_panoptic"
+ metadata (dict): extra metadata associated with this dataset.
+ image_root (str): directory which contains all the images
+ panoptic_root (str): directory which contains panoptic annotation images
+ panoptic_json (str): path to the json panoptic annotation file
+ sem_seg_root (str): directory which contains all the ground truth segmentation annotations.
+ instances_json (str): path to the json instance annotation file
+ """
+ panoptic_name = name + "_separated"
+ DatasetCatalog.register(
+ panoptic_name,
+ lambda: merge_to_panoptic(
+ load_coco_json(instances_json, image_root, panoptic_name),
+ load_sem_seg(sem_seg_root, image_root),
+ ),
+ )
+ MetadataCatalog.get(panoptic_name).set(
+ panoptic_root=panoptic_root,
+ image_root=image_root,
+ panoptic_json=panoptic_json,
+ sem_seg_root=sem_seg_root,
+ json_file=instances_json, # TODO rename
+ evaluator_type="coco_panoptic_seg",
+ ignore_label=255,
+ **metadata,
+ )
+
+ semantic_name = name + "_stuffonly"
+ DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root))
+ MetadataCatalog.get(semantic_name).set(
+ sem_seg_root=sem_seg_root,
+ image_root=image_root,
+ evaluator_type="sem_seg",
+ ignore_label=255,
+ **metadata,
+ )
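+# Usage sketch (editor's note, not part of upstream detectron2): all paths and the
+# metadata dict are hypothetical; with name "my_panoptic" this registers both
+# "my_panoptic_separated" and "my_panoptic_stuffonly", as described above.
+#
+#   register_coco_panoptic_separated("my_panoptic", metadata,
+#                                    "datasets/coco/train2017",
+#                                    "datasets/coco/panoptic_train2017",
+#                                    "datasets/coco/annotations/panoptic_train2017.json",
+#                                    "datasets/coco/panoptic_stuff_train2017",
+#                                    "datasets/coco/annotations/instances_train2017.json")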
+
+
+def merge_to_panoptic(detection_dicts, sem_seg_dicts):
+ """
+ Create dataset dicts for panoptic segmentation, by
+ merging two dicts using "file_name" field to match their entries.
+
+ Args:
+ detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation.
+ sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation.
+
+ Returns:
+ list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in
+ both detection_dicts and sem_seg_dicts that correspond to the same image.
+ The function assumes that the same key in different dicts has the same value.
+ """
+ results = []
+ sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts}
+ assert len(sem_seg_file_to_entry) > 0
+
+ for det_dict in detection_dicts:
+ dic = copy.copy(det_dict)
+ dic.update(sem_seg_file_to_entry[dic["file_name"]])
+ results.append(dic)
+ return results
+
+
+if __name__ == "__main__":
+ """
+ Test the COCO panoptic dataset loader.
+
+ Usage:
+ python -m detectron2.data.datasets.coco_panoptic \
+ path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10
+
+ "dataset_name" can be "coco_2017_train_panoptic", or other
+ pre-registered ones
+ """
+ from detectron2.utils.logger import setup_logger
+ from detectron2.utils.visualizer import Visualizer
+ import detectron2.data.datasets # noqa # add pre-defined metadata
+ import sys
+ from PIL import Image
+ import numpy as np
+
+ logger = setup_logger(name=__name__)
+ assert sys.argv[4] in DatasetCatalog.list()
+ meta = MetadataCatalog.get(sys.argv[4])
+
+ dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict())
+ logger.info("Done loading {} samples.".format(len(dicts)))
+
+ dirname = "coco-data-vis"
+ os.makedirs(dirname, exist_ok=True)
+ num_imgs_to_vis = int(sys.argv[5])
+ for i, d in enumerate(dicts):
+ img = np.array(Image.open(d["file_name"]))
+ visualizer = Visualizer(img, metadata=meta)
+ vis = visualizer.draw_dataset_dict(d)
+ fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
+ vis.save(fpath)
+ if i + 1 >= num_imgs_to_vis:
+ break
diff --git a/vendor/detectron2/detectron2/data/datasets/lvis.py b/vendor/detectron2/detectron2/data/datasets/lvis.py
new file mode 100644
index 0000000000000000000000000000000000000000..576d962c8ce23ce31a01839b232cec89817186de
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/lvis.py
@@ -0,0 +1,241 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import os
+from fvcore.common.timer import Timer
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+from detectron2.utils.file_io import PathManager
+
+from .builtin_meta import _get_coco_instances_meta
+from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES
+from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES
+from .lvis_v1_category_image_count import LVIS_CATEGORY_IMAGE_COUNT as LVIS_V1_CATEGORY_IMAGE_COUNT
+
+"""
+This file contains functions to parse LVIS-format annotations into dicts in the
+"Detectron2 format".
+"""
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"]
+
+
+def register_lvis_instances(name, metadata, json_file, image_root):
+ """
+ Register a dataset in LVIS's json annotation format for instance detection and segmentation.
+
+ Args:
+ name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train".
+ metadata (dict): extra metadata associated with this dataset. It can be an empty dict.
+ json_file (str): path to the json instance annotation file.
+ image_root (str or path-like): directory which contains all the images.
+ """
+ DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name))
+ MetadataCatalog.get(name).set(
+ json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata
+ )
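+# Usage sketch (editor's note, not part of upstream detectron2): the json path and
+# image root are hypothetical; LVIS reuses the COCO images, and file paths are
+# resolved from each image's "coco_url" field (see get_file_name in load_lvis_json).
+#
+#   register_lvis_instances("lvis_v1_custom_train", get_lvis_instances_meta("lvis_v1"),
+#                           "datasets/lvis/lvis_v1_train.json", "datasets/coco/")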
+
+
+def load_lvis_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
+ """
+ Load a json file in LVIS's annotation format.
+
+ Args:
+ json_file (str): full path to the LVIS json annotation file.
+ image_root (str): the directory where the images in this json file exist.
+ dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
+ If provided, this function will put "thing_classes" into the metadata
+ associated with this dataset.
+ extra_annotation_keys (list[str]): list of per-annotation keys that should also be
+ loaded into the dataset dict (besides "bbox", "bbox_mode", "category_id",
+ "segmentation"). The values for these keys will be returned as-is.
+
+ Returns:
+ list[dict]: a list of dicts in Detectron2 standard format. (See
+ `Using Custom Datasets <https://detectron2.readthedocs.io/tutorials/datasets.html>`_ )
+
+ Notes:
+ 1. This function does not read the image files.
+ The results do not have the "image" field.
+ """
+ from lvis import LVIS
+
+ json_file = PathManager.get_local_path(json_file)
+
+ timer = Timer()
+ lvis_api = LVIS(json_file)
+ if timer.seconds() > 1:
+ logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
+
+ if dataset_name is not None:
+ meta = get_lvis_instances_meta(dataset_name)
+ MetadataCatalog.get(dataset_name).set(**meta)
+
+ # sort indices for reproducible results
+ img_ids = sorted(lvis_api.imgs.keys())
+ # imgs is a list of dicts, each looks something like:
+ # {'license': 4,
+ # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+ # 'file_name': 'COCO_val2014_000000001268.jpg',
+ # 'height': 427,
+ # 'width': 640,
+ # 'date_captured': '2013-11-17 05:57:24',
+ # 'id': 1268}
+ imgs = lvis_api.load_imgs(img_ids)
+ # anns is a list[list[dict]], where each dict is an annotation
+ # record for an object. The inner list enumerates the objects in an image
+ # and the outer list enumerates over images. Example of anns[0]:
+ # [{'segmentation': [[192.81,
+ # 247.09,
+ # ...
+ # 219.03,
+ # 249.06]],
+ # 'area': 1035.749,
+ # 'image_id': 1268,
+ # 'bbox': [192.81, 224.8, 74.73, 33.43],
+ # 'category_id': 16,
+ # 'id': 42986},
+ # ...]
+ anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
+
+ # Sanity check that each annotation has a unique id
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
+ assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format(
+ json_file
+ )
+
+ imgs_anns = list(zip(imgs, anns))
+
+ logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file))
+
+ if extra_annotation_keys:
+ logger.info(
+ "The following extra annotation keys will be loaded: {} ".format(extra_annotation_keys)
+ )
+ else:
+ extra_annotation_keys = []
+
+ def get_file_name(img_root, img_dict):
+ # Determine the path including the split folder ("train2017", "val2017", "test2017") from
+ # the coco_url field. Example:
+ # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg'
+ split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
+ return os.path.join(img_root + split_folder, file_name)
+
+ dataset_dicts = []
+
+ for (img_dict, anno_dict_list) in imgs_anns:
+ record = {}
+ record["file_name"] = get_file_name(image_root, img_dict)
+ record["height"] = img_dict["height"]
+ record["width"] = img_dict["width"]
+ record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", [])
+ record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
+ image_id = record["image_id"] = img_dict["id"]
+
+ objs = []
+ for anno in anno_dict_list:
+ # Check that the image_id in this annotation is the same as
+ # the image_id we're looking at.
+ # This fails only when the data parsing logic or the annotation file is buggy.
+ assert anno["image_id"] == image_id
+ obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
+ # The LVIS data loader can be used to load COCO dataset categories. In this case the
+ # `meta` variable will have a field with the COCO-specific category mapping.
+ if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta:
+ obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][anno["category_id"]]
+ else:
+ obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed
+ segm = anno["segmentation"] # list[list[float]]
+ # filter out invalid polygons (< 3 points)
+ valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
+ assert len(segm) == len(
+ valid_segm
+ ), "Annotation contains an invalid polygon with < 3 points"
+ assert len(segm) > 0
+ obj["segmentation"] = segm
+ for extra_ann_key in extra_annotation_keys:
+ obj[extra_ann_key] = anno[extra_ann_key]
+ objs.append(obj)
+ record["annotations"] = objs
+ dataset_dicts.append(record)
+
+ return dataset_dicts
+
+
+def get_lvis_instances_meta(dataset_name):
+ """
+ Load LVIS metadata.
+
+ Args:
+ dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5").
+
+ Returns:
+ dict: LVIS metadata with keys: thing_classes
+ """
+ if "cocofied" in dataset_name:
+ return _get_coco_instances_meta()
+ if "v0.5" in dataset_name:
+ return _get_lvis_instances_meta_v0_5()
+ elif "v1" in dataset_name:
+ return _get_lvis_instances_meta_v1()
+ raise ValueError("No built-in metadata for dataset {}".format(dataset_name))
+
+
+def _get_lvis_instances_meta_v0_5():
+ assert len(LVIS_V0_5_CATEGORIES) == 1230
+ cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES]
+ assert min(cat_ids) == 1 and max(cat_ids) == len(
+ cat_ids
+ ), "Category ids are not in [1, #categories], as expected"
+ # Ensure that the category list is sorted by id
+ lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"])
+ thing_classes = [k["synonyms"][0] for k in lvis_categories]
+ meta = {"thing_classes": thing_classes}
+ return meta
+
+
+def _get_lvis_instances_meta_v1():
+ assert len(LVIS_V1_CATEGORIES) == 1203
+ cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES]
+ assert min(cat_ids) == 1 and max(cat_ids) == len(
+ cat_ids
+ ), "Category ids are not in [1, #categories], as expected"
+ # Ensure that the category list is sorted by id
+ lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"])
+ thing_classes = [k["synonyms"][0] for k in lvis_categories]
+ meta = {"thing_classes": thing_classes, "class_image_count": LVIS_V1_CATEGORY_IMAGE_COUNT}
+ return meta
+
+
+if __name__ == "__main__":
+ """
+ Test the LVIS json dataset loader.
+
+ Usage:
+ python -m detectron2.data.datasets.lvis \
+ path/to/json path/to/image_root dataset_name vis_limit
+ """
+ import sys
+ import numpy as np
+ from detectron2.utils.logger import setup_logger
+ from PIL import Image
+ import detectron2.data.datasets # noqa # add pre-defined metadata
+ from detectron2.utils.visualizer import Visualizer
+
+ logger = setup_logger(name=__name__)
+ meta = MetadataCatalog.get(sys.argv[3])
+
+ dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3])
+ logger.info("Done loading {} samples.".format(len(dicts)))
+
+ dirname = "lvis-data-vis"
+ os.makedirs(dirname, exist_ok=True)
+ for d in dicts[: int(sys.argv[4])]:
+ img = np.array(Image.open(d["file_name"]))
+ visualizer = Visualizer(img, metadata=meta)
+ vis = visualizer.draw_dataset_dict(d)
+ fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
+ vis.save(fpath)
diff --git a/vendor/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py b/vendor/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3dab6198da614937b08682f4c9edf52bdf1d236
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Autogen with
+# with open("lvis_v0.5_val.json", "r") as f:
+# a = json.load(f)
+# c = a["categories"]
+# for x in c:
+# del x["image_count"]
+# del x["instance_count"]
+# LVIS_CATEGORIES = repr(c) + " # noqa"
+
+# fmt: off
+LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large 
wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something 
used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, 
{'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 
'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 
'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 
'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a 
club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 
'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 
'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 
'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa
+# fmt: on
diff --git a/vendor/detectron2/detectron2/data/datasets/lvis_v1_categories.py b/vendor/detectron2/detectron2/data/datasets/lvis_v1_categories.py
new file mode 100644
index 0000000000000000000000000000000000000000..7374e6968bb006f5d8c49e75d9d3b31ea3d77d05
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/lvis_v1_categories.py
@@ -0,0 +1,16 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Autogen with
+# with open("lvis_v1_val.json", "r") as f:
+#     a = json.load(f)
+# c = a["categories"]
+# for x in c:
+# del x["image_count"]
+# del x["instance_count"]
+# LVIS_CATEGORIES = repr(c) + " # noqa"
+# with open("/tmp/lvis_categories.py", "wt") as f:
+# f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}")
+# Then paste the contents of that file below
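For reference, the regeneration recipe in the comment above runs as a standalone script once the missing `import json` is added; a minimal self-contained sketch (same input/output paths as the comment, with `lvis_v1_val.json` assumed to sit in the working directory):

# Sketch of the autogen snippet described in the comment above.
# Assumes lvis_v1_val.json is present in the current directory; the output
# path /tmp/lvis_categories.py is the one named in the comment.
import json

with open("lvis_v1_val.json", "r") as f:
    categories = json.load(f)["categories"]

for cat in categories:
    # Drop per-category statistics that the vendored metadata file does not keep.
    del cat["image_count"]
    del cat["instance_count"]

with open("/tmp/lvis_categories.py", "wt") as f:
    f.write(f"LVIS_CATEGORIES = {repr(categories)}  # noqa")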
+
+# fmt: off
+LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset': 'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or 
cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 
'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 
'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': ['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 
'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 
'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 
'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of 
bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 
'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 
'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying 
water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa
+# fmt: on
diff --git a/vendor/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py b/vendor/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py
new file mode 100644
index 0000000000000000000000000000000000000000..31bf0cfcd5096ab87835db86a28671d474514c40
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py
@@ -0,0 +1,20 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Autogen with
+# with open("lvis_v1_train.json", "r") as f:
+# a = json.load(f)
+# c = a["categories"]
+# for x in c:
+# del x["name"]
+# del x["instance_count"]
+# del x["def"]
+# del x["synonyms"]
+# del x["frequency"]
+# del x["synset"]
+# LVIS_CATEGORY_IMAGE_COUNT = repr(c) + " # noqa"
+# with open("/tmp/lvis_category_image_count.py", "wt") as f:
+# f.write(f"LVIS_CATEGORY_IMAGE_COUNT = {LVIS_CATEGORY_IMAGE_COUNT}")
+# Then paste the contents of that file below
+
+# fmt: off
+LVIS_CATEGORY_IMAGE_COUNT = [{'id': 1, 'image_count': 64}, {'id': 2, 'image_count': 364}, {'id': 3, 'image_count': 1911}, {'id': 4, 'image_count': 149}, {'id': 5, 'image_count': 29}, {'id': 6, 'image_count': 26}, {'id': 7, 'image_count': 59}, {'id': 8, 'image_count': 22}, {'id': 9, 'image_count': 12}, {'id': 10, 'image_count': 28}, {'id': 11, 'image_count': 505}, {'id': 12, 'image_count': 1207}, {'id': 13, 'image_count': 4}, {'id': 14, 'image_count': 10}, {'id': 15, 'image_count': 500}, {'id': 16, 'image_count': 33}, {'id': 17, 'image_count': 3}, {'id': 18, 'image_count': 44}, {'id': 19, 'image_count': 561}, {'id': 20, 'image_count': 8}, {'id': 21, 'image_count': 9}, {'id': 22, 'image_count': 33}, {'id': 23, 'image_count': 1883}, {'id': 24, 'image_count': 98}, {'id': 25, 'image_count': 70}, {'id': 26, 'image_count': 46}, {'id': 27, 'image_count': 117}, {'id': 28, 'image_count': 41}, {'id': 29, 'image_count': 1395}, {'id': 30, 'image_count': 7}, {'id': 31, 'image_count': 1}, {'id': 32, 'image_count': 314}, {'id': 33, 'image_count': 31}, {'id': 34, 'image_count': 1905}, {'id': 35, 'image_count': 1859}, {'id': 36, 'image_count': 1623}, {'id': 37, 'image_count': 47}, {'id': 38, 'image_count': 3}, {'id': 39, 'image_count': 3}, {'id': 40, 'image_count': 1}, {'id': 41, 'image_count': 305}, {'id': 42, 'image_count': 6}, {'id': 43, 'image_count': 210}, {'id': 44, 'image_count': 36}, {'id': 45, 'image_count': 1787}, {'id': 46, 'image_count': 17}, {'id': 47, 'image_count': 51}, {'id': 48, 'image_count': 138}, {'id': 49, 'image_count': 3}, {'id': 50, 'image_count': 1470}, {'id': 51, 'image_count': 3}, {'id': 52, 'image_count': 2}, {'id': 53, 'image_count': 186}, {'id': 54, 'image_count': 76}, {'id': 55, 'image_count': 26}, {'id': 56, 'image_count': 303}, {'id': 57, 'image_count': 738}, {'id': 58, 'image_count': 1799}, {'id': 59, 'image_count': 1934}, {'id': 60, 'image_count': 1609}, {'id': 61, 'image_count': 1622}, {'id': 62, 'image_count': 41}, {'id': 63, 'image_count': 4}, {'id': 64, 'image_count': 11}, {'id': 65, 'image_count': 270}, {'id': 66, 'image_count': 349}, {'id': 67, 'image_count': 42}, {'id': 68, 'image_count': 823}, {'id': 69, 'image_count': 6}, {'id': 70, 'image_count': 48}, {'id': 71, 'image_count': 3}, {'id': 72, 'image_count': 42}, {'id': 73, 'image_count': 24}, {'id': 74, 'image_count': 16}, {'id': 75, 'image_count': 605}, {'id': 76, 'image_count': 646}, {'id': 77, 'image_count': 1765}, {'id': 78, 'image_count': 2}, {'id': 79, 'image_count': 125}, {'id': 80, 'image_count': 1420}, {'id': 81, 'image_count': 140}, {'id': 82, 'image_count': 4}, {'id': 83, 'image_count': 322}, {'id': 84, 'image_count': 60}, {'id': 85, 'image_count': 2}, {'id': 86, 'image_count': 231}, {'id': 87, 'image_count': 333}, {'id': 88, 'image_count': 1941}, {'id': 89, 'image_count': 367}, {'id': 90, 'image_count': 1922}, {'id': 91, 'image_count': 18}, {'id': 92, 'image_count': 81}, {'id': 93, 'image_count': 1}, {'id': 94, 'image_count': 1852}, {'id': 95, 'image_count': 430}, {'id': 96, 'image_count': 247}, {'id': 97, 'image_count': 94}, {'id': 98, 'image_count': 21}, {'id': 99, 'image_count': 1821}, {'id': 100, 'image_count': 16}, {'id': 101, 'image_count': 12}, {'id': 102, 'image_count': 25}, {'id': 103, 'image_count': 41}, {'id': 104, 'image_count': 244}, {'id': 105, 'image_count': 7}, {'id': 106, 'image_count': 1}, {'id': 107, 'image_count': 40}, {'id': 108, 'image_count': 40}, {'id': 109, 'image_count': 104}, {'id': 110, 'image_count': 1671}, {'id': 111, 'image_count': 49}, {'id': 112, 'image_count': 243}, 
{'id': 113, 'image_count': 2}, {'id': 114, 'image_count': 242}, {'id': 115, 'image_count': 271}, {'id': 116, 'image_count': 104}, {'id': 117, 'image_count': 8}, {'id': 118, 'image_count': 1758}, {'id': 119, 'image_count': 1}, {'id': 120, 'image_count': 48}, {'id': 121, 'image_count': 14}, {'id': 122, 'image_count': 40}, {'id': 123, 'image_count': 1}, {'id': 124, 'image_count': 37}, {'id': 125, 'image_count': 1510}, {'id': 126, 'image_count': 6}, {'id': 127, 'image_count': 1903}, {'id': 128, 'image_count': 70}, {'id': 129, 'image_count': 86}, {'id': 130, 'image_count': 7}, {'id': 131, 'image_count': 5}, {'id': 132, 'image_count': 1406}, {'id': 133, 'image_count': 1901}, {'id': 134, 'image_count': 15}, {'id': 135, 'image_count': 28}, {'id': 136, 'image_count': 6}, {'id': 137, 'image_count': 494}, {'id': 138, 'image_count': 234}, {'id': 139, 'image_count': 1922}, {'id': 140, 'image_count': 1}, {'id': 141, 'image_count': 35}, {'id': 142, 'image_count': 5}, {'id': 143, 'image_count': 1828}, {'id': 144, 'image_count': 8}, {'id': 145, 'image_count': 63}, {'id': 146, 'image_count': 1668}, {'id': 147, 'image_count': 4}, {'id': 148, 'image_count': 95}, {'id': 149, 'image_count': 17}, {'id': 150, 'image_count': 1567}, {'id': 151, 'image_count': 2}, {'id': 152, 'image_count': 103}, {'id': 153, 'image_count': 50}, {'id': 154, 'image_count': 1309}, {'id': 155, 'image_count': 6}, {'id': 156, 'image_count': 92}, {'id': 157, 'image_count': 19}, {'id': 158, 'image_count': 37}, {'id': 159, 'image_count': 4}, {'id': 160, 'image_count': 709}, {'id': 161, 'image_count': 9}, {'id': 162, 'image_count': 82}, {'id': 163, 'image_count': 15}, {'id': 164, 'image_count': 3}, {'id': 165, 'image_count': 61}, {'id': 166, 'image_count': 51}, {'id': 167, 'image_count': 5}, {'id': 168, 'image_count': 13}, {'id': 169, 'image_count': 642}, {'id': 170, 'image_count': 24}, {'id': 171, 'image_count': 255}, {'id': 172, 'image_count': 9}, {'id': 173, 'image_count': 1808}, {'id': 174, 'image_count': 31}, {'id': 175, 'image_count': 158}, {'id': 176, 'image_count': 80}, {'id': 177, 'image_count': 1884}, {'id': 178, 'image_count': 158}, {'id': 179, 'image_count': 2}, {'id': 180, 'image_count': 12}, {'id': 181, 'image_count': 1659}, {'id': 182, 'image_count': 7}, {'id': 183, 'image_count': 834}, {'id': 184, 'image_count': 57}, {'id': 185, 'image_count': 174}, {'id': 186, 'image_count': 95}, {'id': 187, 'image_count': 27}, {'id': 188, 'image_count': 22}, {'id': 189, 'image_count': 1391}, {'id': 190, 'image_count': 90}, {'id': 191, 'image_count': 40}, {'id': 192, 'image_count': 445}, {'id': 193, 'image_count': 21}, {'id': 194, 'image_count': 1132}, {'id': 195, 'image_count': 177}, {'id': 196, 'image_count': 4}, {'id': 197, 'image_count': 17}, {'id': 198, 'image_count': 84}, {'id': 199, 'image_count': 55}, {'id': 200, 'image_count': 30}, {'id': 201, 'image_count': 25}, {'id': 202, 'image_count': 2}, {'id': 203, 'image_count': 125}, {'id': 204, 'image_count': 1135}, {'id': 205, 'image_count': 19}, {'id': 206, 'image_count': 72}, {'id': 207, 'image_count': 1926}, {'id': 208, 'image_count': 159}, {'id': 209, 'image_count': 7}, {'id': 210, 'image_count': 1}, {'id': 211, 'image_count': 13}, {'id': 212, 'image_count': 35}, {'id': 213, 'image_count': 18}, {'id': 214, 'image_count': 8}, {'id': 215, 'image_count': 6}, {'id': 216, 'image_count': 35}, {'id': 217, 'image_count': 1222}, {'id': 218, 'image_count': 103}, {'id': 219, 'image_count': 28}, {'id': 220, 'image_count': 63}, {'id': 221, 'image_count': 28}, {'id': 222, 'image_count': 5}, {'id': 
223, 'image_count': 7}, {'id': 224, 'image_count': 14}, {'id': 225, 'image_count': 1918}, {'id': 226, 'image_count': 133}, {'id': 227, 'image_count': 16}, {'id': 228, 'image_count': 27}, {'id': 229, 'image_count': 110}, {'id': 230, 'image_count': 1895}, {'id': 231, 'image_count': 4}, {'id': 232, 'image_count': 1927}, {'id': 233, 'image_count': 8}, {'id': 234, 'image_count': 1}, {'id': 235, 'image_count': 263}, {'id': 236, 'image_count': 10}, {'id': 237, 'image_count': 2}, {'id': 238, 'image_count': 3}, {'id': 239, 'image_count': 87}, {'id': 240, 'image_count': 9}, {'id': 241, 'image_count': 71}, {'id': 242, 'image_count': 13}, {'id': 243, 'image_count': 18}, {'id': 244, 'image_count': 2}, {'id': 245, 'image_count': 5}, {'id': 246, 'image_count': 45}, {'id': 247, 'image_count': 1}, {'id': 248, 'image_count': 23}, {'id': 249, 'image_count': 32}, {'id': 250, 'image_count': 4}, {'id': 251, 'image_count': 1}, {'id': 252, 'image_count': 858}, {'id': 253, 'image_count': 661}, {'id': 254, 'image_count': 168}, {'id': 255, 'image_count': 210}, {'id': 256, 'image_count': 65}, {'id': 257, 'image_count': 4}, {'id': 258, 'image_count': 2}, {'id': 259, 'image_count': 159}, {'id': 260, 'image_count': 31}, {'id': 261, 'image_count': 811}, {'id': 262, 'image_count': 1}, {'id': 263, 'image_count': 42}, {'id': 264, 'image_count': 27}, {'id': 265, 'image_count': 2}, {'id': 266, 'image_count': 5}, {'id': 267, 'image_count': 95}, {'id': 268, 'image_count': 32}, {'id': 269, 'image_count': 1}, {'id': 270, 'image_count': 1}, {'id': 271, 'image_count': 1844}, {'id': 272, 'image_count': 897}, {'id': 273, 'image_count': 31}, {'id': 274, 'image_count': 23}, {'id': 275, 'image_count': 1}, {'id': 276, 'image_count': 202}, {'id': 277, 'image_count': 746}, {'id': 278, 'image_count': 44}, {'id': 279, 'image_count': 14}, {'id': 280, 'image_count': 26}, {'id': 281, 'image_count': 1}, {'id': 282, 'image_count': 2}, {'id': 283, 'image_count': 25}, {'id': 284, 'image_count': 238}, {'id': 285, 'image_count': 592}, {'id': 286, 'image_count': 26}, {'id': 287, 'image_count': 5}, {'id': 288, 'image_count': 42}, {'id': 289, 'image_count': 13}, {'id': 290, 'image_count': 46}, {'id': 291, 'image_count': 1}, {'id': 292, 'image_count': 8}, {'id': 293, 'image_count': 34}, {'id': 294, 'image_count': 5}, {'id': 295, 'image_count': 1}, {'id': 296, 'image_count': 1871}, {'id': 297, 'image_count': 717}, {'id': 298, 'image_count': 1010}, {'id': 299, 'image_count': 679}, {'id': 300, 'image_count': 3}, {'id': 301, 'image_count': 4}, {'id': 302, 'image_count': 1}, {'id': 303, 'image_count': 166}, {'id': 304, 'image_count': 2}, {'id': 305, 'image_count': 266}, {'id': 306, 'image_count': 101}, {'id': 307, 'image_count': 6}, {'id': 308, 'image_count': 14}, {'id': 309, 'image_count': 133}, {'id': 310, 'image_count': 2}, {'id': 311, 'image_count': 38}, {'id': 312, 'image_count': 95}, {'id': 313, 'image_count': 1}, {'id': 314, 'image_count': 12}, {'id': 315, 'image_count': 49}, {'id': 316, 'image_count': 5}, {'id': 317, 'image_count': 5}, {'id': 318, 'image_count': 16}, {'id': 319, 'image_count': 216}, {'id': 320, 'image_count': 12}, {'id': 321, 'image_count': 1}, {'id': 322, 'image_count': 54}, {'id': 323, 'image_count': 5}, {'id': 324, 'image_count': 245}, {'id': 325, 'image_count': 12}, {'id': 326, 'image_count': 7}, {'id': 327, 'image_count': 35}, {'id': 328, 'image_count': 36}, {'id': 329, 'image_count': 32}, {'id': 330, 'image_count': 1027}, {'id': 331, 'image_count': 10}, {'id': 332, 'image_count': 12}, {'id': 333, 'image_count': 1}, {'id': 334, 
'image_count': 67}, {'id': 335, 'image_count': 71}, {'id': 336, 'image_count': 30}, {'id': 337, 'image_count': 48}, {'id': 338, 'image_count': 249}, {'id': 339, 'image_count': 13}, {'id': 340, 'image_count': 29}, {'id': 341, 'image_count': 14}, {'id': 342, 'image_count': 236}, {'id': 343, 'image_count': 15}, {'id': 344, 'image_count': 1521}, {'id': 345, 'image_count': 25}, {'id': 346, 'image_count': 249}, {'id': 347, 'image_count': 139}, {'id': 348, 'image_count': 2}, {'id': 349, 'image_count': 2}, {'id': 350, 'image_count': 1890}, {'id': 351, 'image_count': 1240}, {'id': 352, 'image_count': 1}, {'id': 353, 'image_count': 9}, {'id': 354, 'image_count': 1}, {'id': 355, 'image_count': 3}, {'id': 356, 'image_count': 11}, {'id': 357, 'image_count': 4}, {'id': 358, 'image_count': 236}, {'id': 359, 'image_count': 44}, {'id': 360, 'image_count': 19}, {'id': 361, 'image_count': 1100}, {'id': 362, 'image_count': 7}, {'id': 363, 'image_count': 69}, {'id': 364, 'image_count': 2}, {'id': 365, 'image_count': 8}, {'id': 366, 'image_count': 5}, {'id': 367, 'image_count': 227}, {'id': 368, 'image_count': 6}, {'id': 369, 'image_count': 106}, {'id': 370, 'image_count': 81}, {'id': 371, 'image_count': 17}, {'id': 372, 'image_count': 134}, {'id': 373, 'image_count': 312}, {'id': 374, 'image_count': 8}, {'id': 375, 'image_count': 271}, {'id': 376, 'image_count': 2}, {'id': 377, 'image_count': 103}, {'id': 378, 'image_count': 1938}, {'id': 379, 'image_count': 574}, {'id': 380, 'image_count': 120}, {'id': 381, 'image_count': 2}, {'id': 382, 'image_count': 2}, {'id': 383, 'image_count': 13}, {'id': 384, 'image_count': 29}, {'id': 385, 'image_count': 1710}, {'id': 386, 'image_count': 66}, {'id': 387, 'image_count': 1008}, {'id': 388, 'image_count': 1}, {'id': 389, 'image_count': 3}, {'id': 390, 'image_count': 1942}, {'id': 391, 'image_count': 19}, {'id': 392, 'image_count': 1488}, {'id': 393, 'image_count': 46}, {'id': 394, 'image_count': 106}, {'id': 395, 'image_count': 115}, {'id': 396, 'image_count': 19}, {'id': 397, 'image_count': 2}, {'id': 398, 'image_count': 1}, {'id': 399, 'image_count': 28}, {'id': 400, 'image_count': 9}, {'id': 401, 'image_count': 192}, {'id': 402, 'image_count': 12}, {'id': 403, 'image_count': 21}, {'id': 404, 'image_count': 247}, {'id': 405, 'image_count': 6}, {'id': 406, 'image_count': 64}, {'id': 407, 'image_count': 7}, {'id': 408, 'image_count': 40}, {'id': 409, 'image_count': 542}, {'id': 410, 'image_count': 2}, {'id': 411, 'image_count': 1898}, {'id': 412, 'image_count': 36}, {'id': 413, 'image_count': 4}, {'id': 414, 'image_count': 1}, {'id': 415, 'image_count': 191}, {'id': 416, 'image_count': 6}, {'id': 417, 'image_count': 41}, {'id': 418, 'image_count': 39}, {'id': 419, 'image_count': 46}, {'id': 420, 'image_count': 1}, {'id': 421, 'image_count': 1451}, {'id': 422, 'image_count': 1878}, {'id': 423, 'image_count': 11}, {'id': 424, 'image_count': 82}, {'id': 425, 'image_count': 18}, {'id': 426, 'image_count': 1}, {'id': 427, 'image_count': 7}, {'id': 428, 'image_count': 3}, {'id': 429, 'image_count': 575}, {'id': 430, 'image_count': 1907}, {'id': 431, 'image_count': 8}, {'id': 432, 'image_count': 4}, {'id': 433, 'image_count': 32}, {'id': 434, 'image_count': 11}, {'id': 435, 'image_count': 4}, {'id': 436, 'image_count': 54}, {'id': 437, 'image_count': 202}, {'id': 438, 'image_count': 32}, {'id': 439, 'image_count': 3}, {'id': 440, 'image_count': 130}, {'id': 441, 'image_count': 119}, {'id': 442, 'image_count': 141}, {'id': 443, 'image_count': 29}, {'id': 444, 'image_count': 
525}, {'id': 445, 'image_count': 1323}, {'id': 446, 'image_count': 2}, {'id': 447, 'image_count': 113}, {'id': 448, 'image_count': 16}, {'id': 449, 'image_count': 7}, {'id': 450, 'image_count': 35}, {'id': 451, 'image_count': 1908}, {'id': 452, 'image_count': 353}, {'id': 453, 'image_count': 18}, {'id': 454, 'image_count': 14}, {'id': 455, 'image_count': 77}, {'id': 456, 'image_count': 8}, {'id': 457, 'image_count': 37}, {'id': 458, 'image_count': 1}, {'id': 459, 'image_count': 346}, {'id': 460, 'image_count': 19}, {'id': 461, 'image_count': 1779}, {'id': 462, 'image_count': 23}, {'id': 463, 'image_count': 25}, {'id': 464, 'image_count': 67}, {'id': 465, 'image_count': 19}, {'id': 466, 'image_count': 28}, {'id': 467, 'image_count': 4}, {'id': 468, 'image_count': 27}, {'id': 469, 'image_count': 1861}, {'id': 470, 'image_count': 11}, {'id': 471, 'image_count': 13}, {'id': 472, 'image_count': 13}, {'id': 473, 'image_count': 32}, {'id': 474, 'image_count': 1767}, {'id': 475, 'image_count': 42}, {'id': 476, 'image_count': 17}, {'id': 477, 'image_count': 128}, {'id': 478, 'image_count': 1}, {'id': 479, 'image_count': 9}, {'id': 480, 'image_count': 10}, {'id': 481, 'image_count': 4}, {'id': 482, 'image_count': 9}, {'id': 483, 'image_count': 18}, {'id': 484, 'image_count': 41}, {'id': 485, 'image_count': 28}, {'id': 486, 'image_count': 3}, {'id': 487, 'image_count': 65}, {'id': 488, 'image_count': 9}, {'id': 489, 'image_count': 23}, {'id': 490, 'image_count': 24}, {'id': 491, 'image_count': 1}, {'id': 492, 'image_count': 2}, {'id': 493, 'image_count': 59}, {'id': 494, 'image_count': 48}, {'id': 495, 'image_count': 17}, {'id': 496, 'image_count': 1877}, {'id': 497, 'image_count': 18}, {'id': 498, 'image_count': 1920}, {'id': 499, 'image_count': 50}, {'id': 500, 'image_count': 1890}, {'id': 501, 'image_count': 99}, {'id': 502, 'image_count': 1530}, {'id': 503, 'image_count': 3}, {'id': 504, 'image_count': 11}, {'id': 505, 'image_count': 19}, {'id': 506, 'image_count': 3}, {'id': 507, 'image_count': 63}, {'id': 508, 'image_count': 5}, {'id': 509, 'image_count': 6}, {'id': 510, 'image_count': 233}, {'id': 511, 'image_count': 54}, {'id': 512, 'image_count': 36}, {'id': 513, 'image_count': 10}, {'id': 514, 'image_count': 124}, {'id': 515, 'image_count': 101}, {'id': 516, 'image_count': 3}, {'id': 517, 'image_count': 363}, {'id': 518, 'image_count': 3}, {'id': 519, 'image_count': 30}, {'id': 520, 'image_count': 18}, {'id': 521, 'image_count': 199}, {'id': 522, 'image_count': 97}, {'id': 523, 'image_count': 32}, {'id': 524, 'image_count': 121}, {'id': 525, 'image_count': 16}, {'id': 526, 'image_count': 12}, {'id': 527, 'image_count': 2}, {'id': 528, 'image_count': 214}, {'id': 529, 'image_count': 48}, {'id': 530, 'image_count': 26}, {'id': 531, 'image_count': 13}, {'id': 532, 'image_count': 4}, {'id': 533, 'image_count': 11}, {'id': 534, 'image_count': 123}, {'id': 535, 'image_count': 7}, {'id': 536, 'image_count': 200}, {'id': 537, 'image_count': 91}, {'id': 538, 'image_count': 9}, {'id': 539, 'image_count': 72}, {'id': 540, 'image_count': 1886}, {'id': 541, 'image_count': 4}, {'id': 542, 'image_count': 1}, {'id': 543, 'image_count': 1}, {'id': 544, 'image_count': 1932}, {'id': 545, 'image_count': 4}, {'id': 546, 'image_count': 56}, {'id': 547, 'image_count': 854}, {'id': 548, 'image_count': 755}, {'id': 549, 'image_count': 1843}, {'id': 550, 'image_count': 96}, {'id': 551, 'image_count': 7}, {'id': 552, 'image_count': 74}, {'id': 553, 'image_count': 66}, {'id': 554, 'image_count': 57}, {'id': 555, 
'image_count': 44}, {'id': 556, 'image_count': 1905}, {'id': 557, 'image_count': 4}, {'id': 558, 'image_count': 90}, {'id': 559, 'image_count': 1635}, {'id': 560, 'image_count': 8}, {'id': 561, 'image_count': 5}, {'id': 562, 'image_count': 50}, {'id': 563, 'image_count': 545}, {'id': 564, 'image_count': 20}, {'id': 565, 'image_count': 193}, {'id': 566, 'image_count': 285}, {'id': 567, 'image_count': 3}, {'id': 568, 'image_count': 1}, {'id': 569, 'image_count': 1904}, {'id': 570, 'image_count': 294}, {'id': 571, 'image_count': 3}, {'id': 572, 'image_count': 5}, {'id': 573, 'image_count': 24}, {'id': 574, 'image_count': 2}, {'id': 575, 'image_count': 2}, {'id': 576, 'image_count': 16}, {'id': 577, 'image_count': 8}, {'id': 578, 'image_count': 154}, {'id': 579, 'image_count': 66}, {'id': 580, 'image_count': 1}, {'id': 581, 'image_count': 24}, {'id': 582, 'image_count': 1}, {'id': 583, 'image_count': 4}, {'id': 584, 'image_count': 75}, {'id': 585, 'image_count': 6}, {'id': 586, 'image_count': 126}, {'id': 587, 'image_count': 24}, {'id': 588, 'image_count': 22}, {'id': 589, 'image_count': 1872}, {'id': 590, 'image_count': 16}, {'id': 591, 'image_count': 423}, {'id': 592, 'image_count': 1927}, {'id': 593, 'image_count': 38}, {'id': 594, 'image_count': 3}, {'id': 595, 'image_count': 1945}, {'id': 596, 'image_count': 35}, {'id': 597, 'image_count': 1}, {'id': 598, 'image_count': 13}, {'id': 599, 'image_count': 9}, {'id': 600, 'image_count': 14}, {'id': 601, 'image_count': 37}, {'id': 602, 'image_count': 3}, {'id': 603, 'image_count': 4}, {'id': 604, 'image_count': 100}, {'id': 605, 'image_count': 195}, {'id': 606, 'image_count': 1}, {'id': 607, 'image_count': 12}, {'id': 608, 'image_count': 24}, {'id': 609, 'image_count': 489}, {'id': 610, 'image_count': 10}, {'id': 611, 'image_count': 1689}, {'id': 612, 'image_count': 42}, {'id': 613, 'image_count': 81}, {'id': 614, 'image_count': 894}, {'id': 615, 'image_count': 1868}, {'id': 616, 'image_count': 7}, {'id': 617, 'image_count': 1567}, {'id': 618, 'image_count': 10}, {'id': 619, 'image_count': 8}, {'id': 620, 'image_count': 7}, {'id': 621, 'image_count': 629}, {'id': 622, 'image_count': 89}, {'id': 623, 'image_count': 15}, {'id': 624, 'image_count': 134}, {'id': 625, 'image_count': 4}, {'id': 626, 'image_count': 1802}, {'id': 627, 'image_count': 595}, {'id': 628, 'image_count': 1210}, {'id': 629, 'image_count': 48}, {'id': 630, 'image_count': 418}, {'id': 631, 'image_count': 1846}, {'id': 632, 'image_count': 5}, {'id': 633, 'image_count': 221}, {'id': 634, 'image_count': 10}, {'id': 635, 'image_count': 7}, {'id': 636, 'image_count': 76}, {'id': 637, 'image_count': 22}, {'id': 638, 'image_count': 10}, {'id': 639, 'image_count': 341}, {'id': 640, 'image_count': 1}, {'id': 641, 'image_count': 705}, {'id': 642, 'image_count': 1900}, {'id': 643, 'image_count': 188}, {'id': 644, 'image_count': 227}, {'id': 645, 'image_count': 861}, {'id': 646, 'image_count': 6}, {'id': 647, 'image_count': 115}, {'id': 648, 'image_count': 5}, {'id': 649, 'image_count': 43}, {'id': 650, 'image_count': 14}, {'id': 651, 'image_count': 6}, {'id': 652, 'image_count': 15}, {'id': 653, 'image_count': 1167}, {'id': 654, 'image_count': 15}, {'id': 655, 'image_count': 994}, {'id': 656, 'image_count': 28}, {'id': 657, 'image_count': 2}, {'id': 658, 'image_count': 338}, {'id': 659, 'image_count': 334}, {'id': 660, 'image_count': 15}, {'id': 661, 'image_count': 102}, {'id': 662, 'image_count': 1}, {'id': 663, 'image_count': 8}, {'id': 664, 'image_count': 1}, {'id': 665, 'image_count': 
1}, {'id': 666, 'image_count': 28}, {'id': 667, 'image_count': 91}, {'id': 668, 'image_count': 260}, {'id': 669, 'image_count': 131}, {'id': 670, 'image_count': 128}, {'id': 671, 'image_count': 3}, {'id': 672, 'image_count': 10}, {'id': 673, 'image_count': 39}, {'id': 674, 'image_count': 2}, {'id': 675, 'image_count': 925}, {'id': 676, 'image_count': 354}, {'id': 677, 'image_count': 31}, {'id': 678, 'image_count': 10}, {'id': 679, 'image_count': 215}, {'id': 680, 'image_count': 71}, {'id': 681, 'image_count': 43}, {'id': 682, 'image_count': 28}, {'id': 683, 'image_count': 34}, {'id': 684, 'image_count': 16}, {'id': 685, 'image_count': 273}, {'id': 686, 'image_count': 2}, {'id': 687, 'image_count': 999}, {'id': 688, 'image_count': 4}, {'id': 689, 'image_count': 107}, {'id': 690, 'image_count': 2}, {'id': 691, 'image_count': 1}, {'id': 692, 'image_count': 454}, {'id': 693, 'image_count': 9}, {'id': 694, 'image_count': 1901}, {'id': 695, 'image_count': 61}, {'id': 696, 'image_count': 91}, {'id': 697, 'image_count': 46}, {'id': 698, 'image_count': 1402}, {'id': 699, 'image_count': 74}, {'id': 700, 'image_count': 421}, {'id': 701, 'image_count': 226}, {'id': 702, 'image_count': 10}, {'id': 703, 'image_count': 1720}, {'id': 704, 'image_count': 261}, {'id': 705, 'image_count': 1337}, {'id': 706, 'image_count': 293}, {'id': 707, 'image_count': 62}, {'id': 708, 'image_count': 814}, {'id': 709, 'image_count': 407}, {'id': 710, 'image_count': 6}, {'id': 711, 'image_count': 16}, {'id': 712, 'image_count': 7}, {'id': 713, 'image_count': 1791}, {'id': 714, 'image_count': 2}, {'id': 715, 'image_count': 1915}, {'id': 716, 'image_count': 1940}, {'id': 717, 'image_count': 13}, {'id': 718, 'image_count': 16}, {'id': 719, 'image_count': 448}, {'id': 720, 'image_count': 12}, {'id': 721, 'image_count': 18}, {'id': 722, 'image_count': 4}, {'id': 723, 'image_count': 71}, {'id': 724, 'image_count': 189}, {'id': 725, 'image_count': 74}, {'id': 726, 'image_count': 103}, {'id': 727, 'image_count': 3}, {'id': 728, 'image_count': 110}, {'id': 729, 'image_count': 5}, {'id': 730, 'image_count': 9}, {'id': 731, 'image_count': 15}, {'id': 732, 'image_count': 25}, {'id': 733, 'image_count': 7}, {'id': 734, 'image_count': 647}, {'id': 735, 'image_count': 824}, {'id': 736, 'image_count': 100}, {'id': 737, 'image_count': 47}, {'id': 738, 'image_count': 121}, {'id': 739, 'image_count': 731}, {'id': 740, 'image_count': 73}, {'id': 741, 'image_count': 49}, {'id': 742, 'image_count': 23}, {'id': 743, 'image_count': 4}, {'id': 744, 'image_count': 62}, {'id': 745, 'image_count': 118}, {'id': 746, 'image_count': 99}, {'id': 747, 'image_count': 40}, {'id': 748, 'image_count': 1036}, {'id': 749, 'image_count': 105}, {'id': 750, 'image_count': 21}, {'id': 751, 'image_count': 229}, {'id': 752, 'image_count': 7}, {'id': 753, 'image_count': 72}, {'id': 754, 'image_count': 9}, {'id': 755, 'image_count': 10}, {'id': 756, 'image_count': 328}, {'id': 757, 'image_count': 468}, {'id': 758, 'image_count': 1}, {'id': 759, 'image_count': 2}, {'id': 760, 'image_count': 24}, {'id': 761, 'image_count': 11}, {'id': 762, 'image_count': 72}, {'id': 763, 'image_count': 17}, {'id': 764, 'image_count': 10}, {'id': 765, 'image_count': 17}, {'id': 766, 'image_count': 489}, {'id': 767, 'image_count': 47}, {'id': 768, 'image_count': 93}, {'id': 769, 'image_count': 1}, {'id': 770, 'image_count': 12}, {'id': 771, 'image_count': 228}, {'id': 772, 'image_count': 5}, {'id': 773, 'image_count': 76}, {'id': 774, 'image_count': 71}, {'id': 775, 'image_count': 30}, 
{'id': 776, 'image_count': 109}, {'id': 777, 'image_count': 14}, {'id': 778, 'image_count': 1}, {'id': 779, 'image_count': 8}, {'id': 780, 'image_count': 26}, {'id': 781, 'image_count': 339}, {'id': 782, 'image_count': 153}, {'id': 783, 'image_count': 2}, {'id': 784, 'image_count': 3}, {'id': 785, 'image_count': 8}, {'id': 786, 'image_count': 47}, {'id': 787, 'image_count': 8}, {'id': 788, 'image_count': 6}, {'id': 789, 'image_count': 116}, {'id': 790, 'image_count': 69}, {'id': 791, 'image_count': 13}, {'id': 792, 'image_count': 6}, {'id': 793, 'image_count': 1928}, {'id': 794, 'image_count': 79}, {'id': 795, 'image_count': 14}, {'id': 796, 'image_count': 7}, {'id': 797, 'image_count': 20}, {'id': 798, 'image_count': 114}, {'id': 799, 'image_count': 221}, {'id': 800, 'image_count': 502}, {'id': 801, 'image_count': 62}, {'id': 802, 'image_count': 87}, {'id': 803, 'image_count': 4}, {'id': 804, 'image_count': 1912}, {'id': 805, 'image_count': 7}, {'id': 806, 'image_count': 186}, {'id': 807, 'image_count': 18}, {'id': 808, 'image_count': 4}, {'id': 809, 'image_count': 3}, {'id': 810, 'image_count': 7}, {'id': 811, 'image_count': 1413}, {'id': 812, 'image_count': 7}, {'id': 813, 'image_count': 12}, {'id': 814, 'image_count': 248}, {'id': 815, 'image_count': 4}, {'id': 816, 'image_count': 1881}, {'id': 817, 'image_count': 529}, {'id': 818, 'image_count': 1932}, {'id': 819, 'image_count': 50}, {'id': 820, 'image_count': 3}, {'id': 821, 'image_count': 28}, {'id': 822, 'image_count': 10}, {'id': 823, 'image_count': 5}, {'id': 824, 'image_count': 5}, {'id': 825, 'image_count': 18}, {'id': 826, 'image_count': 14}, {'id': 827, 'image_count': 1890}, {'id': 828, 'image_count': 660}, {'id': 829, 'image_count': 8}, {'id': 830, 'image_count': 25}, {'id': 831, 'image_count': 10}, {'id': 832, 'image_count': 218}, {'id': 833, 'image_count': 36}, {'id': 834, 'image_count': 16}, {'id': 835, 'image_count': 808}, {'id': 836, 'image_count': 479}, {'id': 837, 'image_count': 1404}, {'id': 838, 'image_count': 307}, {'id': 839, 'image_count': 57}, {'id': 840, 'image_count': 28}, {'id': 841, 'image_count': 80}, {'id': 842, 'image_count': 11}, {'id': 843, 'image_count': 92}, {'id': 844, 'image_count': 20}, {'id': 845, 'image_count': 194}, {'id': 846, 'image_count': 23}, {'id': 847, 'image_count': 52}, {'id': 848, 'image_count': 673}, {'id': 849, 'image_count': 2}, {'id': 850, 'image_count': 2}, {'id': 851, 'image_count': 1}, {'id': 852, 'image_count': 2}, {'id': 853, 'image_count': 8}, {'id': 854, 'image_count': 80}, {'id': 855, 'image_count': 3}, {'id': 856, 'image_count': 3}, {'id': 857, 'image_count': 15}, {'id': 858, 'image_count': 2}, {'id': 859, 'image_count': 10}, {'id': 860, 'image_count': 386}, {'id': 861, 'image_count': 65}, {'id': 862, 'image_count': 3}, {'id': 863, 'image_count': 35}, {'id': 864, 'image_count': 5}, {'id': 865, 'image_count': 180}, {'id': 866, 'image_count': 99}, {'id': 867, 'image_count': 49}, {'id': 868, 'image_count': 28}, {'id': 869, 'image_count': 1}, {'id': 870, 'image_count': 52}, {'id': 871, 'image_count': 36}, {'id': 872, 'image_count': 70}, {'id': 873, 'image_count': 6}, {'id': 874, 'image_count': 29}, {'id': 875, 'image_count': 24}, {'id': 876, 'image_count': 1115}, {'id': 877, 'image_count': 61}, {'id': 878, 'image_count': 18}, {'id': 879, 'image_count': 18}, {'id': 880, 'image_count': 665}, {'id': 881, 'image_count': 1096}, {'id': 882, 'image_count': 29}, {'id': 883, 'image_count': 8}, {'id': 884, 'image_count': 14}, {'id': 885, 'image_count': 1622}, {'id': 886, 'image_count': 
2}, {'id': 887, 'image_count': 3}, {'id': 888, 'image_count': 32}, {'id': 889, 'image_count': 55}, {'id': 890, 'image_count': 1}, {'id': 891, 'image_count': 10}, {'id': 892, 'image_count': 10}, {'id': 893, 'image_count': 47}, {'id': 894, 'image_count': 3}, {'id': 895, 'image_count': 29}, {'id': 896, 'image_count': 342}, {'id': 897, 'image_count': 25}, {'id': 898, 'image_count': 1469}, {'id': 899, 'image_count': 521}, {'id': 900, 'image_count': 347}, {'id': 901, 'image_count': 35}, {'id': 902, 'image_count': 7}, {'id': 903, 'image_count': 207}, {'id': 904, 'image_count': 108}, {'id': 905, 'image_count': 2}, {'id': 906, 'image_count': 34}, {'id': 907, 'image_count': 12}, {'id': 908, 'image_count': 10}, {'id': 909, 'image_count': 13}, {'id': 910, 'image_count': 361}, {'id': 911, 'image_count': 1023}, {'id': 912, 'image_count': 782}, {'id': 913, 'image_count': 2}, {'id': 914, 'image_count': 5}, {'id': 915, 'image_count': 247}, {'id': 916, 'image_count': 221}, {'id': 917, 'image_count': 4}, {'id': 918, 'image_count': 8}, {'id': 919, 'image_count': 158}, {'id': 920, 'image_count': 3}, {'id': 921, 'image_count': 752}, {'id': 922, 'image_count': 64}, {'id': 923, 'image_count': 707}, {'id': 924, 'image_count': 143}, {'id': 925, 'image_count': 1}, {'id': 926, 'image_count': 49}, {'id': 927, 'image_count': 126}, {'id': 928, 'image_count': 76}, {'id': 929, 'image_count': 11}, {'id': 930, 'image_count': 11}, {'id': 931, 'image_count': 4}, {'id': 932, 'image_count': 39}, {'id': 933, 'image_count': 11}, {'id': 934, 'image_count': 13}, {'id': 935, 'image_count': 91}, {'id': 936, 'image_count': 14}, {'id': 937, 'image_count': 5}, {'id': 938, 'image_count': 3}, {'id': 939, 'image_count': 10}, {'id': 940, 'image_count': 18}, {'id': 941, 'image_count': 9}, {'id': 942, 'image_count': 6}, {'id': 943, 'image_count': 951}, {'id': 944, 'image_count': 2}, {'id': 945, 'image_count': 1}, {'id': 946, 'image_count': 19}, {'id': 947, 'image_count': 1942}, {'id': 948, 'image_count': 1916}, {'id': 949, 'image_count': 139}, {'id': 950, 'image_count': 43}, {'id': 951, 'image_count': 1969}, {'id': 952, 'image_count': 5}, {'id': 953, 'image_count': 134}, {'id': 954, 'image_count': 74}, {'id': 955, 'image_count': 381}, {'id': 956, 'image_count': 1}, {'id': 957, 'image_count': 381}, {'id': 958, 'image_count': 6}, {'id': 959, 'image_count': 1826}, {'id': 960, 'image_count': 28}, {'id': 961, 'image_count': 1635}, {'id': 962, 'image_count': 1967}, {'id': 963, 'image_count': 16}, {'id': 964, 'image_count': 1926}, {'id': 965, 'image_count': 1789}, {'id': 966, 'image_count': 401}, {'id': 967, 'image_count': 1968}, {'id': 968, 'image_count': 1167}, {'id': 969, 'image_count': 1}, {'id': 970, 'image_count': 56}, {'id': 971, 'image_count': 17}, {'id': 972, 'image_count': 1}, {'id': 973, 'image_count': 58}, {'id': 974, 'image_count': 9}, {'id': 975, 'image_count': 8}, {'id': 976, 'image_count': 1124}, {'id': 977, 'image_count': 31}, {'id': 978, 'image_count': 16}, {'id': 979, 'image_count': 491}, {'id': 980, 'image_count': 432}, {'id': 981, 'image_count': 1945}, {'id': 982, 'image_count': 1899}, {'id': 983, 'image_count': 5}, {'id': 984, 'image_count': 28}, {'id': 985, 'image_count': 7}, {'id': 986, 'image_count': 146}, {'id': 987, 'image_count': 1}, {'id': 988, 'image_count': 25}, {'id': 989, 'image_count': 22}, {'id': 990, 'image_count': 1}, {'id': 991, 'image_count': 10}, {'id': 992, 'image_count': 9}, {'id': 993, 'image_count': 308}, {'id': 994, 'image_count': 4}, {'id': 995, 'image_count': 1969}, {'id': 996, 'image_count': 45}, 
{'id': 997, 'image_count': 12}, {'id': 998, 'image_count': 1}, {'id': 999, 'image_count': 85}, {'id': 1000, 'image_count': 1127}, {'id': 1001, 'image_count': 11}, {'id': 1002, 'image_count': 60}, {'id': 1003, 'image_count': 1}, {'id': 1004, 'image_count': 16}, {'id': 1005, 'image_count': 1}, {'id': 1006, 'image_count': 65}, {'id': 1007, 'image_count': 13}, {'id': 1008, 'image_count': 655}, {'id': 1009, 'image_count': 51}, {'id': 1010, 'image_count': 1}, {'id': 1011, 'image_count': 673}, {'id': 1012, 'image_count': 5}, {'id': 1013, 'image_count': 36}, {'id': 1014, 'image_count': 54}, {'id': 1015, 'image_count': 5}, {'id': 1016, 'image_count': 8}, {'id': 1017, 'image_count': 305}, {'id': 1018, 'image_count': 297}, {'id': 1019, 'image_count': 1053}, {'id': 1020, 'image_count': 223}, {'id': 1021, 'image_count': 1037}, {'id': 1022, 'image_count': 63}, {'id': 1023, 'image_count': 1881}, {'id': 1024, 'image_count': 507}, {'id': 1025, 'image_count': 333}, {'id': 1026, 'image_count': 1911}, {'id': 1027, 'image_count': 1765}, {'id': 1028, 'image_count': 1}, {'id': 1029, 'image_count': 5}, {'id': 1030, 'image_count': 1}, {'id': 1031, 'image_count': 9}, {'id': 1032, 'image_count': 2}, {'id': 1033, 'image_count': 151}, {'id': 1034, 'image_count': 82}, {'id': 1035, 'image_count': 1931}, {'id': 1036, 'image_count': 41}, {'id': 1037, 'image_count': 1895}, {'id': 1038, 'image_count': 24}, {'id': 1039, 'image_count': 22}, {'id': 1040, 'image_count': 35}, {'id': 1041, 'image_count': 69}, {'id': 1042, 'image_count': 962}, {'id': 1043, 'image_count': 588}, {'id': 1044, 'image_count': 21}, {'id': 1045, 'image_count': 825}, {'id': 1046, 'image_count': 52}, {'id': 1047, 'image_count': 5}, {'id': 1048, 'image_count': 5}, {'id': 1049, 'image_count': 5}, {'id': 1050, 'image_count': 1860}, {'id': 1051, 'image_count': 56}, {'id': 1052, 'image_count': 1582}, {'id': 1053, 'image_count': 7}, {'id': 1054, 'image_count': 2}, {'id': 1055, 'image_count': 1562}, {'id': 1056, 'image_count': 1885}, {'id': 1057, 'image_count': 1}, {'id': 1058, 'image_count': 5}, {'id': 1059, 'image_count': 137}, {'id': 1060, 'image_count': 1094}, {'id': 1061, 'image_count': 134}, {'id': 1062, 'image_count': 29}, {'id': 1063, 'image_count': 22}, {'id': 1064, 'image_count': 522}, {'id': 1065, 'image_count': 50}, {'id': 1066, 'image_count': 68}, {'id': 1067, 'image_count': 16}, {'id': 1068, 'image_count': 40}, {'id': 1069, 'image_count': 35}, {'id': 1070, 'image_count': 135}, {'id': 1071, 'image_count': 1413}, {'id': 1072, 'image_count': 772}, {'id': 1073, 'image_count': 50}, {'id': 1074, 'image_count': 1015}, {'id': 1075, 'image_count': 1}, {'id': 1076, 'image_count': 65}, {'id': 1077, 'image_count': 1900}, {'id': 1078, 'image_count': 1302}, {'id': 1079, 'image_count': 1977}, {'id': 1080, 'image_count': 2}, {'id': 1081, 'image_count': 29}, {'id': 1082, 'image_count': 36}, {'id': 1083, 'image_count': 138}, {'id': 1084, 'image_count': 4}, {'id': 1085, 'image_count': 67}, {'id': 1086, 'image_count': 26}, {'id': 1087, 'image_count': 25}, {'id': 1088, 'image_count': 33}, {'id': 1089, 'image_count': 37}, {'id': 1090, 'image_count': 50}, {'id': 1091, 'image_count': 270}, {'id': 1092, 'image_count': 12}, {'id': 1093, 'image_count': 316}, {'id': 1094, 'image_count': 41}, {'id': 1095, 'image_count': 224}, {'id': 1096, 'image_count': 105}, {'id': 1097, 'image_count': 1925}, {'id': 1098, 'image_count': 1021}, {'id': 1099, 'image_count': 1213}, {'id': 1100, 'image_count': 172}, {'id': 1101, 'image_count': 28}, {'id': 1102, 'image_count': 745}, {'id': 1103, 
'image_count': 187}, {'id': 1104, 'image_count': 147}, {'id': 1105, 'image_count': 136}, {'id': 1106, 'image_count': 34}, {'id': 1107, 'image_count': 41}, {'id': 1108, 'image_count': 636}, {'id': 1109, 'image_count': 570}, {'id': 1110, 'image_count': 1149}, {'id': 1111, 'image_count': 61}, {'id': 1112, 'image_count': 1890}, {'id': 1113, 'image_count': 18}, {'id': 1114, 'image_count': 143}, {'id': 1115, 'image_count': 1517}, {'id': 1116, 'image_count': 7}, {'id': 1117, 'image_count': 943}, {'id': 1118, 'image_count': 6}, {'id': 1119, 'image_count': 1}, {'id': 1120, 'image_count': 11}, {'id': 1121, 'image_count': 101}, {'id': 1122, 'image_count': 1909}, {'id': 1123, 'image_count': 800}, {'id': 1124, 'image_count': 1}, {'id': 1125, 'image_count': 44}, {'id': 1126, 'image_count': 3}, {'id': 1127, 'image_count': 44}, {'id': 1128, 'image_count': 31}, {'id': 1129, 'image_count': 7}, {'id': 1130, 'image_count': 20}, {'id': 1131, 'image_count': 11}, {'id': 1132, 'image_count': 13}, {'id': 1133, 'image_count': 1924}, {'id': 1134, 'image_count': 113}, {'id': 1135, 'image_count': 2}, {'id': 1136, 'image_count': 139}, {'id': 1137, 'image_count': 12}, {'id': 1138, 'image_count': 37}, {'id': 1139, 'image_count': 1866}, {'id': 1140, 'image_count': 47}, {'id': 1141, 'image_count': 1468}, {'id': 1142, 'image_count': 729}, {'id': 1143, 'image_count': 24}, {'id': 1144, 'image_count': 1}, {'id': 1145, 'image_count': 10}, {'id': 1146, 'image_count': 3}, {'id': 1147, 'image_count': 14}, {'id': 1148, 'image_count': 4}, {'id': 1149, 'image_count': 29}, {'id': 1150, 'image_count': 4}, {'id': 1151, 'image_count': 70}, {'id': 1152, 'image_count': 46}, {'id': 1153, 'image_count': 14}, {'id': 1154, 'image_count': 48}, {'id': 1155, 'image_count': 1855}, {'id': 1156, 'image_count': 113}, {'id': 1157, 'image_count': 1}, {'id': 1158, 'image_count': 1}, {'id': 1159, 'image_count': 10}, {'id': 1160, 'image_count': 54}, {'id': 1161, 'image_count': 1923}, {'id': 1162, 'image_count': 630}, {'id': 1163, 'image_count': 31}, {'id': 1164, 'image_count': 69}, {'id': 1165, 'image_count': 7}, {'id': 1166, 'image_count': 11}, {'id': 1167, 'image_count': 1}, {'id': 1168, 'image_count': 30}, {'id': 1169, 'image_count': 50}, {'id': 1170, 'image_count': 45}, {'id': 1171, 'image_count': 28}, {'id': 1172, 'image_count': 114}, {'id': 1173, 'image_count': 193}, {'id': 1174, 'image_count': 21}, {'id': 1175, 'image_count': 91}, {'id': 1176, 'image_count': 31}, {'id': 1177, 'image_count': 1469}, {'id': 1178, 'image_count': 1924}, {'id': 1179, 'image_count': 87}, {'id': 1180, 'image_count': 77}, {'id': 1181, 'image_count': 11}, {'id': 1182, 'image_count': 47}, {'id': 1183, 'image_count': 21}, {'id': 1184, 'image_count': 47}, {'id': 1185, 'image_count': 70}, {'id': 1186, 'image_count': 1838}, {'id': 1187, 'image_count': 19}, {'id': 1188, 'image_count': 531}, {'id': 1189, 'image_count': 11}, {'id': 1190, 'image_count': 941}, {'id': 1191, 'image_count': 113}, {'id': 1192, 'image_count': 26}, {'id': 1193, 'image_count': 5}, {'id': 1194, 'image_count': 56}, {'id': 1195, 'image_count': 73}, {'id': 1196, 'image_count': 32}, {'id': 1197, 'image_count': 128}, {'id': 1198, 'image_count': 623}, {'id': 1199, 'image_count': 12}, {'id': 1200, 'image_count': 52}, {'id': 1201, 'image_count': 11}, {'id': 1202, 'image_count': 1674}, {'id': 1203, 'image_count': 81}] # noqa
+# fmt: on
diff --git a/vendor/detectron2/detectron2/data/datasets/pascal_voc.py b/vendor/detectron2/detectron2/data/datasets/pascal_voc.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbbf82cb96442bfa0cf05ed0f4dddf3645434b7e
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/pascal_voc.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import numpy as np
+import os
+import xml.etree.ElementTree as ET
+from typing import List, Tuple, Union
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+from detectron2.utils.file_io import PathManager
+
+__all__ = ["load_voc_instances", "register_pascal_voc"]
+
+
+# fmt: off
+CLASS_NAMES = (
+ "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
+ "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
+ "pottedplant", "sheep", "sofa", "train", "tvmonitor"
+)
+# fmt: on
+
+
+def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
+ """
+ Load Pascal VOC detection annotations to Detectron2 format.
+
+ Args:
+ dirname: Contain "Annotations", "ImageSets", "JPEGImages"
+ split (str): one of "train", "test", "val", "trainval"
+ class_names: list or tuple of class names
+ """
+ with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
+ fileids = np.loadtxt(f, dtype=str)
+
+ # Needs to read many small annotation files; it makes sense to fetch a local copy first.
+ annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
+ dicts = []
+ for fileid in fileids:
+ anno_file = os.path.join(annotation_dirname, fileid + ".xml")
+ jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
+
+ with PathManager.open(anno_file) as f:
+ tree = ET.parse(f)
+
+ r = {
+ "file_name": jpeg_file,
+ "image_id": fileid,
+ "height": int(tree.findall("./size/height")[0].text),
+ "width": int(tree.findall("./size/width")[0].text),
+ }
+ instances = []
+
+ for obj in tree.findall("object"):
+ cls = obj.find("name").text
+ # We include "difficult" samples in training.
+ # Based on limited experiments, they don't hurt accuracy.
+ # difficult = int(obj.find("difficult").text)
+ # if difficult == 1:
+ # continue
+ bbox = obj.find("bndbox")
+ bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
+ # Original annotations are integers in the range [1, W or H]
+ # Assuming they mean 1-based pixel indices (inclusive),
+ # a box with annotation (xmin=1, xmax=W) covers the whole image.
+ # In coordinate space this is represented by (xmin=0, xmax=W)
+ bbox[0] -= 1.0
+ bbox[1] -= 1.0
+ instances.append(
+ {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
+ )
+ r["annotations"] = instances
+ dicts.append(r)
+ return dicts
+
+
+def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
+ DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names))
+ MetadataCatalog.get(name).set(
+ thing_classes=list(class_names), dirname=dirname, year=year, split=split
+ )
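+
+# Example usage (editor's illustrative sketch, not part of the upstream file).
+# The dataset name and the root path "datasets/VOC2012" are assumptions; point
+# `dirname` at a directory containing "Annotations", "ImageSets" and "JPEGImages":
+#
+#   register_pascal_voc(
+#       "my_voc_2012_trainval", dirname="datasets/VOC2012",
+#       split="trainval", year=2012,
+#   )
+#   dataset_dicts = DatasetCatalog.get("my_voc_2012_trainval")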
diff --git a/vendor/detectron2/detectron2/data/datasets/register_coco.py b/vendor/detectron2/detectron2/data/datasets/register_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..e564438d5bf016bcdbb65b4bbdc215d79f579f8a
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/datasets/register_coco.py
@@ -0,0 +1,3 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .coco import register_coco_instances # noqa
+from .coco_panoptic import register_coco_panoptic_separated # noqa
diff --git a/vendor/detectron2/detectron2/data/detection_utils.py b/vendor/detectron2/detectron2/data/detection_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ada19bdb4a2aa74874da4dba5d179ce38201c85d
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/detection_utils.py
@@ -0,0 +1,659 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+"""
+Common data processing utilities that are used in a
+typical object detection data pipeline.
+"""
+import logging
+import numpy as np
+from typing import List, Union
+import pycocotools.mask as mask_util
+import torch
+from PIL import Image
+
+from detectron2.structures import (
+ BitMasks,
+ Boxes,
+ BoxMode,
+ Instances,
+ Keypoints,
+ PolygonMasks,
+ RotatedBoxes,
+ polygons_to_bitmask,
+)
+from detectron2.utils.file_io import PathManager
+
+from . import transforms as T
+from .catalog import MetadataCatalog
+
+__all__ = [
+ "SizeMismatchError",
+ "convert_image_to_rgb",
+ "check_image_size",
+ "transform_proposals",
+ "transform_instance_annotations",
+ "annotations_to_instances",
+ "annotations_to_instances_rotated",
+ "build_augmentation",
+ "build_transform_gen",
+ "create_keypoint_hflip_indices",
+ "filter_empty_instances",
+ "read_image",
+]
+
+
+class SizeMismatchError(ValueError):
+ """
+ When the loaded image has a different width/height than the annotation.
+ """
+
+
+# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601
+_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]]
+_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]]
+
+# https://www.exiv2.org/tags.html
+_EXIF_ORIENT = 274 # exif 'Orientation' tag
+
+
+def convert_PIL_to_numpy(image, format):
+ """
+ Convert PIL image to numpy array of target format.
+
+ Args:
+ image (PIL.Image): a PIL image
+ format (str): the format of output image
+
+ Returns:
+ (np.ndarray): also see `read_image`
+ """
+ if format is not None:
+ # PIL only supports RGB, so convert to RGB first and reorder the channels below
+ conversion_format = format
+ if format in ["BGR", "YUV-BT.601"]:
+ conversion_format = "RGB"
+ image = image.convert(conversion_format)
+ image = np.asarray(image)
+ # PIL squeezes out the channel dimension for "L", so make it HWC
+ if format == "L":
+ image = np.expand_dims(image, -1)
+
+ # handle formats not supported by PIL
+ elif format == "BGR":
+ # flip channels if needed
+ image = image[:, :, ::-1]
+ elif format == "YUV-BT.601":
+ image = image / 255.0
+ image = np.dot(image, np.array(_M_RGB2YUV).T)
+
+ return image
+
+
+def convert_image_to_rgb(image, format):
+ """
+ Convert an image from given format to RGB.
+
+ Args:
+ image (np.ndarray or Tensor): an HWC image
+ format (str): the format of input image, also see `read_image`
+
+ Returns:
+ (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8
+ """
+ if isinstance(image, torch.Tensor):
+ image = image.cpu().numpy()
+ if format == "BGR":
+ image = image[:, :, [2, 1, 0]]
+ elif format == "YUV-BT.601":
+ image = np.dot(image, np.array(_M_YUV2RGB).T)
+ image = image * 255.0
+ else:
+ if format == "L":
+ image = image[:, :, 0]
+ image = image.astype(np.uint8)
+ image = np.asarray(Image.fromarray(image, mode=format).convert("RGB"))
+ return image
+
+
+def _apply_exif_orientation(image):
+ """
+ Applies the exif orientation correctly.
+
+ This code exists per the bug:
+ https://github.com/python-pillow/Pillow/issues/3973
+ with the function `ImageOps.exif_transpose`. The Pillow source raises errors with
+ various methods, especially `tobytes`
+
+ Function based on:
+ https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
+ https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527
+
+ Args:
+ image (PIL.Image): a PIL image
+
+ Returns:
+ (PIL.Image): the PIL image with exif orientation applied, if applicable
+ """
+ if not hasattr(image, "getexif"):
+ return image
+
+ try:
+ exif = image.getexif()
+ except Exception: # https://github.com/facebookresearch/detectron2/issues/1885
+ exif = None
+
+ if exif is None:
+ return image
+
+ orientation = exif.get(_EXIF_ORIENT)
+
+ method = {
+ 2: Image.FLIP_LEFT_RIGHT,
+ 3: Image.ROTATE_180,
+ 4: Image.FLIP_TOP_BOTTOM,
+ 5: Image.TRANSPOSE,
+ 6: Image.ROTATE_270,
+ 7: Image.TRANSVERSE,
+ 8: Image.ROTATE_90,
+ }.get(orientation)
+
+ if method is not None:
+ return image.transpose(method)
+ return image
+
+
+def read_image(file_name, format=None):
+ """
+ Read an image into the given format.
+ Will apply rotation and flipping if the image has such exif information.
+
+ Args:
+ file_name (str): image file path
+ format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".
+
+ Returns:
+ image (np.ndarray):
+ an HWC image in the given format, which is 0-255, uint8 for
+ supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
+ """
+ with PathManager.open(file_name, "rb") as f:
+ image = Image.open(f)
+
+ # work around this bug: https://github.com/python-pillow/Pillow/issues/3973
+ image = _apply_exif_orientation(image)
+ return convert_PIL_to_numpy(image, format)
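+
+# Example usage (editor's illustrative sketch; "input.jpg" is a placeholder path):
+#
+#   img_bgr = read_image("input.jpg", format="BGR")    # HWC uint8 array, BGR order
+#   img_rgb = convert_image_to_rgb(img_bgr, "BGR")     # back to RGB for visualization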
+
+
+def check_image_size(dataset_dict, image):
+ """
+ Raise an error if the image does not match the size specified in the dict.
+ """
+ if "width" in dataset_dict or "height" in dataset_dict:
+ image_wh = (image.shape[1], image.shape[0])
+ expected_wh = (dataset_dict["width"], dataset_dict["height"])
+ if not image_wh == expected_wh:
+ raise SizeMismatchError(
+ "Mismatched image shape{}, got {}, expect {}.".format(
+ " for image " + dataset_dict["file_name"]
+ if "file_name" in dataset_dict
+ else "",
+ image_wh,
+ expected_wh,
+ )
+ + " Please check the width/height in your annotation."
+ )
+
+ # To ensure bbox always remap to original image size
+ if "width" not in dataset_dict:
+ dataset_dict["width"] = image.shape[1]
+ if "height" not in dataset_dict:
+ dataset_dict["height"] = image.shape[0]
+
+
+def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0):
+ """
+ Apply transformations to the proposals in dataset_dict, if any.
+
+ Args:
+ dataset_dict (dict): a dict read from the dataset, possibly
+ contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
+ image_shape (tuple): height, width
+ transforms (TransformList):
+ proposal_topk (int): only keep top-K scoring proposals
+ min_box_size (int): proposals with either side smaller than this
+ threshold are removed
+
+ The input dict is modified in-place, with abovementioned keys removed. A new
+ key "proposals" will be added. Its value is an `Instances`
+ object which contains the transformed proposals in its field
+ "proposal_boxes" and "objectness_logits".
+ """
+ if "proposal_boxes" in dataset_dict:
+ # Transform proposal boxes
+ boxes = transforms.apply_box(
+ BoxMode.convert(
+ dataset_dict.pop("proposal_boxes"),
+ dataset_dict.pop("proposal_bbox_mode"),
+ BoxMode.XYXY_ABS,
+ )
+ )
+ boxes = Boxes(boxes)
+ objectness_logits = torch.as_tensor(
+ dataset_dict.pop("proposal_objectness_logits").astype("float32")
+ )
+
+ boxes.clip(image_shape)
+ keep = boxes.nonempty(threshold=min_box_size)
+ boxes = boxes[keep]
+ objectness_logits = objectness_logits[keep]
+
+ proposals = Instances(image_shape)
+ proposals.proposal_boxes = boxes[:proposal_topk]
+ proposals.objectness_logits = objectness_logits[:proposal_topk]
+ dataset_dict["proposals"] = proposals
+
+
+def get_bbox(annotation):
+ """
+ Get bbox from data
+ Args:
+ annotation (dict): dict of instance annotations for a single instance.
+ Returns:
+ bbox (ndarray): x1, y1, x2, y2 coordinates
+ """
+ # bbox is 1d (per-instance bounding box)
+ bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
+ return bbox
+
+
+def transform_instance_annotations(
+ annotation, transforms, image_size, *, keypoint_hflip_indices=None
+):
+ """
+ Apply transforms to box, segmentation and keypoints annotations of a single instance.
+
+ It will use `transforms.apply_box` for the box, and
+ `transforms.apply_coords` for segmentation polygons & keypoints.
+ If you need anything more specially designed for each data structure,
+ you'll need to implement your own version of this function or the transforms.
+
+ Args:
+ annotation (dict): dict of instance annotations for a single instance.
+ It will be modified in-place.
+ transforms (TransformList or list[Transform]):
+ image_size (tuple): the height, width of the transformed image
+ keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.
+
+ Returns:
+ dict:
+ the same input dict with fields "bbox", "segmentation", "keypoints"
+ transformed according to `transforms`.
+ The "bbox_mode" field will be set to XYXY_ABS.
+ """
+ if isinstance(transforms, (tuple, list)):
+ transforms = T.TransformList(transforms)
+ # bbox is 1d (per-instance bounding box)
+ bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
+ # clip transformed bbox to image size
+ bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
+ annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
+ annotation["bbox_mode"] = BoxMode.XYXY_ABS
+
+ if "segmentation" in annotation:
+ # each instance contains 1 or more polygons
+ segm = annotation["segmentation"]
+ if isinstance(segm, list):
+ # polygons
+ polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
+ annotation["segmentation"] = [
+ p.reshape(-1) for p in transforms.apply_polygons(polygons)
+ ]
+ elif isinstance(segm, dict):
+ # RLE
+ mask = mask_util.decode(segm)
+ mask = transforms.apply_segmentation(mask)
+ assert tuple(mask.shape[:2]) == image_size
+ annotation["segmentation"] = mask
+ else:
+ raise ValueError(
+ "Cannot transform segmentation of type '{}'!"
+ "Supported types are: polygons as list[list[float] or ndarray],"
+ " COCO-style RLE as a dict.".format(type(segm))
+ )
+
+ if "keypoints" in annotation:
+ keypoints = transform_keypoint_annotations(
+ annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
+ )
+ annotation["keypoints"] = keypoints
+
+ return annotation
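+
+# Example usage (editor's illustrative sketch; the box and image size are made up):
+#
+#   anno = {"bbox": [10.0, 20.0, 100.0, 200.0], "bbox_mode": BoxMode.XYWH_ABS}
+#   anno = transform_instance_annotations(
+#       anno, [T.HFlipTransform(width=640)], image_size=(480, 640)
+#   )
+#   # anno["bbox"] is now an XYXY_ABS box in the horizontally flipped image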
+
+
+def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None):
+ """
+ Transform keypoint annotations of an image.
+ If a keypoint is transformed outside the image boundary, it will be marked "unlabeled" (visibility=0).
+
+ Args:
+ keypoints (list[float]): Nx3 float in Detectron2's Dataset format.
+ Each point is represented by (x, y, visibility).
+ transforms (TransformList):
+ image_size (tuple): the height, width of the transformed image
+ keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.
+ When `transforms` includes horizontal flip, will use the index
+ mapping to flip keypoints.
+ """
+ # (N*3,) -> (N, 3)
+ keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3)
+ keypoints_xy = transforms.apply_coords(keypoints[:, :2])
+
+ # Set all out-of-boundary points to "unlabeled"
+ inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1]))
+ inside = inside.all(axis=1)
+ keypoints[:, :2] = keypoints_xy
+ keypoints[:, 2][~inside] = 0
+
+ # This assumes that HorizFlipTransform is the only one that does flip
+ do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+
+ # Alternative way: check if the probe points were horizontally flipped.
+ # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]])
+ # probe_aug = transforms.apply_coords(probe.copy())
+ # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa
+
+ # If flipped, swap each keypoint with its opposite-handed equivalent
+ if do_hflip:
+ if keypoint_hflip_indices is None:
+ raise ValueError("Cannot flip keypoints without providing flip indices!")
+ if len(keypoints) != len(keypoint_hflip_indices):
+ raise ValueError(
+ "Keypoint data has {} points, but metadata "
+ "contains {} points!".format(len(keypoints), len(keypoint_hflip_indices))
+ )
+ keypoints = keypoints[np.asarray(keypoint_hflip_indices, dtype=np.int32), :]
+
+ # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0
+ keypoints[keypoints[:, 2] == 0] = 0
+ return keypoints
+
+
+def annotations_to_instances(annos, image_size, mask_format="polygon"):
+ """
+ Create an :class:`Instances` object used by the models,
+ from instance annotations in the dataset dict.
+
+ Args:
+ annos (list[dict]): a list of instance annotations in one image, each
+ element for one instance.
+ image_size (tuple): height, width
+
+ Returns:
+ Instances:
+ It will contain fields "gt_boxes", "gt_classes",
+ "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
+ This is the format that builtin models expect.
+ """
+ boxes = (
+ np.stack(
+ [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
+ )
+ if len(annos)
+ else np.zeros((0, 4))
+ )
+ target = Instances(image_size)
+ target.gt_boxes = Boxes(boxes)
+
+ classes = [int(obj["category_id"]) for obj in annos]
+ classes = torch.tensor(classes, dtype=torch.int64)
+ target.gt_classes = classes
+
+ if len(annos) and "segmentation" in annos[0]:
+ segms = [obj["segmentation"] for obj in annos]
+ if mask_format == "polygon":
+ try:
+ masks = PolygonMasks(segms)
+ except ValueError as e:
+ raise ValueError(
+ "Failed to use mask_format=='polygon' from the given annotations!"
+ ) from e
+ else:
+ assert mask_format == "bitmask", mask_format
+ masks = []
+ for segm in segms:
+ if isinstance(segm, list):
+ # polygon
+ masks.append(polygons_to_bitmask(segm, *image_size))
+ elif isinstance(segm, dict):
+ # COCO RLE
+ masks.append(mask_util.decode(segm))
+ elif isinstance(segm, np.ndarray):
+ assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
+ segm.ndim
+ )
+ # mask array
+ masks.append(segm)
+ else:
+ raise ValueError(
+ "Cannot convert segmentation of type '{}' to BitMasks!"
+ "Supported types are: polygons as list[list[float] or ndarray],"
+ " COCO-style RLE as a dict, or a binary segmentation mask "
+ " in a 2D numpy array of shape HxW.".format(type(segm))
+ )
+ # torch.from_numpy does not support array with negative stride.
+ masks = BitMasks(
+ torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
+ )
+ target.gt_masks = masks
+
+ if len(annos) and "keypoints" in annos[0]:
+ kpts = [obj.get("keypoints", []) for obj in annos]
+ target.gt_keypoints = Keypoints(kpts)
+
+ return target
+
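+# Illustrative usage sketch (editor's addition); a single annotation given in XYWH_ABS mode:
+#
+#   annos = [{"bbox": [10, 10, 40, 70], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0}]
+#   instances = annotations_to_instances(annos, image_size=(480, 640))
+#   # instances.gt_boxes   -> Boxes([[10., 10., 50., 80.]]) in XYXY_ABS
+#   # instances.gt_classes -> tensor([0])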
+
+def annotations_to_instances_rotated(annos, image_size):
+ """
+ Create an :class:`Instances` object used by the models,
+ from instance annotations in the dataset dict.
+ Compared to `annotations_to_instances`, this function is for rotated boxes only
+
+ Args:
+ annos (list[dict]): a list of instance annotations in one image, each
+ element for one instance.
+ image_size (tuple): height, width
+
+ Returns:
+ Instances:
+ Containing fields "gt_boxes", "gt_classes",
+ if they can be obtained from `annos`.
+ This is the format that builtin models expect.
+ """
+ boxes = [obj["bbox"] for obj in annos]
+ target = Instances(image_size)
+ boxes = target.gt_boxes = RotatedBoxes(boxes)
+ boxes.clip(image_size)
+
+ classes = [obj["category_id"] for obj in annos]
+ classes = torch.tensor(classes, dtype=torch.int64)
+ target.gt_classes = classes
+
+ return target
+
+
+def filter_empty_instances(
+ instances, by_box=True, by_mask=True, box_threshold=1e-5, return_mask=False
+):
+ """
+ Filter out empty instances in an `Instances` object.
+
+ Args:
+ instances (Instances):
+ by_box (bool): whether to filter out instances with empty boxes
+ by_mask (bool): whether to filter out instances with empty masks
+ box_threshold (float): minimum width and height to be considered non-empty
+ return_mask (bool): whether to return boolean mask of filtered instances
+
+ Returns:
+ Instances: the filtered instances.
+ tensor[bool], optional: boolean mask of filtered instances
+ """
+ assert by_box or by_mask
+ r = []
+ if by_box:
+ r.append(instances.gt_boxes.nonempty(threshold=box_threshold))
+ if instances.has("gt_masks") and by_mask:
+ r.append(instances.gt_masks.nonempty())
+
+ # TODO: can also filter visible keypoints
+
+ if not r:
+ return instances
+ m = r[0]
+ for x in r[1:]:
+ m = m & x
+ if return_mask:
+ return instances[m], m
+ return instances[m]
+
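+# Illustrative usage sketch (editor's addition):
+#
+#   instances = filter_empty_instances(instances)                        # drop degenerate boxes/masks
+#   instances, keep = filter_empty_instances(instances, return_mask=True)
+#   # `keep` is a boolean tensor marking which of the original instances survived.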
+
+def create_keypoint_hflip_indices(dataset_names: Union[str, List[str]]) -> List[int]:
+ """
+ Args:
+ dataset_names: list of dataset names
+
+ Returns:
+ list[int]: a list of size=#keypoints, storing the
+ horizontally-flipped keypoint indices.
+ """
+ if isinstance(dataset_names, str):
+ dataset_names = [dataset_names]
+
+ check_metadata_consistency("keypoint_names", dataset_names)
+ check_metadata_consistency("keypoint_flip_map", dataset_names)
+
+ meta = MetadataCatalog.get(dataset_names[0])
+ names = meta.keypoint_names
+ # TODO flip -> hflip
+ flip_map = dict(meta.keypoint_flip_map)
+ flip_map.update({v: k for k, v in flip_map.items()})
+ flipped_names = [i if i not in flip_map else flip_map[i] for i in names]
+ flip_indices = [names.index(i) for i in flipped_names]
+ return flip_indices
+
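+# Illustrative usage sketch (editor's addition), assuming a dataset whose metadata defines
+# `keypoint_names` and `keypoint_flip_map` (e.g. the builtin COCO keypoint datasets):
+#
+#   flip_inds = create_keypoint_hflip_indices("keypoints_coco_2017_train")
+#   # flip_inds[i] is the index that keypoint i maps to under a horizontal flip
+#   # (e.g. left_eye <-> right_eye); unpaired keypoints such as "nose" map to themselves.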
+
+def get_fed_loss_cls_weights(dataset_names: Union[str, List[str]], freq_weight_power=1.0):
+ """
+ Get frequency weight for each class sorted by class id.
+ We calculate the frequency weight using image_count raised to the power freq_weight_power.
+
+ Args:
+ dataset_names: list of dataset names
+ freq_weight_power: power value
+ """
+ if isinstance(dataset_names, str):
+ dataset_names = [dataset_names]
+
+ check_metadata_consistency("class_image_count", dataset_names)
+
+ meta = MetadataCatalog.get(dataset_names[0])
+ class_freq_meta = meta.class_image_count
+ class_freq = torch.tensor(
+ [c["image_count"] for c in sorted(class_freq_meta, key=lambda x: x["id"])]
+ )
+ class_freq_weight = class_freq.float() ** freq_weight_power
+ return class_freq_weight
+
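+# Illustrative usage sketch (editor's addition), assuming a dataset whose metadata defines
+# `class_image_count` (e.g. the builtin LVIS datasets):
+#
+#   cls_weights = get_fed_loss_cls_weights("lvis_v1_train", freq_weight_power=0.5)
+#   # cls_weights[i] == image_count_of_class_i ** 0.5, ordered by category id.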
+
+def gen_crop_transform_with_instance(crop_size, image_size, instance):
+ """
+ Generate a CropTransform so that the cropping region contains
+ the center of the given instance.
+
+ Args:
+ crop_size (tuple): h, w in pixels
+ image_size (tuple): h, w
+ instance (dict): an annotation dict of one instance, in Detectron2's
+ dataset format.
+ """
+ crop_size = np.asarray(crop_size, dtype=np.int32)
+ bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
+ center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
+ assert (
+ image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
+ ), "The annotation bounding box is outside of the image!"
+ assert (
+ image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
+ ), "Crop size is larger than image size!"
+
+ min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
+ max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
+ max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))
+
+ y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
+ x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
+ return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
+
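+# Illustrative usage sketch (editor's addition); the returned CropTransform is a 224x224
+# window that is guaranteed to contain the center of the given box:
+#
+#   anno = {"bbox": [100.0, 120.0, 60.0, 40.0], "bbox_mode": BoxMode.XYWH_ABS}
+#   tfm = gen_crop_transform_with_instance(
+#       crop_size=(224, 224), image_size=(480, 640), instance=anno
+#   )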
+
+def check_metadata_consistency(key, dataset_names):
+ """
+ Check that the datasets have consistent metadata.
+
+ Args:
+ key (str): a metadata key
+ dataset_names (list[str]): a list of dataset names
+
+ Raises:
+ AttributeError: if the key does not exist in the metadata
+ ValueError: if the given datasets do not have the same metadata values defined by key
+ """
+ if len(dataset_names) == 0:
+ return
+ logger = logging.getLogger(__name__)
+ entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names]
+ for idx, entry in enumerate(entries_per_dataset):
+ if entry != entries_per_dataset[0]:
+ logger.error(
+ "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry))
+ )
+ logger.error(
+ "Metadata '{}' for dataset '{}' is '{}'".format(
+ key, dataset_names[0], str(entries_per_dataset[0])
+ )
+ )
+ raise ValueError("Datasets have different metadata '{}'!".format(key))
+
+
+def build_augmentation(cfg, is_train):
+ """
+ Create a list of default :class:`Augmentation` from config.
+ Now it includes resizing and flipping.
+
+ Returns:
+ list[Augmentation]
+ """
+ if is_train:
+ min_size = cfg.INPUT.MIN_SIZE_TRAIN
+ max_size = cfg.INPUT.MAX_SIZE_TRAIN
+ sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
+ else:
+ min_size = cfg.INPUT.MIN_SIZE_TEST
+ max_size = cfg.INPUT.MAX_SIZE_TEST
+ sample_style = "choice"
+ augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)]
+ if is_train and cfg.INPUT.RANDOM_FLIP != "none":
+ augmentation.append(
+ T.RandomFlip(
+ horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
+ vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
+ )
+ )
+ return augmentation
+
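+# Illustrative usage sketch (editor's addition); with the default config this returns
+# [ResizeShortestEdge(...), RandomFlip(horizontal=True)]:
+#
+#   from detectron2.config import get_cfg
+#   augs = build_augmentation(get_cfg(), is_train=True)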
+
+build_transform_gen = build_augmentation
+"""
+Alias for backward-compatibility.
+"""
diff --git a/vendor/detectron2/detectron2/data/samplers/__init__.py b/vendor/detectron2/detectron2/data/samplers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..85c9f1a9df8a4038fbd4246239b699402e382309
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/samplers/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .distributed_sampler import (
+ InferenceSampler,
+ RandomSubsetTrainingSampler,
+ RepeatFactorTrainingSampler,
+ TrainingSampler,
+)
+
+from .grouped_batch_sampler import GroupedBatchSampler
+
+__all__ = [
+ "GroupedBatchSampler",
+ "TrainingSampler",
+ "RandomSubsetTrainingSampler",
+ "InferenceSampler",
+ "RepeatFactorTrainingSampler",
+]
diff --git a/vendor/detectron2/detectron2/data/samplers/distributed_sampler.py b/vendor/detectron2/detectron2/data/samplers/distributed_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..a098e6ac07c1b193fddcb69e6e54aced82e6081c
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/samplers/distributed_sampler.py
@@ -0,0 +1,278 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import logging
+import math
+from collections import defaultdict
+from typing import Optional
+import torch
+from torch.utils.data.sampler import Sampler
+
+from detectron2.utils import comm
+
+logger = logging.getLogger(__name__)
+
+
+class TrainingSampler(Sampler):
+ """
+ In training, we only care about the "infinite stream" of training data.
+ So this sampler produces an infinite stream of indices and
+ all workers cooperate to correctly shuffle the indices and sample different indices.
+
+ The sampler in each worker effectively produces `indices[worker_id::num_workers]`
+ where `indices` is an infinite stream of indices consisting of
+ `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True)
+ or `range(size) + range(size) + ...` (if shuffle is False)
+
+ Note that this sampler does not shard based on pytorch DataLoader worker id.
+ A sampler passed to pytorch DataLoader is used only with map-style dataset
+ and will not be executed inside workers.
+ But if this sampler is used in a way that it gets executed inside a dataloader
+ worker, then extra work needs to be done to shard its outputs based on worker id.
+ This is required so that workers don't produce identical data.
+ :class:`ToIterableDataset` implements this logic.
+ This note is true for all samplers in detectron2.
+ """
+
+ def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None):
+ """
+ Args:
+ size (int): the total number of data of the underlying dataset to sample from
+ shuffle (bool): whether to shuffle the indices or not
+ seed (int): the initial seed of the shuffle. Must be the same
+ across all workers. If None, will use a random seed shared
+ among workers (require synchronization among all workers).
+ """
+ if not isinstance(size, int):
+ raise TypeError(f"TrainingSampler(size=) expects an int. Got type {type(size)}.")
+ if size <= 0:
+ raise ValueError(f"TrainingSampler(size=) expects a positive int. Got {size}.")
+ self._size = size
+ self._shuffle = shuffle
+ if seed is None:
+ seed = comm.shared_random_seed()
+ self._seed = int(seed)
+
+ self._rank = comm.get_rank()
+ self._world_size = comm.get_world_size()
+
+ def __iter__(self):
+ start = self._rank
+ yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)
+
+ def _infinite_indices(self):
+ g = torch.Generator()
+ g.manual_seed(self._seed)
+ while True:
+ if self._shuffle:
+ yield from torch.randperm(self._size, generator=g).tolist()
+ else:
+ yield from torch.arange(self._size).tolist()
+
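+# Illustrative usage sketch (editor's addition). The sampler yields an infinite,
+# per-rank-sharded stream of indices, so consumers take a finite slice of it:
+#
+#   sampler = TrainingSampler(size=1000, shuffle=True, seed=42)
+#   it = iter(sampler)
+#   first_16 = [next(it) for _ in range(16)]   # indices in [0, 1000) for this rank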
+
+class RandomSubsetTrainingSampler(TrainingSampler):
+ """
+ Similar to TrainingSampler, but only sample a random subset of indices.
+ This is useful when you want to estimate the accuracy vs data-number curves by
+ training the model with different subset_ratio.
+ """
+
+ def __init__(
+ self,
+ size: int,
+ subset_ratio: float,
+ shuffle: bool = True,
+ seed_shuffle: Optional[int] = None,
+ seed_subset: Optional[int] = None,
+ ):
+ """
+ Args:
+ size (int): the total number of data of the underlying dataset to sample from
+ subset_ratio (float): the ratio of subset data to sample from the underlying dataset
+ shuffle (bool): whether to shuffle the indices or not
+ seed_shuffle (int): the initial seed of the shuffle. Must be the same
+ across all workers. If None, will use a random seed shared
+ among workers (require synchronization among all workers).
+ seed_subset (int): the seed to randomize the subset to be sampled.
+ Must be the same across all workers. If None, will use a random seed shared
+ among workers (require synchronization among all workers).
+ """
+ super().__init__(size=size, shuffle=shuffle, seed=seed_shuffle)
+
+ assert 0.0 < subset_ratio <= 1.0
+ self._size_subset = int(size * subset_ratio)
+ assert self._size_subset > 0
+ if seed_subset is None:
+ seed_subset = comm.shared_random_seed()
+ self._seed_subset = int(seed_subset)
+
+ # randomly generate the subset indexes to be sampled from
+ g = torch.Generator()
+ g.manual_seed(self._seed_subset)
+ indexes_randperm = torch.randperm(self._size, generator=g)
+ self._indexes_subset = indexes_randperm[: self._size_subset]
+
+ logger.info("Using RandomSubsetTrainingSampler......")
+ logger.info(f"Randomly sample {self._size_subset} data from the original {self._size} data")
+
+ def _infinite_indices(self):
+ g = torch.Generator()
+ g.manual_seed(self._seed) # self._seed equals seed_shuffle from __init__()
+ while True:
+ if self._shuffle:
+ # generate a random permutation to shuffle self._indexes_subset
+ randperm = torch.randperm(self._size_subset, generator=g)
+ yield from self._indexes_subset[randperm].tolist()
+ else:
+ yield from self._indexes_subset.tolist()
+
+
+class RepeatFactorTrainingSampler(Sampler):
+ """
+ Similar to TrainingSampler, but a sample may appear more often than others based
+ on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS.
+ """
+
+ def __init__(self, repeat_factors, *, shuffle=True, seed=None):
+ """
+ Args:
+ repeat_factors (Tensor): a float vector, the repeat factor for each index. When it's
+ full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``.
+ shuffle (bool): whether to shuffle the indices or not
+ seed (int): the initial seed of the shuffle. Must be the same
+ across all workers. If None, will use a random seed shared
+ among workers (require synchronization among all workers).
+ """
+ self._shuffle = shuffle
+ if seed is None:
+ seed = comm.shared_random_seed()
+ self._seed = int(seed)
+
+ self._rank = comm.get_rank()
+ self._world_size = comm.get_world_size()
+
+ # Split into whole number (_int_part) and fractional (_frac_part) parts.
+ self._int_part = torch.trunc(repeat_factors)
+ self._frac_part = repeat_factors - self._int_part
+
+ @staticmethod
+ def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh):
+ """
+ Compute (fractional) per-image repeat factors based on category frequency.
+ The repeat factor for an image is a function of the frequency of the rarest
+ category labeled in that image. The "frequency of category c" in [0, 1] is defined
+ as the fraction of images in the training set (without repeats) in which category c
+ appears.
+ See :paper:`lvis` (>= v2) Appendix B.2.
+
+ Args:
+ dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
+ repeat_thresh (float): frequency threshold below which data is repeated.
+ If the frequency is half of `repeat_thresh`, the image will be
+ repeated twice.
+
+ Returns:
+ torch.Tensor:
+ the i-th element is the repeat factor for the dataset image at index i.
+ """
+ # 1. For each category c, compute the fraction of images that contain it: f(c)
+ category_freq = defaultdict(int)
+ for dataset_dict in dataset_dicts: # For each image (without repeats)
+ cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]}
+ for cat_id in cat_ids:
+ category_freq[cat_id] += 1
+ num_images = len(dataset_dicts)
+ for k, v in category_freq.items():
+ category_freq[k] = v / num_images
+
+ # 2. For each category c, compute the category-level repeat factor:
+ # r(c) = max(1, sqrt(t / f(c)))
+ category_rep = {
+ cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq))
+ for cat_id, cat_freq in category_freq.items()
+ }
+
+ # 3. For each image I, compute the image-level repeat factor:
+ # r(I) = max_{c in I} r(c)
+ rep_factors = []
+ for dataset_dict in dataset_dicts:
+ cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]}
+ rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0)
+ rep_factors.append(rep_factor)
+
+ return torch.tensor(rep_factors, dtype=torch.float32)
+
+ def _get_epoch_indices(self, generator):
+ """
+ Create a list of dataset indices (with repeats) to use for one epoch.
+
+ Args:
+ generator (torch.Generator): pseudo random number generator used for
+ stochastic rounding.
+
+ Returns:
+ torch.Tensor: list of dataset indices to use in one epoch. Each index
+ is repeated based on its calculated repeat factor.
+ """
+ # Since repeat factors are fractional, we use stochastic rounding so
+ # that the target repeat factor is achieved in expectation over the
+ # course of training
+ rands = torch.rand(len(self._frac_part), generator=generator)
+ rep_factors = self._int_part + (rands < self._frac_part).float()
+ # Construct a list of indices in which we repeat images as specified
+ indices = []
+ for dataset_index, rep_factor in enumerate(rep_factors):
+ indices.extend([dataset_index] * int(rep_factor.item()))
+ return torch.tensor(indices, dtype=torch.int64)
+
+ def __iter__(self):
+ start = self._rank
+ yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)
+
+ def _infinite_indices(self):
+ g = torch.Generator()
+ g.manual_seed(self._seed)
+ while True:
+ # Sample indices with repeats determined by stochastic rounding; each
+ # "epoch" may have a slightly different size due to the rounding.
+ indices = self._get_epoch_indices(g)
+ if self._shuffle:
+ randperm = torch.randperm(len(indices), generator=g)
+ yield from indices[randperm].tolist()
+ else:
+ yield from indices.tolist()
+
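+# Illustrative usage sketch (editor's addition), with `dataset_dicts` in Detectron2's
+# dataset format (e.g. obtained from DatasetCatalog.get(...)):
+#
+#   rep_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
+#       dataset_dicts, repeat_thresh=0.001
+#   )
+#   sampler = RepeatFactorTrainingSampler(rep_factors, shuffle=True, seed=42)
+#   # Images containing categories rarer than repeat_thresh are oversampled.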
+
+class InferenceSampler(Sampler):
+ """
+ Produce indices for inference across all workers.
+ Inference needs to run on the __exact__ set of samples,
+ therefore when the total number of samples is not divisible by the number of workers,
+ this sampler produces a different number of samples on different workers.
+ """
+
+ def __init__(self, size: int):
+ """
+ Args:
+ size (int): the total number of data of the underlying dataset to sample from
+ """
+ self._size = size
+ assert size > 0
+ self._rank = comm.get_rank()
+ self._world_size = comm.get_world_size()
+ self._local_indices = self._get_local_indices(size, self._world_size, self._rank)
+
+ @staticmethod
+ def _get_local_indices(total_size, world_size, rank):
+ shard_size = total_size // world_size
+ left = total_size % world_size
+ shard_sizes = [shard_size + int(r < left) for r in range(world_size)]
+
+ begin = sum(shard_sizes[:rank])
+ end = min(sum(shard_sizes[: rank + 1]), total_size)
+ return range(begin, end)
+
+ def __iter__(self):
+ yield from self._local_indices
+
+ def __len__(self):
+ return len(self._local_indices)
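+
+
+# Worked example of the sharding above (editor's addition): with total_size=10 and
+# world_size=3 the shard sizes are [4, 3, 3], so rank 0 gets range(0, 4), rank 1 gets
+# range(4, 7), and rank 2 gets range(7, 10) -- every sample is visited exactly once.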
diff --git a/vendor/detectron2/detectron2/data/samplers/grouped_batch_sampler.py b/vendor/detectron2/detectron2/data/samplers/grouped_batch_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b247730aacd04dd0c752664acde3257c4eddd71
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/samplers/grouped_batch_sampler.py
@@ -0,0 +1,47 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import numpy as np
+from torch.utils.data.sampler import BatchSampler, Sampler
+
+
+class GroupedBatchSampler(BatchSampler):
+ """
+ Wraps another sampler to yield a mini-batch of indices.
+ It enforces that the batch only contain elements from the same group.
+ It also tries to provide mini-batches that follow an ordering as close as
+ possible to the ordering of the original sampler.
+ """
+
+ def __init__(self, sampler, group_ids, batch_size):
+ """
+ Args:
+ sampler (Sampler): Base sampler.
+ group_ids (list[int]): If the sampler produces indices in range [0, N),
+ `group_ids` must be a list of `N` ints which contains the group id of each sample.
+ The group ids must be a set of integers in the range [0, num_groups).
+ batch_size (int): Size of mini-batch.
+ """
+ if not isinstance(sampler, Sampler):
+ raise ValueError(
+ "sampler should be an instance of "
+ "torch.utils.data.Sampler, but got sampler={}".format(sampler)
+ )
+ self.sampler = sampler
+ self.group_ids = np.asarray(group_ids)
+ assert self.group_ids.ndim == 1
+ self.batch_size = batch_size
+ groups = np.unique(self.group_ids).tolist()
+
+ # buffer the indices of each group until batch size is reached
+ self.buffer_per_group = {k: [] for k in groups}
+
+ def __iter__(self):
+ for idx in self.sampler:
+ group_id = self.group_ids[idx]
+ group_buffer = self.buffer_per_group[group_id]
+ group_buffer.append(idx)
+ if len(group_buffer) == self.batch_size:
+ yield group_buffer[:] # yield a copy of the list
+ del group_buffer[:]
+
+ def __len__(self):
+ raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.")
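+
+
+# Illustrative usage sketch (editor's addition). `group_ids` is typically the
+# aspect-ratio group (0 or 1) of each image, so every yielded batch contains
+# indices from a single group:
+#
+#   base = TrainingSampler(size=len(group_ids), shuffle=True)   # from .distributed_sampler
+#   batch_sampler = GroupedBatchSampler(base, group_ids, batch_size=2)
+#   for batch_indices in batch_sampler:
+#       ...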
diff --git a/vendor/detectron2/detectron2/data/transforms/__init__.py b/vendor/detectron2/detectron2/data/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab3c63b5b456a7fb878757e25768a3634f76ae5b
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/transforms/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from fvcore.transforms.transform import Transform, TransformList # order them first
+from fvcore.transforms.transform import *
+from .transform import *
+from .augmentation import *
+from .augmentation_impl import *
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
+
+
+from detectron2.utils.env import fixup_module_metadata
+
+fixup_module_metadata(__name__, globals(), __all__)
+del fixup_module_metadata
diff --git a/vendor/detectron2/detectron2/data/transforms/augmentation.py b/vendor/detectron2/detectron2/data/transforms/augmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..63dd41aef658c9b51c7246880399405a029c5580
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/transforms/augmentation.py
@@ -0,0 +1,380 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import inspect
+import numpy as np
+import pprint
+from typing import Any, List, Optional, Tuple, Union
+from fvcore.transforms.transform import Transform, TransformList
+
+"""
+See "Data Augmentation" tutorial for an overview of the system:
+https://detectron2.readthedocs.io/tutorials/augmentation.html
+"""
+
+
+__all__ = [
+ "Augmentation",
+ "AugmentationList",
+ "AugInput",
+ "TransformGen",
+ "apply_transform_gens",
+ "StandardAugInput",
+ "apply_augmentations",
+]
+
+
+def _check_img_dtype(img):
+ assert isinstance(img, np.ndarray), "[Augmentation] Needs a numpy array, but got a {}!".format(
+ type(img)
+ )
+ assert not isinstance(img.dtype, np.integer) or (
+ img.dtype == np.uint8
+ ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format(
+ img.dtype
+ )
+ assert img.ndim in [2, 3], img.ndim
+
+
+def _get_aug_input_args(aug, aug_input) -> List[Any]:
+ """
+ Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``.
+ """
+ if aug.input_args is None:
+ # Decide what attributes are needed automatically
+ prms = list(inspect.signature(aug.get_transform).parameters.items())
+ # The default behavior is: if there is one parameter, then it is "image"
+ # (this works automatically for the majority of use cases and avoids breaking
+ # backward compatibility). Otherwise, use the argument names.
+ if len(prms) == 1:
+ names = ("image",)
+ else:
+ names = []
+ for name, prm in prms:
+ if prm.kind in (
+ inspect.Parameter.VAR_POSITIONAL,
+ inspect.Parameter.VAR_KEYWORD,
+ ):
+ raise TypeError(
+ f""" \
+The default implementation of `{type(aug)}.__call__` does not allow \
+`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \
+If arguments are unknown, reimplement `__call__` instead. \
+"""
+ )
+ names.append(name)
+ aug.input_args = tuple(names)
+
+ args = []
+ for f in aug.input_args:
+ try:
+ args.append(getattr(aug_input, f))
+ except AttributeError as e:
+ raise AttributeError(
+ f"{type(aug)}.get_transform needs input attribute '{f}', "
+ f"but it is not an attribute of {type(aug_input)}!"
+ ) from e
+ return args
+
+
+class Augmentation:
+ """
+ Augmentation defines (often random) policies/strategies to generate :class:`Transform`
+ from data. It is often used for pre-processing of input data.
+
+ A "policy" that generates a :class:`Transform` may, in the most general case,
+ need arbitrary information from input data in order to determine what transforms
+ to apply. Therefore, each :class:`Augmentation` instance defines the arguments
+ needed by its :meth:`get_transform` method. When called with the positional arguments,
+ the :meth:`get_transform` method executes the policy.
+
+ Note that :class:`Augmentation` defines the policies to create a :class:`Transform`,
+ but not how to execute the actual transform operations to those data.
+ Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform.
+
+ The returned `Transform` object is meant to describe deterministic transformation, which means
+ it can be re-applied on associated data, e.g. the geometry of an image and its segmentation
+ masks need to be transformed together.
+ (If such re-application is not needed, then determinism is not a crucial requirement.)
+ """
+
+ input_args: Optional[Tuple[str]] = None
+ """
+ Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``.
+ By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only
+ contains "image". As long as the argument name convention is followed, there is no need for
+ users to touch this attribute.
+ """
+
+ def _init(self, params=None):
+ if params:
+ for k, v in params.items():
+ if k != "self" and not k.startswith("_"):
+ setattr(self, k, v)
+
+ def get_transform(self, *args) -> Transform:
+ """
+ Execute the policy based on input data, and decide what transform to apply to inputs.
+
+ Args:
+ args: Any fixed-length positional arguments. By default, the name of the arguments
+ should exist in the :class:`AugInput` to be used.
+
+ Returns:
+ Transform: Returns the deterministic transform to apply to the input.
+
+ Examples:
+ ::
+ class MyAug:
+ # if a policy needs to know both image and semantic segmentation
+ def get_transform(image, sem_seg) -> T.Transform:
+ pass
+ tfm: Transform = MyAug().get_transform(image, sem_seg)
+ new_image = tfm.apply_image(image)
+
+ Notes:
+ Users can freely use arbitrary new argument names in custom
+ :meth:`get_transform` method, as long as they are available in the
+ input data. In detectron2 we use the following convention:
+
+ * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
+ floating point in range [0, 1] or [0, 255].
+ * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes
+ of N instances. Each is in XYXY format in unit of absolute coordinates.
+ * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel.
+
+ We do not specify convention for other types and do not include builtin
+ :class:`Augmentation` that uses other types in detectron2.
+ """
+ raise NotImplementedError
+
+ def __call__(self, aug_input) -> Transform:
+ """
+ Augment the given `aug_input` **in-place**, and return the transform that's used.
+
+ This method will be called to apply the augmentation. In most augmentation, it
+ is enough to use the default implementation, which calls :meth:`get_transform`
+ using the inputs. But a subclass can overwrite it to have more complicated logic.
+
+ Args:
+ aug_input (AugInput): an object that has attributes needed by this augmentation
+ (defined by ``self.get_transform``). Its ``transform`` method will be called
+ to in-place transform it.
+
+ Returns:
+ Transform: the transform that is applied on the input.
+ """
+ args = _get_aug_input_args(self, aug_input)
+ tfm = self.get_transform(*args)
+ assert isinstance(tfm, (Transform, TransformList)), (
+ f"{type(self)}.get_transform must return an instance of Transform! "
+ f"Got {type(tfm)} instead."
+ )
+ aug_input.transform(tfm)
+ return tfm
+
+ def _rand_range(self, low=1.0, high=None, size=None):
+ """
+ Uniform float random number between low and high.
+ """
+ if high is None:
+ low, high = 0, low
+ if size is None:
+ size = []
+ return np.random.uniform(low, high, size)
+
+ def __repr__(self):
+ """
+ Produce something like:
+ "MyAugmentation(field1={self.field1}, field2={self.field2})"
+ """
+ try:
+ sig = inspect.signature(self.__init__)
+ classname = type(self).__name__
+ argstr = []
+ for name, param in sig.parameters.items():
+ assert (
+ param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD
+ ), "The default __repr__ doesn't support *args or **kwargs"
+ assert hasattr(self, name), (
+ "Attribute {} not found! "
+ "Default __repr__ only works if attributes match the constructor.".format(name)
+ )
+ attr = getattr(self, name)
+ default = param.default
+ if default is attr:
+ continue
+ attr_str = pprint.pformat(attr)
+ if "\n" in attr_str:
+ # don't show it if pformat decides to use >1 lines
+ attr_str = "..."
+ argstr.append("{}={}".format(name, attr_str))
+ return "{}({})".format(classname, ", ".join(argstr))
+ except AssertionError:
+ return super().__repr__()
+
+ __str__ = __repr__
+
+
+class _TransformToAug(Augmentation):
+ def __init__(self, tfm: Transform):
+ self.tfm = tfm
+
+ def get_transform(self, *args):
+ return self.tfm
+
+ def __repr__(self):
+ return repr(self.tfm)
+
+ __str__ = __repr__
+
+
+def _transform_to_aug(tfm_or_aug):
+ """
+ Wrap Transform into Augmentation.
+ Private, used internally to implement augmentations.
+ """
+ assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug
+ if isinstance(tfm_or_aug, Augmentation):
+ return tfm_or_aug
+ else:
+ return _TransformToAug(tfm_or_aug)
+
+
+class AugmentationList(Augmentation):
+ """
+ Apply a sequence of augmentations.
+
+ It has ``__call__`` method to apply the augmentations.
+
+ Note that :meth:`get_transform` method is impossible (will throw error if called)
+ for :class:`AugmentationList`, because in order to apply a sequence of augmentations,
+ the kth augmentation must be applied first, to provide inputs needed by the (k+1)th
+ augmentation.
+ """
+
+ def __init__(self, augs):
+ """
+ Args:
+ augs (list[Augmentation or Transform]):
+ """
+ super().__init__()
+ self.augs = [_transform_to_aug(x) for x in augs]
+
+ def __call__(self, aug_input) -> TransformList:
+ tfms = []
+ for x in self.augs:
+ tfm = x(aug_input)
+ tfms.append(tfm)
+ return TransformList(tfms)
+
+ def __repr__(self):
+ msgs = [str(x) for x in self.augs]
+ return "AugmentationList[{}]".format(", ".join(msgs))
+
+ __str__ = __repr__
+
+
+class AugInput:
+ """
+ Input that can be used with :meth:`Augmentation.__call__`.
+ This is a standard implementation for the majority of use cases.
+ This class provides the standard attributes **"image", "boxes", "sem_seg"**
+ defined in :meth:`__init__` and they may be needed by different augmentations.
+ Most augmentation policies do not need attributes beyond these three.
+
+ After applying augmentations to these attributes (using :meth:`AugInput.transform`),
+ the returned transforms can then be used to transform other data structures that users have.
+
+ Examples:
+ ::
+ input = AugInput(image, boxes=boxes)
+ tfms = augmentation(input)
+ transformed_image = input.image
+ transformed_boxes = input.boxes
+ transformed_other_data = tfms.apply_other(other_data)
+
+ An extended project that works with new data types may implement augmentation policies
+ that need other inputs. An algorithm may need to transform inputs in a way different
+ from the standard approach defined in this class. In those rare situations, users can
+ implement a class similar to this class, that satisfies the following conditions:
+
+ * The input must provide access to these data in the form of attribute access
+ (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image"
+ and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg".
+ * The input must have a ``transform(tfm: Transform) -> None`` method which
+ in-place transforms all its attributes.
+ """
+
+ # TODO maybe should support more builtin data types here
+ def __init__(
+ self,
+ image: np.ndarray,
+ *,
+ boxes: Optional[np.ndarray] = None,
+ sem_seg: Optional[np.ndarray] = None,
+ ):
+ """
+ Args:
+ image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
+ floating point in range [0, 1] or [0, 255]. The meaning of C is up
+ to users.
+ boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode
+ sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element
+ is an integer label of pixel.
+ """
+ _check_img_dtype(image)
+ self.image = image
+ self.boxes = boxes
+ self.sem_seg = sem_seg
+
+ def transform(self, tfm: Transform) -> None:
+ """
+ In-place transform all attributes of this class.
+
+ By "in-place", it means after calling this method, accessing an attribute such
+ as ``self.image`` will return transformed data.
+ """
+ self.image = tfm.apply_image(self.image)
+ if self.boxes is not None:
+ self.boxes = tfm.apply_box(self.boxes)
+ if self.sem_seg is not None:
+ self.sem_seg = tfm.apply_segmentation(self.sem_seg)
+
+ def apply_augmentations(
+ self, augmentations: List[Union[Augmentation, Transform]]
+ ) -> TransformList:
+ """
+ Equivalent of ``AugmentationList(augmentations)(self)``
+ """
+ return AugmentationList(augmentations)(self)
+
+
+def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs):
+ """
+ Use ``T.AugmentationList(augmentations)(inputs)`` instead.
+ """
+ if isinstance(inputs, np.ndarray):
+ # handle the common case of image-only Augmentation, also for backward compatibility
+ image_only = True
+ inputs = AugInput(inputs)
+ else:
+ image_only = False
+ tfms = inputs.apply_augmentations(augmentations)
+ return inputs.image if image_only else inputs, tfms
+
+
+apply_transform_gens = apply_augmentations
+"""
+Alias for backward-compatibility.
+"""
+
+TransformGen = Augmentation
+"""
+Alias for Augmentation, since it is something that generates :class:`Transform`s
+"""
+
+StandardAugInput = AugInput
+"""
+Alias for compatibility. It's not worth the complexity to have two classes.
+"""
diff --git a/vendor/detectron2/detectron2/data/transforms/augmentation_impl.py b/vendor/detectron2/detectron2/data/transforms/augmentation_impl.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc270cd109df5c52404cc2de855e6146d9fef330
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/transforms/augmentation_impl.py
@@ -0,0 +1,736 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+Implement many useful :class:`Augmentation`.
+"""
+import numpy as np
+import sys
+from numpy import random
+from typing import Tuple
+import torch
+from fvcore.transforms.transform import (
+ BlendTransform,
+ CropTransform,
+ HFlipTransform,
+ NoOpTransform,
+ PadTransform,
+ Transform,
+ TransformList,
+ VFlipTransform,
+)
+from PIL import Image
+
+from detectron2.structures import Boxes, pairwise_iou
+
+from .augmentation import Augmentation, _transform_to_aug
+from .transform import ExtentTransform, ResizeTransform, RotationTransform
+
+__all__ = [
+ "FixedSizeCrop",
+ "RandomApply",
+ "RandomBrightness",
+ "RandomContrast",
+ "RandomCrop",
+ "RandomExtent",
+ "RandomFlip",
+ "RandomSaturation",
+ "RandomLighting",
+ "RandomRotation",
+ "Resize",
+ "ResizeScale",
+ "ResizeShortestEdge",
+ "RandomCrop_CategoryAreaConstraint",
+ "RandomResize",
+ "MinIoURandomCrop",
+]
+
+
+class RandomApply(Augmentation):
+ """
+ Randomly apply an augmentation with a given probability.
+ """
+
+ def __init__(self, tfm_or_aug, prob=0.5):
+ """
+ Args:
+ tfm_or_aug (Transform, Augmentation): the transform or augmentation
+ to be applied. It can either be a `Transform` or `Augmentation`
+ instance.
+ prob (float): probability between 0.0 and 1.0 that
+ the wrapper transformation is applied
+ """
+ super().__init__()
+ self.aug = _transform_to_aug(tfm_or_aug)
+ assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})"
+ self.prob = prob
+
+ def get_transform(self, *args):
+ do = self._rand_range() < self.prob
+ if do:
+ return self.aug.get_transform(*args)
+ else:
+ return NoOpTransform()
+
+ def __call__(self, aug_input):
+ do = self._rand_range() < self.prob
+ if do:
+ return self.aug(aug_input)
+ else:
+ return NoOpTransform()
+
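+# Illustrative usage sketch (editor's addition): apply a small random rotation to
+# roughly 30% of the inputs and leave the rest untouched:
+#
+#   aug = RandomApply(RandomRotation([-10, 10]), prob=0.3)
+#   tfm = aug(AugInput(image))   # AugInput from .augmentation; RotationTransform or NoOpTransform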
+
+class RandomFlip(Augmentation):
+ """
+ Flip the image horizontally or vertically with the given probability.
+ """
+
+ def __init__(self, prob=0.5, *, horizontal=True, vertical=False):
+ """
+ Args:
+ prob (float): probability of flip.
+ horizontal (boolean): whether to apply horizontal flipping
+ vertical (boolean): whether to apply vertical flipping
+ """
+ super().__init__()
+
+ if horizontal and vertical:
+ raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.")
+ if not horizontal and not vertical:
+ raise ValueError("At least one of horiz or vert has to be True!")
+ self._init(locals())
+
+ def get_transform(self, image):
+ h, w = image.shape[:2]
+ do = self._rand_range() < self.prob
+ if do:
+ if self.horizontal:
+ return HFlipTransform(w)
+ elif self.vertical:
+ return VFlipTransform(h)
+ else:
+ return NoOpTransform()
+
+
+class Resize(Augmentation):
+ """Resize image to a fixed target size"""
+
+ def __init__(self, shape, interp=Image.BILINEAR):
+ """
+ Args:
+ shape: (h, w) tuple or a int
+ interp: PIL interpolation method
+ """
+ if isinstance(shape, int):
+ shape = (shape, shape)
+ shape = tuple(shape)
+ self._init(locals())
+
+ def get_transform(self, image):
+ return ResizeTransform(
+ image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp
+ )
+
+
+class ResizeShortestEdge(Augmentation):
+ """
+ Resize the image while keeping the aspect ratio unchanged.
+ It attempts to scale the shorter edge to the given `short_edge_length`,
+ as long as the longer edge does not exceed `max_size`.
+ If `max_size` is reached, then downscale so that the longer edge does not exceed max_size.
+ """
+
+ @torch.jit.unused
+ def __init__(
+ self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR
+ ):
+ """
+ Args:
+ short_edge_length (list[int]): If ``sample_style=="range"``,
+ a [min, max] interval from which to sample the shortest edge length.
+ If ``sample_style=="choice"``, a list of shortest edge lengths to sample from.
+ max_size (int): maximum allowed longest edge length.
+ sample_style (str): either "range" or "choice".
+ """
+ super().__init__()
+ assert sample_style in ["range", "choice"], sample_style
+
+ self.is_range = sample_style == "range"
+ if isinstance(short_edge_length, int):
+ short_edge_length = (short_edge_length, short_edge_length)
+ if self.is_range:
+ assert len(short_edge_length) == 2, (
+ "short_edge_length must be two values using 'range' sample style."
+ f" Got {short_edge_length}!"
+ )
+ self._init(locals())
+
+ @torch.jit.unused
+ def get_transform(self, image):
+ h, w = image.shape[:2]
+ if self.is_range:
+ size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1)
+ else:
+ size = np.random.choice(self.short_edge_length)
+ if size == 0:
+ return NoOpTransform()
+
+ newh, neww = ResizeShortestEdge.get_output_shape(h, w, size, self.max_size)
+ return ResizeTransform(h, w, newh, neww, self.interp)
+
+ @staticmethod
+ def get_output_shape(
+ oldh: int, oldw: int, short_edge_length: int, max_size: int
+ ) -> Tuple[int, int]:
+ """
+ Compute the output size given input size and target short edge length.
+ """
+ h, w = oldh, oldw
+ size = short_edge_length * 1.0
+ scale = size / min(h, w)
+ if h < w:
+ newh, neww = size, scale * w
+ else:
+ newh, neww = scale * h, size
+ if max(newh, neww) > max_size:
+ scale = max_size * 1.0 / max(newh, neww)
+ newh = newh * scale
+ neww = neww * scale
+ neww = int(neww + 0.5)
+ newh = int(newh + 0.5)
+ return (newh, neww)
+
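+# Worked example for get_output_shape (editor's addition): a 480x640 input with
+# short_edge_length=800 and max_size=1333 is scaled by 800/480, giving
+# (newh, neww) = (800, 1067); if the scaled long edge exceeded max_size, both sides
+# would be scaled down again so that the long edge equals max_size.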
+
+class ResizeScale(Augmentation):
+ """
+ Takes target size as input and randomly scales the given target size between `min_scale`
+ and `max_scale`. It then scales the input image such that it fits inside the scaled target
+ box, keeping the aspect ratio constant.
+ This implements the resize part of Google's 'resize_and_crop' data augmentation:
+ https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/input_utils.py#L127
+ """
+
+ def __init__(
+ self,
+ min_scale: float,
+ max_scale: float,
+ target_height: int,
+ target_width: int,
+ interp: int = Image.BILINEAR,
+ ):
+ """
+ Args:
+ min_scale: minimum image scale range.
+ max_scale: maximum image scale range.
+ target_height: target image height.
+ target_width: target image width.
+ interp: image interpolation method.
+ """
+ super().__init__()
+ self._init(locals())
+
+ def _get_resize(self, image: np.ndarray, scale: float) -> Transform:
+ input_size = image.shape[:2]
+
+ # Compute new target size given a scale.
+ target_size = (self.target_height, self.target_width)
+ target_scale_size = np.multiply(target_size, scale)
+
+ # Compute actual rescaling applied to input image and output size.
+ output_scale = np.minimum(
+ target_scale_size[0] / input_size[0], target_scale_size[1] / input_size[1]
+ )
+ output_size = np.round(np.multiply(input_size, output_scale)).astype(int)
+
+ return ResizeTransform(
+ input_size[0], input_size[1], output_size[0], output_size[1], self.interp
+ )
+
+ def get_transform(self, image: np.ndarray) -> Transform:
+ random_scale = np.random.uniform(self.min_scale, self.max_scale)
+ return self._get_resize(image, random_scale)
+
+
+class RandomRotation(Augmentation):
+ """
+ This augmentation returns a copy of the image, rotated the given
+ number of degrees counterclockwise around the given center.
+ """
+
+ def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None):
+ """
+ Args:
+ angle (list[float]): If ``sample_style=="range"``,
+ a [min, max] interval from which to sample the angle (in degrees).
+ If ``sample_style=="choice"``, a list of angles to sample from
+ expand (bool): choose if the image should be resized to fit the whole
+ rotated image (default), or simply cropped
+ center (list[[float, float]]): If ``sample_style=="range"``,
+ a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center,
+ [0, 0] being the top left of the image and [1, 1] the bottom right.
+ If ``sample_style=="choice"``, a list of centers to sample from
+ Default: None, which means that the center of rotation is the center of the image
+ center has no effect if expand=True because it only affects shifting
+ """
+ super().__init__()
+ assert sample_style in ["range", "choice"], sample_style
+ self.is_range = sample_style == "range"
+ if isinstance(angle, (float, int)):
+ angle = (angle, angle)
+ if center is not None and isinstance(center[0], (float, int)):
+ center = (center, center)
+ self._init(locals())
+
+ def get_transform(self, image):
+ h, w = image.shape[:2]
+ center = None
+ if self.is_range:
+ angle = np.random.uniform(self.angle[0], self.angle[1])
+ if self.center is not None:
+ center = (
+ np.random.uniform(self.center[0][0], self.center[1][0]),
+ np.random.uniform(self.center[0][1], self.center[1][1]),
+ )
+ else:
+ angle = np.random.choice(self.angle)
+ if self.center is not None:
+ center = np.random.choice(self.center)
+
+ if center is not None:
+ center = (w * center[0], h * center[1]) # Convert to absolute coordinates
+
+ if angle % 360 == 0:
+ return NoOpTransform()
+
+ return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp)
+
+
+class FixedSizeCrop(Augmentation):
+ """
+ If `crop_size` is smaller than the input image size, then it uses a random crop of
+ the crop size. If `crop_size` is larger than the input image size, then it pads
+ the right and the bottom of the image to the crop size if `pad` is True, otherwise
+ it returns the smaller image.
+ """
+
+ def __init__(
+ self,
+ crop_size: Tuple[int],
+ pad: bool = True,
+ pad_value: float = 128.0,
+ seg_pad_value: int = 255,
+ ):
+ """
+ Args:
+ crop_size: target image (height, width).
+ pad: if True, will pad images smaller than `crop_size` up to `crop_size`
+ pad_value: the padding value to the image.
+ seg_pad_value: the padding value to the segmentation mask.
+ """
+ super().__init__()
+ self._init(locals())
+
+ def _get_crop(self, image: np.ndarray) -> Transform:
+ # Compute the image scale and scaled size.
+ input_size = image.shape[:2]
+ output_size = self.crop_size
+
+ # Add random crop if the image is scaled up.
+ max_offset = np.subtract(input_size, output_size)
+ max_offset = np.maximum(max_offset, 0)
+ offset = np.multiply(max_offset, np.random.uniform(0.0, 1.0))
+ offset = np.round(offset).astype(int)
+ return CropTransform(
+ offset[1], offset[0], output_size[1], output_size[0], input_size[1], input_size[0]
+ )
+
+ def _get_pad(self, image: np.ndarray) -> Transform:
+ # Compute the image scale and scaled size.
+ input_size = image.shape[:2]
+ output_size = self.crop_size
+
+ # Add padding if the image is scaled down.
+ pad_size = np.subtract(output_size, input_size)
+ pad_size = np.maximum(pad_size, 0)
+ original_size = np.minimum(input_size, output_size)
+ return PadTransform(
+ 0,
+ 0,
+ pad_size[1],
+ pad_size[0],
+ original_size[1],
+ original_size[0],
+ self.pad_value,
+ self.seg_pad_value,
+ )
+
+ def get_transform(self, image: np.ndarray) -> TransformList:
+ transforms = [self._get_crop(image)]
+ if self.pad:
+ transforms.append(self._get_pad(image))
+ return TransformList(transforms)
+
+
+class RandomCrop(Augmentation):
+ """
+ Randomly crop a rectangle region out of an image.
+ """
+
+ def __init__(self, crop_type: str, crop_size):
+ """
+ Args:
+ crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range".
+ crop_size (tuple[float, float]): two floats, explained below.
+
+ - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of
+ size (H, W). crop size should be in (0, 1]
+ - "relative_range": uniformly sample two values from [crop_size[0], 1]
+ and [crop_size[1], 1], and use them as in the "relative" crop type.
+ - "absolute" crop a (crop_size[0], crop_size[1]) region from input image.
+ crop_size must be smaller than the input image size.
+ - "absolute_range", for an input of size (H, W), uniformly sample H_crop in
+ [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])].
+ Then crop a region (H_crop, W_crop).
+ """
+ # TODO style of relative_range and absolute_range are not consistent:
+ # one takes (h, w) but another takes (min, max)
+ super().__init__()
+ assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"]
+ self._init(locals())
+
+ def get_transform(self, image):
+ h, w = image.shape[:2]
+ croph, cropw = self.get_crop_size((h, w))
+ assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self)
+ h0 = np.random.randint(h - croph + 1)
+ w0 = np.random.randint(w - cropw + 1)
+ return CropTransform(w0, h0, cropw, croph)
+
+ def get_crop_size(self, image_size):
+ """
+ Args:
+ image_size (tuple): height, width
+
+ Returns:
+ crop_size (tuple): height, width in absolute pixels
+ """
+ h, w = image_size
+ if self.crop_type == "relative":
+ ch, cw = self.crop_size
+ return int(h * ch + 0.5), int(w * cw + 0.5)
+ elif self.crop_type == "relative_range":
+ crop_size = np.asarray(self.crop_size, dtype=np.float32)
+ ch, cw = crop_size + np.random.rand(2) * (1 - crop_size)
+ return int(h * ch + 0.5), int(w * cw + 0.5)
+ elif self.crop_type == "absolute":
+ return (min(self.crop_size[0], h), min(self.crop_size[1], w))
+ elif self.crop_type == "absolute_range":
+ assert self.crop_size[0] <= self.crop_size[1]
+ ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1)
+ cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1)
+ return ch, cw
+ else:
+ raise NotImplementedError("Unknown crop type {}".format(self.crop_type))
+
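+# Worked examples for get_crop_size (editor's addition), for a 480x640 image:
+#
+#   RandomCrop("relative", (0.5, 0.5)).get_crop_size((480, 640))   # -> (240, 320)
+#   RandomCrop("absolute", (300, 300)).get_crop_size((480, 640))   # -> (300, 300)
+#   RandomCrop("absolute_range", (200, 400))                       # samples H and W in [200, 400]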
+
+class RandomCrop_CategoryAreaConstraint(Augmentation):
+ """
+ Similar to :class:`RandomCrop`, but finds a cropping window such that no single category
+ occupies more than a `single_category_max_area` fraction of the semantic segmentation
+ ground truth, since such crops can cause instability in training. The function attempts
+ at most 10 times to find such a valid cropping window.
+ """
+
+ def __init__(
+ self,
+ crop_type: str,
+ crop_size,
+ single_category_max_area: float = 1.0,
+ ignored_category: int = None,
+ ):
+ """
+ Args:
+ crop_type, crop_size: same as in :class:`RandomCrop`
+ single_category_max_area: the maximum allowed area ratio of a
+ category. Set to 1.0 to disable
+ ignored_category: allow this category in the semantic segmentation
+ ground truth to exceed the area ratio. Usually set to the category
+ that's ignored in training.
+ """
+ self.crop_aug = RandomCrop(crop_type, crop_size)
+ self._init(locals())
+
+ def get_transform(self, image, sem_seg):
+ if self.single_category_max_area >= 1.0:
+ return self.crop_aug.get_transform(image)
+ else:
+ h, w = sem_seg.shape
+ for _ in range(10):
+ crop_size = self.crop_aug.get_crop_size((h, w))
+ y0 = np.random.randint(h - crop_size[0] + 1)
+ x0 = np.random.randint(w - crop_size[1] + 1)
+ sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]]
+ labels, cnt = np.unique(sem_seg_temp, return_counts=True)
+ if self.ignored_category is not None:
+ cnt = cnt[labels != self.ignored_category]
+ if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area:
+ break
+ crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0])
+ return crop_tfm
+
+
+class RandomExtent(Augmentation):
+ """
+ Outputs an image by cropping a random "subrect" of the source image.
+
+ The subrect can be parameterized to include pixels outside the source image,
+ in which case they will be set to zeros (i.e. black). The size of the output
+ image will vary with the size of the random subrect.
+ """
+
+ def __init__(self, scale_range, shift_range):
+ """
+ Args:
+ scale_range (l, h): Range of input-to-output size scaling factor
+ shift_range (x, y): Range of shifts of the cropped subrect. The rect
+ is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)],
+ where (w, h) is the (width, height) of the input image. Set each
+ component to zero to crop at the image's center.
+ """
+ super().__init__()
+ self._init(locals())
+
+ def get_transform(self, image):
+ img_h, img_w = image.shape[:2]
+
+ # Initialize src_rect to fit the input image.
+ src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h])
+
+ # Apply a random scaling to the src_rect.
+ src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1])
+
+ # Apply a random shift to the coordinates origin.
+ src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5)
+ src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5)
+
+ # Map src_rect coordinates into image coordinates (center at corner).
+ src_rect[0::2] += 0.5 * img_w
+ src_rect[1::2] += 0.5 * img_h
+
+ return ExtentTransform(
+ src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]),
+ output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])),
+ )
+
+
+class RandomContrast(Augmentation):
+ """
+ Randomly transforms image contrast.
+
+ Contrast intensity is uniformly sampled in (intensity_min, intensity_max).
+ - intensity < 1 will reduce contrast
+ - intensity = 1 will preserve the input image
+ - intensity > 1 will increase contrast
+
+ See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+ """
+
+ def __init__(self, intensity_min, intensity_max):
+ """
+ Args:
+ intensity_min (float): Minimum augmentation
+ intensity_max (float): Maximum augmentation
+ """
+ super().__init__()
+ self._init(locals())
+
+ def get_transform(self, image):
+ w = np.random.uniform(self.intensity_min, self.intensity_max)
+ return BlendTransform(src_image=image.mean(), src_weight=1 - w, dst_weight=w)
+
+
+class RandomBrightness(Augmentation):
+ """
+ Randomly transforms image brightness.
+
+ Brightness intensity is uniformly sampled in (intensity_min, intensity_max).
+ - intensity < 1 will reduce brightness
+ - intensity = 1 will preserve the input image
+ - intensity > 1 will increase brightness
+
+ See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+ """
+
+ def __init__(self, intensity_min, intensity_max):
+ """
+ Args:
+ intensity_min (float): Minimum augmentation
+ intensity_max (float): Maximum augmentation
+ """
+ super().__init__()
+ self._init(locals())
+
+ def get_transform(self, image):
+ w = np.random.uniform(self.intensity_min, self.intensity_max)
+ return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w)
+
+
+class RandomSaturation(Augmentation):
+ """
+ Randomly transforms saturation of an RGB image.
+ Input images are assumed to have 'RGB' channel order.
+
+ Saturation intensity is uniformly sampled in (intensity_min, intensity_max).
+ - intensity < 1 will reduce saturation (make the image more grayscale)
+ - intensity = 1 will preserve the input image
+ - intensity > 1 will increase saturation
+
+ See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+ """
+
+ def __init__(self, intensity_min, intensity_max):
+ """
+ Args:
+ intensity_min (float): Minimum augmentation (1 preserves input).
+ intensity_max (float): Maximum augmentation (1 preserves input).
+ """
+ super().__init__()
+ self._init(locals())
+
+ def get_transform(self, image):
+ assert image.shape[-1] == 3, "RandomSaturation only works on RGB images"
+ w = np.random.uniform(self.intensity_min, self.intensity_max)
+ grayscale = image.dot([0.299, 0.587, 0.114])[:, :, np.newaxis]
+ return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w)
+
+
+class RandomLighting(Augmentation):
+ """
+ The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet.
+ Input images are assumed to have 'RGB' channel order.
+
+ The degree of color jittering is randomly sampled via a normal distribution,
+ with standard deviation given by the scale parameter.
+ """
+
+ def __init__(self, scale):
+ """
+ Args:
+ scale (float): Standard deviation of principal component weighting.
+ """
+ super().__init__()
+ self._init(locals())
+ self.eigen_vecs = np.array(
+ [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]]
+ )
+ self.eigen_vals = np.array([0.2175, 0.0188, 0.0045])
+
+ def get_transform(self, image):
+ assert image.shape[-1] == 3, "RandomLighting only works on RGB images"
+ weights = np.random.normal(scale=self.scale, size=3)
+ return BlendTransform(
+ src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0
+ )
+
+
+class RandomResize(Augmentation):
+ """Randomly resize image to a target size in shape_list"""
+
+ def __init__(self, shape_list, interp=Image.BILINEAR):
+ """
+ Args:
+ shape_list: a list of shapes in (h, w)
+ interp: PIL interpolation method
+ """
+ self.shape_list = shape_list
+ self._init(locals())
+
+ def get_transform(self, image):
+ shape_idx = np.random.randint(low=0, high=len(self.shape_list))
+ h, w = self.shape_list[shape_idx]
+ return ResizeTransform(image.shape[0], image.shape[1], h, w, self.interp)
+
+
+class MinIoURandomCrop(Augmentation):
+ """Random crop the image & bboxes, the cropped patches have minimum IoU
+ requirement with original image & bboxes, the IoU threshold is randomly
+ selected from min_ious.
+
+ Args:
+ min_ious (tuple): minimum IoU threshold for all intersections with
+ bounding boxes
+ min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w,
+ where a >= min_crop_size)
+ mode_trials: number of trials for sampling min_ious threshold
+ crop_trials: number of trials for sampling a crop window for each chosen IoU mode
+ """
+
+ def __init__(
+ self,
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3,
+ mode_trials=1000,
+ crop_trials=50,
+ ):
+ self.min_ious = min_ious
+ self.sample_mode = (1, *min_ious, 0)
+ self.min_crop_size = min_crop_size
+ self.mode_trials = mode_trials
+ self.crop_trials = crop_trials
+
+ def get_transform(self, image, boxes):
+ """Call function to crop images and bounding boxes with minimum IoU
+ constraint.
+
+ Args:
+ boxes: ground truth boxes in (x1, y1, x2, y2) format
+ """
+ if boxes is None:
+ return NoOpTransform()
+ h, w, c = image.shape
+ for _ in range(self.mode_trials):
+ mode = random.choice(self.sample_mode)
+ self.mode = mode
+ if mode == 1:
+ return NoOpTransform()
+
+ min_iou = mode
+ for _ in range(self.crop_trials):
+ new_w = random.uniform(self.min_crop_size * w, w)
+ new_h = random.uniform(self.min_crop_size * h, h)
+
+ # h / w in [0.5, 2]
+ if new_h / new_w < 0.5 or new_h / new_w > 2:
+ continue
+
+ left = random.uniform(w - new_w)
+ top = random.uniform(h - new_h)
+
+ patch = np.array((int(left), int(top), int(left + new_w), int(top + new_h)))
+ # Line or point crop is not allowed
+ if patch[2] == patch[0] or patch[3] == patch[1]:
+ continue
+ overlaps = pairwise_iou(
+ Boxes(patch.reshape(-1, 4)), Boxes(boxes.reshape(-1, 4))
+ ).reshape(-1)
+ if len(overlaps) > 0 and overlaps.min() < min_iou:
+ continue
+
+ # centers of the boxes should be inside the cropped image
+ # only adjust boxes and instance masks when the gt is not empty
+ if len(overlaps) > 0:
+ # adjust boxes
+ def is_center_of_bboxes_in_patch(boxes, patch):
+ center = (boxes[:, :2] + boxes[:, 2:]) / 2
+ mask = (
+ (center[:, 0] > patch[0])
+ * (center[:, 1] > patch[1])
+ * (center[:, 0] < patch[2])
+ * (center[:, 1] < patch[3])
+ )
+ return mask
+
+ mask = is_center_of_bboxes_in_patch(boxes, patch)
+ if not mask.any():
+ continue
+ return CropTransform(int(left), int(top), int(new_w), int(new_h))
diff --git a/vendor/detectron2/detectron2/data/transforms/transform.py b/vendor/detectron2/detectron2/data/transforms/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..de44b991d7ab0d920ffb769e1402f08e358d37f7
--- /dev/null
+++ b/vendor/detectron2/detectron2/data/transforms/transform.py
@@ -0,0 +1,351 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+"""
+See "Data Augmentation" tutorial for an overview of the system:
+https://detectron2.readthedocs.io/tutorials/augmentation.html
+"""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from fvcore.transforms.transform import (
+ CropTransform,
+ HFlipTransform,
+ NoOpTransform,
+ Transform,
+ TransformList,
+)
+from PIL import Image
+
+try:
+ import cv2 # noqa
+except ImportError:
+ # OpenCV is an optional dependency at the moment
+ pass
+
+__all__ = [
+ "ExtentTransform",
+ "ResizeTransform",
+ "RotationTransform",
+ "ColorTransform",
+ "PILColorTransform",
+]
+
+
+class ExtentTransform(Transform):
+ """
+ Extracts a subregion from the source image and scales it to the output size.
+
+ The fill color is used to map pixels from the source rect that fall outside
+ the source image.
+
+ See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform
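+
+ Example (minimal sketch; assumes ``img`` (HxWx3 uint8) and ``pts`` (Nx2 float
+ array of (x, y) points) are supplied by the caller):
+ ::
+ t = ExtentTransform(src_rect=(10, 10, 110, 110), output_size=(64, 64))
+ patch = t.apply_image(img) # the 100x100 rect resampled to 64x64
+ pts = t.apply_coords(pts) # points mapped into the output frame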
+ """
+
+ def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0):
+ """
+ Args:
+ src_rect (x0, y0, x1, y1): src coordinates
+ output_size (h, w): dst image size
+ interp: PIL interpolation methods
+ fill: Fill color used when src_rect extends outside image
+ """
+ super().__init__()
+ self._set_attributes(locals())
+
+ def apply_image(self, img, interp=None):
+ h, w = self.output_size
+ if len(img.shape) > 2 and img.shape[2] == 1:
+ pil_image = Image.fromarray(img[:, :, 0], mode="L")
+ else:
+ pil_image = Image.fromarray(img)
+ pil_image = pil_image.transform(
+ size=(w, h),
+ method=Image.EXTENT,
+ data=self.src_rect,
+ resample=interp if interp else self.interp,
+ fill=self.fill,
+ )
+ ret = np.asarray(pil_image)
+ if len(img.shape) > 2 and img.shape[2] == 1:
+ ret = np.expand_dims(ret, -1)
+ return ret
+
+ def apply_coords(self, coords):
+ # Transform image center from source coordinates into output coordinates
+ # and then map the new origin to the corner of the output image.
+ h, w = self.output_size
+ x0, y0, x1, y1 = self.src_rect
+ new_coords = coords.astype(np.float32)
+ new_coords[:, 0] -= 0.5 * (x0 + x1)
+ new_coords[:, 1] -= 0.5 * (y0 + y1)
+ new_coords[:, 0] *= w / (x1 - x0)
+ new_coords[:, 1] *= h / (y1 - y0)
+ new_coords[:, 0] += 0.5 * w
+ new_coords[:, 1] += 0.5 * h
+ return new_coords
+
+ def apply_segmentation(self, segmentation):
+ segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
+ return segmentation
+
+
+class ResizeTransform(Transform):
+ """
+ Resize the image to a target size.
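+
+ Example (minimal sketch; assumes ``img`` is a 480x640x3 uint8 ndarray and ``pts``
+ an Nx2 float array of (x, y) points supplied by the caller):
+ ::
+ t = ResizeTransform(480, 640, 240, 320)
+ small = t.apply_image(img) # -> 240x320x3
+ pts = t.apply_coords(pts) # x and y are scaled by 0.5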
+ """
+
+ def __init__(self, h, w, new_h, new_w, interp=None):
+ """
+ Args:
+ h, w (int): original image size
+ new_h, new_w (int): new image size
+ interp: PIL interpolation methods, defaults to bilinear.
+ """
+ # TODO decide on PIL vs opencv
+ super().__init__()
+ if interp is None:
+ interp = Image.BILINEAR
+ self._set_attributes(locals())
+
+ def apply_image(self, img, interp=None):
+ assert img.shape[:2] == (self.h, self.w)
+ assert len(img.shape) <= 4
+ interp_method = interp if interp is not None else self.interp
+
+ if img.dtype == np.uint8:
+ if len(img.shape) > 2 and img.shape[2] == 1:
+ pil_image = Image.fromarray(img[:, :, 0], mode="L")
+ else:
+ pil_image = Image.fromarray(img)
+ pil_image = pil_image.resize((self.new_w, self.new_h), interp_method)
+ ret = np.asarray(pil_image)
+ if len(img.shape) > 2 and img.shape[2] == 1:
+ ret = np.expand_dims(ret, -1)
+ else:
+ # PIL only supports uint8
+ if any(x < 0 for x in img.strides):
+ img = np.ascontiguousarray(img)
+ img = torch.from_numpy(img)
+ shape = list(img.shape)
+ shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:]
+ img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw
+ _PIL_RESIZE_TO_INTERPOLATE_MODE = {
+ Image.NEAREST: "nearest",
+ Image.BILINEAR: "bilinear",
+ Image.BICUBIC: "bicubic",
+ }
+ mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method]
+ align_corners = None if mode == "nearest" else False
+ img = F.interpolate(
+ img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners
+ )
+ shape[:2] = (self.new_h, self.new_w)
+ ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c)
+
+ return ret
+
+ def apply_coords(self, coords):
+ coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
+ coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
+ return coords
+
+ def apply_segmentation(self, segmentation):
+ segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
+ return segmentation
+
+ def inverse(self):
+ return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp)
+
+
+class RotationTransform(Transform):
+ """
+ Rotates the image by the given number of degrees counter-clockwise
+ around its center.
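+
+ Example (minimal sketch; assumes ``img`` is a 480x640xC ndarray supplied by the
+ caller):
+ ::
+ t = RotationTransform(480, 640, angle=30) # expand=True by default
+ rotated = t.apply_image(img) # padded to the enlarged bounds
+ restored = t.inverse().apply_image(rotated) # rotate back and crop to 480x640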
+ """
+
+ def __init__(self, h, w, angle, expand=True, center=None, interp=None):
+ """
+ Args:
+ h, w (int): original image size
+ angle (float): degrees for rotation
+ expand (bool): choose if the image should be resized to fit the whole
+ rotated image (default), or simply cropped
+ center (tuple (width, height)): coordinates of the rotation center
+ if left as None, the center will be set to the center of each image
+ center has no effect if expand=True because it only affects shifting
+ interp: cv2 interpolation method, default cv2.INTER_LINEAR
+ """
+ super().__init__()
+ image_center = np.array((w / 2, h / 2))
+ if center is None:
+ center = image_center
+ if interp is None:
+ interp = cv2.INTER_LINEAR
+ abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle))))
+ if expand:
+ # find the new width and height bounds
+ bound_w, bound_h = np.rint(
+ [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin]
+ ).astype(int)
+ else:
+ bound_w, bound_h = w, h
+
+ self._set_attributes(locals())
+ self.rm_coords = self.create_rotation_matrix()
+ # Needed because of this problem https://github.com/opencv/opencv/issues/11784
+ self.rm_image = self.create_rotation_matrix(offset=-0.5)
+
+ def apply_image(self, img, interp=None):
+ """
+ img should be a numpy array, formatted as Height * Width * Nchannels
+ """
+ if len(img) == 0 or self.angle % 360 == 0:
+ return img
+ assert img.shape[:2] == (self.h, self.w)
+ interp = interp if interp is not None else self.interp
+ return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp)
+
+ def apply_coords(self, coords):
+ """
+ coords should be a N * 2 array-like, containing N couples of (x, y) points
+ """
+ coords = np.asarray(coords, dtype=float)
+ if len(coords) == 0 or self.angle % 360 == 0:
+ return coords
+ return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :]
+
+ def apply_segmentation(self, segmentation):
+ segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST)
+ return segmentation
+
+ def create_rotation_matrix(self, offset=0):
+ center = (self.center[0] + offset, self.center[1] + offset)
+ rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1)
+ if self.expand:
+ # Find the coordinates of the center of rotation in the new image
+ # The only point for which we know the future coordinates is the center of the image
+ rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :]
+ new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center
+ # shift the rotation center to the new coordinates
+ rm[:, 2] += new_center
+ return rm
+
+ def inverse(self):
+ """
+ The inverse is to rotate it back with expand, and crop to get the original shape.
+ """
+ if not self.expand: # Not possible to inverse if a part of the image is lost
+ raise NotImplementedError()
+ rotation = RotationTransform(
+ self.bound_h, self.bound_w, -self.angle, True, None, self.interp
+ )
+ crop = CropTransform(
+ (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h
+ )
+ return TransformList([rotation, crop])
+
+
+class ColorTransform(Transform):
+ """
+ Generic wrapper for any photometric transforms.
+ These transformations should only affect the color space and
+ not the coordinate space of the image (e.g. annotation
+ coordinates such as bounding boxes should not be changed)
+ """
+
+ def __init__(self, op):
+ """
+ Args:
+ op (Callable): operation to be applied to the image,
+ which takes in an ndarray and returns an ndarray.
+ """
+ if not callable(op):
+ raise ValueError("op parameter should be callable")
+ super().__init__()
+ self._set_attributes(locals())
+
+ def apply_image(self, img):
+ return self.op(img)
+
+ def apply_coords(self, coords):
+ return coords
+
+ def inverse(self):
+ return NoOpTransform()
+
+ def apply_segmentation(self, segmentation):
+ return segmentation
+
+
+class PILColorTransform(ColorTransform):
+ """
+ Generic wrapper for PIL Photometric image transforms,
+ which affect the color space and not the coordinate
+ space of the image.
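+
+ Example (minimal sketch; assumes ``img`` is an HxWx3 uint8 ndarray supplied by
+ the caller):
+ ::
+ from PIL import ImageEnhance
+ t = PILColorTransform(lambda im: ImageEnhance.Contrast(im).enhance(1.5))
+ new_img = t.apply_image(img) # coordinates and boxes are left unchanged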
+ """
+
+ def __init__(self, op):
+ """
+ Args:
+ op (Callable): operation to be applied to the image,
+ which takes in a PIL Image and returns a transformed
+ PIL Image.
+ For reference on possible operations see:
+ - https://pillow.readthedocs.io/en/stable/
+ """
+ if not callable(op):
+ raise ValueError("op parameter should be callable")
+ super().__init__(op)
+
+ def apply_image(self, img):
+ img = Image.fromarray(img)
+ return np.asarray(super().apply_image(img))
+
+
+def HFlip_rotated_box(transform, rotated_boxes):
+ """
+ Apply the horizontal flip transform on rotated boxes.
+
+ Args:
+ rotated_boxes (ndarray): Nx5 floating point array of
+ (x_center, y_center, width, height, angle_degrees) format
+ in absolute coordinates.
+ """
+ # Transform x_center
+ rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0]
+ # Transform angle
+ rotated_boxes[:, 4] = -rotated_boxes[:, 4]
+ return rotated_boxes
+
+
+def Resize_rotated_box(transform, rotated_boxes):
+ """
+ Apply the resizing transform on rotated boxes. For details of how these (approximation)
+ formulas are derived, please refer to :meth:`RotatedBoxes.scale`.
+
+ Args:
+ rotated_boxes (ndarray): Nx5 floating point array of
+ (x_center, y_center, width, height, angle_degrees) format
+ in absolute coordinates.
+ """
+ scale_factor_x = transform.new_w * 1.0 / transform.w
+ scale_factor_y = transform.new_h * 1.0 / transform.h
+ rotated_boxes[:, 0] *= scale_factor_x
+ rotated_boxes[:, 1] *= scale_factor_y
+ theta = rotated_boxes[:, 4] * np.pi / 180.0
+ c = np.cos(theta)
+ s = np.sin(theta)
+ rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s))
+ rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c))
+ rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi
+
+ return rotated_boxes
+
+
+HFlipTransform.register_type("rotated_box", HFlip_rotated_box)
+ResizeTransform.register_type("rotated_box", Resize_rotated_box)
+
+# not necessary any more with latest fvcore
+NoOpTransform.register_type("rotated_box", lambda t, x: x)
diff --git a/vendor/detectron2/detectron2/engine/__init__.py b/vendor/detectron2/detectron2/engine/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..08a61572b4c7d09c8d400e903a96cbf5b2cc4763
--- /dev/null
+++ b/vendor/detectron2/detectron2/engine/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from .launch import *
+from .train_loop import *
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
+
+
+# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__)
+# but still make them available here
+from .hooks import *
+from .defaults import *
diff --git a/vendor/detectron2/detectron2/engine/defaults.py b/vendor/detectron2/detectron2/engine/defaults.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b9525745565479709730cbb5b7dc9cd8afd4707
--- /dev/null
+++ b/vendor/detectron2/detectron2/engine/defaults.py
@@ -0,0 +1,715 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+"""
+This file contains components with some default boilerplate logic that users may need
+in training / testing. They will not work for everyone, but many users may find them useful.
+
+The behavior of functions/classes in this file is subject to change,
+since they are meant to represent the "common default behavior" people need in their projects.
+"""
+
+import argparse
+import logging
+import os
+import sys
+import weakref
+from collections import OrderedDict
+from typing import Optional
+import torch
+from fvcore.nn.precise_bn import get_bn_modules
+from omegaconf import OmegaConf
+from torch.nn.parallel import DistributedDataParallel
+
+import detectron2.data.transforms as T
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode, LazyConfig
+from detectron2.data import (
+ MetadataCatalog,
+ build_detection_test_loader,
+ build_detection_train_loader,
+)
+from detectron2.evaluation import (
+ DatasetEvaluator,
+ inference_on_dataset,
+ print_csv_format,
+ verify_results,
+)
+from detectron2.modeling import build_model
+from detectron2.solver import build_lr_scheduler, build_optimizer
+from detectron2.utils import comm
+from detectron2.utils.collect_env import collect_env_info
+from detectron2.utils.env import seed_all_rng
+from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import setup_logger
+
+from . import hooks
+from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase
+
+__all__ = [
+ "create_ddp_model",
+ "default_argument_parser",
+ "default_setup",
+ "default_writers",
+ "DefaultPredictor",
+ "DefaultTrainer",
+]
+
+
+def create_ddp_model(model, *, fp16_compression=False, **kwargs):
+ """
+ Create a DistributedDataParallel model if there are >1 processes.
+
+ Args:
+ model: a torch.nn.Module
+ fp16_compression: add fp16 compression hooks to the ddp object.
+ See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook
+ kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`.
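+
+ Example (minimal sketch; assumes ``cfg`` is an existing detectron2 config):
+ ::
+ model = build_model(cfg) # a torch.nn.Module
+ model = create_ddp_model(model, broadcast_buffers=False, fp16_compression=True)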
+ """ # noqa
+ if comm.get_world_size() == 1:
+ return model
+ if "device_ids" not in kwargs:
+ kwargs["device_ids"] = [comm.get_local_rank()]
+ ddp = DistributedDataParallel(model, **kwargs)
+ if fp16_compression:
+ from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks
+
+ ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook)
+ return ddp
+
+
+def default_argument_parser(epilog=None):
+ """
+ Create a parser with some common arguments used by detectron2 users.
+
+ Args:
+ epilog (str): epilog passed to ArgumentParser describing the usage.
+
+ Returns:
+ argparse.ArgumentParser:
+ """
+ parser = argparse.ArgumentParser(
+ epilog=epilog
+ or f"""
+Examples:
+
+Run on single machine:
+ $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml
+
+Change some config options:
+ $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001
+
+Run on multiple machines:
+ (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags]
+ (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags]
+""",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
+ parser.add_argument(
+ "--resume",
+ action="store_true",
+ help="Whether to attempt to resume from the checkpoint directory. "
+ "See documentation of `DefaultTrainer.resume_or_load()` for what it means.",
+ )
+ parser.add_argument("--eval-only", action="store_true", help="perform evaluation only")
+ parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*")
+ parser.add_argument("--num-machines", type=int, default=1, help="total number of machines")
+ parser.add_argument(
+ "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)"
+ )
+
+ # PyTorch still may leave orphan processes in multi-gpu training.
+ # Therefore we use a deterministic way to obtain the port,
+ # so that users are aware of orphan processes by seeing the port occupied.
+ port = 2**15 + 2**14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2**14
+ parser.add_argument(
+ "--dist-url",
+ default="tcp://127.0.0.1:{}".format(port),
+ help="initialization URL for pytorch distributed backend. See "
+ "https://pytorch.org/docs/stable/distributed.html for details.",
+ )
+ parser.add_argument(
+ "opts",
+ help="""
+Modify config options at the end of the command. For Yacs configs, use
+space-separated "PATH.KEY VALUE" pairs.
+For python-based LazyConfig, use "path.key=value".
+ """.strip(),
+ default=None,
+ nargs=argparse.REMAINDER,
+ )
+ return parser
+
+
+def _try_get_key(cfg, *keys, default=None):
+ """
+ Try the given keys on cfg in order and return the value of the first key that exists; otherwise return default.
+ """
+ if isinstance(cfg, CfgNode):
+ cfg = OmegaConf.create(cfg.dump())
+ for k in keys:
+ none = object()
+ p = OmegaConf.select(cfg, k, default=none)
+ if p is not none:
+ return p
+ return default
+
+
+def _highlight(code, filename):
+ try:
+ import pygments
+ except ImportError:
+ return code
+
+ from pygments.lexers import Python3Lexer, YamlLexer
+ from pygments.formatters import Terminal256Formatter
+
+ lexer = Python3Lexer() if filename.endswith(".py") else YamlLexer()
+ code = pygments.highlight(code, lexer, Terminal256Formatter(style="monokai"))
+ return code
+
+
+def default_setup(cfg, args):
+ """
+ Perform some basic common setups at the beginning of a job, including:
+
+ 1. Set up the detectron2 logger
+ 2. Log basic information about environment, cmdline arguments, and config
+ 3. Backup the config to the output directory
+
+ Args:
+ cfg (CfgNode or omegaconf.DictConfig): the full config to be used
+ args (argparse.Namespace): the command line arguments to be logged
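+
+ Example (minimal sketch; ``setup_cfg`` is a hypothetical helper that builds the
+ config from ``args.config_file``):
+ ::
+ args = default_argument_parser().parse_args()
+ cfg = setup_cfg(args)
+ default_setup(cfg, args)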
+ """
+ output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir")
+ if comm.is_main_process() and output_dir:
+ PathManager.mkdirs(output_dir)
+
+ rank = comm.get_rank()
+ setup_logger(output_dir, distributed_rank=rank, name="fvcore")
+ logger = setup_logger(output_dir, distributed_rank=rank)
+
+ logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
+ logger.info("Environment info:\n" + collect_env_info())
+
+ logger.info("Command line arguments: " + str(args))
+ if hasattr(args, "config_file") and args.config_file != "":
+ logger.info(
+ "Contents of args.config_file={}:\n{}".format(
+ args.config_file,
+ _highlight(PathManager.open(args.config_file, "r").read(), args.config_file),
+ )
+ )
+
+ if comm.is_main_process() and output_dir:
+ # Note: some of our scripts may expect the existence of
+ # config.yaml in output directory
+ path = os.path.join(output_dir, "config.yaml")
+ if isinstance(cfg, CfgNode):
+ logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml")))
+ with PathManager.open(path, "w") as f:
+ f.write(cfg.dump())
+ else:
+ LazyConfig.save(cfg, path)
+ logger.info("Full config saved to {}".format(path))
+
+ # make sure each worker has a different, yet deterministic seed if specified
+ seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
+ seed_all_rng(None if seed < 0 else seed + rank)
+
+ # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
+ # a typical validation set.
+ if not (hasattr(args, "eval_only") and args.eval_only):
+ torch.backends.cudnn.benchmark = _try_get_key(
+ cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
+ )
+
+
+def default_writers(output_dir: str, max_iter: Optional[int] = None):
+ """
+ Build a list of :class:`EventWriter` to be used.
+ It now consists of a :class:`CommonMetricPrinter`,
+ :class:`TensorboardXWriter` and :class:`JSONWriter`.
+
+ Args:
+ output_dir: directory to store JSON metrics and tensorboard events
+ max_iter: the total number of iterations
+
+ Returns:
+ list[EventWriter]: a list of :class:`EventWriter` objects.
+ """
+ PathManager.mkdirs(output_dir)
+ return [
+ # It may not always print what you want to see, since it prints "common" metrics only.
+ CommonMetricPrinter(max_iter),
+ JSONWriter(os.path.join(output_dir, "metrics.json")),
+ TensorboardXWriter(output_dir),
+ ]
+
+
+class DefaultPredictor:
+ """
+ Create a simple end-to-end predictor with the given config that runs on
+ single device for a single input image.
+
+ Compared to using the model directly, this class adds the following:
+
+ 1. Load checkpoint from `cfg.MODEL.WEIGHTS`.
+ 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`.
+ 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`.
+ 4. Take one input image and produce a single output, instead of a batch.
+
+ This is meant for simple demo purposes, so it does the above steps automatically.
+ This is not meant for benchmarks or running complicated inference logic.
+ If you'd like to do anything more complicated, please refer to its source code as
+ examples to build and use the model manually.
+
+ Attributes:
+ metadata (Metadata): the metadata of the underlying dataset, obtained from
+ cfg.DATASETS.TEST.
+
+ Examples:
+ ::
+ pred = DefaultPredictor(cfg)
+ inputs = cv2.imread("input.jpg")
+ outputs = pred(inputs)
+ """
+
+ def __init__(self, cfg):
+ self.cfg = cfg.clone() # cfg can be modified by model
+ self.model = build_model(self.cfg)
+ self.model.eval()
+ if len(cfg.DATASETS.TEST):
+ self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
+
+ checkpointer = DetectionCheckpointer(self.model)
+ checkpointer.load(cfg.MODEL.WEIGHTS)
+
+ self.aug = T.ResizeShortestEdge(
+ [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
+ )
+
+ self.input_format = cfg.INPUT.FORMAT
+ assert self.input_format in ["RGB", "BGR"], self.input_format
+
+ def __call__(self, original_image):
+ """
+ Args:
+ original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+
+ Returns:
+ predictions (dict):
+ the output of the model for one image only.
+ See :doc:`/tutorials/models` for details about the format.
+ """
+ with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258
+ # Apply pre-processing to image.
+ if self.input_format == "RGB":
+ # whether the model expects BGR inputs or RGB
+ original_image = original_image[:, :, ::-1]
+ height, width = original_image.shape[:2]
+ image = self.aug.get_transform(original_image).apply_image(original_image)
+ image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
+
+ inputs = {"image": image, "height": height, "width": width}
+ predictions = self.model([inputs])[0]
+ return predictions
+
+
+class DefaultTrainer(TrainerBase):
+ """
+ A trainer with default training logic. It does the following:
+
+ 1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader
+ defined by the given config. Create a LR scheduler defined by the config.
+ 2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if it exists, when
+ `resume_or_load` is called.
+ 3. Register a few common hooks defined by the config.
+
+ It is created to simplify the **standard model training workflow** and reduce code boilerplate
+ for users who only need the standard training workflow, with standard features.
+ It means this class makes *many assumptions* about your training logic that
+ may easily become invalid in new research. In fact, any assumptions beyond those made in the
+ :class:`SimpleTrainer` are too much for research.
+
+ The code of this class has been annotated about restrictive assumptions it makes.
+ When they do not work for you, you're encouraged to:
+
+ 1. Overwrite methods of this class, OR:
+ 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and
+ nothing else. You can then add your own hooks if needed. OR:
+ 3. Write your own training loop similar to `tools/plain_train_net.py`.
+
+ See the :doc:`/tutorials/training` tutorials for more details.
+
+ Note that the behavior of this class, like other functions/classes in
+ this file, is not stable, since it is meant to represent the "common default behavior".
+ It is only guaranteed to work well with the standard models and training workflow in detectron2.
+ To obtain more stable behavior, write your own training logic with other public APIs.
+
+ Examples:
+ ::
+ trainer = DefaultTrainer(cfg)
+ trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS
+ trainer.train()
+
+ Attributes:
+ scheduler:
+ checkpointer (DetectionCheckpointer):
+ cfg (CfgNode):
+ """
+
+ def __init__(self, cfg):
+ """
+ Args:
+ cfg (CfgNode):
+ """
+ super().__init__()
+ logger = logging.getLogger("detectron2")
+ if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2
+ setup_logger()
+ cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
+
+ # Assume these objects must be constructed in this order.
+ model = self.build_model(cfg)
+ optimizer = self.build_optimizer(cfg, model)
+ data_loader = self.build_train_loader(cfg)
+
+ model = create_ddp_model(model, broadcast_buffers=False)
+ self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
+ model, data_loader, optimizer
+ )
+
+ self.scheduler = self.build_lr_scheduler(cfg, optimizer)
+ self.checkpointer = DetectionCheckpointer(
+ # Assume you want to save checkpoints together with logs/statistics
+ model,
+ cfg.OUTPUT_DIR,
+ trainer=weakref.proxy(self),
+ )
+ self.start_iter = 0
+ self.max_iter = cfg.SOLVER.MAX_ITER
+ self.cfg = cfg
+
+ self.register_hooks(self.build_hooks())
+
+ def resume_or_load(self, resume=True):
+ """
+ If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by
+ a `last_checkpoint` file), resume from the file. Resuming means loading all
+ available states (e.g. optimizer and scheduler) and updating the iteration counter
+ from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used.
+
+ Otherwise, this is considered an independent training run. The method will load model
+ weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start
+ from iteration 0.
+
+ Args:
+ resume (bool): whether to do resume or not
+ """
+ self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume)
+ if resume and self.checkpointer.has_checkpoint():
+ # The checkpoint stores the training iteration that just finished, thus we start
+ # at the next iteration
+ self.start_iter = self.iter + 1
+
+ def build_hooks(self):
+ """
+ Build a list of default hooks, including timing, evaluation,
+ checkpointing, lr scheduling, precise BN, writing events.
+
+ Returns:
+ list[HookBase]:
+ """
+ cfg = self.cfg.clone()
+ cfg.defrost()
+ cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN
+
+ ret = [
+ hooks.IterationTimer(),
+ hooks.LRScheduler(),
+ hooks.PreciseBN(
+ # Run at the same freq as (but before) evaluation.
+ cfg.TEST.EVAL_PERIOD,
+ self.model,
+ # Build a new data loader to not affect training
+ self.build_train_loader(cfg),
+ cfg.TEST.PRECISE_BN.NUM_ITER,
+ )
+ if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
+ else None,
+ ]
+
+ # Do PreciseBN before checkpointer, because it updates the model and needs to
+ # be saved by the checkpointer.
+ # This is not always the best: if checkpointing has a different frequency,
+ # some checkpoints may have more precise statistics than others.
+ if comm.is_main_process():
+ ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD))
+
+ def test_and_save_results():
+ self._last_eval_results = self.test(self.cfg, self.model)
+ return self._last_eval_results
+
+ # Do evaluation after checkpointer, because then if it fails,
+ # we can use the saved checkpoint to debug.
+ ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))
+
+ if comm.is_main_process():
+ # Here the default print/log frequency of each writer is used.
+ # run writers at the end, so that evaluation metrics are written
+ ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
+ return ret
+
+ def build_writers(self):
+ """
+ Build a list of writers to be used using :func:`default_writers()`.
+ If you'd like a different list of writers, you can overwrite it in
+ your trainer.
+
+ Returns:
+ list[EventWriter]: a list of :class:`EventWriter` objects.
+ """
+ return default_writers(self.cfg.OUTPUT_DIR, self.max_iter)
+
+ def train(self):
+ """
+ Run training.
+
+ Returns:
+ OrderedDict of results, if evaluation is enabled. Otherwise None.
+ """
+ super().train(self.start_iter, self.max_iter)
+ if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process():
+ assert hasattr(
+ self, "_last_eval_results"
+ ), "No evaluation results obtained during training!"
+ verify_results(self.cfg, self._last_eval_results)
+ return self._last_eval_results
+
+ def run_step(self):
+ self._trainer.iter = self.iter
+ self._trainer.run_step()
+
+ def state_dict(self):
+ ret = super().state_dict()
+ ret["_trainer"] = self._trainer.state_dict()
+ return ret
+
+ def load_state_dict(self, state_dict):
+ super().load_state_dict(state_dict)
+ self._trainer.load_state_dict(state_dict["_trainer"])
+
+ @classmethod
+ def build_model(cls, cfg):
+ """
+ Returns:
+ torch.nn.Module:
+
+ It now calls :func:`detectron2.modeling.build_model`.
+ Overwrite it if you'd like a different model.
+ """
+ model = build_model(cfg)
+ logger = logging.getLogger(__name__)
+ logger.info("Model:\n{}".format(model))
+ return model
+
+ @classmethod
+ def build_optimizer(cls, cfg, model):
+ """
+ Returns:
+ torch.optim.Optimizer:
+
+ It now calls :func:`detectron2.solver.build_optimizer`.
+ Overwrite it if you'd like a different optimizer.
+ """
+ return build_optimizer(cfg, model)
+
+ @classmethod
+ def build_lr_scheduler(cls, cfg, optimizer):
+ """
+ It now calls :func:`detectron2.solver.build_lr_scheduler`.
+ Overwrite it if you'd like a different scheduler.
+ """
+ return build_lr_scheduler(cfg, optimizer)
+
+ @classmethod
+ def build_train_loader(cls, cfg):
+ """
+ Returns:
+ iterable
+
+ It now calls :func:`detectron2.data.build_detection_train_loader`.
+ Overwrite it if you'd like a different data loader.
+ """
+ return build_detection_train_loader(cfg)
+
+ @classmethod
+ def build_test_loader(cls, cfg, dataset_name):
+ """
+ Returns:
+ iterable
+
+ It now calls :func:`detectron2.data.build_detection_test_loader`.
+ Overwrite it if you'd like a different data loader.
+ """
+ return build_detection_test_loader(cfg, dataset_name)
+
+ @classmethod
+ def build_evaluator(cls, cfg, dataset_name):
+ """
+ Returns:
+ DatasetEvaluator or None
+
+ It is not implemented by default.
+ """
+ raise NotImplementedError(
+ """
+If you want DefaultTrainer to automatically run evaluation,
+please implement `build_evaluator()` in subclasses (see train_net.py for example).
+Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example).
+"""
+ )
+
+ @classmethod
+ def test(cls, cfg, model, evaluators=None):
+ """
+ Evaluate the given model. The given model is expected to already contain
+ weights to evaluate.
+
+ Args:
+ cfg (CfgNode):
+ model (nn.Module):
+ evaluators (list[DatasetEvaluator] or None): if None, will call
+ :meth:`build_evaluator`. Otherwise, must have the same length as
+ ``cfg.DATASETS.TEST``.
+
+ Returns:
+ dict: a dict of result metrics
+ """
+ logger = logging.getLogger(__name__)
+ if isinstance(evaluators, DatasetEvaluator):
+ evaluators = [evaluators]
+ if evaluators is not None:
+ assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
+ len(cfg.DATASETS.TEST), len(evaluators)
+ )
+
+ results = OrderedDict()
+ for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
+ data_loader = cls.build_test_loader(cfg, dataset_name)
+ # When evaluators are passed in as arguments,
+ # implicitly assume that evaluators can be created before data_loader.
+ if evaluators is not None:
+ evaluator = evaluators[idx]
+ else:
+ try:
+ evaluator = cls.build_evaluator(cfg, dataset_name)
+ except NotImplementedError:
+ logger.warning(
+ "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
+ "or implement its `build_evaluator` method."
+ )
+ results[dataset_name] = {}
+ continue
+ results_i = inference_on_dataset(model, data_loader, evaluator)
+ results[dataset_name] = results_i
+ if comm.is_main_process():
+ assert isinstance(
+ results_i, dict
+ ), "Evaluator must return a dict on the main process. Got {} instead.".format(
+ results_i
+ )
+ logger.info("Evaluation results for {} in csv format:".format(dataset_name))
+ print_csv_format(results_i)
+
+ if len(results) == 1:
+ results = list(results.values())[0]
+ return results
+
+ @staticmethod
+ def auto_scale_workers(cfg, num_workers: int):
+ """
+ When the config is defined for a certain number of workers (according to
+ ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of
+ workers currently in use, returns a new cfg where the total batch size
+ is scaled so that the per-GPU batch size stays the same as the
+ original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``.
+
+ Other config options are also scaled accordingly:
+ * training steps and warmup steps are scaled inversely proportionally.
+ * the learning rate is scaled proportionally, following :paper:`ImageNet in 1h`.
+
+ For example, with the original config like the following:
+
+ .. code-block:: yaml
+
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.1
+ REFERENCE_WORLD_SIZE: 8
+ MAX_ITER: 5000
+ STEPS: (4000,)
+ CHECKPOINT_PERIOD: 1000
+
+ When this config is used on 16 GPUs instead of the reference number 8,
+ calling this method will return a new config with:
+
+ .. code-block:: yaml
+
+ IMS_PER_BATCH: 32
+ BASE_LR: 0.2
+ REFERENCE_WORLD_SIZE: 16
+ MAX_ITER: 2500
+ STEPS: (2000,)
+ CHECKPOINT_PERIOD: 500
+
+ Note that both the original config and this new config can be trained on 16 GPUs.
+ It's up to the user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``).
+
+ Returns:
+ CfgNode: a new config. Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``.
+ """
+ old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE
+ if old_world_size == 0 or old_world_size == num_workers:
+ return cfg
+ cfg = cfg.clone()
+ frozen = cfg.is_frozen()
+ cfg.defrost()
+
+ assert (
+ cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0
+ ), "Invalid REFERENCE_WORLD_SIZE in config!"
+ scale = num_workers / old_world_size
+ bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale))
+ lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale
+ max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale))
+ warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale))
+ cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS)
+ cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale))
+ cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale))
+ cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant
+ logger = logging.getLogger(__name__)
+ logger.info(
+ f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, "
+ f"max_iter={max_iter}, warmup={warmup_iter}."
+ )
+
+ if frozen:
+ cfg.freeze()
+ return cfg
+
+
+# Access basic attributes from the underlying trainer
+for _attr in ["model", "data_loader", "optimizer"]:
+ setattr(
+ DefaultTrainer,
+ _attr,
+ property(
+ # getter
+ lambda self, x=_attr: getattr(self._trainer, x),
+ # setter
+ lambda self, value, x=_attr: setattr(self._trainer, x, value),
+ ),
+ )
diff --git a/vendor/detectron2/detectron2/engine/hooks.py b/vendor/detectron2/detectron2/engine/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc37af0fd3a276eb389f7667be113b41ca53f012
--- /dev/null
+++ b/vendor/detectron2/detectron2/engine/hooks.py
@@ -0,0 +1,690 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import datetime
+import itertools
+import logging
+import math
+import operator
+import os
+import tempfile
+import time
+import warnings
+from collections import Counter
+import torch
+from fvcore.common.checkpoint import Checkpointer
+from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
+from fvcore.common.param_scheduler import ParamScheduler
+from fvcore.common.timer import Timer
+from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats
+
+import detectron2.utils.comm as comm
+from detectron2.evaluation.testing import flatten_results_dict
+from detectron2.solver import LRMultiplier
+from detectron2.solver import LRScheduler as _LRScheduler
+from detectron2.utils.events import EventStorage, EventWriter
+from detectron2.utils.file_io import PathManager
+
+from .train_loop import HookBase
+
+__all__ = [
+ "CallbackHook",
+ "IterationTimer",
+ "PeriodicWriter",
+ "PeriodicCheckpointer",
+ "BestCheckpointer",
+ "LRScheduler",
+ "AutogradProfiler",
+ "EvalHook",
+ "PreciseBN",
+ "TorchProfiler",
+ "TorchMemoryStats",
+]
+
+
+"""
+Implement some common hooks.
+"""
+
+
+class CallbackHook(HookBase):
+ """
+ Create a hook using callback functions provided by the user.
+ """
+
+ def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None):
+ """
+ Each argument is a function that takes one argument: the trainer.
+ """
+ self._before_train = before_train
+ self._before_step = before_step
+ self._after_step = after_step
+ self._after_train = after_train
+
+ def before_train(self):
+ if self._before_train:
+ self._before_train(self.trainer)
+
+ def after_train(self):
+ if self._after_train:
+ self._after_train(self.trainer)
+ # The functions may be closures that hold reference to the trainer
+ # Therefore, delete them to avoid circular reference.
+ del self._before_train, self._after_train
+ del self._before_step, self._after_step
+
+ def before_step(self):
+ if self._before_step:
+ self._before_step(self.trainer)
+
+ def after_step(self):
+ if self._after_step:
+ self._after_step(self.trainer)
+
+
+class IterationTimer(HookBase):
+ """
+ Track the time spent for each iteration (each run_step call in the trainer).
+ Print a summary at the end of training.
+
+ This hook uses the time between the call to its :meth:`before_step`
+ and :meth:`after_step` methods.
+ Under the convention that :meth:`before_step` of all hooks should only
+ take a negligible amount of time, the :class:`IterationTimer` hook should be
+ placed at the beginning of the list of hooks to obtain accurate timing.
+ """
+
+ def __init__(self, warmup_iter=3):
+ """
+ Args:
+ warmup_iter (int): the number of iterations at the beginning to exclude
+ from timing.
+ """
+ self._warmup_iter = warmup_iter
+ self._step_timer = Timer()
+ self._start_time = time.perf_counter()
+ self._total_timer = Timer()
+
+ def before_train(self):
+ self._start_time = time.perf_counter()
+ self._total_timer.reset()
+ self._total_timer.pause()
+
+ def after_train(self):
+ logger = logging.getLogger(__name__)
+ total_time = time.perf_counter() - self._start_time
+ total_time_minus_hooks = self._total_timer.seconds()
+ hook_time = total_time - total_time_minus_hooks
+
+ num_iter = self.trainer.storage.iter + 1 - self.trainer.start_iter - self._warmup_iter
+
+ if num_iter > 0 and total_time_minus_hooks > 0:
+ # Speed is meaningful only after warmup
+ # NOTE this format is parsed by grep in some scripts
+ logger.info(
+ "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
+ num_iter,
+ str(datetime.timedelta(seconds=int(total_time_minus_hooks))),
+ total_time_minus_hooks / num_iter,
+ )
+ )
+
+ logger.info(
+ "Total training time: {} ({} on hooks)".format(
+ str(datetime.timedelta(seconds=int(total_time))),
+ str(datetime.timedelta(seconds=int(hook_time))),
+ )
+ )
+
+ def before_step(self):
+ self._step_timer.reset()
+ self._total_timer.resume()
+
+ def after_step(self):
+ # +1 because we're in after_step, the current step is done
+ # but not yet counted
+ iter_done = self.trainer.storage.iter - self.trainer.start_iter + 1
+ if iter_done >= self._warmup_iter:
+ sec = self._step_timer.seconds()
+ self.trainer.storage.put_scalars(time=sec)
+ else:
+ self._start_time = time.perf_counter()
+ self._total_timer.reset()
+
+ self._total_timer.pause()
+
+
+class PeriodicWriter(HookBase):
+ """
+ Write events to EventStorage (by calling ``writer.write()``) periodically.
+
+ It is executed every ``period`` iterations and after the last iteration.
+ Note that ``period`` does not affect how data is smoothed by each writer.
+ """
+
+ def __init__(self, writers, period=20):
+ """
+ Args:
+ writers (list[EventWriter]): a list of EventWriter objects
+ period (int):
+ """
+ self._writers = writers
+ for w in writers:
+ assert isinstance(w, EventWriter), w
+ self._period = period
+
+ def after_step(self):
+ if (self.trainer.iter + 1) % self._period == 0 or (
+ self.trainer.iter == self.trainer.max_iter - 1
+ ):
+ for writer in self._writers:
+ writer.write()
+
+ def after_train(self):
+ for writer in self._writers:
+ # If any new data is found (e.g. produced by other after_train),
+ # write them before closing
+ writer.write()
+ writer.close()
+
+
+class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase):
+ """
+ Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook.
+
+ Note that when used as a hook,
+ it is unable to save additional data other than what's defined
+ by the given `checkpointer`.
+
+ It is executed every ``period`` iterations and after the last iteration.
+ """
+
+ def before_train(self):
+ self.max_iter = self.trainer.max_iter
+
+ def after_step(self):
+ # No way to use **kwargs
+ self.step(self.trainer.iter)
+
+
+class BestCheckpointer(HookBase):
+ """
+ Checkpoints best weights based off given metric.
+
+ This hook should be used in conjunction with, and executed after, the hook
+ that produces the metric, e.g. `EvalHook`.
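+
+ Example (minimal sketch; assumes ``cfg`` and ``trainer`` exist and that an
+ `EvalHook` already stores the "bbox/AP50" metric):
+ ::
+ best_ckpt = BestCheckpointer(cfg.TEST.EVAL_PERIOD, trainer.checkpointer, "bbox/AP50")
+ trainer.register_hooks([best_ckpt])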
+ """
+
+ def __init__(
+ self,
+ eval_period: int,
+ checkpointer: Checkpointer,
+ val_metric: str,
+ mode: str = "max",
+ file_prefix: str = "model_best",
+ ) -> None:
+ """
+ Args:
+ eval_period (int): the period `EvalHook` is set to run.
+ checkpointer: the checkpointer object used to save checkpoints.
+ val_metric (str): validation metric to track for best checkpoint, e.g. "bbox/AP50"
+ mode (str): one of {'max', 'min'}. controls whether the chosen val metric should be
+ maximized or minimized, e.g. for "bbox/AP50" it should be "max"
+ file_prefix (str): the prefix of checkpoint's filename, defaults to "model_best"
+ """
+ self._logger = logging.getLogger(__name__)
+ self._period = eval_period
+ self._val_metric = val_metric
+ assert mode in [
+ "max",
+ "min",
+ ], f'Mode "{mode}" to `BestCheckpointer` is unknown. It should be one of {"max", "min"}.'
+ if mode == "max":
+ self._compare = operator.gt
+ else:
+ self._compare = operator.lt
+ self._checkpointer = checkpointer
+ self._file_prefix = file_prefix
+ self.best_metric = None
+ self.best_iter = None
+
+ def _update_best(self, val, iteration):
+ if math.isnan(val) or math.isinf(val):
+ return False
+ self.best_metric = val
+ self.best_iter = iteration
+ return True
+
+ def _best_checking(self):
+ metric_tuple = self.trainer.storage.latest().get(self._val_metric)
+ if metric_tuple is None:
+ self._logger.warning(
+ f"Given val metric {self._val_metric} does not seem to be computed/stored."
+ "Will not be checkpointing based on it."
+ )
+ return
+ else:
+ latest_metric, metric_iter = metric_tuple
+
+ if self.best_metric is None:
+ if self._update_best(latest_metric, metric_iter):
+ additional_state = {"iteration": metric_iter}
+ self._checkpointer.save(f"{self._file_prefix}", **additional_state)
+ self._logger.info(
+ f"Saved first model at {self.best_metric:0.5f} @ {self.best_iter} steps"
+ )
+ elif self._compare(latest_metric, self.best_metric):
+ additional_state = {"iteration": metric_iter}
+ self._checkpointer.save(f"{self._file_prefix}", **additional_state)
+ self._logger.info(
+ f"Saved best model as latest eval score for {self._val_metric} is "
+ f"{latest_metric:0.5f}, better than last best score "
+ f"{self.best_metric:0.5f} @ iteration {self.best_iter}."
+ )
+ self._update_best(latest_metric, metric_iter)
+ else:
+ self._logger.info(
+ f"Not saving as latest eval score for {self._val_metric} is {latest_metric:0.5f}, "
+ f"not better than best score {self.best_metric:0.5f} @ iteration {self.best_iter}."
+ )
+
+ def after_step(self):
+ # same conditions as `EvalHook`
+ next_iter = self.trainer.iter + 1
+ if (
+ self._period > 0
+ and next_iter % self._period == 0
+ and next_iter != self.trainer.max_iter
+ ):
+ self._best_checking()
+
+ def after_train(self):
+ # same conditions as `EvalHook`
+ if self.trainer.iter + 1 >= self.trainer.max_iter:
+ self._best_checking()
+
+
+class LRScheduler(HookBase):
+ """
+ A hook which executes a torch builtin LR scheduler and summarizes the LR.
+ It is executed after every iteration.
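+
+ Example (minimal sketch; assumes ``trainer`` supplies the optimizer when it is
+ not passed explicitly):
+ ::
+ from fvcore.common.param_scheduler import CosineParamScheduler
+ trainer.register_hooks([LRScheduler(scheduler=CosineParamScheduler(1.0, 0.0))])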
+ """
+
+ def __init__(self, optimizer=None, scheduler=None):
+ """
+ Args:
+ optimizer (torch.optim.Optimizer):
+ scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler):
+ if a :class:`ParamScheduler` object, it defines the multiplier over the base LR
+ in the optimizer.
+
+ If any argument is not given, will try to obtain it from the trainer.
+ """
+ self._optimizer = optimizer
+ self._scheduler = scheduler
+
+ def before_train(self):
+ self._optimizer = self._optimizer or self.trainer.optimizer
+ if isinstance(self.scheduler, ParamScheduler):
+ self._scheduler = LRMultiplier(
+ self._optimizer,
+ self.scheduler,
+ self.trainer.max_iter,
+ last_iter=self.trainer.iter - 1,
+ )
+ self._best_param_group_id = LRScheduler.get_best_param_group_id(self._optimizer)
+
+ @staticmethod
+ def get_best_param_group_id(optimizer):
+ # NOTE: some heuristics on what LR to summarize
+ # summarize the param group with most parameters
+ largest_group = max(len(g["params"]) for g in optimizer.param_groups)
+
+ if largest_group == 1:
+ # If all groups have one parameter,
+ # then find the most common initial LR, and use it for summary
+ lr_count = Counter([g["lr"] for g in optimizer.param_groups])
+ lr = lr_count.most_common()[0][0]
+ for i, g in enumerate(optimizer.param_groups):
+ if g["lr"] == lr:
+ return i
+ else:
+ for i, g in enumerate(optimizer.param_groups):
+ if len(g["params"]) == largest_group:
+ return i
+
+ def after_step(self):
+ lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
+ self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
+ self.scheduler.step()
+
+ @property
+ def scheduler(self):
+ return self._scheduler or self.trainer.scheduler
+
+ def state_dict(self):
+ if isinstance(self.scheduler, _LRScheduler):
+ return self.scheduler.state_dict()
+ return {}
+
+ def load_state_dict(self, state_dict):
+ if isinstance(self.scheduler, _LRScheduler):
+ logger = logging.getLogger(__name__)
+ logger.info("Loading scheduler from state_dict ...")
+ self.scheduler.load_state_dict(state_dict)
+
+
+class TorchProfiler(HookBase):
+ """
+ A hook which runs `torch.profiler.profile`.
+
+ Examples:
+ ::
+ hooks.TorchProfiler(
+ lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
+ )
+
+ The above example will run the profiler for iteration 10~20 and dump
+ results to ``OUTPUT_DIR``. We did not profile the first few iterations
+ because they are typically slower than the rest.
+ The result files can be loaded in the ``chrome://tracing`` page in chrome browser,
+ and the tensorboard visualizations can be visualized using
+ ``tensorboard --logdir OUTPUT_DIR/log``
+ """
+
+ def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True):
+ """
+ Args:
+ enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
+ and returns whether to enable the profiler.
+ It will be called once every step, and can be used to select which steps to profile.
+ output_dir (str): the output directory to dump tracing files.
+ activities (iterable): same as in `torch.profiler.profile`.
+ save_tensorboard (bool): whether to save tensorboard visualizations at (output_dir)/log/
+ """
+ self._enable_predicate = enable_predicate
+ self._activities = activities
+ self._output_dir = output_dir
+ self._save_tensorboard = save_tensorboard
+
+ def before_step(self):
+ if self._enable_predicate(self.trainer):
+ if self._save_tensorboard:
+ on_trace_ready = torch.profiler.tensorboard_trace_handler(
+ os.path.join(
+ self._output_dir,
+ "log",
+ "profiler-tensorboard-iter{}".format(self.trainer.iter),
+ ),
+ f"worker{comm.get_rank()}",
+ )
+ else:
+ on_trace_ready = None
+ self._profiler = torch.profiler.profile(
+ activities=self._activities,
+ on_trace_ready=on_trace_ready,
+ record_shapes=True,
+ profile_memory=True,
+ with_stack=True,
+ with_flops=True,
+ )
+ self._profiler.__enter__()
+ else:
+ self._profiler = None
+
+ def after_step(self):
+ if self._profiler is None:
+ return
+ self._profiler.__exit__(None, None, None)
+ if not self._save_tensorboard:
+ PathManager.mkdirs(self._output_dir)
+ out_file = os.path.join(
+ self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
+ )
+ if "://" not in out_file:
+ self._profiler.export_chrome_trace(out_file)
+ else:
+ # Support non-posix filesystems
+ with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
+ tmp_file = os.path.join(d, "tmp.json")
+ self._profiler.export_chrome_trace(tmp_file)
+ with open(tmp_file) as f:
+ content = f.read()
+ with PathManager.open(out_file, "w") as f:
+ f.write(content)
+
+
+class AutogradProfiler(TorchProfiler):
+ """
+ A hook which runs `torch.autograd.profiler.profile`.
+
+ Examples:
+ ::
+ hooks.AutogradProfiler(
+ lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
+ )
+
+ The above example will run the profiler for iteration 10~20 and dump
+ results to ``OUTPUT_DIR``. We did not profile the first few iterations
+ because they are typically slower than the rest.
+ The result files can be loaded in the ``chrome://tracing`` page in chrome browser.
+
+ Note:
+ When used together with NCCL on older version of GPUs,
+ autograd profiler may cause deadlock because it unnecessarily allocates
+ memory on every device it sees. The memory management calls, if
+ interleaved with NCCL calls, lead to deadlock on GPUs that do not
+ support ``cudaLaunchCooperativeKernelMultiDevice``.
+ """
+
+ def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
+ """
+ Args:
+ enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
+ and returns whether to enable the profiler.
+ It will be called once every step, and can be used to select which steps to profile.
+ output_dir (str): the output directory to dump tracing files.
+ use_cuda (bool): same as in `torch.autograd.profiler.profile`.
+ """
+ warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.")
+ self._enable_predicate = enable_predicate
+ self._use_cuda = use_cuda
+ self._output_dir = output_dir
+
+ def before_step(self):
+ if self._enable_predicate(self.trainer):
+ self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
+ self._profiler.__enter__()
+ else:
+ self._profiler = None
+
+
+class EvalHook(HookBase):
+ """
+ Run an evaluation function periodically, and at the end of training.
+
+ It is executed every ``eval_period`` iterations and after the last iteration.
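+
+ Example (minimal sketch; ``do_test`` is a hypothetical evaluation function that
+ returns a nested dict of float metrics; ``cfg``, ``model`` and ``trainer`` are
+ assumed to exist):
+ ::
+ trainer.register_hooks([EvalHook(cfg.TEST.EVAL_PERIOD, lambda: do_test(cfg, model))])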
+ """
+
+ def __init__(self, eval_period, eval_function, eval_after_train=True):
+ """
+ Args:
+ eval_period (int): the period to run `eval_function`. Set to 0 to
+ not evaluate periodically (but still evaluate after the last iteration
+ if `eval_after_train` is True).
+ eval_function (callable): a function which takes no arguments, and
+ returns a nested dict of evaluation metrics.
+ eval_after_train (bool): whether to evaluate after the last iteration
+
+ Note:
+ This hook must be enabled in all or none workers.
+ If you would like only certain workers to perform evaluation,
+ give other workers a no-op function (`eval_function=lambda: None`).
+ """
+ self._period = eval_period
+ self._func = eval_function
+ self._eval_after_train = eval_after_train
+
+ def _do_eval(self):
+ results = self._func()
+
+ if results:
+ assert isinstance(
+ results, dict
+ ), "Eval function must return a dict. Got {} instead.".format(results)
+
+ flattened_results = flatten_results_dict(results)
+ for k, v in flattened_results.items():
+ try:
+ v = float(v)
+ except Exception as e:
+ raise ValueError(
+ "[EvalHook] eval_function should return a nested dict of float. "
+ "Got '{}: {}' instead.".format(k, v)
+ ) from e
+ self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)
+
+ # Evaluation may take different time among workers.
+ # A barrier makes them start the next iteration together.
+ comm.synchronize()
+
+ def after_step(self):
+ next_iter = self.trainer.iter + 1
+ if self._period > 0 and next_iter % self._period == 0:
+ # do the last eval in after_train
+ if next_iter != self.trainer.max_iter:
+ self._do_eval()
+
+ def after_train(self):
+ # This condition is to prevent the eval from running after a failed training
+ if self._eval_after_train and self.trainer.iter + 1 >= self.trainer.max_iter:
+ self._do_eval()
+ # func is likely a closure that holds reference to the trainer
+ # therefore we clean it to avoid circular reference in the end
+ del self._func
+
+
+class PreciseBN(HookBase):
+ """
+ The standard implementation of BatchNorm uses EMA in inference, which is
+ sometimes suboptimal.
+ This class computes the true average of statistics rather than the moving average,
+ and puts the true averages into every BN layer in the given model.
+
+ It is executed every ``period`` iterations and after the last iteration.
+ """
+
+ def __init__(self, period, model, data_loader, num_iter):
+ """
+ Args:
+ period (int): the period this hook is run, or 0 to not run during training.
+ The hook will always run at the end of training.
+ model (nn.Module): a module whose all BN layers in training mode will be
+ updated by precise BN.
+ Note that the user is responsible for ensuring that the BN layers to be
+ updated are in training mode when this hook is triggered.
+ data_loader (iterable): it will produce data to be run by `model(data)`.
+ num_iter (int): number of iterations used to compute the precise
+ statistics.
+ """
+ self._logger = logging.getLogger(__name__)
+ if len(get_bn_modules(model)) == 0:
+ self._logger.info(
+ "PreciseBN is disabled because model does not contain BN layers in training mode."
+ )
+ self._disabled = True
+ return
+
+ self._model = model
+ self._data_loader = data_loader
+ self._num_iter = num_iter
+ self._period = period
+ self._disabled = False
+
+ self._data_iter = None
+
+ def after_step(self):
+ next_iter = self.trainer.iter + 1
+ is_final = next_iter == self.trainer.max_iter
+ if is_final or (self._period > 0 and next_iter % self._period == 0):
+ self.update_stats()
+
+ def update_stats(self):
+ """
+ Update the model with precise statistics. Users can manually call this method.
+ """
+ if self._disabled:
+ return
+
+ if self._data_iter is None:
+ self._data_iter = iter(self._data_loader)
+
+ def data_loader():
+ for num_iter in itertools.count(1):
+ if num_iter % 100 == 0:
+ self._logger.info(
+ "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter)
+ )
+ # This way we can reuse the same iterator
+ yield next(self._data_iter)
+
+ with EventStorage(): # capture events in a new storage to discard them
+ self._logger.info(
+ "Running precise-BN for {} iterations... ".format(self._num_iter)
+ + "Note that this could produce different statistics every time."
+ )
+ update_bn_stats(self._model, data_loader(), self._num_iter)
+
+
+class TorchMemoryStats(HookBase):
+ """
+ Writes pytorch's cuda memory statistics periodically.
+ """
+
+ def __init__(self, period=20, max_runs=10):
+ """
+ Args:
+            period (int): Output stats every 'period' iterations
+            max_runs (int): Stop logging after 'max_runs' reports
+ """
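+        # Illustrative sketch (assumed trainer object): registering
+        #     trainer.register_hooks([TorchMemoryStats(period=20, max_runs=10)])
+        # logs CUDA memory usage every 20 iterations and dumps a full
+        # torch.cuda.memory_summary() after the 10th report.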
+
+ self._logger = logging.getLogger(__name__)
+ self._period = period
+ self._max_runs = max_runs
+ self._runs = 0
+
+ def after_step(self):
+ if self._runs > self._max_runs:
+ return
+
+ if (self.trainer.iter + 1) % self._period == 0 or (
+ self.trainer.iter == self.trainer.max_iter - 1
+ ):
+ if torch.cuda.is_available():
+ max_reserved_mb = torch.cuda.max_memory_reserved() / 1024.0 / 1024.0
+ reserved_mb = torch.cuda.memory_reserved() / 1024.0 / 1024.0
+ max_allocated_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
+ allocated_mb = torch.cuda.memory_allocated() / 1024.0 / 1024.0
+
+ self._logger.info(
+ (
+ " iter: {} "
+ " max_reserved_mem: {:.0f}MB "
+ " reserved_mem: {:.0f}MB "
+ " max_allocated_mem: {:.0f}MB "
+ " allocated_mem: {:.0f}MB "
+ ).format(
+ self.trainer.iter,
+ max_reserved_mb,
+ reserved_mb,
+ max_allocated_mb,
+ allocated_mb,
+ )
+ )
+
+ self._runs += 1
+ if self._runs == self._max_runs:
+ mem_summary = torch.cuda.memory_summary()
+ self._logger.info("\n" + mem_summary)
+
+ torch.cuda.reset_peak_memory_stats()
diff --git a/vendor/detectron2/detectron2/engine/launch.py b/vendor/detectron2/detectron2/engine/launch.py
new file mode 100644
index 0000000000000000000000000000000000000000..7052c5040e4d9e6553a1b371518cb53fb056524e
--- /dev/null
+++ b/vendor/detectron2/detectron2/engine/launch.py
@@ -0,0 +1,123 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+from datetime import timedelta
+import torch
+import torch.distributed as dist
+import torch.multiprocessing as mp
+
+from detectron2.utils import comm
+
+__all__ = ["DEFAULT_TIMEOUT", "launch"]
+
+DEFAULT_TIMEOUT = timedelta(minutes=30)
+
+
+def _find_free_port():
+ import socket
+
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ # Binding to port 0 will cause the OS to find an available port for us
+ sock.bind(("", 0))
+ port = sock.getsockname()[1]
+ sock.close()
+ # NOTE: there is still a chance the port could be taken by other processes.
+ return port
+
+
+def launch(
+ main_func,
+ # Should be num_processes_per_machine, but kept for compatibility.
+ num_gpus_per_machine,
+ num_machines=1,
+ machine_rank=0,
+ dist_url=None,
+ args=(),
+ timeout=DEFAULT_TIMEOUT,
+):
+ """
+ Launch multi-process or distributed training.
+ This function must be called on all machines involved in the training.
+ It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
+
+ Args:
+ main_func: a function that will be called by `main_func(*args)`
+ num_gpus_per_machine (int): number of processes per machine. When
+ using GPUs, this should be the number of GPUs.
+ num_machines (int): the total number of machines
+ machine_rank (int): the rank of this machine
+ dist_url (str): url to connect to for distributed jobs, including protocol
+ e.g. "tcp://127.0.0.1:8686".
+ Can be set to "auto" to automatically select a free port on localhost
+ timeout (timedelta): timeout of the distributed workers
+ args (tuple): arguments passed to main_func
+ """
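+    # Illustrative call sketch (assumed main/cfg names, not part of the original
+    # file): on each of two 8-GPU machines one would run
+    #     launch(main, num_gpus_per_machine=8, num_machines=2,
+    #            machine_rank=<0 or 1>, dist_url="tcp://<machine0-ip>:29500", args=(cfg,))
+    # which yields a world size of 16 processes.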
+ world_size = num_machines * num_gpus_per_machine
+ if world_size > 1:
+ # https://github.com/pytorch/pytorch/pull/14391
+ # TODO prctl in spawned processes
+
+ if dist_url == "auto":
+ assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
+ port = _find_free_port()
+ dist_url = f"tcp://127.0.0.1:{port}"
+ if num_machines > 1 and dist_url.startswith("file://"):
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
+ )
+
+ mp.start_processes(
+ _distributed_worker,
+ nprocs=num_gpus_per_machine,
+ args=(
+ main_func,
+ world_size,
+ num_gpus_per_machine,
+ machine_rank,
+ dist_url,
+ args,
+ timeout,
+ ),
+ daemon=False,
+ )
+ else:
+ main_func(*args)
+
+
+def _distributed_worker(
+ local_rank,
+ main_func,
+ world_size,
+ num_gpus_per_machine,
+ machine_rank,
+ dist_url,
+ args,
+ timeout=DEFAULT_TIMEOUT,
+):
+ has_gpu = torch.cuda.is_available()
+ if has_gpu:
+ assert num_gpus_per_machine <= torch.cuda.device_count()
+ global_rank = machine_rank * num_gpus_per_machine + local_rank
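+    # e.g. with num_gpus_per_machine=4, the worker with machine_rank=1 and
+    # local_rank=2 gets global_rank = 1 * 4 + 2 = 6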
+ try:
+ dist.init_process_group(
+ backend="NCCL" if has_gpu else "GLOO",
+ init_method=dist_url,
+ world_size=world_size,
+ rank=global_rank,
+ timeout=timeout,
+ )
+ except Exception as e:
+ logger = logging.getLogger(__name__)
+ logger.error("Process group URL: {}".format(dist_url))
+ raise e
+
+ # Setup the local process group.
+ comm.create_local_process_group(num_gpus_per_machine)
+ if has_gpu:
+ torch.cuda.set_device(local_rank)
+
+ # synchronize is needed here to prevent a possible timeout after calling init_process_group
+ # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
+ comm.synchronize()
+
+ main_func(*args)
diff --git a/vendor/detectron2/detectron2/engine/train_loop.py b/vendor/detectron2/detectron2/engine/train_loop.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f6b96dc66af2d4c93028219a4b13ea16c719892
--- /dev/null
+++ b/vendor/detectron2/detectron2/engine/train_loop.py
@@ -0,0 +1,528 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+import concurrent.futures
+import logging
+import numpy as np
+import time
+import weakref
+from typing import List, Mapping, Optional
+import torch
+from torch.nn.parallel import DataParallel, DistributedDataParallel
+
+import detectron2.utils.comm as comm
+from detectron2.utils.events import EventStorage, get_event_storage
+from detectron2.utils.logger import _log_api_usage
+
+__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"]
+
+
+class HookBase:
+ """
+ Base class for hooks that can be registered with :class:`TrainerBase`.
+
+    Each hook can implement several methods. The way the four most common ones
+    are called is demonstrated in the following snippet:
+ ::
+ hook.before_train()
+ for iter in range(start_iter, max_iter):
+ hook.before_step()
+ trainer.run_step()
+ hook.after_step()
+ iter += 1
+ hook.after_train()
+
+ Notes:
+ 1. In the hook method, users can access ``self.trainer`` to access more
+ properties about the context (e.g., model, current iteration, or config
+ if using :class:`DefaultTrainer`).
+
+ 2. A hook that does something in :meth:`before_step` can often be
+ implemented equivalently in :meth:`after_step`.
+ If the hook takes non-trivial time, it is strongly recommended to
+ implement the hook in :meth:`after_step` instead of :meth:`before_step`.
+ The convention is that :meth:`before_step` should only take negligible time.
+
+ Following this convention will allow hooks that do care about the difference
+ between :meth:`before_step` and :meth:`after_step` (e.g., timer) to
+ function properly.
+
+ """
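+    # A minimal custom hook is just a subclass that overrides the methods it
+    # needs (illustrative sketch, not part of the original file):
+    #     class IterPrinterHook(HookBase):
+    #         def after_step(self):
+    #             print(self.trainer.iter)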
+
+ trainer: "TrainerBase" = None
+ """
+ A weak reference to the trainer object. Set by the trainer when the hook is registered.
+ """
+
+ def before_train(self):
+ """
+ Called before the first iteration.
+ """
+ pass
+
+ def after_train(self):
+ """
+ Called after the last iteration.
+ """
+ pass
+
+ def before_step(self):
+ """
+ Called before each iteration.
+ """
+ pass
+
+ def after_backward(self):
+ """
+ Called after the backward pass of each iteration.
+ """
+ pass
+
+ def after_step(self):
+ """
+ Called after each iteration.
+ """
+ pass
+
+ def state_dict(self):
+ """
+ Hooks are stateless by default, but can be made checkpointable by
+ implementing `state_dict` and `load_state_dict`.
+ """
+ return {}
+
+
+class TrainerBase:
+ """
+ Base class for iterative trainer with hooks.
+
+    The only assumption we make here is that the training runs in a loop.
+    A subclass can implement what the loop is.
+    We make no assumptions about the existence of dataloader, optimizer, model, etc.
+
+ Attributes:
+ iter(int): the current iteration.
+
+ start_iter(int): The iteration to start with.
+ By convention the minimum possible value is 0.
+
+ max_iter(int): The iteration to end training.
+
+ storage(EventStorage): An EventStorage that's opened during the course of training.
+ """
+
+ def __init__(self) -> None:
+ self._hooks: List[HookBase] = []
+ self.iter: int = 0
+ self.start_iter: int = 0
+ self.max_iter: int
+ self.storage: EventStorage
+ _log_api_usage("trainer." + self.__class__.__name__)
+
+ def register_hooks(self, hooks: List[Optional[HookBase]]) -> None:
+ """
+ Register hooks to the trainer. The hooks are executed in the order
+ they are registered.
+
+ Args:
+ hooks (list[Optional[HookBase]]): list of hooks
+ """
+ hooks = [h for h in hooks if h is not None]
+ for h in hooks:
+ assert isinstance(h, HookBase)
+ # To avoid circular reference, hooks and trainer cannot own each other.
+ # This normally does not matter, but will cause memory leak if the
+ # involved objects contain __del__:
+ # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/
+ h.trainer = weakref.proxy(self)
+ self._hooks.extend(hooks)
+
+ def train(self, start_iter: int, max_iter: int):
+ """
+ Args:
+ start_iter, max_iter (int): See docs above
+ """
+ logger = logging.getLogger(__name__)
+ logger.info("Starting training from iteration {}".format(start_iter))
+
+ self.iter = self.start_iter = start_iter
+ self.max_iter = max_iter
+
+ with EventStorage(start_iter) as self.storage:
+ try:
+ self.before_train()
+ for self.iter in range(start_iter, max_iter):
+ self.before_step()
+ self.run_step()
+ self.after_step()
+ # self.iter == max_iter can be used by `after_train` to
+ # tell whether the training successfully finished or failed
+ # due to exceptions.
+ self.iter += 1
+ except Exception:
+ logger.exception("Exception during training:")
+ raise
+ finally:
+ self.after_train()
+
+ def before_train(self):
+ for h in self._hooks:
+ h.before_train()
+
+ def after_train(self):
+ self.storage.iter = self.iter
+ for h in self._hooks:
+ h.after_train()
+
+ def before_step(self):
+ # Maintain the invariant that storage.iter == trainer.iter
+ # for the entire execution of each step
+ self.storage.iter = self.iter
+
+ for h in self._hooks:
+ h.before_step()
+
+ def after_backward(self):
+ for h in self._hooks:
+ h.after_backward()
+
+ def after_step(self):
+ for h in self._hooks:
+ h.after_step()
+
+ def run_step(self):
+ raise NotImplementedError
+
+ def state_dict(self):
+ ret = {"iteration": self.iter}
+ hooks_state = {}
+ for h in self._hooks:
+ sd = h.state_dict()
+ if sd:
+ name = type(h).__qualname__
+ if name in hooks_state:
+ # TODO handle repetitive stateful hooks
+ continue
+ hooks_state[name] = sd
+ if hooks_state:
+ ret["hooks"] = hooks_state
+ return ret
+
+ def load_state_dict(self, state_dict):
+ logger = logging.getLogger(__name__)
+ self.iter = state_dict["iteration"]
+ for key, value in state_dict.get("hooks", {}).items():
+ for h in self._hooks:
+ try:
+ name = type(h).__qualname__
+ except AttributeError:
+ continue
+ if name == key:
+ h.load_state_dict(value)
+ break
+ else:
+ logger.warning(f"Cannot find the hook '{key}', its state_dict is ignored.")
+
+
+class SimpleTrainer(TrainerBase):
+ """
+ A simple trainer for the most common type of task:
+ single-cost single-optimizer single-data-source iterative optimization,
+ optionally using data-parallelism.
+ It assumes that every step, you:
+
+    1. Compute the loss with data from the data_loader.
+ 2. Compute the gradients with the above loss.
+ 3. Update the model with the optimizer.
+
+ All other tasks during training (checkpointing, logging, evaluation, LR schedule)
+ are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`.
+
+ If you want to do anything fancier than this,
+ either subclass TrainerBase and implement your own `run_step`,
+ or write your own training loop.
+ """
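+    # Illustrative end-to-end sketch (assumed model/loader/optimizer objects,
+    # not part of the original file):
+    #     trainer = SimpleTrainer(model, train_loader, optimizer)
+    #     trainer.register_hooks([...])
+    #     trainer.train(start_iter=0, max_iter=90000)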
+
+ def __init__(
+ self,
+ model,
+ data_loader,
+ optimizer,
+ gather_metric_period=1,
+ zero_grad_before_forward=False,
+ async_write_metrics=False,
+ ):
+ """
+ Args:
+ model: a torch Module. Takes a data from data_loader and returns a
+ dict of losses.
+ data_loader: an iterable. Contains data to be used to call model.
+ optimizer: a torch optimizer.
+ gather_metric_period: an int. Every gather_metric_period iterations
+ the metrics are gathered from all the ranks to rank 0 and logged.
+ zero_grad_before_forward: whether to zero the gradients before the forward.
+ async_write_metrics: bool. If True, then write metrics asynchronously to improve
+ training speed
+ """
+ super().__init__()
+
+ """
+ We set the model to training mode in the trainer.
+ However it's valid to train a model that's in eval mode.
+ If you want your model (or a submodule of it) to behave
+ like evaluation during training, you can overwrite its train() method.
+ """
+ model.train()
+
+ self.model = model
+ self.data_loader = data_loader
+ # to access the data loader iterator, call `self._data_loader_iter`
+ self._data_loader_iter_obj = None
+ self.optimizer = optimizer
+ self.gather_metric_period = gather_metric_period
+ self.zero_grad_before_forward = zero_grad_before_forward
+ self.async_write_metrics = async_write_metrics
+        # create a thread pool that can execute non-critical logic in run_step asynchronously
+        # use only 1 worker so tasks are executed in the order they are submitted.
+ self.concurrent_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+
+ def run_step(self):
+ """
+ Implement the standard training logic described above.
+ """
+ assert self.model.training, "[SimpleTrainer] model was changed to eval mode!"
+ start = time.perf_counter()
+ """
+ If you want to do something with the data, you can wrap the dataloader.
+ """
+ data = next(self._data_loader_iter)
+ data_time = time.perf_counter() - start
+
+ if self.zero_grad_before_forward:
+ """
+ If you need to accumulate gradients or do something similar, you can
+ wrap the optimizer with your custom `zero_grad()` method.
+ """
+ self.optimizer.zero_grad()
+
+ """
+ If you want to do something with the losses, you can wrap the model.
+ """
+ loss_dict = self.model(data)
+ if isinstance(loss_dict, torch.Tensor):
+ losses = loss_dict
+ loss_dict = {"total_loss": loss_dict}
+ else:
+ losses = sum(loss_dict.values())
+ if not self.zero_grad_before_forward:
+ """
+ If you need to accumulate gradients or do something similar, you can
+ wrap the optimizer with your custom `zero_grad()` method.
+ """
+ self.optimizer.zero_grad()
+ losses.backward()
+
+ self.after_backward()
+
+ if self.async_write_metrics:
+            # write metrics asynchronously
+ self.concurrent_executor.submit(
+ self._write_metrics, loss_dict, data_time, iter=self.iter
+ )
+ else:
+ self._write_metrics(loss_dict, data_time)
+
+ """
+ If you need gradient clipping/scaling or other processing, you can
+ wrap the optimizer with your custom `step()` method. But it is
+ suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4
+ """
+ self.optimizer.step()
+
+ @property
+ def _data_loader_iter(self):
+ # only create the data loader iterator when it is used
+ if self._data_loader_iter_obj is None:
+ self._data_loader_iter_obj = iter(self.data_loader)
+ return self._data_loader_iter_obj
+
+ def reset_data_loader(self, data_loader_builder):
+ """
+ Delete and replace the current data loader with a new one, which will be created
+ by calling `data_loader_builder` (without argument).
+ """
+ del self.data_loader
+ data_loader = data_loader_builder()
+ self.data_loader = data_loader
+ self._data_loader_iter_obj = None
+
+ def _write_metrics(
+ self,
+ loss_dict: Mapping[str, torch.Tensor],
+ data_time: float,
+ prefix: str = "",
+ iter: Optional[int] = None,
+ ) -> None:
+ logger = logging.getLogger(__name__)
+
+ iter = self.iter if iter is None else iter
+ if (iter + 1) % self.gather_metric_period == 0:
+ try:
+ SimpleTrainer.write_metrics(loss_dict, data_time, iter, prefix)
+ except Exception:
+ logger.exception("Exception in writing metrics: ")
+ raise
+
+ @staticmethod
+ def write_metrics(
+ loss_dict: Mapping[str, torch.Tensor],
+ data_time: float,
+ cur_iter: int,
+ prefix: str = "",
+ ) -> None:
+ """
+ Args:
+ loss_dict (dict): dict of scalar losses
+ data_time (float): time taken by the dataloader iteration
+ prefix (str): prefix for logging keys
+ """
+ metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()}
+ metrics_dict["data_time"] = data_time
+
+ # Gather metrics among all workers for logging
+ # This assumes we do DDP-style training, which is currently the only
+ # supported method in detectron2.
+ all_metrics_dict = comm.gather(metrics_dict)
+
+ if comm.is_main_process():
+ storage = get_event_storage()
+
+ # data_time among workers can have high variance. The actual latency
+ # caused by data_time is the maximum among workers.
+ data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
+ storage.put_scalar("data_time", data_time, cur_iter=cur_iter)
+
+            # average the remaining metrics
+ metrics_dict = {
+ k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys()
+ }
+ total_losses_reduced = sum(metrics_dict.values())
+ if not np.isfinite(total_losses_reduced):
+ raise FloatingPointError(
+ f"Loss became infinite or NaN at iteration={cur_iter}!\n"
+ f"loss_dict = {metrics_dict}"
+ )
+
+ storage.put_scalar(
+ "{}total_loss".format(prefix), total_losses_reduced, cur_iter=cur_iter
+ )
+ if len(metrics_dict) > 1:
+ storage.put_scalars(cur_iter=cur_iter, **metrics_dict)
+
+ def state_dict(self):
+ ret = super().state_dict()
+ ret["optimizer"] = self.optimizer.state_dict()
+ return ret
+
+ def load_state_dict(self, state_dict):
+ super().load_state_dict(state_dict)
+ self.optimizer.load_state_dict(state_dict["optimizer"])
+
+ def after_train(self):
+ super().after_train()
+ self.concurrent_executor.shutdown(wait=True)
+
+
+class AMPTrainer(SimpleTrainer):
+ """
+ Like :class:`SimpleTrainer`, but uses PyTorch's native automatic mixed precision
+ in the training loop.
+ """
+
+ def __init__(
+ self,
+ model,
+ data_loader,
+ optimizer,
+ gather_metric_period=1,
+ zero_grad_before_forward=False,
+ grad_scaler=None,
+ precision: torch.dtype = torch.float16,
+ log_grad_scaler: bool = False,
+ async_write_metrics=False,
+ ):
+ """
+ Args:
+ model, data_loader, optimizer, gather_metric_period, zero_grad_before_forward,
+ async_write_metrics: same as in :class:`SimpleTrainer`.
+ grad_scaler: torch GradScaler to automatically scale gradients.
+ precision: torch.dtype as the target precision to cast to in computations
+ """
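+        # Illustrative sketch (assumed objects, not part of the original file):
+        # compared to SimpleTrainer, only the autocast dtype and the gradient
+        # scaler change, e.g.
+        #     AMPTrainer(model, train_loader, optimizer, precision=torch.bfloat16)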
+ unsupported = "AMPTrainer does not support single-process multi-device training!"
+ if isinstance(model, DistributedDataParallel):
+ assert not (model.device_ids and len(model.device_ids) > 1), unsupported
+ assert not isinstance(model, DataParallel), unsupported
+
+ super().__init__(
+ model, data_loader, optimizer, gather_metric_period, zero_grad_before_forward
+ )
+
+ if grad_scaler is None:
+ from torch.cuda.amp import GradScaler
+
+ grad_scaler = GradScaler()
+ self.grad_scaler = grad_scaler
+ self.precision = precision
+ self.log_grad_scaler = log_grad_scaler
+
+ def run_step(self):
+ """
+ Implement the AMP training logic.
+ """
+ assert self.model.training, "[AMPTrainer] model was changed to eval mode!"
+ assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!"
+ from torch.cuda.amp import autocast
+
+ start = time.perf_counter()
+ data = next(self._data_loader_iter)
+ data_time = time.perf_counter() - start
+
+ if self.zero_grad_before_forward:
+ self.optimizer.zero_grad()
+ with autocast(dtype=self.precision):
+ loss_dict = self.model(data)
+ if isinstance(loss_dict, torch.Tensor):
+ losses = loss_dict
+ loss_dict = {"total_loss": loss_dict}
+ else:
+ losses = sum(loss_dict.values())
+
+ if not self.zero_grad_before_forward:
+ self.optimizer.zero_grad()
+
+ self.grad_scaler.scale(losses).backward()
+
+ if self.log_grad_scaler:
+ storage = get_event_storage()
+ storage.put_scalar("[metric]grad_scaler", self.grad_scaler.get_scale())
+
+ self.after_backward()
+
+ if self.async_write_metrics:
+            # write metrics asynchronously
+ self.concurrent_executor.submit(
+ self._write_metrics, loss_dict, data_time, iter=self.iter
+ )
+ else:
+ self._write_metrics(loss_dict, data_time)
+
+ self.grad_scaler.step(self.optimizer)
+ self.grad_scaler.update()
+
+ def state_dict(self):
+ ret = super().state_dict()
+ ret["grad_scaler"] = self.grad_scaler.state_dict()
+ return ret
+
+ def load_state_dict(self, state_dict):
+ super().load_state_dict(state_dict)
+ self.grad_scaler.load_state_dict(state_dict["grad_scaler"])
diff --git a/vendor/detectron2/detectron2/evaluation/__init__.py b/vendor/detectron2/detectron2/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d96609e8f2261a6800fe85fcf3e1eaeaa44455c6
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator
+from .coco_evaluation import COCOEvaluator
+from .rotated_coco_evaluation import RotatedCOCOEvaluator
+from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset
+from .lvis_evaluation import LVISEvaluator
+from .panoptic_evaluation import COCOPanopticEvaluator
+from .pascal_voc_evaluation import PascalVOCDetectionEvaluator
+from .sem_seg_evaluation import SemSegEvaluator
+from .testing import print_csv_format, verify_results
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/vendor/detectron2/detectron2/evaluation/cityscapes_evaluation.py b/vendor/detectron2/detectron2/evaluation/cityscapes_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cc7888f0f88ed9b44eae942353a9f4dd4b8782a
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/cityscapes_evaluation.py
@@ -0,0 +1,197 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import glob
+import logging
+import numpy as np
+import os
+import tempfile
+from collections import OrderedDict
+import torch
+from PIL import Image
+
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+
+from .evaluator import DatasetEvaluator
+
+
+class CityscapesEvaluator(DatasetEvaluator):
+ """
+ Base class for evaluation using cityscapes API.
+ """
+
+ def __init__(self, dataset_name):
+ """
+ Args:
+ dataset_name (str): the name of the dataset.
+ It must have the following metadata associated with it:
+ "thing_classes", "gt_dir".
+ """
+ self._metadata = MetadataCatalog.get(dataset_name)
+ self._cpu_device = torch.device("cpu")
+ self._logger = logging.getLogger(__name__)
+
+ def reset(self):
+ self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_")
+ self._temp_dir = self._working_dir.name
+ # All workers will write to the same results directory
+ # TODO this does not work in distributed training
+ assert (
+ comm.get_local_size() == comm.get_world_size()
+        ), "CityscapesEvaluator currently does not work with multiple machines."
+ self._temp_dir = comm.all_gather(self._temp_dir)[0]
+ if self._temp_dir != self._working_dir.name:
+ self._working_dir.cleanup()
+ self._logger.info(
+ "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir)
+ )
+
+
+class CityscapesInstanceEvaluator(CityscapesEvaluator):
+ """
+ Evaluate instance segmentation results on cityscapes dataset using cityscapes API.
+
+ Note:
+ * It does not work in multi-machine distributed training.
+ * It contains a synchronization, therefore has to be used on all ranks.
+ * Only the main process runs evaluation.
+ """
+
+ def process(self, inputs, outputs):
+ from cityscapesscripts.helpers.labels import name2label
+
+ for input, output in zip(inputs, outputs):
+ file_name = input["file_name"]
+ basename = os.path.splitext(os.path.basename(file_name))[0]
+ pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt")
+
+ if "instances" in output:
+ output = output["instances"].to(self._cpu_device)
+ num_instances = len(output)
+ with open(pred_txt, "w") as fout:
+ for i in range(num_instances):
+ pred_class = output.pred_classes[i]
+ classes = self._metadata.thing_classes[pred_class]
+ class_id = name2label[classes].id
+ score = output.scores[i]
+ mask = output.pred_masks[i].numpy().astype("uint8")
+ png_filename = os.path.join(
+ self._temp_dir, basename + "_{}_{}.png".format(i, classes)
+ )
+
+ Image.fromarray(mask * 255).save(png_filename)
+ fout.write(
+ "{} {} {}\n".format(os.path.basename(png_filename), class_id, score)
+ )
+ else:
+ # Cityscapes requires a prediction file for every ground truth image.
+ with open(pred_txt, "w") as fout:
+ pass
+
+ def evaluate(self):
+ """
+ Returns:
+ dict: has a key "segm", whose value is a dict of "AP" and "AP50".
+ """
+ comm.synchronize()
+ if comm.get_rank() > 0:
+ return
+ import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval
+
+ self._logger.info("Evaluating results under {} ...".format(self._temp_dir))
+
+ # set some global states in cityscapes evaluation API, before evaluating
+ cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir)
+ cityscapes_eval.args.predictionWalk = None
+ cityscapes_eval.args.JSONOutput = False
+ cityscapes_eval.args.colorized = False
+ cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json")
+
+ # These lines are adopted from
+ # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa
+ gt_dir = PathManager.get_local_path(self._metadata.gt_dir)
+ groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png"))
+ assert len(
+ groundTruthImgList
+ ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format(
+ cityscapes_eval.args.groundTruthSearch
+ )
+ predictionImgList = []
+ for gt in groundTruthImgList:
+ predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args))
+ results = cityscapes_eval.evaluateImgLists(
+ predictionImgList, groundTruthImgList, cityscapes_eval.args
+ )["averages"]
+
+ ret = OrderedDict()
+ ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100}
+ self._working_dir.cleanup()
+ return ret
+
+
+class CityscapesSemSegEvaluator(CityscapesEvaluator):
+ """
+ Evaluate semantic segmentation results on cityscapes dataset using cityscapes API.
+
+ Note:
+ * It does not work in multi-machine distributed training.
+ * It contains a synchronization, therefore has to be used on all ranks.
+ * Only the main process runs evaluation.
+ """
+
+ def process(self, inputs, outputs):
+ from cityscapesscripts.helpers.labels import trainId2label
+
+ for input, output in zip(inputs, outputs):
+ file_name = input["file_name"]
+ basename = os.path.splitext(os.path.basename(file_name))[0]
+ pred_filename = os.path.join(self._temp_dir, basename + "_pred.png")
+
+ output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy()
+ pred = 255 * np.ones(output.shape, dtype=np.uint8)
+ for train_id, label in trainId2label.items():
+ if label.ignoreInEval:
+ continue
+ pred[output == train_id] = label.id
+ Image.fromarray(pred).save(pred_filename)
+
+ def evaluate(self):
+ comm.synchronize()
+ if comm.get_rank() > 0:
+ return
+ # Load the Cityscapes eval script *after* setting the required env var,
+ # since the script reads CITYSCAPES_DATASET into global variables at load time.
+ import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval
+
+ self._logger.info("Evaluating results under {} ...".format(self._temp_dir))
+
+ # set some global states in cityscapes evaluation API, before evaluating
+ cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir)
+ cityscapes_eval.args.predictionWalk = None
+ cityscapes_eval.args.JSONOutput = False
+ cityscapes_eval.args.colorized = False
+
+ # These lines are adopted from
+ # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa
+ gt_dir = PathManager.get_local_path(self._metadata.gt_dir)
+ groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png"))
+ assert len(
+ groundTruthImgList
+ ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format(
+ cityscapes_eval.args.groundTruthSearch
+ )
+ predictionImgList = []
+ for gt in groundTruthImgList:
+ predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt))
+ results = cityscapes_eval.evaluateImgLists(
+ predictionImgList, groundTruthImgList, cityscapes_eval.args
+ )
+ ret = OrderedDict()
+ ret["sem_seg"] = {
+ "IoU": 100.0 * results["averageScoreClasses"],
+ "iIoU": 100.0 * results["averageScoreInstClasses"],
+ "IoU_sup": 100.0 * results["averageScoreCategories"],
+ "iIoU_sup": 100.0 * results["averageScoreInstCategories"],
+ }
+ self._working_dir.cleanup()
+ return ret
diff --git a/vendor/detectron2/detectron2/evaluation/coco_evaluation.py b/vendor/detectron2/detectron2/evaluation/coco_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe8142cda29613ce1cf78523e422bf598128f590
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/coco_evaluation.py
@@ -0,0 +1,722 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import copy
+import io
+import itertools
+import json
+import logging
+import numpy as np
+import os
+import pickle
+from collections import OrderedDict
+import pycocotools.mask as mask_util
+import torch
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from tabulate import tabulate
+
+import detectron2.utils.comm as comm
+from detectron2.config import CfgNode
+from detectron2.data import MetadataCatalog
+from detectron2.data.datasets.coco import convert_to_coco_json
+from detectron2.structures import Boxes, BoxMode, pairwise_iou
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import create_small_table
+
+from .evaluator import DatasetEvaluator
+
+try:
+ from detectron2.evaluation.fast_eval_api import COCOeval_opt
+except ImportError:
+ COCOeval_opt = COCOeval
+
+
+class COCOEvaluator(DatasetEvaluator):
+ """
+ Evaluate AR for object proposals, AP for instance detection/segmentation, AP
+ for keypoint detection outputs using COCO's metrics.
+ See http://cocodataset.org/#detection-eval and
+ http://cocodataset.org/#keypoints-eval to understand its metrics.
+ The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means
+ the metric cannot be computed (e.g. due to no predictions made).
+
+ In addition to COCO, this evaluator is able to support any bounding box detection,
+ instance segmentation, or keypoint detection dataset.
+ """
+
+ def __init__(
+ self,
+ dataset_name,
+ tasks=None,
+ distributed=True,
+ output_dir=None,
+ *,
+ max_dets_per_image=None,
+ use_fast_impl=True,
+ kpt_oks_sigmas=(),
+ allow_cached_coco=True,
+ ):
+ """
+ Args:
+ dataset_name (str): name of the dataset to be evaluated.
+ It must have either the following corresponding metadata:
+
+ "json_file": the path to the COCO format annotation
+
+ Or it must be in detectron2's standard dataset format
+ so it can be converted to COCO format automatically.
+ tasks (tuple[str]): tasks that can be evaluated under the given
+ configuration. A task is one of "bbox", "segm", "keypoints".
+ By default, will infer this automatically from predictions.
+            distributed (bool): if True, will collect results from all ranks and run evaluation
+ in the main process.
+ Otherwise, will only evaluate the results in the current process.
+ output_dir (str): optional, an output directory to dump all
+ results predicted on the dataset. The dump contains two files:
+
+ 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
+ contains all the results in the format they are produced by the model.
+ 2. "coco_instances_results.json" a json file in COCO's result format.
+ max_dets_per_image (int): limit on the maximum number of detections per image.
+                By default in COCO, this limit is 100, but this can be customized
+ to be greater, as is needed in evaluation metrics AP fixed and AP pool
+ (see https://arxiv.org/pdf/2102.01066.pdf)
+ This doesn't affect keypoint evaluation.
+ use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP.
+ Although the results should be very close to the official implementation in COCO
+ API, it is still recommended to compute results with the official API for use in
+ papers. The faster implementation also uses more RAM.
+ kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS.
+ See http://cocodataset.org/#keypoints-eval
+ When empty, it will use the defaults in COCO.
+ Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
+ allow_cached_coco (bool): Whether to use cached coco json from previous validation
+ runs. You should set this to False if you need to use different validation data.
+ Defaults to True.
+ """
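+        # Illustrative sketch (assumed dataset name "my_coco_val", not part of
+        # the original file): a typical construction is
+        #     COCOEvaluator("my_coco_val", output_dir="./eval_out")
+        # which infers the tasks (bbox/segm/keypoints) from the predictions.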
+ self._logger = logging.getLogger(__name__)
+ self._distributed = distributed
+ self._output_dir = output_dir
+
+ if use_fast_impl and (COCOeval_opt is COCOeval):
+ self._logger.info("Fast COCO eval is not built. Falling back to official COCO eval.")
+ use_fast_impl = False
+ self._use_fast_impl = use_fast_impl
+
+ # COCOeval requires the limit on the number of detections per image (maxDets) to be a list
+ # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the
+ # 3rd element (100) is used as the limit on the number of detections per image when
+ # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval,
+ # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults.
+ if max_dets_per_image is None:
+ max_dets_per_image = [1, 10, 100]
+ else:
+ max_dets_per_image = [1, 10, max_dets_per_image]
+ self._max_dets_per_image = max_dets_per_image
+
+ if tasks is not None and isinstance(tasks, CfgNode):
+ kpt_oks_sigmas = (
+ tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas
+ )
+ self._logger.warn(
+ "COCO Evaluator instantiated using config, this is deprecated behavior."
+ " Please pass in explicit arguments instead."
+ )
+            self._tasks = None  # Inferring it from predictions should be better
+ else:
+ self._tasks = tasks
+
+ self._cpu_device = torch.device("cpu")
+
+ self._metadata = MetadataCatalog.get(dataset_name)
+ if not hasattr(self._metadata, "json_file"):
+ if output_dir is None:
+ raise ValueError(
+ "output_dir must be provided to COCOEvaluator "
+ "for datasets not in COCO format."
+ )
+ self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...")
+
+ cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json")
+ self._metadata.json_file = cache_path
+ convert_to_coco_json(dataset_name, cache_path, allow_cached=allow_cached_coco)
+
+ json_file = PathManager.get_local_path(self._metadata.json_file)
+ with contextlib.redirect_stdout(io.StringIO()):
+ self._coco_api = COCO(json_file)
+
+ # Test set json files do not contain annotations (evaluation must be
+ # performed using the COCO evaluation server).
+ self._do_evaluation = "annotations" in self._coco_api.dataset
+ if self._do_evaluation:
+ self._kpt_oks_sigmas = kpt_oks_sigmas
+
+ def reset(self):
+ self._predictions = []
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+ It is a list of dict. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name", "image_id".
+ outputs: the outputs of a COCO model. It is a list of dicts with key
+ "instances" that contains :class:`Instances`.
+ """
+ for input, output in zip(inputs, outputs):
+ prediction = {"image_id": input["image_id"]}
+
+ if "instances" in output:
+ instances = output["instances"].to(self._cpu_device)
+ prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
+ if "proposals" in output:
+ prediction["proposals"] = output["proposals"].to(self._cpu_device)
+ if len(prediction) > 1:
+ self._predictions.append(prediction)
+
+ def evaluate(self, img_ids=None):
+ """
+ Args:
+ img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
+ """
+ if self._distributed:
+ comm.synchronize()
+ predictions = comm.gather(self._predictions, dst=0)
+ predictions = list(itertools.chain(*predictions))
+
+ if not comm.is_main_process():
+ return {}
+ else:
+ predictions = self._predictions
+
+ if len(predictions) == 0:
+ self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
+ return {}
+
+ if self._output_dir:
+ PathManager.mkdirs(self._output_dir)
+ file_path = os.path.join(self._output_dir, "instances_predictions.pth")
+ with PathManager.open(file_path, "wb") as f:
+ torch.save(predictions, f)
+
+ self._results = OrderedDict()
+ if "proposals" in predictions[0]:
+ self._eval_box_proposals(predictions)
+ if "instances" in predictions[0]:
+ self._eval_predictions(predictions, img_ids=img_ids)
+ # Copy so the caller can do whatever with results
+ return copy.deepcopy(self._results)
+
+ def _tasks_from_predictions(self, predictions):
+ """
+ Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions.
+ """
+ tasks = {"bbox"}
+ for pred in predictions:
+ if "segmentation" in pred:
+ tasks.add("segm")
+ if "keypoints" in pred:
+ tasks.add("keypoints")
+ return sorted(tasks)
+
+ def _eval_predictions(self, predictions, img_ids=None):
+ """
+ Evaluate predictions. Fill self._results with the metrics of the tasks.
+ """
+ self._logger.info("Preparing results for COCO format ...")
+ coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
+ tasks = self._tasks or self._tasks_from_predictions(coco_results)
+
+ # unmap the category ids for COCO
+ if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+ dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
+ all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
+ num_classes = len(all_contiguous_ids)
+ assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1
+
+ reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
+ for result in coco_results:
+ category_id = result["category_id"]
+ assert category_id < num_classes, (
+ f"A prediction has class={category_id}, "
+ f"but the dataset only has {num_classes} classes and "
+ f"predicted class id should be in [0, {num_classes - 1}]."
+ )
+ result["category_id"] = reverse_id_mapping[category_id]
+
+ if self._output_dir:
+ file_path = os.path.join(self._output_dir, "coco_instances_results.json")
+ self._logger.info("Saving results to {}".format(file_path))
+ with PathManager.open(file_path, "w") as f:
+ f.write(json.dumps(coco_results))
+ f.flush()
+
+ if not self._do_evaluation:
+ self._logger.info("Annotations are not available for evaluation.")
+ return
+
+ self._logger.info(
+ "Evaluating predictions with {} COCO API...".format(
+ "unofficial" if self._use_fast_impl else "official"
+ )
+ )
+ for task in sorted(tasks):
+ assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!"
+ coco_eval = (
+ _evaluate_predictions_on_coco(
+ self._coco_api,
+ coco_results,
+ task,
+ kpt_oks_sigmas=self._kpt_oks_sigmas,
+ cocoeval_fn=COCOeval_opt if self._use_fast_impl else COCOeval,
+ img_ids=img_ids,
+ max_dets_per_image=self._max_dets_per_image,
+ )
+ if len(coco_results) > 0
+ else None # cocoapi does not handle empty results very well
+ )
+
+ res = self._derive_coco_results(
+ coco_eval, task, class_names=self._metadata.get("thing_classes")
+ )
+ self._results[task] = res
+
+ def _eval_box_proposals(self, predictions):
+ """
+ Evaluate the box proposals in predictions.
+ Fill self._results with the metrics for "box_proposals" task.
+ """
+ if self._output_dir:
+ # Saving generated box proposals to file.
+ # Predicted box_proposals are in XYXY_ABS mode.
+ bbox_mode = BoxMode.XYXY_ABS.value
+ ids, boxes, objectness_logits = [], [], []
+ for prediction in predictions:
+ ids.append(prediction["image_id"])
+ boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
+ objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
+
+ proposal_data = {
+ "boxes": boxes,
+ "objectness_logits": objectness_logits,
+ "ids": ids,
+ "bbox_mode": bbox_mode,
+ }
+ with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
+ pickle.dump(proposal_data, f)
+
+ if not self._do_evaluation:
+ self._logger.info("Annotations are not available for evaluation.")
+ return
+
+ self._logger.info("Evaluating bbox proposals ...")
+ res = {}
+ areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
+ for limit in [100, 1000]:
+ for area, suffix in areas.items():
+ stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
+ key = "AR{}@{:d}".format(suffix, limit)
+ res[key] = float(stats["ar"].item() * 100)
+ self._logger.info("Proposal metrics: \n" + create_small_table(res))
+ self._results["box_proposals"] = res
+
+ def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
+ """
+ Derive the desired score numbers from summarized COCOeval.
+
+ Args:
+ coco_eval (None or COCOEval): None represents no predictions from model.
+ iou_type (str):
+            class_names (None or list[str]): if provided, will use it to compute
+                per-category AP.
+
+ Returns:
+ a dict of {metric name: score}
+ """
+
+ metrics = {
+ "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
+ "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
+ "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
+ }[iou_type]
+
+ if coco_eval is None:
+ self._logger.warn("No predictions from the model!")
+ return {metric: float("nan") for metric in metrics}
+
+ # the standard metrics
+ results = {
+ metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
+ for idx, metric in enumerate(metrics)
+ }
+ self._logger.info(
+ "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
+ )
+ if not np.isfinite(sum(results.values())):
+            self._logger.info("Some metrics cannot be computed and are shown as NaN.")
+
+ if class_names is None or len(class_names) <= 1:
+ return results
+ # Compute per-category AP
+ # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
+ precisions = coco_eval.eval["precision"]
+ # precision has dims (iou, recall, cls, area range, max dets)
+ assert len(class_names) == precisions.shape[2]
+
+ results_per_category = []
+ for idx, name in enumerate(class_names):
+ # area range index 0: all area ranges
+ # max dets index -1: typically 100 per image
+ precision = precisions[:, :, idx, 0, -1]
+ precision = precision[precision > -1]
+ ap = np.mean(precision) if precision.size else float("nan")
+ results_per_category.append(("{}".format(name), float(ap * 100)))
+
+ # tabulate it
+ N_COLS = min(6, len(results_per_category) * 2)
+ results_flatten = list(itertools.chain(*results_per_category))
+ results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
+ table = tabulate(
+ results_2d,
+ tablefmt="pipe",
+ floatfmt=".3f",
+ headers=["category", "AP"] * (N_COLS // 2),
+ numalign="left",
+ )
+ self._logger.info("Per-category {} AP: \n".format(iou_type) + table)
+
+ results.update({"AP-" + name: ap for name, ap in results_per_category})
+ return results
+
+
+def instances_to_coco_json(instances, img_id):
+ """
+ Dump an "Instances" object to a COCO-format json that's used for evaluation.
+
+ Args:
+ instances (Instances):
+ img_id (int): the image id
+
+ Returns:
+ list[dict]: list of json annotations in COCO format.
+ """
+ num_instance = len(instances)
+ if num_instance == 0:
+ return []
+
+ boxes = instances.pred_boxes.tensor.numpy()
+ boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ boxes = boxes.tolist()
+ scores = instances.scores.tolist()
+ classes = instances.pred_classes.tolist()
+
+ has_mask = instances.has("pred_masks")
+ if has_mask:
+        # use RLE to encode the masks, because they are too large and take up memory
+ # since this evaluator stores outputs of the entire dataset
+ rles = [
+ mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
+ for mask in instances.pred_masks
+ ]
+ for rle in rles:
+            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
+            # json writer, which always produces strings, cannot serialize a bytestream
+            # unless you decode it. Thankfully, utf-8 works out (which is also what
+            # pycocotools/_mask.pyx does).
+ rle["counts"] = rle["counts"].decode("utf-8")
+
+ has_keypoints = instances.has("pred_keypoints")
+ if has_keypoints:
+ keypoints = instances.pred_keypoints
+
+ results = []
+ for k in range(num_instance):
+ result = {
+ "image_id": img_id,
+ "category_id": classes[k],
+ "bbox": boxes[k],
+ "score": scores[k],
+ }
+ if has_mask:
+ result["segmentation"] = rles[k]
+ if has_keypoints:
+ # In COCO annotations,
+ # keypoints coordinates are pixel indices.
+ # However our predictions are floating point coordinates.
+ # Therefore we subtract 0.5 to be consistent with the annotation format.
+ # This is the inverse of data loading logic in `datasets/coco.py`.
+ keypoints[k][:, :2] -= 0.5
+ result["keypoints"] = keypoints[k].flatten().tolist()
+ results.append(result)
+ return results
+
+
+# inspired from Detectron:
+# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa
+def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
+ """
+ Evaluate detection proposal recall metrics. This function is a much
+ faster alternative to the official COCO API recall evaluation code. However,
+ it produces slightly different results.
+ """
+ # Record max overlap value for each gt box
+ # Return vector of overlap values
+ areas = {
+ "all": 0,
+ "small": 1,
+ "medium": 2,
+ "large": 3,
+ "96-128": 4,
+ "128-256": 5,
+ "256-512": 6,
+ "512-inf": 7,
+ }
+ area_ranges = [
+ [0**2, 1e5**2], # all
+ [0**2, 32**2], # small
+ [32**2, 96**2], # medium
+ [96**2, 1e5**2], # large
+ [96**2, 128**2], # 96-128
+ [128**2, 256**2], # 128-256
+ [256**2, 512**2], # 256-512
+ [512**2, 1e5**2],
+ ] # 512-inf
+ assert area in areas, "Unknown area range: {}".format(area)
+ area_range = area_ranges[areas[area]]
+ gt_overlaps = []
+ num_pos = 0
+
+ for prediction_dict in dataset_predictions:
+ predictions = prediction_dict["proposals"]
+
+ # sort predictions in descending order
+ # TODO maybe remove this and make it explicit in the documentation
+ inds = predictions.objectness_logits.sort(descending=True)[1]
+ predictions = predictions[inds]
+
+ ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
+ anno = coco_api.loadAnns(ann_ids)
+ gt_boxes = [
+ BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+ for obj in anno
+ if obj["iscrowd"] == 0
+ ]
+ gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes
+ gt_boxes = Boxes(gt_boxes)
+ gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])
+
+ if len(gt_boxes) == 0 or len(predictions) == 0:
+ continue
+
+ valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
+ gt_boxes = gt_boxes[valid_gt_inds]
+
+ num_pos += len(gt_boxes)
+
+ if len(gt_boxes) == 0:
+ continue
+
+ if limit is not None and len(predictions) > limit:
+ predictions = predictions[:limit]
+
+ overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)
+
+ _gt_overlaps = torch.zeros(len(gt_boxes))
+ for j in range(min(len(predictions), len(gt_boxes))):
+ # find which proposal box maximally covers each gt box
+ # and get the iou amount of coverage for each gt box
+ max_overlaps, argmax_overlaps = overlaps.max(dim=0)
+
+ # find which gt box is 'best' covered (i.e. 'best' = most iou)
+ gt_ovr, gt_ind = max_overlaps.max(dim=0)
+ assert gt_ovr >= 0
+ # find the proposal box that covers the best covered gt box
+ box_ind = argmax_overlaps[gt_ind]
+ # record the iou coverage of this gt box
+ _gt_overlaps[j] = overlaps[box_ind, gt_ind]
+ assert _gt_overlaps[j] == gt_ovr
+ # mark the proposal box and the gt box as used
+ overlaps[box_ind, :] = -1
+ overlaps[:, gt_ind] = -1
+
+ # append recorded iou coverage level
+ gt_overlaps.append(_gt_overlaps)
+ gt_overlaps = (
+ torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
+ )
+ gt_overlaps, _ = torch.sort(gt_overlaps)
+
+ if thresholds is None:
+ step = 0.05
+ thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
+ recalls = torch.zeros_like(thresholds)
+ # compute recall for each iou threshold
+ for i, t in enumerate(thresholds):
+ recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
+ # ar = 2 * np.trapz(recalls, thresholds)
+ ar = recalls.mean()
+ return {
+ "ar": ar,
+ "recalls": recalls,
+ "thresholds": thresholds,
+ "gt_overlaps": gt_overlaps,
+ "num_pos": num_pos,
+ }
+
+
+def _evaluate_predictions_on_coco(
+ coco_gt,
+ coco_results,
+ iou_type,
+ kpt_oks_sigmas=None,
+ cocoeval_fn=COCOeval_opt,
+ img_ids=None,
+ max_dets_per_image=None,
+):
+ """
+ Evaluate the coco results using COCOEval API.
+ """
+ assert len(coco_results) > 0
+
+ if iou_type == "segm":
+ coco_results = copy.deepcopy(coco_results)
+ # When evaluating mask AP, if the results contain bbox, cocoapi will
+ # use the box area as the area of the instance, instead of the mask area.
+ # This leads to a different definition of small/medium/large.
+ # We remove the bbox field to let mask AP use mask area.
+ for c in coco_results:
+ c.pop("bbox", None)
+
+ coco_dt = coco_gt.loadRes(coco_results)
+ coco_eval = cocoeval_fn(coco_gt, coco_dt, iou_type)
+ # For COCO, the default max_dets_per_image is [1, 10, 100].
+ if max_dets_per_image is None:
+ max_dets_per_image = [1, 10, 100] # Default from COCOEval
+ else:
+ assert (
+ len(max_dets_per_image) >= 3
+ ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3"
+ # In the case that user supplies a custom input for max_dets_per_image,
+ # apply COCOevalMaxDets to evaluate AP with the custom input.
+ if max_dets_per_image[2] != 100:
+ coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type)
+ if iou_type != "keypoints":
+ coco_eval.params.maxDets = max_dets_per_image
+
+ if img_ids is not None:
+ coco_eval.params.imgIds = img_ids
+
+ if iou_type == "keypoints":
+ # Use the COCO default keypoint OKS sigmas unless overrides are specified
+ if kpt_oks_sigmas:
+ assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!"
+ coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas)
+ # COCOAPI requires every detection and every gt to have keypoints, so
+ # we just take the first entry from both
+ num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3
+ num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3
+ num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas)
+ assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, (
+ f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. "
+ f"Ground truth contains {num_keypoints_gt} keypoints. "
+ f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. "
+ "They have to agree with each other. For meaning of OKS, please refer to "
+ "http://cocodataset.org/#keypoints-eval."
+ )
+
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ return coco_eval
+
+
+class COCOevalMaxDets(COCOeval):
+ """
+ Modified version of COCOeval for evaluating AP with a custom
+ maxDets (by default for COCO, maxDets is 100)
+ """
+
+ def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results given
+ a custom value for max_dets_per_image
+ """
+
+ def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+ p = self.params
+ iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+ titleStr = "Average Precision" if ap == 1 else "Average Recall"
+ typeStr = "(AP)" if ap == 1 else "(AR)"
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = self.eval["precision"]
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = self.eval["recall"]
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+ return mean_s
+
+ def _summarizeDets():
+ stats = np.zeros((12,))
+ # Evaluate AP using the custom limit on maximum detections per image
+ stats[0] = _summarize(1, maxDets=self.params.maxDets[2])
+ stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+ stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+ stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
+ stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+ stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+ stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
+ return stats
+
+ def _summarizeKps():
+ stats = np.zeros((10,))
+ stats[0] = _summarize(1, maxDets=20)
+ stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+ stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+ stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+ stats[4] = _summarize(1, maxDets=20, areaRng="large")
+ stats[5] = _summarize(0, maxDets=20)
+ stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+ stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+ stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+ stats[9] = _summarize(0, maxDets=20, areaRng="large")
+ return stats
+
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+ iouType = self.params.iouType
+ if iouType == "segm" or iouType == "bbox":
+ summarize = _summarizeDets
+ elif iouType == "keypoints":
+ summarize = _summarizeKps
+ self.stats = summarize()
+
+ def __str__(self):
+ self.summarize()
diff --git a/vendor/detectron2/detectron2/evaluation/evaluator.py b/vendor/detectron2/detectron2/evaluation/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..baf996002b2fddc8c1952408d450b5bf69394f0a
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/evaluator.py
@@ -0,0 +1,224 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import datetime
+import logging
+import time
+from collections import OrderedDict, abc
+from contextlib import ExitStack, contextmanager
+from typing import List, Union
+import torch
+from torch import nn
+
+from detectron2.utils.comm import get_world_size, is_main_process
+from detectron2.utils.logger import log_every_n_seconds
+
+
+class DatasetEvaluator:
+ """
+ Base class for a dataset evaluator.
+
+ The function :func:`inference_on_dataset` runs the model over
+    all samples in the dataset, and has a DatasetEvaluator process the inputs/outputs.
+
+    This class will accumulate information about the inputs/outputs (by :meth:`process`),
+    and produce evaluation results at the end (by :meth:`evaluate`).
+ """
+
+ def reset(self):
+ """
+ Preparation for a new round of evaluation.
+ Should be called before starting a round of evaluation.
+ """
+ pass
+
+ def process(self, inputs, outputs):
+ """
+ Process the pair of inputs and outputs.
+ If they contain batches, the pairs can be consumed one-by-one using `zip`:
+
+ .. code-block:: python
+
+ for input_, output in zip(inputs, outputs):
+ # do evaluation on single input/output pair
+ ...
+
+ Args:
+ inputs (list): the inputs that are used to call the model.
+ outputs (list): the return value of `model(inputs)`
+ """
+ pass
+
+ def evaluate(self):
+ """
+ Evaluate/summarize the performance, after processing all input/output pairs.
+
+ Returns:
+ dict:
+ A new evaluator class can return a dict of arbitrary format
+ as long as the user can process the results.
+ In our train_net.py, we expect the following format:
+
+ * key: the name of the task (e.g., bbox)
+ * value: a dict of {metric name: score}, e.g.: {"AP50": 80}
+ """
+ pass
+
+
+class DatasetEvaluators(DatasetEvaluator):
+ """
+ Wrapper class to combine multiple :class:`DatasetEvaluator` instances.
+
+ This class dispatches every evaluation call to
+ all of its :class:`DatasetEvaluator`.
+ """
+
+ def __init__(self, evaluators):
+ """
+ Args:
+ evaluators (list): the evaluators to combine.
+ """
+ super().__init__()
+ self._evaluators = evaluators
+
+ def reset(self):
+ for evaluator in self._evaluators:
+ evaluator.reset()
+
+ def process(self, inputs, outputs):
+ for evaluator in self._evaluators:
+ evaluator.process(inputs, outputs)
+
+ def evaluate(self):
+ results = OrderedDict()
+ for evaluator in self._evaluators:
+ result = evaluator.evaluate()
+ if is_main_process() and result is not None:
+ for k, v in result.items():
+ assert (
+ k not in results
+ ), "Different evaluators produce results with the same key {}".format(k)
+ results[k] = v
+ return results
+
+
+def inference_on_dataset(
+ model, data_loader, evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None]
+):
+ """
+ Run model on the data_loader and evaluate the metrics with evaluator.
+ Also benchmark the inference speed of `model.__call__` accurately.
+ The model will be used in eval mode.
+
+ Args:
+ model (callable): a callable which takes an object from
+ `data_loader` and returns some outputs.
+
+ If it's an nn.Module, it will be temporarily set to `eval` mode.
+ If you wish to evaluate a model in `training` mode instead, you can
+ wrap the given model and override its behavior of `.eval()` and `.train()`.
+ data_loader: an iterable object with a length.
+ The elements it generates will be the inputs to the model.
+ evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark,
+ but don't want to do any evaluation.
+
+ Returns:
+ The return value of `evaluator.evaluate()`
+ """
+ num_devices = get_world_size()
+ logger = logging.getLogger(__name__)
+ logger.info("Start inference on {} batches".format(len(data_loader)))
+
+ total = len(data_loader) # inference data loader must have a fixed length
+ if evaluator is None:
+ # create a no-op evaluator
+ evaluator = DatasetEvaluators([])
+ if isinstance(evaluator, abc.MutableSequence):
+ evaluator = DatasetEvaluators(evaluator)
+ evaluator.reset()
+
+ num_warmup = min(5, total - 1)
+ start_time = time.perf_counter()
+ total_data_time = 0
+ total_compute_time = 0
+ total_eval_time = 0
+ with ExitStack() as stack:
+ if isinstance(model, nn.Module):
+ stack.enter_context(inference_context(model))
+ stack.enter_context(torch.no_grad())
+
+ start_data_time = time.perf_counter()
+ for idx, inputs in enumerate(data_loader):
+ total_data_time += time.perf_counter() - start_data_time
+ if idx == num_warmup:
+ start_time = time.perf_counter()
+ total_data_time = 0
+ total_compute_time = 0
+ total_eval_time = 0
+
+ start_compute_time = time.perf_counter()
+ outputs = model(inputs)
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+ total_compute_time += time.perf_counter() - start_compute_time
+
+ start_eval_time = time.perf_counter()
+ evaluator.process(inputs, outputs)
+ total_eval_time += time.perf_counter() - start_eval_time
+
+ iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
+ data_seconds_per_iter = total_data_time / iters_after_start
+ compute_seconds_per_iter = total_compute_time / iters_after_start
+ eval_seconds_per_iter = total_eval_time / iters_after_start
+ total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start
+ if idx >= num_warmup * 2 or compute_seconds_per_iter > 5:
+ eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
+ log_every_n_seconds(
+ logging.INFO,
+ (
+ f"Inference done {idx + 1}/{total}. "
+ f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
+ f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
+ f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
+ f"Total: {total_seconds_per_iter:.4f} s/iter. "
+ f"ETA={eta}"
+ ),
+ n=5,
+ )
+ start_data_time = time.perf_counter()
+
+ # Measure the time only for this worker (before the synchronization barrier)
+ total_time = time.perf_counter() - start_time
+ total_time_str = str(datetime.timedelta(seconds=total_time))
+ # NOTE this format is parsed by grep
+ logger.info(
+ "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format(
+ total_time_str, total_time / (total - num_warmup), num_devices
+ )
+ )
+ total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
+ logger.info(
+ "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format(
+ total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
+ )
+ )
+
+ results = evaluator.evaluate()
+ # An evaluator may return None when not in main process.
+ # Replace it by an empty dict instead to make it easier for downstream code to handle
+ if results is None:
+ results = {}
+ return results
+
+
+@contextmanager
+def inference_context(model):
+ """
+ A context where the model is temporarily changed to eval mode,
+ and restored to previous mode afterwards.
+
+ Args:
+ model: a torch Module
+ """
+ training_mode = model.training
+ model.eval()
+ yield
+ model.train(training_mode)
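
The vendored `evaluator.py` above defines the reset → process → evaluate protocol that `inference_on_dataset` drives. A minimal sketch of a custom evaluator and how it could be plugged in, assuming the vendored detectron2 package is importable; `my_model` and `my_loader` are hypothetical placeholders for a model and a fixed-length data loader:

```python
# Sketch only: a tiny evaluator that counts predicted instances.
# `my_model` and `my_loader` are placeholders, not objects defined in this repo.
from detectron2.evaluation import DatasetEvaluator, inference_on_dataset

class InstanceCounter(DatasetEvaluator):
    """Counts predicted instances over the whole dataset."""

    def reset(self):
        self.count = 0

    def process(self, inputs, outputs):
        # Assumes each output dict carries an "instances" field, as produced
        # by detectron2 detection models.
        for output in outputs:
            self.count += len(output["instances"])

    def evaluate(self):
        return {"counting": {"num_instances": self.count}}

# results = inference_on_dataset(my_model, my_loader, InstanceCounter())
```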
diff --git a/vendor/detectron2/detectron2/evaluation/fast_eval_api.py b/vendor/detectron2/detectron2/evaluation/fast_eval_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..2eb202bd5efa3ec3d366027b1debffc269ae8b17
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/fast_eval_api.py
@@ -0,0 +1,121 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import numpy as np
+import time
+from pycocotools.cocoeval import COCOeval
+
+from detectron2 import _C
+
+logger = logging.getLogger(__name__)
+
+
+class COCOeval_opt(COCOeval):
+ """
+ This is a slightly modified version of the original COCO API, where the functions evaluateImg()
+ and accumulate() are implemented in C++ to speedup evaluation
+ """
+
+ def evaluate(self):
+ """
+ Run per image evaluation on given images and store results in self.evalImgs_cpp, a
+ data structure that isn't readable from Python but is used by a C++ implementation of
+ accumulate(). Unlike the original COCO Python API, we don't populate the data structure
+ self.evalImgs, because it is a computational bottleneck.
+ :return: None
+ """
+ tic = time.time()
+
+ p = self.params
+ # add backward compatibility if useSegm is specified in params
+ if p.useSegm is not None:
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
+ logger.info("Evaluate annotation type *{}*".format(p.iouType))
+ p.imgIds = list(np.unique(p.imgIds))
+ if p.useCats:
+ p.catIds = list(np.unique(p.catIds))
+ p.maxDets = sorted(p.maxDets)
+ self.params = p
+
+ self._prepare() # bottleneck
+
+ # loop through images, area range, max detection number
+ catIds = p.catIds if p.useCats else [-1]
+
+ if p.iouType == "segm" or p.iouType == "bbox":
+ computeIoU = self.computeIoU
+ elif p.iouType == "keypoints":
+ computeIoU = self.computeOks
+ self.ious = {
+ (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
+ } # bottleneck
+
+ maxDet = p.maxDets[-1]
+
+ # <<<< Beginning of code differences with original COCO API
+ def convert_instances_to_cpp(instances, is_det=False):
+ # Convert annotations for a list of instances in an image to a format that's fast
+ # to access in C++
+ instances_cpp = []
+ for instance in instances:
+ instance_cpp = _C.InstanceAnnotation(
+ int(instance["id"]),
+ instance["score"] if is_det else instance.get("score", 0.0),
+ instance["area"],
+ bool(instance.get("iscrowd", 0)),
+ bool(instance.get("ignore", 0)),
+ )
+ instances_cpp.append(instance_cpp)
+ return instances_cpp
+
+ # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++
+ ground_truth_instances = [
+ [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]
+ for imgId in p.imgIds
+ ]
+ detected_instances = [
+ [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds]
+ for imgId in p.imgIds
+ ]
+ ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]
+
+ if not p.useCats:
+ # For each image, flatten per-category lists into a single list
+ ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances]
+ detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
+
+ # Call C++ implementation of self.evaluateImgs()
+ self._evalImgs_cpp = _C.COCOevalEvaluateImages(
+ p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
+ )
+ self._evalImgs = None
+
+ self._paramsEval = copy.deepcopy(self.params)
+ toc = time.time()
+ logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic))
+ # >>>> End of code differences with original COCO API
+
+ def accumulate(self):
+ """
+ Accumulate per image evaluation results and store the result in self.eval. Does not
+ support changing parameter settings from those used by self.evaluate()
+ """
+ logger.info("Accumulating evaluation results...")
+ tic = time.time()
+ assert hasattr(
+ self, "_evalImgs_cpp"
+ ), "evaluate() must be called before accmulate() is called."
+
+ self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
+
+ # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
+ self.eval["recall"] = np.array(self.eval["recall"]).reshape(
+ self.eval["counts"][:1] + self.eval["counts"][2:]
+ )
+
+ # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
+ # num_area_ranges X num_max_detections
+ self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"])
+ self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
+ toc = time.time()
+ logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic))
diff --git a/vendor/detectron2/detectron2/evaluation/lvis_evaluation.py b/vendor/detectron2/detectron2/evaluation/lvis_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cc854a157dc469be99a9be1bb7d570068adc891
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/lvis_evaluation.py
@@ -0,0 +1,380 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import itertools
+import json
+import logging
+import os
+import pickle
+from collections import OrderedDict
+import torch
+
+import detectron2.utils.comm as comm
+from detectron2.config import CfgNode
+from detectron2.data import MetadataCatalog
+from detectron2.structures import Boxes, BoxMode, pairwise_iou
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import create_small_table
+
+from .coco_evaluation import instances_to_coco_json
+from .evaluator import DatasetEvaluator
+
+
+class LVISEvaluator(DatasetEvaluator):
+ """
+ Evaluate object proposal and instance detection/segmentation outputs using
+ LVIS's metrics and evaluation API.
+ """
+
+ def __init__(
+ self,
+ dataset_name,
+ tasks=None,
+ distributed=True,
+ output_dir=None,
+ *,
+ max_dets_per_image=None,
+ ):
+ """
+ Args:
+ dataset_name (str): name of the dataset to be evaluated.
+ It must have the following corresponding metadata:
+ "json_file": the path to the LVIS format annotation
+ tasks (tuple[str]): tasks that can be evaluated under the given
+ configuration. A task is one of "bbox", "segm".
+ By default, will infer this automatically from predictions.
+ distributed (True): if True, will collect results from all ranks for evaluation.
+ Otherwise, will evaluate the results in the current process.
+ output_dir (str): optional, an output directory to dump results.
+ max_dets_per_image (None or int): limit on the maximum number of detections per image when evaluating AP.
+ For the LVIS dataset, this defaults to 300.
+ """
+ from lvis import LVIS
+
+ self._logger = logging.getLogger(__name__)
+
+ if tasks is not None and isinstance(tasks, CfgNode):
+ self._logger.warn(
+ "COCO Evaluator instantiated using config, this is deprecated behavior."
+ " Please pass in explicit arguments instead."
+ )
+ self._tasks = None # Inferring it from predictions should be better
+ else:
+ self._tasks = tasks
+
+ self._distributed = distributed
+ self._output_dir = output_dir
+ self._max_dets_per_image = max_dets_per_image
+
+ self._cpu_device = torch.device("cpu")
+
+ self._metadata = MetadataCatalog.get(dataset_name)
+ json_file = PathManager.get_local_path(self._metadata.json_file)
+ self._lvis_api = LVIS(json_file)
+ # Test set json files do not contain annotations (evaluation must be
+ # performed using the LVIS evaluation server).
+ self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0
+
+ def reset(self):
+ self._predictions = []
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
+ It is a list of dict. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name", "image_id".
+ outputs: the outputs of a LVIS model. It is a list of dicts with key
+ "instances" that contains :class:`Instances`.
+ """
+ for input, output in zip(inputs, outputs):
+ prediction = {"image_id": input["image_id"]}
+
+ if "instances" in output:
+ instances = output["instances"].to(self._cpu_device)
+ prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
+ if "proposals" in output:
+ prediction["proposals"] = output["proposals"].to(self._cpu_device)
+ self._predictions.append(prediction)
+
+ def evaluate(self):
+ if self._distributed:
+ comm.synchronize()
+ predictions = comm.gather(self._predictions, dst=0)
+ predictions = list(itertools.chain(*predictions))
+
+ if not comm.is_main_process():
+ return
+ else:
+ predictions = self._predictions
+
+ if len(predictions) == 0:
+ self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
+ return {}
+
+ if self._output_dir:
+ PathManager.mkdirs(self._output_dir)
+ file_path = os.path.join(self._output_dir, "instances_predictions.pth")
+ with PathManager.open(file_path, "wb") as f:
+ torch.save(predictions, f)
+
+ self._results = OrderedDict()
+ if "proposals" in predictions[0]:
+ self._eval_box_proposals(predictions)
+ if "instances" in predictions[0]:
+ self._eval_predictions(predictions)
+ # Copy so the caller can do whatever with results
+ return copy.deepcopy(self._results)
+
+ def _tasks_from_predictions(self, predictions):
+ for pred in predictions:
+ if "segmentation" in pred:
+ return ("bbox", "segm")
+ return ("bbox",)
+
+ def _eval_predictions(self, predictions):
+ """
+ Evaluate predictions. Fill self._results with the metrics of the tasks.
+
+ Args:
+ predictions (list[dict]): list of outputs from the model
+ """
+ self._logger.info("Preparing results in the LVIS format ...")
+ lvis_results = list(itertools.chain(*[x["instances"] for x in predictions]))
+ tasks = self._tasks or self._tasks_from_predictions(lvis_results)
+
+ # LVIS evaluator can be used to evaluate results for COCO dataset categories.
+ # In this case `_metadata` variable will have a field with COCO-specific category mapping.
+ if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+ reverse_id_mapping = {
+ v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
+ }
+ for result in lvis_results:
+ result["category_id"] = reverse_id_mapping[result["category_id"]]
+ else:
+ # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
+ for result in lvis_results:
+ result["category_id"] += 1
+
+ if self._output_dir:
+ file_path = os.path.join(self._output_dir, "lvis_instances_results.json")
+ self._logger.info("Saving results to {}".format(file_path))
+ with PathManager.open(file_path, "w") as f:
+ f.write(json.dumps(lvis_results))
+ f.flush()
+
+ if not self._do_evaluation:
+ self._logger.info("Annotations are not available for evaluation.")
+ return
+
+ self._logger.info("Evaluating predictions ...")
+ for task in sorted(tasks):
+ res = _evaluate_predictions_on_lvis(
+ self._lvis_api,
+ lvis_results,
+ task,
+ max_dets_per_image=self._max_dets_per_image,
+ class_names=self._metadata.get("thing_classes"),
+ )
+ self._results[task] = res
+
+ def _eval_box_proposals(self, predictions):
+ """
+ Evaluate the box proposals in predictions.
+ Fill self._results with the metrics for "box_proposals" task.
+ """
+ if self._output_dir:
+ # Saving generated box proposals to file.
+ # Predicted box_proposals are in XYXY_ABS mode.
+ bbox_mode = BoxMode.XYXY_ABS.value
+ ids, boxes, objectness_logits = [], [], []
+ for prediction in predictions:
+ ids.append(prediction["image_id"])
+ boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
+ objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
+
+ proposal_data = {
+ "boxes": boxes,
+ "objectness_logits": objectness_logits,
+ "ids": ids,
+ "bbox_mode": bbox_mode,
+ }
+ with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
+ pickle.dump(proposal_data, f)
+
+ if not self._do_evaluation:
+ self._logger.info("Annotations are not available for evaluation.")
+ return
+
+ self._logger.info("Evaluating bbox proposals ...")
+ res = {}
+ areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
+ for limit in [100, 1000]:
+ for area, suffix in areas.items():
+ stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit)
+ key = "AR{}@{:d}".format(suffix, limit)
+ res[key] = float(stats["ar"].item() * 100)
+ self._logger.info("Proposal metrics: \n" + create_small_table(res))
+ self._results["box_proposals"] = res
+
+
+# inspired from Detectron:
+# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa
+def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
+ """
+ Evaluate detection proposal recall metrics. This function is a much
+ faster alternative to the official LVIS API recall evaluation code. However,
+ it produces slightly different results.
+ """
+ # Record max overlap value for each gt box
+ # Return vector of overlap values
+ areas = {
+ "all": 0,
+ "small": 1,
+ "medium": 2,
+ "large": 3,
+ "96-128": 4,
+ "128-256": 5,
+ "256-512": 6,
+ "512-inf": 7,
+ }
+ area_ranges = [
+ [0**2, 1e5**2], # all
+ [0**2, 32**2], # small
+ [32**2, 96**2], # medium
+ [96**2, 1e5**2], # large
+ [96**2, 128**2], # 96-128
+ [128**2, 256**2], # 128-256
+ [256**2, 512**2], # 256-512
+ [512**2, 1e5**2],
+ ] # 512-inf
+ assert area in areas, "Unknown area range: {}".format(area)
+ area_range = area_ranges[areas[area]]
+ gt_overlaps = []
+ num_pos = 0
+
+ for prediction_dict in dataset_predictions:
+ predictions = prediction_dict["proposals"]
+
+ # sort predictions in descending order
+ # TODO maybe remove this and make it explicit in the documentation
+ inds = predictions.objectness_logits.sort(descending=True)[1]
+ predictions = predictions[inds]
+
+ ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
+ anno = lvis_api.load_anns(ann_ids)
+ gt_boxes = [
+ BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
+ ]
+ gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes
+ gt_boxes = Boxes(gt_boxes)
+ gt_areas = torch.as_tensor([obj["area"] for obj in anno])
+
+ if len(gt_boxes) == 0 or len(predictions) == 0:
+ continue
+
+ valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
+ gt_boxes = gt_boxes[valid_gt_inds]
+
+ num_pos += len(gt_boxes)
+
+ if len(gt_boxes) == 0:
+ continue
+
+ if limit is not None and len(predictions) > limit:
+ predictions = predictions[:limit]
+
+ overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)
+
+ _gt_overlaps = torch.zeros(len(gt_boxes))
+ for j in range(min(len(predictions), len(gt_boxes))):
+ # find which proposal box maximally covers each gt box
+ # and get the iou amount of coverage for each gt box
+ max_overlaps, argmax_overlaps = overlaps.max(dim=0)
+
+ # find which gt box is 'best' covered (i.e. 'best' = most iou)
+ gt_ovr, gt_ind = max_overlaps.max(dim=0)
+ assert gt_ovr >= 0
+ # find the proposal box that covers the best covered gt box
+ box_ind = argmax_overlaps[gt_ind]
+ # record the iou coverage of this gt box
+ _gt_overlaps[j] = overlaps[box_ind, gt_ind]
+ assert _gt_overlaps[j] == gt_ovr
+ # mark the proposal box and the gt box as used
+ overlaps[box_ind, :] = -1
+ overlaps[:, gt_ind] = -1
+
+ # append recorded iou coverage level
+ gt_overlaps.append(_gt_overlaps)
+ gt_overlaps = (
+ torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
+ )
+ gt_overlaps, _ = torch.sort(gt_overlaps)
+
+ if thresholds is None:
+ step = 0.05
+ thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
+ recalls = torch.zeros_like(thresholds)
+ # compute recall for each iou threshold
+ for i, t in enumerate(thresholds):
+ recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
+ # ar = 2 * np.trapz(recalls, thresholds)
+ ar = recalls.mean()
+ return {
+ "ar": ar,
+ "recalls": recalls,
+ "thresholds": thresholds,
+ "gt_overlaps": gt_overlaps,
+ "num_pos": num_pos,
+ }
+
+
+def _evaluate_predictions_on_lvis(
+ lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
+):
+ """
+ Args:
+ iou_type (str):
+ max_dets_per_image (None or int): limit on the maximum number of detections per image when evaluating AP.
+ For the LVIS dataset, this defaults to 300.
+ class_names (None or list[str]): if provided, will use it to predict
+ per-category AP.
+
+ Returns:
+ a dict of {metric name: score}
+ """
+ metrics = {
+ "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
+ "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
+ }[iou_type]
+
+ logger = logging.getLogger(__name__)
+
+ if len(lvis_results) == 0: # TODO: check if needed
+ logger.warn("No predictions from the model!")
+ return {metric: float("nan") for metric in metrics}
+
+ if iou_type == "segm":
+ lvis_results = copy.deepcopy(lvis_results)
+ # When evaluating mask AP, if the results contain bbox, LVIS API will
+ # use the box area as the area of the instance, instead of the mask area.
+ # This leads to a different definition of small/medium/large.
+ # We remove the bbox field to let mask AP use mask area.
+ for c in lvis_results:
+ c.pop("bbox", None)
+
+ if max_dets_per_image is None:
+ max_dets_per_image = 300 # Default for LVIS dataset
+
+ from lvis import LVISEval, LVISResults
+
+ logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
+ lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
+ lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
+ lvis_eval.run()
+ lvis_eval.print_results()
+
+ # Pull the standard metrics from the LVIS results
+ results = lvis_eval.get_results()
+ results = {metric: float(results[metric] * 100) for metric in metrics}
+ logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
+ return results
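
`_evaluate_box_proposals` boils down to one step once every ground-truth box has been assigned the IoU of its best-covering proposal: recall is computed at IoU thresholds 0.50:0.05:0.95 and averaged. A standalone sketch of that final step, with synthetic overlap values:

```python
# Sketch of the AR computation at the end of _evaluate_box_proposals;
# the per-GT best-overlap values are invented for illustration.
import torch

gt_overlaps = torch.tensor([0.92, 0.81, 0.66, 0.55, 0.40, 0.10])  # best IoU per GT box
num_pos = gt_overlaps.numel()                                      # GT boxes in the area range

thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
recalls = torch.zeros_like(thresholds)
for i, t in enumerate(thresholds):
    recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)

ar = recalls.mean()  # AR = mean recall over IoU thresholds 0.50:0.05:0.95
print(float(ar))
```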
diff --git a/vendor/detectron2/detectron2/evaluation/panoptic_evaluation.py b/vendor/detectron2/detectron2/evaluation/panoptic_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..9fb3462b7f9abf6feaa499976bfed526ebd17e31
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/panoptic_evaluation.py
@@ -0,0 +1,199 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import io
+import itertools
+import json
+import logging
+import numpy as np
+import os
+import tempfile
+from collections import OrderedDict
+from typing import Optional
+from PIL import Image
+from tabulate import tabulate
+
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+
+from .evaluator import DatasetEvaluator
+
+logger = logging.getLogger(__name__)
+
+
+class COCOPanopticEvaluator(DatasetEvaluator):
+ """
+ Evaluate Panoptic Quality metrics on COCO using PanopticAPI.
+ It saves panoptic segmentation prediction in `output_dir`
+
+ It contains a synchronize call and has to be called from all workers.
+ """
+
+ def __init__(self, dataset_name: str, output_dir: Optional[str] = None):
+ """
+ Args:
+ dataset_name: name of the dataset
+ output_dir: output directory to save results for evaluation.
+ """
+ self._metadata = MetadataCatalog.get(dataset_name)
+ self._thing_contiguous_id_to_dataset_id = {
+ v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
+ }
+ self._stuff_contiguous_id_to_dataset_id = {
+ v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items()
+ }
+
+ self._output_dir = output_dir
+ if self._output_dir is not None:
+ PathManager.mkdirs(self._output_dir)
+
+ def reset(self):
+ self._predictions = []
+
+ def _convert_category_id(self, segment_info):
+ isthing = segment_info.pop("isthing", None)
+ if isthing is None:
+ # the model produces panoptic category id directly. No more conversion needed
+ return segment_info
+ if isthing is True:
+ segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[
+ segment_info["category_id"]
+ ]
+ else:
+ segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[
+ segment_info["category_id"]
+ ]
+ return segment_info
+
+ def process(self, inputs, outputs):
+ from panopticapi.utils import id2rgb
+
+ for input, output in zip(inputs, outputs):
+ panoptic_img, segments_info = output["panoptic_seg"]
+ panoptic_img = panoptic_img.cpu().numpy()
+ if segments_info is None:
+ # If "segments_info" is None, we assume "panoptic_img" is a
+ # H*W int32 image storing the panoptic_id in the format of
+ # category_id * label_divisor + instance_id. We reserve -1 for
+ # VOID label, and add 1 to panoptic_img since the official
+ # evaluation script uses 0 for VOID label.
+ label_divisor = self._metadata.label_divisor
+ segments_info = []
+ for panoptic_label in np.unique(panoptic_img):
+ if panoptic_label == -1:
+ # VOID region.
+ continue
+ pred_class = panoptic_label // label_divisor
+ isthing = (
+ pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values()
+ )
+ segments_info.append(
+ {
+ "id": int(panoptic_label) + 1,
+ "category_id": int(pred_class),
+ "isthing": bool(isthing),
+ }
+ )
+ # Official evaluation script uses 0 for VOID label.
+ panoptic_img += 1
+
+ file_name = os.path.basename(input["file_name"])
+ file_name_png = os.path.splitext(file_name)[0] + ".png"
+ with io.BytesIO() as out:
+ Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG")
+ segments_info = [self._convert_category_id(x) for x in segments_info]
+ self._predictions.append(
+ {
+ "image_id": input["image_id"],
+ "file_name": file_name_png,
+ "png_string": out.getvalue(),
+ "segments_info": segments_info,
+ }
+ )
+
+ def evaluate(self):
+ comm.synchronize()
+
+ self._predictions = comm.gather(self._predictions)
+ self._predictions = list(itertools.chain(*self._predictions))
+ if not comm.is_main_process():
+ return
+
+ # PanopticApi requires local files
+ gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
+ gt_folder = PathManager.get_local_path(self._metadata.panoptic_root)
+
+ with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
+ logger.info("Writing all panoptic predictions to {} ...".format(pred_dir))
+ for p in self._predictions:
+ with open(os.path.join(pred_dir, p["file_name"]), "wb") as f:
+ f.write(p.pop("png_string"))
+
+ with open(gt_json, "r") as f:
+ json_data = json.load(f)
+ json_data["annotations"] = self._predictions
+
+ output_dir = self._output_dir or pred_dir
+ predictions_json = os.path.join(output_dir, "predictions.json")
+ with PathManager.open(predictions_json, "w") as f:
+ f.write(json.dumps(json_data))
+
+ from panopticapi.evaluation import pq_compute
+
+ with contextlib.redirect_stdout(io.StringIO()):
+ pq_res = pq_compute(
+ gt_json,
+ PathManager.get_local_path(predictions_json),
+ gt_folder=gt_folder,
+ pred_folder=pred_dir,
+ )
+
+ res = {}
+ res["PQ"] = 100 * pq_res["All"]["pq"]
+ res["SQ"] = 100 * pq_res["All"]["sq"]
+ res["RQ"] = 100 * pq_res["All"]["rq"]
+ res["PQ_th"] = 100 * pq_res["Things"]["pq"]
+ res["SQ_th"] = 100 * pq_res["Things"]["sq"]
+ res["RQ_th"] = 100 * pq_res["Things"]["rq"]
+ res["PQ_st"] = 100 * pq_res["Stuff"]["pq"]
+ res["SQ_st"] = 100 * pq_res["Stuff"]["sq"]
+ res["RQ_st"] = 100 * pq_res["Stuff"]["rq"]
+
+ results = OrderedDict({"panoptic_seg": res})
+ _print_panoptic_results(pq_res)
+
+ return results
+
+
+def _print_panoptic_results(pq_res):
+ headers = ["", "PQ", "SQ", "RQ", "#categories"]
+ data = []
+ for name in ["All", "Things", "Stuff"]:
+ row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]]
+ data.append(row)
+ table = tabulate(
+ data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center"
+ )
+ logger.info("Panoptic Evaluation Results:\n" + table)
+
+
+if __name__ == "__main__":
+ from detectron2.utils.logger import setup_logger
+
+ logger = setup_logger()
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--gt-json")
+ parser.add_argument("--gt-dir")
+ parser.add_argument("--pred-json")
+ parser.add_argument("--pred-dir")
+ args = parser.parse_args()
+
+ from panopticapi.evaluation import pq_compute
+
+ with contextlib.redirect_stdout(io.StringIO()):
+ pq_res = pq_compute(
+ args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir
+ )
+ _print_panoptic_results(pq_res)
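
The PQ/SQ/RQ values pulled out of `pq_compute` follow the standard panoptic-quality definition: predicted and ground-truth segments are matched at IoU > 0.5, SQ is the mean IoU of the matched pairs, RQ is an F1-style detection term, and PQ = SQ x RQ. A toy sketch of that arithmetic, with invented IoUs and counts:

```python
# Toy illustration of the panoptic-quality arithmetic; all values are invented.
matched_ious = [0.90, 0.80, 0.75]  # IoUs of matched (prediction, GT) segment pairs
num_fp, num_fn = 1, 2              # unmatched predictions / unmatched GT segments

tp = len(matched_ious)
sq = sum(matched_ious) / tp                    # segmentation quality
rq = tp / (tp + 0.5 * num_fp + 0.5 * num_fn)   # recognition quality
pq = sq * rq                                   # panoptic quality
print(f"PQ={100 * pq:.2f} SQ={100 * sq:.2f} RQ={100 * rq:.2f}")
```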
diff --git a/vendor/detectron2/detectron2/evaluation/pascal_voc_evaluation.py b/vendor/detectron2/detectron2/evaluation/pascal_voc_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..88bb42e6f75f5f0faa4b774ddf16938477a37d2b
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/pascal_voc_evaluation.py
@@ -0,0 +1,300 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import logging
+import numpy as np
+import os
+import tempfile
+import xml.etree.ElementTree as ET
+from collections import OrderedDict, defaultdict
+from functools import lru_cache
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+
+from .evaluator import DatasetEvaluator
+
+
+class PascalVOCDetectionEvaluator(DatasetEvaluator):
+ """
+ Evaluate Pascal VOC style AP for Pascal VOC dataset.
+ It contains a synchronization, therefore has to be called from all ranks.
+
+ Note that the concept of AP can be implemented in different ways and may not
+ produce identical results. This class mimics the implementation of the official
+ Pascal VOC Matlab API, and should produce similar but not identical results to the
+ official API.
+ """
+
+ def __init__(self, dataset_name):
+ """
+ Args:
+ dataset_name (str): name of the dataset, e.g., "voc_2007_test"
+ """
+ self._dataset_name = dataset_name
+ meta = MetadataCatalog.get(dataset_name)
+
+ # Too many tiny files, download all to local for speed.
+ annotation_dir_local = PathManager.get_local_path(
+ os.path.join(meta.dirname, "Annotations/")
+ )
+ self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml")
+ self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt")
+ self._class_names = meta.thing_classes
+ assert meta.year in [2007, 2012], meta.year
+ self._is_2007 = meta.year == 2007
+ self._cpu_device = torch.device("cpu")
+ self._logger = logging.getLogger(__name__)
+
+ def reset(self):
+ self._predictions = defaultdict(list) # class name -> list of prediction strings
+
+ def process(self, inputs, outputs):
+ for input, output in zip(inputs, outputs):
+ image_id = input["image_id"]
+ instances = output["instances"].to(self._cpu_device)
+ boxes = instances.pred_boxes.tensor.numpy()
+ scores = instances.scores.tolist()
+ classes = instances.pred_classes.tolist()
+ for box, score, cls in zip(boxes, scores, classes):
+ xmin, ymin, xmax, ymax = box
+ # The inverse of data loading logic in `datasets/pascal_voc.py`
+ xmin += 1
+ ymin += 1
+ self._predictions[cls].append(
+ f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
+ )
+
+ def evaluate(self):
+ """
+ Returns:
+ dict: has a key "bbox", whose value is a dict of "AP", "AP50", and "AP75".
+ """
+ all_predictions = comm.gather(self._predictions, dst=0)
+ if not comm.is_main_process():
+ return
+ predictions = defaultdict(list)
+ for predictions_per_rank in all_predictions:
+ for clsid, lines in predictions_per_rank.items():
+ predictions[clsid].extend(lines)
+ del all_predictions
+
+ self._logger.info(
+ "Evaluating {} using {} metric. "
+ "Note that results do not use the official Matlab API.".format(
+ self._dataset_name, 2007 if self._is_2007 else 2012
+ )
+ )
+
+ with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
+ res_file_template = os.path.join(dirname, "{}.txt")
+
+ aps = defaultdict(list) # iou -> ap per class
+ for cls_id, cls_name in enumerate(self._class_names):
+ lines = predictions.get(cls_id, [""])
+
+ with open(res_file_template.format(cls_name), "w") as f:
+ f.write("\n".join(lines))
+
+ for thresh in range(50, 100, 5):
+ rec, prec, ap = voc_eval(
+ res_file_template,
+ self._anno_file_template,
+ self._image_set_path,
+ cls_name,
+ ovthresh=thresh / 100.0,
+ use_07_metric=self._is_2007,
+ )
+ aps[thresh].append(ap * 100)
+
+ ret = OrderedDict()
+ mAP = {iou: np.mean(x) for iou, x in aps.items()}
+ ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}
+ return ret
+
+
+##############################################################################
+#
+# Below code is modified from
+# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py
+# --------------------------------------------------------
+# Fast/er R-CNN
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Bharath Hariharan
+# --------------------------------------------------------
+
+"""Python implementation of the PASCAL VOC devkit's AP evaluation code."""
+
+
+@lru_cache(maxsize=None)
+def parse_rec(filename):
+ """Parse a PASCAL VOC xml file."""
+ with PathManager.open(filename) as f:
+ tree = ET.parse(f)
+ objects = []
+ for obj in tree.findall("object"):
+ obj_struct = {}
+ obj_struct["name"] = obj.find("name").text
+ obj_struct["pose"] = obj.find("pose").text
+ obj_struct["truncated"] = int(obj.find("truncated").text)
+ obj_struct["difficult"] = int(obj.find("difficult").text)
+ bbox = obj.find("bndbox")
+ obj_struct["bbox"] = [
+ int(bbox.find("xmin").text),
+ int(bbox.find("ymin").text),
+ int(bbox.find("xmax").text),
+ int(bbox.find("ymax").text),
+ ]
+ objects.append(obj_struct)
+
+ return objects
+
+
+def voc_ap(rec, prec, use_07_metric=False):
+ """Compute VOC AP given precision and recall. If use_07_metric is true, uses
+ the VOC 07 11-point method (default:False).
+ """
+ if use_07_metric:
+ # 11 point metric
+ ap = 0.0
+ for t in np.arange(0.0, 1.1, 0.1):
+ if np.sum(rec >= t) == 0:
+ p = 0
+ else:
+ p = np.max(prec[rec >= t])
+ ap = ap + p / 11.0
+ else:
+ # correct AP calculation
+ # first append sentinel values at the end
+ mrec = np.concatenate(([0.0], rec, [1.0]))
+ mpre = np.concatenate(([0.0], prec, [0.0]))
+
+ # compute the precision envelope
+ for i in range(mpre.size - 1, 0, -1):
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ i = np.where(mrec[1:] != mrec[:-1])[0]
+
+ # and sum (\Delta recall) * prec
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+ return ap
+
+
+def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
+ """rec, prec, ap = voc_eval(detpath,
+ annopath,
+ imagesetfile,
+ classname,
+ [ovthresh],
+ [use_07_metric])
+
+ Top level function that does the PASCAL VOC evaluation.
+
+ detpath: Path to detections
+ detpath.format(classname) should produce the detection results file.
+ annopath: Path to annotations
+ annopath.format(imagename) should be the xml annotations file.
+ imagesetfile: Text file containing the list of images, one image per line.
+ classname: Category name
+ [ovthresh]: Overlap threshold (default = 0.5)
+ [use_07_metric]: Whether to use VOC07's 11 point AP computation
+ (default False)
+ """
+ # assumes detections are in detpath.format(classname)
+ # assumes annotations are in annopath.format(imagename)
+ # assumes imagesetfile is a text file with each line an image name
+
+ # first load gt
+ # read list of images
+ with PathManager.open(imagesetfile, "r") as f:
+ lines = f.readlines()
+ imagenames = [x.strip() for x in lines]
+
+ # load annots
+ recs = {}
+ for imagename in imagenames:
+ recs[imagename] = parse_rec(annopath.format(imagename))
+
+ # extract gt objects for this class
+ class_recs = {}
+ npos = 0
+ for imagename in imagenames:
+ R = [obj for obj in recs[imagename] if obj["name"] == classname]
+ bbox = np.array([x["bbox"] for x in R])
+ difficult = np.array([x["difficult"] for x in R]).astype(bool)
+ # difficult = np.array([False for x in R]).astype(bool) # treat all "difficult" as GT
+ det = [False] * len(R)
+ npos = npos + sum(~difficult)
+ class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}
+
+ # read dets
+ detfile = detpath.format(classname)
+ with open(detfile, "r") as f:
+ lines = f.readlines()
+
+ splitlines = [x.strip().split(" ") for x in lines]
+ image_ids = [x[0] for x in splitlines]
+ confidence = np.array([float(x[1]) for x in splitlines])
+ BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)
+
+ # sort by confidence
+ sorted_ind = np.argsort(-confidence)
+ BB = BB[sorted_ind, :]
+ image_ids = [image_ids[x] for x in sorted_ind]
+
+ # go down dets and mark TPs and FPs
+ nd = len(image_ids)
+ tp = np.zeros(nd)
+ fp = np.zeros(nd)
+ for d in range(nd):
+ R = class_recs[image_ids[d]]
+ bb = BB[d, :].astype(float)
+ ovmax = -np.inf
+ BBGT = R["bbox"].astype(float)
+
+ if BBGT.size > 0:
+ # compute overlaps
+ # intersection
+ ixmin = np.maximum(BBGT[:, 0], bb[0])
+ iymin = np.maximum(BBGT[:, 1], bb[1])
+ ixmax = np.minimum(BBGT[:, 2], bb[2])
+ iymax = np.minimum(BBGT[:, 3], bb[3])
+ iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
+ ih = np.maximum(iymax - iymin + 1.0, 0.0)
+ inters = iw * ih
+
+ # union
+ uni = (
+ (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
+ + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
+ - inters
+ )
+
+ overlaps = inters / uni
+ ovmax = np.max(overlaps)
+ jmax = np.argmax(overlaps)
+
+ if ovmax > ovthresh:
+ if not R["difficult"][jmax]:
+ if not R["det"][jmax]:
+ tp[d] = 1.0
+ R["det"][jmax] = 1
+ else:
+ fp[d] = 1.0
+ else:
+ fp[d] = 1.0
+
+ # compute precision recall
+ fp = np.cumsum(fp)
+ tp = np.cumsum(tp)
+ rec = tp / float(npos)
+ # avoid divide by zero in case the first detection matches a difficult
+ # ground truth
+ prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
+ ap = voc_ap(rec, prec, use_07_metric)
+
+ return rec, prec, ap
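
`voc_ap` only needs a monotonically increasing recall vector and the matching precision values; a small synthetic example contrasting the VOC07 11-point interpolation with the exact area-under-curve variant:

```python
# Synthetic precision/recall points, for illustration only.
import numpy as np
from detectron2.evaluation.pascal_voc_evaluation import voc_ap

rec = np.array([0.1, 0.2, 0.4, 0.6, 0.8])
prec = np.array([1.0, 0.9, 0.75, 0.6, 0.5])

ap_07 = voc_ap(rec, prec, use_07_metric=True)   # 11-point interpolated AP
ap_12 = voc_ap(rec, prec, use_07_metric=False)  # area under the precision envelope
print(ap_07, ap_12)
```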
diff --git a/vendor/detectron2/detectron2/evaluation/rotated_coco_evaluation.py b/vendor/detectron2/detectron2/evaluation/rotated_coco_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea6d1b381dcf106339a03f08577df673ad439c46
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/rotated_coco_evaluation.py
@@ -0,0 +1,207 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import json
+import numpy as np
+import os
+import torch
+from pycocotools.cocoeval import COCOeval, maskUtils
+
+from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated
+from detectron2.utils.file_io import PathManager
+
+from .coco_evaluation import COCOEvaluator
+
+
+class RotatedCOCOeval(COCOeval):
+ @staticmethod
+ def is_rotated(box_list):
+ if type(box_list) == np.ndarray:
+ return box_list.shape[1] == 5
+ elif type(box_list) == list:
+ if box_list == []: # cannot decide the box_dim
+ return False
+ return np.all(
+ np.array(
+ [
+ (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray))
+ for obj in box_list
+ ]
+ )
+ )
+ return False
+
+ @staticmethod
+ def boxlist_to_tensor(boxlist, output_box_dim):
+ if type(boxlist) == np.ndarray:
+ box_tensor = torch.from_numpy(boxlist)
+ elif type(boxlist) == list:
+ if boxlist == []:
+ return torch.zeros((0, output_box_dim), dtype=torch.float32)
+ else:
+ box_tensor = torch.FloatTensor(boxlist)
+ else:
+ raise Exception("Unrecognized boxlist type")
+
+ input_box_dim = box_tensor.shape[1]
+ if input_box_dim != output_box_dim:
+ if input_box_dim == 4 and output_box_dim == 5:
+ box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
+ else:
+ raise Exception(
+ "Unable to convert from {}-dim box to {}-dim box".format(
+ input_box_dim, output_box_dim
+ )
+ )
+ return box_tensor
+
+ def compute_iou_dt_gt(self, dt, gt, is_crowd):
+ if self.is_rotated(dt) or self.is_rotated(gt):
+ # TODO: take is_crowd into consideration
+ assert all(c == 0 for c in is_crowd)
+ dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
+ gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
+ return pairwise_iou_rotated(dt, gt)
+ else:
+ # This is the same as the classical COCO evaluation
+ return maskUtils.iou(dt, gt, is_crowd)
+
+ def computeIoU(self, imgId, catId):
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ assert p.iouType == "bbox", "unsupported iouType for iou computation"
+
+ g = [g["bbox"] for g in gt]
+ d = [d["bbox"] for d in dt]
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+
+ # Note: this function is copied from cocoeval.py in cocoapi
+ # and the major difference is here.
+ ious = self.compute_iou_dt_gt(d, g, iscrowd)
+ return ious
+
+
+class RotatedCOCOEvaluator(COCOEvaluator):
+ """
+ Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs,
+ with rotated boxes support.
+ Note: this uses IOU only and does not consider angle differences.
+ """
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+ It is a list of dict. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name", "image_id".
+ outputs: the outputs of a COCO model. It is a list of dicts with key
+ "instances" that contains :class:`Instances`.
+ """
+ for input, output in zip(inputs, outputs):
+ prediction = {"image_id": input["image_id"]}
+
+ if "instances" in output:
+ instances = output["instances"].to(self._cpu_device)
+
+ prediction["instances"] = self.instances_to_json(instances, input["image_id"])
+ if "proposals" in output:
+ prediction["proposals"] = output["proposals"].to(self._cpu_device)
+ self._predictions.append(prediction)
+
+ def instances_to_json(self, instances, img_id):
+ num_instance = len(instances)
+ if num_instance == 0:
+ return []
+
+ boxes = instances.pred_boxes.tensor.numpy()
+ if boxes.shape[1] == 4:
+ boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ boxes = boxes.tolist()
+ scores = instances.scores.tolist()
+ classes = instances.pred_classes.tolist()
+
+ results = []
+ for k in range(num_instance):
+ result = {
+ "image_id": img_id,
+ "category_id": classes[k],
+ "bbox": boxes[k],
+ "score": scores[k],
+ }
+
+ results.append(result)
+ return results
+
+ def _eval_predictions(self, predictions, img_ids=None): # img_ids: unused
+ """
+ Evaluate predictions on the given tasks.
+ Fill self._results with the metrics of the tasks.
+ """
+ self._logger.info("Preparing results for COCO format ...")
+ coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
+
+ # unmap the category ids for COCO
+ if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+ reverse_id_mapping = {
+ v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
+ }
+ for result in coco_results:
+ result["category_id"] = reverse_id_mapping[result["category_id"]]
+
+ if self._output_dir:
+ file_path = os.path.join(self._output_dir, "coco_instances_results.json")
+ self._logger.info("Saving results to {}".format(file_path))
+ with PathManager.open(file_path, "w") as f:
+ f.write(json.dumps(coco_results))
+ f.flush()
+
+ if not self._do_evaluation:
+ self._logger.info("Annotations are not available for evaluation.")
+ return
+
+ self._logger.info("Evaluating predictions ...")
+
+ assert self._tasks is None or set(self._tasks) == {
+ "bbox"
+ }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported"
+ coco_eval = (
+ self._evaluate_predictions_on_coco(self._coco_api, coco_results)
+ if len(coco_results) > 0
+ else None # cocoapi does not handle empty results very well
+ )
+
+ task = "bbox"
+ res = self._derive_coco_results(
+ coco_eval, task, class_names=self._metadata.get("thing_classes")
+ )
+ self._results[task] = res
+
+ def _evaluate_predictions_on_coco(self, coco_gt, coco_results):
+ """
+ Evaluate the coco results using COCOEval API.
+ """
+ assert len(coco_results) > 0
+
+ coco_dt = coco_gt.loadRes(coco_results)
+
+ # Only bbox is supported for now
+ coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox")
+
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ return coco_eval
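
`RotatedCOCOeval` compares boxes in (x_center, y_center, width, height, angle) form. A hedged sketch of lifting an axis-aligned XYWH box into that representation and computing a rotated IoU with the helpers imported above; the box coordinates are invented:

```python
# Sketch only; box coordinates are invented for illustration.
import torch
from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated

# Axis-aligned box in XYWH_ABS (x0, y0, w, h) -> XYWHA_ABS (cx, cy, w, h, angle=0),
# the same conversion used by boxlist_to_tensor above.
axis_aligned = torch.tensor([[10.0, 10.0, 20.0, 30.0]])
as_rotated = BoxMode.convert(axis_aligned, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)

# A detection of the same size, rotated 30 degrees about the same center.
detection = torch.tensor([[20.0, 25.0, 20.0, 30.0, 30.0]])

ious = pairwise_iou_rotated(RotatedBoxes(detection), RotatedBoxes(as_rotated))
print(ious)  # 1x1 matrix of rotated IoU values
```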
diff --git a/vendor/detectron2/detectron2/evaluation/sem_seg_evaluation.py b/vendor/detectron2/detectron2/evaluation/sem_seg_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..3735de62761bd6be4444250dcd4a83239666af1f
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/sem_seg_evaluation.py
@@ -0,0 +1,265 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import json
+import logging
+import numpy as np
+import os
+from collections import OrderedDict
+from typing import Optional, Union
+import pycocotools.mask as mask_util
+import torch
+from PIL import Image
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.utils.comm import all_gather, is_main_process, synchronize
+from detectron2.utils.file_io import PathManager
+
+from .evaluator import DatasetEvaluator
+
+_CV2_IMPORTED = True
+try:
+ import cv2 # noqa
+except ImportError:
+ # OpenCV is an optional dependency at the moment
+ _CV2_IMPORTED = False
+
+
+def load_image_into_numpy_array(
+ filename: str,
+ copy: bool = False,
+ dtype: Optional[Union[np.dtype, str]] = None,
+) -> np.ndarray:
+ with PathManager.open(filename, "rb") as f:
+ array = np.array(Image.open(f), copy=copy, dtype=dtype)
+ return array
+
+
+class SemSegEvaluator(DatasetEvaluator):
+ """
+ Evaluate semantic segmentation metrics.
+ """
+
+ def __init__(
+ self,
+ dataset_name,
+ distributed=True,
+ output_dir=None,
+ *,
+ sem_seg_loading_fn=load_image_into_numpy_array,
+ num_classes=None,
+ ignore_label=None,
+ ):
+ """
+ Args:
+ dataset_name (str): name of the dataset to be evaluated.
+ distributed (bool): if True, will collect results from all ranks for evaluation.
+ Otherwise, will evaluate the results in the current process.
+ output_dir (str): an output directory to dump results.
+ sem_seg_loading_fn: function to read sem seg file and load into numpy array.
+ Default provided, but projects can customize.
+ num_classes, ignore_label: deprecated arguments
+ """
+ self._logger = logging.getLogger(__name__)
+ if num_classes is not None:
+ self._logger.warn(
+ "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata."
+ )
+ if ignore_label is not None:
+ self._logger.warn(
+ "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata."
+ )
+ self._dataset_name = dataset_name
+ self._distributed = distributed
+ self._output_dir = output_dir
+
+ self._cpu_device = torch.device("cpu")
+
+ self.input_file_to_gt_file = {
+ dataset_record["file_name"]: dataset_record["sem_seg_file_name"]
+ for dataset_record in DatasetCatalog.get(dataset_name)
+ }
+
+ meta = MetadataCatalog.get(dataset_name)
+ # Dict that maps contiguous training ids to COCO category ids
+ try:
+ c2d = meta.stuff_dataset_id_to_contiguous_id
+ self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()}
+ except AttributeError:
+ self._contiguous_id_to_dataset_id = None
+ self._class_names = meta.stuff_classes
+ self.sem_seg_loading_fn = sem_seg_loading_fn
+ self._num_classes = len(meta.stuff_classes)
+ if num_classes is not None:
+ assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}"
+ self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label
+
+ # cv2.erode does not work for int dtypes, only for uint8, hence the checks below.
+ self._compute_boundary_iou = True
+ if not _CV2_IMPORTED:
+ self._compute_boundary_iou = False
+ self._logger.warn(
+ """Boundary IoU calculation requires OpenCV. B-IoU metrics are
+ not going to be computed because OpenCV is not available to import."""
+ )
+ if self._num_classes >= np.iinfo(np.uint8).max:
+ self._compute_boundary_iou = False
+ self._logger.warn(
+ f"""SemSegEvaluator(num_classes) is more than supported value for Boundary IoU calculation!
+ B-IoU metrics are not going to be computed. Max allowed value (exclusive)
+ for num_classes for calculating Boundary IoU is {np.iinfo(np.uint8).max}.
+ The number of classes of dataset {self._dataset_name} is {self._num_classes}"""
+ )
+
+ def reset(self):
+ self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64)
+ self._b_conf_matrix = np.zeros(
+ (self._num_classes + 1, self._num_classes + 1), dtype=np.int64
+ )
+ self._predictions = []
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a model.
+ It is a list of dicts. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name".
+ outputs: the outputs of a model. It is either list of semantic segmentation predictions
+ (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic
+ segmentation prediction in the same format.
+ """
+ for input, output in zip(inputs, outputs):
+ output = output["sem_seg"].argmax(dim=0).to(self._cpu_device)
+ pred = np.array(output, dtype=np.int64)  # np.int alias removed in NumPy >= 1.24
+ gt_filename = self.input_file_to_gt_file[input["file_name"]]
+ gt = self.sem_seg_loading_fn(gt_filename, dtype=np.int64)
+
+ gt[gt == self._ignore_label] = self._num_classes
+
+ self._conf_matrix += np.bincount(
+ (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
+ minlength=self._conf_matrix.size,
+ ).reshape(self._conf_matrix.shape)
+
+ if self._compute_boundary_iou:
+ b_gt = self._mask_to_boundary(gt.astype(np.uint8))
+ b_pred = self._mask_to_boundary(pred.astype(np.uint8))
+
+ self._b_conf_matrix += np.bincount(
+ (self._num_classes + 1) * b_pred.reshape(-1) + b_gt.reshape(-1),
+ minlength=self._conf_matrix.size,
+ ).reshape(self._conf_matrix.shape)
+
+ self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"]))
+
+ def evaluate(self):
+ """
+ Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):
+
+ * Mean intersection-over-union averaged across classes (mIoU)
+ * Frequency Weighted IoU (fwIoU)
+ * Mean pixel accuracy averaged across classes (mACC)
+ * Pixel Accuracy (pACC)
+ """
+ if self._distributed:
+ synchronize()
+ conf_matrix_list = all_gather(self._conf_matrix)
+ b_conf_matrix_list = all_gather(self._b_conf_matrix)
+ self._predictions = all_gather(self._predictions)
+ self._predictions = list(itertools.chain(*self._predictions))
+ if not is_main_process():
+ return
+
+ self._conf_matrix = np.zeros_like(self._conf_matrix)
+ for conf_matrix in conf_matrix_list:
+ self._conf_matrix += conf_matrix
+
+ self._b_conf_matrix = np.zeros_like(self._b_conf_matrix)
+ for b_conf_matrix in b_conf_matrix_list:
+ self._b_conf_matrix += b_conf_matrix
+
+ if self._output_dir:
+ PathManager.mkdirs(self._output_dir)
+ file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
+ with PathManager.open(file_path, "w") as f:
+ f.write(json.dumps(self._predictions))
+
+ acc = np.full(self._num_classes, np.nan, dtype=np.float64)  # np.float alias removed in NumPy >= 1.24
+ iou = np.full(self._num_classes, np.nan, dtype=np.float64)
+ tp = self._conf_matrix.diagonal()[:-1].astype(np.float64)
+ pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float64)
+ class_weights = pos_gt / np.sum(pos_gt)
+ pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float64)
+ acc_valid = pos_gt > 0
+ acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
+ union = pos_gt + pos_pred - tp
+ iou_valid = np.logical_and(acc_valid, union > 0)
+ iou[iou_valid] = tp[iou_valid] / union[iou_valid]
+ macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
+ miou = np.sum(iou[iou_valid]) / np.sum(iou_valid)
+ fiou = np.sum(iou[iou_valid] * class_weights[iou_valid])
+ pacc = np.sum(tp) / np.sum(pos_gt)
+
+ if self._compute_boundary_iou:
+ b_iou = np.full(self._num_classes, np.nan, dtype=np.float64)
+ b_tp = self._b_conf_matrix.diagonal()[:-1].astype(np.float64)
+ b_pos_gt = np.sum(self._b_conf_matrix[:-1, :-1], axis=0).astype(np.float64)
+ b_pos_pred = np.sum(self._b_conf_matrix[:-1, :-1], axis=1).astype(np.float64)
+ b_union = b_pos_gt + b_pos_pred - b_tp
+ b_iou_valid = b_union > 0
+ b_iou[b_iou_valid] = b_tp[b_iou_valid] / b_union[b_iou_valid]
+
+ res = {}
+ res["mIoU"] = 100 * miou
+ res["fwIoU"] = 100 * fiou
+ for i, name in enumerate(self._class_names):
+ res[f"IoU-{name}"] = 100 * iou[i]
+ if self._compute_boundary_iou:
+ res[f"BoundaryIoU-{name}"] = 100 * b_iou[i]
+ res[f"min(IoU, B-Iou)-{name}"] = 100 * min(iou[i], b_iou[i])
+ res["mACC"] = 100 * macc
+ res["pACC"] = 100 * pacc
+ for i, name in enumerate(self._class_names):
+ res[f"ACC-{name}"] = 100 * acc[i]
+
+ if self._output_dir:
+ file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
+ with PathManager.open(file_path, "wb") as f:
+ torch.save(res, f)
+ results = OrderedDict({"sem_seg": res})
+ self._logger.info(results)
+ return results
+
+ def encode_json_sem_seg(self, sem_seg, input_file_name):
+ """
+ Convert semantic segmentation to COCO stuff format with segments encoded as RLEs.
+ See http://cocodataset.org/#format-results
+ """
+ json_list = []
+ for label in np.unique(sem_seg):
+ if self._contiguous_id_to_dataset_id is not None:
+ assert (
+ label in self._contiguous_id_to_dataset_id
+ ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name)
+ dataset_id = self._contiguous_id_to_dataset_id[label]
+ else:
+ dataset_id = int(label)
+ mask = (sem_seg == label).astype(np.uint8)
+ mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0]
+ mask_rle["counts"] = mask_rle["counts"].decode("utf-8")
+ json_list.append(
+ {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle}
+ )
+ return json_list
+
+ def _mask_to_boundary(self, mask: np.ndarray, dilation_ratio=0.02):
+ assert mask.ndim == 2, "mask_to_boundary expects a 2-dimensional image"
+ h, w = mask.shape
+ diag_len = np.sqrt(h**2 + w**2)
+ dilation = max(1, int(round(dilation_ratio * diag_len)))
+ kernel = np.ones((3, 3), dtype=np.uint8)
+
+ padded_mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
+ eroded_mask_with_padding = cv2.erode(padded_mask, kernel, iterations=dilation)
+ eroded_mask = eroded_mask_with_padding[1:-1, 1:-1]
+ boundary = mask - eroded_mask
+ return boundary
diff --git a/vendor/detectron2/detectron2/evaluation/testing.py b/vendor/detectron2/detectron2/evaluation/testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e5ae625bb0593fc20739dd3ea549157e4df4f3d
--- /dev/null
+++ b/vendor/detectron2/detectron2/evaluation/testing.py
@@ -0,0 +1,85 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import numpy as np
+import pprint
+import sys
+from collections.abc import Mapping
+
+
+def print_csv_format(results):
+ """
+ Print main metrics in a format similar to Detectron,
+ so that they are easy to copypaste into a spreadsheet.
+
+ Args:
+ results (OrderedDict[dict]): task_name -> {metric -> score}
+ unordered dict can also be printed, but in arbitrary order
+ """
+ assert isinstance(results, Mapping) or not len(results), results
+ logger = logging.getLogger(__name__)
+ for task, res in results.items():
+ if isinstance(res, Mapping):
+ # Don't print "AP-category" metrics since they are usually not tracked.
+ important_res = [(k, v) for k, v in res.items() if "-" not in k]
+ logger.info("copypaste: Task: {}".format(task))
+ logger.info("copypaste: " + ",".join([k[0] for k in important_res]))
+ logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res]))
+ else:
+ logger.info(f"copypaste: {task}={res}")
+
+
+def verify_results(cfg, results):
+ """
+ Args:
+ results (OrderedDict[dict]): task_name -> {metric -> score}
+
+ Returns:
+ bool: whether the verification succeeds or not
+ """
+ expected_results = cfg.TEST.EXPECTED_RESULTS
+ if not len(expected_results):
+ return True
+
+ ok = True
+ for task, metric, expected, tolerance in expected_results:
+ actual = results[task].get(metric, None)
+ if actual is None:
+ ok = False
+ continue
+ if not np.isfinite(actual):
+ ok = False
+ continue
+ diff = abs(actual - expected)
+ if diff > tolerance:
+ ok = False
+
+ logger = logging.getLogger(__name__)
+ if not ok:
+ logger.error("Result verification failed!")
+ logger.error("Expected Results: " + str(expected_results))
+ logger.error("Actual Results: " + pprint.pformat(results))
+
+ sys.exit(1)
+ else:
+ logger.info("Results verification passed.")
+ return ok
+
+
+def flatten_results_dict(results):
+ """
+ Expand a hierarchical dict of scalars into a flat dict of scalars.
+ If results[k1][k2][k3] = v, the returned dict will have the entry
+ {"k1/k2/k3": v}.
+
+ Args:
+ results (dict):
+ """
+ r = {}
+ for k, v in results.items():
+ if isinstance(v, Mapping):
+ v = flatten_results_dict(v)
+ for kk, vv in v.items():
+ r[k + "/" + kk] = vv
+ else:
+ r[k] = v
+ return r
diff --git a/vendor/detectron2/detectron2/export/README.md b/vendor/detectron2/detectron2/export/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c86ff62516f4e8e4b1a6c1f33f11192933cf3861
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/README.md
@@ -0,0 +1,15 @@
+
+This directory contains code to prepare a detectron2 model for deployment.
+Currently it supports exporting a detectron2 model to TorchScript, ONNX, or (deprecated) Caffe2 format.
+
+Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage.
+
+
+### Acknowledgements
+
+Thanks to the Mobile Vision team at Facebook for developing the Caffe2 conversion tools.
+
+Thanks to the Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who
+helped export Detectron2 models to TorchScript.
+
+Thanks to the ONNX Converter team at Microsoft who helped export Detectron2 models to ONNX.
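+
+### Example
+
+A minimal sketch of TorchScript export via tracing with `TracingAdapter` (see the
+deployment documentation linked above for the full workflow). The `cfg` object and
+the dummy input below are assumptions for illustration only:
+
+```python
+import torch
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.export import TracingAdapter
+from detectron2.modeling import build_model
+
+# Build the model and load trained weights; `cfg` is assumed to already exist
+# (set cfg.MODEL.DEVICE = "cpu" beforehand for a CPU-only sketch).
+model = build_model(cfg)
+DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+model.eval()
+
+# Wrap the model so its dict-based inputs/outputs become flat tuples of tensors,
+# then trace. Real images that produce detections generally trace more reliably
+# than random tensors.
+inputs = [{"image": torch.rand(3, 800, 800)}]
+adapter = TracingAdapter(model, inputs)
+traced = torch.jit.trace(adapter, adapter.flattened_inputs)
+traced.save("model.ts")
+```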
diff --git a/vendor/detectron2/detectron2/export/__init__.py b/vendor/detectron2/detectron2/export/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a58758f64aae6071fa688be4400622ce6036efa
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+import warnings
+
+from .flatten import TracingAdapter
+from .torchscript import dump_torchscript_IR, scripting_with_instances
+
+try:
+ from caffe2.proto import caffe2_pb2 as _tmp
+ from caffe2.python import core
+
+ # caffe2 is optional
+except ImportError:
+ pass
+else:
+ from .api import *
+
+
+# TODO: Update ONNX Opset version and run tests when a newer PyTorch is supported
+STABLE_ONNX_OPSET_VERSION = 11
+
+
+def add_export_config(cfg):
+ warnings.warn(
+ "add_export_config has been deprecated and behaves as no-op function.", DeprecationWarning
+ )
+ return cfg
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/vendor/detectron2/detectron2/export/api.py b/vendor/detectron2/detectron2/export/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a272fed929217f18e04f731365f4bf7472110fc
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/api.py
@@ -0,0 +1,230 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import os
+import torch
+from caffe2.proto import caffe2_pb2
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.utils.file_io import PathManager
+
+from .caffe2_inference import ProtobufDetectionModel
+from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format
+from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph
+
+__all__ = [
+ "Caffe2Model",
+ "Caffe2Tracer",
+]
+
+
+class Caffe2Tracer:
+ """
+ Make a detectron2 model traceable with Caffe2 operators.
+ This class creates a traceable version of a detectron2 model which:
+
+ 1. Rewrites parts of the model using ops in Caffe2. Note that some ops do
+ not have GPU implementation in Caffe2.
+ 2. Removes post-processing and only produces raw layer outputs.
+
+ After making a traceable model, the class provides methods to export such a
+ model to different deployment formats.
+ The exported graph produced by this class takes two input tensors:
+
+ 1. (1, C, H, W) float "data" which is an image (usually in [0, 255]).
+ (H, W) often has to be padded to a multiple of 32 (depending on the model
+ architecture).
+ 2. 1x3 float "im_info", each row of which is (height, width, 1.0).
+ Height and width are true image shapes before padding.
+
+ The class currently only supports models using builtin meta architectures.
+ Batch inference is not supported, and contributions are welcome.
+ """
+
+ def __init__(self, cfg: CfgNode, model: nn.Module, inputs):
+ """
+ Args:
+ cfg (CfgNode): a detectron2 config used to construct caffe2-compatible model.
+ model (nn.Module): An original pytorch model. Must be among a few official models
+ in detectron2 that can be converted to become caffe2-compatible automatically.
+ Weights have to be already loaded to this model.
+ inputs: sample inputs that the given model takes for inference.
+ Will be used to trace the model. For most models, random inputs with
+ no detected objects will not work as they lead to wrong traces.
+ """
+ assert isinstance(cfg, CfgNode), cfg
+ assert isinstance(model, torch.nn.Module), type(model)
+
+ # TODO make it support custom models, by passing in c2 model directly
+ C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE]
+ self.traceable_model = C2MetaArch(cfg, copy.deepcopy(model))
+ self.inputs = inputs
+ self.traceable_inputs = self.traceable_model.get_caffe2_inputs(inputs)
+
+ def export_caffe2(self):
+ """
+ Export the model to Caffe2's protobuf format.
+ The returned object can be saved with its :meth:`.save_protobuf()` method.
+ The result can be loaded and executed using Caffe2 runtime.
+
+ Returns:
+ :class:`Caffe2Model`
+ """
+ from .caffe2_export import export_caffe2_detection_model
+
+ predict_net, init_net = export_caffe2_detection_model(
+ self.traceable_model, self.traceable_inputs
+ )
+ return Caffe2Model(predict_net, init_net)
+
+ def export_onnx(self):
+ """
+ Export the model to ONNX format.
+ Note that the exported model contains custom ops only available in caffe2, so it
+ cannot be executed directly by other runtimes (such as onnxruntime or TensorRT).
+ Post-processing or transformation passes may be applied on the model to accommodate
+ different runtimes, but we currently do not provide support for them.
+
+ Returns:
+ onnx.ModelProto: an onnx model.
+ """
+ from .caffe2_export import export_onnx_model as export_onnx_model_impl
+
+ return export_onnx_model_impl(self.traceable_model, (self.traceable_inputs,))
+
+ def export_torchscript(self):
+ """
+ Export the model to a ``torch.jit.TracedModule`` by tracing.
+ The returned object can be saved to a file by ``.save()``.
+
+ Returns:
+ torch.jit.TracedModule: a torch TracedModule
+ """
+ logger = logging.getLogger(__name__)
+ logger.info("Tracing the model with torch.jit.trace ...")
+ with torch.no_grad():
+ return torch.jit.trace(self.traceable_model, (self.traceable_inputs,))
+
+
+class Caffe2Model(nn.Module):
+ """
+ A wrapper around the traced model in Caffe2's protobuf format.
+ The exported graph has different inputs/outputs from the original Pytorch
+ model, as explained in :class:`Caffe2Tracer`. This class wraps around the
+ exported graph to simulate the same interface as the original Pytorch model.
+ It also provides functions to save/load models in Caffe2's format.
+
+ Examples:
+ ::
+ c2_model = Caffe2Tracer(cfg, torch_model, inputs).export_caffe2()
+ inputs = [{"image": img_tensor_CHW}]
+ outputs = c2_model(inputs)
+ orig_outputs = torch_model(inputs)
+ """
+
+ def __init__(self, predict_net, init_net):
+ super().__init__()
+ self.eval() # always in eval mode
+ self._predict_net = predict_net
+ self._init_net = init_net
+ self._predictor = None
+
+ __init__.__HIDE_SPHINX_DOC__ = True
+
+ @property
+ def predict_net(self):
+ """
+ caffe2.core.Net: the underlying caffe2 predict net
+ """
+ return self._predict_net
+
+ @property
+ def init_net(self):
+ """
+ caffe2.core.Net: the underlying caffe2 init net
+ """
+ return self._init_net
+
+ def save_protobuf(self, output_dir):
+ """
+ Save the model as caffe2's protobuf format.
+ It saves the following files:
+
+ * "model.pb": definition of the graph. Can be visualized with
+ tools like `netron <https://github.com/lutzroeder/netron>`_.
+ * "model_init.pb": model parameters
+ * "model.pbtxt": human-readable definition of the graph. Not
+ needed for deployment.
+
+ Args:
+ output_dir (str): the output directory to save protobuf files.
+ """
+ logger = logging.getLogger(__name__)
+ logger.info("Saving model to {} ...".format(output_dir))
+ if not PathManager.exists(output_dir):
+ PathManager.mkdirs(output_dir)
+
+ with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f:
+ f.write(self._predict_net.SerializeToString())
+ with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f:
+ f.write(str(self._predict_net))
+ with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f:
+ f.write(self._init_net.SerializeToString())
+
+ def save_graph(self, output_file, inputs=None):
+ """
+ Save the graph as SVG format.
+
+ Args:
+ output_file (str): a SVG file
+ inputs: optional inputs given to the model.
+ If given, the inputs will be used to run the graph to record
+ shape of every tensor. The shape information will be
+ saved together with the graph.
+ """
+ from .caffe2_export import run_and_save_graph
+
+ if inputs is None:
+ save_graph(self._predict_net, output_file, op_only=False)
+ else:
+ size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0)
+ device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii")
+ inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device)
+ inputs = [x.cpu().numpy() for x in inputs]
+ run_and_save_graph(self._predict_net, self._init_net, inputs, output_file)
+
+ @staticmethod
+ def load_protobuf(dir):
+ """
+ Args:
+ dir (str): a directory used to save Caffe2Model with
+ :meth:`save_protobuf`.
+ The files "model.pb" and "model_init.pb" are needed.
+
+ Returns:
+ Caffe2Model: the caffe2 model loaded from this directory.
+ """
+ predict_net = caffe2_pb2.NetDef()
+ with PathManager.open(os.path.join(dir, "model.pb"), "rb") as f:
+ predict_net.ParseFromString(f.read())
+
+ init_net = caffe2_pb2.NetDef()
+ with PathManager.open(os.path.join(dir, "model_init.pb"), "rb") as f:
+ init_net.ParseFromString(f.read())
+
+ return Caffe2Model(predict_net, init_net)
+
+ def __call__(self, inputs):
+ """
+ An interface that wraps around a Caffe2 model and mimics detectron2's models'
+ input/output format. See details about the format at :doc:`/tutorials/models`.
+ This is used to compare the outputs of caffe2 model with its original torch model.
+
+ Due to the extra conversion between Pytorch/Caffe2, this method is not meant for
+ benchmarking. Because of the conversion, this method also depends on detectron2
+ in order to convert to detectron2's output format.
+ """
+ if self._predictor is None:
+ self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net)
+ return self._predictor(inputs)
diff --git a/vendor/detectron2/detectron2/export/c10.py b/vendor/detectron2/detectron2/export/c10.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9a3ee38c8df7c05ac53985b5ec1c5535f360187
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/c10.py
@@ -0,0 +1,571 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import math
+from typing import Dict
+import torch
+import torch.nn.functional as F
+
+from detectron2.layers import ShapeSpec, cat
+from detectron2.layers.roi_align_rotated import ROIAlignRotated
+from detectron2.modeling import poolers
+from detectron2.modeling.proposal_generator import rpn
+from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference
+from detectron2.structures import Boxes, ImageList, Instances, Keypoints, RotatedBoxes
+
+from .shared import alias, to_device
+
+
+"""
+This file contains caffe2-compatible implementation of several detectron2 components.
+"""
+
+
+class Caffe2Boxes(Boxes):
+ """
+ Representing a list of detectron2.structures.Boxes from a minibatch, where each box
+ is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector
+ (batch index + 5 coordinates) for RotatedBoxes.
+ """
+
+ def __init__(self, tensor):
+ assert isinstance(tensor, torch.Tensor)
+ assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size()
+ # TODO: make tensor immutable when dim is Nx5 for Boxes,
+ # and Nx6 for RotatedBoxes?
+ self.tensor = tensor
+
+
+# TODO clean up this class, maybe just extend Instances
+class InstancesList(object):
+ """
+ Tensor representation of a list of Instances object for a batch of images.
+
+ When dealing with a batch of images with Caffe2 ops, a list of bboxes
+ (instances) is usually represented by a single Tensor with size
+ (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is
+ for providing common functions to convert between these two representations.
+ """
+
+ def __init__(self, im_info, indices, extra_fields=None):
+ # [N, 3] -> (H, W, Scale)
+ self.im_info = im_info
+ # [N,] -> index of the batch to which the instance belongs
+ self.indices = indices
+ # [N, ...]
+ self.batch_extra_fields = extra_fields or {}
+
+ self.image_size = self.im_info
+
+ def get_fields(self):
+ """like `get_fields` in the Instances object,
+ but return each field in tensor representations"""
+ ret = {}
+ for k, v in self.batch_extra_fields.items():
+ # if isinstance(v, torch.Tensor):
+ # tensor_rep = v
+ # elif isinstance(v, (Boxes, Keypoints)):
+ # tensor_rep = v.tensor
+ # else:
+ # raise ValueError("Can't find tensor representation for: {}".format())
+ ret[k] = v
+ return ret
+
+ def has(self, name):
+ return name in self.batch_extra_fields
+
+ def set(self, name, value):
+ # len(tensor) is a bad practice that generates ONNX constants during tracing.
+ # Although not a problem for the `assert` statement below, torch ONNX exporter
+ # still raises a misleading warning as it does not know this call comes from `assert`
+ if isinstance(value, Boxes):
+ data_len = value.tensor.shape[0]
+ elif isinstance(value, torch.Tensor):
+ data_len = value.shape[0]
+ else:
+ data_len = len(value)
+ if len(self.batch_extra_fields):
+ assert (
+ len(self) == data_len
+ ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self))
+ self.batch_extra_fields[name] = value
+
+ def __getattr__(self, name):
+ if name not in self.batch_extra_fields:
+ raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
+ return self.batch_extra_fields[name]
+
+ def __len__(self):
+ return len(self.indices)
+
+ def flatten(self):
+ ret = []
+ for _, v in self.batch_extra_fields.items():
+ if isinstance(v, (Boxes, Keypoints)):
+ ret.append(v.tensor)
+ else:
+ ret.append(v)
+ return ret
+
+ @staticmethod
+ def to_d2_instances_list(instances_list):
+ """
+ Convert InstancesList to List[Instances]. The input `instances_list` can
+ also be a List[Instances]; in this case this method is a no-op.
+ """
+ if not isinstance(instances_list, InstancesList):
+ assert all(isinstance(x, Instances) for x in instances_list)
+ return instances_list
+
+ ret = []
+ for i, info in enumerate(instances_list.im_info):
+ instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())]))
+
+ ids = instances_list.indices == i
+ for k, v in instances_list.batch_extra_fields.items():
+ if isinstance(v, torch.Tensor):
+ instances.set(k, v[ids])
+ continue
+ elif isinstance(v, Boxes):
+ instances.set(k, v[ids, -4:])
+ continue
+
+ target_type, tensor_source = v
+ assert isinstance(tensor_source, torch.Tensor)
+ assert tensor_source.shape[0] == instances_list.indices.shape[0]
+ tensor_source = tensor_source[ids]
+
+ if issubclass(target_type, Boxes):
+ instances.set(k, Boxes(tensor_source[:, -4:]))
+ elif issubclass(target_type, Keypoints):
+ instances.set(k, Keypoints(tensor_source))
+ elif issubclass(target_type, torch.Tensor):
+ instances.set(k, tensor_source)
+ else:
+ raise ValueError("Can't handle targe type: {}".format(target_type))
+
+ ret.append(instances)
+ return ret
+
+
+class Caffe2Compatible(object):
+ """
+ A model can inherit this class to indicate that it can be traced and deployed with caffe2.
+ """
+
+ def _get_tensor_mode(self):
+ return self._tensor_mode
+
+ def _set_tensor_mode(self, v):
+ self._tensor_mode = v
+
+ tensor_mode = property(_get_tensor_mode, _set_tensor_mode)
+ """
+ If true, the model expects C2-style tensor-only input/output format.
+ """
+
+
+class Caffe2RPN(Caffe2Compatible, rpn.RPN):
+ @classmethod
+ def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
+ ret = super(Caffe2Compatible, cls).from_config(cfg, input_shape)
+ assert tuple(cfg.MODEL.RPN.BBOX_REG_WEIGHTS) == (1.0, 1.0, 1.0, 1.0) or tuple(
+ cfg.MODEL.RPN.BBOX_REG_WEIGHTS
+ ) == (1.0, 1.0, 1.0, 1.0, 1.0)
+ return ret
+
+ def _generate_proposals(
+ self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None
+ ):
+ assert isinstance(images, ImageList)
+ if self.tensor_mode:
+ im_info = images.image_sizes
+ else:
+ im_info = torch.tensor([[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]).to(
+ images.tensor.device
+ )
+ assert isinstance(im_info, torch.Tensor)
+
+ rpn_rois_list = []
+ rpn_roi_probs_list = []
+ for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip(
+ objectness_logits_pred,
+ anchor_deltas_pred,
+ [b for (n, b) in self.anchor_generator.cell_anchors.named_buffers()],
+ self.anchor_generator.strides,
+ ):
+ scores = scores.detach()
+ bbox_deltas = bbox_deltas.detach()
+
+ rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals(
+ scores,
+ bbox_deltas,
+ im_info,
+ cell_anchors_tensor,
+ spatial_scale=1.0 / feat_stride,
+ pre_nms_topN=self.pre_nms_topk[self.training],
+ post_nms_topN=self.post_nms_topk[self.training],
+ nms_thresh=self.nms_thresh,
+ min_size=self.min_box_size,
+ # correct_transform_coords=True, # deprecated argument
+ angle_bound_on=True, # Default
+ angle_bound_lo=-180,
+ angle_bound_hi=180,
+ clip_angle_thresh=1.0, # Default
+ legacy_plus_one=False,
+ )
+ rpn_rois_list.append(rpn_rois)
+ rpn_roi_probs_list.append(rpn_roi_probs)
+
+ # For FPN in D2, in RPN all proposals from different levels are concatenated
+ # together, ranked and picked by top post_nms_topk. Then in ROIPooler
+ # it calculates level_assignments and calls the RoIAlign from
+ # the corresponding level.
+
+ if len(objectness_logits_pred) == 1:
+ rpn_rois = rpn_rois_list[0]
+ rpn_roi_probs = rpn_roi_probs_list[0]
+ else:
+ assert len(rpn_rois_list) == len(rpn_roi_probs_list)
+ rpn_post_nms_topN = self.post_nms_topk[self.training]
+
+ device = rpn_rois_list[0].device
+ input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)]
+
+ # TODO remove this after confirming rpn_max_level/rpn_min_level
+ # is not needed in CollectRpnProposals.
+ feature_strides = list(self.anchor_generator.strides)
+ rpn_min_level = int(math.log2(feature_strides[0]))
+ rpn_max_level = int(math.log2(feature_strides[-1]))
+ assert (rpn_max_level - rpn_min_level + 1) == len(
+ rpn_rois_list
+ ), "CollectRpnProposals requires continuous levels"
+
+ rpn_rois = torch.ops._caffe2.CollectRpnProposals(
+ input_list,
+ # NOTE: in current implementation, rpn_max_level and rpn_min_level
+ # are not needed; only the difference between the two matters, and it
+ # can be inferred from the number of inputs. Keep them now for
+ # consistency.
+ rpn_max_level=2 + len(rpn_rois_list) - 1,
+ rpn_min_level=2,
+ rpn_post_nms_topN=rpn_post_nms_topN,
+ )
+ rpn_rois = to_device(rpn_rois, device)
+ rpn_roi_probs = []
+
+ proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode)
+ return proposals, {}
+
+ def forward(self, images, features, gt_instances=None):
+ assert not self.training
+ features = [features[f] for f in self.in_features]
+ objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features)
+ return self._generate_proposals(
+ images,
+ objectness_logits_pred,
+ anchor_deltas_pred,
+ gt_instances,
+ )
+
+ @staticmethod
+ def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode):
+ proposals = InstancesList(
+ im_info=im_info,
+ indices=rpn_rois[:, 0],
+ extra_fields={
+ "proposal_boxes": Caffe2Boxes(rpn_rois),
+ "objectness_logits": (torch.Tensor, rpn_roi_probs),
+ },
+ )
+ if not tensor_mode:
+ proposals = InstancesList.to_d2_instances_list(proposals)
+ else:
+ proposals = [proposals]
+ return proposals
+
+
+class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler):
+ @staticmethod
+ def c2_preprocess(box_lists):
+ assert all(isinstance(x, Boxes) for x in box_lists)
+ if all(isinstance(x, Caffe2Boxes) for x in box_lists):
+ # input is pure-tensor based
+ assert len(box_lists) == 1
+ pooler_fmt_boxes = box_lists[0].tensor
+ else:
+ pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists)
+ return pooler_fmt_boxes
+
+ def forward(self, x, box_lists):
+ assert not self.training
+
+ pooler_fmt_boxes = self.c2_preprocess(box_lists)
+ num_level_assignments = len(self.level_poolers)
+
+ if num_level_assignments == 1:
+ if isinstance(self.level_poolers[0], ROIAlignRotated):
+ c2_roi_align = torch.ops._caffe2.RoIAlignRotated
+ aligned = True
+ else:
+ c2_roi_align = torch.ops._caffe2.RoIAlign
+ aligned = self.level_poolers[0].aligned
+
+ x0 = x[0]
+ if x0.is_quantized:
+ x0 = x0.dequantize()
+
+ out = c2_roi_align(
+ x0,
+ pooler_fmt_boxes,
+ order="NCHW",
+ spatial_scale=float(self.level_poolers[0].spatial_scale),
+ pooled_h=int(self.output_size[0]),
+ pooled_w=int(self.output_size[1]),
+ sampling_ratio=int(self.level_poolers[0].sampling_ratio),
+ aligned=aligned,
+ )
+ return out
+
+ device = pooler_fmt_boxes.device
+ assert (
+ self.max_level - self.min_level + 1 == 4
+ ), "Currently DistributeFpnProposals only support 4 levels"
+ fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
+ to_device(pooler_fmt_boxes, "cpu"),
+ roi_canonical_scale=self.canonical_box_size,
+ roi_canonical_level=self.canonical_level,
+ roi_max_level=self.max_level,
+ roi_min_level=self.min_level,
+ legacy_plus_one=False,
+ )
+ fpn_outputs = [to_device(x, device) for x in fpn_outputs]
+
+ rois_fpn_list = fpn_outputs[:-1]
+ rois_idx_restore_int32 = fpn_outputs[-1]
+
+ roi_feat_fpn_list = []
+ for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers):
+ if isinstance(pooler, ROIAlignRotated):
+ c2_roi_align = torch.ops._caffe2.RoIAlignRotated
+ aligned = True
+ else:
+ c2_roi_align = torch.ops._caffe2.RoIAlign
+ aligned = bool(pooler.aligned)
+
+ if x_level.is_quantized:
+ x_level = x_level.dequantize()
+
+ roi_feat_fpn = c2_roi_align(
+ x_level,
+ roi_fpn,
+ order="NCHW",
+ spatial_scale=float(pooler.spatial_scale),
+ pooled_h=int(self.output_size[0]),
+ pooled_w=int(self.output_size[1]),
+ sampling_ratio=int(pooler.sampling_ratio),
+ aligned=aligned,
+ )
+ roi_feat_fpn_list.append(roi_feat_fpn)
+
+ roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0)
+ assert roi_feat_shuffled.numel() > 0 and rois_idx_restore_int32.numel() > 0, (
+ "Caffe2 export requires tracing with a model checkpoint + input that can produce valid"
+ " detections. But no detections were obtained with the given checkpoint and input!"
+ )
+ roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32)
+ return roi_feat
+
+
+def caffe2_fast_rcnn_outputs_inference(tensor_mode, box_predictor, predictions, proposals):
+ """equivalent to FastRCNNOutputLayers.inference"""
+ num_classes = box_predictor.num_classes
+ score_thresh = box_predictor.test_score_thresh
+ nms_thresh = box_predictor.test_nms_thresh
+ topk_per_image = box_predictor.test_topk_per_image
+ is_rotated = len(box_predictor.box2box_transform.weights) == 5
+
+ if is_rotated:
+ box_dim = 5
+ assert box_predictor.box2box_transform.weights[4] == 1, (
+ "The weights for Rotated BBoxTransform in C2 have only 4 dimensions,"
+ + " thus enforcing the angle weight to be 1 for now"
+ )
+ box2box_transform_weights = box_predictor.box2box_transform.weights[:4]
+ else:
+ box_dim = 4
+ box2box_transform_weights = box_predictor.box2box_transform.weights
+
+ class_logits, box_regression = predictions
+ if num_classes + 1 == class_logits.shape[1]:
+ class_prob = F.softmax(class_logits, -1)
+ else:
+ assert num_classes == class_logits.shape[1]
+ class_prob = F.sigmoid(class_logits)
+ # BoxWithNMSLimit will infer num_classes from the shape of the class_prob
+ # So append a zero column as placeholder for the background class
+ class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1)
+
+ assert box_regression.shape[1] % box_dim == 0
+ cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1
+
+ input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1
+
+ proposal_boxes = proposals[0].proposal_boxes
+ if isinstance(proposal_boxes, Caffe2Boxes):
+ rois = Caffe2Boxes.cat([p.proposal_boxes for p in proposals])
+ elif isinstance(proposal_boxes, RotatedBoxes):
+ rois = RotatedBoxes.cat([p.proposal_boxes for p in proposals])
+ elif isinstance(proposal_boxes, Boxes):
+ rois = Boxes.cat([p.proposal_boxes for p in proposals])
+ else:
+ raise NotImplementedError(
+ 'Expected proposals[0].proposal_boxes to be type "Boxes", '
+ f"instead got {type(proposal_boxes)}"
+ )
+
+ device, dtype = rois.tensor.device, rois.tensor.dtype
+ if input_tensor_mode:
+ im_info = proposals[0].image_size
+ rois = rois.tensor
+ else:
+ im_info = torch.tensor([[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]])
+ batch_ids = cat(
+ [
+ torch.full((b, 1), i, dtype=dtype, device=device)
+ for i, b in enumerate(len(p) for p in proposals)
+ ],
+ dim=0,
+ )
+ rois = torch.cat([batch_ids, rois.tensor], dim=1)
+
+ roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform(
+ to_device(rois, "cpu"),
+ to_device(box_regression, "cpu"),
+ to_device(im_info, "cpu"),
+ weights=box2box_transform_weights,
+ apply_scale=True,
+ rotated=is_rotated,
+ angle_bound_on=True,
+ angle_bound_lo=-180,
+ angle_bound_hi=180,
+ clip_angle_thresh=1.0,
+ legacy_plus_one=False,
+ )
+ roi_pred_bbox = to_device(roi_pred_bbox, device)
+ roi_batch_splits = to_device(roi_batch_splits, device)
+
+ nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
+ to_device(class_prob, "cpu"),
+ to_device(roi_pred_bbox, "cpu"),
+ to_device(roi_batch_splits, "cpu"),
+ score_thresh=float(score_thresh),
+ nms=float(nms_thresh),
+ detections_per_im=int(topk_per_image),
+ soft_nms_enabled=False,
+ soft_nms_method="linear",
+ soft_nms_sigma=0.5,
+ soft_nms_min_score_thres=0.001,
+ rotated=is_rotated,
+ cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
+ input_boxes_include_bg_cls=False,
+ output_classes_include_bg_cls=False,
+ legacy_plus_one=False,
+ )
+ roi_score_nms = to_device(nms_outputs[0], device)
+ roi_bbox_nms = to_device(nms_outputs[1], device)
+ roi_class_nms = to_device(nms_outputs[2], device)
+ roi_batch_splits_nms = to_device(nms_outputs[3], device)
+ roi_keeps_nms = to_device(nms_outputs[4], device)
+ roi_keeps_size_nms = to_device(nms_outputs[5], device)
+ if not tensor_mode:
+ roi_class_nms = roi_class_nms.to(torch.int64)
+
+ roi_batch_ids = cat(
+ [
+ torch.full((b, 1), i, dtype=dtype, device=device)
+ for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms)
+ ],
+ dim=0,
+ )
+
+ roi_class_nms = alias(roi_class_nms, "class_nms")
+ roi_score_nms = alias(roi_score_nms, "score_nms")
+ roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms")
+ roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms")
+ roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms")
+ roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms")
+
+ results = InstancesList(
+ im_info=im_info,
+ indices=roi_batch_ids[:, 0],
+ extra_fields={
+ "pred_boxes": Caffe2Boxes(roi_bbox_nms),
+ "scores": roi_score_nms,
+ "pred_classes": roi_class_nms,
+ },
+ )
+
+ if not tensor_mode:
+ results = InstancesList.to_d2_instances_list(results)
+ batch_splits = roi_batch_splits_nms.int().tolist()
+ kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits))
+ else:
+ results = [results]
+ kept_indices = [roi_keeps_nms]
+
+ return results, kept_indices
+
+
+class Caffe2FastRCNNOutputsInference:
+ def __init__(self, tensor_mode):
+ self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode
+
+ def __call__(self, box_predictor, predictions, proposals):
+ return caffe2_fast_rcnn_outputs_inference(
+ self.tensor_mode, box_predictor, predictions, proposals
+ )
+
+
+def caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances):
+ """equivalent to mask_head.mask_rcnn_inference"""
+ if all(isinstance(x, InstancesList) for x in pred_instances):
+ assert len(pred_instances) == 1
+ mask_probs_pred = pred_mask_logits.sigmoid()
+ mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs")
+ pred_instances[0].set("pred_masks", mask_probs_pred)
+ else:
+ mask_rcnn_inference(pred_mask_logits, pred_instances)
+
+
+class Caffe2MaskRCNNInference:
+ def __call__(self, pred_mask_logits, pred_instances):
+ return caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances)
+
+
+def caffe2_keypoint_rcnn_inference(use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances):
+ # just return the keypoint heatmap for now,
+ # there will be an option to call HeatmapMaxKeypointOp
+ output = alias(pred_keypoint_logits, "kps_score")
+ if all(isinstance(x, InstancesList) for x in pred_instances):
+ assert len(pred_instances) == 1
+ if use_heatmap_max_keypoint:
+ device = output.device
+ output = torch.ops._caffe2.HeatmapMaxKeypoint(
+ to_device(output, "cpu"),
+ pred_instances[0].pred_boxes.tensor,
+ should_output_softmax=True, # worth making it configurable?
+ )
+ output = to_device(output, device)
+ output = alias(output, "keypoints_out")
+ pred_instances[0].set("pred_keypoints", output)
+ return pred_keypoint_logits
+
+
+class Caffe2KeypointRCNNInference:
+ def __init__(self, use_heatmap_max_keypoint):
+ self.use_heatmap_max_keypoint = use_heatmap_max_keypoint
+
+ def __call__(self, pred_keypoint_logits, pred_instances):
+ return caffe2_keypoint_rcnn_inference(
+ self.use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances
+ )
diff --git a/vendor/detectron2/detectron2/export/caffe2_export.py b/vendor/detectron2/detectron2/export/caffe2_export.py
new file mode 100644
index 0000000000000000000000000000000000000000..d609c27c7deb396352967dbcbc79b1e00f2a2de1
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/caffe2_export.py
@@ -0,0 +1,203 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import copy
+import io
+import logging
+import numpy as np
+from typing import List
+import onnx
+import onnx.optimizer
+import torch
+from caffe2.proto import caffe2_pb2
+from caffe2.python import core
+from caffe2.python.onnx.backend import Caffe2Backend
+from tabulate import tabulate
+from termcolor import colored
+from torch.onnx import OperatorExportTypes
+
+from .shared import (
+ ScopedWS,
+ construct_init_net_from_params,
+ fuse_alias_placeholder,
+ fuse_copy_between_cpu_and_gpu,
+ get_params_from_init_net,
+ group_norm_replace_aten_with_caffe2,
+ infer_device_type,
+ remove_dead_end_ops,
+ remove_reshape_for_fc,
+ save_graph,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def export_onnx_model(model, inputs):
+ """
+ Trace and export a model to onnx format.
+
+ Args:
+ model (nn.Module):
+ inputs (tuple[args]): the model will be called by `model(*inputs)`
+
+ Returns:
+ an onnx model
+ """
+ assert isinstance(model, torch.nn.Module)
+
+ # make sure all modules are in eval mode, onnx may change the training state
+ # of the module if the states are not consistent
+ def _check_eval(module):
+ assert not module.training
+
+ model.apply(_check_eval)
+
+ # Export the model to ONNX
+ with torch.no_grad():
+ with io.BytesIO() as f:
+ torch.onnx.export(
+ model,
+ inputs,
+ f,
+ operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
+ # verbose=True, # NOTE: uncomment this for debugging
+ # export_params=True,
+ )
+ onnx_model = onnx.load_from_string(f.getvalue())
+
+ return onnx_model
+
+
+def _op_stats(net_def):
+ type_count = {}
+ for t in [op.type for op in net_def.op]:
+ type_count[t] = type_count.get(t, 0) + 1
+ type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet
+ type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count
+ return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list)
+
+
+def _assign_device_option(
+ predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor]
+):
+ """
+ ONNX exported network doesn't have concept of device, assign necessary
+ device option for each op in order to make it runable on GPU runtime.
+ """
+
+ def _get_device_type(torch_tensor):
+ assert torch_tensor.device.type in ["cpu", "cuda"]
+ assert torch_tensor.device.index == 0
+ return torch_tensor.device.type
+
+ def _assign_op_device_option(net_proto, net_ssa, blob_device_types):
+ for op, ssa_i in zip(net_proto.op, net_ssa):
+ if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]:
+ op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0))
+ else:
+ devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]]
+ assert all(d == devices[0] for d in devices)
+ if devices[0] == "cuda":
+ op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0))
+
+ # update ops in predict_net
+ predict_net_input_device_types = {
+ (name, 0): _get_device_type(tensor)
+ for name, tensor in zip(predict_net.external_input, tensor_inputs)
+ }
+ predict_net_device_types = infer_device_type(
+ predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch"
+ )
+ predict_net_ssa, _ = core.get_ssa(predict_net)
+ _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types)
+
+ # update ops in init_net
+ init_net_ssa, versions = core.get_ssa(init_net)
+ init_net_output_device_types = {
+ (name, versions[name]): predict_net_device_types[(name, 0)]
+ for name in init_net.external_output
+ }
+ init_net_device_types = infer_device_type(
+ init_net, known_status=init_net_output_device_types, device_name_style="pytorch"
+ )
+ _assign_op_device_option(init_net, init_net_ssa, init_net_device_types)
+
+
+def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]):
+ """
+ Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX.
+
+ Args:
+ model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py
+ tensor_inputs: a list of tensors that caffe2 model takes as input.
+ """
+ model = copy.deepcopy(model)
+ assert isinstance(model, torch.nn.Module)
+ assert hasattr(model, "encode_additional_info")
+
+ # Export via ONNX
+ logger.info(
+ "Exporting a {} model via ONNX ...".format(type(model).__name__)
+ + " Some warnings from ONNX are expected and are usually not to worry about."
+ )
+ onnx_model = export_onnx_model(model, (tensor_inputs,))
+ # Convert ONNX model to Caffe2 protobuf
+ init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model)
+ ops_table = [[op.type, op.input, op.output] for op in predict_net.op]
+ table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe")
+ logger.info(
+ "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan")
+ )
+
+ # Apply protobuf optimization
+ fuse_alias_placeholder(predict_net, init_net)
+ if any(t.device.type != "cpu" for t in tensor_inputs):
+ fuse_copy_between_cpu_and_gpu(predict_net)
+ remove_dead_end_ops(init_net)
+ _assign_device_option(predict_net, init_net, tensor_inputs)
+ params, device_options = get_params_from_init_net(init_net)
+ predict_net, params = remove_reshape_for_fc(predict_net, params)
+ init_net = construct_init_net_from_params(params, device_options)
+ group_norm_replace_aten_with_caffe2(predict_net)
+
+ # Record necessary information for running the pb model in Detectron2 system.
+ model.encode_additional_info(predict_net, init_net)
+
+ logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net)))
+ logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net)))
+
+ return predict_net, init_net
+
+
+def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path):
+ """
+ Run the caffe2 model on given inputs, recording the shape and draw the graph.
+
+ predict_net/init_net: caffe2 model.
+ tensor_inputs: a list of tensors that caffe2 model takes as input.
+ graph_save_path: path for saving graph of exported model.
+ """
+
+ logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path))
+ save_graph(predict_net, graph_save_path, op_only=False)
+
+ # Run the exported Caffe2 net
+ logger.info("Running ONNX exported model ...")
+ with ScopedWS("__ws_tmp__", True) as ws:
+ ws.RunNetOnce(init_net)
+ initialized_blobs = set(ws.Blobs())
+ uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs]
+ for name, blob in zip(uninitialized, tensor_inputs):
+ ws.FeedBlob(name, blob)
+
+ try:
+ ws.RunNetOnce(predict_net)
+ except RuntimeError as e:
+ logger.warning("Encountered RuntimeError: \n{}".format(str(e)))
+
+ ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()}
+ blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)}
+
+ logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path))
+ save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes)
+
+ return ws_blobs
diff --git a/vendor/detectron2/detectron2/export/caffe2_inference.py b/vendor/detectron2/detectron2/export/caffe2_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..deb886c0417285ed1d5ad85eb941fa1ac757cdab
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/caffe2_inference.py
@@ -0,0 +1,161 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import logging
+import numpy as np
+from itertools import count
+import torch
+from caffe2.proto import caffe2_pb2
+from caffe2.python import core
+
+from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format
+from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type
+
+logger = logging.getLogger(__name__)
+
+
+# ===== ref: mobile-vision predictor's 'Caffe2Wrapper' class ======
+class ProtobufModel(torch.nn.Module):
+ """
+ Wrapper of a caffe2 protobuf model.
+ It works just like nn.Module, but runs caffe2 under the hood.
+ Input/Output are tuple[tensor] that match the caffe2 net's external_input/output.
+ """
+
+ _ids = count(0)
+
+ def __init__(self, predict_net, init_net):
+ logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...")
+ super().__init__()
+ assert isinstance(predict_net, caffe2_pb2.NetDef)
+ assert isinstance(init_net, caffe2_pb2.NetDef)
+ # create unique temporary workspace for each instance
+ self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids))
+ self.net = core.Net(predict_net)
+
+ logger.info("Running init_net once to fill the parameters ...")
+ with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws:
+ ws.RunNetOnce(init_net)
+ uninitialized_external_input = []
+ for blob in self.net.Proto().external_input:
+ if blob not in ws.Blobs():
+ uninitialized_external_input.append(blob)
+ ws.CreateBlob(blob)
+ ws.CreateNet(self.net)
+
+ self._error_msgs = set()
+ self._input_blobs = uninitialized_external_input
+
+ def _infer_output_devices(self, inputs):
+ """
+ Returns:
+ list[str]: list of device for each external output
+ """
+
+ def _get_device_type(torch_tensor):
+ assert torch_tensor.device.type in ["cpu", "cuda"]
+ assert torch_tensor.device.index == 0
+ return torch_tensor.device.type
+
+ predict_net = self.net.Proto()
+ input_device_types = {
+ (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs)
+ }
+ device_type_map = infer_device_type(
+ predict_net, known_status=input_device_types, device_name_style="pytorch"
+ )
+ ssa, versions = core.get_ssa(predict_net)
+ versioned_outputs = [(name, versions[name]) for name in predict_net.external_output]
+ output_devices = [device_type_map[outp] for outp in versioned_outputs]
+ return output_devices
+
+ def forward(self, inputs):
+ """
+ Args:
+ inputs (tuple[torch.Tensor])
+
+ Returns:
+ tuple[torch.Tensor]
+ """
+ assert len(inputs) == len(self._input_blobs), (
+ f"Length of inputs ({len(inputs)}) "
+ f"doesn't match the required input blobs: {self._input_blobs}"
+ )
+
+ with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws:
+ for b, tensor in zip(self._input_blobs, inputs):
+ ws.FeedBlob(b, tensor)
+
+ try:
+ ws.RunNet(self.net.Proto().name)
+ except RuntimeError as e:
+ if not str(e) in self._error_msgs:
+ self._error_msgs.add(str(e))
+ logger.warning("Encountered new RuntimeError: \n{}".format(str(e)))
+ logger.warning("Catch the error and use partial results.")
+
+ c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output]
+ # Remove outputs of the current run. This is necessary in order to
+ # prevent fetching the result from a previous run if the model fails
+ # in the middle.
+ for b in self.net.Proto().external_output:
+ # Needs to create an uninitialized blob to make the net runnable.
+ # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b),
+ # but there's no such API.
+ ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).")
+
+ # Cast output to torch.Tensor on the desired device
+ output_devices = (
+ self._infer_output_devices(inputs)
+ if any(t.device.type != "cpu" for t in inputs)
+ else ["cpu" for _ in self.net.Proto().external_output]
+ )
+
+ outputs = []
+ for name, c2_output, device in zip(
+ self.net.Proto().external_output, c2_outputs, output_devices
+ ):
+ if not isinstance(c2_output, np.ndarray):
+ raise RuntimeError(
+ "Invalid output for blob {}, received: {}".format(name, c2_output)
+ )
+ outputs.append(torch.tensor(c2_output).to(device=device))
+ return tuple(outputs)
+
+
+class ProtobufDetectionModel(torch.nn.Module):
+ """
+ A class that works just like a pytorch meta arch in terms of inference, but runs
+ a caffe2 model under the hood.
+ """
+
+ def __init__(self, predict_net, init_net, *, convert_outputs=None):
+ """
+ Args:
+ predict_net, init_net (core.Net): caffe2 nets
+ convert_outputs (callable): a function that converts caffe2
+ outputs to the same format as the original pytorch model.
+ By default, use the one defined in the caffe2 meta_arch.
+ """
+ super().__init__()
+ self.protobuf_model = ProtobufModel(predict_net, init_net)
+ self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0)
+ self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii")
+
+ if convert_outputs is None:
+ meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN")
+ meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")]
+ self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net)
+ else:
+ self._convert_outputs = convert_outputs
+
+ def _convert_inputs(self, batched_inputs):
+ # currently all models convert inputs in the same way
+ return convert_batched_inputs_to_c2_format(
+ batched_inputs, self.size_divisibility, self.device
+ )
+
+ def forward(self, batched_inputs):
+ c2_inputs = self._convert_inputs(batched_inputs)
+ c2_results = self.protobuf_model(c2_inputs)
+ c2_results = dict(zip(self.protobuf_model.net.Proto().external_output, c2_results))
+ return self._convert_outputs(batched_inputs, c2_inputs, c2_results)
diff --git a/vendor/detectron2/detectron2/export/caffe2_modeling.py b/vendor/detectron2/detectron2/export/caffe2_modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e675c45d62f7b363a298099cd520c417832d58c
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/caffe2_modeling.py
@@ -0,0 +1,420 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import functools
+import io
+import struct
+import types
+import torch
+
+from detectron2.modeling import meta_arch
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.roi_heads import keypoint_head
+from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes
+
+from .c10 import Caffe2Compatible
+from .caffe2_patch import ROIHeadsPatcher, patch_generalized_rcnn
+from .shared import (
+ alias,
+ check_set_pb_arg,
+ get_pb_arg_floats,
+ get_pb_arg_valf,
+ get_pb_arg_vali,
+ get_pb_arg_vals,
+ mock_torch_nn_functional_interpolate,
+)
+
+
+def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False):
+ """
+ A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor])
+ to detectron2's format (i.e. a list of Instances).
+ This only works when the model follows the Caffe2 Detectron naming convention.
+
+ Args:
+ image_sizes (List[List[int, int]]): [H, W] of every image.
+ tensor_outputs (Dict[str, Tensor]): external_output to its tensor.
+
+ force_mask_on (Bool): if true, it makes sure there will be pred_masks even
+ if the mask is not found from tensor_outputs (usually due to a model crash)
+ """
+
+ results = [Instances(image_size) for image_size in image_sizes]
+
+ batch_splits = tensor_outputs.get("batch_splits", None)
+ if batch_splits:
+ raise NotImplementedError()
+ assert len(image_sizes) == 1
+ result = results[0]
+
+ bbox_nms = tensor_outputs["bbox_nms"]
+ score_nms = tensor_outputs["score_nms"]
+ class_nms = tensor_outputs["class_nms"]
+ # Detection will always succeed because Conv supports 0-batch
+ assert bbox_nms is not None
+ assert score_nms is not None
+ assert class_nms is not None
+ if bbox_nms.shape[1] == 5:
+ result.pred_boxes = RotatedBoxes(bbox_nms)
+ else:
+ result.pred_boxes = Boxes(bbox_nms)
+ result.scores = score_nms
+ result.pred_classes = class_nms.to(torch.int64)
+
+ mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None)
+ if mask_fcn_probs is not None:
+ # finish the mask pred
+ mask_probs_pred = mask_fcn_probs
+ num_masks = mask_probs_pred.shape[0]
+ class_pred = result.pred_classes
+ indices = torch.arange(num_masks, device=class_pred.device)
+ mask_probs_pred = mask_probs_pred[indices, class_pred][:, None]
+ result.pred_masks = mask_probs_pred
+ elif force_mask_on:
+ # NOTE: there's no way to know the height/width of the mask here; it won't be
+ # used anyway when batch size is 0, so just set them to 0.
+ result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)
+
+ keypoints_out = tensor_outputs.get("keypoints_out", None)
+ kps_score = tensor_outputs.get("kps_score", None)
+ if keypoints_out is not None:
+ # keypoints_out: [N, 4, #keypoints], where 4 is in order of (x, y, score, prob)
+ keypoints_tensor = keypoints_out
+ # NOTE: it's possible that prob is not calculated if "should_output_softmax"
+ # is set to False in HeatmapMaxKeypoint, so just use the raw score; it seems
+ # it doesn't affect mAP. TODO: check more carefully.
+ keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]]
+ result.pred_keypoints = keypoint_xyp
+ elif kps_score is not None:
+ # keypoint heatmap to sparse data structure
+ pred_keypoint_logits = kps_score
+ keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result])
+
+ return results
+
+
+def _cast_to_f32(f64):
+ return struct.unpack("f", struct.pack("f", f64))[0]
+
+
+def set_caffe2_compatible_tensor_mode(model, enable=True):
+ def _fn(m):
+ if isinstance(m, Caffe2Compatible):
+ m.tensor_mode = enable
+
+ model.apply(_fn)
+
+
+def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device):
+ """
+ See get_caffe2_inputs() below.
+ """
+ assert all(isinstance(x, dict) for x in batched_inputs)
+ assert all(x["image"].dim() == 3 for x in batched_inputs)
+
+ images = [x["image"] for x in batched_inputs]
+ images = ImageList.from_tensors(images, size_divisibility)
+
+ im_info = []
+ for input_per_image, image_size in zip(batched_inputs, images.image_sizes):
+ target_height = input_per_image.get("height", image_size[0])
+ target_width = input_per_image.get("width", image_size[1]) # noqa
+ # NOTE: The scale inside im_info is kept as convention and for providing
+ # post-processing information if further processing is needed. For
+ # current Caffe2 model definitions that don't include post-processing inside
+ # the model, this number is not used.
+ # NOTE: There can be a slight difference between width and height
+ # scales; using a single number can result in numerical differences
+ # compared with D2's post-processing.
+ scale = target_height / image_size[0]
+ im_info.append([image_size[0], image_size[1], scale])
+ im_info = torch.Tensor(im_info)
+
+ return images.tensor.to(device), im_info.to(device)
+
+
+class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module):
+ """
+ Base class for caffe2-compatible implementation of a meta architecture.
+ The forward is traceable and its traced graph can be converted to caffe2
+ graph through ONNX.
+ """
+
+ def __init__(self, cfg, torch_model, enable_tensor_mode=True):
+ """
+ Args:
+ cfg (CfgNode):
+ torch_model (nn.Module): the detectron2 model (meta_arch) to be
+ converted.
+ """
+ super().__init__()
+ self._wrapped_model = torch_model
+ self.eval()
+ set_caffe2_compatible_tensor_mode(self, enable_tensor_mode)
+
+ def get_caffe2_inputs(self, batched_inputs):
+ """
+ Convert pytorch-style structured inputs to caffe2-style inputs that
+ are tuples of tensors.
+
+ Args:
+ batched_inputs (list[dict]): inputs to a detectron2 model
+ in its standard format. Each dict has "image" (CHW tensor), and optionally
+ "height" and "width".
+
+ Returns:
+ tuple[Tensor]:
+ tuple of tensors that will be the inputs to the
+ :meth:`forward` method. For existing models, the first
+ is an NCHW tensor (padded and batched); the second is
+ an im_info Nx3 tensor, where the rows are
+ (height, width, unused legacy parameter)
+ """
+ return convert_batched_inputs_to_c2_format(
+ batched_inputs,
+ self._wrapped_model.backbone.size_divisibility,
+ self._wrapped_model.device,
+ )
+
+ def encode_additional_info(self, predict_net, init_net):
+ """
+ Save extra metadata that will be used by inference in the output protobuf.
+ """
+ pass
+
+ def forward(self, inputs):
+ """
+ Run the forward in caffe2-style. It has to use caffe2-compatible ops
+ and the method will be used for tracing.
+
+ Args:
+ inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`.
+ They will be the inputs of the converted caffe2 graph.
+
+ Returns:
+ tuple[Tensor]: output tensors. They will be the outputs of the
+ converted caffe2 graph.
+ """
+ raise NotImplementedError
+
+ def _caffe2_preprocess_image(self, inputs):
+ """
+ Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward.
+ It normalizes the input images, and the final caffe2 graph assumes the
+ inputs have been batched already.
+ """
+ data, im_info = inputs
+ data = alias(data, "data")
+ im_info = alias(im_info, "im_info")
+ mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std
+ normalized_data = (data - mean) / std
+ normalized_data = alias(normalized_data, "normalized_data")
+
+ # Pack (data, im_info) into ImageList which is recognized by self.inference.
+ images = ImageList(tensor=normalized_data, image_sizes=im_info)
+ return images
+
+ @staticmethod
+ def get_outputs_converter(predict_net, init_net):
+ """
+ Creates a function that converts outputs of the caffe2 model to
+ detectron2's standard format.
+ The function uses information in `predict_net` and `init_net` that are
+ available at inference time. Therefore the function logic can be used in inference.
+
+ The returned function has the following signature:
+
+ def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs
+
+ Where
+
+ * batched_inputs (list[dict]): the original input format of the meta arch
+ * c2_inputs (tuple[Tensor]): the caffe2 inputs.
+ * c2_results (dict[str, Tensor]): the caffe2 output format,
+ corresponding to the outputs of the :meth:`forward` function.
+ * detectron2_outputs: the original output format of the meta arch.
+
+ This function can be used to compare the outputs of the original meta arch and
+ the converted caffe2 graph.
+
+ Returns:
+ callable: a callable of the above signature.
+ """
+ raise NotImplementedError
+
+
+class Caffe2GeneralizedRCNN(Caffe2MetaArch):
+ def __init__(self, cfg, torch_model, enable_tensor_mode=True):
+ assert isinstance(torch_model, meta_arch.GeneralizedRCNN)
+ torch_model = patch_generalized_rcnn(torch_model)
+ super().__init__(cfg, torch_model, enable_tensor_mode)
+
+ try:
+ use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT
+ except AttributeError:
+ use_heatmap_max_keypoint = False
+ self.roi_heads_patcher = ROIHeadsPatcher(
+ self._wrapped_model.roi_heads, use_heatmap_max_keypoint
+ )
+ if self.tensor_mode:
+ self.roi_heads_patcher.patch_roi_heads()
+
+ def encode_additional_info(self, predict_net, init_net):
+ size_divisibility = self._wrapped_model.backbone.size_divisibility
+ check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility)
+ check_set_pb_arg(
+ predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii")
+ )
+ check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN")
+
+ @mock_torch_nn_functional_interpolate()
+ def forward(self, inputs):
+ if not self.tensor_mode:
+ return self._wrapped_model.inference(inputs)
+ images = self._caffe2_preprocess_image(inputs)
+ features = self._wrapped_model.backbone(images.tensor)
+ proposals, _ = self._wrapped_model.proposal_generator(images, features)
+ detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals)
+ return tuple(detector_results[0].flatten())
+
+ @staticmethod
+ def get_outputs_converter(predict_net, init_net):
+ def f(batched_inputs, c2_inputs, c2_results):
+ _, im_info = c2_inputs
+ image_sizes = [[int(im[0]), int(im[1])] for im in im_info]
+ results = assemble_rcnn_outputs_by_name(image_sizes, c2_results)
+ return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes)
+
+ return f
+
+
+class Caffe2RetinaNet(Caffe2MetaArch):
+ def __init__(self, cfg, torch_model):
+ assert isinstance(torch_model, meta_arch.RetinaNet)
+ super().__init__(cfg, torch_model)
+
+ @mock_torch_nn_functional_interpolate()
+ def forward(self, inputs):
+ assert self.tensor_mode
+ images = self._caffe2_preprocess_image(inputs)
+
+ # explicitly return the images sizes to avoid removing "im_info" by ONNX
+ # since it's not used in the forward path
+ return_tensors = [images.image_sizes]
+
+ features = self._wrapped_model.backbone(images.tensor)
+ features = [features[f] for f in self._wrapped_model.head_in_features]
+ for i, feature_i in enumerate(features):
+ features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True)
+ return_tensors.append(features[i])
+
+ pred_logits, pred_anchor_deltas = self._wrapped_model.head(features)
+ for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)):
+ return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i)))
+ return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i)))
+
+ return tuple(return_tensors)
+
+ def encode_additional_info(self, predict_net, init_net):
+ size_divisibility = self._wrapped_model.backbone.size_divisibility
+ check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility)
+ check_set_pb_arg(
+ predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii")
+ )
+ check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet")
+
+ # Inference parameters:
+ check_set_pb_arg(
+ predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.test_score_thresh)
+ )
+ check_set_pb_arg(
+ predict_net, "topk_candidates", "i", self._wrapped_model.test_topk_candidates
+ )
+ check_set_pb_arg(
+ predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.test_nms_thresh)
+ )
+ check_set_pb_arg(
+ predict_net,
+ "max_detections_per_image",
+ "i",
+ self._wrapped_model.max_detections_per_image,
+ )
+
+ check_set_pb_arg(
+ predict_net,
+ "bbox_reg_weights",
+ "floats",
+ [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights],
+ )
+ self._encode_anchor_generator_cfg(predict_net)
+
+ def _encode_anchor_generator_cfg(self, predict_net):
+ # serialize anchor_generator for future use
+ serialized_anchor_generator = io.BytesIO()
+ torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator)
+        # Ideally we could put anchor generation inside the model; then we wouldn't
+        # need to store this information.
+ bytes = serialized_anchor_generator.getvalue()
+ check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes)
+
+ @staticmethod
+ def get_outputs_converter(predict_net, init_net):
+ self = types.SimpleNamespace()
+ serialized_anchor_generator = io.BytesIO(
+ get_pb_arg_vals(predict_net, "serialized_anchor_generator", None)
+ )
+ self.anchor_generator = torch.load(serialized_anchor_generator)
+ bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None)
+ self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights))
+ self.test_score_thresh = get_pb_arg_valf(predict_net, "score_threshold", None)
+ self.test_topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None)
+ self.test_nms_thresh = get_pb_arg_valf(predict_net, "nms_threshold", None)
+ self.max_detections_per_image = get_pb_arg_vali(
+ predict_net, "max_detections_per_image", None
+ )
+
+ # hack to reuse inference code from RetinaNet
+ for meth in [
+ "forward_inference",
+ "inference_single_image",
+ "_transpose_dense_predictions",
+ "_decode_multi_level_predictions",
+ "_decode_per_level_predictions",
+ ]:
+ setattr(self, meth, functools.partial(getattr(meta_arch.RetinaNet, meth), self))
+
+ def f(batched_inputs, c2_inputs, c2_results):
+ _, im_info = c2_inputs
+ image_sizes = [[int(im[0]), int(im[1])] for im in im_info]
+ dummy_images = ImageList(
+ torch.randn(
+ (
+ len(im_info),
+ 3,
+ )
+ + tuple(image_sizes[0])
+ ),
+ image_sizes,
+ )
+
+ num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")])
+ pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)]
+ pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)]
+
+ # For each feature level, feature should have the same batch size and
+ # spatial dimension as the box_cls and box_delta.
+ dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits]
+            # self.num_classes can be inferred
+ self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4)
+
+ results = self.forward_inference(
+ dummy_images, dummy_features, [pred_logits, pred_anchor_deltas]
+ )
+ return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes)
+
+ return f
+
+
+META_ARCH_CAFFE2_EXPORT_TYPE_MAP = {
+ "GeneralizedRCNN": Caffe2GeneralizedRCNN,
+ "RetinaNet": Caffe2RetinaNet,
+}
diff --git a/vendor/detectron2/detectron2/export/caffe2_patch.py b/vendor/detectron2/detectron2/export/caffe2_patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..2da70ae34e31dfe1a2ab4d5625a3e2b096aa5c7f
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/caffe2_patch.py
@@ -0,0 +1,189 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import contextlib
+from unittest import mock
+import torch
+
+from detectron2.modeling import poolers
+from detectron2.modeling.proposal_generator import rpn
+from detectron2.modeling.roi_heads import keypoint_head, mask_head
+from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
+
+from .c10 import (
+ Caffe2Compatible,
+ Caffe2FastRCNNOutputsInference,
+ Caffe2KeypointRCNNInference,
+ Caffe2MaskRCNNInference,
+ Caffe2ROIPooler,
+ Caffe2RPN,
+ caffe2_fast_rcnn_outputs_inference,
+ caffe2_keypoint_rcnn_inference,
+ caffe2_mask_rcnn_inference,
+)
+
+
+class GenericMixin(object):
+ pass
+
+
+class Caffe2CompatibleConverter(object):
+ """
+    A GenericUpdater which implements the `create_from` interface by modifying
+    the module object in-place and assigning it another class, ``replaceCls``.
+ """
+
+ def __init__(self, replaceCls):
+ self.replaceCls = replaceCls
+
+ def create_from(self, module):
+ # update module's class to the new class
+ assert isinstance(module, torch.nn.Module)
+ if issubclass(self.replaceCls, GenericMixin):
+ # replaceCls should act as mixin, create a new class on-the-fly
+ new_class = type(
+ "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__),
+ (self.replaceCls, module.__class__),
+ {}, # {"new_method": lambda self: ...},
+ )
+ module.__class__ = new_class
+ else:
+            # replaceCls is a complete class; this allows an arbitrary class swap
+ module.__class__ = self.replaceCls
+
+ # initialize Caffe2Compatible
+ if isinstance(module, Caffe2Compatible):
+ module.tensor_mode = False
+
+ return module
+
+
+def patch(model, target, updater, *args, **kwargs):
+ """
+    Recursively (post-order) update all modules of the target type and its
+    subclasses, performing an initialization/composition/inheritance/... via
+    ``updater.create_from``.
+ """
+ for name, module in model.named_children():
+ model._modules[name] = patch(module, target, updater, *args, **kwargs)
+ if isinstance(model, target):
+ return updater.create_from(model, *args, **kwargs)
+ return model
+
+
+def patch_generalized_rcnn(model):
+ ccc = Caffe2CompatibleConverter
+ model = patch(model, rpn.RPN, ccc(Caffe2RPN))
+ model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler))
+
+ return model
+
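+# Minimal usage sketch (illustrative, not part of upstream detectron2): `patch` swaps the
+# class of every matching submodule in-place. For example, replacing all ROIPooler modules
+# of an existing GeneralizedRCNN instance `model` (assumed to be built elsewhere):
+#
+#   converter = Caffe2CompatibleConverter(Caffe2ROIPooler)
+#   model = patch(model, poolers.ROIPooler, converter)
+#   # every ROIPooler submodule now has its class swapped for Caffe2ROIPooler
+#   # (tensor_mode is initialized to False for Caffe2Compatible modules)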
+
+@contextlib.contextmanager
+def mock_fastrcnn_outputs_inference(
+ tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers
+):
+ with mock.patch.object(
+ box_predictor_type,
+ "inference",
+ autospec=True,
+ side_effect=Caffe2FastRCNNOutputsInference(tensor_mode),
+ ) as mocked_func:
+ yield
+ if check:
+ assert mocked_func.call_count > 0
+
+
+@contextlib.contextmanager
+def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True):
+ with mock.patch(
+ "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference()
+ ) as mocked_func:
+ yield
+ if check:
+ assert mocked_func.call_count > 0
+
+
+@contextlib.contextmanager
+def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True):
+ with mock.patch(
+ "{}.keypoint_rcnn_inference".format(patched_module),
+ side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint),
+ ) as mocked_func:
+ yield
+ if check:
+ assert mocked_func.call_count > 0
+
+
+class ROIHeadsPatcher:
+ def __init__(self, heads, use_heatmap_max_keypoint):
+ self.heads = heads
+ self.use_heatmap_max_keypoint = use_heatmap_max_keypoint
+ self.previous_patched = {}
+
+ @contextlib.contextmanager
+ def mock_roi_heads(self, tensor_mode=True):
+ """
+        Patch several inference functions inside ROIHeads and its subclasses.
+
+ Args:
+ tensor_mode (bool): whether the inputs/outputs are caffe2's tensor
+ format or not. Default to True.
+ """
+        # NOTE: this requires that `keypoint_rcnn_inference` and `mask_rcnn_inference`
+        # are called inside the same file as BaseXxxHead, because mock.patch is used.
+ kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__
+ mask_head_mod = mask_head.BaseMaskRCNNHead.__module__
+
+ mock_ctx_managers = [
+ mock_fastrcnn_outputs_inference(
+ tensor_mode=tensor_mode,
+ check=True,
+ box_predictor_type=type(self.heads.box_predictor),
+ )
+ ]
+ if getattr(self.heads, "keypoint_on", False):
+ mock_ctx_managers += [
+ mock_keypoint_rcnn_inference(
+ tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint
+ )
+ ]
+ if getattr(self.heads, "mask_on", False):
+ mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)]
+
+ with contextlib.ExitStack() as stack: # python 3.3+
+ for mgr in mock_ctx_managers:
+ stack.enter_context(mgr)
+ yield
+
+ def patch_roi_heads(self, tensor_mode=True):
+ self.previous_patched["box_predictor"] = self.heads.box_predictor.inference
+ self.previous_patched["keypoint_rcnn"] = keypoint_head.keypoint_rcnn_inference
+ self.previous_patched["mask_rcnn"] = mask_head.mask_rcnn_inference
+
+ def patched_fastrcnn_outputs_inference(predictions, proposal):
+ return caffe2_fast_rcnn_outputs_inference(
+ True, self.heads.box_predictor, predictions, proposal
+ )
+
+ self.heads.box_predictor.inference = patched_fastrcnn_outputs_inference
+
+ if getattr(self.heads, "keypoint_on", False):
+
+ def patched_keypoint_rcnn_inference(pred_keypoint_logits, pred_instances):
+ return caffe2_keypoint_rcnn_inference(
+ self.use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances
+ )
+
+ keypoint_head.keypoint_rcnn_inference = patched_keypoint_rcnn_inference
+
+ if getattr(self.heads, "mask_on", False):
+
+ def patched_mask_rcnn_inference(pred_mask_logits, pred_instances):
+ return caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances)
+
+ mask_head.mask_rcnn_inference = patched_mask_rcnn_inference
+
+ def unpatch_roi_heads(self):
+ self.heads.box_predictor.inference = self.previous_patched["box_predictor"]
+ keypoint_head.keypoint_rcnn_inference = self.previous_patched["keypoint_rcnn"]
+ mask_head.mask_rcnn_inference = self.previous_patched["mask_rcnn"]
diff --git a/vendor/detectron2/detectron2/export/flatten.py b/vendor/detectron2/detectron2/export/flatten.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5ba4297567d650f147eebeed361e9d62fab899d
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/flatten.py
@@ -0,0 +1,330 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import collections
+from dataclasses import dataclass
+from typing import Callable, List, Optional, Tuple
+import torch
+from torch import nn
+
+from detectron2.structures import Boxes, Instances, ROIMasks
+from detectron2.utils.registry import _convert_target_to_string, locate
+
+from .torchscript_patch import patch_builtin_len
+
+
+@dataclass
+class Schema:
+ """
+ A Schema defines how to flatten a possibly hierarchical object into tuple of
+ primitive objects, so it can be used as inputs/outputs of PyTorch's tracing.
+
+ PyTorch does not support tracing a function that produces rich output
+ structures (e.g. dict, Instances, Boxes). To trace such a function, we
+ flatten the rich object into tuple of tensors, and return this tuple of tensors
+ instead. Meanwhile, we also need to know how to "rebuild" the original object
+ from the flattened results, so we can evaluate the flattened results.
+ A Schema defines how to flatten an object, and while flattening it, it records
+ necessary schemas so that the object can be rebuilt using the flattened outputs.
+
+    The flattened object and the schema object are returned by the ``.flatten`` classmethod.
+    The original object can then be rebuilt with the ``__call__`` method of the schema.
+
+ A Schema is a dataclass that can be serialized easily.
+ """
+
+ # inspired by FetchMapper in tensorflow/python/client/session.py
+
+ @classmethod
+ def flatten(cls, obj):
+ raise NotImplementedError
+
+ def __call__(self, values):
+ raise NotImplementedError
+
+ @staticmethod
+ def _concat(values):
+ ret = ()
+ sizes = []
+ for v in values:
+ assert isinstance(v, tuple), "Flattened results must be a tuple"
+ ret = ret + v
+ sizes.append(len(v))
+ return ret, sizes
+
+ @staticmethod
+ def _split(values, sizes):
+ if len(sizes):
+ expected_len = sum(sizes)
+ assert (
+ len(values) == expected_len
+ ), f"Values has length {len(values)} but expect length {expected_len}."
+ ret = []
+ for k in range(len(sizes)):
+ begin, end = sum(sizes[:k]), sum(sizes[: k + 1])
+ ret.append(values[begin:end])
+ return ret
+
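+# Behavior sketch (illustrative only, not part of the original file): `_concat` joins the
+# per-element flattened tuples and records their lengths, and `_split` reverses that using
+# the recorded sizes.
+#
+#   flat, sizes = Schema._concat([(1, 2), (3,), (4, 5, 6)])
+#   # flat == (1, 2, 3, 4, 5, 6), sizes == [2, 1, 3]
+#   Schema._split(flat, sizes)
+#   # -> [(1, 2), (3,), (4, 5, 6)]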
+
+@dataclass
+class ListSchema(Schema):
+ schemas: List[Schema] # the schemas that define how to flatten each element in the list
+ sizes: List[int] # the flattened length of each element
+
+ def __call__(self, values):
+ values = self._split(values, self.sizes)
+ if len(values) != len(self.schemas):
+ raise ValueError(
+ f"Values has length {len(values)} but schemas " f"has length {len(self.schemas)}!"
+ )
+ values = [m(v) for m, v in zip(self.schemas, values)]
+ return list(values)
+
+ @classmethod
+ def flatten(cls, obj):
+ res = [flatten_to_tuple(k) for k in obj]
+ values, sizes = cls._concat([k[0] for k in res])
+ return values, cls([k[1] for k in res], sizes)
+
+
+@dataclass
+class TupleSchema(ListSchema):
+ def __call__(self, values):
+ return tuple(super().__call__(values))
+
+
+@dataclass
+class IdentitySchema(Schema):
+ def __call__(self, values):
+ return values[0]
+
+ @classmethod
+ def flatten(cls, obj):
+ return (obj,), cls()
+
+
+@dataclass
+class DictSchema(ListSchema):
+ keys: List[str]
+
+ def __call__(self, values):
+ values = super().__call__(values)
+ return dict(zip(self.keys, values))
+
+ @classmethod
+ def flatten(cls, obj):
+ for k in obj.keys():
+ if not isinstance(k, str):
+ raise KeyError("Only support flattening dictionaries if keys are str.")
+ keys = sorted(obj.keys())
+ values = [obj[k] for k in keys]
+ ret, schema = ListSchema.flatten(values)
+ return ret, cls(schema.schemas, schema.sizes, keys)
+
+
+@dataclass
+class InstancesSchema(DictSchema):
+ def __call__(self, values):
+ image_size, fields = values[-1], values[:-1]
+ fields = super().__call__(fields)
+ return Instances(image_size, **fields)
+
+ @classmethod
+ def flatten(cls, obj):
+ ret, schema = super().flatten(obj.get_fields())
+ size = obj.image_size
+ if not isinstance(size, torch.Tensor):
+ size = torch.tensor(size)
+ return ret + (size,), schema
+
+
+@dataclass
+class TensorWrapSchema(Schema):
+ """
+ For classes that are simple wrapper of tensors, e.g.
+ Boxes, RotatedBoxes, BitMasks
+ """
+
+ class_name: str
+
+ def __call__(self, values):
+ return locate(self.class_name)(values[0])
+
+ @classmethod
+ def flatten(cls, obj):
+ return (obj.tensor,), cls(_convert_target_to_string(type(obj)))
+
+
+# if more custom structures needed in the future, can allow
+# passing in extra schemas for custom types
+def flatten_to_tuple(obj):
+ """
+ Flatten an object so it can be used for PyTorch tracing.
+ Also returns how to rebuild the original object from the flattened outputs.
+
+ Returns:
+ res (tuple): the flattened results that can be used as tracing outputs
+ schema: an object with a ``__call__`` method such that ``schema(res) == obj``.
+ It is a pure dataclass that can be serialized.
+ """
+ schemas = [
+ ((str, bytes), IdentitySchema),
+ (list, ListSchema),
+ (tuple, TupleSchema),
+ (collections.abc.Mapping, DictSchema),
+ (Instances, InstancesSchema),
+ ((Boxes, ROIMasks), TensorWrapSchema),
+ ]
+ for klass, schema in schemas:
+ if isinstance(obj, klass):
+ F = schema
+ break
+ else:
+ F = IdentitySchema
+
+ return F.flatten(obj)
+
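+# Quick sketch of the round trip (illustrative only; assumes torch tensors as leaves):
+#
+#   obj = {"a": torch.zeros(2), "b": [torch.ones(3), torch.ones(1)]}
+#   flat, schema = flatten_to_tuple(obj)
+#   # flat is a tuple of 3 tensors; schema records how to rebuild the dict/list structure
+#   rebuilt = schema(flat)
+#   # rebuilt has the same nesting and holds the very same tensor objects as obj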
+
+class TracingAdapter(nn.Module):
+ """
+ A model may take rich input/output format (e.g. dict or custom classes),
+ but `torch.jit.trace` requires tuple of tensors as input/output.
+ This adapter flattens input/output format of a model so it becomes traceable.
+
+ It also records the necessary schema to rebuild model's inputs/outputs from flattened
+ inputs/outputs.
+
+ Example:
+ ::
+ outputs = model(inputs) # inputs/outputs may be rich structure
+ adapter = TracingAdapter(model, inputs)
+
+ # can now trace the model, with adapter.flattened_inputs, or another
+ # tuple of tensors with the same length and meaning
+ traced = torch.jit.trace(adapter, adapter.flattened_inputs)
+
+ # traced model can only produce flattened outputs (tuple of tensors)
+ flattened_outputs = traced(*adapter.flattened_inputs)
+ # adapter knows the schema to convert it back (new_outputs == outputs)
+ new_outputs = adapter.outputs_schema(flattened_outputs)
+ """
+
+ flattened_inputs: Tuple[torch.Tensor] = None
+ """
+ Flattened version of inputs given to this class's constructor.
+ """
+
+ inputs_schema: Schema = None
+ """
+ Schema of the inputs given to this class's constructor.
+ """
+
+ outputs_schema: Schema = None
+ """
+ Schema of the output produced by calling the given model with inputs.
+ """
+
+ def __init__(
+ self,
+ model: nn.Module,
+ inputs,
+ inference_func: Optional[Callable] = None,
+ allow_non_tensor: bool = False,
+ ):
+ """
+ Args:
+ model: an nn.Module
+ inputs: An input argument or a tuple of input arguments used to call model.
+ After flattening, it has to only consist of tensors.
+            inference_func: a callable that takes (model, *inputs), calls the
+                model with inputs, and returns outputs. By default it
+                is ``lambda model, *inputs: model(*inputs)``. Can be overridden
+                if you need to call the model differently.
+ allow_non_tensor: allow inputs/outputs to contain non-tensor objects.
+ This option will filter out non-tensor objects to make the
+ model traceable, but ``inputs_schema``/``outputs_schema`` cannot be
+ used anymore because inputs/outputs cannot be rebuilt from pure tensors.
+ This is useful when you're only interested in the single trace of
+ execution (e.g. for flop count), but not interested in
+ generalizing the traced graph to new inputs.
+ """
+ super().__init__()
+ if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)):
+ model = model.module
+ self.model = model
+ if not isinstance(inputs, tuple):
+ inputs = (inputs,)
+ self.inputs = inputs
+ self.allow_non_tensor = allow_non_tensor
+
+ if inference_func is None:
+ inference_func = lambda model, *inputs: model(*inputs) # noqa
+ self.inference_func = inference_func
+
+ self.flattened_inputs, self.inputs_schema = flatten_to_tuple(inputs)
+
+ if all(isinstance(x, torch.Tensor) for x in self.flattened_inputs):
+ return
+ if self.allow_non_tensor:
+ self.flattened_inputs = tuple(
+ [x for x in self.flattened_inputs if isinstance(x, torch.Tensor)]
+ )
+ self.inputs_schema = None
+ else:
+ for input in self.flattened_inputs:
+ if not isinstance(input, torch.Tensor):
+ raise ValueError(
+ "Inputs for tracing must only contain tensors. "
+ f"Got a {type(input)} instead."
+ )
+
+ def forward(self, *args: torch.Tensor):
+ with torch.no_grad(), patch_builtin_len():
+ if self.inputs_schema is not None:
+ inputs_orig_format = self.inputs_schema(args)
+ else:
+ if len(args) != len(self.flattened_inputs) or any(
+ x is not y for x, y in zip(args, self.flattened_inputs)
+ ):
+ raise ValueError(
+ "TracingAdapter does not contain valid inputs_schema."
+ " So it cannot generalize to other inputs and must be"
+ " traced with `.flattened_inputs`."
+ )
+ inputs_orig_format = self.inputs
+
+ outputs = self.inference_func(self.model, *inputs_orig_format)
+ flattened_outputs, schema = flatten_to_tuple(outputs)
+
+ flattened_output_tensors = tuple(
+ [x for x in flattened_outputs if isinstance(x, torch.Tensor)]
+ )
+ if len(flattened_output_tensors) < len(flattened_outputs):
+ if self.allow_non_tensor:
+ flattened_outputs = flattened_output_tensors
+ self.outputs_schema = None
+ else:
+ raise ValueError(
+ "Model cannot be traced because some model outputs "
+ "cannot flatten to tensors."
+ )
+ else: # schema is valid
+ if self.outputs_schema is None:
+ self.outputs_schema = schema
+ else:
+ assert self.outputs_schema == schema, (
+ "Model should always return outputs with the same "
+ "structure so it can be traced!"
+ )
+ return flattened_outputs
+
+ def _create_wrapper(self, traced_model):
+ """
+ Return a function that has an input/output interface the same as the
+ original model, but it calls the given traced model under the hood.
+ """
+
+ def forward(*args):
+ flattened_inputs, _ = flatten_to_tuple(args)
+ flattened_outputs = traced_model(*flattened_inputs)
+ return self.outputs_schema(flattened_outputs)
+
+ return forward
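+
+# Sketch (illustrative only): after tracing, `_create_wrapper` can recover the original
+# rich input/output interface around the traced module. Names below are placeholders.
+#
+#   adapter = TracingAdapter(model, inputs)
+#   traced = torch.jit.trace(adapter, adapter.flattened_inputs)
+#   wrapped_forward = adapter._create_wrapper(traced)
+#   outputs = wrapped_forward(*inputs)  # rich detectron2 outputs, not flat tensors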
diff --git a/vendor/detectron2/detectron2/export/shared.py b/vendor/detectron2/detectron2/export/shared.py
new file mode 100644
index 0000000000000000000000000000000000000000..53ba9335e26819f9381115eba17bbbe3816b469c
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/shared.py
@@ -0,0 +1,1039 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import collections
+import copy
+import functools
+import logging
+import numpy as np
+import os
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from unittest import mock
+import caffe2.python.utils as putils
+import torch
+import torch.nn.functional as F
+from caffe2.proto import caffe2_pb2
+from caffe2.python import core, net_drawer, workspace
+from torch.nn.functional import interpolate as interp
+
+logger = logging.getLogger(__name__)
+
+
+# ==== torch/utils_toffee/cast.py =======================================
+
+
+def to_device(t, device_str):
+ """
+    This function is a replacement for .to(another_device) such that it allows the
+    casting to be traced properly by explicitly calling the underlying copy ops.
+    It also avoids introducing an unnecessary op when casting to the same device.
+ """
+ src = t.device
+ dst = torch.device(device_str)
+
+ if src == dst:
+ return t
+ elif src.type == "cuda" and dst.type == "cpu":
+ return torch.ops._caffe2.CopyGPUToCPU(t)
+ elif src.type == "cpu" and dst.type == "cuda":
+ return torch.ops._caffe2.CopyCPUToGPU(t)
+ else:
+ raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst))
+
+
+# ==== torch/utils_toffee/interpolate.py =======================================
+
+
+# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py
+def BilinearInterpolation(tensor_in, up_scale):
+ assert up_scale % 2 == 0, "Scale should be even"
+
+ def upsample_filt(size):
+ factor = (size + 1) // 2
+ if size % 2 == 1:
+ center = factor - 1
+ else:
+ center = factor - 0.5
+
+ og = np.ogrid[:size, :size]
+ return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
+
+ kernel_size = int(up_scale) * 2
+ bil_filt = upsample_filt(kernel_size)
+
+ dim = int(tensor_in.shape[1])
+ kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32)
+ kernel[range(dim), range(dim), :, :] = bil_filt
+
+ tensor_out = F.conv_transpose2d(
+ tensor_in,
+ weight=to_device(torch.Tensor(kernel), tensor_in.device),
+ bias=None,
+ stride=int(up_scale),
+ padding=int(up_scale / 2),
+ )
+
+ return tensor_out
+
+
+# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if
+# using dynamic `scale_factor` rather than static `size`. (T43166860)
+# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly.
+def onnx_compatibale_interpolate(
+ input, size=None, scale_factor=None, mode="nearest", align_corners=None
+):
+ # NOTE: The input dimensions are interpreted in the form:
+ # `mini-batch x channels x [optional depth] x [optional height] x width`.
+ if size is None and scale_factor is not None:
+ if input.dim() == 4:
+ if isinstance(scale_factor, (int, float)):
+ height_scale, width_scale = (scale_factor, scale_factor)
+ else:
+ assert isinstance(scale_factor, (tuple, list))
+ assert len(scale_factor) == 2
+ height_scale, width_scale = scale_factor
+
+ assert not align_corners, "No matching C2 op for align_corners == True"
+ if mode == "nearest":
+ return torch.ops._caffe2.ResizeNearest(
+ input, order="NCHW", width_scale=width_scale, height_scale=height_scale
+ )
+ elif mode == "bilinear":
+ logger.warning(
+                    "Use F.conv_transpose2d for bilinear interpolation"
+                    " because there is no such C2 op; this may cause a significant"
+                    " slowdown, and the boundary pixels won't be the same as"
+                    " with F.interpolate due to padding."
+ )
+ assert height_scale == width_scale
+ return BilinearInterpolation(input, up_scale=height_scale)
+        logger.warning("Output size is not static, it might cause ONNX conversion issues")
+
+ return interp(input, size, scale_factor, mode, align_corners)
+
+
+def mock_torch_nn_functional_interpolate():
+ def decorator(func):
+ @functools.wraps(func)
+ def _mock_torch_nn_functional_interpolate(*args, **kwargs):
+ if torch.onnx.is_in_onnx_export():
+ with mock.patch(
+ "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate
+ ):
+ return func(*args, **kwargs)
+ else:
+ return func(*args, **kwargs)
+
+ return _mock_torch_nn_functional_interpolate
+
+ return decorator
+
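+# Usage sketch (illustrative): decorate a forward method so that, only while exporting to
+# ONNX, F.interpolate is swapped for the caffe2-compatible implementation above. `MyHead`
+# is a hypothetical module used purely for illustration.
+#
+#   class MyHead(torch.nn.Module):
+#       @mock_torch_nn_functional_interpolate()
+#       def forward(self, x):
+#           return torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")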
+
+# ==== torch/utils_caffe2/ws_utils.py ==========================================
+
+
+class ScopedWS(object):
+ def __init__(self, ws_name, is_reset, is_cleanup=False):
+ self.ws_name = ws_name
+ self.is_reset = is_reset
+ self.is_cleanup = is_cleanup
+ self.org_ws = ""
+
+ def __enter__(self):
+ self.org_ws = workspace.CurrentWorkspace()
+ if self.ws_name is not None:
+ workspace.SwitchWorkspace(self.ws_name, True)
+ if self.is_reset:
+ workspace.ResetWorkspace()
+
+ return workspace
+
+ def __exit__(self, *args):
+ if self.is_cleanup:
+ workspace.ResetWorkspace()
+ if self.ws_name is not None:
+ workspace.SwitchWorkspace(self.org_ws)
+
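+# Usage sketch (illustrative only): ScopedWS temporarily switches to a named caffe2
+# workspace and restores the previous one on exit.
+#
+#   with ScopedWS("__my_scratch_ws__", is_reset=True, is_cleanup=True) as ws:
+#       ws.RunNetOnce(init_net)
+#       blob = ws.FetchBlob(init_net.external_output[0])
+#   # back in the original workspace here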
+
+def fetch_any_blob(name):
+ bb = None
+ try:
+ bb = workspace.FetchBlob(name)
+ except TypeError:
+ bb = workspace.FetchInt8Blob(name)
+ except Exception as e:
+ logger.error("Get blob {} error: {}".format(name, e))
+
+ return bb
+
+
+# ==== torch/utils_caffe2/protobuf.py ==========================================
+
+
+def get_pb_arg(pb, arg_name):
+ for x in pb.arg:
+ if x.name == arg_name:
+ return x
+ return None
+
+
+def get_pb_arg_valf(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return arg.f if arg is not None else default_val
+
+
+def get_pb_arg_floats(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return list(map(float, arg.floats)) if arg is not None else default_val
+
+
+def get_pb_arg_ints(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return list(map(int, arg.ints)) if arg is not None else default_val
+
+
+def get_pb_arg_vali(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return arg.i if arg is not None else default_val
+
+
+def get_pb_arg_vals(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return arg.s if arg is not None else default_val
+
+
+def get_pb_arg_valstrings(pb, arg_name, default_val):
+ arg = get_pb_arg(pb, arg_name)
+ return list(arg.strings) if arg is not None else default_val
+
+
+def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False):
+ arg = get_pb_arg(pb, arg_name)
+ if arg is None:
+ arg = putils.MakeArgument(arg_name, arg_value)
+ assert hasattr(arg, arg_attr)
+ pb.arg.extend([arg])
+ if allow_override and getattr(arg, arg_attr) != arg_value:
+ logger.warning(
+ "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value)
+ )
+ setattr(arg, arg_attr, arg_value)
+ else:
+ assert arg is not None
+ assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format(
+ getattr(arg, arg_attr), arg_value
+ )
+
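+# Usage sketch (illustrative only): these helpers read/write named arguments on a caffe2
+# protobuf (NetDef or OperatorDef). For example, stamping and reading back a scalar
+# argument on a hypothetical `predict_net`:
+#
+#   check_set_pb_arg(predict_net, "size_divisibility", "i", 32)
+#   assert get_pb_arg_vali(predict_net, "size_divisibility", None) == 32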
+
+def _create_const_fill_op_from_numpy(name, tensor, device_option=None):
+ assert type(tensor) == np.ndarray
+ kTypeNameMapper = {
+ np.dtype("float32"): "GivenTensorFill",
+ np.dtype("int32"): "GivenTensorIntFill",
+ np.dtype("int64"): "GivenTensorInt64Fill",
+ np.dtype("uint8"): "GivenTensorStringFill",
+ }
+
+ args_dict = {}
+ if tensor.dtype == np.dtype("uint8"):
+ args_dict.update({"values": [str(tensor.data)], "shape": [1]})
+ else:
+ args_dict.update({"values": tensor, "shape": tensor.shape})
+
+ if device_option is not None:
+ args_dict["device_option"] = device_option
+
+ return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict)
+
+
+def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor):
+ assert type(int8_tensor) == workspace.Int8Tensor
+ kTypeNameMapper = {
+ np.dtype("int32"): "Int8GivenIntTensorFill",
+ np.dtype("uint8"): "Int8GivenTensorFill",
+ }
+
+ tensor = int8_tensor.data
+ assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")]
+ values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor
+
+ return core.CreateOperator(
+ kTypeNameMapper[tensor.dtype],
+ [],
+ [name],
+ values=values,
+ shape=tensor.shape,
+ Y_scale=int8_tensor.scale,
+ Y_zero_point=int8_tensor.zero_point,
+ )
+
+
+def create_const_fill_op(
+ name: str,
+ blob: Union[np.ndarray, workspace.Int8Tensor],
+ device_option: Optional[caffe2_pb2.DeviceOption] = None,
+) -> caffe2_pb2.OperatorDef:
+ """
+    Given a blob object, return the Caffe2 operator that creates this blob
+    as a constant. Currently supports NumPy tensors and Caffe2 Int8Tensor.
+ """
+
+ tensor_type = type(blob)
+ assert tensor_type in [
+ np.ndarray,
+ workspace.Int8Tensor,
+ ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format(
+ name, type(blob)
+ )
+
+ if tensor_type == np.ndarray:
+ return _create_const_fill_op_from_numpy(name, blob, device_option)
+ elif tensor_type == workspace.Int8Tensor:
+ assert device_option is None
+ return _create_const_fill_op_from_c2_int8_tensor(name, blob)
+
+
+def construct_init_net_from_params(
+ params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None
+) -> caffe2_pb2.NetDef:
+ """
+ Construct the init_net from params dictionary
+ """
+ init_net = caffe2_pb2.NetDef()
+ device_options = device_options or {}
+ for name, blob in params.items():
+ if isinstance(blob, str):
+ logger.warning(
+ (
+ "Blob {} with type {} is not supported in generating init net,"
+ " skipped.".format(name, type(blob))
+ )
+ )
+ continue
+ init_net.op.extend(
+ [create_const_fill_op(name, blob, device_option=device_options.get(name, None))]
+ )
+ init_net.external_output.append(name)
+ return init_net
+
+
+def get_producer_map(ssa):
+ """
+ Return dict from versioned blob to (i, j),
+ where i is index of producer op, j is the index of output of that op.
+ """
+ producer_map = {}
+ for i in range(len(ssa)):
+ outputs = ssa[i][1]
+ for j, outp in enumerate(outputs):
+ producer_map[outp] = (i, j)
+ return producer_map
+
+
+def get_consumer_map(ssa):
+ """
+ Return dict from versioned blob to list of (i, j),
+ where i is index of consumer op, j is the index of input of that op.
+ """
+ consumer_map = collections.defaultdict(list)
+ for i in range(len(ssa)):
+ inputs = ssa[i][0]
+ for j, inp in enumerate(inputs):
+ consumer_map[inp].append((i, j))
+ return consumer_map
+
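+# Sketch of the expected data layout (illustrative): `core.get_ssa` returns, per op, a pair
+# (versioned_inputs, versioned_outputs), where each versioned blob is a (name, version) tuple.
+#
+#   ssa, versions = core.get_ssa(predict_net)
+#   producers = get_producer_map(ssa)   # ("blob", 1) -> (op_index, output_index)
+#   consumers = get_consumer_map(ssa)   # ("blob", 1) -> [(op_index, input_index), ...]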
+
+def get_params_from_init_net(
+ init_net: caffe2_pb2.NetDef,
+) -> Tuple[Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]:
+ """
+ Take the output blobs from init_net by running it.
+ Outputs:
+ params: dict from blob name to numpy array
+ device_options: dict from blob name to the device option of its creating op
+ """
+    # NOTE: this assumes that the params are determined by the producer op, with the
+    # only exception being CopyGPUToCPU, which is a CUDA op but returns a CPU tensor.
+ def _get_device_option(producer_op):
+ if producer_op.type == "CopyGPUToCPU":
+ return caffe2_pb2.DeviceOption()
+ else:
+ return producer_op.device_option
+
+ with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws:
+ ws.RunNetOnce(init_net)
+ params = {b: fetch_any_blob(b) for b in init_net.external_output}
+ ssa, versions = core.get_ssa(init_net)
+ producer_map = get_producer_map(ssa)
+ device_options = {
+ b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]])
+ for b in init_net.external_output
+ }
+ return params, device_options
+
+
+def _updater_raise(op, input_types, output_types):
+ raise RuntimeError(
+ "Failed to apply updater for op {} given input_types {} and"
+ " output_types {}".format(op, input_types, output_types)
+ )
+
+
+def _generic_status_identifier(
+ predict_net: caffe2_pb2.NetDef,
+ status_updater: Callable,
+ known_status: Dict[Tuple[str, int], Any],
+) -> Dict[Tuple[str, int], Any]:
+ """
+ Statically infer the status of each blob, the status can be such as device type
+ (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here
+ is versioned blob (Tuple[str, int]) in the format compatible with ssa.
+ Inputs:
+ predict_net: the caffe2 network
+ status_updater: a callable, given an op and the status of its input/output,
+ it returns the updated status of input/output. `None` is used for
+ representing unknown status.
+ known_status: a dict containing known status, used as initialization.
+ Outputs:
+ A dict mapping from versioned blob to its status
+ """
+ ssa, versions = core.get_ssa(predict_net)
+ versioned_ext_input = [(b, 0) for b in predict_net.external_input]
+ versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output]
+ all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa])
+
+ allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output)
+ assert all(k in allowed_vbs for k in known_status)
+ assert all(v is not None for v in known_status.values())
+ _known_status = copy.deepcopy(known_status)
+
+ def _check_and_update(key, value):
+ assert value is not None
+ if key in _known_status:
+ if not _known_status[key] == value:
+ raise RuntimeError(
+                    "Conflicting status for {}, existing status {}, new status {}".format(
+ key, _known_status[key], value
+ )
+ )
+ _known_status[key] = value
+
+ def _update_i(op, ssa_i):
+ versioned_inputs = ssa_i[0]
+ versioned_outputs = ssa_i[1]
+
+ inputs_status = [_known_status.get(b, None) for b in versioned_inputs]
+ outputs_status = [_known_status.get(b, None) for b in versioned_outputs]
+
+ new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status)
+
+ for versioned_blob, status in zip(
+ versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status
+ ):
+ if status is not None:
+ _check_and_update(versioned_blob, status)
+
+ for op, ssa_i in zip(predict_net.op, ssa):
+ _update_i(op, ssa_i)
+ for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)):
+ _update_i(op, ssa_i)
+
+    # NOTE: This strictly checks that every blob from predict_net is assigned
+    # a known status. However, sometimes that is impossible (e.g. when the graph has
+    # dead-end ops); we may relax this constraint if needed.
+ for k in all_versioned_blobs:
+ if k not in _known_status:
+ raise NotImplementedError(
+                "Cannot infer the status for {}. Currently we only support the case where"
+                " a single forward and backward pass can identify the status of all blobs.".format(k)
+ )
+
+ return _known_status
+
+
+def infer_device_type(
+ predict_net: caffe2_pb2.NetDef,
+ known_status: Dict[Tuple[str, int], Any],
+ device_name_style: str = "caffe2",
+) -> Dict[Tuple[str, int], str]:
+ """Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob"""
+
+ assert device_name_style in ["caffe2", "pytorch"]
+ _CPU_STR = "cpu"
+ _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda"
+
+ def _copy_cpu_to_gpu_updater(op, input_types, output_types):
+ if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR:
+ _updater_raise(op, input_types, output_types)
+ return ([_CPU_STR], [_GPU_STR])
+
+ def _copy_gpu_to_cpu_updater(op, input_types, output_types):
+ if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR:
+ _updater_raise(op, input_types, output_types)
+ return ([_GPU_STR], [_CPU_STR])
+
+ def _other_ops_updater(op, input_types, output_types):
+ non_none_types = [x for x in input_types + output_types if x is not None]
+ if len(non_none_types) > 0:
+ the_type = non_none_types[0]
+ if not all(x == the_type for x in non_none_types):
+ _updater_raise(op, input_types, output_types)
+ else:
+ the_type = None
+ return ([the_type for _ in op.input], [the_type for _ in op.output])
+
+ def _device_updater(op, *args, **kwargs):
+ return {
+ "CopyCPUToGPU": _copy_cpu_to_gpu_updater,
+ "CopyGPUToCPU": _copy_gpu_to_cpu_updater,
+ }.get(op.type, _other_ops_updater)(op, *args, **kwargs)
+
+ return _generic_status_identifier(predict_net, _device_updater, known_status)
+
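+# Illustrative call (not part of upstream detectron2): infer the device of every versioned
+# blob, seeding the inference with the known devices of the external inputs.
+#
+#   known = {(b, 0): "cpu" for b in predict_net.external_input}
+#   device_types = infer_device_type(predict_net, known_status=known, device_name_style="pytorch")
+#   # device_types[("some_blob", 1)] -> "cpu" or "cuda"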
+
+# ==== torch/utils_caffe2/vis.py ===============================================
+
+
+def _modify_blob_names(ops, blob_rename_f):
+ ret = []
+
+ def _replace_list(blob_list, replaced_list):
+ del blob_list[:]
+ blob_list.extend(replaced_list)
+
+ for x in ops:
+ cur = copy.deepcopy(x)
+ _replace_list(cur.input, list(map(blob_rename_f, cur.input)))
+ _replace_list(cur.output, list(map(blob_rename_f, cur.output)))
+ ret.append(cur)
+
+ return ret
+
+
+def _rename_blob(name, blob_sizes, blob_ranges):
+ def _list_to_str(bsize):
+ ret = ", ".join([str(x) for x in bsize])
+ ret = "[" + ret + "]"
+ return ret
+
+ ret = name
+ if blob_sizes is not None and name in blob_sizes:
+ ret += "\n" + _list_to_str(blob_sizes[name])
+ if blob_ranges is not None and name in blob_ranges:
+ ret += "\n" + _list_to_str(blob_ranges[name])
+
+ return ret
+
+
+# graph_name must not contain the word 'graph'
+def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None):
+ blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges)
+ return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f)
+
+
+def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None):
+ graph = None
+ ops = net.op
+ if blob_rename_func is not None:
+ ops = _modify_blob_names(ops, blob_rename_func)
+ if not op_only:
+ graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB")
+ else:
+ graph = net_drawer.GetPydotGraphMinimal(
+ ops, graph_name, rankdir="TB", minimal_dependency=True
+ )
+
+ try:
+ par_dir = os.path.dirname(file_name)
+ if not os.path.exists(par_dir):
+ os.makedirs(par_dir)
+
+ format = os.path.splitext(os.path.basename(file_name))[-1]
+ if format == ".png":
+ graph.write_png(file_name)
+ elif format == ".pdf":
+ graph.write_pdf(file_name)
+ elif format == ".svg":
+ graph.write_svg(file_name)
+ else:
+ print("Incorrect format {}".format(format))
+ except Exception as e:
+ print("Error when writing graph to image {}".format(e))
+
+ return graph
+
+
+# ==== torch/utils_toffee/aten_to_caffe2.py ====================================
+
+
+def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef):
+ """
+    For an ONNX-exported model, GroupNorm will be represented as an ATen op;
+    this function replaces such ATen ops with Caffe2's GroupNorm op in-place.
+ """
+ count = 0
+ for op in predict_net.op:
+ if op.type == "ATen":
+ op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3
+ if op_name and op_name.decode() == "group_norm":
+ op.arg.remove(get_pb_arg(op, "operator"))
+
+ if get_pb_arg_vali(op, "cudnn_enabled", None):
+ op.arg.remove(get_pb_arg(op, "cudnn_enabled"))
+
+ num_groups = get_pb_arg_vali(op, "num_groups", None)
+ if num_groups is not None:
+ op.arg.remove(get_pb_arg(op, "num_groups"))
+ check_set_pb_arg(op, "group", "i", num_groups)
+
+ op.type = "GroupNorm"
+ count += 1
+ if count > 1:
+        logger.info("Replaced {} ATen operators with GroupNormOp".format(count))
+
+
+# ==== torch/utils_toffee/alias.py =============================================
+
+
+def alias(x, name, is_backward=False):
+ if not torch.onnx.is_in_onnx_export():
+ return x
+ assert isinstance(x, torch.Tensor)
+ return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward)
+
+
+def fuse_alias_placeholder(predict_net, init_net):
+ """Remove AliasWithName placeholder and rename the input/output of it"""
+ # First we finish all the re-naming
+ for i, op in enumerate(predict_net.op):
+ if op.type == "AliasWithName":
+ assert len(op.input) == 1
+ assert len(op.output) == 1
+ name = get_pb_arg_vals(op, "name", None).decode()
+ is_backward = bool(get_pb_arg_vali(op, "is_backward", 0))
+ rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward)
+ rename_op_output(predict_net, i, 0, name)
+
+ # Remove AliasWithName, should be very safe since it's a non-op
+ new_ops = []
+ for op in predict_net.op:
+ if op.type != "AliasWithName":
+ new_ops.append(op)
+ else:
+ # safety check
+ assert op.input == op.output
+ assert op.input[0] == op.arg[0].s.decode()
+ del predict_net.op[:]
+ predict_net.op.extend(new_ops)
+
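+# Sketch of the export-time flow (illustrative): `alias` tags tensors with stable blob names
+# during ONNX export, and `fuse_alias_placeholder` later removes the placeholder ops while
+# keeping the names on the real blobs.
+#
+#   # inside a model's forward, during torch.onnx export:
+#   x = alias(x, "feature_0")
+#   # after conversion to caffe2 protobufs:
+#   fuse_alias_placeholder(predict_net, init_net)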
+
+# ==== torch/utils_caffe2/graph_transform.py ===================================
+
+
+class IllegalGraphTransformError(ValueError):
+ """When a graph transform function call can't be executed."""
+
+
+def _rename_versioned_blob_in_proto(
+ proto: caffe2_pb2.NetDef,
+ old_name: str,
+ new_name: str,
+ version: int,
+ ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]],
+ start_versions: Dict[str, int],
+ end_versions: Dict[str, int],
+):
+ """In given proto, rename all blobs with matched version"""
+    # Operator list
+ for op, i_th_ssa in zip(proto.op, ssa):
+ versioned_inputs, versioned_outputs = i_th_ssa
+ for i in range(len(op.input)):
+ if versioned_inputs[i] == (old_name, version):
+ op.input[i] = new_name
+ for i in range(len(op.output)):
+ if versioned_outputs[i] == (old_name, version):
+ op.output[i] = new_name
+ # external_input
+ if start_versions.get(old_name, 0) == version:
+ for i in range(len(proto.external_input)):
+ if proto.external_input[i] == old_name:
+ proto.external_input[i] = new_name
+ # external_output
+ if end_versions.get(old_name, 0) == version:
+ for i in range(len(proto.external_output)):
+ if proto.external_output[i] == old_name:
+ proto.external_output[i] = new_name
+
+
+def rename_op_input(
+ predict_net: caffe2_pb2.NetDef,
+ init_net: caffe2_pb2.NetDef,
+ op_id: int,
+ input_id: int,
+ new_name: str,
+ from_producer: bool = False,
+):
+ """
+    Rename the op_id-th operator in predict_net, changing its input_id-th input's
+    name to new_name. It also does automatic re-routing and changes
+    external_input and init_net if necessary.
+    - It requires that the input is only consumed by this op.
+    - This function modifies predict_net and init_net in-place.
+    - When from_producer is enabled, this also updates other operators that consume
+      the same input. Be cautious, as this may trigger unintended behavior.
+ """
+ assert isinstance(predict_net, caffe2_pb2.NetDef)
+ assert isinstance(init_net, caffe2_pb2.NetDef)
+
+ init_net_ssa, init_net_versions = core.get_ssa(init_net)
+ predict_net_ssa, predict_net_versions = core.get_ssa(
+ predict_net, copy.deepcopy(init_net_versions)
+ )
+
+ versioned_inputs, versioned_outputs = predict_net_ssa[op_id]
+ old_name, version = versioned_inputs[input_id]
+
+ if from_producer:
+ producer_map = get_producer_map(predict_net_ssa)
+ if not (old_name, version) in producer_map:
+ raise NotImplementedError(
+ "Can't find producer, the input {} is probably from"
+ " init_net, this is not supported yet.".format(old_name)
+ )
+ producer = producer_map[(old_name, version)]
+ rename_op_output(predict_net, producer[0], producer[1], new_name)
+ return
+
+ def contain_targets(op_ssa):
+ return (old_name, version) in op_ssa[0]
+
+ is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa]
+ if sum(is_consumer) > 1:
+ raise IllegalGraphTransformError(
+ (
+                "Input '{}' of operator(#{}) is consumed by other ops, please use"
+ + " rename_op_output on the producer instead. Offending op: \n{}"
+ ).format(old_name, op_id, predict_net.op[op_id])
+ )
+
+ # update init_net
+ _rename_versioned_blob_in_proto(
+ init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions
+ )
+ # update predict_net
+ _rename_versioned_blob_in_proto(
+ predict_net,
+ old_name,
+ new_name,
+ version,
+ predict_net_ssa,
+ init_net_versions,
+ predict_net_versions,
+ )
+
+
+def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str):
+ """
+    Rename the op_id-th operator in predict_net, changing its output_id-th output's
+    name to new_name. It also does automatic re-routing and changes
+    external_output if necessary.
+ - It allows multiple consumers of its output.
+ - This function modifies predict_net in-place, doesn't need init_net.
+ """
+ assert isinstance(predict_net, caffe2_pb2.NetDef)
+
+ ssa, blob_versions = core.get_ssa(predict_net)
+
+ versioned_inputs, versioned_outputs = ssa[op_id]
+ old_name, version = versioned_outputs[output_id]
+
+ # update predict_net
+ _rename_versioned_blob_in_proto(
+ predict_net, old_name, new_name, version, ssa, {}, blob_versions
+ )
+
+
+def get_sub_graph_external_input_output(
+ predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int]
+) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]:
+ """
+    Return the lists of external inputs/outputs of the sub-graph;
+    each element is a tuple of the blob name and its corresponding version in predict_net.
+
+    External input/output is defined the same way as in a caffe2 NetDef.
+ """
+ ssa, versions = core.get_ssa(predict_net)
+
+ all_inputs = []
+ all_outputs = []
+ for op_id in sub_graph_op_indices:
+ all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs]
+ all_outputs += list(ssa[op_id][1]) # ssa output won't repeat
+
+    # for versioned blobs, external inputs are just those blobs that are in all_inputs
+    # but not in all_outputs
+ ext_inputs = [inp for inp in all_inputs if inp not in all_outputs]
+
+ # external outputs are essentially outputs of this subgraph that are used
+ # outside of this sub-graph (including predict_net.external_output)
+ all_other_inputs = sum(
+ (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices),
+ [(outp, versions[outp]) for outp in predict_net.external_output],
+ )
+ ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)]
+
+ return ext_inputs, ext_outputs
+
+
+class DiGraph:
+    """A DAG representation of a caffe2 graph; each vertex is a versioned blob."""
+
+ def __init__(self):
+ self.vertices = set()
+ self.graph = collections.defaultdict(list)
+
+ def add_edge(self, u, v):
+ self.graph[u].append(v)
+ self.vertices.add(u)
+ self.vertices.add(v)
+
+ # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/
+ def get_all_paths(self, s, d):
+ visited = {k: False for k in self.vertices}
+ path = []
+ all_paths = []
+
+ def _get_all_paths_util(graph, u, d, visited, path):
+ visited[u] = True
+ path.append(u)
+ if u == d:
+ all_paths.append(copy.deepcopy(path))
+ else:
+ for i in graph[u]:
+ if not visited[i]:
+ _get_all_paths_util(graph, i, d, visited, path)
+ path.pop()
+ visited[u] = False
+
+ _get_all_paths_util(self.graph, s, d, visited, path)
+ return all_paths
+
+ @staticmethod
+ def from_ssa(ssa):
+ graph = DiGraph()
+ for op_id in range(len(ssa)):
+ for inp in ssa[op_id][0]:
+ for outp in ssa[op_id][1]:
+ graph.add_edge(inp, outp)
+ return graph
+
+
+def _get_dependency_chain(ssa, versioned_target, versioned_source):
+ """
+    Return the list of indices of the operators needed to produce the target blob from
+    the source blob; if there's no dependency, return an empty list.
+ """
+
+ # finding all paths between nodes can be O(N!), thus we can only search
+ # in the subgraph using the op starting from the first consumer of source blob
+ # to the producer of the target blob.
+ consumer_map = get_consumer_map(ssa)
+ producer_map = get_producer_map(ssa)
+ start_op = min(x[0] for x in consumer_map[versioned_source]) - 15
+ end_op = (
+ producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op
+ )
+ sub_graph_ssa = ssa[start_op : end_op + 1]
+ if len(sub_graph_ssa) > 30:
+ logger.warning(
+            "Subgraph between {} and {} is large (from op#{} to op#{}), it"
+            " might take non-trivial time to find all paths between them.".format(
+ versioned_source, versioned_target, start_op, end_op
+ )
+ )
+
+ dag = DiGraph.from_ssa(sub_graph_ssa)
+ paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends
+ ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths]
+ return sorted(set().union(*[set(ops) for ops in ops_in_paths]))
+
+
+def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]:
+ """
+    Identify the reshape sub-graphs in a protobuf.
+ The reshape sub-graph is defined as matching the following pattern:
+
+ (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐
+ └-------------------------------------------> Reshape -> (output_blob)
+
+ Return:
+        List of sub-graphs, where each sub-graph is represented as a list of indices
+        of the relevant ops, [Op_1, Op_2, ..., Op_N, Reshape]
+ """
+
+ ssa, _ = core.get_ssa(predict_net)
+
+ ret = []
+ for i, op in enumerate(predict_net.op):
+ if op.type == "Reshape":
+ assert len(op.input) == 2
+ input_ssa = ssa[i][0]
+ data_source = input_ssa[0]
+ shape_source = input_ssa[1]
+ op_indices = _get_dependency_chain(ssa, shape_source, data_source)
+ ret.append(op_indices + [i])
+ return ret
+
+
+def remove_reshape_for_fc(predict_net, params):
+ """
+    In PyTorch, nn.Linear has to take a 2D tensor, which often leads to reshaping
+    a 4D tensor to 2D by calling .view(). However, this (dynamic) reshaping
+    doesn't work well with ONNX and Int8 tools, and causes extra
+    ops (e.g. ExpandDims) that might not be available on mobile.
+    Luckily Caffe2 supports 4D tensors for FC, so we can remove those reshapes
+    after exporting the ONNX model.
+ """
+ from caffe2.python import core
+
+ # find all reshape sub-graph that can be removed, which is now all Reshape
+ # sub-graph whose output is only consumed by FC.
+    # TODO: to make it safer, we may need the actual values to better determine
+    # whether a Reshape before FC is removable.
+ reshape_sub_graphs = identify_reshape_sub_graph(predict_net)
+ sub_graphs_to_remove = []
+ for reshape_sub_graph in reshape_sub_graphs:
+ reshape_op_id = reshape_sub_graph[-1]
+ assert predict_net.op[reshape_op_id].type == "Reshape"
+ ssa, _ = core.get_ssa(predict_net)
+ reshape_output = ssa[reshape_op_id][1][0]
+ consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]]
+ if all(predict_net.op[consumer].type == "FC" for consumer in consumers):
+ # safety check if the sub-graph is isolated, for this reshape sub-graph,
+ # it means it has one non-param external input and one external output.
+ ext_inputs, ext_outputs = get_sub_graph_external_input_output(
+ predict_net, reshape_sub_graph
+ )
+ non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0]
+ if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1:
+ sub_graphs_to_remove.append(reshape_sub_graph)
+
+ # perform removing subgraph by:
+    # 1: rename the Reshape's output to its input, so the sub-graph can be
+    #   seen as an in-place identity, i.e. its external input and output are the same.
+ # 2: simply remove those ops.
+ remove_op_ids = []
+ params_to_remove = []
+ for sub_graph in sub_graphs_to_remove:
+ logger.info(
+ "Remove Reshape sub-graph:\n{}".format(
+ "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph])
+ )
+ )
+ reshape_op_id = sub_graph[-1]
+        new_reshape_output = predict_net.op[reshape_op_id].input[0]
+        rename_op_output(predict_net, reshape_op_id, 0, new_reshape_output)
+ ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph)
+ non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0]
+ params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0]
+ assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1
+ assert ext_outputs[0][0] == non_params_ext_inputs[0][0]
+ assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1
+ remove_op_ids.extend(sub_graph)
+ params_to_remove.extend(params_ext_inputs)
+
+ predict_net = copy.deepcopy(predict_net)
+ new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids]
+ del predict_net.op[:]
+ predict_net.op.extend(new_ops)
+ for versioned_params in params_to_remove:
+ name = versioned_params[0]
+ logger.info("Remove params: {} from init_net and predict_net.external_input".format(name))
+ del params[name]
+ predict_net.external_input.remove(name)
+
+ return predict_net, params
+
+
+def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef):
+ """
+ In-place fuse extra copy ops between cpu/gpu for the following case:
+ a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1
+ -CopyBToA> c2 -NextOp2-> d2
+ The fused network will look like:
+ a -NextOp1-> d1
+ -NextOp2-> d2
+ """
+
+ _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"]
+
+ def _fuse_once(predict_net):
+ ssa, blob_versions = core.get_ssa(predict_net)
+ consumer_map = get_consumer_map(ssa)
+ versioned_external_output = [
+ (name, blob_versions[name]) for name in predict_net.external_output
+ ]
+
+ for op_id, op in enumerate(predict_net.op):
+ if op.type in _COPY_OPS:
+ fw_copy_versioned_output = ssa[op_id][1][0]
+ consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]]
+ reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)]
+
+ is_fusable = (
+ len(consumer_ids) > 0
+ and fw_copy_versioned_output not in versioned_external_output
+ and all(
+ predict_net.op[_op_id].type == reverse_op_type
+ and ssa[_op_id][1][0] not in versioned_external_output
+ for _op_id in consumer_ids
+ )
+ )
+
+ if is_fusable:
+ for rv_copy_op_id in consumer_ids:
+ # making each NextOp uses "a" directly and removing Copy ops
+ rs_copy_versioned_output = ssa[rv_copy_op_id][1][0]
+ next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0]
+ predict_net.op[next_op_id].input[inp_id] = op.input[0]
+ # remove CopyOps
+ new_ops = [
+ op
+ for i, op in enumerate(predict_net.op)
+ if i != op_id and i not in consumer_ids
+ ]
+ del predict_net.op[:]
+ predict_net.op.extend(new_ops)
+ return True
+
+ return False
+
+    # _fuse_once returns False if nothing can be fused
+ while _fuse_once(predict_net):
+ pass
+
+
+def remove_dead_end_ops(net_def: caffe2_pb2.NetDef):
+    """Remove ops whose outputs are not used and not in external_output."""
+ ssa, versions = core.get_ssa(net_def)
+ versioned_external_output = [(name, versions[name]) for name in net_def.external_output]
+ consumer_map = get_consumer_map(ssa)
+ removed_op_ids = set()
+
+ def _is_dead_end(versioned_blob):
+ return not (
+ versioned_blob in versioned_external_output
+ or (
+ len(consumer_map[versioned_blob]) > 0
+ and all(x[0] not in removed_op_ids for x in consumer_map[versioned_blob])
+ )
+ )
+
+ for i, ssa_i in reversed(list(enumerate(ssa))):
+ versioned_outputs = ssa_i[1]
+ if all(_is_dead_end(outp) for outp in versioned_outputs):
+ removed_op_ids.add(i)
+
+    # simply removing those dead-end ops should have no effect on external_output
+ new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids]
+ del net_def.op[:]
+ net_def.op.extend(new_ops)
diff --git a/vendor/detectron2/detectron2/export/torchscript.py b/vendor/detectron2/detectron2/export/torchscript.py
new file mode 100644
index 0000000000000000000000000000000000000000..24fe59bda44225324928542df3f2ef1745375dfd
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/torchscript.py
@@ -0,0 +1,132 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import os
+import torch
+
+from detectron2.utils.file_io import PathManager
+
+from .torchscript_patch import freeze_training_mode, patch_instances
+
+__all__ = ["scripting_with_instances", "dump_torchscript_IR"]
+
+
+def scripting_with_instances(model, fields):
+ """
+ Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since
+    attributes of :class:`Instances` are "dynamically" added in eager mode, it is difficult
+ for scripting to support it out of the box. This function is made to support scripting
+ a model that uses :class:`Instances`. It does the following:
+
+ 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``,
+       but with all attributes being "static".
+ The attributes need to be statically declared in the ``fields`` argument.
+ 2. Register ``new_Instances``, and force scripting compiler to
+ use it when trying to compile ``Instances``.
+
+    After this function returns, the patching is reverted. The user should be able to script
+    another model using different fields.
+
+ Example:
+ Assume that ``Instances`` in the model consist of two attributes named
+ ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and
+ :class:`Tensor` respectively during inference. You can call this function like:
+ ::
+ fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
+            torchscript_model = scripting_with_instances(model, fields)
+
+ Note:
+        It only supports models in evaluation mode.
+
+ Args:
+ model (nn.Module): The input model to be exported by scripting.
+ fields (Dict[str, type]): Attribute names and corresponding type that
+ ``Instances`` will use in the model. Note that all attributes used in ``Instances``
+ need to be added, regardless of whether they are inputs/outputs of the model.
+            Data types not defined in detectron2 are not supported for now.
+
+ Returns:
+ torch.jit.ScriptModule: the model in torchscript format
+ """
+ assert (
+ not model.training
+ ), "Currently we only support exporting models in evaluation mode to torchscript"
+
+ with freeze_training_mode(model), patch_instances(fields):
+ scripted_model = torch.jit.script(model)
+ return scripted_model
+
+
+# alias for old name
+export_torchscript_with_instances = scripting_with_instances
+
+
+def dump_torchscript_IR(model, dir):
+ """
+    Dump the IR of a TracedModule/ScriptModule/ScriptFunction in various formats (code, graph,
+    inlined graph). Useful for debugging.
+
+ Args:
+        model (TracedModule/ScriptModule/ScriptFunction): traced or scripted module
+ dir (str): output directory to dump files.
+ """
+ dir = os.path.expanduser(dir)
+ PathManager.mkdirs(dir)
+
+ def _get_script_mod(mod):
+ if isinstance(mod, torch.jit.TracedModule):
+ return mod._actual_script_module
+ return mod
+
+ # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
+ with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:
+
+ def get_code(mod):
+ # Try a few ways to get code using private attributes.
+ try:
+ # This contains more information than just `mod.code`
+ return _get_script_mod(mod)._c.code
+ except AttributeError:
+ pass
+ try:
+ return mod.code
+ except AttributeError:
+ return None
+
+ def dump_code(prefix, mod):
+ code = get_code(mod)
+ name = prefix or "root model"
+ if code is None:
+                f.write(f"Could not find code for {name} (type={mod.original_name})\n")
+ f.write("\n")
+ else:
+ f.write(f"\nCode for {name}, type={mod.original_name}:\n")
+ f.write(code)
+ f.write("\n")
+ f.write("-" * 80)
+
+ for name, m in mod.named_children():
+ dump_code(prefix + "." + name, m)
+
+ if isinstance(model, torch.jit.ScriptFunction):
+ f.write(get_code(model))
+ else:
+ dump_code("", model)
+
+ def _get_graph(model):
+ try:
+ # Recursively dump IR of all modules
+ return _get_script_mod(model)._c.dump_to_str(True, False, False)
+ except AttributeError:
+ return model.graph.str()
+
+ with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
+ f.write(_get_graph(model))
+
+ # Dump IR of the entire graph (all submodules inlined)
+ with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
+ f.write(str(model.inlined_graph))
+
+ if not isinstance(model, torch.jit.ScriptFunction):
+ # Dump the model structure in pytorch style
+ with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
+ f.write(str(model))
diff --git a/vendor/detectron2/detectron2/export/torchscript_patch.py b/vendor/detectron2/detectron2/export/torchscript_patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..da9b324f1582e31d1a16d2fe462ac2989bea56ea
--- /dev/null
+++ b/vendor/detectron2/detectron2/export/torchscript_patch.py
@@ -0,0 +1,406 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import os
+import sys
+import tempfile
+from contextlib import ExitStack, contextmanager
+from copy import deepcopy
+from unittest import mock
+import torch
+from torch import nn
+
+# need some explicit imports due to https://github.com/pytorch/pytorch/issues/38964
+import detectron2 # noqa F401
+from detectron2.structures import Boxes, Instances
+from detectron2.utils.env import _import_file
+
+_counter = 0
+
+
+def _clear_jit_cache():
+ from torch.jit._recursive import concrete_type_store
+ from torch.jit._state import _jit_caching_layer
+
+ concrete_type_store.type_store.clear() # for modules
+ _jit_caching_layer.clear() # for free functions
+
+
+def _add_instances_conversion_methods(newInstances):
+ """
+    Add a ``from_instances`` method to the scripted Instances class.
+ """
+ cls_name = newInstances.__name__
+
+ @torch.jit.unused
+ def from_instances(instances: Instances):
+ """
+ Create scripted Instances from original Instances
+ """
+ fields = instances.get_fields()
+ image_size = instances.image_size
+ ret = newInstances(image_size)
+ for name, val in fields.items():
+ assert hasattr(ret, f"_{name}"), f"No attribute named {name} in {cls_name}"
+ setattr(ret, name, deepcopy(val))
+ return ret
+
+ newInstances.from_instances = from_instances
+
+
+@contextmanager
+def patch_instances(fields):
+ """
+ A contextmanager, under which the Instances class in detectron2 is replaced
+ by a statically-typed scriptable class, defined by `fields`.
+ See more in `scripting_with_instances`.
+ """
+
+ with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile(
+ mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False
+ ) as f:
+ try:
+ # Objects that use Instances should not reuse previously-compiled
+ # results in cache, because `Instances` could be a new class each time.
+ _clear_jit_cache()
+
+ cls_name, s = _gen_instance_module(fields)
+ f.write(s)
+ f.flush()
+ f.close()
+
+ module = _import(f.name)
+ new_instances = getattr(module, cls_name)
+ _ = torch.jit.script(new_instances)
+ # let torchscript think Instances was scripted already
+ Instances.__torch_script_class__ = True
+ # let torchscript find new_instances when looking for the jit type of Instances
+ Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances)
+
+ _add_instances_conversion_methods(new_instances)
+ yield new_instances
+ finally:
+ try:
+ del Instances.__torch_script_class__
+ del Instances._jit_override_qualname
+ except AttributeError:
+ pass
+ sys.modules.pop(module.__name__)
+
+
+def _gen_instance_class(fields):
+ """
+ Args:
+ fields (dict[name: type])
+ """
+
+ class _FieldType:
+ def __init__(self, name, type_):
+ assert isinstance(name, str), f"Field name must be str, got {name}"
+ self.name = name
+ self.type_ = type_
+ self.annotation = f"{type_.__module__}.{type_.__name__}"
+
+ fields = [_FieldType(k, v) for k, v in fields.items()]
+
+ def indent(level, s):
+ return " " * 4 * level + s
+
+ lines = []
+
+ global _counter
+ _counter += 1
+
+ cls_name = "ScriptedInstances{}".format(_counter)
+
+ field_names = tuple(x.name for x in fields)
+ extra_args = ", ".join([f"{f.name}: Optional[{f.annotation}] = None" for f in fields])
+ lines.append(
+ f"""
+class {cls_name}:
+ def __init__(self, image_size: Tuple[int, int], {extra_args}):
+ self.image_size = image_size
+ self._field_names = {field_names}
+"""
+ )
+
+ for f in fields:
+ lines.append(
+ indent(2, f"self._{f.name} = torch.jit.annotate(Optional[{f.annotation}], {f.name})")
+ )
+
+ for f in fields:
+ lines.append(
+ f"""
+ @property
+ def {f.name}(self) -> {f.annotation}:
+ # has to use a local for type refinement
+ # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement
+ t = self._{f.name}
+ assert t is not None, "{f.name} is None and cannot be accessed!"
+ return t
+
+ @{f.name}.setter
+ def {f.name}(self, value: {f.annotation}) -> None:
+ self._{f.name} = value
+"""
+ )
+
+ # support method `__len__`
+ lines.append(
+ """
+ def __len__(self) -> int:
+"""
+ )
+ for f in fields:
+ lines.append(
+ f"""
+ t = self._{f.name}
+ if t is not None:
+ return len(t)
+"""
+ )
+ lines.append(
+ """
+ raise NotImplementedError("Empty Instances does not support __len__!")
+"""
+ )
+
+ # support method `has`
+ lines.append(
+ """
+ def has(self, name: str) -> bool:
+"""
+ )
+ for f in fields:
+ lines.append(
+ f"""
+ if name == "{f.name}":
+ return self._{f.name} is not None
+"""
+ )
+ lines.append(
+ """
+ return False
+"""
+ )
+
+ # support method `to`
+ none_args = ", None" * len(fields)
+ lines.append(
+ f"""
+ def to(self, device: torch.device) -> "{cls_name}":
+ ret = {cls_name}(self.image_size{none_args})
+"""
+ )
+ for f in fields:
+ if hasattr(f.type_, "to"):
+ lines.append(
+ f"""
+ t = self._{f.name}
+ if t is not None:
+ ret._{f.name} = t.to(device)
+"""
+ )
+ else:
+ # For now, ignore fields that cannot be moved to devices.
+            # We may support other tensor-like classes (e.g. __torch_function__) later.
+ pass
+ lines.append(
+ """
+ return ret
+"""
+ )
+
+ # support method `getitem`
+ none_args = ", None" * len(fields)
+ lines.append(
+ f"""
+ def __getitem__(self, item) -> "{cls_name}":
+ ret = {cls_name}(self.image_size{none_args})
+"""
+ )
+ for f in fields:
+ lines.append(
+ f"""
+ t = self._{f.name}
+ if t is not None:
+ ret._{f.name} = t[item]
+"""
+ )
+ lines.append(
+ """
+ return ret
+"""
+ )
+
+ # support method `cat`
+    # this version does not contain checks that all instances have the same size and fields
+ none_args = ", None" * len(fields)
+ lines.append(
+ f"""
+ def cat(self, instances: List["{cls_name}"]) -> "{cls_name}":
+ ret = {cls_name}(self.image_size{none_args})
+"""
+ )
+ for f in fields:
+ lines.append(
+ f"""
+ t = self._{f.name}
+ if t is not None:
+ values: List[{f.annotation}] = [x.{f.name} for x in instances]
+ if torch.jit.isinstance(t, torch.Tensor):
+ ret._{f.name} = torch.cat(values, dim=0)
+ else:
+ ret._{f.name} = t.cat(values)
+"""
+ )
+ lines.append(
+ """
+ return ret"""
+ )
+
+ # support method `get_fields()`
+ lines.append(
+ """
+ def get_fields(self) -> Dict[str, Tensor]:
+ ret = {}
+ """
+ )
+ for f in fields:
+ if f.type_ == Boxes:
+ stmt = "t.tensor"
+ elif f.type_ == torch.Tensor:
+ stmt = "t"
+ else:
+ stmt = f'assert False, "unsupported type {str(f.type_)}"'
+ lines.append(
+ f"""
+ t = self._{f.name}
+ if t is not None:
+ ret["{f.name}"] = {stmt}
+ """
+ )
+ lines.append(
+ """
+ return ret"""
+ )
+ return cls_name, os.linesep.join(lines)
+
+
+def _gen_instance_module(fields):
+ # TODO: find a more automatic way to enable import of other classes
+ s = """
+from copy import deepcopy
+import torch
+from torch import Tensor
+import typing
+from typing import *
+
+import detectron2
+from detectron2.structures import Boxes, Instances
+
+"""
+
+ cls_name, cls_def = _gen_instance_class(fields)
+ s += cls_def
+ return cls_name, s
+
+
+def _import(path):
+ return _import_file(
+ "{}{}".format(sys.modules[__name__].__name__, _counter), path, make_importable=True
+ )
+
+
+@contextmanager
+def patch_builtin_len(modules=()):
+ """
+ Patch the builtin len() function of a few detectron2 modules
+ to use __len__ instead, because __len__ does not convert values to
+ integers and therefore is friendly to tracing.
+
+ Args:
+        modules (list[str]): names of extra modules to patch len(), in
+ addition to those in detectron2.
+ """
+
+ def _new_len(obj):
+ return obj.__len__()
+
+ with ExitStack() as stack:
+ MODULES = [
+ "detectron2.modeling.roi_heads.fast_rcnn",
+ "detectron2.modeling.roi_heads.mask_head",
+ "detectron2.modeling.roi_heads.keypoint_head",
+ ] + list(modules)
+ ctxs = [stack.enter_context(mock.patch(mod + ".len")) for mod in MODULES]
+ for m in ctxs:
+ m.side_effect = _new_len
+ yield
+
+
+def patch_nonscriptable_classes():
+ """
+ Apply patches on a few nonscriptable detectron2 classes.
+ Should not have side-effects on eager usage.
+ """
+ # __prepare_scriptable__ can also be added to models for easier maintenance.
+ # But it complicates the clean model code.
+
+ from detectron2.modeling.backbone import ResNet, FPN
+
+ # Due to https://github.com/pytorch/pytorch/issues/36061,
+ # we change backbone to use ModuleList for scripting.
+ # (note: this changes param names in state_dict)
+
+ def prepare_resnet(self):
+ ret = deepcopy(self)
+ ret.stages = nn.ModuleList(ret.stages)
+ for k in self.stage_names:
+ delattr(ret, k)
+ return ret
+
+ ResNet.__prepare_scriptable__ = prepare_resnet
+
+ def prepare_fpn(self):
+ ret = deepcopy(self)
+ ret.lateral_convs = nn.ModuleList(ret.lateral_convs)
+ ret.output_convs = nn.ModuleList(ret.output_convs)
+ for name, _ in self.named_children():
+ if name.startswith("fpn_"):
+ delattr(ret, name)
+ return ret
+
+ FPN.__prepare_scriptable__ = prepare_fpn
+
+ # Annotate some attributes to be constants for the purpose of scripting,
+ # even though they are not constants in eager mode.
+ from detectron2.modeling.roi_heads import StandardROIHeads
+
+ if hasattr(StandardROIHeads, "__annotations__"):
+ # copy first to avoid editing annotations of base class
+ StandardROIHeads.__annotations__ = deepcopy(StandardROIHeads.__annotations__)
+ StandardROIHeads.__annotations__["mask_on"] = torch.jit.Final[bool]
+ StandardROIHeads.__annotations__["keypoint_on"] = torch.jit.Final[bool]
+
+
+# These patches are not supposed to have side-effects.
+patch_nonscriptable_classes()
+
+
+@contextmanager
+def freeze_training_mode(model):
+ """
+    A context manager that annotates the "training" attribute of every submodule
+    as a constant, so that the training codepath in these modules can be
+ meta-compiled away. Upon exiting, the annotations are reverted.
+ """
+ classes = {type(x) for x in model.modules()}
+    # __constants__ is the old way to annotate constants and is not compatible
+    # with __annotations__.
+ classes = {x for x in classes if not hasattr(x, "__constants__")}
+ for cls in classes:
+ cls.__annotations__["training"] = torch.jit.Final[bool]
+ yield
+ for cls in classes:
+ cls.__annotations__["training"] = bool
diff --git a/vendor/detectron2/detectron2/layers/__init__.py b/vendor/detectron2/detectron2/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..761a3d1c7afa049e9779ee9fc4d299e9aae38cad
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm, CycleBatchNormList
+from .deform_conv import DeformConv, ModulatedDeformConv
+from .mask_ops import paste_masks_in_image
+from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated
+from .roi_align import ROIAlign, roi_align
+from .roi_align_rotated import ROIAlignRotated, roi_align_rotated
+from .shape_spec import ShapeSpec
+from .wrappers import (
+ BatchNorm2d,
+ Conv2d,
+ ConvTranspose2d,
+ cat,
+ interpolate,
+ Linear,
+ nonzero_tuple,
+ cross_entropy,
+ empty_input_loss_func_wrapper,
+ shapes_to_tensor,
+ move_device_like,
+)
+from .blocks import CNNBlockBase, DepthwiseSeparableConv2d
+from .aspp import ASPP
+from .losses import ciou_loss, diou_loss
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/vendor/detectron2/detectron2/layers/aspp.py b/vendor/detectron2/detectron2/layers/aspp.py
new file mode 100644
index 0000000000000000000000000000000000000000..14861aa9ede4fea6a69a49f189bcab997b558148
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/aspp.py
@@ -0,0 +1,144 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+from copy import deepcopy
+import fvcore.nn.weight_init as weight_init
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .batch_norm import get_norm
+from .blocks import DepthwiseSeparableConv2d
+from .wrappers import Conv2d
+
+
+class ASPP(nn.Module):
+ """
+ Atrous Spatial Pyramid Pooling (ASPP).
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ dilations,
+ *,
+ norm,
+ activation,
+ pool_kernel_size=None,
+ dropout: float = 0.0,
+ use_depthwise_separable_conv=False,
+ ):
+ """
+ Args:
+ in_channels (int): number of input channels for ASPP.
+ out_channels (int): number of output channels.
+ dilations (list): a list of 3 dilations in ASPP.
+ norm (str or callable): normalization for all conv layers.
+ See :func:`layers.get_norm` for supported format. norm is
+ applied to all conv layers except the conv following
+ global average pooling.
+ activation (callable): activation function.
+ pool_kernel_size (tuple, list): the average pooling size (kh, kw)
+ for image pooling layer in ASPP. If set to None, it always
+ performs global average pooling. If not None, it must be
+ divisible by the shape of inputs in forward(). It is recommended
+ to use a fixed input feature size in training, and set this
+ option to match this size, so that it performs global average
+ pooling in training, and the size of the pooling window stays
+ consistent in inference.
+ dropout (float): apply dropout on the output of ASPP. It is used in
+ the official DeepLab implementation with a rate of 0.1:
+ https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa
+ use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d
+ for 3x3 convs in ASPP, proposed in :paper:`DeepLabV3+`.
+ """
+ super(ASPP, self).__init__()
+ assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations))
+ self.pool_kernel_size = pool_kernel_size
+ self.dropout = dropout
+ use_bias = norm == ""
+ self.convs = nn.ModuleList()
+ # conv 1x1
+ self.convs.append(
+ Conv2d(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ bias=use_bias,
+ norm=get_norm(norm, out_channels),
+ activation=deepcopy(activation),
+ )
+ )
+ weight_init.c2_xavier_fill(self.convs[-1])
+ # atrous convs
+ for dilation in dilations:
+ if use_depthwise_separable_conv:
+ self.convs.append(
+ DepthwiseSeparableConv2d(
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ padding=dilation,
+ dilation=dilation,
+ norm1=norm,
+ activation1=deepcopy(activation),
+ norm2=norm,
+ activation2=deepcopy(activation),
+ )
+ )
+ else:
+ self.convs.append(
+ Conv2d(
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ padding=dilation,
+ dilation=dilation,
+ bias=use_bias,
+ norm=get_norm(norm, out_channels),
+ activation=deepcopy(activation),
+ )
+ )
+ weight_init.c2_xavier_fill(self.convs[-1])
+ # image pooling
+        # We do not add BatchNorm because the spatial resolution is 1x1;
+        # the original TF implementation does have BatchNorm here.
+ if pool_kernel_size is None:
+ image_pooling = nn.Sequential(
+ nn.AdaptiveAvgPool2d(1),
+ Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
+ )
+ else:
+ image_pooling = nn.Sequential(
+ nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1),
+ Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)),
+ )
+ weight_init.c2_xavier_fill(image_pooling[1])
+ self.convs.append(image_pooling)
+
+ self.project = Conv2d(
+ 5 * out_channels,
+ out_channels,
+ kernel_size=1,
+ bias=use_bias,
+ norm=get_norm(norm, out_channels),
+ activation=deepcopy(activation),
+ )
+ weight_init.c2_xavier_fill(self.project)
+
+ def forward(self, x):
+ size = x.shape[-2:]
+ if self.pool_kernel_size is not None:
+ if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]:
+ raise ValueError(
+ "`pool_kernel_size` must be divisible by the shape of inputs. "
+ "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size)
+ )
+ res = []
+ for conv in self.convs:
+ res.append(conv(x))
+ res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False)
+ res = torch.cat(res, dim=1)
+ res = self.project(res)
+ res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res
+ return res
diff --git a/vendor/detectron2/detectron2/layers/batch_norm.py b/vendor/detectron2/detectron2/layers/batch_norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..f594587628b842607404ee9793ece7a11ef98775
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/batch_norm.py
@@ -0,0 +1,320 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import torch
+import torch.distributed as dist
+from fvcore.nn.distributed import differentiable_all_reduce
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.utils import comm, env
+
+from .wrappers import BatchNorm2d
+
+
+class FrozenBatchNorm2d(nn.Module):
+ """
+ BatchNorm2d where the batch statistics and the affine parameters are fixed.
+
+    It contains non-trainable buffers called
+    "weight", "bias", "running_mean" and "running_var",
+    initialized to perform the identity transformation.
+
+ The pre-trained backbone models from Caffe2 only contain "weight" and "bias",
+ which are computed from the original four parameters of BN.
+ The affine transform `x * weight + bias` will perform the equivalent
+ computation of `(x - running_mean) / sqrt(running_var) * weight + bias`.
+ When loading a backbone model from Caffe2, "running_mean" and "running_var"
+ will be left unchanged as identity transformation.
+
+ Other pre-trained backbone models may contain all 4 parameters.
+
+ The forward is implemented by `F.batch_norm(..., training=False)`.
+ """
+
+ _version = 3
+
+ def __init__(self, num_features, eps=1e-5):
+ super().__init__()
+ self.num_features = num_features
+ self.eps = eps
+ self.register_buffer("weight", torch.ones(num_features))
+ self.register_buffer("bias", torch.zeros(num_features))
+ self.register_buffer("running_mean", torch.zeros(num_features))
+ self.register_buffer("running_var", torch.ones(num_features) - eps)
+ self.register_buffer("num_batches_tracked", None)
+
+ def forward(self, x):
+ if x.requires_grad:
+ # When gradients are needed, F.batch_norm will use extra memory
+ # because its backward op computes gradients for weight/bias as well.
+ scale = self.weight * (self.running_var + self.eps).rsqrt()
+ bias = self.bias - self.running_mean * scale
+ scale = scale.reshape(1, -1, 1, 1)
+ bias = bias.reshape(1, -1, 1, 1)
+ out_dtype = x.dtype # may be half
+ return x * scale.to(out_dtype) + bias.to(out_dtype)
+ else:
+            # When gradients are not needed, F.batch_norm is a single fused op
+            # and provides more optimization opportunities.
+ return F.batch_norm(
+ x,
+ self.running_mean,
+ self.running_var,
+ self.weight,
+ self.bias,
+ training=False,
+ eps=self.eps,
+ )
+
+ def _load_from_state_dict(
+ self,
+ state_dict,
+ prefix,
+ local_metadata,
+ strict,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
+ ):
+ version = local_metadata.get("version", None)
+
+ if version is None or version < 2:
+ # No running_mean/var in early versions
+            # This silences the warnings
+ if prefix + "running_mean" not in state_dict:
+ state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean)
+ if prefix + "running_var" not in state_dict:
+ state_dict[prefix + "running_var"] = torch.ones_like(self.running_var)
+
+ super()._load_from_state_dict(
+ state_dict,
+ prefix,
+ local_metadata,
+ strict,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
+ )
+
+ def __repr__(self):
+ return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps)
+
+ @classmethod
+ def convert_frozen_batchnorm(cls, module):
+ """
+ Convert all BatchNorm/SyncBatchNorm in module into FrozenBatchNorm.
+
+ Args:
+ module (torch.nn.Module):
+
+ Returns:
+ If module is BatchNorm/SyncBatchNorm, returns a new module.
+            Otherwise, converts the module in place and returns it.
+
+ Similar to convert_sync_batchnorm in
+ https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py
+ """
+ bn_module = nn.modules.batchnorm
+ bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm)
+ res = module
+ if isinstance(module, bn_module):
+ res = cls(module.num_features)
+ if module.affine:
+ res.weight.data = module.weight.data.clone().detach()
+ res.bias.data = module.bias.data.clone().detach()
+ res.running_mean.data = module.running_mean.data
+ res.running_var.data = module.running_var.data
+ res.eps = module.eps
+ res.num_batches_tracked = module.num_batches_tracked
+ else:
+ for name, child in module.named_children():
+ new_child = cls.convert_frozen_batchnorm(child)
+ if new_child is not child:
+ res.add_module(name, new_child)
+ return res
+
+
+def get_norm(norm, out_channels):
+ """
+ Args:
+ norm (str or callable): either one of BN, SyncBN, FrozenBN, GN;
+ or a callable that takes a channel number and returns
+ the normalization layer as a nn.Module.
+
+ Returns:
+ nn.Module or None: the normalization layer
+ """
+ if norm is None:
+ return None
+ if isinstance(norm, str):
+ if len(norm) == 0:
+ return None
+ norm = {
+ "BN": BatchNorm2d,
+ # Fixed in https://github.com/pytorch/pytorch/pull/36382
+ "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm,
+ "FrozenBN": FrozenBatchNorm2d,
+ "GN": lambda channels: nn.GroupNorm(32, channels),
+ # for debugging:
+ "nnSyncBN": nn.SyncBatchNorm,
+ "naiveSyncBN": NaiveSyncBatchNorm,
+ # expose stats_mode N as an option to caller, required for zero-len inputs
+ "naiveSyncBN_N": lambda channels: NaiveSyncBatchNorm(channels, stats_mode="N"),
+ "LN": lambda channels: LayerNorm(channels),
+ }[norm]
+ return norm(out_channels)
+
+
+class NaiveSyncBatchNorm(BatchNorm2d):
+ """
+ In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient
+ when the batch size on each worker is different.
+ (e.g., when scale augmentation is used, or when it is applied to mask head).
+
+ This is a slower but correct alternative to `nn.SyncBatchNorm`.
+
+ Note:
+ There isn't a single definition of Sync BatchNorm.
+
+ When ``stats_mode==""``, this module computes overall statistics by using
+ statistics of each worker with equal weight. The result is true statistics
+ of all samples (as if they are all on one worker) only when all workers
+ have the same (N, H, W). This mode does not support inputs with zero batch size.
+
+ When ``stats_mode=="N"``, this module computes overall statistics by weighting
+ the statistics of each worker by their ``N``. The result is true statistics
+ of all samples (as if they are all on one worker) only when all workers
+ have the same (H, W). It is slower than ``stats_mode==""``.
+
+ Even though the result of this module may not be the true statistics of all samples,
+    it may still be reasonable because it might be preferable to assign equal weights
+ to all workers, regardless of their (H, W) dimension, instead of putting larger weight
+ on larger images. From preliminary experiments, little difference is found between such
+ a simplified implementation and an accurate computation of overall mean & variance.
+ """
+
+ def __init__(self, *args, stats_mode="", **kwargs):
+ super().__init__(*args, **kwargs)
+ assert stats_mode in ["", "N"]
+ self._stats_mode = stats_mode
+
+ def forward(self, input):
+ if comm.get_world_size() == 1 or not self.training:
+ return super().forward(input)
+
+ B, C = input.shape[0], input.shape[1]
+
+ half_input = input.dtype == torch.float16
+ if half_input:
+ # fp16 does not have good enough numerics for the reduction here
+ input = input.float()
+ mean = torch.mean(input, dim=[0, 2, 3])
+ meansqr = torch.mean(input * input, dim=[0, 2, 3])
+
+ if self._stats_mode == "":
+ assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.'
+ vec = torch.cat([mean, meansqr], dim=0)
+ vec = differentiable_all_reduce(vec) * (1.0 / dist.get_world_size())
+ mean, meansqr = torch.split(vec, C)
+ momentum = self.momentum
+ else:
+ if B == 0:
+ vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype)
+ vec = vec + input.sum() # make sure there is gradient w.r.t input
+ else:
+ vec = torch.cat(
+ [
+ mean,
+ meansqr,
+ torch.ones([1], device=mean.device, dtype=mean.dtype),
+ ],
+ dim=0,
+ )
+ vec = differentiable_all_reduce(vec * B)
+
+ total_batch = vec[-1].detach()
+ momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0
+ mean, meansqr, _ = torch.split(vec / total_batch.clamp(min=1), C) # avoid div-by-zero
+
+ var = meansqr - mean * mean
+ invstd = torch.rsqrt(var + self.eps)
+ scale = self.weight * invstd
+ bias = self.bias - mean * scale
+ scale = scale.reshape(1, -1, 1, 1)
+ bias = bias.reshape(1, -1, 1, 1)
+
+ self.running_mean += momentum * (mean.detach() - self.running_mean)
+ self.running_var += momentum * (var.detach() - self.running_var)
+ ret = input * scale + bias
+ if half_input:
+ ret = ret.half()
+ return ret
+
+
+class CycleBatchNormList(nn.ModuleList):
+ """
+ Implement domain-specific BatchNorm by cycling.
+
+ When a BatchNorm layer is used for multiple input domains or input
+    features, it might need to maintain separate test-time statistics
+ for each domain. See Sec 5.2 in :paper:`rethinking-batchnorm`.
+
+ This module implements it by using N separate BN layers
+ and it cycles through them every time a forward() is called.
+
+ NOTE: The caller of this module MUST guarantee to always call
+    this module a multiple of N times. Otherwise its test-time statistics
+ will be incorrect.
+ """
+
+ def __init__(self, length: int, bn_class=nn.BatchNorm2d, **kwargs):
+ """
+ Args:
+ length: number of BatchNorm layers to cycle.
+ bn_class: the BatchNorm class to use
+ kwargs: arguments of the BatchNorm class, such as num_features.
+ """
+ self._affine = kwargs.pop("affine", True)
+ super().__init__([bn_class(**kwargs, affine=False) for k in range(length)])
+ if self._affine:
+ # shared affine, domain-specific BN
+ channels = self[0].num_features
+ self.weight = nn.Parameter(torch.ones(channels))
+ self.bias = nn.Parameter(torch.zeros(channels))
+ self._pos = 0
+
+ def forward(self, x):
+ ret = self[self._pos](x)
+ self._pos = (self._pos + 1) % len(self)
+
+ if self._affine:
+ w = self.weight.reshape(1, -1, 1, 1)
+ b = self.bias.reshape(1, -1, 1, 1)
+ return ret * w + b
+ else:
+ return ret
+
+ def extra_repr(self):
+ return f"affine={self._affine}"
+
+
+class LayerNorm(nn.Module):
+ """
+ A LayerNorm variant, popularized by Transformers, that performs point-wise mean and
+ variance normalization over the channel dimension for inputs that have shape
+ (batch_size, channels, height, width).
+ https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950
+ """
+
+ def __init__(self, normalized_shape, eps=1e-6):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(normalized_shape))
+ self.bias = nn.Parameter(torch.zeros(normalized_shape))
+ self.eps = eps
+ self.normalized_shape = (normalized_shape,)
+
+ def forward(self, x):
+ u = x.mean(1, keepdim=True)
+ s = (x - u).pow(2).mean(1, keepdim=True)
+ x = (x - u) / torch.sqrt(s + self.eps)
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
+ return x
diff --git a/vendor/detectron2/detectron2/layers/blocks.py b/vendor/detectron2/detectron2/layers/blocks.py
new file mode 100644
index 0000000000000000000000000000000000000000..1995a4bf7339e8deb7eaaffda4f819dda55e7ac7
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/blocks.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import fvcore.nn.weight_init as weight_init
+from torch import nn
+
+from .batch_norm import FrozenBatchNorm2d, get_norm
+from .wrappers import Conv2d
+
+
+"""
+CNN building blocks.
+"""
+
+
+class CNNBlockBase(nn.Module):
+ """
+ A CNN block is assumed to have input channels, output channels and a stride.
+ The input and output of `forward()` method must be NCHW tensors.
+ The method can perform arbitrary computation but must match the given
+ channels and stride specification.
+
+ Attribute:
+ in_channels (int):
+ out_channels (int):
+ stride (int):
+ """
+
+ def __init__(self, in_channels, out_channels, stride):
+ """
+ The `__init__` method of any subclass should also contain these arguments.
+
+ Args:
+ in_channels (int):
+ out_channels (int):
+ stride (int):
+ """
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.stride = stride
+
+ def freeze(self):
+ """
+ Make this block not trainable.
+        This method sets all parameters to `requires_grad=False`,
+        and converts all BatchNorm layers to FrozenBatchNorm.
+
+ Returns:
+ the block itself
+ """
+ for p in self.parameters():
+ p.requires_grad = False
+ FrozenBatchNorm2d.convert_frozen_batchnorm(self)
+ return self
+
+
+class DepthwiseSeparableConv2d(nn.Module):
+ """
+ A kxk depthwise convolution + a 1x1 convolution.
+
+ In :paper:`xception`, norm & activation are applied on the second conv.
+ :paper:`mobilenet` uses norm & activation on both convs.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ padding=1,
+ dilation=1,
+ *,
+ norm1=None,
+ activation1=None,
+ norm2=None,
+ activation2=None,
+ ):
+ """
+ Args:
+ norm1, norm2 (str or callable): normalization for the two conv layers.
+ activation1, activation2 (callable(Tensor) -> Tensor): activation
+ function for the two conv layers.
+ """
+ super().__init__()
+ self.depthwise = Conv2d(
+ in_channels,
+ in_channels,
+ kernel_size=kernel_size,
+ padding=padding,
+ dilation=dilation,
+ groups=in_channels,
+ bias=not norm1,
+ norm=get_norm(norm1, in_channels),
+ activation=activation1,
+ )
+ self.pointwise = Conv2d(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ bias=not norm2,
+ norm=get_norm(norm2, out_channels),
+ activation=activation2,
+ )
+
+ # default initialization
+ weight_init.c2_msra_fill(self.depthwise)
+ weight_init.c2_msra_fill(self.pointwise)
+
+ def forward(self, x):
+ return self.pointwise(self.depthwise(x))
diff --git a/vendor/detectron2/detectron2/layers/csrc/README.md b/vendor/detectron2/detectron2/layers/csrc/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..778ed3da0bae89820831bcd8a72ff7b9cad8d4dd
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/csrc/README.md
@@ -0,0 +1,7 @@
+
+
+To add a new Op:
+
+1. Create a new directory
+2. Implement new ops there
+3. Declare its Python interface in `vision.cpp`.
diff --git a/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h
new file mode 100644
index 0000000000000000000000000000000000000000..03f4211003f42f601f0cfcf4a690f5da4a0a1f67
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h
@@ -0,0 +1,115 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+#pragma once
+#include <torch/types.h>
+
+namespace detectron2 {
+
+at::Tensor ROIAlignRotated_forward_cpu(
+ const at::Tensor& input,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio);
+
+at::Tensor ROIAlignRotated_backward_cpu(
+ const at::Tensor& grad,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int batch_size,
+ const int channels,
+ const int height,
+ const int width,
+ const int sampling_ratio);
+
+#if defined(WITH_CUDA) || defined(WITH_HIP)
+at::Tensor ROIAlignRotated_forward_cuda(
+ const at::Tensor& input,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio);
+
+at::Tensor ROIAlignRotated_backward_cuda(
+ const at::Tensor& grad,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int batch_size,
+ const int channels,
+ const int height,
+ const int width,
+ const int sampling_ratio);
+#endif
+
+// Interface for Python
+inline at::Tensor ROIAlignRotated_forward(
+ const at::Tensor& input,
+ const at::Tensor& rois,
+ const double spatial_scale,
+ const int64_t pooled_height,
+ const int64_t pooled_width,
+ const int64_t sampling_ratio) {
+ if (input.is_cuda()) {
+#if defined(WITH_CUDA) || defined(WITH_HIP)
+ return ROIAlignRotated_forward_cuda(
+ input,
+ rois,
+ spatial_scale,
+ pooled_height,
+ pooled_width,
+ sampling_ratio);
+#else
+ AT_ERROR("Detectron2 is not compiled with GPU support!");
+#endif
+ }
+ return ROIAlignRotated_forward_cpu(
+ input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
+}
+
+inline at::Tensor ROIAlignRotated_backward(
+ const at::Tensor& grad,
+ const at::Tensor& rois,
+ const double spatial_scale,
+ const int64_t pooled_height,
+ const int64_t pooled_width,
+ const int64_t batch_size,
+ const int64_t channels,
+ const int64_t height,
+ const int64_t width,
+ const int64_t sampling_ratio) {
+ if (grad.is_cuda()) {
+#if defined(WITH_CUDA) || defined(WITH_HIP)
+ return ROIAlignRotated_backward_cuda(
+ grad,
+ rois,
+ spatial_scale,
+ pooled_height,
+ pooled_width,
+ batch_size,
+ channels,
+ height,
+ width,
+ sampling_ratio);
+#else
+ AT_ERROR("Detectron2 is not compiled with GPU support!");
+#endif
+ }
+ return ROIAlignRotated_backward_cpu(
+ grad,
+ rois,
+ spatial_scale,
+ pooled_height,
+ pooled_width,
+ batch_size,
+ channels,
+ height,
+ width,
+ sampling_ratio);
+}
+
+} // namespace detectron2
diff --git a/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a3d3056cc71a4acaafb570739a9dd247a7eb1ed
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp
@@ -0,0 +1,522 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+#include <ATen/TensorUtils.h>
+#include "ROIAlignRotated.h"
+
+// Note: this implementation originates from the Caffe2 ROIAlignRotated Op
+// and PyTorch ROIAlign (non-rotated) Op implementations.
+// The key difference between this implementation and those is that
+// we don't do the "legacy offset" in this version, as there aren't many previous
+// works, if any, using the "legacy" ROIAlignRotated Op.
+// This would make the interface a bit cleaner.
+
+namespace detectron2 {
+
+namespace {
+template <typename T>
+struct PreCalc {
+ int pos1;
+ int pos2;
+ int pos3;
+ int pos4;
+ T w1;
+ T w2;
+ T w3;
+ T w4;
+};
+
+template <typename T>
+void pre_calc_for_bilinear_interpolate(
+ const int height,
+ const int width,
+ const int pooled_height,
+ const int pooled_width,
+ const int iy_upper,
+ const int ix_upper,
+ T roi_start_h,
+ T roi_start_w,
+ T bin_size_h,
+ T bin_size_w,
+ int roi_bin_grid_h,
+ int roi_bin_grid_w,
+ T roi_center_h,
+ T roi_center_w,
+ T cos_theta,
+ T sin_theta,
+    std::vector<PreCalc<T>>& pre_calc) {
+ int pre_calc_index = 0;
+ for (int ph = 0; ph < pooled_height; ph++) {
+ for (int pw = 0; pw < pooled_width; pw++) {
+ for (int iy = 0; iy < iy_upper; iy++) {
+ const T yy = roi_start_h + ph * bin_size_h +
+            static_cast<T>(iy + .5f) * bin_size_h /
+            static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < ix_upper; ix++) {
+ const T xx = roi_start_w + pw * bin_size_w +
+              static_cast<T>(ix + .5f) * bin_size_w /
+              static_cast<T>(roi_bin_grid_w);
+
+ // Rotate by theta around the center and translate
+ // In image space, (y, x) is the order for Right Handed System,
+ // and this is essentially multiplying the point by a rotation matrix
+ // to rotate it counterclockwise through angle theta.
+ T y = yy * cos_theta - xx * sin_theta + roi_center_h;
+ T x = yy * sin_theta + xx * cos_theta + roi_center_w;
+ // deal with: inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ // empty
+          PreCalc<T> pc;
+ pc.pos1 = 0;
+ pc.pos2 = 0;
+ pc.pos3 = 0;
+ pc.pos4 = 0;
+ pc.w1 = 0;
+ pc.w2 = 0;
+ pc.w3 = 0;
+ pc.w4 = 0;
+ pre_calc[pre_calc_index] = pc;
+ pre_calc_index += 1;
+ continue;
+ }
+
+ if (y < 0) {
+ y = 0;
+ }
+ if (x < 0) {
+ x = 0;
+ }
+
+ int y_low = (int)y;
+ int x_low = (int)x;
+ int y_high;
+ int x_high;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+ T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ // save weights and indices
+          PreCalc<T> pc;
+ pc.pos1 = y_low * width + x_low;
+ pc.pos2 = y_low * width + x_high;
+ pc.pos3 = y_high * width + x_low;
+ pc.pos4 = y_high * width + x_high;
+ pc.w1 = w1;
+ pc.w2 = w2;
+ pc.w3 = w3;
+ pc.w4 = w4;
+ pre_calc[pre_calc_index] = pc;
+
+ pre_calc_index += 1;
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void bilinear_interpolate_gradient(
+ const int height,
+ const int width,
+ T y,
+ T x,
+ T& w1,
+ T& w2,
+ T& w3,
+ T& w4,
+ int& x_low,
+ int& x_high,
+ int& y_low,
+ int& y_high) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ // empty
+ w1 = w2 = w3 = w4 = 0.;
+ x_low = x_high = y_low = y_high = -1;
+ return;
+ }
+
+ if (y < 0) {
+ y = 0;
+ }
+
+ if (x < 0) {
+ x = 0;
+ }
+
+ y_low = (int)y;
+ x_low = (int)x;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+
+ // reference in forward
+ // T v1 = input[y_low * width + x_low];
+ // T v2 = input[y_low * width + x_high];
+ // T v3 = input[y_high * width + x_low];
+ // T v4 = input[y_high * width + x_high];
+ // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+ w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ return;
+}
+
+template <typename T>
+inline void add(T* address, const T& val) {
+ *address += val;
+}
+
+} // namespace
+
+template <typename T>
+void ROIAlignRotatedForward(
+ const int nthreads,
+ const T* input,
+ const T& spatial_scale,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio,
+ const T* rois,
+ T* output) {
+ int n_rois = nthreads / channels / pooled_width / pooled_height;
+ // (n, c, ph, pw) is an element in the pooled output
+ // can be parallelized using omp
+ // #pragma omp parallel for num_threads(32)
+ for (int n = 0; n < n_rois; n++) {
+ int index_n = n * channels * pooled_width * pooled_height;
+
+ const T* current_roi = rois + n * 6;
+ int roi_batch_ind = current_roi[0];
+
+ // Do not use rounding; this implementation detail is critical
+ // ROIAlignRotated supports align == true, i.e., continuous coordinate
+ // by default, thus the 0.5 offset
+ T offset = (T)0.5;
+ T roi_center_w = current_roi[1] * spatial_scale - offset;
+ T roi_center_h = current_roi[2] * spatial_scale - offset;
+ T roi_width = current_roi[3] * spatial_scale;
+ T roi_height = current_roi[4] * spatial_scale;
+ T theta = current_roi[5] * M_PI / 180.0;
+ T cos_theta = cos(theta);
+ T sin_theta = sin(theta);
+
+ AT_ASSERTM(
+ roi_width >= 0 && roi_height >= 0,
+ "ROIs in ROIAlignRotated do not have non-negative size!");
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (sampling_ratio > 0)
+ ? sampling_ratio
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
+
+ // We do average (integral) pooling inside a bin
+ const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
+
+ // we want to precalculate indices and weights shared by all channels,
+ // this is the key point of optimization
+    std::vector<PreCalc<T>> pre_calc(
+ roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ T roi_start_h = -roi_height / 2.0;
+ T roi_start_w = -roi_width / 2.0;
+
+ pre_calc_for_bilinear_interpolate(
+ height,
+ width,
+ pooled_height,
+ pooled_width,
+ roi_bin_grid_h,
+ roi_bin_grid_w,
+ roi_start_h,
+ roi_start_w,
+ bin_size_h,
+ bin_size_w,
+ roi_bin_grid_h,
+ roi_bin_grid_w,
+ roi_center_h,
+ roi_center_w,
+ cos_theta,
+ sin_theta,
+ pre_calc);
+
+ for (int c = 0; c < channels; c++) {
+ int index_n_c = index_n + c * pooled_width * pooled_height;
+ const T* offset_input =
+ input + (roi_batch_ind * channels + c) * height * width;
+ int pre_calc_index = 0;
+
+ for (int ph = 0; ph < pooled_height; ph++) {
+ for (int pw = 0; pw < pooled_width; pw++) {
+ int index = index_n_c + ph * pooled_width + pw;
+
+ T output_val = 0.;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+            PreCalc<T> pc = pre_calc[pre_calc_index];
+ output_val += pc.w1 * offset_input[pc.pos1] +
+ pc.w2 * offset_input[pc.pos2] +
+ pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4];
+
+ pre_calc_index += 1;
+ }
+ }
+ output_val /= count;
+
+ output[index] = output_val;
+ } // for pw
+ } // for ph
+ } // for c
+ } // for n
+}
+
+template <typename T>
+void ROIAlignRotatedBackward(
+ const int nthreads,
+ // may not be contiguous. should index using n_stride, etc
+ const T* grad_output,
+ const T& spatial_scale,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio,
+ T* grad_input,
+ const T* rois,
+ const int n_stride,
+ const int c_stride,
+ const int h_stride,
+ const int w_stride) {
+ for (int index = 0; index < nthreads; index++) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* current_roi = rois + n * 6;
+ int roi_batch_ind = current_roi[0];
+
+ // Do not use rounding; this implementation detail is critical
+ // ROIAlignRotated supports align == true, i.e., continuous coordinate
+ // by default, thus the 0.5 offset
+ T offset = (T)0.5;
+ T roi_center_w = current_roi[1] * spatial_scale - offset;
+ T roi_center_h = current_roi[2] * spatial_scale - offset;
+ T roi_width = current_roi[3] * spatial_scale;
+ T roi_height = current_roi[4] * spatial_scale;
+ T theta = current_roi[5] * M_PI / 180.0;
+ T cos_theta = cos(theta);
+ T sin_theta = sin(theta);
+
+ AT_ASSERTM(
+ roi_width >= 0 && roi_height >= 0,
+ "ROIs in ROIAlignRotated do not have non-negative size!");
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ T* offset_grad_input =
+ grad_input + ((roi_batch_ind * channels + c) * height * width);
+
+ int output_offset = n * n_stride + c * c_stride;
+ const T* offset_grad_output = grad_output + output_offset;
+ const T grad_output_this_bin =
+ offset_grad_output[ph * h_stride + pw * w_stride];
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (sampling_ratio > 0)
+ ? sampling_ratio
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ T roi_start_h = -roi_height / 2.0;
+ T roi_start_w = -roi_width / 2.0;
+
+ // We do average (integral) pooling inside a bin
+ const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
+
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T yy = roi_start_h + ph * bin_size_h +
+        static_cast<T>(iy + .5f) * bin_size_h /
+        static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T xx = roi_start_w + pw * bin_size_w +
+            static_cast<T>(ix + .5f) * bin_size_w /
+            static_cast<T>(roi_bin_grid_w);
+
+ // Rotate by theta around the center and translate
+ T y = yy * cos_theta - xx * sin_theta + roi_center_h;
+ T x = yy * sin_theta + xx * cos_theta + roi_center_w;
+
+ T w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+
+ bilinear_interpolate_gradient(
+ height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high);
+
+ T g1 = grad_output_this_bin * w1 / count;
+ T g2 = grad_output_this_bin * w2 / count;
+ T g3 = grad_output_this_bin * w3 / count;
+ T g4 = grad_output_this_bin * w4 / count;
+
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ // atomic add is not needed for now since it is single threaded
+          add(offset_grad_input + y_low * width + x_low, static_cast<T>(g1));
+          add(offset_grad_input + y_low * width + x_high, static_cast<T>(g2));
+          add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
+          add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
+ } // if
+ } // ix
+ } // iy
+ } // for
+} // ROIAlignRotatedBackward
+
+at::Tensor ROIAlignRotated_forward_cpu(
+ const at::Tensor& input,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio) {
+ AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor");
+ AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor");
+
+ at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
+
+  at::CheckedFrom c = "ROIAlignRotated_forward_cpu";
+ at::checkAllSameType(c, {input_t, rois_t});
+
+ auto num_rois = rois.size(0);
+ auto channels = input.size(1);
+ auto height = input.size(2);
+ auto width = input.size(3);
+
+ at::Tensor output = at::zeros(
+ {num_rois, channels, pooled_height, pooled_width}, input.options());
+
+ auto output_size = num_rois * pooled_height * pooled_width * channels;
+
+ if (output.numel() == 0) {
+ return output;
+ }
+
+ auto input_ = input.contiguous(), rois_ = rois.contiguous();
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ input.scalar_type(), "ROIAlignRotated_forward", [&] {
+        ROIAlignRotatedForward<scalar_t>(
+ output_size,
+            input_.data_ptr<scalar_t>(),
+ spatial_scale,
+ channels,
+ height,
+ width,
+ pooled_height,
+ pooled_width,
+ sampling_ratio,
+            rois_.data_ptr<scalar_t>(),
+            output.data_ptr<scalar_t>());
+ });
+ return output;
+}
+
+at::Tensor ROIAlignRotated_backward_cpu(
+ const at::Tensor& grad,
+ const at::Tensor& rois,
+ const float spatial_scale,
+ const int pooled_height,
+ const int pooled_width,
+ const int batch_size,
+ const int channels,
+ const int height,
+ const int width,
+ const int sampling_ratio) {
+ AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor");
+ AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor");
+
+ at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};
+
+ at::CheckedFrom c = "ROIAlignRotated_backward_cpu";
+ at::checkAllSameType(c, {grad_t, rois_t});
+
+ at::Tensor grad_input =
+ at::zeros({batch_size, channels, height, width}, grad.options());
+
+ // handle possibly empty gradients
+ if (grad.numel() == 0) {
+ return grad_input;
+ }
+
+ // get stride values to ensure indexing into gradients is correct.
+ int n_stride = grad.stride(0);
+ int c_stride = grad.stride(1);
+ int h_stride = grad.stride(2);
+ int w_stride = grad.stride(3);
+
+ auto rois_ = rois.contiguous();
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      grad.scalar_type(), "ROIAlignRotated_backward", [&] {
+        ROIAlignRotatedBackward<scalar_t>(
+            grad.numel(),
+            grad.data_ptr<scalar_t>(),
+ spatial_scale,
+ channels,
+ height,
+ width,
+ pooled_height,
+ pooled_width,
+ sampling_ratio,
+            grad_input.data_ptr<scalar_t>(),
+            rois_.data_ptr<scalar_t>(),
+ n_stride,
+ c_stride,
+ h_stride,
+ w_stride);
+ });
+ return grad_input;
+}
+
+} // namespace detectron2
diff --git a/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..fca186519143b168a912c880a4cf495a0a5a9322
--- /dev/null
+++ b/vendor/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu
@@ -0,0 +1,443 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+
+// TODO make it in a common file
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+ i += blockDim.x * gridDim.x)
+
+// Note: this implementation originates from the Caffe2 ROIAlignRotated Op
+// and PyTorch ROIAlign (non-rotated) Op implementations.
+// The key difference between this implementation and those is that
+// we don't do the "legacy offset" in this version, as there aren't many previous
+// works, if any, using the "legacy" ROIAlignRotated Op.
+// This would make the interface a bit cleaner.
+
+namespace detectron2 {
+
+namespace {
+
+template <typename T>
+__device__ T bilinear_interpolate(
+ const T* input,
+ const int height,
+ const int width,
+ T y,
+ T x) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ // empty
+ return 0;
+ }
+
+ if (y < 0) {
+ y = 0;
+ }
+
+ if (x < 0) {
+ x = 0;
+ }
+
+ int y_low = (int)y;
+ int x_low = (int)x;
+ int y_high;
+ int x_high;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+ // do bilinear interpolation
+ T v1 = input[y_low * width + x_low];
+ T v2 = input[y_low * width + x_high];
+ T v3 = input[y_high * width + x_low];
+ T v4 = input[y_high * width + x_high];
+ T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+ return val;
+}
+
+template <typename T>
+__device__ void bilinear_interpolate_gradient(
+ const int height,
+ const int width,
+ T y,
+ T x,
+ T& w1,
+ T& w2,
+ T& w3,
+ T& w4,
+ int& x_low,
+ int& x_high,
+ int& y_low,
+ int& y_high) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ // empty
+ w1 = w2 = w3 = w4 = 0.;
+ x_low = x_high = y_low = y_high = -1;
+ return;
+ }
+
+ if (y < 0) {
+ y = 0;
+ }
+
+ if (x < 0) {
+ x = 0;
+ }
+
+ y_low = (int)y;
+ x_low = (int)x;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+
+ // reference in forward
+ // T v1 = input[y_low * width + x_low];
+ // T v2 = input[y_low * width + x_high];
+ // T v3 = input[y_high * width + x_low];
+ // T v4 = input[y_high * width + x_high];
+ // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+ w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ return;
+}
+
+} // namespace
+
+template <typename T>
+__global__ void RoIAlignRotatedForward(
+ const int nthreads,
+ const T* input,
+ const T spatial_scale,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio,
+ const T* rois,
+ T* top_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* current_roi = rois + n * 6;
+ int roi_batch_ind = current_roi[0];
+
+ // Do not use rounding; this implementation detail is critical
+ // ROIAlignRotated supports align == true, i.e., continuous coordinate
+ // by default, thus the 0.5 offset
+ T offset = (T)0.5;
+ T roi_center_w = current_roi[1] * spatial_scale - offset;
+ T roi_center_h = current_roi[2] * spatial_scale - offset;
+ T roi_width = current_roi[3] * spatial_scale;
+ T roi_height = current_roi[4] * spatial_scale;
+ T theta = current_roi[5] * M_PI / 180.0;
+ T cos_theta = cos(theta);
+ T sin_theta = sin(theta);
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ const T* offset_input =
+ input + (roi_batch_ind * channels + c) * height * width;
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (sampling_ratio > 0)
+ ? sampling_ratio
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ T roi_start_h = -roi_height / 2.0;
+ T roi_start_w = -roi_width / 2.0;
+
+    // We do average (integral) pooling inside a bin
+ const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
+
+ T output_val = 0.;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1
+ {
+ const T yy = roi_start_h + ph * bin_size_h +
+          static_cast<T>(iy + .5f) * bin_size_h /
+          static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T xx = roi_start_w + pw * bin_size_w +
+            static_cast<T>(ix + .5f) * bin_size_w /
+            static_cast<T>(roi_bin_grid_w);
+
+ // Rotate by theta around the center and translate
+ T y = yy * cos_theta - xx * sin_theta + roi_center_h;
+ T x = yy * sin_theta + xx * cos_theta + roi_center_w;
+
+ T val = bilinear_interpolate(offset_input, height, width, y, x);
+ output_val += val;
+ }
+ }
+ output_val /= count;
+
+ top_data[index] = output_val;
+ }
+}
+
+template <typename T>
+__global__ void RoIAlignRotatedBackwardFeature(
+ const int nthreads,
+ const T* top_diff,
+ const int num_rois,
+ const T spatial_scale,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooled_height,
+ const int pooled_width,
+ const int sampling_ratio,
+ T* bottom_diff,
+ const T* rois) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* current_roi = rois + n * 6;
+ int roi_batch_ind = current_roi[0];
+
+ // Do not use rounding; this implementation detail is critical
+ // ROIAlignRotated supports align == true, i.e., continuous coordinate
+ // by default, thus the 0.5 offset
+ T offset = (T)0.5;
+ T roi_center_w = current_roi[1] * spatial_scale - offset;
+ T roi_center_h = current_roi[2] * spatial_scale - offset;
+ T roi_width = current_roi[3] * spatial_scale;
+ T roi_height = current_roi[4] * spatial_scale;
+ T theta = current_roi[5] * M_PI / 180.0;
+ T cos_theta = cos(theta);
+ T sin_theta = sin(theta);
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ T* offset_bottom_diff =
+ bottom_diff + (roi_batch_ind * channels + c) * height * width;
+
+ int top_offset = (n * channels + c) * pooled_height * pooled_width;
+ const T* offset_top_diff = top_diff + top_offset;
+ const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (sampling_ratio > 0)
+ ? sampling_ratio
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ T roi_start_h = -roi_height / 2.0;
+ T roi_start_w = -roi_width / 2.0;
+
+ // We do average (integral) pooling inside a bin
+ const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
+
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1
+ {
+ const T yy = roi_start_h + ph * bin_size_h +
+          static_cast<T>(iy + .5f) * bin_size_h /
+          static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T xx = roi_start_w + pw * bin_size_w +
+            static_cast<T>(ix + .5f) * bin_size_w /
+ static_cast